From d74550c605277797162aa504718fe9c1e92c6242 Mon Sep 17 00:00:00 2001 From: Virgil Date: Thu, 2 Apr 2026 09:23:13 +0000 Subject: [PATCH] fix(reversal): expose low-information confidence constants Co-Authored-By: Virgil --- reversal/tokeniser.go | 23 +++++++++++++++-------- reversal/tokeniser_test.go | 12 ++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index e8f27d0..84be543 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -907,9 +907,16 @@ var clauseBoundaries = map[string]bool{ } const ( - lowInformationScoreThreshold = 0.10 - lowInformationVerbConfidence = 0.55 - lowInformationNounConfidence = 0.45 + // LowInformationScoreThreshold is the cutoff below which the tokeniser + // treats a classification as weakly supported and keeps confidence near + // chance rather than normalising a single prior into 100% confidence. + LowInformationScoreThreshold = 0.10 + // LowInformationVerbConfidence is the confidence assigned to the verb side + // of a low-information ambiguous token, as its primary or alternative type. + LowInformationVerbConfidence = 0.55 + // LowInformationNounConfidence is the confidence assigned to the noun side + // of a low-information ambiguous token, as its primary or alternative type. + LowInformationNounConfidence = 0.45 ) // Tokenise splits text on whitespace and classifies each word using a @@ -1539,17 +1546,17 @@ func (t *Tokeniser) resolveToken(tok *Token, verbScore, nounScore float64, compo // If only the default prior fired, keep confidence near chance rather than // pretending the classification is strongly supported. 
- if total < lowInformationScoreThreshold { + if total < LowInformationScoreThreshold { if verbScore >= nounScore { tok.Type = TokenVerb - tok.Confidence = lowInformationVerbConfidence + tok.Confidence = LowInformationVerbConfidence tok.AltType = TokenNoun - tok.AltConf = lowInformationNounConfidence + tok.AltConf = LowInformationNounConfidence } else { tok.Type = TokenNoun - tok.Confidence = lowInformationNounConfidence + tok.Confidence = LowInformationNounConfidence tok.AltType = TokenVerb - tok.AltConf = lowInformationVerbConfidence + tok.AltConf = LowInformationVerbConfidence } } else { if verbScore >= nounScore { diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go index b827f85..0b1e2cc 100644 --- a/reversal/tokeniser_test.go +++ b/reversal/tokeniser_test.go @@ -1323,6 +1323,18 @@ func TestTokeniserSignalWeights_ReturnsCopy(t *testing.T) { } } +func TestLowInformationConfidenceConstants(t *testing.T) { + if LowInformationScoreThreshold != 0.10 { + t.Fatalf("LowInformationScoreThreshold = %v, want 0.10", LowInformationScoreThreshold) + } + if LowInformationVerbConfidence != 0.55 { + t.Fatalf("LowInformationVerbConfidence = %v, want 0.55", LowInformationVerbConfidence) + } + if LowInformationNounConfidence != 0.45 { + t.Fatalf("LowInformationNounConfidence = %v, want 0.45", LowInformationNounConfidence) + } +} + func TestTokeniser_LowInformationConfidenceFloor(t *testing.T) { setup(t) tok := NewTokeniser() -- 2.45.3