diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go
index 0e4738b..58d51cf 100644
--- a/reversal/tokeniser.go
+++ b/reversal/tokeniser.go
@@ -892,6 +892,16 @@ var clauseBoundaries = map[string]bool{
 	"when": true, "while": true, "if": true, "then": true, "so": true,
 }
 
+// Confidence values assigned when only the default prior fired (total score
+// below lowInformationScoreThreshold). The winning classification receives
+// the primary value and the runner-up the alternative value, regardless of
+// whether the winner is the verb or the noun reading.
+const (
+	lowInformationScoreThreshold    = 0.10
+	lowInformationPrimaryConfidence = 0.55
+	lowInformationAltConfidence     = 0.45
+)
+
 // Tokenise splits text on whitespace and classifies each word using a
 // two-pass algorithm:
 //
@@ -1519,17 +1529,17 @@ func (t *Tokeniser) resolveToken(tok *Token, verbScore, nounScore float64, compo
 
 	// If only the default prior fired, keep confidence near chance rather than
 	// pretending the classification is strongly supported.
-	if total < 0.10 {
+	if total < lowInformationScoreThreshold {
 		if verbScore >= nounScore {
 			tok.Type = TokenVerb
-			tok.Confidence = 0.55
+			tok.Confidence = lowInformationPrimaryConfidence
 			tok.AltType = TokenNoun
-			tok.AltConf = 0.45
+			tok.AltConf = lowInformationAltConfidence
 		} else {
 			tok.Type = TokenNoun
-			tok.Confidence = 0.55
+			tok.Confidence = lowInformationPrimaryConfidence
 			tok.AltType = TokenVerb
-			tok.AltConf = 0.45
+			tok.AltConf = lowInformationAltConfidence
 		}
 	} else {
 		if verbScore >= nounScore {
diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go
index 3fbee23..01600a0 100644
--- a/reversal/tokeniser_test.go
+++ b/reversal/tokeniser_test.go
@@ -1305,6 +1305,28 @@ func TestDefaultWeights_ReturnsCopy(t *testing.T) {
 	}
 }
 
+func TestTokeniser_LowInformationConfidenceFloor(t *testing.T) {
+	setup(t)
+	tok := NewTokeniser()
+
+	tokens := tok.Tokenise("maybe commit")
+	if len(tokens) != 2 {
+		t.Fatalf("Tokenise(maybe commit) produced %d tokens, want 2", len(tokens))
+	}
+	if tokens[1].Type != TokenVerb {
+		t.Fatalf("Tokenise(maybe commit) Type = %v, want TokenVerb", tokens[1].Type)
+	}
+	if tokens[1].Confidence != 0.55 {
+		t.Fatalf("Tokenise(maybe commit) Confidence = %v, want 0.55", tokens[1].Confidence)
+	}
+	if tokens[1].AltType != TokenNoun {
+		t.Fatalf("Tokenise(maybe commit) AltType = %v, want TokenNoun", tokens[1].AltType)
+	}
+	if tokens[1].AltConf != 0.45 {
+		t.Fatalf("Tokenise(maybe commit) AltConf = %v, want 0.45", tokens[1].AltConf)
+	}
+}
+
 // --- Benchmarks ---
 
 func benchSetup(b *testing.B) {