From 0f62be1abd4cdb15fde121d91939f458b415a6d7 Mon Sep 17 00:00:00 2001
From: Virgil
Date: Thu, 2 Apr 2026 09:26:53 +0000
Subject: [PATCH] refactor(reversal): clarify ambiguous token scoring

Co-Authored-By: Virgil
---
 reversal/tokeniser.go | 47 +++++++++++++++++--------------------------
 1 file changed, 18 insertions(+), 29 deletions(-)

diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go
index 84be543..0d96653 100644
--- a/reversal/tokeniser.go
+++ b/reversal/tokeniser.go
@@ -1542,35 +1542,7 @@ func (t *Tokeniser) checkInflectionEcho(tokens []Token, idx int) (bool, bool) {
 // resolveToken assigns the final classification to an ambiguous token
 // based on verb and noun scores from disambiguation signals.
 func (t *Tokeniser) resolveToken(tok *Token, verbScore, nounScore float64, components []SignalComponent) {
-	total := verbScore + nounScore
-
-	// If only the default prior fired, keep confidence near chance rather than
-	// pretending the classification is strongly supported.
-	if total < LowInformationScoreThreshold {
-		if verbScore >= nounScore {
-			tok.Type = TokenVerb
-			tok.Confidence = LowInformationVerbConfidence
-			tok.AltType = TokenNoun
-			tok.AltConf = LowInformationNounConfidence
-		} else {
-			tok.Type = TokenNoun
-			tok.Confidence = LowInformationNounConfidence
-			tok.AltType = TokenVerb
-			tok.AltConf = LowInformationVerbConfidence
-		}
-	} else {
-		if verbScore >= nounScore {
-			tok.Type = TokenVerb
-			tok.Confidence = verbScore / total
-			tok.AltType = TokenNoun
-			tok.AltConf = nounScore / total
-		} else {
-			tok.Type = TokenNoun
-			tok.Confidence = nounScore / total
-			tok.AltType = TokenVerb
-			tok.AltConf = verbScore / total
-		}
-	}
+	tok.Type, tok.Confidence, tok.AltType, tok.AltConf = classifyAmbiguousToken(verbScore, nounScore)
 
 	if t.withSignals {
 		tok.Signals = &SignalBreakdown{
@@ -1581,6 +1553,23 @@ func (t *Tokeniser) resolveToken(tok *Token, verbScore, nounScore float64, compo
 	}
 }
 
+func classifyAmbiguousToken(verbScore, nounScore float64) (TokenType, float64, TokenType, float64) {
+	total := verbScore + nounScore
+
+	// Keep the fallback close to chance when only the default prior fired.
+	if total < LowInformationScoreThreshold {
+		if verbScore >= nounScore {
+			return TokenVerb, LowInformationVerbConfidence, TokenNoun, LowInformationNounConfidence
+		}
+		return TokenNoun, LowInformationNounConfidence, TokenVerb, LowInformationVerbConfidence
+	}
+
+	if verbScore >= nounScore {
+		return TokenVerb, verbScore / total, TokenNoun, nounScore / total
+	}
+	return TokenNoun, nounScore / total, TokenVerb, verbScore / total
+}
+
 // splitTrailingPunct separates a word from its trailing punctuation.
 // Returns the word and the punctuation suffix. It also recognises
 // standalone punctuation tokens such as "." and ")".
-- 
2.45.3