diff --git a/reversal/imprint.go b/reversal/imprint.go
index 490bff5..1fe14cf 100644
--- a/reversal/imprint.go
+++ b/reversal/imprint.go
@@ -41,22 +41,44 @@ func NewImprint(tokens []Token) GrammarImprint {
 	for _, tok := range tokens {
 		switch tok.Type {
 		case TokenVerb:
+			conf := tok.Confidence
+			if conf == 0 { // a zero Confidence is treated as full weight (1.0)
+				conf = 1.0
+			}
 			verbCount++
 			base := tok.VerbInfo.Base
-			imp.VerbDistribution[base]++
-			imp.TenseDistribution[tok.VerbInfo.Tense]++
+			imp.VerbDistribution[base] += conf
+			imp.TenseDistribution[tok.VerbInfo.Tense] += conf
 			verbBases[base] = true
+			// Dual-class: a verb token whose alternative reading (AltType) is a noun also adds AltConf to the noun distribution. NOTE(review): totalNouns grows by a full 1 here rather than by AltConf — confirm that is intended.
+			if tok.AltType == TokenNoun && tok.NounInfo.Base != "" {
+				imp.NounDistribution[tok.NounInfo.Base] += tok.AltConf
+				nounBases[tok.NounInfo.Base] = true
+				totalNouns++
+			}
+
+
 		case TokenNoun:
+			conf := tok.Confidence
+			if conf == 0 { // a zero Confidence is treated as full weight (1.0)
+				conf = 1.0
+			}
 			nounCount++
 			base := tok.NounInfo.Base
-			imp.NounDistribution[base]++
+			imp.NounDistribution[base] += conf
 			nounBases[base] = true
 			totalNouns++
 			if tok.NounInfo.Plural {
 				pluralNouns++
 			}
+			// Dual-class: a noun token whose alternative reading (AltType) is a verb also adds AltConf to the verb and tense distributions; verbCount is not incremented on this path.
+			if tok.AltType == TokenVerb && tok.VerbInfo.Base != "" {
+				imp.VerbDistribution[tok.VerbInfo.Base] += tok.AltConf
+				imp.TenseDistribution[tok.VerbInfo.Tense] += tok.AltConf
+				verbBases[tok.VerbInfo.Base] = true
+			}
+
+
 		case TokenArticle:
 			articleCount++
 			imp.ArticleUsage[tok.ArtType]++
diff --git a/reversal/imprint_test.go b/reversal/imprint_test.go
index 77f3079..795dc68 100644
--- a/reversal/imprint_test.go
+++ b/reversal/imprint_test.go
@@ -116,3 +116,44 @@ func TestImprint_Similar_Empty(t *testing.T) {
 		t.Errorf("Empty imprint similarity = %f, want 1.0", sim)
 	}
 }
+
+func TestImprint_ConfidenceWeighting(t *testing.T) {
+	svc, _ := i18n.New() // NOTE(review): the error returned by i18n.New is discarded
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	// In "the commit was approved" the tokeniser is expected to classify "commit" as a noun with high confidence.
+	tokens := tok.Tokenise("the commit was approved")
+	imp := NewImprint(tokens)
+
+	// The primary (noun) reading must land in NounDistribution.
+	if imp.NounDistribution["commit"] == 0 {
+		t.Error("NounDistribution should contain 'commit'")
+	}
+
+	// The alternative (verb) reading must also land in VerbDistribution, weighted by AltConf.
+	if imp.VerbDistribution["commit"] == 0 {
+		t.Error("VerbDistribution should contain fractional 'commit' from AltConf")
+	}
+
+	// NOTE(review): the noun weight is expected to exceed the verb weight
+	// (the original note cites roughly 0.96 vs 0.04 before normalisation — unverified here),
+	// but this test only asserts that both entries are non-zero; the ordering itself is not checked.
+}
+
+func TestImprint_ConfidenceWeighting_BackwardsCompat(t *testing.T) {
+	svc, _ := i18n.New() // NOTE(review): the error returned by i18n.New is discarded
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	// Unambiguous tokens are expected to carry Confidence=1.0 and AltConf=0, so both distributions should still be populated as before.
+	tokens := tok.Tokenise("Deleted the files")
+	imp := NewImprint(tokens)
+
+	if imp.VerbDistribution["delete"] == 0 {
+		t.Error("VerbDistribution should contain 'delete'")
+	}
+	if imp.NounDistribution["file"] == 0 {
+		t.Error("NounDistribution should contain 'file'")
+	}
+}