feat(reversal): confidence-weighted imprint contributions

Dual-class tokens now contribute to both the verb and noun distributions: the primary class is weighted by Confidence and the alternate class by AltConf. Non-ambiguous tokens (Confidence 1.0, AltConf 0.0) behave identically to before.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 16:18:44 +00:00 · 2026-02-19 16:18:44 +00:00 · c1d347f079
commit c1d347f079
parent 4cdd6e59d4
2 changed files with 66 additions and 3 deletions
--- a/reversal/imprint.go
+++ b/reversal/imprint.go
@ -41,22 +41,44 @@ func NewImprint(tokens []Token) GrammarImprint {
 	for _, tok := range tokens {
 		switch tok.Type {
 		case TokenVerb:
+			conf := tok.Confidence
+			if conf == 0 {
+				conf = 1.0
+			}
 			verbCount++
 			base := tok.VerbInfo.Base
-			imp.VerbDistribution[base]++
-			imp.TenseDistribution[tok.VerbInfo.Tense]++
+			imp.VerbDistribution[base] += conf
+			imp.TenseDistribution[tok.VerbInfo.Tense] += conf
 			verbBases[base] = true

+			// Dual-class: contribute alt confidence to noun distribution
+			if tok.AltType == TokenNoun && tok.NounInfo.Base != "" {
+				imp.NounDistribution[tok.NounInfo.Base] += tok.AltConf
+				nounBases[tok.NounInfo.Base] = true
+				totalNouns++
+			}
+
 		case TokenNoun:
+			conf := tok.Confidence
+			if conf == 0 {
+				conf = 1.0
+			}
 			nounCount++
 			base := tok.NounInfo.Base
-			imp.NounDistribution[base]++
+			imp.NounDistribution[base] += conf
 			nounBases[base] = true
 			totalNouns++
 			if tok.NounInfo.Plural {
 				pluralNouns++
 			}

+			// Dual-class: contribute alt confidence to verb distribution
+			if tok.AltType == TokenVerb && tok.VerbInfo.Base != "" {
+				imp.VerbDistribution[tok.VerbInfo.Base] += tok.AltConf
+				imp.TenseDistribution[tok.VerbInfo.Tense] += tok.AltConf
+				verbBases[tok.VerbInfo.Base] = true
+			}
+
 		case TokenArticle:
 			articleCount++
 			imp.ArticleUsage[tok.ArtType]++
--- a/reversal/imprint_test.go
+++ b/reversal/imprint_test.go
@ -116,3 +116,44 @@ func TestImprint_Similar_Empty(t *testing.T) {
 		t.Errorf("Empty imprint similarity = %f, want 1.0", sim)
 	}
 }
+
+func TestImprint_ConfidenceWeighting(t *testing.T) {
+	svc, _ := i18n.New()
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	// "the commit was approved": "commit" is expected to be tagged as a noun with high confidence
+	tokens := tok.Tokenise("the commit was approved")
+	imp := NewImprint(tokens)
+
+	// The primary (noun) reading must register in the noun distribution
+	if imp.NounDistribution["commit"] == 0 {
+		t.Error("NounDistribution should contain 'commit'")
+	}
+
+	// The alternate (verb) reading must also register, via the AltConf contribution
+	if imp.VerbDistribution["commit"] == 0 {
+		t.Error("VerbDistribution should contain fractional 'commit' from AltConf")
+	}
+
+	// NOTE: the relative weighting (noun expected to outweigh verb; roughly
+	// noun ~0.96 vs verb ~0.04 before normalisation) is NOT asserted here.
+	// Only non-zero presence in each distribution is checked above.
+}
+
+func TestImprint_ConfidenceWeighting_BackwardsCompat(t *testing.T) {
+	svc, _ := i18n.New()
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	// Non-ambiguous input: "delete" must still appear as a verb base and "file" as a noun base
+	tokens := tok.Tokenise("Deleted the files")
+	imp := NewImprint(tokens)
+
+	if imp.VerbDistribution["delete"] == 0 {
+		t.Error("VerbDistribution should contain 'delete'")
+	}
+	if imp.NounDistribution["file"] == 0 {
+		t.Error("NounDistribution should contain 'file'")
+	}
+}