feat(reversal): add DisambiguationStats and dual-class round-trip tests

DisambiguationStatsFromTokens provides aggregate disambiguation metrics for Phase 2 calibration. Round-trip tests verify that all 6 dual-class words disambiguate correctly in both verb and noun contexts, and that same-role imprints converge while different-role imprints diverge.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 16:21:34 +00:00 · 2026-02-19 16:21:34 +00:00 · 3848297496
commit 3848297496
parent 4ffe614840
2 changed files with 148 additions and 0 deletions
--- a/reversal/roundtrip_test.go
+++ b/reversal/roundtrip_test.go
@ -91,3 +91,110 @@ func TestRoundTrip_SimilarDocuments(t *testing.T) {
 		t.Errorf("Similar documents (%f) should score higher than different (%f)", simSame, simDiff)
 	}
 }
+
+// TestRoundTrip_DualClassDisambiguation tokenises short phrases that contain a
+// dual-class word and checks that every token whose Lower form equals that word
+// carries the expected TokenType. A case also fails if the word never appears
+// in the tokeniser output.
+func TestRoundTrip_DualClassDisambiguation(t *testing.T) {
+	svc, err := i18n.New()
+	if err != nil {
+		// Without a working i18n service every case below would fail for an
+		// unrelated reason, so stop here with the real cause.
+		t.Fatalf("i18n.New() failed: %v", err)
+	}
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	tests := []struct {
+		name     string
+		text     string
+		word     string
+		wantType TokenType
+	}{
+		{"commit as noun", "Delete the commit", "commit", TokenNoun},
+		{"commit as verb", "Commit the changes", "commit", TokenVerb},
+		{"run as verb", "Run the tests", "run", TokenVerb},
+		{"test as noun", "The test passed", "test", TokenNoun},
+		{"build as verb", "Build the project", "build", TokenVerb},
+		{"build as noun", "The build failed", "build", TokenNoun},
+		{"check as noun", "The check passed", "check", TokenNoun},
+		{"check as verb", "Check the logs", "check", TokenVerb},
+		{"file as noun", "Delete the file", "file", TokenNoun},
+		{"file as verb", "File the report", "file", TokenVerb},
+		{"test as verb after aux", "will test the system", "test", TokenVerb},
+		{"run as noun with possessive", "his run was fast", "run", TokenNoun},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			found := false
+			for _, token := range tok.Tokenise(tt.text) {
+				if token.Lower != tt.word {
+					continue
+				}
+				found = true
+				if token.Type != tt.wantType {
+					t.Errorf("%q in %q: got Type %v, want %v (Confidence: %.2f)",
+						tt.word, tt.text, token.Type, tt.wantType, token.Confidence)
+				}
+			}
+			if !found {
+				t.Errorf("did not find %q in tokens from %q", tt.word, tt.text)
+			}
+		})
+	}
+}
+
+// TestRoundTrip_DualClassImprintConvergence compares imprints built from
+// sentences containing "commit". Two noun-usage sentences must reach a
+// similarity of at least 0.7, and a verb-usage sentence must score strictly
+// lower against the first noun-usage imprint than the noun pair does.
+func TestRoundTrip_DualClassImprintConvergence(t *testing.T) {
+	svc, err := i18n.New()
+	if err != nil {
+		// The tokeniser relies on the default i18n service; report the setup
+		// failure directly instead of a misleading similarity mismatch.
+		t.Fatalf("i18n.New() failed: %v", err)
+	}
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	// Two texts using "commit" as noun should produce similar imprints
+	imp1 := NewImprint(tok.Tokenise("the commit was approved"))
+	imp2 := NewImprint(tok.Tokenise("the commit was merged"))
+
+	sim := imp1.Similar(imp2)
+	if sim < 0.7 {
+		t.Errorf("Same-role imprint similarity = %f, want >= 0.7", sim)
+	}
+
+	// Text using "commit" as verb should diverge more
+	imp3 := NewImprint(tok.Tokenise("Commit the changes now"))
+	simDiff := imp1.Similar(imp3)
+
+	if simDiff >= sim {
+		t.Errorf("Different-role similarity (%f) should be less than same-role (%f)",
+			simDiff, sim)
+	}
+}
+
+// TestDisambiguationStats tokenises "The commit was approved" and checks the
+// aggregate stats: four tokens in total, at least one ambiguous token, at least
+// one ambiguous token resolved as a noun, and a non-zero average confidence.
+func TestDisambiguationStats(t *testing.T) {
+	svc, err := i18n.New()
+	if err != nil {
+		// Surface the setup failure itself rather than a cascade of stat
+		// mismatches caused by a missing i18n service.
+		t.Fatalf("i18n.New() failed: %v", err)
+	}
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	tokens := tok.Tokenise("The commit was approved")
+	stats := DisambiguationStatsFromTokens(tokens)
+
+	if stats.TotalTokens != 4 {
+		t.Errorf("TotalTokens = %d, want 4", stats.TotalTokens)
+	}
+	if stats.AmbiguousTokens < 1 {
+		t.Errorf("AmbiguousTokens = %d, want >= 1 ('commit' is dual-class)", stats.AmbiguousTokens)
+	}
+	if stats.ResolvedAsNoun < 1 {
+		t.Errorf("ResolvedAsNoun = %d, want >= 1", stats.ResolvedAsNoun)
+	}
+	if stats.AvgConfidence == 0 {
+		t.Error("AvgConfidence = 0, want > 0")
+	}
+}
+
+// TestDisambiguationStats_NoAmbiguity tokenises "Deleted the files" and expects
+// the stats to report zero ambiguous tokens and an average confidence of
+// exactly 1.0.
+func TestDisambiguationStats_NoAmbiguity(t *testing.T) {
+	svc, err := i18n.New()
+	if err != nil {
+		// Surface the setup failure itself rather than a misleading stats
+		// mismatch caused by a missing i18n service.
+		t.Fatalf("i18n.New() failed: %v", err)
+	}
+	i18n.SetDefault(svc)
+	tok := NewTokeniser()
+
+	tokens := tok.Tokenise("Deleted the files")
+	stats := DisambiguationStatsFromTokens(tokens)
+
+	if stats.AmbiguousTokens != 0 {
+		t.Errorf("AmbiguousTokens = %d, want 0 (no dual-class words)", stats.AmbiguousTokens)
+	}
+	if stats.AvgConfidence != 1.0 {
+		t.Errorf("AvgConfidence = %f, want 1.0 (all unambiguous)", stats.AvgConfidence)
+	}
+}
--- a/reversal/tokeniser.go
+++ b/reversal/tokeniser.go
@ -960,3 +960,44 @@ func matchPunctuation(punct string) (string, bool) {
 	}
 	return "", false
 }
+
+// DisambiguationStats holds the aggregate counts and mean confidence that DisambiguationStatsFromTokens derives from a token slice.
+type DisambiguationStats struct {
+	TotalTokens     int     // length of the input token slice
+	AmbiguousTokens int     // tokens with a non-zero AltType and AltConf > 0
+	ResolvedAsVerb  int     // ambiguous tokens whose Type is TokenVerb
+	ResolvedAsNoun  int     // ambiguous tokens whose Type is TokenNoun
+	AvgConfidence   float64 // mean Confidence over tokens with a known Type and Confidence > 0; zero when there are none
+	LowConfidence   int     // tokens counted in AvgConfidence whose Confidence is below 0.7
+}
+
+// DisambiguationStatsFromTokens summarises how the tokens in the slice were
+// disambiguated.
+//
+// A token counts as ambiguous when it has a non-zero AltType and a positive
+// AltConf; ambiguous tokens are further tallied by whether they resolved to
+// TokenVerb or TokenNoun. Independently of ambiguity, every token whose Type is
+// not TokenUnknown and whose Confidence is positive contributes to
+// AvgConfidence, and to LowConfidence when its Confidence is below 0.7.
+// AvgConfidence stays zero when no token contributes.
+func DisambiguationStatsFromTokens(tokens []Token) DisambiguationStats {
+	stats := DisambiguationStats{TotalTokens: len(tokens)}
+
+	var (
+		total  float64 // running sum of contributing confidences
+		scored int     // number of tokens contributing to the average
+	)
+
+	for i := range tokens {
+		cur := &tokens[i]
+
+		if cur.AltType != 0 && cur.AltConf > 0 {
+			stats.AmbiguousTokens++
+			switch cur.Type {
+			case TokenVerb:
+				stats.ResolvedAsVerb++
+			case TokenNoun:
+				stats.ResolvedAsNoun++
+			}
+		}
+
+		if cur.Type != TokenUnknown && cur.Confidence > 0 {
+			total += cur.Confidence
+			scored++
+			if cur.Confidence < 0.7 {
+				stats.LowConfidence++
+			}
+		}
+	}
+
+	if scored == 0 {
+		return stats
+	}
+	stats.AvgConfidence = total / float64(scored)
+	return stats
+}