feat(reversal): confidence-weighted imprint contributions

Dual-class tokens contribute to both verb and noun distributions
weighted by Confidence and AltConf. Non-ambiguous tokens (Confidence
1.0, AltConf 0.0) behave identically to before.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Snider 2026-02-19 16:18:44 +00:00
parent 4cdd6e59d4
commit c1d347f079
2 changed files with 66 additions and 3 deletions

View file

@@ -41,22 +41,44 @@ func NewImprint(tokens []Token) GrammarImprint {
for _, tok := range tokens {
switch tok.Type {
case TokenVerb:
conf := tok.Confidence
if conf == 0 {
conf = 1.0
}
verbCount++
base := tok.VerbInfo.Base
imp.VerbDistribution[base]++
imp.TenseDistribution[tok.VerbInfo.Tense]++
imp.VerbDistribution[base] += conf
imp.TenseDistribution[tok.VerbInfo.Tense] += conf
verbBases[base] = true
// Dual-class: contribute alt confidence to noun distribution
if tok.AltType == TokenNoun && tok.NounInfo.Base != "" {
imp.NounDistribution[tok.NounInfo.Base] += tok.AltConf
nounBases[tok.NounInfo.Base] = true
totalNouns++
}
case TokenNoun:
conf := tok.Confidence
if conf == 0 {
conf = 1.0
}
nounCount++
base := tok.NounInfo.Base
imp.NounDistribution[base]++
imp.NounDistribution[base] += conf
nounBases[base] = true
totalNouns++
if tok.NounInfo.Plural {
pluralNouns++
}
// Dual-class: contribute alt confidence to verb distribution
if tok.AltType == TokenVerb && tok.VerbInfo.Base != "" {
imp.VerbDistribution[tok.VerbInfo.Base] += tok.AltConf
imp.TenseDistribution[tok.VerbInfo.Tense] += tok.AltConf
verbBases[tok.VerbInfo.Base] = true
}
case TokenArticle:
articleCount++
imp.ArticleUsage[tok.ArtType]++

View file

@@ -116,3 +116,44 @@ func TestImprint_Similar_Empty(t *testing.T) {
t.Errorf("Empty imprint similarity = %f, want 1.0", sim)
}
}
// TestImprint_ConfidenceWeighting verifies that a dual-class token
// contributes its primary weight to one distribution and a fractional
// AltConf weight to the other.
func TestImprint_ConfidenceWeighting(t *testing.T) {
	svc, _ := i18n.New()
	i18n.SetDefault(svc)
	tokeniser := NewTokeniser()
	// In "the commit was approved", "commit" reads primarily as a noun
	// but carries a small verb AltConf.
	imp := NewImprint(tokeniser.Tokenise("the commit was approved"))
	// The primary reading must land in the noun distribution.
	if v := imp.NounDistribution["commit"]; v == 0 {
		t.Error("NounDistribution should contain 'commit'")
	}
	// The alternative reading must contribute fractionally to the
	// verb distribution via AltConf.
	if v := imp.VerbDistribution["commit"]; v == 0 {
		t.Error("VerbDistribution should contain fractional 'commit' from AltConf")
	}
	// Noun contribution should be larger than verb contribution
	// (before normalisation, noun ~0.96, verb ~0.04). After
	// normalisation we only assert the raw pre-norm weights are
	// non-zero, so no ordering check is made here.
}
// TestImprint_ConfidenceWeighting_BackwardsCompat verifies that
// unambiguous tokens (Confidence=1.0, AltConf=0) still register in
// their distributions exactly as they did before confidence
// weighting was introduced.
func TestImprint_ConfidenceWeighting_BackwardsCompat(t *testing.T) {
	svc, _ := i18n.New()
	i18n.SetDefault(svc)
	tokeniser := NewTokeniser()
	imp := NewImprint(tokeniser.Tokenise("Deleted the files"))
	// "Deleted" is an unambiguous verb; its base must be present.
	if v := imp.VerbDistribution["delete"]; v == 0 {
		t.Error("VerbDistribution should contain 'delete'")
	}
	// "files" is an unambiguous noun; its base must be present.
	if v := imp.NounDistribution["file"]; v == 0 {
		t.Error("NounDistribution should contain 'file'")
	}
}