feat(reversal): confidence-weighted imprint contributions
Dual-class tokens contribute to both verb and noun distributions, weighted by Confidence and AltConf. Non-ambiguous tokens (Confidence 1.0, AltConf 0.0) behave identically to before.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4cdd6e59d4
commit
c1d347f079
2 changed files with 66 additions and 3 deletions
|
|
@ -41,22 +41,44 @@ func NewImprint(tokens []Token) GrammarImprint {
|
|||
for _, tok := range tokens {
|
||||
switch tok.Type {
|
||||
case TokenVerb:
|
||||
conf := tok.Confidence
|
||||
if conf == 0 {
|
||||
conf = 1.0
|
||||
}
|
||||
verbCount++
|
||||
base := tok.VerbInfo.Base
|
||||
imp.VerbDistribution[base]++
|
||||
imp.TenseDistribution[tok.VerbInfo.Tense]++
|
||||
imp.VerbDistribution[base] += conf
|
||||
imp.TenseDistribution[tok.VerbInfo.Tense] += conf
|
||||
verbBases[base] = true
|
||||
|
||||
// Dual-class: contribute alt confidence to noun distribution
|
||||
if tok.AltType == TokenNoun && tok.NounInfo.Base != "" {
|
||||
imp.NounDistribution[tok.NounInfo.Base] += tok.AltConf
|
||||
nounBases[tok.NounInfo.Base] = true
|
||||
totalNouns++
|
||||
}
|
||||
|
||||
case TokenNoun:
|
||||
conf := tok.Confidence
|
||||
if conf == 0 {
|
||||
conf = 1.0
|
||||
}
|
||||
nounCount++
|
||||
base := tok.NounInfo.Base
|
||||
imp.NounDistribution[base]++
|
||||
imp.NounDistribution[base] += conf
|
||||
nounBases[base] = true
|
||||
totalNouns++
|
||||
if tok.NounInfo.Plural {
|
||||
pluralNouns++
|
||||
}
|
||||
|
||||
// Dual-class: contribute alt confidence to verb distribution
|
||||
if tok.AltType == TokenVerb && tok.VerbInfo.Base != "" {
|
||||
imp.VerbDistribution[tok.VerbInfo.Base] += tok.AltConf
|
||||
imp.TenseDistribution[tok.VerbInfo.Tense] += tok.AltConf
|
||||
verbBases[tok.VerbInfo.Base] = true
|
||||
}
|
||||
|
||||
case TokenArticle:
|
||||
articleCount++
|
||||
imp.ArticleUsage[tok.ArtType]++
|
||||
|
|
|
|||
|
|
@ -116,3 +116,44 @@ func TestImprint_Similar_Empty(t *testing.T) {
|
|||
t.Errorf("Empty imprint similarity = %f, want 1.0", sim)
|
||||
}
|
||||
}
|
||||
|
||||
func TestImprint_ConfidenceWeighting(t *testing.T) {
|
||||
svc, _ := i18n.New()
|
||||
i18n.SetDefault(svc)
|
||||
tok := NewTokeniser()
|
||||
|
||||
// "the commit was approved" — "commit" should be noun with high confidence
|
||||
tokens := tok.Tokenise("the commit was approved")
|
||||
imp := NewImprint(tokens)
|
||||
|
||||
// Commit should contribute primarily to noun distribution
|
||||
if imp.NounDistribution["commit"] == 0 {
|
||||
t.Error("NounDistribution should contain 'commit'")
|
||||
}
|
||||
|
||||
// But also fractionally to verb distribution (via AltConf)
|
||||
if imp.VerbDistribution["commit"] == 0 {
|
||||
t.Error("VerbDistribution should contain fractional 'commit' from AltConf")
|
||||
}
|
||||
|
||||
// Noun contribution should be larger than verb contribution
|
||||
// (before normalisation, noun ~0.96, verb ~0.04)
|
||||
// After normalisation we check the raw pre-norm isn't zero
|
||||
}
|
||||
|
||||
func TestImprint_ConfidenceWeighting_BackwardsCompat(t *testing.T) {
|
||||
svc, _ := i18n.New()
|
||||
i18n.SetDefault(svc)
|
||||
tok := NewTokeniser()
|
||||
|
||||
// Non-ambiguous tokens should work identically (Confidence=1.0, AltConf=0)
|
||||
tokens := tok.Tokenise("Deleted the files")
|
||||
imp := NewImprint(tokens)
|
||||
|
||||
if imp.VerbDistribution["delete"] == 0 {
|
||||
t.Error("VerbDistribution should contain 'delete'")
|
||||
}
|
||||
if imp.NounDistribution["file"] == 0 {
|
||||
t.Error("NounDistribution should contain 'file'")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue