fix(reversal): include contraction auxiliaries in fallback signals
All checks were successful
Security Scan / security (push) Successful in 11s
Test / test (push) Successful in 1m8s

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-04-01 23:08:59 +00:00
parent 277445cc5d
commit 460d9e8dd6
2 changed files with 26 additions and 5 deletions

View file

@ -546,11 +546,7 @@ func (t *Tokeniser) buildSignalIndex() {
t.verbAux[core.Lower(w)] = true
}
} else {
for _, w := range []string{
"is", "are", "was", "were", "has", "had", "have",
"do", "does", "did", "will", "would", "could", "should",
"can", "may", "might", "shall", "must",
} {
for _, w := range defaultVerbAuxiliaries() {
t.verbAux[w] = true
}
}
@ -564,6 +560,19 @@ func (t *Tokeniser) buildSignalIndex() {
}
}
func defaultVerbAuxiliaries() []string {
return []string{
"am", "is", "are", "was", "were",
"has", "had", "have",
"do", "does", "did",
"will", "would", "could", "should",
"can", "may", "might", "shall", "must",
"don't", "can't", "won't", "shouldn't", "couldn't", "wouldn't",
"doesn't", "didn't", "isn't", "aren't", "wasn't", "weren't",
"hasn't", "hadn't", "haven't",
}
}
func defaultWeights() map[string]float64 {
return map[string]float64{
"noun_determiner": 0.35,

View file

@ -694,6 +694,18 @@ func TestTokeniser_Disambiguate_ContractionAux(t *testing.T) {
}
}
func TestTokeniser_Disambiguate_ContractionAux_FallbackDefaults(t *testing.T) {
tok := NewTokeniserForLang("zz")
tokens := tok.Tokenise("don't run the tests")
// The hardcoded fallback auxiliaries should still recognise contractions
// even when no locale grammar data is loaded.
for _, token := range tokens {
if token.Lower == "run" && token.Type != TokenVerb {
t.Errorf("'run' after \"don't\": Type = %v, want TokenVerb", token.Type)
}
}
}
func TestTokeniser_WithSignals_Breakdown(t *testing.T) {
setup(t)
tok := NewTokeniser(WithSignals())