fix(i18n): ignore deprecated grammar entries
Some checks failed
Security Scan / security (push) Successful in 12s
Test / test (push) Has been cancelled

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-04-02 03:03:44 +00:00
parent 6758585c36
commit eeffe92da0
4 changed files with 88 additions and 1 deletions

View file

@ -108,6 +108,9 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa
case string:
if grammar != nil && core.HasPrefix(fullKey, "gram.word.") {
wordKey := core.TrimPrefix(fullKey, "gram.word.")
if shouldSkipDeprecatedEnglishGrammarEntry(fullKey) {
continue
}
grammar.Words[core.Lower(wordKey)] = v
continue
}
@ -142,6 +145,9 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa
if after, ok := strings.CutPrefix(fullKey, "gram.noun."); ok {
nounName = after
}
if shouldSkipDeprecatedEnglishGrammarEntry(fullKey) {
continue
}
_, hasOne := v["one"]
_, hasOther := v["other"]
if hasOne && hasOther {
@ -338,3 +344,13 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) {
}
}
}
// shouldSkipDeprecatedEnglishGrammarEntry reports whether fullKey names a
// deprecated English grammar entry ("passed", "failed" or "skipped", under
// either the gram.noun. or gram.word. namespace) that must be dropped
// instead of being loaded into the grammar tables.
func shouldSkipDeprecatedEnglishGrammarEntry(fullKey string) bool {
	// Fixed, tiny set — a linear scan is clearer than a map and allocates nothing.
	deprecatedKeys := [...]string{
		"gram.noun.passed", "gram.noun.failed", "gram.noun.skipped",
		"gram.word.passed", "gram.word.failed", "gram.word.skipped",
	}
	for _, deprecated := range deprecatedKeys {
		if fullKey == deprecated {
			return true
		}
	}
	return false
}

View file

@ -149,9 +149,15 @@ func TestFlattenWithGrammar(t *testing.T) {
"one": "widget",
"other": "widgets",
},
"passed": map[string]any{
"one": "passed",
"other": "passed",
},
},
"word": map[string]any{
"api": "API",
"api": "API",
"failed": "failed",
"skipped": "skipped",
},
"punct": map[string]any{
"label": ":",
@ -233,11 +239,20 @@ func TestFlattenWithGrammar(t *testing.T) {
t.Errorf("widget.other = %q, want 'widgets'", n.Other)
}
}
if _, ok := grammar.Nouns["passed"]; ok {
t.Error("deprecated noun 'passed' should be ignored")
}
// Word extracted
if grammar.Words["api"] != "API" {
t.Errorf("word 'api' = %q, want 'API'", grammar.Words["api"])
}
if _, ok := grammar.Words["failed"]; ok {
t.Error("deprecated word 'failed' should be ignored")
}
if _, ok := grammar.Words["skipped"]; ok {
t.Error("deprecated word 'skipped' should be ignored")
}
// Punct extracted
if grammar.Punct.LabelSuffix != ":" {

View file

@ -202,6 +202,9 @@ func (t *Tokeniser) buildNounIndex() {
data := i18n.GetGrammarData(t.lang)
if data != nil && data.Nouns != nil {
for base, forms := range data.Nouns {
if skipDeprecatedEnglishGrammarEntry(base) {
continue
}
t.baseNouns[base] = true
if forms.Other != "" && forms.Other != base {
t.pluralToBase[forms.Other] = base
@ -505,6 +508,9 @@ func (t *Tokeniser) buildWordIndex() {
return
}
for key, display := range data.Words {
if skipDeprecatedEnglishGrammarEntry(key) {
continue
}
// Map the key itself (already lowercase)
t.words[core.Lower(key)] = key
// Map the display form (e.g., "URL" → "url", "SSH" → "ssh")
@ -612,6 +618,15 @@ func defaultWeights() map[string]float64 {
}
}
// skipDeprecatedEnglishGrammarEntry reports whether key (compared
// case-insensitively via core.Lower) is one of the deprecated English
// grammar entries — "passed", "failed" or "skipped" — that the tokeniser
// must ignore when building its noun and word indexes.
func skipDeprecatedEnglishGrammarEntry(key string) bool {
	lowered := core.Lower(key)
	return lowered == "passed" || lowered == "failed" || lowered == "skipped"
}
// MatchWord performs a case-insensitive lookup in the words map.
// Returns the category key and true if found, or ("", false) otherwise.
func (t *Tokeniser) MatchWord(word string) (string, bool) {

View file

@ -667,6 +667,47 @@ func TestTokeniser_DualClassDetection(t *testing.T) {
}
}
// TestTokeniser_IgnoresDeprecatedGrammarEntries verifies that the tokeniser
// filters the deprecated English grammar entries ("passed", "failed",
// "skipped") out of both its noun and word indexes while leaving every
// other entry available for lookup.
func TestTokeniser_IgnoresDeprecatedGrammarEntries(t *testing.T) {
	setup(t)
	const lang = "zz-deprecated"

	// Snapshot whatever grammar data this language already had, and restore
	// it once the test finishes so other tests see an unmodified registry.
	saved := i18n.GetGrammarData(lang)
	t.Cleanup(func() {
		i18n.SetGrammarData(lang, saved)
	})

	// Install a fixture mixing deprecated entries with legitimate ones.
	i18n.SetGrammarData(lang, &i18n.GrammarData{
		Nouns: map[string]i18n.NounForms{
			"passed":  {One: "passed", Other: "passed"},
			"failed":  {One: "failed", Other: "failed"},
			"skipped": {One: "skipped", Other: "skipped"},
			"commit":  {One: "commit", Other: "commits"},
		},
		Words: map[string]string{
			"passed":  "passed",
			"failed":  "failed",
			"skipped": "skipped",
			"url":     "URL",
		},
	})

	tok := NewTokeniserForLang(lang)

	// Every deprecated entry must be invisible to all three lookup paths.
	deprecated := []string{"passed", "failed", "skipped"}
	for _, entry := range deprecated {
		if tok.IsDualClass(entry) {
			t.Fatalf("%q should not be treated as dual-class", entry)
		}
		if cat, ok := tok.MatchWord(entry); ok {
			t.Fatalf("MatchWord(%q) = %q, %v; want not found", entry, cat, ok)
		}
		if _, ok := tok.MatchNoun(entry); ok {
			t.Fatalf("MatchNoun(%q) should be ignored", entry)
		}
	}

	// A non-deprecated word must survive the filtering untouched.
	if cat, ok := tok.MatchWord("url"); !ok || cat != "url" {
		t.Fatalf("MatchWord(%q) = %q, %v; want %q, true", "url", cat, ok, "url")
	}
}
func TestToken_ConfidenceField(t *testing.T) {
setup(t)
tok := NewTokeniser()