From eeffe92da0fb7ea69696f5bbbabe30eb2c0d02a1 Mon Sep 17 00:00:00 2001 From: Virgil Date: Thu, 2 Apr 2026 03:03:44 +0000 Subject: [PATCH] fix(i18n): ignore deprecated grammar entries Co-Authored-By: Virgil --- loader.go | 16 +++++++++++++++ loader_test.go | 17 +++++++++++++++- reversal/tokeniser.go | 15 ++++++++++++++ reversal/tokeniser_test.go | 41 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/loader.go b/loader.go index f1fbe06..8d131af 100644 --- a/loader.go +++ b/loader.go @@ -108,6 +108,9 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa case string: if grammar != nil && core.HasPrefix(fullKey, "gram.word.") { wordKey := core.TrimPrefix(fullKey, "gram.word.") + if shouldSkipDeprecatedEnglishGrammarEntry(fullKey) { + continue + } grammar.Words[core.Lower(wordKey)] = v continue } @@ -142,6 +145,9 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa if after, ok := strings.CutPrefix(fullKey, "gram.noun."); ok { nounName = after } + if shouldSkipDeprecatedEnglishGrammarEntry(fullKey) { + continue + } _, hasOne := v["one"] _, hasOther := v["other"] if hasOne && hasOther { @@ -338,3 +344,13 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) { } } } + +func shouldSkipDeprecatedEnglishGrammarEntry(fullKey string) bool { + switch fullKey { + case "gram.noun.passed", "gram.noun.failed", "gram.noun.skipped", + "gram.word.passed", "gram.word.failed", "gram.word.skipped": + return true + default: + return false + } +} diff --git a/loader_test.go b/loader_test.go index b51fdfe..f740dd7 100644 --- a/loader_test.go +++ b/loader_test.go @@ -149,9 +149,15 @@ func TestFlattenWithGrammar(t *testing.T) { "one": "widget", "other": "widgets", }, + "passed": map[string]any{ + "one": "passed", + "other": "passed", + }, }, "word": map[string]any{ - "api": "API", + "api": "API", + "failed": "failed", + "skipped": "skipped", }, "punct": map[string]any{ "label": ":", @@ -233,11 +239,20 @@ func TestFlattenWithGrammar(t *testing.T) { t.Errorf("widget.other = %q, want 'widgets'", n.Other) } } + if _, ok := grammar.Nouns["passed"]; ok { + t.Error("deprecated noun 'passed' should be ignored") + } // Word extracted if grammar.Words["api"] != "API" { t.Errorf("word 'api' = %q, want 'API'", grammar.Words["api"]) } + if _, ok := grammar.Words["failed"]; ok { + t.Error("deprecated word 'failed' should be ignored") + } + if _, ok := grammar.Words["skipped"]; ok { + t.Error("deprecated word 'skipped' should be ignored") + } // Punct extracted if grammar.Punct.LabelSuffix != ":" { diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index a36169d..dcb91e0 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -202,6 +202,9 @@ func (t *Tokeniser) buildNounIndex() { data := i18n.GetGrammarData(t.lang) if data != nil && data.Nouns != nil { for base, forms := range data.Nouns { + if skipDeprecatedEnglishGrammarEntry(base) { + continue + } t.baseNouns[base] = true if forms.Other != "" && forms.Other != base { t.pluralToBase[forms.Other] = base @@ -505,6 +508,9 @@ func (t *Tokeniser) buildWordIndex() { return } for key, display := range data.Words { + if skipDeprecatedEnglishGrammarEntry(key) { + continue + } // Map the key itself (already lowercase) t.words[core.Lower(key)] = key // Map the display form (e.g., "URL" → "url", "SSH" → "ssh") @@ -612,6 +618,15 @@ func defaultWeights() map[string]float64 { } } +func skipDeprecatedEnglishGrammarEntry(key string) bool { + switch core.Lower(key) { + case "passed", "failed", "skipped": + return true + default: + return false + } +} + // MatchWord performs a case-insensitive lookup in the words map. // Returns the category key and true if found, or ("", false) otherwise. func (t *Tokeniser) MatchWord(word string) (string, bool) { diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go index 05a9364..840a87c 100644 --- a/reversal/tokeniser_test.go +++ b/reversal/tokeniser_test.go @@ -667,6 +667,47 @@ func TestTokeniser_DualClassDetection(t *testing.T) { } } +func TestTokeniser_IgnoresDeprecatedGrammarEntries(t *testing.T) { + setup(t) + + const lang = "zz-deprecated" + original := i18n.GetGrammarData(lang) + t.Cleanup(func() { + i18n.SetGrammarData(lang, original) + }) + + i18n.SetGrammarData(lang, &i18n.GrammarData{ + Nouns: map[string]i18n.NounForms{ + "passed": {One: "passed", Other: "passed"}, + "failed": {One: "failed", Other: "failed"}, + "skipped": {One: "skipped", Other: "skipped"}, + "commit": {One: "commit", Other: "commits"}, + }, + Words: map[string]string{ + "passed": "passed", + "failed": "failed", + "skipped": "skipped", + "url": "URL", + }, + }) + + tok := NewTokeniserForLang(lang) + for _, word := range []string{"passed", "failed", "skipped"} { + if tok.IsDualClass(word) { + t.Fatalf("%q should not be treated as dual-class", word) + } + if cat, ok := tok.MatchWord(word); ok { + t.Fatalf("MatchWord(%q) = %q, %v; want not found", word, cat, ok) + } + if _, ok := tok.MatchNoun(word); ok { + t.Fatalf("MatchNoun(%q) should be ignored", word) + } + } + if cat, ok := tok.MatchWord("url"); !ok || cat != "url" { + t.Fatalf("MatchWord(%q) = %q, %v; want %q, true", "url", cat, ok, "url") + } +} + func TestToken_ConfidenceField(t *testing.T) { setup(t) tok := NewTokeniser()