fix(i18n): ignore deprecated grammar entries
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
6758585c36
commit
eeffe92da0
4 changed files with 88 additions and 1 deletion
16
loader.go
16
loader.go
|
|
@ -108,6 +108,9 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa
|
|||
case string:
|
||||
if grammar != nil && core.HasPrefix(fullKey, "gram.word.") {
|
||||
wordKey := core.TrimPrefix(fullKey, "gram.word.")
|
||||
if shouldSkipDeprecatedEnglishGrammarEntry(fullKey) {
|
||||
continue
|
||||
}
|
||||
grammar.Words[core.Lower(wordKey)] = v
|
||||
continue
|
||||
}
|
||||
|
|
@ -142,6 +145,9 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa
|
|||
if after, ok := strings.CutPrefix(fullKey, "gram.noun."); ok {
|
||||
nounName = after
|
||||
}
|
||||
if shouldSkipDeprecatedEnglishGrammarEntry(fullKey) {
|
||||
continue
|
||||
}
|
||||
_, hasOne := v["one"]
|
||||
_, hasOther := v["other"]
|
||||
if hasOne && hasOther {
|
||||
|
|
@ -338,3 +344,13 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shouldSkipDeprecatedEnglishGrammarEntry reports whether fullKey names one of
// the deprecated English grammar entries — "passed", "failed", or "skipped"
// under either the gram.noun. or gram.word. prefix — that the loader must
// ignore when flattening locale data.
func shouldSkipDeprecatedEnglishGrammarEntry(fullKey string) bool {
	for _, word := range []string{"passed", "failed", "skipped"} {
		if fullKey == "gram.noun."+word || fullKey == "gram.word."+word {
			return true
		}
	}
	return false
}
|
||||
|
|
|
|||
|
|
@ -149,9 +149,15 @@ func TestFlattenWithGrammar(t *testing.T) {
|
|||
"one": "widget",
|
||||
"other": "widgets",
|
||||
},
|
||||
"passed": map[string]any{
|
||||
"one": "passed",
|
||||
"other": "passed",
|
||||
},
|
||||
},
|
||||
"word": map[string]any{
|
||||
"api": "API",
|
||||
"api": "API",
|
||||
"failed": "failed",
|
||||
"skipped": "skipped",
|
||||
},
|
||||
"punct": map[string]any{
|
||||
"label": ":",
|
||||
|
|
@ -233,11 +239,20 @@ func TestFlattenWithGrammar(t *testing.T) {
|
|||
t.Errorf("widget.other = %q, want 'widgets'", n.Other)
|
||||
}
|
||||
}
|
||||
if _, ok := grammar.Nouns["passed"]; ok {
|
||||
t.Error("deprecated noun 'passed' should be ignored")
|
||||
}
|
||||
|
||||
// Word extracted
|
||||
if grammar.Words["api"] != "API" {
|
||||
t.Errorf("word 'api' = %q, want 'API'", grammar.Words["api"])
|
||||
}
|
||||
if _, ok := grammar.Words["failed"]; ok {
|
||||
t.Error("deprecated word 'failed' should be ignored")
|
||||
}
|
||||
if _, ok := grammar.Words["skipped"]; ok {
|
||||
t.Error("deprecated word 'skipped' should be ignored")
|
||||
}
|
||||
|
||||
// Punct extracted
|
||||
if grammar.Punct.LabelSuffix != ":" {
|
||||
|
|
|
|||
|
|
@ -202,6 +202,9 @@ func (t *Tokeniser) buildNounIndex() {
|
|||
data := i18n.GetGrammarData(t.lang)
|
||||
if data != nil && data.Nouns != nil {
|
||||
for base, forms := range data.Nouns {
|
||||
if skipDeprecatedEnglishGrammarEntry(base) {
|
||||
continue
|
||||
}
|
||||
t.baseNouns[base] = true
|
||||
if forms.Other != "" && forms.Other != base {
|
||||
t.pluralToBase[forms.Other] = base
|
||||
|
|
@ -505,6 +508,9 @@ func (t *Tokeniser) buildWordIndex() {
|
|||
return
|
||||
}
|
||||
for key, display := range data.Words {
|
||||
if skipDeprecatedEnglishGrammarEntry(key) {
|
||||
continue
|
||||
}
|
||||
// Map the key itself (already lowercase)
|
||||
t.words[core.Lower(key)] = key
|
||||
// Map the display form (e.g., "URL" → "url", "SSH" → "ssh")
|
||||
|
|
@ -612,6 +618,15 @@ func defaultWeights() map[string]float64 {
|
|||
}
|
||||
}
|
||||
|
||||
func skipDeprecatedEnglishGrammarEntry(key string) bool {
|
||||
switch core.Lower(key) {
|
||||
case "passed", "failed", "skipped":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchWord performs a case-insensitive lookup in the words map.
|
||||
// Returns the category key and true if found, or ("", false) otherwise.
|
||||
func (t *Tokeniser) MatchWord(word string) (string, bool) {
|
||||
|
|
|
|||
|
|
@ -667,6 +667,47 @@ func TestTokeniser_DualClassDetection(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_IgnoresDeprecatedGrammarEntries(t *testing.T) {
|
||||
setup(t)
|
||||
|
||||
const lang = "zz-deprecated"
|
||||
original := i18n.GetGrammarData(lang)
|
||||
t.Cleanup(func() {
|
||||
i18n.SetGrammarData(lang, original)
|
||||
})
|
||||
|
||||
i18n.SetGrammarData(lang, &i18n.GrammarData{
|
||||
Nouns: map[string]i18n.NounForms{
|
||||
"passed": {One: "passed", Other: "passed"},
|
||||
"failed": {One: "failed", Other: "failed"},
|
||||
"skipped": {One: "skipped", Other: "skipped"},
|
||||
"commit": {One: "commit", Other: "commits"},
|
||||
},
|
||||
Words: map[string]string{
|
||||
"passed": "passed",
|
||||
"failed": "failed",
|
||||
"skipped": "skipped",
|
||||
"url": "URL",
|
||||
},
|
||||
})
|
||||
|
||||
tok := NewTokeniserForLang(lang)
|
||||
for _, word := range []string{"passed", "failed", "skipped"} {
|
||||
if tok.IsDualClass(word) {
|
||||
t.Fatalf("%q should not be treated as dual-class", word)
|
||||
}
|
||||
if cat, ok := tok.MatchWord(word); ok {
|
||||
t.Fatalf("MatchWord(%q) = %q, %v; want not found", word, cat, ok)
|
||||
}
|
||||
if _, ok := tok.MatchNoun(word); ok {
|
||||
t.Fatalf("MatchNoun(%q) should be ignored", word)
|
||||
}
|
||||
}
|
||||
if cat, ok := tok.MatchWord("url"); !ok || cat != "url" {
|
||||
t.Fatalf("MatchWord(%q) = %q, %v; want %q, true", "url", cat, ok, "url")
|
||||
}
|
||||
}
|
||||
|
||||
func TestToken_ConfidenceField(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniser()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue