From cb7404456ffb5ed439cfbe8fd4c4839abd67bc29 Mon Sep 17 00:00:00 2001
From: Snider
Date: Thu, 19 Feb 2026 15:50:45 +0000
Subject: [PATCH] feat(grammar): add SignalData for disambiguation signals

Load noun_determiner, verb_auxiliary, and verb_infinitive word lists
from gram.signal in locale JSON. Reserve Priors field for future
corpus-derived per-word disambiguation priors.

Co-Authored-By: Virgil
Co-Authored-By: Claude Opus 4.6
---
 grammar_test.go | 33 +++++++++++++++++++++++++++++++++
 loader.go       | 32 ++++++++++++++++++++++++++++++++
 locales/en.json | 17 +++++++++++++++++
 types.go        |  9 +++++++++
 4 files changed, 91 insertions(+)

diff --git a/grammar_test.go b/grammar_test.go
index 6a62526..c6eeea3 100644
--- a/grammar_test.go
+++ b/grammar_test.go
@@ -346,6 +346,39 @@ func TestActionFailed(t *testing.T) {
 	}
 }
 
+func TestGrammarData_Signals(t *testing.T) {
+	svc, err := New()
+	if err != nil {
+		t.Fatalf("New() failed: %v", err)
+	}
+	SetDefault(svc)
+
+	data := GetGrammarData("en")
+	if data == nil {
+		t.Fatal("GetGrammarData(\"en\") returned nil")
+	}
+	if len(data.Signals.NounDeterminers) == 0 {
+		t.Error("Signals.NounDeterminers is empty")
+	}
+	if len(data.Signals.VerbAuxiliaries) == 0 {
+		t.Error("Signals.VerbAuxiliaries is empty")
+	}
+	if len(data.Signals.VerbInfinitive) == 0 {
+		t.Error("Signals.VerbInfinitive is empty")
+	}
+
+	// Spot-check known values
+	found := false
+	for _, d := range data.Signals.NounDeterminers {
+		if d == "the" {
+			found = true
+		}
+	}
+	if !found {
+		t.Error("NounDeterminers missing 'the'")
+	}
+}
+
 func TestTemplateFuncs(t *testing.T) {
 	funcs := TemplateFuncs()
 	expected := []string{"title", "lower", "upper", "past", "gerund", "plural", "pluralForm", "article", "quote"}
diff --git a/loader.go b/loader.go
index 1173fef..898afa5 100644
--- a/loader.go
+++ b/loader.go
@@ -154,6 +154,38 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa
 				}
 			}
 
+			// Signal data for disambiguation
+			if grammar != nil && fullKey == "gram.signal" {
+				if nd, ok := v["noun_determiner"]; ok {
+					if arr, ok := nd.([]any); ok {
+						for _, item := range arr {
+							if s, ok := item.(string); ok {
+								grammar.Signals.NounDeterminers = append(grammar.Signals.NounDeterminers, s)
+							}
+						}
+					}
+				}
+				if va, ok := v["verb_auxiliary"]; ok {
+					if arr, ok := va.([]any); ok {
+						for _, item := range arr {
+							if s, ok := item.(string); ok {
+								grammar.Signals.VerbAuxiliaries = append(grammar.Signals.VerbAuxiliaries, s)
+							}
+						}
+					}
+				}
+				if vi, ok := v["verb_infinitive"]; ok {
+					if arr, ok := vi.([]any); ok {
+						for _, item := range arr {
+							if s, ok := item.(string); ok {
+								grammar.Signals.VerbInfinitive = append(grammar.Signals.VerbInfinitive, s)
+							}
+						}
+					}
+				}
+				continue
+			}
+
 			// Article configuration
 			if grammar != nil && fullKey == "gram.article" {
 				if indef, ok := v["indefinite"].(map[string]any); ok {
diff --git a/locales/en.json b/locales/en.json
index 92feb45..27989b3 100644
--- a/locales/en.json
+++ b/locales/en.json
@@ -110,6 +110,23 @@
       "label": ":",
       "progress": "..."
     },
+    "signal": {
+      "noun_determiner": [
+        "the", "a", "an",
+        "this", "that", "these", "those",
+        "my", "your", "his", "her", "its", "our", "their",
+        "every", "each", "some", "any", "no",
+        "many", "few", "several", "all", "both"
+      ],
+      "verb_auxiliary": [
+        "is", "are", "was", "were",
+        "has", "had", "have",
+        "do", "does", "did",
+        "will", "would", "could", "should",
+        "can", "may", "might", "shall", "must"
+      ],
+      "verb_infinitive": ["to"]
+    },
     "number": {
       "thousands": ",",
       "decimal": ".",
diff --git a/types.go b/types.go
index 1f5ba9a..a001bb4 100644
--- a/types.go
+++ b/types.go
@@ -192,6 +192,7 @@ type GrammarData struct {
 	Articles ArticleForms         // article configuration
 	Words    map[string]string    // base word translations
 	Punct    PunctuationRules     // language-specific punctuation
+	Signals  SignalData           // disambiguation signal word lists
 }
 
 // VerbForms holds verb conjugations.
@@ -221,6 +222,14 @@ type PunctuationRules struct {
 	ProgressSuffix string // "..."
 }
 
+// SignalData holds word lists used for disambiguation signals.
+type SignalData struct {
+	NounDeterminers []string                      // Words that precede nouns: "the", "a", "this", "my", ...
+	VerbAuxiliaries []string                      // Auxiliaries/modals before verbs: "is", "was", "will", ...
+	VerbInfinitive  []string                      // Infinitive markers: "to"
+	Priors          map[string]map[string]float64 // Reserved: per-word priors {"commit": {"verb": 0.4, "noun": 0.6}}
+}
+
 // --- Number Formatting ---
 
 // NumberFormat defines locale-specific number formatting rules.