feat(grammar): add SignalData for disambiguation signals
Load the noun_determiner, verb_auxiliary, and verb_infinitive word lists from gram.signal in the locale JSON. Reserve the Priors field for future corpus-derived per-word disambiguation priors.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d7fc2cda7d
commit
cb7404456f
4 changed files with 91 additions and 0 deletions
|
|
@ -346,6 +346,39 @@ func TestActionFailed(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestGrammarData_Signals(t *testing.T) {
|
||||
svc, err := New()
|
||||
if err != nil {
|
||||
t.Fatalf("New() failed: %v", err)
|
||||
}
|
||||
SetDefault(svc)
|
||||
|
||||
data := GetGrammarData("en")
|
||||
if data == nil {
|
||||
t.Fatal("GetGrammarData(\"en\") returned nil")
|
||||
}
|
||||
if len(data.Signals.NounDeterminers) == 0 {
|
||||
t.Error("Signals.NounDeterminers is empty")
|
||||
}
|
||||
if len(data.Signals.VerbAuxiliaries) == 0 {
|
||||
t.Error("Signals.VerbAuxiliaries is empty")
|
||||
}
|
||||
if len(data.Signals.VerbInfinitive) == 0 {
|
||||
t.Error("Signals.VerbInfinitive is empty")
|
||||
}
|
||||
|
||||
// Spot-check known values
|
||||
found := false
|
||||
for _, d := range data.Signals.NounDeterminers {
|
||||
if d == "the" {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Error("NounDeterminers missing 'the'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTemplateFuncs(t *testing.T) {
|
||||
funcs := TemplateFuncs()
|
||||
expected := []string{"title", "lower", "upper", "past", "gerund", "plural", "pluralForm", "article", "quote"}
|
||||
|
|
|
|||
32
loader.go
32
loader.go
|
|
@ -154,6 +154,38 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa
|
|||
}
|
||||
}
|
||||
|
||||
// Signal data for disambiguation
|
||||
if grammar != nil && fullKey == "gram.signal" {
|
||||
if nd, ok := v["noun_determiner"]; ok {
|
||||
if arr, ok := nd.([]any); ok {
|
||||
for _, item := range arr {
|
||||
if s, ok := item.(string); ok {
|
||||
grammar.Signals.NounDeterminers = append(grammar.Signals.NounDeterminers, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if va, ok := v["verb_auxiliary"]; ok {
|
||||
if arr, ok := va.([]any); ok {
|
||||
for _, item := range arr {
|
||||
if s, ok := item.(string); ok {
|
||||
grammar.Signals.VerbAuxiliaries = append(grammar.Signals.VerbAuxiliaries, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if vi, ok := v["verb_infinitive"]; ok {
|
||||
if arr, ok := vi.([]any); ok {
|
||||
for _, item := range arr {
|
||||
if s, ok := item.(string); ok {
|
||||
grammar.Signals.VerbInfinitive = append(grammar.Signals.VerbInfinitive, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Article configuration
|
||||
if grammar != nil && fullKey == "gram.article" {
|
||||
if indef, ok := v["indefinite"].(map[string]any); ok {
|
||||
|
|
|
|||
|
|
@ -110,6 +110,23 @@
|
|||
"label": ":",
|
||||
"progress": "..."
|
||||
},
|
||||
"signal": {
|
||||
"noun_determiner": [
|
||||
"the", "a", "an",
|
||||
"this", "that", "these", "those",
|
||||
"my", "your", "his", "her", "its", "our", "their",
|
||||
"every", "each", "some", "any", "no",
|
||||
"many", "few", "several", "all", "both"
|
||||
],
|
||||
"verb_auxiliary": [
|
||||
"is", "are", "was", "were",
|
||||
"has", "had", "have",
|
||||
"do", "does", "did",
|
||||
"will", "would", "could", "should",
|
||||
"can", "may", "might", "shall", "must"
|
||||
],
|
||||
"verb_infinitive": ["to"]
|
||||
},
|
||||
"number": {
|
||||
"thousands": ",",
|
||||
"decimal": ".",
|
||||
|
|
|
|||
9
types.go
9
types.go
|
|
@ -192,6 +192,7 @@ type GrammarData struct {
|
|||
Articles ArticleForms // article configuration
|
||||
Words map[string]string // base word translations
|
||||
Punct PunctuationRules // language-specific punctuation
|
||||
Signals SignalData // disambiguation signal word lists
|
||||
}
|
||||
|
||||
// VerbForms holds verb conjugations.
|
||||
|
|
@ -221,6 +222,14 @@ type PunctuationRules struct {
|
|||
ProgressSuffix string // "..."
|
||||
}
|
||||
|
||||
// SignalData holds the word lists used as disambiguation signals.
type SignalData struct {
	// NounDeterminers lists words that precede nouns: "the", "a", "this", "my", ...
	NounDeterminers []string
	// VerbAuxiliaries lists auxiliaries/modals that come before verbs: "is", "was", "will", ...
	VerbAuxiliaries []string
	// VerbInfinitive lists infinitive markers: "to".
	VerbInfinitive []string
	// Priors is reserved for per-word priors, e.g. {"commit": {"verb": 0.4, "noun": 0.6}}.
	Priors map[string]map[string]float64
}
|
||||
|
||||
// --- Number Formatting ---
|
||||
|
||||
// NumberFormat defines locale-specific number formatting rules.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue