From d3a12bfe745c1e80cb21348f41e334d32b51e0cd Mon Sep 17 00:00:00 2001 From: Virgil Date: Fri, 3 Apr 2026 07:47:28 +0000 Subject: [PATCH] fix(loader): validate signal priors Co-Authored-By: Virgil --- loader.go | 7 ++++++- reversal/tokeniser.go | 8 ++++++++ types.go | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/loader.go b/loader.go index 3dbe1fc..f6654f4 100644 --- a/loader.go +++ b/loader.go @@ -3,6 +3,7 @@ package i18n import ( "errors" "io/fs" + "math" "path" "slices" "sync" @@ -407,7 +408,7 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) { } for role, value := range bucket { score, ok := float64Value(value) - if !ok { + if !ok || !validSignalPriorScore(score) { continue } grammar.Signals.Priors[key][core.Lower(role)] = score @@ -415,6 +416,10 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) { } } +func validSignalPriorScore(score float64) bool { + return !math.IsNaN(score) && !math.IsInf(score, 0) && score >= 0 +} + func float64Value(v any) (float64, bool) { if v == nil { return 0, false diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index 98bb9f3..e0a12bf 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -17,6 +17,7 @@ package reversal import ( "maps" + "math" "strings" "unicode/utf8" @@ -1485,6 +1486,9 @@ func (t *Tokeniser) corpusPrior(word string) (float64, float64, bool) { } verb := bucket["verb"] noun := bucket["noun"] + if !validSignalPriorScore(verb) || !validSignalPriorScore(noun) { + return 0, 0, false + } total := verb + noun if total <= 0 { return 0, 0, false @@ -1492,6 +1496,10 @@ func (t *Tokeniser) corpusPrior(word string) (float64, float64, bool) { return verb / total, noun / total, true } +func validSignalPriorScore(score float64) bool { + return !math.IsNaN(score) && !math.IsInf(score, 0) && score >= 0 +} + // hasConfidentVerbInClause scans for a confident verb (Confidence >= 1.0) // within the same clause as the token at idx. Clause boundaries are // punctuation tokens and clause-boundary conjunctions/subordinators. diff --git a/types.go b/types.go index 9375341..f688a39 100644 --- a/types.go +++ b/types.go @@ -261,7 +261,7 @@ type SignalData struct { VerbAuxiliaries []string // Auxiliaries/modals before verbs: "is", "was", "will", ... VerbInfinitive []string // Infinitive markers: "to" VerbNegation []string // Negation cues that weakly signal a verb: "not", "never", ... - Priors map[string]map[string]float64 // Corpus-derived verb/noun priors for ambiguous words. + Priors map[string]map[string]float64 // Corpus-derived verb/noun priors for ambiguous words, consumed by the reversal tokeniser. } // --- Number Formatting ---