fix(loader): validate signal priors
Some checks are pending
Security Scan / security (push) Waiting to run
Test / test (push) Waiting to run

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-04-03 07:47:28 +00:00
parent dd9d0832af
commit d3a12bfe74
3 changed files with 15 additions and 2 deletions

View file

@ -3,6 +3,7 @@ package i18n
import (
"errors"
"io/fs"
"math"
"path"
"slices"
"sync"
@ -407,7 +408,7 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) {
}
for role, value := range bucket {
score, ok := float64Value(value)
if !ok {
if !ok || !validSignalPriorScore(score) {
continue
}
grammar.Signals.Priors[key][core.Lower(role)] = score
@ -415,6 +416,10 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) {
}
}
// validSignalPriorScore reports whether score can be stored as a signal
// prior. A score is accepted only when it is a finite number (neither NaN
// nor an infinity) and is not negative.
func validSignalPriorScore(score float64) bool {
	switch {
	case math.IsNaN(score), math.IsInf(score, 0):
		// Non-finite values are rejected outright.
		return false
	default:
		return score >= 0
	}
}
func float64Value(v any) (float64, bool) {
if v == nil {
return 0, false

View file

@ -17,6 +17,7 @@ package reversal
import (
"maps"
"math"
"strings"
"unicode/utf8"
@ -1485,6 +1486,9 @@ func (t *Tokeniser) corpusPrior(word string) (float64, float64, bool) {
}
verb := bucket["verb"]
noun := bucket["noun"]
if !validSignalPriorScore(verb) || !validSignalPriorScore(noun) {
return 0, 0, false
}
total := verb + noun
if total <= 0 {
return 0, 0, false
@ -1492,6 +1496,10 @@ func (t *Tokeniser) corpusPrior(word string) (float64, float64, bool) {
return verb / total, noun / total, true
}
// validSignalPriorScore reports whether score is a finite, non-negative
// number.
func validSignalPriorScore(score float64) bool {
	// NaN and -Inf both fail the >= 0 comparison, so +Inf is the only
	// non-finite value that needs an explicit check.
	nonNegative := score >= 0
	return nonNegative && !math.IsInf(score, 1)
}
// hasConfidentVerbInClause scans for a confident verb (Confidence >= 1.0)
// within the same clause as the token at idx. Clause boundaries are
// punctuation tokens and clause-boundary conjunctions/subordinators.

View file

@ -261,7 +261,7 @@ type SignalData struct {
VerbAuxiliaries []string // Auxiliaries/modals before verbs: "is", "was", "will", ...
VerbInfinitive []string // Infinitive markers: "to"
VerbNegation []string // Negation cues that weakly signal a verb: "not", "never", ...
Priors map[string]map[string]float64 // Corpus-derived verb/noun priors for ambiguous words.
Priors map[string]map[string]float64 // Corpus-derived verb/noun priors for ambiguous words, consumed by the reversal tokeniser.
}
// --- Number Formatting ---