fix(loader): validate signal priors
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
dd9d0832af
commit
d3a12bfe74
3 changed files with 15 additions and 2 deletions
|
|
@ -3,6 +3,7 @@ package i18n
|
|||
import (
|
||||
"errors"
|
||||
"io/fs"
|
||||
"math"
|
||||
"path"
|
||||
"slices"
|
||||
"sync"
|
||||
|
|
@ -407,7 +408,7 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) {
|
|||
}
|
||||
for role, value := range bucket {
|
||||
score, ok := float64Value(value)
|
||||
if !ok {
|
||||
if !ok || !validSignalPriorScore(score) {
|
||||
continue
|
||||
}
|
||||
grammar.Signals.Priors[key][core.Lower(role)] = score
|
||||
|
|
@ -415,6 +416,10 @@ func loadSignalPriors(grammar *GrammarData, priors map[string]any) {
|
|||
}
|
||||
}
|
||||
|
||||
// validSignalPriorScore reports whether score can be stored as a signal
// prior: it must be a finite, non-negative number. NaN, +Inf, -Inf and
// negative values are all rejected.
func validSignalPriorScore(score float64) bool {
	// Rule out the non-finite values first so the final comparison only
	// has to decide the sign.
	if math.IsNaN(score) || math.IsInf(score, 0) {
		return false
	}
	return score >= 0
}
|
||||
|
||||
func float64Value(v any) (float64, bool) {
|
||||
if v == nil {
|
||||
return 0, false
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ package reversal
|
|||
|
||||
import (
|
||||
"maps"
|
||||
"math"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
|
|
@ -1485,6 +1486,9 @@ func (t *Tokeniser) corpusPrior(word string) (float64, float64, bool) {
|
|||
}
|
||||
verb := bucket["verb"]
|
||||
noun := bucket["noun"]
|
||||
if !validSignalPriorScore(verb) || !validSignalPriorScore(noun) {
|
||||
return 0, 0, false
|
||||
}
|
||||
total := verb + noun
|
||||
if total <= 0 {
|
||||
return 0, 0, false
|
||||
|
|
@ -1492,6 +1496,10 @@ func (t *Tokeniser) corpusPrior(word string) (float64, float64, bool) {
|
|||
return verb / total, noun / total, true
|
||||
}
|
||||
|
||||
// validSignalPriorScore reports whether score is a usable prior weight,
// i.e. a finite number that is zero or greater. NaN, infinities of either
// sign, and negative values yield false.
func validSignalPriorScore(score float64) bool {
	switch {
	case math.IsNaN(score), math.IsInf(score, 0):
		// Non-finite scores cannot be used in the verb/noun ratio.
		return false
	default:
		return score >= 0
	}
}
|
||||
|
||||
// hasConfidentVerbInClause scans for a confident verb (Confidence >= 1.0)
|
||||
// within the same clause as the token at idx. Clause boundaries are
|
||||
// punctuation tokens and clause-boundary conjunctions/subordinators.
|
||||
|
|
|
|||
2
types.go
2
types.go
|
|
@ -261,7 +261,7 @@ type SignalData struct {
|
|||
VerbAuxiliaries []string // Auxiliaries/modals before verbs: "is", "was", "will", ...
|
||||
VerbInfinitive []string // Infinitive markers: "to"
|
||||
VerbNegation []string // Negation cues that weakly signal a verb: "not", "never", ...
|
||||
Priors map[string]map[string]float64 // Corpus-derived verb/noun priors for ambiguous words.
|
||||
Priors map[string]map[string]float64 // Corpus-derived verb/noun priors for ambiguous words, consumed by the reversal tokeniser.
|
||||
}
|
||||
|
||||
// --- Number Formatting ---
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue