[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #91
2 changed files with 39 additions and 3 deletions
|
|
@ -115,7 +115,19 @@ func WithSignals() TokeniserOption {
|
|||
// WithWeights overrides the default signal weights for disambiguation.
|
||||
// All signal keys must be present; omitted keys silently disable those signals.
|
||||
func WithWeights(w map[string]float64) TokeniserOption {
|
||||
return func(t *Tokeniser) { t.weights = w }
|
||||
return func(t *Tokeniser) {
|
||||
if len(w) == 0 {
|
||||
t.weights = nil
|
||||
return
|
||||
}
|
||||
// Copy the map so callers can safely reuse or mutate their input after
|
||||
// constructing the tokeniser.
|
||||
copied := make(map[string]float64, len(w))
|
||||
for key, value := range w {
|
||||
copied[key] = value
|
||||
}
|
||||
t.weights = copied
|
||||
}
|
||||
}
|
||||
|
||||
// NewTokeniser creates a Tokeniser for English ("en").
|
||||
|
|
|
|||
|
|
@ -896,7 +896,7 @@ func TestDisambiguationStats_NoAmbiguous(t *testing.T) {
|
|||
func TestWithWeights_Override(t *testing.T) {
|
||||
setup(t)
|
||||
// Override noun_determiner to 0 — "The commit" should no longer resolve as noun
|
||||
tok := NewTokeniser(WithWeights(map[string]float64{
|
||||
weights := map[string]float64{
|
||||
"noun_determiner": 0.0,
|
||||
"verb_auxiliary": 0.25,
|
||||
"following_class": 0.15,
|
||||
|
|
@ -904,7 +904,8 @@ func TestWithWeights_Override(t *testing.T) {
|
|||
"verb_saturation": 0.10,
|
||||
"inflection_echo": 0.03,
|
||||
"default_prior": 0.02,
|
||||
}))
|
||||
}
|
||||
tok := NewTokeniser(WithWeights(weights))
|
||||
tokens := tok.Tokenise("The commit")
|
||||
// With noun_determiner zeroed, default_prior (verb) should win
|
||||
if tokens[1].Type != TokenVerb {
|
||||
|
|
@ -912,6 +913,29 @@ func TestWithWeights_Override(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestWithWeights_CopiesInputMap(t *testing.T) {
|
||||
setup(t)
|
||||
weights := map[string]float64{
|
||||
"noun_determiner": 0.35,
|
||||
"verb_auxiliary": 0.25,
|
||||
"following_class": 0.15,
|
||||
"sentence_position": 0.10,
|
||||
"verb_saturation": 0.10,
|
||||
"inflection_echo": 0.03,
|
||||
"default_prior": 0.02,
|
||||
}
|
||||
tok := NewTokeniser(WithWeights(weights))
|
||||
|
||||
// Mutate the caller's map after construction; the tokeniser should keep
|
||||
// using the original copied values.
|
||||
weights["noun_determiner"] = 0
|
||||
|
||||
tokens := tok.Tokenise("The commit")
|
||||
if tokens[1].Type != TokenNoun {
|
||||
t.Fatalf("with copied weights, 'commit' Type = %v, want TokenNoun", tokens[1].Type)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Benchmarks ---
|
||||
|
||||
func benchSetup(b *testing.B) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue