[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #91

Merged
Virgil merged 1 commit from agent/read---spec-code-core-go-i18n-rfc-md-ful into dev 2026-04-02 02:50:45 +00:00
2 changed files with 39 additions and 3 deletions

View file

@ -115,7 +115,19 @@ func WithSignals() TokeniserOption {
// WithWeights overrides the default signal weights for disambiguation.
// All signal keys must be present; omitted keys silently disable those signals.
// Passing a nil or empty map resets the tokeniser to its default weights.
func WithWeights(w map[string]float64) TokeniserOption {
	return func(t *Tokeniser) {
		if len(w) == 0 {
			// nil signals "use defaults" downstream; avoid storing an empty map.
			t.weights = nil
			return
		}
		// Copy the map so callers can safely reuse or mutate their input after
		// constructing the tokeniser (maps are reference types in Go).
		copied := make(map[string]float64, len(w))
		for key, value := range w {
			copied[key] = value
		}
		t.weights = copied
	}
}
// NewTokeniser creates a Tokeniser for English ("en").

View file

@ -896,7 +896,7 @@ func TestDisambiguationStats_NoAmbiguous(t *testing.T) {
func TestWithWeights_Override(t *testing.T) {
setup(t)
// Override noun_determiner to 0 — "The commit" should no longer resolve as noun
tok := NewTokeniser(WithWeights(map[string]float64{
weights := map[string]float64{
"noun_determiner": 0.0,
"verb_auxiliary": 0.25,
"following_class": 0.15,
@ -904,7 +904,8 @@ func TestWithWeights_Override(t *testing.T) {
"verb_saturation": 0.10,
"inflection_echo": 0.03,
"default_prior": 0.02,
}))
}
tok := NewTokeniser(WithWeights(weights))
tokens := tok.Tokenise("The commit")
// With noun_determiner zeroed, default_prior (verb) should win
if tokens[1].Type != TokenVerb {
@ -912,6 +913,29 @@ func TestWithWeights_Override(t *testing.T) {
}
}
// TestWithWeights_CopiesInputMap verifies that WithWeights snapshots the
// caller's weight map: mutating the map after NewTokeniser must not change
// the tokeniser's behaviour.
func TestWithWeights_CopiesInputMap(t *testing.T) {
	setup(t)
	callerWeights := map[string]float64{
		"noun_determiner":   0.35,
		"verb_auxiliary":    0.25,
		"following_class":   0.15,
		"sentence_position": 0.10,
		"verb_saturation":   0.10,
		"inflection_echo":   0.03,
		"default_prior":     0.02,
	}
	tok := NewTokeniser(WithWeights(callerWeights))
	// Zero the strongest noun signal AFTER construction. If the tokeniser
	// copied the map, it still sees the original 0.35 and resolves the noun.
	callerWeights["noun_determiner"] = 0
	tokens := tok.Tokenise("The commit")
	if got := tokens[1].Type; got != TokenNoun {
		t.Fatalf("with copied weights, 'commit' Type = %v, want TokenNoun", got)
	}
}
// --- Benchmarks ---
func benchSetup(b *testing.B) {