From 10e95cfdd21b84d184f3dd510224e09513320883 Mon Sep 17 00:00:00 2001
From: Virgil
Date: Thu, 2 Apr 2026 03:22:43 +0000
Subject: [PATCH] fix(tokeniser): preserve default signal weights

Co-Authored-By: Virgil
---
 reversal/tokeniser.go      |  8 ++++----
 reversal/tokeniser_test.go | 12 ++++++++++++
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go
index c4b38c4..7192b17 100644
--- a/reversal/tokeniser.go
+++ b/reversal/tokeniser.go
@@ -113,16 +113,16 @@ func WithSignals() TokeniserOption {
 }
 
 // WithWeights overrides the default signal weights for disambiguation.
-// All signal keys must be present; omitted keys silently disable those signals.
+// Omitted keys keep their default weights so partial overrides stay safe.
 func WithWeights(w map[string]float64) TokeniserOption {
 	return func(t *Tokeniser) {
 		if len(w) == 0 {
 			t.weights = nil
 			return
 		}
-		// Copy the map so callers can safely reuse or mutate their input after
-		// constructing the tokeniser.
-		copied := make(map[string]float64, len(w))
+		// Start from the defaults so callers can override only the weights they
+		// care about without accidentally disabling the rest of the signal set.
+		copied := defaultWeights()
 		for key, value := range w {
 			copied[key] = value
 		}
diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go
index dd90f60..0a0a322 100644
--- a/reversal/tokeniser_test.go
+++ b/reversal/tokeniser_test.go
@@ -977,6 +977,18 @@ func TestWithWeights_CopiesInputMap(t *testing.T) {
 	}
 }
 
+func TestWithWeights_PartialOverrideKeepsDefaults(t *testing.T) {
+	setup(t)
+	tok := NewTokeniser(WithWeights(map[string]float64{
+		"verb_auxiliary": 0.25,
+	}))
+
+	tokens := tok.Tokenise("The commit")
+	if tokens[1].Type != TokenNoun {
+		t.Fatalf("with partial weights, 'commit' Type = %v, want TokenNoun", tokens[1].Type)
+	}
+}
+
 // --- Benchmarks ---
 
 func benchSetup(b *testing.B) {
-- 
2.45.3