[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #14

Merged
Virgil merged 1 commit from agent/read---spec-code-core-go-i18n-rfc-md-ful into dev 2026-04-01 05:10:57 +00:00
2 changed files with 54 additions and 14 deletions

View file

@ -50,17 +50,17 @@ const (
// Token represents a single classified token from a text string.
//
// Raw and Lower carry the token text; Type and Confidence carry the chosen
// classification, with AltType and AltConf holding the runner-up for
// dual-class tokens. The remaining fields are populated only for the
// classifications named in their comments.
type Token struct {
	Raw        string           // Original text as it appeared in input
	Lower      string           // Lowercased form
	Type       TokenType        // Classification
	Confidence float64          // 0.0-1.0 classification confidence
	AltType    TokenType        // Runner-up classification (dual-class only)
	AltConf    float64          // Runner-up confidence
	VerbInfo   VerbMatch        // Set when Type OR AltType == TokenVerb
	NounInfo   NounMatch        // Set when Type OR AltType == TokenNoun
	WordCat    string           // Set when Type == TokenWord
	ArtType    string           // Set when Type == TokenArticle
	PunctType  string           // Set when Type == TokenPunctuation
	Signals    *SignalBreakdown // Non-nil only when WithSignals() option is set
}
@ -593,6 +593,11 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
if lower == core.Lower(data.Articles.Definite) {
return "definite", true
}
for _, article := range data.Articles.ByGender {
if lower == core.Lower(article) {
return "definite", true
}
}
return "", false
}

View file

@ -20,9 +20,9 @@ func TestTokeniser_MatchVerb_Irregular(t *testing.T) {
tok := NewTokeniser()
tests := []struct {
word string
wantOK bool
wantBase string
word string
wantOK bool
wantBase string
wantTense string
}{
// Irregular past tense
@ -206,6 +206,41 @@ func TestTokeniser_MatchArticle(t *testing.T) {
}
}
// TestTokeniser_MatchArticle_FrenchGendered exercises MatchArticle on a
// tokeniser created for "fr". It checks lower- and upper-case "le"/"la",
// "un", and "une" against the expected article type and match flag, then
// confirms that Tokenise classifies the leading word of "la branche" as an
// article.
func TestTokeniser_MatchArticle_FrenchGendered(t *testing.T) {
	setup(t)
	frTok := NewTokeniserForLang("fr")

	type articleCase struct {
		word     string
		wantType string
		wantOK   bool
	}
	cases := []articleCase{
		{word: "le", wantType: "definite", wantOK: true},
		{word: "la", wantType: "definite", wantOK: true},
		{word: "Le", wantType: "definite", wantOK: true},
		{word: "La", wantType: "definite", wantOK: true},
		{word: "un", wantType: "indefinite", wantOK: true},
		{word: "une", wantType: "", wantOK: false},
	}

	for _, tc := range cases {
		t.Run(tc.word, func(t *testing.T) {
			got, matched := frTok.MatchArticle(tc.word)
			if matched != tc.wantOK {
				t.Fatalf("MatchArticle(%q) ok=%v, want %v", tc.word, matched, tc.wantOK)
			}
			// The article type is only compared when a match was reported.
			if !matched {
				return
			}
			if got != tc.wantType {
				t.Errorf("MatchArticle(%q) = %q, want %q", tc.word, got, tc.wantType)
			}
		})
	}

	// End-to-end check: the first token of the phrase must be an article.
	const phrase = "la branche"
	tokens := frTok.Tokenise(phrase)
	if len(tokens) > 0 && tokens[0].Type == TokenArticle {
		return
	}
	t.Fatalf("Tokenise(%q)[0] should be TokenArticle, got %#v", phrase, tokens)
}
func TestTokeniser_Tokenise(t *testing.T) {
setup(t)
tok := NewTokeniser()