[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #14
2 changed files with 54 additions and 14 deletions
|
|
@ -50,17 +50,17 @@ const (
|
|||
|
||||
// Token represents a single classified token from a text string.
|
||||
type Token struct {
|
||||
Raw string // Original text as it appeared in input
|
||||
Lower string // Lowercased form
|
||||
Type TokenType // Classification
|
||||
Confidence float64 // 0.0-1.0 classification confidence
|
||||
AltType TokenType // Runner-up classification (dual-class only)
|
||||
AltConf float64 // Runner-up confidence
|
||||
VerbInfo VerbMatch // Set when Type OR AltType == TokenVerb
|
||||
NounInfo NounMatch // Set when Type OR AltType == TokenNoun
|
||||
WordCat string // Set when Type == TokenWord
|
||||
ArtType string // Set when Type == TokenArticle
|
||||
PunctType string // Set when Type == TokenPunctuation
|
||||
Raw string // Original text as it appeared in input
|
||||
Lower string // Lowercased form
|
||||
Type TokenType // Classification
|
||||
Confidence float64 // 0.0-1.0 classification confidence
|
||||
AltType TokenType // Runner-up classification (dual-class only)
|
||||
AltConf float64 // Runner-up confidence
|
||||
VerbInfo VerbMatch // Set when Type OR AltType == TokenVerb
|
||||
NounInfo NounMatch // Set when Type OR AltType == TokenNoun
|
||||
WordCat string // Set when Type == TokenWord
|
||||
ArtType string // Set when Type == TokenArticle
|
||||
PunctType string // Set when Type == TokenPunctuation
|
||||
Signals *SignalBreakdown // Non-nil only when WithSignals() option is set
|
||||
}
|
||||
|
||||
|
|
@ -593,6 +593,11 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
if lower == core.Lower(data.Articles.Definite) {
|
||||
return "definite", true
|
||||
}
|
||||
for _, article := range data.Articles.ByGender {
|
||||
if lower == core.Lower(article) {
|
||||
return "definite", true
|
||||
}
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,9 +20,9 @@ func TestTokeniser_MatchVerb_Irregular(t *testing.T) {
|
|||
tok := NewTokeniser()
|
||||
|
||||
tests := []struct {
|
||||
word string
|
||||
wantOK bool
|
||||
wantBase string
|
||||
word string
|
||||
wantOK bool
|
||||
wantBase string
|
||||
wantTense string
|
||||
}{
|
||||
// Irregular past tense
|
||||
|
|
@ -206,6 +206,41 @@ func TestTokeniser_MatchArticle(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_MatchArticle_FrenchGendered(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniserForLang("fr")
|
||||
|
||||
tests := []struct {
|
||||
word string
|
||||
wantType string
|
||||
wantOK bool
|
||||
}{
|
||||
{"le", "definite", true},
|
||||
{"la", "definite", true},
|
||||
{"Le", "definite", true},
|
||||
{"La", "definite", true},
|
||||
{"un", "indefinite", true},
|
||||
{"une", "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.word, func(t *testing.T) {
|
||||
artType, ok := tok.MatchArticle(tt.word)
|
||||
if ok != tt.wantOK {
|
||||
t.Fatalf("MatchArticle(%q) ok=%v, want %v", tt.word, ok, tt.wantOK)
|
||||
}
|
||||
if ok && artType != tt.wantType {
|
||||
t.Errorf("MatchArticle(%q) = %q, want %q", tt.word, artType, tt.wantType)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
tokens := tok.Tokenise("la branche")
|
||||
if len(tokens) == 0 || tokens[0].Type != TokenArticle {
|
||||
t.Fatalf("Tokenise(%q)[0] should be TokenArticle, got %#v", "la branche", tokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_Tokenise(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniser()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue