[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #72
2 changed files with 61 additions and 0 deletions
@@ -899,6 +899,33 @@ func (t *Tokeniser) matchFrenchArticlePhrase(parts []string, start int) (int, To
 			}
 			return 2, tok, &extra, punctTok
 		}
+		// Handle spaced elision forms such as "de l' enfant" or "de l’ enfant".
+		if (second == "l'" || second == "l’") && start+2 < len(parts) {
+			third, thirdPunct := splitTrailingPunct(parts[start+2])
+			if third != "" {
+				tok := Token{
+					Raw:        first + " " + second,
+					Lower:      core.Lower(first + " " + second),
+					Type:       TokenArticle,
+					ArtType:    "definite",
+					Confidence: 1.0,
+				}
+				extra := t.classifyElidedFrenchWord(third)
+				var punctTok *Token
+				if thirdPunct != "" {
+					if punctType, ok := matchPunctuation(thirdPunct); ok {
+						punctTok = &Token{
+							Raw:        thirdPunct,
+							Lower:      thirdPunct,
+							Type:       TokenPunctuation,
+							PunctType:  punctType,
+							Confidence: 1.0,
+						}
+					}
+				}
+				return 3, tok, &extra, punctTok
+			}
+		}
 		return 0, Token{}, nil, nil
 	}
 	tok := Token{
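The added branch covers spaced elision forms ("de l' enfant", "de l’ enfant") by consuming three parts instead of two: the article phrase, the elided word, and any trailing punctuation. It leans on three helpers that live elsewhere in the package and are not shown in this diff: splitTrailingPunct, classifyElidedFrenchWord, and matchPunctuation. The standalone sketch below is only a guess at the contract splitTrailingPunct appears to satisfy here; the name splitTrailingPunctSketch and its behaviour are assumptions for illustration, not the repository's implementation.

```go
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// splitTrailingPunctSketch is a hypothetical stand-in for the repository's
// splitTrailingPunct helper: it peels trailing punctuation runes off a
// whitespace-split part, but keeps apostrophes attached so elided forms
// such as "l'" and "l’" stay whole.
func splitTrailingPunctSketch(part string) (word, punct string) {
	runes := []rune(part)
	cut := len(runes)
	for cut > 0 {
		r := runes[cut-1]
		if unicode.IsPunct(r) && r != '\'' && r != '’' {
			cut--
			continue
		}
		break
	}
	return string(runes[:cut]), string(runes[cut:])
}

func main() {
	// "de l’ enfant." splits into three parts; only the last carries punctuation.
	for _, part := range strings.Fields("de l’ enfant.") {
		word, punct := splitTrailingPunctSketch(part)
		fmt.Printf("part=%-12q word=%-12q punct=%q\n", part, word, punct)
	}
}
```

Keeping apostrophes out of the punctuation run matters for this feature: the spaced article part itself ends in an apostrophe ("l'" / "l’") and has to reach the matcher intact for the new condition to fire.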
@@ -356,6 +356,40 @@ func TestTokeniser_Tokenise_FrenchElision(t *testing.T) {
 		t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
 	}
 
+	tokens = tok.Tokenise("de l' enfant")
+	if len(tokens) != 2 {
+		t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "de l' enfant", len(tokens))
+	}
+	if tokens[0].Type != TokenArticle {
+		t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type)
+	}
+	if tokens[0].Lower != "de l'" {
+		t.Fatalf("tokens[0].Lower = %q, want %q", tokens[0].Lower, "de l'")
+	}
+	if tokens[1].Type != TokenNoun {
+		t.Fatalf("tokens[1].Type = %v, want TokenNoun", tokens[1].Type)
+	}
+	if tokens[1].Lower != "enfant" {
+		t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
+	}
+
+	tokens = tok.Tokenise("de l’ enfant")
+	if len(tokens) != 2 {
+		t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "de l’ enfant", len(tokens))
+	}
+	if tokens[0].Type != TokenArticle {
+		t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type)
+	}
+	if tokens[0].Lower != "de l’" {
+		t.Fatalf("tokens[0].Lower = %q, want %q", tokens[0].Lower, "de l’")
+	}
+	if tokens[1].Type != TokenNoun {
+		t.Fatalf("tokens[1].Type = %v, want TokenNoun", tokens[1].Type)
+	}
+	if tokens[1].Lower != "enfant" {
+		t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
+	}
+
 	tokens = tok.Tokenise("d'enfant")
 	if len(tokens) != 2 {
 		t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "d'enfant", len(tokens))
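Not part of the change, but the two new spaced-elision checks assert exactly the same shape (an article token followed by the noun "enfant"), so they could be folded into a table-driven loop. The sketch below is a hypothetical refactor, written against the identifiers the diff already uses (tok, t, TokenArticle, TokenNoun) and meant to slot into the existing TestTokeniser_Tokenise_FrenchElision body rather than compile on its own:

```go
// Hypothetical table-driven form of the two new assertions; "tok" and "t"
// come from the surrounding test, exactly as in the diff above.
cases := []struct {
	input       string
	wantArticle string // expected Lower of the leading article token
}{
	{"de l' enfant", "de l'"},
	{"de l’ enfant", "de l’"},
}
for _, c := range cases {
	tokens := tok.Tokenise(c.input)
	if len(tokens) != 2 {
		t.Fatalf("Tokenise(%q) returned %d tokens, want 2", c.input, len(tokens))
	}
	if tokens[0].Type != TokenArticle || tokens[0].Lower != c.wantArticle {
		t.Fatalf("Tokenise(%q)[0] = (%v, %q), want (TokenArticle, %q)",
			c.input, tokens[0].Type, tokens[0].Lower, c.wantArticle)
	}
	if tokens[1].Type != TokenNoun || tokens[1].Lower != "enfant" {
		t.Fatalf("Tokenise(%q)[1] = (%v, %q), want (TokenNoun, %q)",
			c.input, tokens[1].Type, tokens[1].Lower, "enfant")
	}
}
```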