[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #120
2 changed files with 38 additions and 3 deletions
|
|
@ -671,7 +671,7 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
if base, _ := splitTrailingPunct(word); base != "" {
|
||||
word = base
|
||||
}
|
||||
lower := core.Lower(word)
|
||||
lower := normalizeFrenchApostrophes(core.Lower(word))
|
||||
|
||||
if artType, ok := matchConfiguredArticleText(lower, data); ok {
|
||||
return artType, true
|
||||
|
|
@ -705,6 +705,7 @@ func matchConfiguredArticleText(lower string, data *i18n.GrammarData) (string, b
|
|||
if data == nil {
|
||||
return "", false
|
||||
}
|
||||
lower = normalizeFrenchApostrophes(lower)
|
||||
|
||||
if lower == core.Lower(data.Articles.IndefiniteDefault) ||
|
||||
lower == core.Lower(data.Articles.IndefiniteVowel) {
|
||||
|
|
@ -742,6 +743,7 @@ func matchConfiguredArticleText(lower string, data *i18n.GrammarData) (string, b
|
|||
}
|
||||
|
||||
func matchFrenchLeadingArticlePhrase(lower string) (string, bool) {
|
||||
lower = normalizeFrenchApostrophes(lower)
|
||||
switch {
|
||||
case lower == "le", lower == "la", lower == "les",
|
||||
lower == "l'", lower == "l’", lower == "au", lower == "aux":
|
||||
|
|
@ -777,8 +779,9 @@ func matchFrenchLeadingArticlePhrase(lower string) (string, bool) {
|
|||
}
|
||||
|
||||
func matchFrenchArticleText(lower string) (string, bool) {
|
||||
lower = normalizeFrenchApostrophes(lower)
|
||||
switch {
|
||||
case strings.HasPrefix(lower, "de l'"), strings.HasPrefix(lower, "de l’"):
|
||||
case strings.HasPrefix(lower, "de l'"):
|
||||
return "indefinite", true
|
||||
case strings.HasPrefix(lower, "de la "), strings.HasPrefix(lower, "de le "), strings.HasPrefix(lower, "de les "), strings.HasPrefix(lower, "du "), strings.HasPrefix(lower, "des "):
|
||||
return "indefinite", true
|
||||
|
|
@ -821,6 +824,7 @@ func matchFrenchArticleText(lower string) (string, bool) {
|
|||
}
|
||||
|
||||
func matchFrenchAttachedArticle(lower string) (string, bool) {
|
||||
lower = normalizeFrenchApostrophes(lower)
|
||||
for _, prefix := range frenchElisionPrefixes {
|
||||
if !strings.HasPrefix(lower, prefix) {
|
||||
continue
|
||||
|
|
@ -1530,7 +1534,7 @@ func (t *Tokeniser) splitFrenchElision(raw string) (string, string, bool) {
|
|||
return "", raw, false
|
||||
}
|
||||
|
||||
lower := core.Lower(raw)
|
||||
lower := normalizeFrenchApostrophes(core.Lower(raw))
|
||||
if len(lower) < 2 {
|
||||
return "", raw, false
|
||||
}
|
||||
|
|
@ -1562,6 +1566,13 @@ func (t *Tokeniser) isFrenchLanguage() bool {
|
|||
return lang == "fr" || core.HasPrefix(lang, "fr-")
|
||||
}
|
||||
|
||||
// normalizeFrenchApostrophes canonicalises apostrophes for French article
// matching: every typographic apostrophe (U+2019 ’) is replaced with the
// ASCII apostrophe (U+0027 ') so that callers can compare against a single
// form (e.g. "l'" matches both "l'ami" and "l’ami").
//
// The input is returned unchanged (no allocation) when it contains no
// typographic apostrophe; strings.ContainsRune is false for the empty
// string, so no separate empty-string guard is needed.
func normalizeFrenchApostrophes(s string) string {
	if !strings.ContainsRune(s, '’') {
		return s
	}
	return strings.ReplaceAll(s, "’", "'")
}
|
||||
|
||||
// matchPunctuation detects known punctuation patterns.
|
||||
// Returns the punctuation type and true if recognised.
|
||||
func matchPunctuation(punct string) (string, bool) {
|
||||
|
|
|
|||
|
|
@ -319,6 +319,7 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {
|
|||
{"l'enfant", "definite", true},
|
||||
{"de l'enfant", "indefinite", true},
|
||||
{"de l’ami", "indefinite", true},
|
||||
{"De l’enfant", "indefinite", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
|
@ -438,6 +439,29 @@ func TestTokeniser_Tokenise_FrenchElision(t *testing.T) {
|
|||
t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
|
||||
}
|
||||
|
||||
tokens = tok.Tokenise("De l’enfant.")
|
||||
if len(tokens) != 3 {
|
||||
t.Fatalf("Tokenise(%q) returned %d tokens, want 3", "De l’enfant.", len(tokens))
|
||||
}
|
||||
if tokens[0].Type != TokenArticle {
|
||||
t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type)
|
||||
}
|
||||
if tokens[0].ArtType != "indefinite" {
|
||||
t.Fatalf("tokens[0].ArtType = %q, want %q", tokens[0].ArtType, "indefinite")
|
||||
}
|
||||
if tokens[1].Type != TokenNoun {
|
||||
t.Fatalf("tokens[1].Type = %v, want TokenNoun", tokens[1].Type)
|
||||
}
|
||||
if tokens[1].Lower != "enfant" {
|
||||
t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
|
||||
}
|
||||
if tokens[2].Type != TokenPunctuation {
|
||||
t.Fatalf("tokens[2].Type = %v, want TokenPunctuation", tokens[2].Type)
|
||||
}
|
||||
if tokens[2].PunctType != "sentence_end" {
|
||||
t.Fatalf("tokens[2].PunctType = %q, want %q", tokens[2].PunctType, "sentence_end")
|
||||
}
|
||||
|
||||
tokens = tok.Tokenise("de le serveur")
|
||||
if len(tokens) != 2 {
|
||||
t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "de le serveur", len(tokens))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue