[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #120

Merged
Virgil merged 1 commit from agent/read---spec-code-core-go-i18n-rfc-md-ful into dev 2026-04-02 04:57:51 +00:00
2 changed files with 38 additions and 3 deletions

View file

@ -671,7 +671,7 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
if base, _ := splitTrailingPunct(word); base != "" {
word = base
}
lower := core.Lower(word)
lower := normalizeFrenchApostrophes(core.Lower(word))
if artType, ok := matchConfiguredArticleText(lower, data); ok {
return artType, true
@ -705,6 +705,7 @@ func matchConfiguredArticleText(lower string, data *i18n.GrammarData) (string, b
if data == nil {
return "", false
}
lower = normalizeFrenchApostrophes(lower)
if lower == core.Lower(data.Articles.IndefiniteDefault) ||
lower == core.Lower(data.Articles.IndefiniteVowel) {
@ -742,6 +743,7 @@ func matchConfiguredArticleText(lower string, data *i18n.GrammarData) (string, b
}
func matchFrenchLeadingArticlePhrase(lower string) (string, bool) {
lower = normalizeFrenchApostrophes(lower)
switch {
case lower == "le", lower == "la", lower == "les",
lower == "l'", lower == "l", lower == "au", lower == "aux":
@ -777,8 +779,9 @@ func matchFrenchLeadingArticlePhrase(lower string) (string, bool) {
}
func matchFrenchArticleText(lower string) (string, bool) {
lower = normalizeFrenchApostrophes(lower)
switch {
case strings.HasPrefix(lower, "de l'"), strings.HasPrefix(lower, "de l"):
case strings.HasPrefix(lower, "de l'"):
return "indefinite", true
case strings.HasPrefix(lower, "de la "), strings.HasPrefix(lower, "de le "), strings.HasPrefix(lower, "de les "), strings.HasPrefix(lower, "du "), strings.HasPrefix(lower, "des "):
return "indefinite", true
@ -821,6 +824,7 @@ func matchFrenchArticleText(lower string) (string, bool) {
}
func matchFrenchAttachedArticle(lower string) (string, bool) {
lower = normalizeFrenchApostrophes(lower)
for _, prefix := range frenchElisionPrefixes {
if !strings.HasPrefix(lower, prefix) {
continue
@ -1530,7 +1534,7 @@ func (t *Tokeniser) splitFrenchElision(raw string) (string, string, bool) {
return "", raw, false
}
lower := core.Lower(raw)
lower := normalizeFrenchApostrophes(core.Lower(raw))
if len(lower) < 2 {
return "", raw, false
}
@ -1562,6 +1566,13 @@ func (t *Tokeniser) isFrenchLanguage() bool {
return lang == "fr" || core.HasPrefix(lang, "fr-")
}
// normalizeFrenchApostrophes maps the typographic apostrophe U+2019 (’)
// to the ASCII apostrophe (') so that French elision and article forms
// typed with either character — e.g. "l’enfant" vs "l'enfant" — match the
// "l'" / "de l'" patterns used by the article matchers in this file.
//
// The ContainsRune fast path returns the input unchanged (no allocation)
// in the common case where no typographic apostrophe is present.
func normalizeFrenchApostrophes(s string) string {
	if s == "" || !strings.ContainsRune(s, '’') {
		return s
	}
	return strings.ReplaceAll(s, "’", "'")
}
// matchPunctuation detects known punctuation patterns.
// Returns the punctuation type and true if recognised.
func matchPunctuation(punct string) (string, bool) {

View file

@ -319,6 +319,7 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {
{"l'enfant", "definite", true},
{"de l'enfant", "indefinite", true},
{"de lami", "indefinite", true},
{"De lenfant", "indefinite", true},
}
for _, tt := range tests {
@ -438,6 +439,29 @@ func TestTokeniser_Tokenise_FrenchElision(t *testing.T) {
t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
}
tokens = tok.Tokenise("De lenfant.")
if len(tokens) != 3 {
t.Fatalf("Tokenise(%q) returned %d tokens, want 3", "De lenfant.", len(tokens))
}
if tokens[0].Type != TokenArticle {
t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type)
}
if tokens[0].ArtType != "indefinite" {
t.Fatalf("tokens[0].ArtType = %q, want %q", tokens[0].ArtType, "indefinite")
}
if tokens[1].Type != TokenNoun {
t.Fatalf("tokens[1].Type = %v, want TokenNoun", tokens[1].Type)
}
if tokens[1].Lower != "enfant" {
t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
}
if tokens[2].Type != TokenPunctuation {
t.Fatalf("tokens[2].Type = %v, want TokenPunctuation", tokens[2].Type)
}
if tokens[2].PunctType != "sentence_end" {
t.Fatalf("tokens[2].PunctType = %q, want %q", tokens[2].PunctType, "sentence_end")
}
tokens = tok.Tokenise("de le serveur")
if len(tokens) != 2 {
t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "de le serveur", len(tokens))