[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #108

Merged
Virgil merged 1 commit from agent/read---spec-code-core-go-i18n-rfc-md-ful into dev 2026-04-02 04:05:35 +00:00
2 changed files with 85 additions and 11 deletions

View file

@ -670,17 +670,8 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
lower := core.Lower(word)
if lower == core.Lower(data.Articles.IndefiniteDefault) ||
lower == core.Lower(data.Articles.IndefiniteVowel) {
return "indefinite", true
}
if lower == core.Lower(data.Articles.Definite) {
return "definite", true
}
for _, article := range data.Articles.ByGender {
if lower == core.Lower(article) {
return "definite", true
}
if artType, ok := matchConfiguredArticleText(lower, data); ok {
return artType, true
}
if t.isFrenchLanguage() {
if artType, ok := matchFrenchLeadingArticlePhrase(lower); ok {
@ -707,6 +698,46 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
return "", false
}
// matchConfiguredArticleText classifies lower against the article forms
// configured in data.
//
// lower is compared as given against the lowercased configured forms, so it
// should already be lowercased (MatchArticle passes core.Lower(word)). The
// whole text is tried first. If that fails and the text contains a space or
// tab somewhere after its first byte, the part before the first such
// separator is tried as a leading article, e.g. "the" in "the file".
//
// It returns "indefinite" for the IndefiniteDefault and IndefiniteVowel
// forms, "definite" for the Definite form or any ByGender form, and
// ("", false) when nothing matches or data is nil. Empty text never matches,
// so an unset (empty) article form cannot turn empty input into an article.
func matchConfiguredArticleText(lower string, data *i18n.GrammarData) (string, bool) {
	if data == nil || lower == "" {
		return "", false
	}

	// Lowercase the fixed forms once; they are reused for the prefix pass.
	indefiniteDefault := core.Lower(data.Articles.IndefiniteDefault)
	indefiniteVowel := core.Lower(data.Articles.IndefiniteVowel)
	definite := core.Lower(data.Articles.Definite)

	// classify reports the article type of candidate, checking the indefinite
	// forms before the definite ones.
	classify := func(candidate string) (string, bool) {
		if candidate == "" {
			return "", false
		}
		switch candidate {
		case indefiniteDefault, indefiniteVowel:
			return "indefinite", true
		case definite:
			return "definite", true
		}
		for _, article := range data.Articles.ByGender {
			if candidate == core.Lower(article) {
				return "definite", true
			}
		}
		return "", false
	}

	if artType, ok := classify(lower); ok {
		return artType, true
	}

	// Fall back to the leading word of a phrase. A separator at index 0 means
	// the text starts with whitespace, which is not treated as a phrase.
	idx := strings.IndexAny(lower, " \t")
	if idx <= 0 {
		return "", false
	}
	return classify(core.Trim(lower[:idx]))
}
func matchFrenchLeadingArticlePhrase(lower string) (string, bool) {
switch {
case lower == "le", lower == "la", lower == "les",

View file

@ -330,6 +330,49 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {
}
}
// TestTokeniser_MatchArticle_ConfiguredPhrasePrefix registers grammar data
// with English-style articles under the language code "xx" and checks that
// MatchArticle classifies phrases by their leading configured article, while
// a bare noun is reported as not an article.
func TestTokeniser_MatchArticle_ConfiguredPhrasePrefix(t *testing.T) {
	setup(t)

	const lang = "xx"

	// Capture whatever is currently registered for lang and put it back once
	// the test (including its subtests) has finished.
	original := i18n.GetGrammarData(lang)
	t.Cleanup(func() {
		i18n.SetGrammarData(lang, original)
	})

	articles := i18n.ArticleForms{
		IndefiniteDefault: "a",
		IndefiniteVowel:   "an",
		Definite:          "the",
	}
	i18n.SetGrammarData(lang, &i18n.GrammarData{Articles: articles})

	tokeniser := NewTokeniserForLang(lang)

	type articleCase struct {
		word     string
		wantType string
		wantOK   bool
	}
	cases := []articleCase{
		{word: "the file", wantType: "definite", wantOK: true},
		{word: "a file", wantType: "indefinite", wantOK: true},
		{word: "an error", wantType: "indefinite", wantOK: true},
		{word: "file", wantType: "", wantOK: false},
	}

	for _, tc := range cases {
		t.Run(tc.word, func(t *testing.T) {
			gotType, gotOK := tokeniser.MatchArticle(tc.word)
			switch {
			case gotOK != tc.wantOK:
				t.Fatalf("MatchArticle(%q) ok=%v, want %v", tc.word, gotOK, tc.wantOK)
			case gotOK && gotType != tc.wantType:
				// The type is only meaningful when a match was reported.
				t.Errorf("MatchArticle(%q) = %q, want %q", tc.word, gotType, tc.wantType)
			}
		})
	}
}
func TestTokeniser_Tokenise_FrenchElision(t *testing.T) {
setup(t)
tok := NewTokeniserForLang("fr")