[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #108
2 changed files with 85 additions and 11 deletions
|
|
@ -670,17 +670,8 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
|
||||
lower := core.Lower(word)
|
||||
|
||||
if lower == core.Lower(data.Articles.IndefiniteDefault) ||
|
||||
lower == core.Lower(data.Articles.IndefiniteVowel) {
|
||||
return "indefinite", true
|
||||
}
|
||||
if lower == core.Lower(data.Articles.Definite) {
|
||||
return "definite", true
|
||||
}
|
||||
for _, article := range data.Articles.ByGender {
|
||||
if lower == core.Lower(article) {
|
||||
return "definite", true
|
||||
}
|
||||
if artType, ok := matchConfiguredArticleText(lower, data); ok {
|
||||
return artType, true
|
||||
}
|
||||
if t.isFrenchLanguage() {
|
||||
if artType, ok := matchFrenchLeadingArticlePhrase(lower); ok {
|
||||
|
|
@ -707,6 +698,46 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
return "", false
|
||||
}
|
||||
|
||||
func matchConfiguredArticleText(lower string, data *i18n.GrammarData) (string, bool) {
|
||||
if data == nil {
|
||||
return "", false
|
||||
}
|
||||
|
||||
if lower == core.Lower(data.Articles.IndefiniteDefault) ||
|
||||
lower == core.Lower(data.Articles.IndefiniteVowel) {
|
||||
return "indefinite", true
|
||||
}
|
||||
if lower == core.Lower(data.Articles.Definite) {
|
||||
return "definite", true
|
||||
}
|
||||
for _, article := range data.Articles.ByGender {
|
||||
if lower == core.Lower(article) {
|
||||
return "definite", true
|
||||
}
|
||||
}
|
||||
|
||||
if idx := strings.IndexAny(lower, " \t"); idx > 0 {
|
||||
prefix := core.Trim(lower[:idx])
|
||||
if prefix == "" {
|
||||
return "", false
|
||||
}
|
||||
if prefix == core.Lower(data.Articles.IndefiniteDefault) ||
|
||||
prefix == core.Lower(data.Articles.IndefiniteVowel) {
|
||||
return "indefinite", true
|
||||
}
|
||||
if prefix == core.Lower(data.Articles.Definite) {
|
||||
return "definite", true
|
||||
}
|
||||
for _, article := range data.Articles.ByGender {
|
||||
if prefix == core.Lower(article) {
|
||||
return "definite", true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
||||
|
||||
func matchFrenchLeadingArticlePhrase(lower string) (string, bool) {
|
||||
switch {
|
||||
case lower == "le", lower == "la", lower == "les",
|
||||
|
|
|
|||
|
|
@ -330,6 +330,49 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_MatchArticle_ConfiguredPhrasePrefix(t *testing.T) {
|
||||
setup(t)
|
||||
|
||||
const lang = "xx"
|
||||
prev := i18n.GetGrammarData(lang)
|
||||
t.Cleanup(func() {
|
||||
i18n.SetGrammarData(lang, prev)
|
||||
})
|
||||
|
||||
i18n.SetGrammarData(lang, &i18n.GrammarData{
|
||||
Articles: i18n.ArticleForms{
|
||||
IndefiniteDefault: "a",
|
||||
IndefiniteVowel: "an",
|
||||
Definite: "the",
|
||||
},
|
||||
})
|
||||
|
||||
tok := NewTokeniserForLang(lang)
|
||||
|
||||
tests := []struct {
|
||||
word string
|
||||
wantType string
|
||||
wantOK bool
|
||||
}{
|
||||
{"the file", "definite", true},
|
||||
{"a file", "indefinite", true},
|
||||
{"an error", "indefinite", true},
|
||||
{"file", "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.word, func(t *testing.T) {
|
||||
artType, ok := tok.MatchArticle(tt.word)
|
||||
if ok != tt.wantOK {
|
||||
t.Fatalf("MatchArticle(%q) ok=%v, want %v", tt.word, ok, tt.wantOK)
|
||||
}
|
||||
if ok && artType != tt.wantType {
|
||||
t.Errorf("MatchArticle(%q) = %q, want %q", tt.word, artType, tt.wantType)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_Tokenise_FrenchElision(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniserForLang("fr")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue