[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #98
2 changed files with 21 additions and 1 deletion
|
|
@ -658,6 +658,9 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
}
|
||||
}
|
||||
if t.isFrenchLanguage() {
|
||||
if artType, ok := matchFrenchAttachedArticle(lower); ok {
|
||||
return artType, true
|
||||
}
|
||||
switch lower {
|
||||
case "l'", "l’", "d'", "d’", "j'", "j’", "m'", "m’", "t'", "t’", "s'", "s’", "n'", "n’", "c'", "c’", "qu'", "qu’", "de l'", "de l’", "de la", "les", "au", "aux", "du":
|
||||
return "definite", true
|
||||
|
|
@ -669,6 +672,23 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
return "", false
|
||||
}
|
||||
|
||||
func matchFrenchAttachedArticle(lower string) (string, bool) {
|
||||
for _, prefix := range frenchElisionPrefixes {
|
||||
if !strings.HasPrefix(lower, prefix) {
|
||||
continue
|
||||
}
|
||||
rest := strings.TrimPrefix(lower, prefix)
|
||||
if rest == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.HasPrefix(rest, "'") && !strings.HasPrefix(rest, "’") {
|
||||
continue
|
||||
}
|
||||
return "definite", true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// tokenAmbiguous is an internal sentinel used during Pass 1 to mark
|
||||
// dual-class base forms that need disambiguation in Pass 2.
|
||||
const tokenAmbiguous TokenType = -1
|
||||
|
|
|
|||
|
|
@ -303,7 +303,7 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {
|
|||
{"aux", "definite", true},
|
||||
{"du", "definite", true},
|
||||
{"des", "indefinite", true},
|
||||
{"l'enfant", "", false},
|
||||
{"l'enfant", "definite", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue