feat(i18n): generalise French elision contractions
All checks were successful
Security Scan / security (push) Successful in 11s
Test / test (push) Successful in 1m8s

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-04-01 22:20:07 +00:00
parent e71280189b
commit 2c7175b892
4 changed files with 116 additions and 13 deletions

View file

@ -496,11 +496,16 @@ func maybeElideArticle(article, word, lang string) string {
if !isFrenchLanguage(lang) {
return article
}
if !startsWithVowelSound(word) {
return article
}
switch core.Lower(article) {
case "le", "la", "de", "je", "me", "te", "se", "ne":
if startsWithVowelSound(word) {
return "l'"
}
case "le", "la", "de", "je", "me", "te", "se", "ne", "ce":
// French elision keeps the leading consonant and replaces the final
// vowel with an apostrophe: le/la -> l', de -> d', je -> j', etc.
return core.Lower(article[:1]) + "'"
case "que":
return "qu'"
}
return article
}

View file

@ -349,6 +349,83 @@ func TestArticleFrenchLocale(t *testing.T) {
}
}
// TestArticleFrenchElisionKeepsLeadingConsonant verifies that French elision
// preserves each article's leading consonant (de -> d', je -> j') instead of
// always collapsing to "l'".
func TestArticleFrenchElisionKeepsLeadingConsonant(t *testing.T) {
// Snapshot the package-level French grammar data and the default service so
// this test's global mutations are undone on cleanup. Cleanups run LIFO, so
// the grammar data is restored last.
prevData := GetGrammarData("fr")
t.Cleanup(func() {
SetGrammarData("fr", prevData)
})
prev := Default()
svc, err := New()
if err != nil {
t.Fatalf("New() failed: %v", err)
}
SetDefault(svc)
t.Cleanup(func() {
SetDefault(prev)
})
if err := SetLanguage("fr"); err != nil {
t.Fatalf("SetLanguage(fr) failed: %v", err)
}
// Synthetic grammar data: genders "d" and "j" are fabricated so that
// ByGender resolves to the articles "de" and "je", exercising the
// non-"l" elision branches. Every noun starts with a vowel sound
// (including "homme", whose leading h is mute) so elision applies.
SetGrammarData("fr", &GrammarData{
Nouns: map[string]NounForms{
"amie": {One: "amie", Other: "amies", Gender: "f"},
"accord": {One: "accord", Other: "accords", Gender: "d"},
"homme": {One: "homme", Other: "hommes", Gender: "m"},
"idole": {One: "idole", Other: "idoles", Gender: "j"},
},
Articles: ArticleForms{
IndefiniteDefault: "un",
IndefiniteVowel: "un",
Definite: "le",
ByGender: map[string]string{
"d": "de",
"f": "la",
"j": "je",
"m": "le",
},
},
})
// Article alone: the elided form keeps the article's own first letter.
tests := []struct {
word string
want string
}{
{"homme", "l'"},
{"amie", "l'"},
{"accord", "d'"},
{"idole", "j'"},
}
for _, tt := range tests {
t.Run(tt.word, func(t *testing.T) {
got := Article(tt.word)
if got != tt.want {
t.Errorf("Article(%q) = %q, want %q", tt.word, got, tt.want)
}
})
}
// Full phrase: the apostrophe joins article and noun with no space.
phraseTests := []struct {
word string
want string
}{
{"accord", "d'accord"},
{"idole", "j'idole"},
}
for _, tt := range phraseTests {
t.Run(tt.word+"_phrase", func(t *testing.T) {
got := ArticlePhrase(tt.word)
if got != tt.want {
t.Errorf("ArticlePhrase(%q) = %q, want %q", tt.word, got, tt.want)
}
})
}
}
type pluralizeOverrideLoader struct{}
func (pluralizeOverrideLoader) Languages() []string {

View file

@ -23,6 +23,8 @@ import (
i18n "dappco.re/go/core/i18n"
)
var frenchElisionPrefixes = []string{"l", "d", "j", "m", "t", "s", "n", "c", "qu"}
// VerbMatch holds the result of a reverse verb lookup.
type VerbMatch struct {
Base string // Base form of the verb ("delete", "run")
@ -606,7 +608,7 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
}
if t.isFrenchLanguage() {
switch lower {
case "l'", "l", "les":
case "l'", "l", "d'", "d", "j'", "j", "m'", "m", "t'", "t", "s'", "s", "n'", "n", "c'", "c", "qu'", "qu", "les":
return "definite", true
case "un", "une", "des":
return "indefinite", true
@ -1122,14 +1124,22 @@ func (t *Tokeniser) splitFrenchElision(raw string) (string, string, bool) {
return "", raw, false
}
if lower[0] != 'l' {
return "", raw, false
}
if idx := strings.IndexAny(raw, "'"); idx == 1 {
_, size := utf8.DecodeRuneInString(raw[idx:])
if size > 0 {
return raw[:idx+size], raw[idx+size:], true
for _, prefix := range frenchElisionPrefixes {
if !strings.HasPrefix(lower, prefix) {
continue
}
idx := len(prefix)
if idx >= len(raw) {
continue
}
if idx < len(raw) {
r, size := utf8.DecodeRuneInString(raw[idx:])
if r != '\'' && r != '’' {
continue
}
if size > 0 {
return raw[:idx+size], raw[idx+size:], true
}
}
}

View file

@ -334,6 +334,17 @@ func TestTokeniser_Tokenise_FrenchElision(t *testing.T) {
t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
}
tokens = tok.Tokenise("d'enfant")
if len(tokens) != 2 {
t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "d'enfant", len(tokens))
}
if tokens[0].Type != TokenArticle {
t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type)
}
if tokens[1].Type != TokenNoun {
t.Fatalf("tokens[1].Type = %v, want TokenNoun", tokens[1].Type)
}
tokens = tok.Tokenise("lenfant")
if len(tokens) != 2 {
t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "lenfant", len(tokens))