diff --git a/grammar.go b/grammar.go index aeabf22..ba52611 100644 --- a/grammar.go +++ b/grammar.go @@ -496,11 +496,16 @@ func maybeElideArticle(article, word, lang string) string { if !isFrenchLanguage(lang) { return article } + if !startsWithVowelSound(word) { + return article + } switch core.Lower(article) { - case "le", "la", "de", "je", "me", "te", "se", "ne": - if startsWithVowelSound(word) { - return "l'" - } + case "le", "la", "de", "je", "me", "te", "se", "ne", "ce": + // French elision keeps the leading consonant and replaces the final + // vowel with an apostrophe: le/la -> l', de -> d', je -> j', etc. + return core.Lower(article[:1]) + "'" + case "que": + return "qu'" } return article } diff --git a/grammar_test.go b/grammar_test.go index 5dfbac1..07c63ba 100644 --- a/grammar_test.go +++ b/grammar_test.go @@ -349,6 +349,83 @@ func TestArticleFrenchLocale(t *testing.T) { } } +func TestArticleFrenchElisionKeepsLeadingConsonant(t *testing.T) { + prevData := GetGrammarData("fr") + t.Cleanup(func() { + SetGrammarData("fr", prevData) + }) + + prev := Default() + svc, err := New() + if err != nil { + t.Fatalf("New() failed: %v", err) + } + SetDefault(svc) + t.Cleanup(func() { + SetDefault(prev) + }) + + if err := SetLanguage("fr"); err != nil { + t.Fatalf("SetLanguage(fr) failed: %v", err) + } + + SetGrammarData("fr", &GrammarData{ + Nouns: map[string]NounForms{ + "amie": {One: "amie", Other: "amies", Gender: "f"}, + "accord": {One: "accord", Other: "accords", Gender: "d"}, + "homme": {One: "homme", Other: "hommes", Gender: "m"}, + "idole": {One: "idole", Other: "idoles", Gender: "j"}, + }, + Articles: ArticleForms{ + IndefiniteDefault: "un", + IndefiniteVowel: "un", + Definite: "le", + ByGender: map[string]string{ + "d": "de", + "f": "la", + "j": "je", + "m": "le", + }, + }, + }) + + tests := []struct { + word string + want string + }{ + {"homme", "l'"}, + {"amie", "l'"}, + {"accord", "d'"}, + {"idole", "j'"}, + } + + for _, tt := range tests { + t.Run(tt.word, func(t *testing.T) { + got := Article(tt.word) + if got != tt.want { + t.Errorf("Article(%q) = %q, want %q", tt.word, got, tt.want) + } + }) + } + + phraseTests := []struct { + word string + want string + }{ + {"accord", "d'accord"}, + {"idole", "j'idole"}, + } + + for _, tt := range phraseTests { + t.Run(tt.word+"_phrase", func(t *testing.T) { + got := ArticlePhrase(tt.word) + if got != tt.want { + t.Errorf("ArticlePhrase(%q) = %q, want %q", tt.word, got, tt.want) + } + }) + } +} + type pluralizeOverrideLoader struct{} func (pluralizeOverrideLoader) Languages() []string { diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index 58de779..7ed8189 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -23,6 +23,8 @@ import ( i18n "dappco.re/go/core/i18n" ) +var frenchElisionPrefixes = []string{"l", "d", "j", "m", "t", "s", "n", "c", "qu"} + // VerbMatch holds the result of a reverse verb lookup. type VerbMatch struct { Base string // Base form of the verb ("delete", "run") @@ -606,7 +608,7 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) { } if t.isFrenchLanguage() { switch lower { - case "l'", "l’", "les": + case "l'", "l’", "d'", "d’", "j'", "j’", "m'", "m’", "t'", "t’", "s'", "s’", "n'", "n’", "c'", "c’", "qu'", "qu’", "les": return "definite", true case "un", "une", "des": return "indefinite", true @@ -1122,14 +1124,22 @@ func (t *Tokeniser) splitFrenchElision(raw string) (string, string, bool) { return "", raw, false } - if lower[0] != 'l' { - return "", raw, false - } - - if idx := strings.IndexAny(raw, "'’"); idx == 1 { - _, size := utf8.DecodeRuneInString(raw[idx:]) - if size > 0 { - return raw[:idx+size], raw[idx+size:], true + for _, prefix := range frenchElisionPrefixes { + if !strings.HasPrefix(lower, prefix) { + continue + } + idx := len(prefix) + if idx >= len(raw) { + continue + } + if idx < len(raw) { + r, size := utf8.DecodeRuneInString(raw[idx:]) + if r != '\'' && r != '’' { + continue + } + if size > 0 { + return raw[:idx+size], raw[idx+size:], true + } } } diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go index da1587d..762041d 100644 --- a/reversal/tokeniser_test.go +++ b/reversal/tokeniser_test.go @@ -334,6 +334,17 @@ func TestTokeniser_Tokenise_FrenchElision(t *testing.T) { t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant") } + tokens = tok.Tokenise("d'enfant") + if len(tokens) != 2 { + t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "d'enfant", len(tokens)) + } + if tokens[0].Type != TokenArticle { + t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type) + } + if tokens[1].Type != TokenNoun { + t.Fatalf("tokens[1].Type = %v, want TokenNoun", tokens[1].Type) + } + tokens = tok.Tokenise("l’enfant") if len(tokens) != 2 { t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "l’enfant", len(tokens))