feat(reversal): recognise French contracted articles
All checks were successful
Security Scan / security (push) Successful in 10s
Test / test (push) Successful in 1m13s

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-04-01 23:26:58 +00:00
parent d0e1312abf
commit 08149135c7
2 changed files with 15 additions and 1 deletion

View file

@ -617,7 +617,7 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
}
if t.isFrenchLanguage() {
switch lower {
case "l'", "l", "d'", "d", "j'", "j", "m'", "m", "t'", "t", "s'", "s", "n'", "n", "c'", "c", "qu'", "qu", "les":
case "l'", "l", "d'", "d", "j'", "j", "m'", "m", "t'", "t", "s'", "s", "n'", "n", "c'", "c", "qu'", "qu", "les", "au", "aux", "du":
return "definite", true
case "un", "une", "des":
return "indefinite", true

View file

@ -296,6 +296,9 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {
{"L'", "definite", true},
{"L", "definite", true},
{"les", "definite", true},
{"au", "definite", true},
{"aux", "definite", true},
{"du", "definite", true},
{"des", "indefinite", true},
{"l'enfant", "", false},
}
@ -358,6 +361,17 @@ func TestTokeniser_Tokenise_FrenchElision(t *testing.T) {
if tokens[1].Lower != "enfant" {
t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant")
}
tokens = tok.Tokenise("au serveur")
if len(tokens) != 2 {
t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "au serveur", len(tokens))
}
if tokens[0].Type != TokenArticle {
t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type)
}
if tokens[0].ArtType != "definite" {
t.Fatalf("tokens[0].ArtType = %q, want %q", tokens[0].ArtType, "definite")
}
}
func TestTokeniser_Tokenise(t *testing.T) {