diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index 730e64b..f18e29d 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -617,7 +617,7 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) { } if t.isFrenchLanguage() { switch lower { - case "l'", "l’", "d'", "d’", "j'", "j’", "m'", "m’", "t'", "t’", "s'", "s’", "n'", "n’", "c'", "c’", "qu'", "qu’", "les": + case "l'", "l’", "d'", "d’", "j'", "j’", "m'", "m’", "t'", "t’", "s'", "s’", "n'", "n’", "c'", "c’", "qu'", "qu’", "les", "au", "aux", "du": return "definite", true case "un", "une", "des": return "indefinite", true diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go index 44fae39..2e4bae2 100644 --- a/reversal/tokeniser_test.go +++ b/reversal/tokeniser_test.go @@ -296,6 +296,9 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) { {"L'", "definite", true}, {"L’", "definite", true}, {"les", "definite", true}, + {"au", "definite", true}, + {"aux", "definite", true}, + {"du", "definite", true}, {"des", "indefinite", true}, {"l'enfant", "", false}, } @@ -358,6 +361,17 @@ func TestTokeniser_Tokenise_FrenchElision(t *testing.T) { if tokens[1].Lower != "enfant" { t.Fatalf("tokens[1].Lower = %q, want %q", tokens[1].Lower, "enfant") } + + tokens = tok.Tokenise("au serveur") + if len(tokens) != 2 { + t.Fatalf("Tokenise(%q) returned %d tokens, want 2", "au serveur", len(tokens)) + } + if tokens[0].Type != TokenArticle { + t.Fatalf("tokens[0].Type = %v, want TokenArticle", tokens[0].Type) + } + if tokens[0].ArtType != "definite" { + t.Fatalf("tokens[0].ArtType = %q, want %q", tokens[0].ArtType, "definite") + } } func TestTokeniser_Tokenise(t *testing.T) {