diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index c25dfdb..34ccd95 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -602,7 +602,7 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) { switch lower { case "l'", "les": return "definite", true - case "des": + case "un", "une", "des": return "indefinite", true } } diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go index 7b642ad..bbe988e 100644 --- a/reversal/tokeniser_test.go +++ b/reversal/tokeniser_test.go @@ -220,7 +220,7 @@ func TestTokeniser_MatchArticle_FrenchGendered(t *testing.T) { {"Le", "definite", true}, {"La", "definite", true}, {"un", "indefinite", true}, - {"une", "", false}, + {"une", "indefinite", true}, } for _, tt := range tests { @@ -239,6 +239,14 @@ func TestTokeniser_MatchArticle_FrenchGendered(t *testing.T) { if len(tokens) == 0 || tokens[0].Type != TokenArticle { t.Fatalf("Tokenise(%q)[0] should be TokenArticle, got %#v", "la branche", tokens) } + + tokens = tok.Tokenise("une branche") + if len(tokens) == 0 || tokens[0].Type != TokenArticle { + t.Fatalf("Tokenise(%q)[0] should be TokenArticle, got %#v", "une branche", tokens) + } + if tokens[0].ArtType != "indefinite" { + t.Fatalf("Tokenise(%q)[0].ArtType = %q, want %q", "une branche", tokens[0].ArtType, "indefinite") + } } func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {