diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index a615219..ad17075 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -668,6 +668,9 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) { return "", false } + if base, _ := splitTrailingPunct(word); base != "" { + word = base + } lower := core.Lower(word) if artType, ok := matchConfiguredArticleText(lower, data); ok { diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go index 854fe8e..3b0b8d4 100644 --- a/reversal/tokeniser_test.go +++ b/reversal/tokeniser_test.go @@ -189,6 +189,7 @@ func TestTokeniser_MatchArticle(t *testing.T) { {"a", "indefinite", true}, {"an", "indefinite", true}, {"the", "definite", true}, + {"the.", "definite", true}, {"A", "indefinite", true}, {"The", "definite", true}, {"foo", "", false}, @@ -219,6 +220,7 @@ func TestTokeniser_MatchArticle_FrenchGendered(t *testing.T) { {"le", "definite", true}, {"la", "definite", true}, {"le serveur", "definite", true}, + {"le serveur.", "definite", true}, {"la branche", "definite", true}, {"les amis", "definite", true}, {"Le", "definite", true},