From c5fbf6ee0cee88c0114a056a8dfa589114d83bb9 Mon Sep 17 00:00:00 2001
From: Virgil
Date: Thu, 2 Apr 2026 04:34:12 +0000
Subject: [PATCH] fix(reversal): ignore punctuation in article matching

Co-Authored-By: Virgil
---
 reversal/tokeniser.go      | 3 +++
 reversal/tokeniser_test.go | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go
index a615219..ad17075 100644
--- a/reversal/tokeniser.go
+++ b/reversal/tokeniser.go
@@ -668,6 +668,9 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
 		return "", false
 	}
 
+	if base, _ := splitTrailingPunct(word); base != "" {
+		word = base
+	}
 	lower := core.Lower(word)
 
 	if artType, ok := matchConfiguredArticleText(lower, data); ok {
diff --git a/reversal/tokeniser_test.go b/reversal/tokeniser_test.go
index 854fe8e..3b0b8d4 100644
--- a/reversal/tokeniser_test.go
+++ b/reversal/tokeniser_test.go
@@ -189,6 +189,7 @@ func TestTokeniser_MatchArticle(t *testing.T) {
 		{"a", "indefinite", true},
 		{"an", "indefinite", true},
 		{"the", "definite", true},
+		{"the.", "definite", true},
 		{"A", "indefinite", true},
 		{"The", "definite", true},
 		{"foo", "", false},
@@ -219,6 +220,7 @@ func TestTokeniser_MatchArticle_FrenchGendered(t *testing.T) {
 		{"le", "definite", true},
 		{"la", "definite", true},
 		{"le serveur", "definite", true},
+		{"le serveur.", "definite", true},
 		{"la branche", "definite", true},
 		{"les amis", "definite", true},
 		{"Le", "definite", true},
-- 
2.45.3