fix(reversal): recognise French article phrases
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
c21afd4263
commit
632681c0df
2 changed files with 49 additions and 0 deletions
|
|
@ -683,6 +683,9 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
}
|
||||
}
|
||||
if t.isFrenchLanguage() {
|
||||
if artType, ok := matchFrenchArticleText(lower); ok {
|
||||
return artType, true
|
||||
}
|
||||
if artType, ok := matchFrenchAttachedArticle(lower); ok {
|
||||
return artType, true
|
||||
}
|
||||
|
|
@ -701,6 +704,50 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) {
|
|||
return "", false
|
||||
}
|
||||
|
||||
func matchFrenchArticleText(lower string) (string, bool) {
|
||||
switch {
|
||||
case strings.HasPrefix(lower, "de l'"), strings.HasPrefix(lower, "de l’"):
|
||||
return "indefinite", true
|
||||
case strings.HasPrefix(lower, "de la "), strings.HasPrefix(lower, "de le "), strings.HasPrefix(lower, "du "), strings.HasPrefix(lower, "des "):
|
||||
return "indefinite", true
|
||||
case strings.HasPrefix(lower, "au "), strings.HasPrefix(lower, "aux "):
|
||||
return "definite", true
|
||||
}
|
||||
|
||||
fields := strings.Fields(lower)
|
||||
if len(fields) == 0 {
|
||||
return "", false
|
||||
}
|
||||
|
||||
switch fields[0] {
|
||||
case "l'", "l’", "les", "au", "aux":
|
||||
return "definite", true
|
||||
case "un", "une":
|
||||
return "indefinite", true
|
||||
case "du", "des":
|
||||
return "indefinite", true
|
||||
case "de":
|
||||
if len(fields) >= 2 {
|
||||
switch fields[1] {
|
||||
case "la", "l'", "l’":
|
||||
return "indefinite", true
|
||||
case "le", "les":
|
||||
return "definite", true
|
||||
}
|
||||
}
|
||||
case "d'", "d’":
|
||||
return "indefinite", true
|
||||
case "j'", "j’", "m'", "m’", "t'", "t’", "s'", "s’", "n'", "n’", "c'", "c’", "qu'", "qu’":
|
||||
return "definite", true
|
||||
}
|
||||
|
||||
if artType, ok := matchFrenchAttachedArticle(lower); ok {
|
||||
return artType, true
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
||||
|
||||
func matchFrenchAttachedArticle(lower string) (string, bool) {
|
||||
for _, prefix := range frenchElisionPrefixes {
|
||||
if !strings.HasPrefix(lower, prefix) {
|
||||
|
|
|
|||
|
|
@ -304,6 +304,8 @@ func TestTokeniser_MatchArticle_FrenchExtended(t *testing.T) {
|
|||
{"du", "indefinite", true},
|
||||
{"des", "indefinite", true},
|
||||
{"l'enfant", "definite", true},
|
||||
{"de l'enfant", "indefinite", true},
|
||||
{"de l’ami", "indefinite", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue