fix(reversal): include contraction auxiliaries in fallback signals
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
277445cc5d
commit
460d9e8dd6
2 changed files with 26 additions and 5 deletions
|
|
@ -546,11 +546,7 @@ func (t *Tokeniser) buildSignalIndex() {
|
|||
t.verbAux[core.Lower(w)] = true
|
||||
}
|
||||
} else {
|
||||
for _, w := range []string{
|
||||
"is", "are", "was", "were", "has", "had", "have",
|
||||
"do", "does", "did", "will", "would", "could", "should",
|
||||
"can", "may", "might", "shall", "must",
|
||||
} {
|
||||
for _, w := range defaultVerbAuxiliaries() {
|
||||
t.verbAux[w] = true
|
||||
}
|
||||
}
|
||||
|
|
@ -564,6 +560,19 @@ func (t *Tokeniser) buildSignalIndex() {
|
|||
}
|
||||
}
|
||||
|
||||
// defaultVerbAuxiliaries returns the built-in fallback list of English
// auxiliary verbs — plain forms plus their negated contractions — used to
// seed the tokeniser's verb-auxiliary signal index when no locale grammar
// data is loaded.
func defaultVerbAuxiliaries() []string {
	// Uncontracted auxiliary forms.
	plain := []string{
		"am", "is", "are", "was", "were",
		"has", "had", "have",
		"do", "does", "did",
		"will", "would", "could", "should",
		"can", "may", "might", "shall", "must",
	}
	// Negated contractions: a verb following e.g. "don't" should still be
	// disambiguated as a verb, so these must be recognised as auxiliaries too.
	contracted := []string{
		"don't", "can't", "won't", "shouldn't", "couldn't", "wouldn't",
		"doesn't", "didn't", "isn't", "aren't", "wasn't", "weren't",
		"hasn't", "hadn't", "haven't",
	}
	return append(plain, contracted...)
}
|
||||
|
||||
func defaultWeights() map[string]float64 {
|
||||
return map[string]float64{
|
||||
"noun_determiner": 0.35,
|
||||
|
|
|
|||
|
|
@ -694,6 +694,18 @@ func TestTokeniser_Disambiguate_ContractionAux(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_Disambiguate_ContractionAux_FallbackDefaults(t *testing.T) {
|
||||
tok := NewTokeniserForLang("zz")
|
||||
tokens := tok.Tokenise("don't run the tests")
|
||||
// The hardcoded fallback auxiliaries should still recognise contractions
|
||||
// even when no locale grammar data is loaded.
|
||||
for _, token := range tokens {
|
||||
if token.Lower == "run" && token.Type != TokenVerb {
|
||||
t.Errorf("'run' after \"don't\": Type = %v, want TokenVerb", token.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_WithSignals_Breakdown(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniser(WithSignals())
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue