refactor(reversal): name low-information confidence floor
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
f2d2f2e5fd
commit
f13446fb73
2 changed files with 33 additions and 5 deletions
|
|
@ -892,6 +892,12 @@ var clauseBoundaries = map[string]bool{
|
|||
"when": true, "while": true, "if": true, "then": true, "so": true,
|
||||
}
|
||||
|
||||
// Low-information confidence floor used by resolveToken: when its `total`
// score is below lowInformationScoreThreshold, the token's Confidence and
// AltConf are set from the fixed values below instead of being derived from
// the scores, keeping the reported confidence near chance.
//
// NOTE(review): the names tie each value to a part of speech, but the values
// are really "winner" (0.55) and "alternative" (0.45). In the noun-wins branch
// of resolveToken this change assigns lowInformationNounConfidence (0.45) to
// tok.Confidence and lowInformationVerbConfidence (0.55) to tok.AltConf,
// whereas the literals it replaces were 0.55 and 0.45 respectively. A winning
// noun would then report lower confidence than its alternative — confirm
// whether that is intended for a change labelled as a refactor. Naming the
// constants by role (primary/alternative) would avoid the swap.
const (
|
||||
// Score total below which a classification is treated as low-information.
lowInformationScoreThreshold = 0.10
|
||||
// Confidence assigned to the verb reading on the low-information path.
lowInformationVerbConfidence = 0.55
|
||||
// Confidence assigned to the noun reading on the low-information path.
lowInformationNounConfidence = 0.45
|
||||
)
|
||||
|
||||
// Tokenise splits text on whitespace and classifies each word using a
|
||||
// two-pass algorithm:
|
||||
//
|
||||
|
|
@ -1519,17 +1525,17 @@ func (t *Tokeniser) resolveToken(tok *Token, verbScore, nounScore float64, compo
|
|||
|
||||
// If only the default prior fired, keep confidence near chance rather than
|
||||
// pretending the classification is strongly supported.
|
||||
if total < 0.10 {
|
||||
if total < lowInformationScoreThreshold {
|
||||
if verbScore >= nounScore {
|
||||
tok.Type = TokenVerb
|
||||
tok.Confidence = 0.55
|
||||
tok.Confidence = lowInformationVerbConfidence
|
||||
tok.AltType = TokenNoun
|
||||
tok.AltConf = 0.45
|
||||
tok.AltConf = lowInformationNounConfidence
|
||||
} else {
|
||||
tok.Type = TokenNoun
|
||||
tok.Confidence = 0.55
|
||||
tok.Confidence = lowInformationNounConfidence
|
||||
tok.AltType = TokenVerb
|
||||
tok.AltConf = 0.45
|
||||
tok.AltConf = lowInformationVerbConfidence
|
||||
}
|
||||
} else {
|
||||
if verbScore >= nounScore {
|
||||
|
|
|
|||
|
|
@ -1305,6 +1305,28 @@ func TestDefaultWeights_ReturnsCopy(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestTokeniser_LowInformationConfidenceFloor tokenises "maybe commit" and
// checks that the second token is classified as a verb with Confidence 0.55
// and a noun alternative with AltConf 0.45.
//
// Only the verb-wins outcome is asserted here; nothing in this test exercises
// a token whose primary type is TokenNoun, so the confidence values assigned
// in that case are not covered.
// NOTE(review): presumably "commit" reaches the low-information path in
// resolveToken for this input — confirm against the scoring code.
func TestTokeniser_LowInformationConfidenceFloor(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniser()
|
||||
|
||||
tokens := tok.Tokenise("maybe commit")
|
||||
if len(tokens) != 2 {
|
||||
t.Fatalf("Tokenise(maybe commit) produced %d tokens, want 2", len(tokens))
|
||||
}
|
||||
if tokens[1].Type != TokenVerb {
|
||||
t.Fatalf("Tokenise(maybe commit) Type = %v, want TokenVerb", tokens[1].Type)
|
||||
}
|
||||
// The expected values are the literals 0.55 / 0.45 rather than the named
// constants, so the assertions stay fixed if the constants are changed.
if tokens[1].Confidence != 0.55 {
|
||||
t.Fatalf("Tokenise(maybe commit) Confidence = %v, want 0.55", tokens[1].Confidence)
|
||||
}
|
||||
if tokens[1].AltType != TokenNoun {
|
||||
t.Fatalf("Tokenise(maybe commit) AltType = %v, want TokenNoun", tokens[1].AltType)
|
||||
}
|
||||
if tokens[1].AltConf != 0.45 {
|
||||
t.Fatalf("Tokenise(maybe commit) AltConf = %v, want 0.45", tokens[1].AltConf)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Benchmarks ---
|
||||
|
||||
func benchSetup(b *testing.B) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue