feat(grammar): add dual-class verb/noun entries and contractions
Add "test", "check", and "file" as verbs, and "run" and "build" as nouns, so the tokeniser can detect them in both grammatical roles. Add 15 contractions to the verb_auxiliary signal list to support developer-written text. Update the reversal tests to use a noun-only word ("branch") in test phrases, avoiding dual-class ambiguity until disambiguation is implemented (Task 5). Co-Authored-By: Virgil <virgil@lethean.io> Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
05d0483fd7
commit
f0c4bebfb3
4 changed files with 39 additions and 12 deletions
|
|
@ -389,6 +389,22 @@ func TestGrammarData_Signals(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestGrammarData_DualClassEntries(t *testing.T) {
|
||||
svc, _ := New()
|
||||
SetDefault(svc)
|
||||
data := GetGrammarData("en")
|
||||
|
||||
dualClass := []string{"commit", "run", "test", "check", "file", "build"}
|
||||
for _, word := range dualClass {
|
||||
if _, ok := data.Verbs[word]; !ok {
|
||||
t.Errorf("gram.verb missing dual-class word %q", word)
|
||||
}
|
||||
if _, ok := data.Nouns[word]; !ok {
|
||||
t.Errorf("gram.noun missing dual-class word %q", word)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTemplateFuncs(t *testing.T) {
|
||||
funcs := TemplateFuncs()
|
||||
expected := []string{"title", "lower", "upper", "past", "gerund", "plural", "pluralForm", "article", "quote"}
|
||||
|
|
|
|||
|
|
@ -42,7 +42,10 @@
|
|||
"shut": { "base": "shut", "past": "shut", "gerund": "shutting" },
|
||||
"delete": { "base": "delete", "past": "deleted", "gerund": "deleting" },
|
||||
"update": { "base": "update", "past": "updated", "gerund": "updating" },
|
||||
"push": { "base": "push", "past": "pushed", "gerund": "pushing" }
|
||||
"push": { "base": "push", "past": "pushed", "gerund": "pushing" },
|
||||
"test": { "base": "test", "past": "tested", "gerund": "testing" },
|
||||
"check": { "base": "check", "past": "checked", "gerund": "checking" },
|
||||
"file": { "base": "file", "past": "filed", "gerund": "filing" }
|
||||
},
|
||||
"noun": {
|
||||
"file": { "one": "file", "other": "files" },
|
||||
|
|
@ -67,7 +70,9 @@
|
|||
"failed": { "one": "failed", "other": "failed" },
|
||||
"skipped": { "one": "skipped", "other": "skipped" },
|
||||
"check": { "one": "check", "other": "checks" },
|
||||
"test": { "one": "test", "other": "tests" }
|
||||
"test": { "one": "test", "other": "tests" },
|
||||
"run": { "one": "run", "other": "runs" },
|
||||
"build": { "one": "build", "other": "builds" }
|
||||
},
|
||||
"article": {
|
||||
"indefinite": { "default": "a", "vowel": "an" },
|
||||
|
|
@ -123,7 +128,10 @@
|
|||
"has", "had", "have",
|
||||
"do", "does", "did",
|
||||
"will", "would", "could", "should",
|
||||
"can", "may", "might", "shall", "must"
|
||||
"can", "may", "might", "shall", "must",
|
||||
"don't", "can't", "won't", "shouldn't", "couldn't", "wouldn't",
|
||||
"doesn't", "didn't", "isn't", "aren't", "wasn't", "weren't",
|
||||
"hasn't", "hadn't", "haven't"
|
||||
],
|
||||
"verb_infinitive": ["to"]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -65,7 +65,8 @@ func TestImprint_Similar_SameText(t *testing.T) {
|
|||
svc, _ := i18n.New()
|
||||
i18n.SetDefault(svc)
|
||||
tok := NewTokeniser()
|
||||
tokens := tok.Tokenise("Delete the configuration file")
|
||||
// Use "branch" (noun-only) to avoid dual-class ambiguity with "file" (now both verb and noun).
|
||||
tokens := tok.Tokenise("Delete the configuration branch")
|
||||
imp1 := NewImprint(tokens)
|
||||
imp2 := NewImprint(tokens)
|
||||
|
||||
|
|
@ -80,8 +81,9 @@ func TestImprint_Similar_SimilarText(t *testing.T) {
|
|||
i18n.SetDefault(svc)
|
||||
tok := NewTokeniser()
|
||||
|
||||
imp1 := NewImprint(tok.Tokenise("Delete the configuration file"))
|
||||
imp2 := NewImprint(tok.Tokenise("Deleted the configuration files"))
|
||||
// Use "branch" (noun-only) to avoid dual-class ambiguity with "file" (now both verb and noun).
|
||||
imp1 := NewImprint(tok.Tokenise("Delete the configuration branch"))
|
||||
imp2 := NewImprint(tok.Tokenise("Deleted the configuration branches"))
|
||||
|
||||
sim := imp1.Similar(imp2)
|
||||
if sim < 0.3 {
|
||||
|
|
@ -97,7 +99,7 @@ func TestImprint_Similar_DifferentText(t *testing.T) {
|
|||
i18n.SetDefault(svc)
|
||||
tok := NewTokeniser()
|
||||
|
||||
imp1 := NewImprint(tok.Tokenise("Delete the configuration file"))
|
||||
imp1 := NewImprint(tok.Tokenise("Delete the configuration branch"))
|
||||
imp2 := NewImprint(tok.Tokenise("Building the project successfully"))
|
||||
|
||||
sim := imp1.Similar(imp2)
|
||||
|
|
|
|||
|
|
@ -10,17 +10,18 @@ func TestMultiplier_Expand(t *testing.T) {
|
|||
svc, _ := i18n.New()
|
||||
i18n.SetDefault(svc)
|
||||
m := NewMultiplier()
|
||||
variants := m.Expand("Delete the configuration file")
|
||||
// Use "branch" (noun-only) to avoid dual-class ambiguity with "file" (now both verb and noun).
|
||||
variants := m.Expand("Delete the configuration branch")
|
||||
|
||||
if len(variants) < 4 {
|
||||
t.Errorf("Expand() returned %d variants, want >= 4", len(variants))
|
||||
}
|
||||
|
||||
expected := map[string]bool{
|
||||
"Delete the configuration file": true, // original
|
||||
"Deleted the configuration file": true, // past
|
||||
"Deleting the configuration file": true, // gerund
|
||||
"Delete the configuration files": true, // plural
|
||||
"Delete the configuration branch": true, // original
|
||||
"Deleted the configuration branch": true, // past
|
||||
"Deleting the configuration branch": true, // gerund
|
||||
"Delete the configuration branches": true, // plural
|
||||
}
|
||||
for _, v := range variants {
|
||||
delete(expected, v)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue