[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #103
4 changed files with 123 additions and 0 deletions
16
grammar.go
16
grammar.go
|
|
@ -163,6 +163,22 @@ func IrregularNouns() map[string]string {
|
|||
return result
|
||||
}
|
||||
|
||||
// DualClassVerbs returns a copy of the additional regular verbs that also act
|
||||
// as common nouns in dev/ops text.
|
||||
func DualClassVerbs() map[string]VerbForms {
|
||||
result := make(map[string]VerbForms, len(dualClassVerbs))
|
||||
maps.Copy(result, dualClassVerbs)
|
||||
return result
|
||||
}
|
||||
|
||||
// DualClassNouns returns a copy of the additional regular nouns that also act
|
||||
// as common verbs in dev/ops text.
|
||||
func DualClassNouns() map[string]string {
|
||||
result := make(map[string]string, len(dualClassNouns))
|
||||
maps.Copy(result, dualClassNouns)
|
||||
return result
|
||||
}
|
||||
|
||||
// Lower returns the lowercase form of s.
|
||||
func Lower(s string) string {
|
||||
return core.Lower(s)
|
||||
|
|
|
|||
|
|
@ -193,6 +193,19 @@ func (t *Tokeniser) buildVerbIndex() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tier 2b: Seed additional regular dual-class bases that are common in
|
||||
// dev/ops text. These are regular forms, but they need to behave like
|
||||
// known bases so the dual-class resolver can disambiguate them.
|
||||
for base, forms := range i18n.DualClassVerbs() {
|
||||
t.baseVerbs[base] = true
|
||||
if forms.Past != "" && t.pastToBase[forms.Past] == "" {
|
||||
t.pastToBase[forms.Past] = base
|
||||
}
|
||||
if forms.Gerund != "" && t.gerundToBase[forms.Gerund] == "" {
|
||||
t.gerundToBase[forms.Gerund] = base
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// buildNounIndex reads grammar tables and irregular noun maps to build
|
||||
|
|
@ -221,6 +234,18 @@ func (t *Tokeniser) buildNounIndex() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tier 2b: Seed additional regular dual-class bases that are common in
|
||||
// dev/ops text. The plural forms are regular, but the entries need to
|
||||
// appear in the base noun set so the ambiguous-token pass can see them.
|
||||
for base, plural := range i18n.DualClassNouns() {
|
||||
t.baseNouns[base] = true
|
||||
if plural != base {
|
||||
if _, exists := t.pluralToBase[plural]; !exists {
|
||||
t.pluralToBase[plural] = base
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MatchNoun performs a 3-tier reverse lookup for a noun form.
|
||||
|
|
|
|||
|
|
@ -671,6 +671,12 @@ func TestTokeniser_DualClassDetection(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
for _, word := range []string{"change", "export", "function", "handle", "host", "import", "link", "log", "merge", "patch", "process", "queue", "release", "stream", "tag", "trigger", "watch"} {
|
||||
if !tok.IsDualClass(word) {
|
||||
t.Errorf("%q should be dual-class after expansion", word)
|
||||
}
|
||||
}
|
||||
|
||||
notDual := []string{"delete", "go", "push", "branch", "repo"}
|
||||
for _, word := range notDual {
|
||||
if tok.IsDualClass(word) {
|
||||
|
|
@ -720,6 +726,37 @@ func TestTokeniser_IgnoresDeprecatedGrammarEntries(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestTokeniser_DualClassExpansion_ClassifiesCommonDevOpsWords(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniser()
|
||||
|
||||
tests := []struct {
|
||||
text string
|
||||
wantType TokenType
|
||||
wantLower string
|
||||
}{
|
||||
{"the merge", TokenNoun, "merge"},
|
||||
{"please merge the file", TokenVerb, "merge"},
|
||||
{"the process", TokenNoun, "process"},
|
||||
{"please process the log", TokenVerb, "process"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.text, func(t *testing.T) {
|
||||
tokens := tok.Tokenise(tt.text)
|
||||
if len(tokens) < 2 {
|
||||
t.Fatalf("Tokenise(%q) returned %d tokens, want at least 2", tt.text, len(tokens))
|
||||
}
|
||||
if tokens[1].Lower != tt.wantLower {
|
||||
t.Fatalf("Tokenise(%q)[1].Lower = %q, want %q", tt.text, tokens[1].Lower, tt.wantLower)
|
||||
}
|
||||
if tokens[1].Type != tt.wantType {
|
||||
t.Fatalf("Tokenise(%q)[1].Type = %v, want %v", tt.text, tokens[1].Type, tt.wantType)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestToken_ConfidenceField(t *testing.T) {
|
||||
setup(t)
|
||||
tok := NewTokeniser()
|
||||
|
|
|
|||
45
types.go
45
types.go
|
|
@ -450,6 +450,51 @@ var irregularNouns = map[string]string{
|
|||
"calf": "calves", "loaf": "loaves", "thief": "thieves",
|
||||
}
|
||||
|
||||
// dualClassVerbs seeds additional regular verbs that are also common nouns in
|
||||
// dev/ops text. The forms are regular, but listing them here makes the
|
||||
// reversal tokeniser treat them as known bases for dual-class disambiguation.
|
||||
var dualClassVerbs = map[string]VerbForms{
|
||||
"change": {Past: "changed", Gerund: "changing"},
|
||||
"export": {Past: "exported", Gerund: "exporting"},
|
||||
"function": {Past: "functioned", Gerund: "functioning"},
|
||||
"handle": {Past: "handled", Gerund: "handling"},
|
||||
"host": {Past: "hosted", Gerund: "hosting"},
|
||||
"import": {Past: "imported", Gerund: "importing"},
|
||||
"link": {Past: "linked", Gerund: "linking"},
|
||||
"log": {Past: "logged", Gerund: "logging"},
|
||||
"merge": {Past: "merged", Gerund: "merging"},
|
||||
"patch": {Past: "patched", Gerund: "patching"},
|
||||
"process": {Past: "processed", Gerund: "processing"},
|
||||
"queue": {Past: "queued", Gerund: "queuing"},
|
||||
"release": {Past: "released", Gerund: "releasing"},
|
||||
"stream": {Past: "streamed", Gerund: "streaming"},
|
||||
"tag": {Past: "tagged", Gerund: "tagging"},
|
||||
"trigger": {Past: "triggered", Gerund: "triggering"},
|
||||
"watch": {Past: "watched", Gerund: "watching"},
|
||||
}
|
||||
|
||||
// dualClassNouns maps each dual-class base word to its plural. It covers the
// same vocabulary as dualClassVerbs, so the tokeniser can treat an uninflected
// base form as ambiguous between noun and verb.
var dualClassNouns = map[string]string{
	"change": "changes", "export": "exports", "function": "functions",
	"handle": "handles", "host": "hosts", "import": "imports",
	"link": "links", "log": "logs", "merge": "merges",
	"patch": "patches", "process": "processes", "queue": "queues",
	"release": "releases", "stream": "streams", "tag": "tags",
	"trigger": "triggers", "watch": "watches",
}
|
||||
|
||||
// vowelSounds is a set of lowercase words, each mapped to true.
// NOTE(review): presumably words whose leading "h" is silent and which
// therefore take the article "an"; confirm against the article-selection code.
var vowelSounds = map[string]bool{
	"hour":   true,
	"honest": true,
	"honour": true,
	"honor":  true,
	"heir":   true,
	"herb":   true,
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue