[agent/codex:gpt-5.4-mini] Read ~/spec/code/core/go/i18n/RFC.md fully. Find ONE feature... #103

Merged
Virgil merged 1 commit from agent/read---spec-code-core-go-i18n-rfc-md-ful into dev 2026-04-02 03:42:39 +00:00
4 changed files with 123 additions and 0 deletions

View file

@ -163,6 +163,22 @@ func IrregularNouns() map[string]string {
return result
}
// DualClassVerbs hands back a freshly allocated map holding every entry of
// the package-level dualClassVerbs table: regular verbs that are also used as
// nouns in dev/ops text. Adding or removing keys on the returned map does not
// touch the package-level table.
func DualClassVerbs() map[string]VerbForms {
	clone := make(map[string]VerbForms, len(dualClassVerbs))
	for base, forms := range dualClassVerbs {
		clone[base] = forms
	}
	return clone
}
// DualClassNouns hands back a freshly allocated map holding every entry of
// the package-level dualClassNouns table: regular nouns (base form to plural)
// that are also used as verbs in dev/ops text. Adding or removing keys on the
// returned map does not touch the package-level table.
func DualClassNouns() map[string]string {
	clone := make(map[string]string, len(dualClassNouns))
	for base, plural := range dualClassNouns {
		clone[base] = plural
	}
	return clone
}
// Lower returns the lowercase form of s.
func Lower(s string) string {
return core.Lower(s)

View file

@ -193,6 +193,19 @@ func (t *Tokeniser) buildVerbIndex() {
}
}
}
// Tier 2b: Seed additional regular dual-class bases that are common in
// dev/ops text. These are regular forms, but they need to behave like
// known bases so the dual-class resolver can disambiguate them.
//
// Every seeded base is always marked in baseVerbs. The reverse lookups
// (inflected form -> base) are only filled when the form is non-empty and
// the slot is still empty, so an existing mapping is never overwritten.
for base, forms := range i18n.DualClassVerbs() {
t.baseVerbs[base] = true
// Register the past form unless that form already resolves to some base.
if forms.Past != "" && t.pastToBase[forms.Past] == "" {
t.pastToBase[forms.Past] = base
}
// Same first-writer-wins rule for the gerund form.
if forms.Gerund != "" && t.gerundToBase[forms.Gerund] == "" {
t.gerundToBase[forms.Gerund] = base
}
}
}
// buildNounIndex reads grammar tables and irregular noun maps to build
@ -221,6 +234,18 @@ func (t *Tokeniser) buildNounIndex() {
}
}
}
// Tier 2b: Seed additional regular dual-class bases that are common in
// dev/ops text. The plural forms are regular, but the entries need to
// appear in the base noun set so the ambiguous-token pass can see them.
//
// Every seeded base is always marked in baseNouns. The plural -> base
// reverse lookup is only written when no entry exists for that plural yet.
for base, plural := range i18n.DualClassNouns() {
t.baseNouns[base] = true
// A plural spelled exactly like its base gets no reverse-lookup entry.
if plural != base {
// Keep whatever mapping is already registered for this plural.
if _, exists := t.pluralToBase[plural]; !exists {
t.pluralToBase[plural] = base
}
}
}
}
// MatchNoun performs a 3-tier reverse lookup for a noun form.

View file

@ -671,6 +671,12 @@ func TestTokeniser_DualClassDetection(t *testing.T) {
}
}
// Every base word seeded by the dual-class expansion tables (the keys
// returned by i18n.DualClassVerbs / i18n.DualClassNouns) must be reported
// as dual-class by the tokeniser.
for _, word := range []string{"change", "export", "function", "handle", "host", "import", "link", "log", "merge", "patch", "process", "queue", "release", "stream", "tag", "trigger", "watch"} {
if !tok.IsDualClass(word) {
t.Errorf("%q should be dual-class after expansion", word)
}
}
notDual := []string{"delete", "go", "push", "branch", "repo"}
for _, word := range notDual {
if tok.IsDualClass(word) {
@ -720,6 +726,37 @@ func TestTokeniser_IgnoresDeprecatedGrammarEntries(t *testing.T) {
}
}
// TestTokeniser_DualClassExpansion_ClassifiesCommonDevOpsWords tokenises short
// phrases built around the seeded dual-class words and checks the second
// token of each phrase: it must carry the expected lowercase text and be
// typed as a noun in "the X" phrases and as a verb in "please X ..." phrases.
func TestTokeniser_DualClassExpansion_ClassifiesCommonDevOpsWords(t *testing.T) {
	setup(t)
	tok := NewTokeniser()

	type expectation struct {
		text      string
		wantType  TokenType
		wantLower string
	}
	cases := []expectation{
		{text: "the merge", wantType: TokenNoun, wantLower: "merge"},
		{text: "please merge the file", wantType: TokenVerb, wantLower: "merge"},
		{text: "the process", wantType: TokenNoun, wantLower: "process"},
		{text: "please process the log", wantType: TokenVerb, wantLower: "process"},
	}

	for _, tc := range cases {
		t.Run(tc.text, func(t *testing.T) {
			tokens := tok.Tokenise(tc.text)
			// The word under test is always the second token, so bail out
			// before indexing if the tokeniser produced fewer than two.
			if n := len(tokens); n < 2 {
				t.Fatalf("Tokenise(%q) returned %d tokens, want at least 2", tc.text, n)
			}

			second := tokens[1]
			if second.Lower != tc.wantLower {
				t.Fatalf("Tokenise(%q)[1].Lower = %q, want %q", tc.text, second.Lower, tc.wantLower)
			}
			if second.Type != tc.wantType {
				t.Fatalf("Tokenise(%q)[1].Type = %v, want %v", tc.text, second.Type, tc.wantType)
			}
		})
	}
}
func TestToken_ConfidenceField(t *testing.T) {
setup(t)
tok := NewTokeniser()

View file

@ -450,6 +450,51 @@ var irregularNouns = map[string]string{
"calf": "calves", "loaf": "loaves", "thief": "thieves",
}
// dualClassVerbs lists regular verbs that double as common nouns in dev/ops
// text, each with its explicit past and gerund forms. Every form follows a
// regular English pattern; the entries exist so the reversal tokeniser treats
// these words as known bases when it disambiguates dual-class tokens.
// Entries are grouped by how the inflected forms are spelled.
var dualClassVerbs = map[string]VerbForms{
	// Suffix attached directly to the base: "-ed" / "-ing".
	"export":   {Past: "exported", Gerund: "exporting"},
	"function": {Past: "functioned", Gerund: "functioning"},
	"host":     {Past: "hosted", Gerund: "hosting"},
	"import":   {Past: "imported", Gerund: "importing"},
	"link":     {Past: "linked", Gerund: "linking"},
	"patch":    {Past: "patched", Gerund: "patching"},
	"process":  {Past: "processed", Gerund: "processing"},
	"stream":   {Past: "streamed", Gerund: "streaming"},
	"trigger":  {Past: "triggered", Gerund: "triggering"},
	"watch":    {Past: "watched", Gerund: "watching"},

	// Base ends in "e": add "-d", and drop the "e" before "-ing".
	"change":  {Past: "changed", Gerund: "changing"},
	"handle":  {Past: "handled", Gerund: "handling"},
	"merge":   {Past: "merged", Gerund: "merging"},
	"queue":   {Past: "queued", Gerund: "queuing"},
	"release": {Past: "released", Gerund: "releasing"},

	// Final consonant is doubled before the suffix.
	"log": {Past: "logged", Gerund: "logging"},
	"tag": {Past: "tagged", Gerund: "tagging"},
}
// dualClassNouns maps each dual-class base word to its plural. It carries the
// same vocabulary as the dual-class verb table, so the tokeniser can treat the
// uninflected base forms as ambiguous between noun and verb.
var dualClassNouns = map[string]string{
	// Plain "-s" plurals.
	"change": "changes", "export": "exports", "function": "functions",
	"handle": "handles", "host": "hosts", "import": "imports",
	"link": "links", "log": "logs", "merge": "merges",
	"queue": "queues", "release": "releases", "stream": "streams",
	"tag": "tags", "trigger": "triggers",

	// Sibilant endings take "-es".
	"patch": "patches", "process": "processes", "watch": "watches",
}
// vowelSounds is a set of words spelled with a leading "h".
// NOTE(review): presumably these are words pronounced with an initial vowel
// sound, used when choosing between "a" and "an" - confirm against the caller.
var vowelSounds = map[string]bool{
	"hour":   true,
	"honest": true,
	"honour": true,
	"honor":  true,
	"heir":   true,
	"herb":   true,
}