go-i18n/validate.go
Claude 97f9c758d1
chore(ax): AX compliance sweep — banned imports, naming, Good/Bad/Ugly tests
- compose.go: remove fmt import, use local stringer interface + core.Sprint
- hooks.go: replace stdlib log with dappco.re/go/core/log
- localise.go: replace os.Getenv with core.Env
- loader.go: replace strings.CutPrefix with core.HasPrefix/TrimPrefix
- reversal/tokeniser.go: replace strings.Fields with local splitFields helper
- validate.go: rename sb → builder (AX naming)
- calibrate.go, classify.go: rename cfg → configuration (AX naming)
- numbers.go: rename local fmt variable → numberFormat
- All test files: add Good/Bad/Ugly triads per AX test naming convention

Co-Authored-By: Virgil <virgil@lethean.io>
2026-03-31 08:42:40 +01:00

138 lines
4.6 KiB
Go

// SPDX-License-Identifier: EUPL-1.2
package i18n
import (
"context"
"dappco.re/go/core"
log "dappco.re/go/core/log"
"forge.lthn.ai/core/go-inference"
)
// ArticlePair couples a noun with the article a caller proposes for it.
// BatchValidateArticles takes a slice of these as its work list.
type ArticlePair struct {
	// Noun is the word being checked; Article is the article proposed for it.
	Noun, Article string
}
// ArticleResult is the outcome of checking one article against a model
// prediction, as produced by ValidateArticle.
type ArticleResult struct {
	// Noun is the noun that was checked. Given is the caller's article after
	// trimming and lowercasing. Predicted is the model's normalised answer.
	Noun, Given, Predicted string
	// Valid is true when Given equals Predicted.
	Valid bool
	// Prompt is the prompt sent to the model, retained for debugging.
	Prompt string
}
// IrregularForm bundles a verb, the tense requested, and the inflected form a
// caller proposes for that combination. BatchValidateIrregulars takes a slice
// of these as its work list.
type IrregularForm struct {
	// Verb is the base verb, Tense names the tense to check, and Form is the
	// proposed inflected form.
	Verb, Tense, Form string
}
// IrregularResult is the outcome of checking one irregular verb form against
// a model prediction, as produced by ValidateIrregular.
type IrregularResult struct {
	// Verb is the base verb. Tense is the tense that was checked (e.g. "past",
	// "past participle"). Given is the caller's form after trimming and
	// lowercasing. Predicted is the model's normalised answer.
	Verb, Tense, Given, Predicted string
	// Valid is true when Given equals Predicted.
	Valid bool
	// Prompt is the prompt sent to the model, retained for debugging.
	Prompt string
}
// articlePrompt renders the fill-in-the-blank question that asks the model
// which article (a/an/the) belongs in front of noun.
func articlePrompt(noun string) string {
	const template = "Complete with the correct article (a/an/the): ___ %s. Answer with just the article:"
	return core.Sprintf(template, noun)
}
// irregularPrompt renders the question that asks the model for the given
// tense of verb. The tense is substituted first and the verb second.
func irregularPrompt(verb, tense string) string {
	const template = "What is the %s form of the verb '%s'? Answer with just the word:"
	return core.Sprintf(template, tense, verb)
}
// collectGenerated asks m to generate from prompt with a one-token budget and
// a temperature of 0.05, concatenates the text of every token yielded, and
// returns that text lowercased and trimmed.
//
// The model's error state is read only after the token stream has been
// drained; if m.Err reports a failure, the collected text is discarded and the
// error is returned as-is.
//
// NOTE(review): with a one-token cap, an answer the model's tokeniser splits
// into several tokens would presumably come back truncated — confirm against
// the models in use.
func collectGenerated(ctx context.Context, m inference.TextModel, prompt string) (string, error) {
	output := core.NewBuilder()

	tokens := m.Generate(ctx, prompt, inference.WithMaxTokens(1), inference.WithTemperature(0.05))
	for token := range tokens {
		output.WriteString(token.Text)
	}

	if err := m.Err(); err != nil {
		return "", err
	}

	lowered := core.Lower(output.String())
	return core.Trim(lowered), nil
}
// ValidateArticle asks the model which article belongs before noun and
// compares that prediction with the article supplied by the caller.
//
// The supplied article is lowercased and trimmed before comparison, and the
// prediction comes from collectGenerated, which normalises it the same way.
// The returned ArticleResult carries both values, the prompt that was used,
// and Valid set to whether they are equal.
//
// If generation fails, a zero ArticleResult is returned together with the
// error passed through log.E, tagged with the operation name and the noun.
func ValidateArticle(ctx context.Context, m inference.TextModel, noun string, article string) (ArticleResult, error) {
	question := articlePrompt(noun)

	answer, err := collectGenerated(ctx, m, question)
	if err != nil {
		return ArticleResult{}, log.E("ValidateArticle", "validate: "+noun, err)
	}

	result := ArticleResult{
		Noun:      noun,
		Given:     core.Trim(core.Lower(article)),
		Predicted: answer,
		Prompt:    question,
	}
	result.Valid = result.Given == result.Predicted
	return result, nil
}
// ValidateIrregular asks the model for the given tense of verb and compares
// that prediction with the form supplied by the caller.
//
// The supplied form is lowercased and trimmed before comparison, and the
// prediction comes from collectGenerated, which normalises it the same way.
// The returned IrregularResult carries both values, the prompt that was used,
// and Valid set to whether they are equal.
//
// If generation fails, a zero IrregularResult is returned together with the
// error passed through log.E, tagged with the operation name, the verb and
// the tense.
func ValidateIrregular(ctx context.Context, m inference.TextModel, verb string, tense string, form string) (IrregularResult, error) {
	question := irregularPrompt(verb, tense)

	answer, err := collectGenerated(ctx, m, question)
	if err != nil {
		return IrregularResult{}, log.E("ValidateIrregular", "validate: "+verb+" ("+tense+")", err)
	}

	result := IrregularResult{
		Verb:      verb,
		Tense:     tense,
		Given:     core.Trim(core.Lower(form)),
		Predicted: answer,
		Prompt:    question,
	}
	result.Valid = result.Given == result.Predicted
	return result, nil
}
// BatchValidateArticles runs ValidateArticle over pairs one at a time, in
// slice order, and returns one ArticleResult per pair.
//
// Processing stops at the first failure: the results gathered before it are
// returned together with that error. On success the returned slice has the
// same length and order as pairs, and is non-nil even when pairs is empty.
func BatchValidateArticles(ctx context.Context, m inference.TextModel, pairs []ArticlePair) ([]ArticleResult, error) {
	results := make([]ArticleResult, 0, len(pairs))
	for i := range pairs {
		checked, err := ValidateArticle(ctx, m, pairs[i].Noun, pairs[i].Article)
		if err != nil {
			// Hand back whatever was validated before the failure.
			return results, err
		}
		results = append(results, checked)
	}
	return results, nil
}
// BatchValidateIrregulars runs ValidateIrregular over forms one at a time, in
// slice order, and returns one IrregularResult per form.
//
// Processing stops at the first failure: the results gathered before it are
// returned together with that error. On success the returned slice has the
// same length and order as forms, and is non-nil even when forms is empty.
func BatchValidateIrregulars(ctx context.Context, m inference.TextModel, forms []IrregularForm) ([]IrregularResult, error) {
	results := make([]IrregularResult, 0, len(forms))
	for i := range forms {
		checked, err := ValidateIrregular(ctx, m, forms[i].Verb, forms[i].Tense, forms[i].Form)
		if err != nil {
			// Hand back whatever was validated before the failure.
			return results, err
		}
		results = append(results, checked)
	}
	return results, nil
}