From 01463be4e9fe664f71bd4ea11acc00139ad2ab5f Mon Sep 17 00:00:00 2001 From: Virgil Date: Mon, 23 Mar 2026 07:38:16 +0000 Subject: [PATCH] fix(i18n): resolve issue 5 audit findings Co-Authored-By: Virgil --- calibrate.go | 8 ++-- core_service.go | 4 +- core_service_test.go | 15 +++++++ go.mod | 9 ++-- go.sum | 6 ++- grammar.go | 65 ++++++++++++++------------- handler.go | 29 ++++++------ i18n.go | 11 ++--- i18n_test.go | 7 +-- loader.go | 41 ++++++++++------- localise.go | 14 +++--- localise_test.go | 33 +++++++++++++- reversal/tokeniser.go | 102 +++++++++++++++++++++++++----------------- service.go | 29 ++++++------ service_test.go | 21 ++++++++- 15 files changed, 242 insertions(+), 152 deletions(-) create mode 100644 core_service_test.go diff --git a/calibrate.go b/calibrate.go index f858bcd..5e94713 100644 --- a/calibrate.go +++ b/calibrate.go @@ -2,9 +2,9 @@ package i18n import ( "context" - "fmt" "time" + "dappco.re/go/core" log "dappco.re/go/core/log" "forge.lthn.ai/core/go-inference" ) @@ -66,7 +66,7 @@ func CalibrateDomains(ctx context.Context, modelA, modelB inference.TextModel, // Build classification prompts from sample texts. prompts := make([]string, len(samples)) for i, s := range samples { - prompts[i] = fmt.Sprintf(cfg.promptTemplate, s.Text) + prompts[i] = core.Sprintf(cfg.promptTemplate, s.Text) } // Classify with model A. @@ -93,7 +93,7 @@ func CalibrateDomains(ctx context.Context, modelA, modelB inference.TextModel, if agree { stats.Agreed++ } else { - key := fmt.Sprintf("%s->%s", a, b) + key := core.Sprintf("%s->%s", a, b) stats.ConfusionPairs[key]++ } stats.ByDomainA[a]++ @@ -140,7 +140,7 @@ func classifyAll(ctx context.Context, model inference.TextModel, prompts []strin results, err := model.Classify(ctx, batch, inference.WithMaxTokens(1)) if err != nil { - return nil, 0, log.E("classifyAll", fmt.Sprintf("classify batch [%d:%d]", i, end), err) + return nil, 0, log.E("classifyAll", core.Sprintf("classify batch [%d:%d]", i, end), err) } for j, r := range results { diff --git a/core_service.go b/core_service.go index e99b819..1b5cde4 100644 --- a/core_service.go +++ b/core_service.go @@ -57,7 +57,9 @@ func NewCoreService(opts ServiceOptions) func(*core.Core) (any, error) { } if opts.Language != "" { - _ = svc.SetLanguage(opts.Language) + if err := svc.SetLanguage(opts.Language); err != nil { + return nil, err + } } svc.SetMode(opts.Mode) diff --git a/core_service_test.go b/core_service_test.go new file mode 100644 index 0000000..9a92baa --- /dev/null +++ b/core_service_test.go @@ -0,0 +1,15 @@ +package i18n + +import ( + "testing" + + "dappco.re/go/core" +) + +func TestNewCoreService_Bad_InvalidLanguage(t *testing.T) { + factory := NewCoreService(ServiceOptions{Language: "not-a-language-tag!"}) + + if _, err := factory(core.New()); err == nil { + t.Fatal("NewCoreService() should fail for an invalid language option") + } +} diff --git a/go.mod b/go.mod index 9b2b037..7a563a3 100644 --- a/go.mod +++ b/go.mod @@ -5,15 +5,12 @@ go 1.26.0 require golang.org/x/text v0.35.0 require ( - dappco.re/go/core v0.4.7 + dappco.re/go/core v0.6.0 dappco.re/go/core/log v0.0.4 forge.lthn.ai/core/go-inference v0.1.4 ) -require ( - forge.lthn.ai/core/go-log v0.0.4 // indirect - github.com/kr/text v0.2.0 // indirect -) +require github.com/kr/text v0.2.0 // indirect require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -22,4 +19,4 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect ) -replace dappco.re/go/core/log => ../go-log +replace dappco.re/go/core/log => forge.lthn.ai/core/go-log v0.0.4 diff --git a/go.sum b/go.sum index e1c45c5..750aa78 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,9 @@ -dappco.re/go/core v0.4.7 h1:KmIA/2lo6rl1NMtLrKqCWfMlUqpDZYH3q0/d10dTtGA= -dappco.re/go/core v0.4.7/go.mod h1:f2/tBZ3+3IqDrg2F5F598llv0nmb/4gJVCFzM5geE4A= +dappco.re/go/core v0.6.0 h1:0wmuO/UmCWXxJkxQ6XvVLnqkAuWitbd49PhxjCsplyk= +dappco.re/go/core v0.6.0/go.mod h1:f2/tBZ3+3IqDrg2F5F598llv0nmb/4gJVCFzM5geE4A= forge.lthn.ai/core/go-inference v0.1.4 h1:fuAgWbqsEDajHniqAKyvHYbRcBrkGEiGSqR2pfTMRY0= forge.lthn.ai/core/go-inference v0.1.4/go.mod h1:jfWz+IJX55wAH98+ic6FEqqGB6/P31CHlg7VY7pxREw= +forge.lthn.ai/core/go-log v0.0.4 h1:KTuCEPgFmuM8KJfnyQ8vPOU1Jg654W74h8IJvfQMfv0= +forge.lthn.ai/core/go-log v0.0.4/go.mod h1:r14MXKOD3LF/sI8XUJQhRk/SZHBE7jAFVuCfgkXoZPw= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/grammar.go b/grammar.go index bb3037b..f4f50a3 100644 --- a/grammar.go +++ b/grammar.go @@ -2,9 +2,10 @@ package i18n import ( "maps" - "strings" "text/template" "unicode" + + "dappco.re/go/core" ) // GetGrammarData returns the grammar data for the specified language. @@ -40,7 +41,7 @@ func getVerbForm(lang, verb, form string) string { if data == nil || data.Verbs == nil { return "" } - verb = strings.ToLower(verb) + verb = core.Lower(verb) if forms, ok := data.Verbs[verb]; ok { switch form { case "past": @@ -57,7 +58,7 @@ func getWord(lang, word string) string { if data == nil || data.Words == nil { return "" } - return data.Words[strings.ToLower(word)] + return data.Words[core.Lower(word)] } func getPunct(lang, rule, defaultVal string) string { @@ -83,7 +84,7 @@ func getNounForm(lang, noun, form string) string { if data == nil || data.Nouns == nil { return "" } - noun = strings.ToLower(noun) + noun = core.Lower(noun) if forms, ok := data.Nouns[noun]; ok { switch form { case "one": @@ -111,7 +112,7 @@ func currentLangForGrammar() string { // PastTense("run") // "ran" // PastTense("copy") // "copied" func PastTense(verb string) string { - verb = strings.ToLower(strings.TrimSpace(verb)) + verb = core.Lower(core.Trim(verb)) if verb == "" { return "" } @@ -125,16 +126,16 @@ func PastTense(verb string) string { } func applyRegularPastTense(verb string) string { - if strings.HasSuffix(verb, "ed") && len(verb) > 2 { + if core.HasSuffix(verb, "ed") && len(verb) > 2 { thirdFromEnd := verb[len(verb)-3] if !isVowel(rune(thirdFromEnd)) && thirdFromEnd != 'e' { return verb } } - if strings.HasSuffix(verb, "e") { + if core.HasSuffix(verb, "e") { return verb + "d" } - if strings.HasSuffix(verb, "y") && len(verb) > 1 { + if core.HasSuffix(verb, "y") && len(verb) > 1 { prev := rune(verb[len(verb)-2]) if !isVowel(prev) { return verb[:len(verb)-1] + "ied" @@ -174,7 +175,7 @@ func shouldDoubleConsonant(verb string) bool { // Gerund("run") // "running" // Gerund("die") // "dying" func Gerund(verb string) string { - verb = strings.ToLower(strings.TrimSpace(verb)) + verb = core.Lower(core.Trim(verb)) if verb == "" { return "" } @@ -188,10 +189,10 @@ func Gerund(verb string) string { } func applyRegularGerund(verb string) string { - if strings.HasSuffix(verb, "ie") { + if core.HasSuffix(verb, "ie") { return verb[:len(verb)-2] + "ying" } - if strings.HasSuffix(verb, "e") && len(verb) > 1 { + if core.HasSuffix(verb, "e") && len(verb) > 1 { secondLast := rune(verb[len(verb)-2]) if secondLast != 'e' && secondLast != 'y' && secondLast != 'o' { return verb[:len(verb)-1] + "ing" @@ -217,20 +218,20 @@ func Pluralize(noun string, count int) string { // PluralForm returns the plural form of a noun. func PluralForm(noun string) string { - noun = strings.TrimSpace(noun) + noun = core.Trim(noun) if noun == "" { return "" } - lower := strings.ToLower(noun) + lower := core.Lower(noun) if form := getNounForm(currentLangForGrammar(), lower, "other"); form != "" { if unicode.IsUpper(rune(noun[0])) && len(form) > 0 { - return strings.ToUpper(string(form[0])) + form[1:] + return core.Upper(string(form[0])) + form[1:] } return form } if plural, ok := irregularNouns[lower]; ok { if unicode.IsUpper(rune(noun[0])) { - return strings.ToUpper(string(plural[0])) + plural[1:] + return core.Upper(string(plural[0])) + plural[1:] } return plural } @@ -238,28 +239,28 @@ func PluralForm(noun string) string { } func applyRegularPlural(noun string) string { - lower := strings.ToLower(noun) - if strings.HasSuffix(lower, "s") || - strings.HasSuffix(lower, "ss") || - strings.HasSuffix(lower, "sh") || - strings.HasSuffix(lower, "ch") || - strings.HasSuffix(lower, "x") || - strings.HasSuffix(lower, "z") { + lower := core.Lower(noun) + if core.HasSuffix(lower, "s") || + core.HasSuffix(lower, "ss") || + core.HasSuffix(lower, "sh") || + core.HasSuffix(lower, "ch") || + core.HasSuffix(lower, "x") || + core.HasSuffix(lower, "z") { return noun + "es" } - if strings.HasSuffix(lower, "y") && len(noun) > 1 { + if core.HasSuffix(lower, "y") && len(noun) > 1 { prev := rune(lower[len(lower)-2]) if !isVowel(prev) { return noun[:len(noun)-1] + "ies" } } - if strings.HasSuffix(lower, "f") { + if core.HasSuffix(lower, "f") { return noun[:len(noun)-1] + "ves" } - if strings.HasSuffix(lower, "fe") { + if core.HasSuffix(lower, "fe") { return noun[:len(noun)-2] + "ves" } - if strings.HasSuffix(lower, "o") && len(noun) > 1 { + if core.HasSuffix(lower, "o") && len(noun) > 1 { prev := rune(lower[len(lower)-2]) if !isVowel(prev) { if lower == "hero" || lower == "potato" || lower == "tomato" || lower == "echo" || lower == "veto" { @@ -280,14 +281,14 @@ func Article(word string) string { if word == "" { return "" } - lower := strings.ToLower(strings.TrimSpace(word)) + lower := core.Lower(core.Trim(word)) for key := range consonantSounds { - if strings.HasPrefix(lower, key) { + if core.HasPrefix(lower, key) { return "a" } } for key := range vowelSounds { - if strings.HasPrefix(lower, key) { + if core.HasPrefix(lower, key) { return "an" } } @@ -307,7 +308,7 @@ func isVowel(r rune) bool { // Title capitalises the first letter of each word. func Title(s string) string { - var b strings.Builder + b := core.NewBuilder() b.Grow(len(s)) prev := ' ' for _, r := range s { @@ -330,8 +331,8 @@ func Quote(s string) string { func TemplateFuncs() template.FuncMap { return template.FuncMap{ "title": Title, - "lower": strings.ToLower, - "upper": strings.ToUpper, + "lower": core.Lower, + "upper": core.Upper, "past": PastTense, "gerund": Gerund, "plural": Pluralize, diff --git a/handler.go b/handler.go index 962d3f2..236a289 100644 --- a/handler.go +++ b/handler.go @@ -1,19 +1,18 @@ package i18n import ( - "fmt" - "strings" + "dappco.re/go/core" ) // LabelHandler handles i18n.label.{word} -> "Status:" patterns. type LabelHandler struct{} func (h LabelHandler) Match(key string) bool { - return strings.HasPrefix(key, "i18n.label.") + return core.HasPrefix(key, "i18n.label.") } func (h LabelHandler) Handle(key string, args []any, next func() string) string { - word := strings.TrimPrefix(key, "i18n.label.") + word := core.TrimPrefix(key, "i18n.label.") return Label(word) } @@ -21,11 +20,11 @@ func (h LabelHandler) Handle(key string, args []any, next func() string) string type ProgressHandler struct{} func (h ProgressHandler) Match(key string) bool { - return strings.HasPrefix(key, "i18n.progress.") + return core.HasPrefix(key, "i18n.progress.") } func (h ProgressHandler) Handle(key string, args []any, next func() string) string { - verb := strings.TrimPrefix(key, "i18n.progress.") + verb := core.TrimPrefix(key, "i18n.progress.") if len(args) > 0 { if subj, ok := args[0].(string); ok { return ProgressSubject(verb, subj) @@ -38,14 +37,14 @@ func (h ProgressHandler) Handle(key string, args []any, next func() string) stri type CountHandler struct{} func (h CountHandler) Match(key string) bool { - return strings.HasPrefix(key, "i18n.count.") + return core.HasPrefix(key, "i18n.count.") } func (h CountHandler) Handle(key string, args []any, next func() string) string { - noun := strings.TrimPrefix(key, "i18n.count.") + noun := core.TrimPrefix(key, "i18n.count.") if len(args) > 0 { count := toInt(args[0]) - return fmt.Sprintf("%d %s", count, Pluralize(noun, count)) + return core.Sprintf("%d %s", count, Pluralize(noun, count)) } return noun } @@ -54,11 +53,11 @@ func (h CountHandler) Handle(key string, args []any, next func() string) string type DoneHandler struct{} func (h DoneHandler) Match(key string) bool { - return strings.HasPrefix(key, "i18n.done.") + return core.HasPrefix(key, "i18n.done.") } func (h DoneHandler) Handle(key string, args []any, next func() string) string { - verb := strings.TrimPrefix(key, "i18n.done.") + verb := core.TrimPrefix(key, "i18n.done.") if len(args) > 0 { if subj, ok := args[0].(string); ok { return ActionResult(verb, subj) @@ -71,11 +70,11 @@ func (h DoneHandler) Handle(key string, args []any, next func() string) string { type FailHandler struct{} func (h FailHandler) Match(key string) bool { - return strings.HasPrefix(key, "i18n.fail.") + return core.HasPrefix(key, "i18n.fail.") } func (h FailHandler) Handle(key string, args []any, next func() string) string { - verb := strings.TrimPrefix(key, "i18n.fail.") + verb := core.TrimPrefix(key, "i18n.fail.") if len(args) > 0 { if subj, ok := args[0].(string); ok { return ActionFailed(verb, subj) @@ -88,14 +87,14 @@ func (h FailHandler) Handle(key string, args []any, next func() string) string { type NumericHandler struct{} func (h NumericHandler) Match(key string) bool { - return strings.HasPrefix(key, "i18n.numeric.") + return core.HasPrefix(key, "i18n.numeric.") } func (h NumericHandler) Handle(key string, args []any, next func() string) string { if len(args) == 0 { return next() } - format := strings.TrimPrefix(key, "i18n.numeric.") + format := core.TrimPrefix(key, "i18n.numeric.") switch format { case "number", "int": return FormatNumber(toInt64(args[0])) diff --git a/i18n.go b/i18n.go index e0d3188..6186c83 100644 --- a/i18n.go +++ b/i18n.go @@ -2,9 +2,9 @@ package i18n import ( "bytes" - "errors" - "strings" "text/template" + + "dappco.re/go/core" ) // T translates a message using the default service. @@ -24,10 +24,7 @@ func Raw(messageID string, args ...any) string { } // ErrServiceNotInitialised is returned when the service is not initialised. -var ErrServiceNotInitialised = errors.New("i18n: service not initialised") - -// ErrServiceNotInitialized is deprecated: use ErrServiceNotInitialised. -var ErrServiceNotInitialized = ErrServiceNotInitialised +var ErrServiceNotInitialised = core.NewError("i18n: service not initialised") // SetLanguage sets the language for the default service. func SetLanguage(lang string) error { @@ -109,7 +106,7 @@ func executeIntentTemplate(tmplStr string, data templateData) string { } func applyTemplate(text string, data any) string { - if !strings.Contains(text, "{{") { + if !core.Contains(text, "{{") { return text } if cached, ok := templateCache.Load(text); ok { diff --git a/i18n_test.go b/i18n_test.go index 2f0d4c5..9428acd 100644 --- a/i18n_test.go +++ b/i18n_test.go @@ -81,7 +81,8 @@ func TestSetLanguage_Bad_Unsupported(t *testing.T) { _ = Init() SetDefault(svc) - _ = SetLanguage("xx") + err = SetLanguage("xx") + assert.Error(t, err) } func TestCurrentLanguage_Good(t *testing.T) { @@ -257,7 +258,3 @@ func TestApplyTemplate_Bad_ExecuteError(t *testing.T) { func TestErrServiceNotInitialised_Good(t *testing.T) { assert.Equal(t, "i18n: service not initialised", ErrServiceNotInitialised.Error()) } - -func TestErrServiceNotInitialized_DeprecatedAlias(t *testing.T) { - assert.Equal(t, ErrServiceNotInitialised, ErrServiceNotInitialized, "deprecated alias must point to the same error") -} diff --git a/loader.go b/loader.go index 3ca917c..dfde74f 100644 --- a/loader.go +++ b/loader.go @@ -4,9 +4,9 @@ import ( "encoding/json" "io/fs" "path" - "strings" "sync" + "dappco.re/go/core" log "dappco.re/go/core/log" ) @@ -29,8 +29,8 @@ func NewFSLoader(fsys fs.FS, dir string) *FSLoader { func (l *FSLoader) Load(lang string) (map[string]Message, *GrammarData, error) { variants := []string{ lang + ".json", - strings.ReplaceAll(lang, "-", "_") + ".json", - strings.ReplaceAll(lang, "_", "-") + ".json", + core.Replace(lang, "-", "_") + ".json", + core.Replace(lang, "_", "-") + ".json", } var data []byte @@ -72,11 +72,11 @@ func (l *FSLoader) Languages() []string { return } for _, entry := range entries { - if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") { + if entry.IsDir() || !core.HasSuffix(entry.Name(), ".json") { continue } - lang := strings.TrimSuffix(entry.Name(), ".json") - lang = strings.ReplaceAll(lang, "_", "-") + lang := core.TrimSuffix(entry.Name(), ".json") + lang = core.Replace(lang, "_", "-") l.languages = append(l.languages, lang) } }) @@ -106,9 +106,9 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa switch v := value.(type) { case string: - if grammar != nil && strings.HasPrefix(fullKey, "gram.word.") { - wordKey := strings.TrimPrefix(fullKey, "gram.word.") - grammar.Words[strings.ToLower(wordKey)] = v + if grammar != nil && core.HasPrefix(fullKey, "gram.word.") { + wordKey := core.TrimPrefix(fullKey, "gram.word.") + grammar.Words[core.Lower(wordKey)] = v continue } out[fullKey] = Message{Text: v} @@ -117,7 +117,7 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa // Verb form object (has base/past/gerund keys) if grammar != nil && isVerbFormObject(v) { verbName := key - if after, ok := strings.CutPrefix(fullKey, "gram.verb."); ok { + if after, ok := cutPrefix(fullKey, "gram.verb."); ok { verbName = after } forms := VerbForms{} @@ -127,14 +127,14 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa if gerund, ok := v["gerund"].(string); ok { forms.Gerund = gerund } - grammar.Verbs[strings.ToLower(verbName)] = forms + grammar.Verbs[core.Lower(verbName)] = forms continue } // Noun form object (under gram.noun.* or has gender field) - if grammar != nil && (strings.HasPrefix(fullKey, "gram.noun.") || isNounFormObject(v)) { + if grammar != nil && (core.HasPrefix(fullKey, "gram.noun.") || isNounFormObject(v)) { nounName := key - if after, ok := strings.CutPrefix(fullKey, "gram.noun."); ok { + if after, ok := cutPrefix(fullKey, "gram.noun."); ok { nounName = after } _, hasOne := v["one"] @@ -150,7 +150,7 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa if gender, ok := v["gender"].(string); ok { forms.Gender = gender } - grammar.Nouns[strings.ToLower(nounName)] = forms + grammar.Nouns[core.Lower(nounName)] = forms continue } } @@ -161,7 +161,7 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa if arr, ok := nd.([]any); ok { for _, item := range arr { if s, ok := item.(string); ok { - grammar.Signals.NounDeterminers = append(grammar.Signals.NounDeterminers, strings.ToLower(s)) + grammar.Signals.NounDeterminers = append(grammar.Signals.NounDeterminers, core.Lower(s)) } } } @@ -170,7 +170,7 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa if arr, ok := va.([]any); ok { for _, item := range arr { if s, ok := item.(string); ok { - grammar.Signals.VerbAuxiliaries = append(grammar.Signals.VerbAuxiliaries, strings.ToLower(s)) + grammar.Signals.VerbAuxiliaries = append(grammar.Signals.VerbAuxiliaries, core.Lower(s)) } } } @@ -179,7 +179,7 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa if arr, ok := vi.([]any); ok { for _, item := range arr { if s, ok := item.(string); ok { - grammar.Signals.VerbInfinitive = append(grammar.Signals.VerbInfinitive, strings.ToLower(s)) + grammar.Signals.VerbInfinitive = append(grammar.Signals.VerbInfinitive, core.Lower(s)) } } } @@ -251,6 +251,13 @@ func flattenWithGrammar(prefix string, data map[string]any, out map[string]Messa } } +func cutPrefix(s, prefix string) (string, bool) { + if !core.HasPrefix(s, prefix) { + return "", false + } + return core.TrimPrefix(s, prefix), true +} + func isVerbFormObject(m map[string]any) bool { _, hasBase := m["base"] _, hasPast := m["past"] diff --git a/localise.go b/localise.go index b59da6a..17599d1 100644 --- a/localise.go +++ b/localise.go @@ -1,9 +1,7 @@ package i18n import ( - "os" - "strings" - + "dappco.re/go/core" "golang.org/x/text/language" ) @@ -85,18 +83,18 @@ func Direction() TextDirection { func IsRTL() bool { return Direction() == DirRTL } func detectLanguage(supported []language.Tag) string { - langEnv := os.Getenv("LANG") + langEnv := core.Env("LANG") if langEnv == "" { - langEnv = os.Getenv("LC_ALL") + langEnv = core.Env("LC_ALL") if langEnv == "" { - langEnv = os.Getenv("LC_MESSAGES") + langEnv = core.Env("LC_MESSAGES") } } if langEnv == "" { return "" } - baseLang := strings.Split(langEnv, ".")[0] - baseLang = strings.ReplaceAll(baseLang, "_", "-") + baseLang := core.Split(langEnv, ".")[0] + baseLang = core.Replace(baseLang, "_", "-") parsedLang, err := language.Parse(baseLang) if err != nil { return "" diff --git a/localise_test.go b/localise_test.go index 9f6157b..96308b1 100644 --- a/localise_test.go +++ b/localise_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/text/language" ) // --- Formality.String() --- @@ -95,7 +96,7 @@ func TestIsRTLLanguage_Good(t *testing.T) { {"german", "de", false}, {"french", "fr", false}, {"unknown", "xx", false}, - {"arabic_variant", "ar-EG-extra", true}, // len > 2 prefix check + {"arabic_variant", "ar-EG-extra", true}, // len > 2 prefix check {"english_variant", "en-US-extra", false}, // len > 2, not RTL } for _, tt := range tests { @@ -149,6 +150,36 @@ func TestDetectLanguage_Good(t *testing.T) { assert.Equal(t, "", result, "should return empty with no supported languages") } +func TestDetectLanguage_Good_UsesLANG(t *testing.T) { + t.Setenv("LANG", "fr_FR.UTF-8") + t.Setenv("LC_ALL", "") + t.Setenv("LC_MESSAGES", "") + + result := detectLanguage([]language.Tag{language.English, language.French}) + require.NotEmpty(t, result) + assert.Equal(t, "fr", result[:2]) +} + +func TestDetectLanguage_Good_FallsBackToLCALL(t *testing.T) { + t.Setenv("LANG", "") + t.Setenv("LC_ALL", "fr_CA.UTF-8") + t.Setenv("LC_MESSAGES", "") + + result := detectLanguage([]language.Tag{language.English, language.French}) + require.NotEmpty(t, result) + assert.Equal(t, "fr", result[:2]) +} + +func TestDetectLanguage_Good_FallsBackToLCMessages(t *testing.T) { + t.Setenv("LANG", "") + t.Setenv("LC_ALL", "") + t.Setenv("LC_MESSAGES", "fr_BE.UTF-8") + + result := detectLanguage([]language.Tag{language.English, language.French}) + require.NotEmpty(t, result) + assert.Equal(t, "fr", result[:2]) +} + // --- Mode.String() --- func TestMode_String_Good(t *testing.T) { diff --git a/reversal/tokeniser.go b/reversal/tokeniser.go index 46e46b4..d1d61ed 100644 --- a/reversal/tokeniser.go +++ b/reversal/tokeniser.go @@ -16,8 +16,9 @@ package reversal import ( - "strings" + "unicode" + "dappco.re/go/core" i18n "dappco.re/go/core/i18n" ) @@ -49,17 +50,17 @@ const ( // Token represents a single classified token from a text string. type Token struct { - Raw string // Original text as it appeared in input - Lower string // Lowercased form - Type TokenType // Classification - Confidence float64 // 0.0-1.0 classification confidence - AltType TokenType // Runner-up classification (dual-class only) - AltConf float64 // Runner-up confidence - VerbInfo VerbMatch // Set when Type OR AltType == TokenVerb - NounInfo NounMatch // Set when Type OR AltType == TokenNoun - WordCat string // Set when Type == TokenWord - ArtType string // Set when Type == TokenArticle - PunctType string // Set when Type == TokenPunctuation + Raw string // Original text as it appeared in input + Lower string // Lowercased form + Type TokenType // Classification + Confidence float64 // 0.0-1.0 classification confidence + AltType TokenType // Runner-up classification (dual-class only) + AltConf float64 // Runner-up confidence + VerbInfo VerbMatch // Set when Type OR AltType == TokenVerb + NounInfo NounMatch // Set when Type OR AltType == TokenNoun + WordCat string // Set when Type == TokenWord + ArtType string // Set when Type == TokenArticle + PunctType string // Set when Type == TokenPunctuation Signals *SignalBreakdown // Non-nil only when WithSignals() option is set } @@ -209,7 +210,7 @@ func (t *Tokeniser) buildNounIndex() { // Tier 3: Try reverse morphology rules and round-trip verify via // the forward function PluralForm(). func (t *Tokeniser) MatchNoun(word string) (NounMatch, bool) { - word = strings.ToLower(strings.TrimSpace(word)) + word = core.Lower(core.Trim(word)) if word == "" { return NounMatch{}, false } @@ -250,27 +251,27 @@ func (t *Tokeniser) reverseRegularPlural(word string) []string { var candidates []string // Rule: consonant + "ies" → consonant + "y" (e.g., "entries" → "entry") - if strings.HasSuffix(word, "ies") && len(word) > 3 { + if core.HasSuffix(word, "ies") && len(word) > 3 { base := word[:len(word)-3] + "y" candidates = append(candidates, base) } // Rule: "ves" → "f" or "fe" (e.g., "wolves" → "wolf", "knives" → "knife") - if strings.HasSuffix(word, "ves") && len(word) > 3 { + if core.HasSuffix(word, "ves") && len(word) > 3 { candidates = append(candidates, word[:len(word)-3]+"f") candidates = append(candidates, word[:len(word)-3]+"fe") } // Rule: sibilant + "es" (e.g., "processes" → "process", "branches" → "branch") - if strings.HasSuffix(word, "ses") || strings.HasSuffix(word, "xes") || - strings.HasSuffix(word, "zes") || strings.HasSuffix(word, "ches") || - strings.HasSuffix(word, "shes") { + if core.HasSuffix(word, "ses") || core.HasSuffix(word, "xes") || + core.HasSuffix(word, "zes") || core.HasSuffix(word, "ches") || + core.HasSuffix(word, "shes") { base := word[:len(word)-2] // strip "es" candidates = append(candidates, base) } // Rule: drop "s" (e.g., "servers" → "server") - if strings.HasSuffix(word, "s") && len(word) > 1 { + if core.HasSuffix(word, "s") && len(word) > 1 { base := word[:len(word)-1] candidates = append(candidates, base) } @@ -285,7 +286,7 @@ func (t *Tokeniser) reverseRegularPlural(word string) []string { // Tier 3: Try reverse morphology rules and round-trip verify via // the forward functions PastTense() and Gerund(). func (t *Tokeniser) MatchVerb(word string) (VerbMatch, bool) { - word = strings.ToLower(strings.TrimSpace(word)) + word = core.Lower(core.Trim(word)) if word == "" { return VerbMatch{}, false } @@ -358,7 +359,7 @@ func (t *Tokeniser) bestRoundTrip(target string, candidates []string, forward fu // Priority 3: prefer candidate not ending in "e" (avoids phantom verbs // with CCe endings like "walke", "processe") for _, m := range matches { - if !strings.HasSuffix(m, "e") { + if !core.HasSuffix(m, "e") { return m } } @@ -402,12 +403,12 @@ func isVowelByte(b byte) bool { func (t *Tokeniser) reverseRegularPast(word string) []string { var candidates []string - if !strings.HasSuffix(word, "ed") { + if !core.HasSuffix(word, "ed") { return candidates } // Rule: consonant + "ied" → consonant + "y" (e.g., "copied" → "copy") - if strings.HasSuffix(word, "ied") && len(word) > 3 { + if core.HasSuffix(word, "ied") && len(word) > 3 { base := word[:len(word)-3] + "y" candidates = append(candidates, base) } @@ -448,14 +449,14 @@ func (t *Tokeniser) reverseRegularPast(word string) []string { func (t *Tokeniser) reverseRegularGerund(word string) []string { var candidates []string - if !strings.HasSuffix(word, "ing") || len(word) < 4 { + if !core.HasSuffix(word, "ing") || len(word) < 4 { return candidates } stem := word[:len(word)-3] // strip "ing" // Rule: "ying" → "ie" (e.g., "dying" → "die") - if strings.HasSuffix(word, "ying") && len(word) > 4 { + if core.HasSuffix(word, "ying") && len(word) > 4 { base := word[:len(word)-4] + "ie" candidates = append(candidates, base) } @@ -488,15 +489,15 @@ func (t *Tokeniser) buildWordIndex() { } for key, display := range data.Words { // Map the key itself (already lowercase) - t.words[strings.ToLower(key)] = key + t.words[core.Lower(key)] = key // Map the display form (e.g., "URL" → "url", "SSH" → "ssh") - t.words[strings.ToLower(display)] = key + t.words[core.Lower(display)] = key } } // IsDualClass returns true if the word exists in both verb and noun tables. func (t *Tokeniser) IsDualClass(word string) bool { - return t.dualClass[strings.ToLower(word)] + return t.dualClass[core.Lower(word)] } func (t *Tokeniser) buildDualClassIndex() { @@ -519,7 +520,7 @@ func (t *Tokeniser) buildSignalIndex() { // falls back per-field rather than silently disabling signals. if data != nil && len(data.Signals.NounDeterminers) > 0 { for _, w := range data.Signals.NounDeterminers { - t.nounDet[strings.ToLower(w)] = true + t.nounDet[core.Lower(w)] = true } } else { for _, w := range []string{ @@ -534,7 +535,7 @@ func (t *Tokeniser) buildSignalIndex() { if data != nil && len(data.Signals.VerbAuxiliaries) > 0 { for _, w := range data.Signals.VerbAuxiliaries { - t.verbAux[strings.ToLower(w)] = true + t.verbAux[core.Lower(w)] = true } } else { for _, w := range []string{ @@ -548,7 +549,7 @@ func (t *Tokeniser) buildSignalIndex() { if data != nil && len(data.Signals.VerbInfinitive) > 0 { for _, w := range data.Signals.VerbInfinitive { - t.verbInf[strings.ToLower(w)] = true + t.verbInf[core.Lower(w)] = true } } else { t.verbInf["to"] = true @@ -570,7 +571,7 @@ func defaultWeights() map[string]float64 { // MatchWord performs a case-insensitive lookup in the words map. // Returns the category key and true if found, or ("", false) otherwise. func (t *Tokeniser) MatchWord(word string) (string, bool) { - cat, ok := t.words[strings.ToLower(word)] + cat, ok := t.words[core.Lower(word)] return cat, ok } @@ -583,13 +584,13 @@ func (t *Tokeniser) MatchArticle(word string) (string, bool) { return "", false } - lower := strings.ToLower(word) + lower := core.Lower(word) - if lower == strings.ToLower(data.Articles.IndefiniteDefault) || - lower == strings.ToLower(data.Articles.IndefiniteVowel) { + if lower == core.Lower(data.Articles.IndefiniteDefault) || + lower == core.Lower(data.Articles.IndefiniteVowel) { return "indefinite", true } - if lower == strings.ToLower(data.Articles.Definite) { + if lower == core.Lower(data.Articles.Definite) { return "definite", true } @@ -613,12 +614,12 @@ var clauseBoundaries = map[string]bool{ // Pass 1 classifies unambiguous tokens and marks dual-class base forms. // Pass 2 resolves ambiguous tokens using weighted disambiguation signals. func (t *Tokeniser) Tokenise(text string) []Token { - text = strings.TrimSpace(text) + text = core.Trim(text) if text == "" { return nil } - parts := strings.Fields(text) + parts := splitFields(text) var tokens []Token // --- Pass 1: Classify & Mark --- @@ -628,7 +629,7 @@ func (t *Tokeniser) Tokenise(text string) []Token { // Classify the word portion (if any). if word != "" { - tok := Token{Raw: raw, Lower: strings.ToLower(word)} + tok := Token{Raw: raw, Lower: core.Lower(word)} if artType, ok := t.MatchArticle(word); ok { // Articles are unambiguous. @@ -938,7 +939,7 @@ func (t *Tokeniser) resolveToken(tok *Token, verbScore, nounScore float64, compo // recognised: "..." (progress), "?" (question), ":" (label). func splitTrailingPunct(s string) (string, string) { // Check for "..." suffix first (3-char pattern). - if strings.HasSuffix(s, "...") { + if core.HasSuffix(s, "...") { return s[:len(s)-3], "..." } // Check single-char trailing punctuation. @@ -951,6 +952,27 @@ func splitTrailingPunct(s string) (string, string) { return s, "" } +func splitFields(s string) []string { + fields := make([]string, 0, 8) + start := -1 + for i, r := range s { + if unicode.IsSpace(r) { + if start >= 0 { + fields = append(fields, s[start:i]) + start = -1 + } + continue + } + if start < 0 { + start = i + } + } + if start >= 0 { + fields = append(fields, s[start:]) + } + return fields +} + // matchPunctuation detects known punctuation patterns. // Returns the punctuation type and true if recognised. func matchPunctuation(punct string) (string, bool) { diff --git a/service.go b/service.go index b40c257..ae685fa 100644 --- a/service.go +++ b/service.go @@ -3,15 +3,14 @@ package i18n import ( "embed" "encoding/json" - "fmt" "io/fs" "maps" "path" "slices" - "strings" "sync" "sync/atomic" + "dappco.re/go/core" log "dappco.re/go/core/log" "golang.org/x/text/language" ) @@ -168,13 +167,17 @@ func SetDefault(s *Service) { // // //go:embed *.json // var localeFS embed.FS -// func init() { i18n.AddLoader(i18n.NewFSLoader(localeFS, ".")) } -func AddLoader(loader Loader) { +// func init() { +// if err := i18n.AddLoader(i18n.NewFSLoader(localeFS, ".")); err != nil { +// panic(err) +// } +// } +func AddLoader(loader Loader) error { svc := Default() if svc == nil { - return + return ErrServiceNotInitialised } - _ = svc.AddLoader(loader) + return svc.AddLoader(loader) } func (s *Service) loadJSON(lang string, data []byte) error { @@ -318,8 +321,8 @@ func (s *Service) resolveWithFallback(messageID string, data any) string { if text := s.tryResolve(s.fallbackLang, messageID, data); text != "" { return text } - if strings.Contains(messageID, ".") { - parts := strings.Split(messageID, ".") + if core.Contains(messageID, ".") { + parts := core.Split(messageID, ".") verb := parts[len(parts)-1] commonKey := "common.action." + verb if text := s.tryResolve(s.currentLang, commonKey, data); text != "" { @@ -388,7 +391,7 @@ func (s *Service) getEffectiveFormality(data any) Formality { return f } case string: - switch strings.ToLower(f) { + switch core.Lower(f) { case "formal": return FormalityFormal case "informal": @@ -402,7 +405,7 @@ func (s *Service) getEffectiveFormality(data any) Formality { func (s *Service) handleMissingKey(key string, args []any) string { switch s.mode { case ModeStrict: - panic(fmt.Sprintf("i18n: missing translation key %q", key)) + panic(core.Sprintf("i18n: missing translation key %q", key)) case ModeCollect: var argsMap map[string]any if len(args) > 0 { @@ -499,7 +502,7 @@ func (s *Service) LoadFS(fsys fs.FS, dir string) error { return log.E("Service.LoadFS", "read locales directory", err) } for _, entry := range entries { - if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") { + if entry.IsDir() || !core.HasSuffix(entry.Name(), ".json") { continue } filePath := path.Join(dir, entry.Name()) @@ -507,8 +510,8 @@ func (s *Service) LoadFS(fsys fs.FS, dir string) error { if err != nil { return log.E("Service.LoadFS", "read locale: "+entry.Name(), err) } - lang := strings.TrimSuffix(entry.Name(), ".json") - lang = strings.ReplaceAll(lang, "_", "-") + lang := core.TrimSuffix(entry.Name(), ".json") + lang = core.Replace(lang, "_", "-") if err := s.loadJSON(lang, data); err != nil { return log.E("Service.LoadFS", "parse locale: "+entry.Name(), err) } diff --git a/service_test.go b/service_test.go index 76076c0..47605db 100644 --- a/service_test.go +++ b/service_test.go @@ -340,7 +340,9 @@ func TestPackageLevelAddLoader(t *testing.T) { Data: []byte(`{"pkg.hello": "from package"}`), }, } - AddLoader(NewFSLoader(extra, ".")) + if err := AddLoader(NewFSLoader(extra, ".")); err != nil { + t.Fatalf("AddLoader() failed: %v", err) + } got := T("pkg.hello") if got != "from package" { @@ -348,6 +350,23 @@ func TestPackageLevelAddLoader(t *testing.T) { } } +func TestPackageLevelAddLoader_Bad(t *testing.T) { + svc, err := New() + if err != nil { + t.Fatalf("New() failed: %v", err) + } + SetDefault(svc) + + broken := fstest.MapFS{ + "en.json": &fstest.MapFile{ + Data: []byte(`{invalid json}`), + }, + } + if err := AddLoader(NewFSLoader(broken, ".")); err == nil { + t.Error("AddLoader() should fail with invalid JSON") + } +} + func TestServiceLoadFS_Good(t *testing.T) { svc, err := New() if err != nil { -- 2.45.3