fix: resolve IO migration test failures in node, cache, and cli

- pkg/io/node: implement ReadFile (fs.ReadFileFS), Walk with WalkOptions,
  CopyFile, FromTar constructor; fix Exists test calls to match bool return
- pkg/cache: add Medium DI parameter, use errors.Is for wrapped ErrNotExist
- pkg/cli: add Medium DI to PIDFile and DaemonOptions for testability
- TODO.md: mark go-i18n article/irregular validator complete

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-02-20 10:58:57 +00:00
parent 94480ca38e
commit b2e78bf29e
5 changed files with 144 additions and 47 deletions

View file

@ -21,7 +21,7 @@ Format: `- [ ] REPO: task description` / `- [x]` when done.
- [ ] **go-i18n: Classification benchmark suite** — `classify_bench_test.go` with 200+ domain-tagged sentences. Categories: {technical, creative, ethical, casual}. Ground truth for calibrating 1B pre-tags.
- [ ] **go-i18n: 1B pre-sort pipeline tool** — CLI/func that reads JSONL corpus, classifies via LEK-Gemma3-1B, writes back with `domain_1b` field. Target: ~5K sentences/sec on M3.
- [ ] **go-i18n: 1B vs 27B calibration check** — Sample 500 sentences, classify with both, measure agreement. 75% baseline from benchmarks, technical↔creative is known weak spot.
- [ ] **go-i18n: Article/irregular validator** — Lightweight funcs using 1B's strong article (100%) and irregular base form (100%) accuracy as fast validators.
- [x] **go-i18n: Article/irregular validator** — `validate.go` + `validate_test.go` (14 tests). `ValidateArticle()`, `ValidateIrregular()`, batch variants. Commit `3c55d91`.
#### 2b: Reference Distributions

30
pkg/cache/cache.go vendored
View file

@ -3,6 +3,7 @@ package cache
import (
"encoding/json"
"errors"
"os"
"path/filepath"
"time"
@ -15,6 +16,7 @@ const DefaultTTL = 1 * time.Hour
// Cache represents a file-based cache.
type Cache struct {
medium io.Medium
baseDir string
ttl time.Duration
}
@ -27,8 +29,13 @@ type Entry struct {
}
// New creates a new cache instance.
// If baseDir is empty, uses .core/cache in current directory
func New(baseDir string, ttl time.Duration) (*Cache, error) {
// If medium is nil, uses io.Local (filesystem).
// If baseDir is empty, uses .core/cache in current directory.
func New(medium io.Medium, baseDir string, ttl time.Duration) (*Cache, error) {
if medium == nil {
medium = io.Local
}
if baseDir == "" {
// Use .core/cache in current working directory
cwd, err := os.Getwd()
@ -43,11 +50,12 @@ func New(baseDir string, ttl time.Duration) (*Cache, error) {
}
// Ensure cache directory exists
if err := io.Local.EnsureDir(baseDir); err != nil {
if err := medium.EnsureDir(baseDir); err != nil {
return nil, err
}
return &Cache{
medium: medium,
baseDir: baseDir,
ttl: ttl,
}, nil
@ -62,9 +70,9 @@ func (c *Cache) Path(key string) string {
func (c *Cache) Get(key string, dest interface{}) (bool, error) {
path := c.Path(key)
dataStr, err := io.Local.Read(path)
dataStr, err := c.medium.Read(path)
if err != nil {
if os.IsNotExist(err) {
if errors.Is(err, os.ErrNotExist) {
return false, nil
}
return false, err
@ -94,7 +102,7 @@ func (c *Cache) Set(key string, data interface{}) error {
path := c.Path(key)
// Ensure parent directory exists
if err := io.Local.EnsureDir(filepath.Dir(path)); err != nil {
if err := c.medium.EnsureDir(filepath.Dir(path)); err != nil {
return err
}
@ -115,14 +123,14 @@ func (c *Cache) Set(key string, data interface{}) error {
return err
}
return io.Local.Write(path, string(entryBytes))
return c.medium.Write(path, string(entryBytes))
}
// Delete removes an item from the cache.
func (c *Cache) Delete(key string) error {
path := c.Path(key)
err := io.Local.Delete(path)
if os.IsNotExist(err) {
err := c.medium.Delete(path)
if errors.Is(err, os.ErrNotExist) {
return nil
}
return err
@ -130,14 +138,14 @@ func (c *Cache) Delete(key string) error {
// Clear removes all cached items.
func (c *Cache) Clear() error {
return io.Local.DeleteAll(c.baseDir)
return c.medium.DeleteAll(c.baseDir)
}
// Age returns how old a cached item is, or -1 if not cached.
func (c *Cache) Age(key string) time.Duration {
path := c.Path(key)
dataStr, err := io.Local.Read(path)
dataStr, err := c.medium.Read(path)
if err != nil {
return -1
}

View file

@ -74,13 +74,18 @@ func IsStderrTTY() bool {
// PIDFile manages a process ID file for single-instance enforcement.
type PIDFile struct {
path string
mu sync.Mutex
medium io.Medium
path string
mu sync.Mutex
}
// NewPIDFile creates a PID file manager.
func NewPIDFile(path string) *PIDFile {
return &PIDFile{path: path}
// If medium is nil, uses io.Local (filesystem).
func NewPIDFile(medium io.Medium, path string) *PIDFile {
if medium == nil {
medium = io.Local
}
return &PIDFile{medium: medium, path: path}
}
// Acquire writes the current PID to the file.
@ -90,7 +95,7 @@ func (p *PIDFile) Acquire() error {
defer p.mu.Unlock()
// Check if PID file exists
if data, err := io.Local.Read(p.path); err == nil {
if data, err := p.medium.Read(p.path); err == nil {
pid, err := strconv.Atoi(data)
if err == nil && pid > 0 {
// Check if process is still running
@ -101,19 +106,19 @@ func (p *PIDFile) Acquire() error {
}
}
// Stale PID file, remove it
_ = io.Local.Delete(p.path)
_ = p.medium.Delete(p.path)
}
// Ensure directory exists
if dir := filepath.Dir(p.path); dir != "." {
if err := io.Local.EnsureDir(dir); err != nil {
if err := p.medium.EnsureDir(dir); err != nil {
return fmt.Errorf("failed to create PID directory: %w", err)
}
}
// Write current PID
pid := os.Getpid()
if err := io.Local.Write(p.path, strconv.Itoa(pid)); err != nil {
if err := p.medium.Write(p.path, strconv.Itoa(pid)); err != nil {
return fmt.Errorf("failed to write PID file: %w", err)
}
@ -124,7 +129,7 @@ func (p *PIDFile) Acquire() error {
func (p *PIDFile) Release() error {
p.mu.Lock()
defer p.mu.Unlock()
return io.Local.Delete(p.path)
return p.medium.Delete(p.path)
}
// Path returns the PID file path.
@ -246,6 +251,10 @@ func (h *HealthServer) Addr() string {
// DaemonOptions configures daemon mode execution.
type DaemonOptions struct {
// Medium is the filesystem for PID file operations.
// If nil, uses io.Local (filesystem).
Medium io.Medium
// PIDFile path for single-instance enforcement.
// Leave empty to skip PID file management.
PIDFile string
@ -289,7 +298,7 @@ func NewDaemon(opts DaemonOptions) *Daemon {
}
if opts.PIDFile != "" {
d.pid = NewPIDFile(opts.PIDFile)
d.pid = NewPIDFile(opts.Medium, opts.PIDFile)
}
if opts.HealthAddr != "" {

View file

@ -24,8 +24,9 @@ type Node struct {
files map[string]*dataFile
}
// compile-time interface check
// compile-time interface checks
var _ coreio.Medium = (*Node)(nil)
var _ fs.ReadFileFS = (*Node)(nil)
// New creates a new, empty Node.
func New() *Node {
@ -78,8 +79,17 @@ func (n *Node) ToTar() ([]byte, error) {
return buf.Bytes(), nil
}
// FromTar replaces the in-memory tree with the contents of a tar archive.
func (n *Node) FromTar(data []byte) error {
// FromTar builds a fresh Node and populates it from the tar archive held in
// data. If loading the archive fails, the error is returned together with a
// nil Node.
func FromTar(data []byte) (*Node, error) {
	node := New()
	err := node.LoadTar(data)
	if err != nil {
		return nil, err
	}
	return node, nil
}
// LoadTar replaces the in-memory tree with the contents of a tar archive.
func (n *Node) LoadTar(data []byte) error {
newFiles := make(map[string]*dataFile)
tr := tar.NewReader(bytes.NewReader(data))
@ -118,6 +128,91 @@ func (n *Node) WalkNode(root string, fn fs.WalkDirFunc) error {
return fs.WalkDir(n, root, fn)
}
// WalkOptions configures the behaviour of Walk. The zero value applies no
// depth limit, no filter, and no error suppression.
type WalkOptions struct {
	// MaxDepth limits how many directory levels below root are descended.
	// Directories located MaxDepth levels below root are still visited but
	// not entered. Values <= 0 mean unlimited.
	MaxDepth int
	// Filter, if set, is called for each entry that was reached without an
	// error. Return true to include the entry (and descend into it if it is
	// a directory). Returning false hides the entry from the callback; a
	// rejected directory is skipped entirely.
	Filter func(path string, d fs.DirEntry) bool
	// SkipErrors makes Walk return nil, without invoking the callback, when
	// root cannot be stat'ed (e.g. nonexistent root). Errors encountered
	// after that check are still handed to the callback.
	SkipErrors bool
}
// Walk traverses the in-memory tree starting at root and calls fn for every
// entry, honouring an optional WalkOptions value. Only the first element of
// opts is used; when none is given the zero WalkOptions applies.
//
// The traversal itself is delegated to fs.WalkDir, so fn may return
// fs.SkipDir as usual.
func (n *Node) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error {
	cfg := WalkOptions{}
	if len(opts) != 0 {
		cfg = opts[0]
	}

	// With SkipErrors set, a root that cannot be stat'ed ends the walk quietly.
	if cfg.SkipErrors {
		_, statErr := n.Stat(root)
		if statErr != nil {
			return nil
		}
	}

	visit := func(entryPath string, entry fs.DirEntry, walkErr error) error {
		// The filter is consulted only for entries reached without an error.
		if walkErr == nil && cfg.Filter != nil && !cfg.Filter(entryPath, entry) {
			if entry == nil || !entry.IsDir() {
				return nil
			}
			// A rejected directory is pruned together with its subtree.
			return fs.SkipDir
		}

		// The callback runs before the depth check so that a directory
		// sitting exactly at the limit is still reported.
		outcome := fn(entryPath, entry, walkErr)
		switch {
		case outcome != nil:
			return outcome
		case cfg.MaxDepth <= 0:
			return nil
		case entry == nil || !entry.IsDir():
			return nil
		case entryPath == root:
			return nil
		}

		// Depth is the number of path segments between root and the entry.
		relative := strings.TrimPrefix(strings.TrimPrefix(entryPath, root), "/")
		if strings.Count(relative, "/")+1 >= cfg.MaxDepth {
			return fs.SkipDir
		}
		return nil
	}

	return fs.WalkDir(n, root, visit)
}
// ReadFile returns the bytes stored under name, satisfying fs.ReadFileFS.
// A leading "/" on name is ignored. When no file is stored under that name
// the result is a *fs.PathError wrapping fs.ErrNotExist.
func (n *Node) ReadFile(name string) ([]byte, error) {
	name = strings.TrimPrefix(name, "/")
	if entry, found := n.files[name]; found {
		// Hand out a private copy so callers cannot alter the stored bytes.
		out := make([]byte, len(entry.content))
		copy(out, entry.content)
		return out, nil
	}
	return nil, &fs.PathError{Op: "read", Path: name, Err: fs.ErrNotExist}
}
// CopyFile writes the content of the in-memory file src to dst on the local
// filesystem using os.WriteFile with mode perm. A leading "/" on src is
// ignored.
//
// If src is not a stored file, a *fs.PathError with Op "copyfile" is
// returned: it wraps fs.ErrInvalid when Stat reports src as a directory
// (directories cannot be copied this way) and fs.ErrNotExist otherwise.
func (n *Node) CopyFile(src, dst string, perm fs.FileMode) error {
	src = strings.TrimPrefix(src, "/")
	if entry, found := n.files[src]; found {
		return os.WriteFile(dst, entry.content, perm)
	}

	// Not a stored file: distinguish "is a directory" from "does not exist".
	reason := fs.ErrNotExist
	if info, statErr := n.Stat(src); statErr == nil && info.IsDir() {
		reason = fs.ErrInvalid
	}
	return &fs.PathError{Op: "copyfile", Path: src, Err: reason}
}
// CopyTo copies a file (or directory tree) from the node to any Medium.
func (n *Node) CopyTo(target coreio.Medium, sourcePath, destPath string) error {
sourcePath = strings.TrimPrefix(sourcePath, "/")

View file

@ -243,33 +243,21 @@ func TestExists_Good(t *testing.T) {
n.AddData("foo.txt", []byte("foo"))
n.AddData("bar/baz.txt", []byte("baz"))
exists, err := n.Exists("foo.txt")
require.NoError(t, err)
assert.True(t, exists)
exists, err = n.Exists("bar")
require.NoError(t, err)
assert.True(t, exists)
assert.True(t, n.Exists("foo.txt"))
assert.True(t, n.Exists("bar"))
}
func TestExists_Bad(t *testing.T) {
n := New()
exists, err := n.Exists("nonexistent")
require.NoError(t, err)
assert.False(t, exists)
assert.False(t, n.Exists("nonexistent"))
}
func TestExists_Ugly(t *testing.T) {
n := New()
n.AddData("dummy.txt", []byte("dummy"))
exists, err := n.Exists(".")
require.NoError(t, err)
assert.True(t, exists, "root '.' must exist")
exists, err = n.Exists("")
require.NoError(t, err)
assert.True(t, exists, "empty path (root) must exist")
assert.True(t, n.Exists("."), "root '.' must exist")
assert.True(t, n.Exists(""), "empty path (root) must exist")
}
// ---------------------------------------------------------------------------
@ -466,11 +454,8 @@ func TestFromTar_Good(t *testing.T) {
n, err := FromTar(buf.Bytes())
require.NoError(t, err)
exists, _ := n.Exists("foo.txt")
assert.True(t, exists, "foo.txt should exist")
exists, _ = n.Exists("bar/baz.txt")
assert.True(t, exists, "bar/baz.txt should exist")
assert.True(t, n.Exists("foo.txt"), "foo.txt should exist")
assert.True(t, n.Exists("bar/baz.txt"), "bar/baz.txt should exist")
}
func TestFromTar_Bad(t *testing.T) {