diff --git a/TODO.md b/TODO.md index 10e6473..d0333d5 100644 --- a/TODO.md +++ b/TODO.md @@ -21,7 +21,7 @@ Format: `- [ ] REPO: task description` / `- [x]` when done. - [ ] **go-i18n: Classification benchmark suite** — `classify_bench_test.go` with 200+ domain-tagged sentences. Categories: {technical, creative, ethical, casual}. Ground truth for calibrating 1B pre-tags. - [ ] **go-i18n: 1B pre-sort pipeline tool** — CLI/func that reads JSONL corpus, classifies via LEK-Gemma3-1B, writes back with `domain_1b` field. Target: ~5K sentences/sec on M3. - [ ] **go-i18n: 1B vs 27B calibration check** — Sample 500 sentences, classify with both, measure agreement. 75% baseline from benchmarks, technical↔creative is known weak spot. -- [ ] **go-i18n: Article/irregular validator** — Lightweight funcs using 1B's strong article (100%) and irregular base form (100%) accuracy as fast validators. +- [x] **go-i18n: Article/irregular validator** — `validate.go` + `validate_test.go` (14 tests). `ValidateArticle()`, `ValidateIrregular()`, batch variants. Commit `3c55d91`. #### 2b: Reference Distributions diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index 47f382d..60aefb9 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -3,6 +3,7 @@ package cache import ( "encoding/json" + "errors" "os" "path/filepath" "time" @@ -15,6 +16,7 @@ const DefaultTTL = 1 * time.Hour // Cache represents a file-based cache. type Cache struct { + medium io.Medium baseDir string ttl time.Duration } @@ -27,8 +29,13 @@ type Entry struct { } // New creates a new cache instance. -// If baseDir is empty, uses .core/cache in current directory -func New(baseDir string, ttl time.Duration) (*Cache, error) { +// If medium is nil, uses io.Local (filesystem). +// If baseDir is empty, uses .core/cache in current directory. +func New(medium io.Medium, baseDir string, ttl time.Duration) (*Cache, error) { + if medium == nil { + medium = io.Local + } + if baseDir == "" { // Use .core/cache in current working directory cwd, err := os.Getwd() @@ -43,11 +50,12 @@ func New(baseDir string, ttl time.Duration) (*Cache, error) { } // Ensure cache directory exists - if err := io.Local.EnsureDir(baseDir); err != nil { + if err := medium.EnsureDir(baseDir); err != nil { return nil, err } return &Cache{ + medium: medium, baseDir: baseDir, ttl: ttl, }, nil @@ -62,9 +70,9 @@ func (c *Cache) Path(key string) string { func (c *Cache) Get(key string, dest interface{}) (bool, error) { path := c.Path(key) - dataStr, err := io.Local.Read(path) + dataStr, err := c.medium.Read(path) if err != nil { - if os.IsNotExist(err) { + if errors.Is(err, os.ErrNotExist) { return false, nil } return false, err @@ -94,7 +102,7 @@ func (c *Cache) Set(key string, data interface{}) error { path := c.Path(key) // Ensure parent directory exists - if err := io.Local.EnsureDir(filepath.Dir(path)); err != nil { + if err := c.medium.EnsureDir(filepath.Dir(path)); err != nil { return err } @@ -115,14 +123,14 @@ func (c *Cache) Set(key string, data interface{}) error { return err } - return io.Local.Write(path, string(entryBytes)) + return c.medium.Write(path, string(entryBytes)) } // Delete removes an item from the cache. func (c *Cache) Delete(key string) error { path := c.Path(key) - err := io.Local.Delete(path) - if os.IsNotExist(err) { + err := c.medium.Delete(path) + if errors.Is(err, os.ErrNotExist) { return nil } return err @@ -130,14 +138,14 @@ func (c *Cache) Delete(key string) error { // Clear removes all cached items. func (c *Cache) Clear() error { - return io.Local.DeleteAll(c.baseDir) + return c.medium.DeleteAll(c.baseDir) } // Age returns how old a cached item is, or -1 if not cached. func (c *Cache) Age(key string) time.Duration { path := c.Path(key) - dataStr, err := io.Local.Read(path) + dataStr, err := c.medium.Read(path) if err != nil { return -1 } diff --git a/pkg/cli/daemon.go b/pkg/cli/daemon.go index 961bb26..bdf42c7 100644 --- a/pkg/cli/daemon.go +++ b/pkg/cli/daemon.go @@ -74,13 +74,18 @@ func IsStderrTTY() bool { // PIDFile manages a process ID file for single-instance enforcement. type PIDFile struct { - path string - mu sync.Mutex + medium io.Medium + path string + mu sync.Mutex } // NewPIDFile creates a PID file manager. -func NewPIDFile(path string) *PIDFile { - return &PIDFile{path: path} +// If medium is nil, uses io.Local (filesystem). +func NewPIDFile(medium io.Medium, path string) *PIDFile { + if medium == nil { + medium = io.Local + } + return &PIDFile{medium: medium, path: path} } // Acquire writes the current PID to the file. @@ -90,7 +95,7 @@ func (p *PIDFile) Acquire() error { defer p.mu.Unlock() // Check if PID file exists - if data, err := io.Local.Read(p.path); err == nil { + if data, err := p.medium.Read(p.path); err == nil { pid, err := strconv.Atoi(data) if err == nil && pid > 0 { // Check if process is still running @@ -101,19 +106,19 @@ func (p *PIDFile) Acquire() error { } } // Stale PID file, remove it - _ = io.Local.Delete(p.path) + _ = p.medium.Delete(p.path) } // Ensure directory exists if dir := filepath.Dir(p.path); dir != "." { - if err := io.Local.EnsureDir(dir); err != nil { + if err := p.medium.EnsureDir(dir); err != nil { return fmt.Errorf("failed to create PID directory: %w", err) } } // Write current PID pid := os.Getpid() - if err := io.Local.Write(p.path, strconv.Itoa(pid)); err != nil { + if err := p.medium.Write(p.path, strconv.Itoa(pid)); err != nil { return fmt.Errorf("failed to write PID file: %w", err) } @@ -124,7 +129,7 @@ func (p *PIDFile) Acquire() error { func (p *PIDFile) Release() error { p.mu.Lock() defer p.mu.Unlock() - return io.Local.Delete(p.path) + return p.medium.Delete(p.path) } // Path returns the PID file path. @@ -246,6 +251,10 @@ func (h *HealthServer) Addr() string { // DaemonOptions configures daemon mode execution. type DaemonOptions struct { + // Medium is the filesystem for PID file operations. + // If nil, uses io.Local (filesystem). + Medium io.Medium + // PIDFile path for single-instance enforcement. // Leave empty to skip PID file management. PIDFile string @@ -289,7 +298,7 @@ func NewDaemon(opts DaemonOptions) *Daemon { } if opts.PIDFile != "" { - d.pid = NewPIDFile(opts.PIDFile) + d.pid = NewPIDFile(opts.Medium, opts.PIDFile) } if opts.HealthAddr != "" { diff --git a/pkg/io/node/node.go b/pkg/io/node/node.go index 66ff250..184ccc0 100644 --- a/pkg/io/node/node.go +++ b/pkg/io/node/node.go @@ -24,8 +24,9 @@ type Node struct { files map[string]*dataFile } -// compile-time interface check +// compile-time interface checks var _ coreio.Medium = (*Node)(nil) +var _ fs.ReadFileFS = (*Node)(nil) // New creates a new, empty Node. func New() *Node { @@ -78,8 +79,17 @@ func (n *Node) ToTar() ([]byte, error) { return buf.Bytes(), nil } -// FromTar replaces the in-memory tree with the contents of a tar archive. -func (n *Node) FromTar(data []byte) error { +// FromTar creates a new Node from a tar archive. +func FromTar(data []byte) (*Node, error) { + n := New() + if err := n.LoadTar(data); err != nil { + return nil, err + } + return n, nil +} + +// LoadTar replaces the in-memory tree with the contents of a tar archive. +func (n *Node) LoadTar(data []byte) error { newFiles := make(map[string]*dataFile) tr := tar.NewReader(bytes.NewReader(data)) @@ -118,6 +128,91 @@ func (n *Node) WalkNode(root string, fn fs.WalkDirFunc) error { return fs.WalkDir(n, root, fn) } +// WalkOptions configures the behaviour of Walk. +type WalkOptions struct { + // MaxDepth limits how many directory levels to descend. 0 means unlimited. + MaxDepth int + // Filter, if set, is called for each entry. Return true to include the + // entry (and descend into it if it is a directory). + Filter func(path string, d fs.DirEntry) bool + // SkipErrors suppresses errors (e.g. nonexistent root) instead of + // propagating them through the callback. + SkipErrors bool +} + +// Walk walks the in-memory tree with optional WalkOptions. +func (n *Node) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error { + var opt WalkOptions + if len(opts) > 0 { + opt = opts[0] + } + + if opt.SkipErrors { + // If root doesn't exist, silently return nil. + if _, err := n.Stat(root); err != nil { + return nil + } + } + + return fs.WalkDir(n, root, func(p string, d fs.DirEntry, err error) error { + if opt.Filter != nil && err == nil { + if !opt.Filter(p, d) { + if d != nil && d.IsDir() { + return fs.SkipDir + } + return nil + } + } + + // Call the user's function first so the entry is visited. + result := fn(p, d, err) + + // After visiting a directory at MaxDepth, prevent descending further. + if result == nil && opt.MaxDepth > 0 && d != nil && d.IsDir() && p != root { + rel := strings.TrimPrefix(p, root) + rel = strings.TrimPrefix(rel, "/") + depth := strings.Count(rel, "/") + 1 + if depth >= opt.MaxDepth { + return fs.SkipDir + } + } + + return result + }) +} + +// ReadFile returns the content of the named file as a byte slice. +// Implements fs.ReadFileFS. +func (n *Node) ReadFile(name string) ([]byte, error) { + name = strings.TrimPrefix(name, "/") + f, ok := n.files[name] + if !ok { + return nil, &fs.PathError{Op: "read", Path: name, Err: fs.ErrNotExist} + } + // Return a copy to prevent callers from mutating internal state. + result := make([]byte, len(f.content)) + copy(result, f.content) + return result, nil +} + +// CopyFile copies a file from the in-memory tree to the local filesystem. +func (n *Node) CopyFile(src, dst string, perm fs.FileMode) error { + src = strings.TrimPrefix(src, "/") + f, ok := n.files[src] + if !ok { + // Check if it's a directory — can't copy directories this way. + info, err := n.Stat(src) + if err != nil { + return &fs.PathError{Op: "copyfile", Path: src, Err: fs.ErrNotExist} + } + if info.IsDir() { + return &fs.PathError{Op: "copyfile", Path: src, Err: fs.ErrInvalid} + } + return &fs.PathError{Op: "copyfile", Path: src, Err: fs.ErrNotExist} + } + return os.WriteFile(dst, f.content, perm) +} + // CopyTo copies a file (or directory tree) from the node to any Medium. func (n *Node) CopyTo(target coreio.Medium, sourcePath, destPath string) error { sourcePath = strings.TrimPrefix(sourcePath, "/") diff --git a/pkg/io/node/node_test.go b/pkg/io/node/node_test.go index 5ef1afa..f7520dc 100644 --- a/pkg/io/node/node_test.go +++ b/pkg/io/node/node_test.go @@ -243,33 +243,21 @@ func TestExists_Good(t *testing.T) { n.AddData("foo.txt", []byte("foo")) n.AddData("bar/baz.txt", []byte("baz")) - exists, err := n.Exists("foo.txt") - require.NoError(t, err) - assert.True(t, exists) - - exists, err = n.Exists("bar") - require.NoError(t, err) - assert.True(t, exists) + assert.True(t, n.Exists("foo.txt")) + assert.True(t, n.Exists("bar")) } func TestExists_Bad(t *testing.T) { n := New() - exists, err := n.Exists("nonexistent") - require.NoError(t, err) - assert.False(t, exists) + assert.False(t, n.Exists("nonexistent")) } func TestExists_Ugly(t *testing.T) { n := New() n.AddData("dummy.txt", []byte("dummy")) - exists, err := n.Exists(".") - require.NoError(t, err) - assert.True(t, exists, "root '.' must exist") - - exists, err = n.Exists("") - require.NoError(t, err) - assert.True(t, exists, "empty path (root) must exist") + assert.True(t, n.Exists("."), "root '.' must exist") + assert.True(t, n.Exists(""), "empty path (root) must exist") } // --------------------------------------------------------------------------- @@ -466,11 +454,8 @@ func TestFromTar_Good(t *testing.T) { n, err := FromTar(buf.Bytes()) require.NoError(t, err) - exists, _ := n.Exists("foo.txt") - assert.True(t, exists, "foo.txt should exist") - - exists, _ = n.Exists("bar/baz.txt") - assert.True(t, exists, "bar/baz.txt should exist") + assert.True(t, n.Exists("foo.txt"), "foo.txt should exist") + assert.True(t, n.Exists("bar/baz.txt"), "bar/baz.txt should exist") } func TestFromTar_Bad(t *testing.T) {