[agent/claude:opus] DX audit and fix. 1) Review CLAUDE.md — update any outdate... #1

Merged
Virgil merged 1 commit from agent/dx-audit-and-fix--1--review-claude-md into main 2026-03-17 08:43:18 +00:00
8 changed files with 360 additions and 19 deletions

View file

@ -46,13 +46,19 @@ Builder-pattern wrapper around `os/exec` with structured logging via a pluggable
## Key Patterns
- **Core integration**: `Service` embeds `*core.ServiceRuntime[Options]` and uses `s.Core().ACTION(...)` to broadcast typed action messages. Tests create a Core instance via `framework.New(framework.WithName("process", NewService(...)))`.
- **Output capture**: All process output goes through a fixed-size `RingBuffer` (default 1MB). Oldest data is silently overwritten when full.
- **Output capture**: All process output goes through a fixed-size `RingBuffer` (default 1MB). Oldest data is silently overwritten when full. Set `RunOptions.DisableCapture` to skip buffering for long-running processes where output is only streamed via IPC.
- **Process lifecycle**: Status transitions are `StatusPending → StatusRunning → StatusExited|StatusFailed|StatusKilled`. The `done` channel closes on exit; use `<-proc.Done()` or `proc.Wait()`.
- **Detach / process group isolation**: Set `RunOptions.Detach = true` to run the subprocess in its own process group (`Setpgid`). Detached processes use `context.Background()` so they survive parent context cancellation and parent death.
- **Graceful shutdown**: `Service.OnShutdown` kills all running processes. `Daemon.Stop()` performs ordered teardown: sets health to not-ready → shuts down health server → releases PID file → unregisters from registry. `DaemonOptions.ShutdownTimeout` (default 30 s) bounds the shutdown context.
- **Auto-registration**: Pass a `Registry` and `RegistryEntry` in `DaemonOptions` to automatically register the daemon on `Start()` and unregister on `Stop()`.
- **PID liveness checks**: Both `PIDFile` and `Registry` use `proc.Signal(syscall.Signal(0))` to check if a PID is alive before trusting stored state.
- **Error handling**: All errors MUST use `coreerr.E()` from `go-log` (imported as `coreerr`), never `fmt.Errorf` or `errors.New`. Sentinel errors are package-level vars created with `coreerr.E("", "message", nil)`.
## Dependencies
- `forge.lthn.ai/core/go/pkg/core` — Core DI framework, IPC actions, `ServiceRuntime`
- `forge.lthn.ai/core/go-log` — Structured error constructor (`coreerr.E()`)
- `forge.lthn.ai/core/go-io` — Filesystem abstraction (`coreio.Local`) used by PIDFile and Registry
- `github.com/stretchr/testify` — test assertions (require/assert)
## Testing

View file

@ -93,6 +93,47 @@ func TestDaemon_DefaultShutdownTimeout(t *testing.T) {
assert.Equal(t, 30*time.Second, d.opts.ShutdownTimeout)
}
func TestDaemon_RunBlocksUntilCancelled(t *testing.T) {
d := NewDaemon(DaemonOptions{
HealthAddr: "127.0.0.1:0",
})
err := d.Start()
require.NoError(t, err)
ctx, cancel := context.WithCancel(context.Background())
done := make(chan error, 1)
go func() {
done <- d.Run(ctx)
}()
// Run should be blocking
select {
case <-done:
t.Fatal("Run should block until context is cancelled")
case <-time.After(50 * time.Millisecond):
// Expected — still blocking
}
cancel()
select {
case err := <-done:
assert.NoError(t, err)
case <-time.After(2 * time.Second):
t.Fatal("Run should return after context cancellation")
}
}
func TestDaemon_StopIdempotent(t *testing.T) {
d := NewDaemon(DaemonOptions{})
// Stop without Start should be a no-op
err := d.Stop()
assert.NoError(t, err)
}
func TestDaemon_AutoRegisters(t *testing.T) {
dir := t.TempDir()
reg := NewRegistry(filepath.Join(dir, "daemons"))

View file

@ -146,3 +146,67 @@ func TestCommand_UsesDefaultLogger(t *testing.T) {
t.Errorf("expected default logger to receive 1 debug call, got %d", len(logger.debugCalls))
}
}
func TestCommand_WithDir(t *testing.T) {
ctx := context.Background()
out, err := exec.Command(ctx, "pwd").
WithDir("/tmp").
WithLogger(&mockLogger{}).
Output()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
trimmed := strings.TrimSpace(string(out))
if trimmed != "/tmp" && trimmed != "/private/tmp" {
t.Errorf("expected /tmp or /private/tmp, got %q", trimmed)
}
}
func TestCommand_WithEnv(t *testing.T) {
ctx := context.Background()
out, err := exec.Command(ctx, "sh", "-c", "echo $TEST_EXEC_VAR").
WithEnv([]string{"TEST_EXEC_VAR=exec_val"}).
WithLogger(&mockLogger{}).
Output()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if strings.TrimSpace(string(out)) != "exec_val" {
t.Errorf("expected 'exec_val', got %q", string(out))
}
}
func TestCommand_WithStdinStdoutStderr(t *testing.T) {
ctx := context.Background()
input := strings.NewReader("piped input\n")
var stdout, stderr strings.Builder
err := exec.Command(ctx, "cat").
WithStdin(input).
WithStdout(&stdout).
WithStderr(&stderr).
WithLogger(&mockLogger{}).
Run()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if strings.TrimSpace(stdout.String()) != "piped input" {
t.Errorf("expected 'piped input', got %q", stdout.String())
}
}
func TestRunQuiet_Good(t *testing.T) {
ctx := context.Background()
err := exec.RunQuiet(ctx, "echo", "quiet")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
func TestRunQuiet_Bad(t *testing.T) {
ctx := context.Background()
err := exec.RunQuiet(ctx, "sh", "-c", "echo fail >&2; exit 1")
if err == nil {
t.Fatal("expected error")
}
}

View file

@ -60,7 +60,10 @@ func (p *PIDFile) Acquire() error {
func (p *PIDFile) Release() error {
p.mu.Lock()
defer p.mu.Unlock()
return coreio.Local.Delete(p.path)
if err := coreio.Local.Delete(p.path); err != nil {
return coreerr.E("PIDFile.Release", "failed to remove PID file", err)
}
return nil
}
// Path returns the PID file path.

View file

@ -6,6 +6,7 @@ import (
"sync/atomic"
"forge.lthn.ai/core/go/pkg/core"
coreerr "forge.lthn.ai/core/go-log"
)
// Global default service (follows i18n pattern).
@ -25,7 +26,7 @@ func Default() *Service {
// Thread-safe: can be called concurrently with Default().
func SetDefault(s *Service) error {
if s == nil {
return &ServiceError{msg: "process: SetDefault called with nil service"}
return ErrSetDefaultNil
}
defaultService.Store(s)
return nil
@ -120,15 +121,10 @@ func Running() []*Process {
return svc.Running()
}
// ErrServiceNotInitialized is returned when the service is not initialized.
var ErrServiceNotInitialized = &ServiceError{msg: "process: service not initialized; call process.Init(core) first"}
// ServiceError represents a service-level error.
type ServiceError struct {
msg string
}
// Error returns the service error message.
func (e *ServiceError) Error() string {
return e.msg
}
// Errors
var (
// ErrServiceNotInitialized is returned when the service is not initialized.
ErrServiceNotInitialized = coreerr.E("", "process: service not initialized; call process.Init(core) first", nil)
// ErrSetDefaultNil is returned when SetDefault is called with nil.
ErrSetDefaultNil = coreerr.E("", "process: SetDefault called with nil service", nil)
)

View file

@ -2,6 +2,7 @@ package process
import (
"context"
"os"
"testing"
"time"
@ -189,6 +190,39 @@ func TestProcess_SendInput(t *testing.T) {
})
}
func TestProcess_Signal(t *testing.T) {
t.Run("sends signal to running process", func(t *testing.T) {
svc, _ := newTestService(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
proc, err := svc.Start(ctx, "sleep", "60")
require.NoError(t, err)
err = proc.Signal(os.Interrupt)
assert.NoError(t, err)
select {
case <-proc.Done():
// Process terminated by signal
case <-time.After(2 * time.Second):
t.Fatal("process should have been terminated by signal")
}
})
t.Run("error on completed process", func(t *testing.T) {
svc, _ := newTestService(t)
proc, err := svc.Start(context.Background(), "echo", "done")
require.NoError(t, err)
<-proc.Done()
err = proc.Signal(os.Interrupt)
assert.ErrorIs(t, err, ErrProcessNotRunning)
})
}
func TestProcess_CloseStdin(t *testing.T) {
t.Run("closes stdin pipe", func(t *testing.T) {
svc, _ := newTestService(t)

View file

@ -9,6 +9,7 @@ import (
"time"
coreio "forge.lthn.ai/core/go-io"
coreerr "forge.lthn.ai/core/go-log"
)
// DaemonEntry records a running daemon in the registry.
@ -50,20 +51,26 @@ func (r *Registry) Register(entry DaemonEntry) error {
}
if err := coreio.Local.EnsureDir(r.dir); err != nil {
return err
return coreerr.E("Registry.Register", "failed to create registry directory", err)
}
data, err := json.MarshalIndent(entry, "", " ")
if err != nil {
return err
return coreerr.E("Registry.Register", "failed to marshal entry", err)
}
return coreio.Local.Write(r.entryPath(entry.Code, entry.Daemon), string(data))
if err := coreio.Local.Write(r.entryPath(entry.Code, entry.Daemon), string(data)); err != nil {
return coreerr.E("Registry.Register", "failed to write entry file", err)
}
return nil
}
// Unregister removes a daemon entry from the registry.
func (r *Registry) Unregister(code, daemon string) error {
return coreio.Local.Delete(r.entryPath(code, daemon))
if err := coreio.Local.Delete(r.entryPath(code, daemon)); err != nil {
return coreerr.E("Registry.Unregister", "failed to delete entry file", err)
}
return nil
}
// Get reads a single daemon entry and checks whether its process is alive.

View file

@ -98,6 +98,60 @@ func TestService_Start(t *testing.T) {
t.Fatal("process should have been killed")
}
})
t.Run("disable capture", func(t *testing.T) {
svc, _ := newTestService(t)
proc, err := svc.StartWithOptions(context.Background(), RunOptions{
Command: "echo",
Args: []string{"no-capture"},
DisableCapture: true,
})
require.NoError(t, err)
<-proc.Done()
assert.Equal(t, StatusExited, proc.Status)
assert.Equal(t, "", proc.Output(), "output should be empty when capture is disabled")
})
t.Run("with environment variables", func(t *testing.T) {
svc, _ := newTestService(t)
proc, err := svc.StartWithOptions(context.Background(), RunOptions{
Command: "sh",
Args: []string{"-c", "echo $MY_TEST_VAR"},
Env: []string{"MY_TEST_VAR=hello_env"},
})
require.NoError(t, err)
<-proc.Done()
assert.Contains(t, proc.Output(), "hello_env")
})
t.Run("detach survives parent context", func(t *testing.T) {
svc, _ := newTestService(t)
ctx, cancel := context.WithCancel(context.Background())
proc, err := svc.StartWithOptions(ctx, RunOptions{
Command: "echo",
Args: []string{"detached"},
Detach: true,
})
require.NoError(t, err)
// Cancel the parent context
cancel()
// Detached process should still complete normally
select {
case <-proc.Done():
assert.Equal(t, StatusExited, proc.Status)
assert.Equal(t, 0, proc.ExitCode)
case <-time.After(2 * time.Second):
t.Fatal("detached process should have completed")
}
})
}
func TestService_Run(t *testing.T) {
@ -255,3 +309,139 @@ func TestService_Clear(t *testing.T) {
assert.Len(t, svc.List(), 0)
})
}
func TestService_Kill(t *testing.T) {
t.Run("kills running process", func(t *testing.T) {
svc, _ := newTestService(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
proc, err := svc.Start(ctx, "sleep", "60")
require.NoError(t, err)
err = svc.Kill(proc.ID)
assert.NoError(t, err)
select {
case <-proc.Done():
// Process killed successfully
case <-time.After(2 * time.Second):
t.Fatal("process should have been killed")
}
})
t.Run("error on unknown id", func(t *testing.T) {
svc, _ := newTestService(t)
err := svc.Kill("nonexistent")
assert.ErrorIs(t, err, ErrProcessNotFound)
})
}
func TestService_Output(t *testing.T) {
t.Run("returns captured output", func(t *testing.T) {
svc, _ := newTestService(t)
proc, err := svc.Start(context.Background(), "echo", "captured")
require.NoError(t, err)
<-proc.Done()
output, err := svc.Output(proc.ID)
require.NoError(t, err)
assert.Contains(t, output, "captured")
})
t.Run("error on unknown id", func(t *testing.T) {
svc, _ := newTestService(t)
_, err := svc.Output("nonexistent")
assert.ErrorIs(t, err, ErrProcessNotFound)
})
}
func TestService_OnShutdown(t *testing.T) {
t.Run("kills all running processes", func(t *testing.T) {
svc, _ := newTestService(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
proc1, err := svc.Start(ctx, "sleep", "60")
require.NoError(t, err)
proc2, err := svc.Start(ctx, "sleep", "60")
require.NoError(t, err)
assert.True(t, proc1.IsRunning())
assert.True(t, proc2.IsRunning())
err = svc.OnShutdown(context.Background())
assert.NoError(t, err)
select {
case <-proc1.Done():
case <-time.After(2 * time.Second):
t.Fatal("proc1 should have been killed")
}
select {
case <-proc2.Done():
case <-time.After(2 * time.Second):
t.Fatal("proc2 should have been killed")
}
})
}
func TestService_OnStartup(t *testing.T) {
t.Run("returns nil", func(t *testing.T) {
svc, _ := newTestService(t)
err := svc.OnStartup(context.Background())
assert.NoError(t, err)
})
}
func TestService_RunWithOptions(t *testing.T) {
t.Run("returns output on success", func(t *testing.T) {
svc, _ := newTestService(t)
output, err := svc.RunWithOptions(context.Background(), RunOptions{
Command: "echo",
Args: []string{"opts-test"},
})
require.NoError(t, err)
assert.Contains(t, output, "opts-test")
})
t.Run("returns error on failure", func(t *testing.T) {
svc, _ := newTestService(t)
_, err := svc.RunWithOptions(context.Background(), RunOptions{
Command: "sh",
Args: []string{"-c", "exit 2"},
})
assert.Error(t, err)
assert.Contains(t, err.Error(), "exited with code 2")
})
}
func TestService_Running(t *testing.T) {
t.Run("returns only running processes", func(t *testing.T) {
svc, _ := newTestService(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
proc1, err := svc.Start(ctx, "sleep", "60")
require.NoError(t, err)
proc2, err := svc.Start(context.Background(), "echo", "done")
require.NoError(t, err)
<-proc2.Done()
running := svc.Running()
assert.Len(t, running, 1)
assert.Equal(t, proc1.ID, running[0].ID)
cancel()
<-proc1.Done()
})
}