feat(cli): add daemon mode support

Add comprehensive daemon mode support to pkg/cli:

- Mode detection (Interactive/Pipe/Daemon) based on TTY and CORE_DAEMON env
- TTY helpers using golang.org/x/term (IsTTY, IsStdinTTY, IsStderrTTY)
- PIDFile for single-instance enforcement with stale PID detection
- HealthServer with /health and /ready endpoints for orchestration
- Daemon lifecycle manager combining PID, health, and graceful shutdown
- SIGHUP support for configuration reloading in runtime.go
- Fix i18n.go type references (MissingKey, OnMissingKey)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Snider 2026-01-30 19:28:50 +00:00
parent e9be13bcd3
commit 6062853649
4 changed files with 764 additions and 20 deletions

445
pkg/cli/daemon.go Normal file
View file

@ -0,0 +1,445 @@
// Package cli provides the CLI runtime and utilities.
package cli
import (
"context"
"fmt"
"net"
"net/http"
"os"
"path/filepath"
"strconv"
"sync"
"syscall"
"time"
"golang.org/x/term"
)
// Mode represents the CLI execution mode.
type Mode int
const (
// ModeInteractive indicates TTY attached with coloured output.
ModeInteractive Mode = iota
// ModePipe indicates stdout is piped, colours disabled.
ModePipe
// ModeDaemon indicates headless execution, log-only output.
ModeDaemon
)
// String returns the string representation of the Mode.
func (m Mode) String() string {
switch m {
case ModeInteractive:
return "interactive"
case ModePipe:
return "pipe"
case ModeDaemon:
return "daemon"
default:
return "unknown"
}
}
// DetectMode determines the execution mode based on environment.
// Checks CORE_DAEMON env var first, then TTY status.
func DetectMode() Mode {
if os.Getenv("CORE_DAEMON") == "1" {
return ModeDaemon
}
if !IsTTY() {
return ModePipe
}
return ModeInteractive
}
// IsTTY returns true if stdout is a terminal.
func IsTTY() bool {
return term.IsTerminal(int(os.Stdout.Fd()))
}
// IsStdinTTY returns true if stdin is a terminal.
func IsStdinTTY() bool {
return term.IsTerminal(int(os.Stdin.Fd()))
}
// IsStderrTTY returns true if stderr is a terminal.
func IsStderrTTY() bool {
return term.IsTerminal(int(os.Stderr.Fd()))
}
// --- PID File Management ---
// PIDFile manages a process ID file for single-instance enforcement.
type PIDFile struct {
path string
mu sync.Mutex
}
// NewPIDFile creates a PID file manager.
func NewPIDFile(path string) *PIDFile {
return &PIDFile{path: path}
}
// Acquire writes the current PID to the file.
// Returns error if another instance is running.
func (p *PIDFile) Acquire() error {
p.mu.Lock()
defer p.mu.Unlock()
// Check if PID file exists
if data, err := os.ReadFile(p.path); err == nil {
pid, err := strconv.Atoi(string(data))
if err == nil && pid > 0 {
// Check if process is still running
if process, err := os.FindProcess(pid); err == nil {
if err := process.Signal(syscall.Signal(0)); err == nil {
return fmt.Errorf("another instance is running (PID %d)", pid)
}
}
}
// Stale PID file, remove it
os.Remove(p.path)
}
// Ensure directory exists
if dir := filepath.Dir(p.path); dir != "." {
if err := os.MkdirAll(dir, 0755); err != nil {
return fmt.Errorf("failed to create PID directory: %w", err)
}
}
// Write current PID
pid := os.Getpid()
if err := os.WriteFile(p.path, []byte(strconv.Itoa(pid)), 0644); err != nil {
return fmt.Errorf("failed to write PID file: %w", err)
}
return nil
}
// Release removes the PID file.
func (p *PIDFile) Release() error {
p.mu.Lock()
defer p.mu.Unlock()
return os.Remove(p.path)
}
// Path returns the PID file path.
func (p *PIDFile) Path() string {
return p.path
}
// --- Health Check Server ---
// HealthServer provides a minimal HTTP health check endpoint.
type HealthServer struct {
addr string
server *http.Server
listener net.Listener
mu sync.Mutex
ready bool
checks []HealthCheck
}
// HealthCheck is a function that returns nil if healthy.
type HealthCheck func() error
// NewHealthServer creates a health check server.
func NewHealthServer(addr string) *HealthServer {
return &HealthServer{
addr: addr,
ready: true,
}
}
// AddCheck registers a health check function.
func (h *HealthServer) AddCheck(check HealthCheck) {
h.mu.Lock()
h.checks = append(h.checks, check)
h.mu.Unlock()
}
// SetReady sets the readiness status.
func (h *HealthServer) SetReady(ready bool) {
h.mu.Lock()
h.ready = ready
h.mu.Unlock()
}
// Start begins serving health check endpoints.
// Endpoints:
// - /health - liveness probe (always 200 if server is up)
// - /ready - readiness probe (200 if ready, 503 if not)
func (h *HealthServer) Start() error {
mux := http.NewServeMux()
mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
h.mu.Lock()
checks := h.checks
h.mu.Unlock()
for _, check := range checks {
if err := check(); err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
fmt.Fprintf(w, "unhealthy: %v\n", err)
return
}
}
w.WriteHeader(http.StatusOK)
fmt.Fprintln(w, "ok")
})
mux.HandleFunc("/ready", func(w http.ResponseWriter, r *http.Request) {
h.mu.Lock()
ready := h.ready
h.mu.Unlock()
if !ready {
w.WriteHeader(http.StatusServiceUnavailable)
fmt.Fprintln(w, "not ready")
return
}
w.WriteHeader(http.StatusOK)
fmt.Fprintln(w, "ready")
})
listener, err := net.Listen("tcp", h.addr)
if err != nil {
return fmt.Errorf("failed to listen on %s: %w", h.addr, err)
}
h.listener = listener
h.server = &http.Server{Handler: mux}
go func() {
if err := h.server.Serve(listener); err != http.ErrServerClosed {
LogError(fmt.Sprintf("health server error: %v", err))
}
}()
return nil
}
// Stop gracefully shuts down the health server.
func (h *HealthServer) Stop(ctx context.Context) error {
if h.server == nil {
return nil
}
return h.server.Shutdown(ctx)
}
// Addr returns the actual address the server is listening on.
// Useful when using port 0 for dynamic port assignment.
func (h *HealthServer) Addr() string {
if h.listener != nil {
return h.listener.Addr().String()
}
return h.addr
}
// --- Daemon Runner ---
// DaemonOptions configures daemon mode execution.
type DaemonOptions struct {
// PIDFile path for single-instance enforcement.
// Leave empty to skip PID file management.
PIDFile string
// ShutdownTimeout is the maximum time to wait for graceful shutdown.
// Default: 30 seconds.
ShutdownTimeout time.Duration
// HealthAddr is the address for health check endpoints.
// Example: ":8080", "127.0.0.1:9000"
// Leave empty to disable health checks.
HealthAddr string
// HealthChecks are additional health check functions.
HealthChecks []HealthCheck
// OnReload is called when SIGHUP is received.
// Use for config reloading. Leave nil to ignore SIGHUP.
OnReload func() error
}
// Daemon manages daemon lifecycle.
type Daemon struct {
opts DaemonOptions
pid *PIDFile
health *HealthServer
reload chan struct{}
running bool
mu sync.Mutex
}
// NewDaemon creates a daemon runner with the given options.
func NewDaemon(opts DaemonOptions) *Daemon {
if opts.ShutdownTimeout == 0 {
opts.ShutdownTimeout = 30 * time.Second
}
d := &Daemon{
opts: opts,
reload: make(chan struct{}, 1),
}
if opts.PIDFile != "" {
d.pid = NewPIDFile(opts.PIDFile)
}
if opts.HealthAddr != "" {
d.health = NewHealthServer(opts.HealthAddr)
for _, check := range opts.HealthChecks {
d.health.AddCheck(check)
}
}
return d
}
// Start initialises the daemon (PID file, health server).
// Call this after cli.Init().
func (d *Daemon) Start() error {
d.mu.Lock()
defer d.mu.Unlock()
if d.running {
return fmt.Errorf("daemon already running")
}
// Acquire PID file
if d.pid != nil {
if err := d.pid.Acquire(); err != nil {
return err
}
}
// Start health server
if d.health != nil {
if err := d.health.Start(); err != nil {
if d.pid != nil {
d.pid.Release()
}
return err
}
}
d.running = true
return nil
}
// Run blocks until the context is cancelled or a signal is received.
// Handles graceful shutdown with the configured timeout.
func (d *Daemon) Run(ctx context.Context) error {
d.mu.Lock()
if !d.running {
d.mu.Unlock()
return fmt.Errorf("daemon not started - call Start() first")
}
d.mu.Unlock()
// Wait for context cancellation (from signal handler)
<-ctx.Done()
return d.Stop()
}
// Stop performs graceful shutdown.
func (d *Daemon) Stop() error {
d.mu.Lock()
defer d.mu.Unlock()
if !d.running {
return nil
}
var errs []error
// Create shutdown context with timeout
shutdownCtx, cancel := context.WithTimeout(context.Background(), d.opts.ShutdownTimeout)
defer cancel()
// Stop health server
if d.health != nil {
d.health.SetReady(false)
if err := d.health.Stop(shutdownCtx); err != nil {
errs = append(errs, fmt.Errorf("health server: %w", err))
}
}
// Release PID file
if d.pid != nil {
if err := d.pid.Release(); err != nil && !os.IsNotExist(err) {
errs = append(errs, fmt.Errorf("pid file: %w", err))
}
}
d.running = false
if len(errs) > 0 {
return fmt.Errorf("shutdown errors: %v", errs)
}
return nil
}
// SetReady sets the daemon readiness status for health checks.
func (d *Daemon) SetReady(ready bool) {
if d.health != nil {
d.health.SetReady(ready)
}
}
// HealthAddr returns the health server address, or empty if disabled.
func (d *Daemon) HealthAddr() string {
if d.health != nil {
return d.health.Addr()
}
return ""
}
// --- Convenience Functions ---
// Run blocks until context is cancelled or signal received.
// Simple helper for daemon mode without advanced features.
//
// cli.Init(cli.Options{AppName: "myapp"})
// defer cli.Shutdown()
// cli.Run(cli.Context())
func Run(ctx context.Context) error {
mustInit()
<-ctx.Done()
return ctx.Err()
}
// RunWithTimeout wraps Run with a graceful shutdown timeout.
// The returned function should be deferred to replace cli.Shutdown().
//
// cli.Init(cli.Options{AppName: "myapp"})
// shutdown := cli.RunWithTimeout(30 * time.Second)
// defer shutdown()
// cli.Run(cli.Context())
func RunWithTimeout(timeout time.Duration) func() {
return func() {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
// Create done channel for shutdown completion
done := make(chan struct{})
go func() {
Shutdown()
close(done)
}()
select {
case <-done:
// Clean shutdown
case <-ctx.Done():
// Timeout - force exit
LogWarn("shutdown timeout exceeded, forcing exit")
}
}
}

255
pkg/cli/daemon_test.go Normal file
View file

@ -0,0 +1,255 @@
package cli
import (
"context"
"net/http"
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestDetectMode(t *testing.T) {
t.Run("daemon mode from env", func(t *testing.T) {
t.Setenv("CORE_DAEMON", "1")
assert.Equal(t, ModeDaemon, DetectMode())
})
t.Run("mode string", func(t *testing.T) {
assert.Equal(t, "interactive", ModeInteractive.String())
assert.Equal(t, "pipe", ModePipe.String())
assert.Equal(t, "daemon", ModeDaemon.String())
assert.Equal(t, "unknown", Mode(99).String())
})
}
func TestPIDFile(t *testing.T) {
t.Run("acquire and release", func(t *testing.T) {
tmpDir := t.TempDir()
pidPath := filepath.Join(tmpDir, "test.pid")
pid := NewPIDFile(pidPath)
// Acquire should succeed
err := pid.Acquire()
require.NoError(t, err)
// File should exist with our PID
data, err := os.ReadFile(pidPath)
require.NoError(t, err)
assert.Contains(t, string(data), "")
// Release should remove file
err = pid.Release()
require.NoError(t, err)
_, err = os.Stat(pidPath)
assert.True(t, os.IsNotExist(err))
})
t.Run("stale pid file", func(t *testing.T) {
tmpDir := t.TempDir()
pidPath := filepath.Join(tmpDir, "stale.pid")
// Write a stale PID (non-existent process)
err := os.WriteFile(pidPath, []byte("999999999"), 0644)
require.NoError(t, err)
pid := NewPIDFile(pidPath)
// Should acquire successfully (stale PID removed)
err = pid.Acquire()
require.NoError(t, err)
err = pid.Release()
require.NoError(t, err)
})
t.Run("creates parent directory", func(t *testing.T) {
tmpDir := t.TempDir()
pidPath := filepath.Join(tmpDir, "subdir", "nested", "test.pid")
pid := NewPIDFile(pidPath)
err := pid.Acquire()
require.NoError(t, err)
_, err = os.Stat(pidPath)
require.NoError(t, err)
err = pid.Release()
require.NoError(t, err)
})
t.Run("path getter", func(t *testing.T) {
pid := NewPIDFile("/tmp/test.pid")
assert.Equal(t, "/tmp/test.pid", pid.Path())
})
}
func TestHealthServer(t *testing.T) {
t.Run("health and ready endpoints", func(t *testing.T) {
hs := NewHealthServer("127.0.0.1:0") // Random port
err := hs.Start()
require.NoError(t, err)
defer hs.Stop(context.Background())
addr := hs.Addr()
require.NotEmpty(t, addr)
// Health should be OK
resp, err := http.Get("http://" + addr + "/health")
require.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
resp.Body.Close()
// Ready should be OK by default
resp, err = http.Get("http://" + addr + "/ready")
require.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
resp.Body.Close()
// Set not ready
hs.SetReady(false)
resp, err = http.Get("http://" + addr + "/ready")
require.NoError(t, err)
assert.Equal(t, http.StatusServiceUnavailable, resp.StatusCode)
resp.Body.Close()
})
t.Run("with health checks", func(t *testing.T) {
hs := NewHealthServer("127.0.0.1:0")
healthy := true
hs.AddCheck(func() error {
if !healthy {
return assert.AnError
}
return nil
})
err := hs.Start()
require.NoError(t, err)
defer hs.Stop(context.Background())
addr := hs.Addr()
// Should be healthy
resp, err := http.Get("http://" + addr + "/health")
require.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
resp.Body.Close()
// Make unhealthy
healthy = false
resp, err = http.Get("http://" + addr + "/health")
require.NoError(t, err)
assert.Equal(t, http.StatusServiceUnavailable, resp.StatusCode)
resp.Body.Close()
})
}
func TestDaemon(t *testing.T) {
t.Run("start and stop", func(t *testing.T) {
tmpDir := t.TempDir()
d := NewDaemon(DaemonOptions{
PIDFile: filepath.Join(tmpDir, "test.pid"),
HealthAddr: "127.0.0.1:0",
ShutdownTimeout: 5 * time.Second,
})
err := d.Start()
require.NoError(t, err)
// Health server should be running
addr := d.HealthAddr()
require.NotEmpty(t, addr)
resp, err := http.Get("http://" + addr + "/health")
require.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
resp.Body.Close()
// Stop should succeed
err = d.Stop()
require.NoError(t, err)
// PID file should be removed
_, err = os.Stat(filepath.Join(tmpDir, "test.pid"))
assert.True(t, os.IsNotExist(err))
})
t.Run("double start fails", func(t *testing.T) {
d := NewDaemon(DaemonOptions{
HealthAddr: "127.0.0.1:0",
})
err := d.Start()
require.NoError(t, err)
defer d.Stop()
err = d.Start()
assert.Error(t, err)
assert.Contains(t, err.Error(), "already running")
})
t.Run("run without start fails", func(t *testing.T) {
d := NewDaemon(DaemonOptions{})
ctx, cancel := context.WithCancel(context.Background())
cancel()
err := d.Run(ctx)
assert.Error(t, err)
assert.Contains(t, err.Error(), "not started")
})
t.Run("set ready", func(t *testing.T) {
d := NewDaemon(DaemonOptions{
HealthAddr: "127.0.0.1:0",
})
err := d.Start()
require.NoError(t, err)
defer d.Stop()
addr := d.HealthAddr()
// Initially ready
resp, _ := http.Get("http://" + addr + "/ready")
assert.Equal(t, http.StatusOK, resp.StatusCode)
resp.Body.Close()
// Set not ready
d.SetReady(false)
resp, _ = http.Get("http://" + addr + "/ready")
assert.Equal(t, http.StatusServiceUnavailable, resp.StatusCode)
resp.Body.Close()
})
t.Run("no health addr returns empty", func(t *testing.T) {
d := NewDaemon(DaemonOptions{})
assert.Empty(t, d.HealthAddr())
})
t.Run("default shutdown timeout", func(t *testing.T) {
d := NewDaemon(DaemonOptions{})
assert.Equal(t, 30*time.Second, d.opts.ShutdownTimeout)
})
}
func TestRunWithTimeout(t *testing.T) {
t.Run("creates shutdown function", func(t *testing.T) {
// Just test that it returns a function
shutdown := RunWithTimeout(100 * time.Millisecond)
assert.NotNil(t, shutdown)
})
}

View file

@ -14,7 +14,7 @@ type I18nService struct {
svc *i18n.Service
// Collect mode state
missingKeys []i18n.MissingKeyAction
missingKeys []i18n.MissingKey
missingKeysMu sync.Mutex
}
@ -44,7 +44,7 @@ func NewI18nService(opts I18nOptions) func(*framework.Core) (any, error) {
return &I18nService{
ServiceRuntime: framework.NewServiceRuntime(c, opts),
svc: svc,
missingKeys: make([]i18n.MissingKeyAction, 0),
missingKeys: make([]i18n.MissingKey, 0),
}, nil
}
}
@ -55,25 +55,25 @@ func (s *I18nService) OnStartup(ctx context.Context) error {
// Register action handler for collect mode
if s.svc.Mode() == i18n.ModeCollect {
i18n.SetActionHandler(s.handleMissingKey)
i18n.OnMissingKey(s.handleMissingKey)
}
return nil
}
// handleMissingKey accumulates missing keys in collect mode.
func (s *I18nService) handleMissingKey(action i18n.MissingKeyAction) {
func (s *I18nService) handleMissingKey(mk i18n.MissingKey) {
s.missingKeysMu.Lock()
defer s.missingKeysMu.Unlock()
s.missingKeys = append(s.missingKeys, action)
s.missingKeys = append(s.missingKeys, mk)
}
// MissingKeys returns all missing keys collected in collect mode.
// Call this at the end of a QA session to report missing translations.
func (s *I18nService) MissingKeys() []i18n.MissingKeyAction {
func (s *I18nService) MissingKeys() []i18n.MissingKey {
s.missingKeysMu.Lock()
defer s.missingKeysMu.Unlock()
result := make([]i18n.MissingKeyAction, len(s.missingKeys))
result := make([]i18n.MissingKey, len(s.missingKeys))
copy(result, s.missingKeys)
return result
}
@ -91,9 +91,9 @@ func (s *I18nService) SetMode(mode i18n.Mode) {
// Update action handler registration
if mode == i18n.ModeCollect {
i18n.SetActionHandler(s.handleMissingKey)
i18n.OnMissingKey(s.handleMissingKey)
} else {
i18n.SetActionHandler(nil)
i18n.OnMissingKey(nil)
}
}

View file

@ -41,6 +41,10 @@ type Options struct {
AppName string
Version string
Services []framework.Option // Additional services to register
// OnReload is called when SIGHUP is received (daemon mode).
// Use for configuration reloading. Leave nil to ignore SIGHUP.
OnReload func() error
}
// Init initialises the global CLI runtime.
@ -50,9 +54,15 @@ func Init(opts Options) error {
once.Do(func() {
ctx, cancel := context.WithCancel(context.Background())
// Build signal service options
var signalOpts []SignalOption
if opts.OnReload != nil {
signalOpts = append(signalOpts, WithReloadHandler(opts.OnReload))
}
// Build options: signal service + any additional services
coreOpts := []framework.Option{
framework.WithName("signal", newSignalService(cancel)),
framework.WithName("signal", newSignalService(cancel, signalOpts...)),
}
coreOpts = append(coreOpts, opts.Services...)
coreOpts = append(coreOpts, framework.WithServiceLock())
@ -143,27 +153,61 @@ func Dim(msg string) {
// --- Signal Service (internal) ---
type signalService struct {
cancel context.CancelFunc
sigChan chan os.Signal
cancel context.CancelFunc
sigChan chan os.Signal
onReload func() error
}
func newSignalService(cancel context.CancelFunc) func(*framework.Core) (any, error) {
// SignalOption configures signal handling.
type SignalOption func(*signalService)
// WithReloadHandler sets a callback for SIGHUP.
func WithReloadHandler(fn func() error) SignalOption {
return func(s *signalService) {
s.onReload = fn
}
}
func newSignalService(cancel context.CancelFunc, opts ...SignalOption) func(*framework.Core) (any, error) {
return func(c *framework.Core) (any, error) {
return &signalService{
svc := &signalService{
cancel: cancel,
sigChan: make(chan os.Signal, 1),
}, nil
}
for _, opt := range opts {
opt(svc)
}
return svc, nil
}
}
func (s *signalService) OnStartup(ctx context.Context) error {
signal.Notify(s.sigChan, syscall.SIGINT, syscall.SIGTERM)
signals := []os.Signal{syscall.SIGINT, syscall.SIGTERM}
if s.onReload != nil {
signals = append(signals, syscall.SIGHUP)
}
signal.Notify(s.sigChan, signals...)
go func() {
select {
case <-s.sigChan:
s.cancel()
case <-ctx.Done():
for {
select {
case sig := <-s.sigChan:
switch sig {
case syscall.SIGHUP:
if s.onReload != nil {
if err := s.onReload(); err != nil {
LogError(fmt.Sprintf("reload failed: %v", err))
} else {
LogInfo("configuration reloaded")
}
}
case syscall.SIGINT, syscall.SIGTERM:
s.cancel()
return
}
case <-ctx.Done():
return
}
}
}()