Mining/pkg/mining/supervisor.go
Claude f32bf6f3ba
ax(mining): replace prose comment on NewTaskSupervisor with usage example
AX Principle 2: comments show HOW with real values, not WHAT the
function name already says. All other constructors in the file already
use usage-example comments; NewTaskSupervisor was the outlier.

Co-Authored-By: Charon <charon@lethean.io>
2026-04-02 10:00:10 +01:00

205 lines
4.6 KiB
Go

package mining
import (
"context"
"sync"
"time"
"forge.lthn.ai/Snider/Mining/pkg/logging"
)
// TaskFunc is the signature of a function the supervisor can run and restart.
// It receives a per-task context derived from the supervisor's root context,
// so the context is cancelled when the supervisor is stopped. Whenever the
// function returns or panics, the supervisor decides whether to call it again.
//
//	var collectStats TaskFunc = func(ctx context.Context) { <-ctx.Done() }
type TaskFunc func(ctx context.Context)
// SupervisedTask represents a background task with restart capability.
// It holds the registration parameters of one task together with its
// runtime bookkeeping. mu guards running, restartCount, lastStartTime and cancel.
type SupervisedTask struct {
// name is the key the task was registered under; it is used in log fields.
name string
// task is the function the supervisor goroutine invokes on every run.
task TaskFunc
// restartDelay is how long the supervisor waits after a run ends before the next run.
restartDelay time.Duration
// maxRestarts caps the number of restarts; a negative value means no limit.
maxRestarts int
// restartCount is incremented each time a run of task ends (return or panic).
restartCount int
// running is set when startTask launches the task and cleared by Stop.
running bool
// lastStartTime is recorded when startTask launches the task goroutine.
lastStartTime time.Time
// cancel cancels the per-task context created in startTask.
cancel context.CancelFunc
mu sync.Mutex
}
// TaskSupervisor manages background tasks with automatic restart on failure.
// Each task runs in its own goroutine; a run that returns or panics is
// followed by another run after the task's restart delay, subject to its
// restart limit. mu guards tasks and started.
type TaskSupervisor struct {
// tasks holds every registered task, keyed by its name.
tasks map[string]*SupervisedTask
// ctx is the root context; each per-task context is derived from it.
ctx context.Context
// cancel cancels ctx; Stop calls it to signal every task goroutine.
cancel context.CancelFunc
// waitGroup tracks the task goroutines so Stop can wait for them to exit.
waitGroup sync.WaitGroup
mu sync.RWMutex
// started is set by Start and cleared by Stop; it makes a repeated Start a no-op.
started bool
}
// supervisor := NewTaskSupervisor() // empty task registry, fresh cancellable root context
// supervisor.RegisterTask("stats-collector", collectStats, 5*time.Second, -1)
// supervisor.Start()
// defer supervisor.Stop()
func NewTaskSupervisor() *TaskSupervisor {
	rootCtx, stopAll := context.WithCancel(context.Background())

	supervisor := new(TaskSupervisor)
	supervisor.tasks = map[string]*SupervisedTask{}
	supervisor.ctx = rootCtx
	supervisor.cancel = stopAll
	return supervisor
}
// supervisor.RegisterTask("stats-collector", collectStats, 5*time.Second, -1) // negative maxRestarts: restart without limit
// supervisor.RegisterTask("cleanup", runCleanup, 30*time.Second, 10)          // stop supervising after 10 restarts
// supervisor.RegisterTask("cleanup", runCleanupV2, time.Minute, 3)            // same name: replaces the stored entry
func (s *TaskSupervisor) RegisterTask(name string, task TaskFunc, restartDelay time.Duration, maxRestarts int) {
	// Build the entry before taking the lock; only the map write needs it.
	entry := new(SupervisedTask)
	entry.name = name
	entry.task = task
	entry.restartDelay = restartDelay
	entry.maxRestarts = maxRestarts

	s.mu.Lock()
	defer s.mu.Unlock()
	s.tasks[name] = entry
}
// supervisor.Start() // launches every registered task; no-op if already started
// supervisor.Stop()
// supervisor.Start() // launches them again: a cancelled root context is replaced first
func (s *TaskSupervisor) Start() {
	// One critical section covers the started check, the context refresh and
	// the launch loop, so no Stop or RegisterTask can interleave half-way.
	// startTask never takes s.mu, so holding it here cannot deadlock.
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.started {
		return
	}

	// Stop cancels the root context permanently. Without a fresh one, every
	// task goroutine launched below would see a cancelled context and exit
	// immediately while the task is still flagged as running.
	if s.ctx == nil || s.ctx.Err() != nil {
		s.ctx, s.cancel = context.WithCancel(context.Background())
	}

	s.started = true
	for name, task := range s.tasks {
		s.startTask(name, task)
	}
}
// s.startTask("stats-collector", s.tasks["stats-collector"]) // no-op if that task is already running
//
// Launches one goroutine that calls st.task repeatedly. After each run
// (normal return or recovered panic) restartCount is incremented; the loop
// ends once the count exceeds a non-negative maxRestarts or the task context
// is cancelled, otherwise the next run begins after restartDelay. On exit the
// goroutine releases the task context and clears the running flag, so status
// queries do not report a finished task as running.
func (s *TaskSupervisor) startTask(name string, st *SupervisedTask) {
	st.mu.Lock()
	if st.running {
		st.mu.Unlock()
		return
	}
	st.running = true
	st.lastStartTime = time.Now()
	taskCtx, taskCancel := context.WithCancel(s.ctx)
	st.cancel = taskCancel
	st.mu.Unlock()

	s.waitGroup.Add(1)
	go func() {
		// Deferred calls run last-in-first-out: the cleanup below completes
		// before Done, so a waiter on the WaitGroup sees running == false.
		defer s.waitGroup.Done()
		defer func() {
			// Detach the child context from its parent and mark the task idle.
			taskCancel()
			st.mu.Lock()
			st.running = false
			st.mu.Unlock()
		}()

		for {
			// taskCtx is derived from the supervisor context, so this covers
			// both a supervisor-wide stop and a cancel of this task alone.
			if taskCtx.Err() != nil {
				return
			}

			// Run the task with panic recovery so one crash cannot take down
			// the supervisor goroutine.
			func() {
				defer func() {
					if r := recover(); r != nil {
						logging.Error("supervised task panicked", logging.Fields{
							"task":  name,
							"panic": r,
						})
					}
				}()
				st.task(taskCtx)
			}()

			// Snapshot everything needed below while holding the lock, so the
			// log calls never read shared fields unsynchronised.
			st.mu.Lock()
			st.restartCount++
			restartCount := st.restartCount
			maxRestarts := st.maxRestarts
			restartDelay := st.restartDelay
			st.mu.Unlock()

			// A negative maxRestarts means "restart without limit".
			if maxRestarts >= 0 && restartCount > maxRestarts {
				logging.Warn("supervised task reached max restarts", logging.Fields{
					"task":       name,
					"maxRestart": maxRestarts,
				})
				return
			}

			// Wait out the restart delay, but give up at once on cancellation
			// and release the timer instead of letting it run to expiry.
			timer := time.NewTimer(restartDelay)
			select {
			case <-taskCtx.Done():
				timer.Stop()
				return
			case <-timer.C:
				logging.Info("restarting supervised task", logging.Fields{
					"task":         name,
					"restartCount": restartCount,
				})
			}
		}
	}()

	logging.Info("started supervised task", logging.Fields{"task": name})
}
// supervisor.Stop() // cancels the root context, waits for every task goroutine, then flags all tasks as not running
func (s *TaskSupervisor) Stop() {
	// Signal first, then block until each task goroutine has returned.
	s.cancel()
	s.waitGroup.Wait()

	markIdle := func(st *SupervisedTask) {
		st.mu.Lock()
		st.running = false
		st.mu.Unlock()
	}

	s.mu.Lock()
	s.started = false
	for key := range s.tasks {
		markIdle(s.tasks[key])
	}
	s.mu.Unlock()

	logging.Info("task supervisor stopped")
}
// running, restarts, ok := supervisor.GetTaskStatus("stats-collector")
// if !ok { /* no task registered under that name; running and restarts are zero values */ }
func (s *TaskSupervisor) GetTaskStatus(name string) (running bool, restartCount int, found bool) {
	// Hold the registry lock only for the lookup; the entry has its own lock.
	s.mu.RLock()
	entry, known := s.tasks[name]
	s.mu.RUnlock()

	if known {
		entry.mu.Lock()
		running, restartCount, found = entry.running, entry.restartCount, true
		entry.mu.Unlock()
	}
	return running, restartCount, found
}
// for name, status := range supervisor.GetAllTaskStatuses() { log(name, status.Running, status.RestartCount) }
// The returned map is a fresh snapshot; changing it does not affect the supervisor.
func (s *TaskSupervisor) GetAllTaskStatuses() map[string]TaskStatus {
	s.mu.RLock()
	defer s.mu.RUnlock()

	snapshot := make(map[string]TaskStatus, len(s.tasks))
	for key, entry := range s.tasks {
		var status TaskStatus
		status.Name = key

		// Copy the mutable fields while holding the task's own lock.
		entry.mu.Lock()
		status.Running = entry.running
		status.RestartCount = entry.restartCount
		status.LastStart = entry.lastStartTime
		entry.mu.Unlock()

		snapshot[key] = status
	}
	return snapshot
}
// TaskStatus contains the status of a supervised task.
// It is a plain value copy produced by GetAllTaskStatuses and serialises to
// JSON with the keys name, running, restartCount and lastStart.
//
//	status := supervisor.GetAllTaskStatuses()["stats-collector"]
//	if status.Running { log(status.RestartCount, status.LastStart) }
type TaskStatus struct {
// Name is the name the task was registered under.
Name string `json:"name"`
// Running is a copy of the task's running flag at snapshot time.
Running bool `json:"running"`
// RestartCount is a copy of the task's restart counter at snapshot time.
RestartCount int `json:"restartCount"`
// LastStart is the time recorded when the task was launched.
LastStart time.Time `json:"lastStart"`
}