package mining

import (
	"context"
	"sync"
	"time"

	"forge.lthn.ai/Snider/Mining/pkg/logging"
)

// TaskFunc is the unit of work a TaskSupervisor runs. It receives a context
// that is cancelled when the supervisor stops or when the task's supervising
// goroutine exits.
//
//	supervisor.RegisterTask("cleanup", func(ctx context.Context) { runCleanup(ctx) }, 30*time.Second, 10)
type TaskFunc func(ctx context.Context)

// SupervisedTask holds the configuration and runtime state of one registered
// task. The mutable fields (restartCount, running, lastStartTime, cancel) are
// guarded by mu.
//
//	task := &SupervisedTask{name: "stats-collector", restartDelay: 5 * time.Second, maxRestarts: -1}
type SupervisedTask struct {
	name          string
	task          TaskFunc
	restartDelay  time.Duration
	maxRestarts   int
	restartCount  int
	running       bool
	lastStartTime time.Time
	cancel        context.CancelFunc
	mu            sync.Mutex
}

// TaskSupervisor runs registered tasks in their own goroutines, recovers
// panics raised by a task and re-runs the task after its restart delay until
// its restart budget is used up or the supervisor is stopped.
//
//	supervisor := NewTaskSupervisor()
//	supervisor.RegisterTask("stats-collector", collectStats, 5*time.Second, -1)
//	supervisor.Start(); defer supervisor.Stop()
type TaskSupervisor struct {
	tasks     map[string]*SupervisedTask
	ctx       context.Context
	cancel    context.CancelFunc
	waitGroup sync.WaitGroup
	mu        sync.RWMutex
	started   bool
}

// NewTaskSupervisor returns an empty supervisor with a fresh cancellable
// context. No goroutine is started until Start is called.
//
//	supervisor := NewTaskSupervisor()
//	supervisor.RegisterTask("stats-collector", collectStats, 5*time.Second, -1)
//	supervisor.Start()
func NewTaskSupervisor() *TaskSupervisor {
	ctx, cancel := context.WithCancel(context.Background())
	return &TaskSupervisor{
		tasks:  make(map[string]*SupervisedTask),
		ctx:    ctx,
		cancel: cancel,
	}
}

// RegisterTask stores task under name, replacing any entry already registered
// with that name. restartDelay is the pause between a task returning (or
// panicking) and its next run. A negative maxRestarts means the task is re-run
// without limit; otherwise the task stops being re-run once it has exited more
// than maxRestarts times.
//
// Registering does not start the task; it is launched by the next call to
// Start that is not a no-op.
//
//	supervisor.RegisterTask("stats-collector", collectStats, 5*time.Second, -1)
//	supervisor.RegisterTask("cleanup", runCleanup, 30*time.Second, 10)
func (s *TaskSupervisor) RegisterTask(name string, task TaskFunc, restartDelay time.Duration, maxRestarts int) {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.tasks[name] = &SupervisedTask{
		name:         name,
		task:         task,
		restartDelay: restartDelay,
		maxRestarts:  maxRestarts,
	}
}

// Start launches every registered task. It is a no-op if the supervisor is
// already started. Start may be called again after Stop: a new context is
// created when the previous one has been cancelled.
//
//	supervisor.Start() // begins all registered tasks; no-op if already started
func (s *TaskSupervisor) Start() {
	// The write lock is held for the whole call so that the started flag, the
	// context swap and the task launches are observed as one step by Stop and
	// RegisterTask.
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.started {
		return
	}
	s.started = true

	// Stop cancels the supervisor context permanently; without a fresh one the
	// goroutines launched below would exit immediately.
	if s.ctx == nil || s.ctx.Err() != nil {
		s.ctx, s.cancel = context.WithCancel(context.Background())
	}

	for name, task := range s.tasks {
		s.startTask(name, task)
	}
}

// startTask launches the supervising goroutine for st unless st is already
// marked running. The goroutine runs the task, recovers any panic, and re-runs
// the task after st.restartDelay until the restart budget is exhausted or the
// supervisor context is cancelled.
//
//	s.startTask("stats-collector", s.tasks["stats-collector"])
func (s *TaskSupervisor) startTask(name string, st *SupervisedTask) {
	// Snapshot the context once so the goroutine never reads s.ctx, which
	// Start may replace on a later restart.
	supervisorCtx := s.ctx

	st.mu.Lock()
	if st.running {
		st.mu.Unlock()
		return
	}
	st.running = true
	st.lastStartTime = time.Now()
	taskCtx, taskCancel := context.WithCancel(supervisorCtx)
	st.cancel = taskCancel
	st.mu.Unlock()

	s.waitGroup.Add(1)
	go func() {
		defer s.waitGroup.Done()
		// On every exit path release the per-task context and clear the
		// running flag so status queries stop reporting a finished task as
		// running.
		defer func() {
			taskCancel()
			st.mu.Lock()
			st.running = false
			st.mu.Unlock()
		}()

		for {
			// Do not begin another run once the supervisor is shutting down.
			select {
			case <-supervisorCtx.Done():
				return
			default:
			}

			// Run the task with panic recovery so one failing run does not
			// take down the process.
			func() {
				defer func() {
					if r := recover(); r != nil {
						logging.Error("supervised task panicked", logging.Fields{
							"task":  name,
							"panic": r,
						})
					}
				}()
				st.task(taskCtx)
			}()

			// Copy the fields needed below while holding the lock.
			st.mu.Lock()
			st.restartCount++
			restartCount := st.restartCount
			maxRestarts := st.maxRestarts
			restartDelay := st.restartDelay
			st.mu.Unlock()

			// A negative maxRestarts means "restart forever".
			if maxRestarts >= 0 && restartCount > maxRestarts {
				logging.Warn("supervised task reached max restarts", logging.Fields{
					"task":       name,
					"maxRestart": maxRestarts,
				})
				return
			}

			// Wait out the restart delay, giving up early on shutdown. An
			// explicit timer is used so it can be stopped instead of lingering
			// until it fires.
			timer := time.NewTimer(restartDelay)
			select {
			case <-supervisorCtx.Done():
				timer.Stop()
				return
			case <-timer.C:
				logging.Info("restarting supervised task", logging.Fields{
					"task":         name,
					"restartCount": restartCount,
				})
			}
		}
	}()

	logging.Info("started supervised task", logging.Fields{"task": name})
}

// Stop cancels the supervisor context, waits for every supervising goroutine
// to return, and marks the supervisor and all tasks as not running. It blocks
// until each task function has returned, so tasks are expected to honour
// context cancellation.
//
//	supervisor.Stop() // cancels all tasks and waits for clean exit
func (s *TaskSupervisor) Stop() {
	// Read cancel under the lock because Start may replace it.
	s.mu.RLock()
	cancel := s.cancel
	s.mu.RUnlock()

	if cancel != nil {
		cancel()
	}
	s.waitGroup.Wait()

	s.mu.Lock()
	s.started = false
	for _, task := range s.tasks {
		task.mu.Lock()
		task.running = false
		task.mu.Unlock()
	}
	s.mu.Unlock()

	logging.Info("task supervisor stopped")
}

// GetTaskStatus reports whether the named task is currently running and how
// many times it has exited (returned or panicked). found is false when no task
// is registered under name.
//
//	running, restarts, ok := supervisor.GetTaskStatus("stats-collector")
func (s *TaskSupervisor) GetTaskStatus(name string) (running bool, restartCount int, found bool) {
	s.mu.RLock()
	task, ok := s.tasks[name]
	s.mu.RUnlock()

	if !ok {
		return false, 0, false
	}

	task.mu.Lock()
	defer task.mu.Unlock()
	return task.running, task.restartCount, true
}

// GetAllTaskStatuses returns a snapshot of every registered task keyed by task
// name. The returned map is newly allocated on each call.
//
//	for name, status := range supervisor.GetAllTaskStatuses() { log(name, status.Running) }
func (s *TaskSupervisor) GetAllTaskStatuses() map[string]TaskStatus {
	s.mu.RLock()
	defer s.mu.RUnlock()

	statuses := make(map[string]TaskStatus, len(s.tasks))
	for name, task := range s.tasks {
		task.mu.Lock()
		statuses[name] = TaskStatus{
			Name:         name,
			Running:      task.running,
			RestartCount: task.restartCount,
			LastStart:    task.lastStartTime,
		}
		task.mu.Unlock()
	}
	return statuses
}

// TaskStatus is a point-in-time copy of a supervised task's state as returned
// by GetAllTaskStatuses. LastStart is the time the task's supervising
// goroutine was launched.
//
//	status := supervisor.GetAllTaskStatuses()["stats-collector"]
//	if status.Running { log(status.RestartCount, status.LastStart) }
type TaskStatus struct {
	Name         string    `json:"name"`
	Running      bool      `json:"running"`
	RestartCount int       `json:"restartCount"`
	LastStart    time.Time `json:"lastStart"`
}