agent/pkg/runner/runner.go
Snider 39914fbf14 refactor: AX compliance sweep — replace banned stdlib imports with core primitives
Replaced fmt, strings, sort, os, io, sync, encoding/json, path/filepath,
errors, log, reflect with core.Sprintf, core.E, core.Contains, core.Trim,
core.Split, core.Join, core.JoinPath, slices.Sort, c.Fs(), c.Lock(),
core.JSONMarshal, core.ReadAll and other CoreGO v0.8.0 primitives.

Framework boundary exceptions preserved where stdlib types are required
by external interfaces (Gin, net/http, CGo, Wails, bubbletea).

Co-Authored-By: Virgil <virgil@lethean.io>
2026-04-13 09:32:00 +01:00

472 lines
14 KiB
Go

// SPDX-License-Identifier: EUPL-1.2
// service := runner.New()
// service.TrackWorkspace("core/go-io/task-5", &runner.WorkspaceStatus{Status: "running", Agent: "codex"})
package runner
import (
"context"
"time"
"dappco.re/go/agent/pkg/agentic"
"dappco.re/go/agent/pkg/messages"
core "dappco.re/go/core"
)
// Options is the runner service's option set. It has no fields; it exists as
// the type parameter of the embedded core.ServiceRuntime.
//
//	options := runner.Options{}
type Options struct{}
// Service keeps an in-memory registry of agent workspaces and gates dispatch
// behind a freeze flag.
//
//	service := runner.New()
//	service.TrackWorkspace("core/go-io/task-5", &runner.WorkspaceStatus{Status: "running", Agent: "codex"})
type Service struct {
// Embedded Core runtime. New() leaves it nil; Register() fills it in.
*core.ServiceRuntime[Options]
// pokeCh wakes runLoop. It is nil until startRunner creates it with a buffer of one.
pokeCh chan struct{}
// dispatchLock is the one-slot channel actionDispatch hands to lock() as its fallback.
dispatchLock chan struct{}
// drainLock is a second one-slot lock channel; its user is outside this
// view — presumably drainQueue, TODO confirm.
drainLock chan struct{}
// frozen makes actionDispatch reject requests while true.
frozen bool
// backoff and failCount are allocated by New() but not read in this view —
// presumably per-agent retry bookkeeping, TODO confirm against their users.
backoff map[string]time.Time
failCount map[string]int
// workspaces maps a workspace name to its current status.
workspaces *core.Registry[*WorkspaceStatus]
}
// lock takes the mutex called name and returns the function that releases it.
//
// With a Core runtime attached the named Core lock is used. A standalone
// service instead fills the single slot of fallback, so a second caller blocks
// on the send until the first one drains the channel again.
//
//	unlock := s.lock("runner.dispatch", s.dispatchLock)
//	defer unlock()
func (s *Service) lock(name string, fallback chan struct{}) (unlock func()) {
	if s.ServiceRuntime == nil {
		// Standalone: the channel itself is the lock.
		fallback <- struct{}{}
		return func() { <-fallback }
	}

	named := s.Core().Lock(name).Mutex
	named.Lock()
	return named.Unlock
}
// channelSender is the capability HandleIPCEvents needs from the service
// registered as "mcp": pushing a payload onto a named channel. A registered
// service that does not implement it is silently skipped.
type channelSender interface {
ChannelSend(ctx context.Context, channel string, data any)
}
// New builds a standalone Service: both lock channels have one slot, the
// backoff and failure maps are empty, and the workspace registry is ready for
// use. No Core runtime is attached and no run loop is started.
//
//	service := runner.New()
//	service.TrackWorkspace("core/go-io/task-5", &runner.WorkspaceStatus{Status: "running", Agent: "codex"})
func New() *Service {
	service := new(Service)

	// One slot each: a filled slot means "held" (see lock).
	service.dispatchLock = make(chan struct{}, 1)
	service.drainLock = make(chan struct{}, 1)

	service.backoff = map[string]time.Time{}
	service.failCount = map[string]int{}
	service.workspaces = core.NewRegistry[*WorkspaceStatus]()

	return service
}
// Register is the Core service factory for the runner. It creates the service,
// attaches the Core runtime, loads the agents configuration and publishes it
// into Core config under the "agents.*" keys, then returns the service as the
// result value.
//
//	c := core.New(core.WithService(runner.Register))
//	service := c.Service("runner")
func Register(coreApp *core.Core) core.Result {
	service := New()
	service.ServiceRuntime = core.NewServiceRuntime(coreApp, Options{})

	agents := service.loadAgentsConfig()
	coreApp.Config().Set("agents.concurrency", agents.Concurrency)
	coreApp.Config().Set("agents.rates", agents.Rates)
	coreApp.Config().Set("agents.dispatch", agents.Dispatch)
	coreApp.Config().Set("agents.config_path", core.JoinPath(CoreRoot(), "agents.yaml"))

	// Expose the codex total separately; it stays 0 when codex has no entry.
	var codexTotal int
	if codex, found := agents.Concurrency["codex"]; found {
		codexTotal = codex.Total
	}
	coreApp.Config().Set("agents.codex_limit_debug", codexTotal)

	return core.Result{Value: service, OK: true}
}
// OnStartup wires the service into Core: it registers the six runner.* actions
// and the workspace query handler, reloads the workspace registry from the
// workspace status files, and starts the background run loop. The context is
// not used. It always reports success.
//
//	c.Action("runner.dispatch").Run(ctx, core.NewOptions(
//		core.Option{Key: "repo", Value: "go-io"},
//		core.Option{Key: "agent", Value: "codex"},
//	))
//	c.Action("runner.status").Run(ctx, core.NewOptions())
func (s *Service) OnStartup(ctx context.Context) core.Result {
	c := s.Core()

	// Dispatch and inspection.
	c.Action("runner.dispatch", s.actionDispatch).Description = "Dispatch a subagent (checks frozen + concurrency)"
	c.Action("runner.status", s.actionStatus).Description = "Query workspace status"

	// Queue control.
	c.Action("runner.start", s.actionStart).Description = "Unfreeze dispatch queue"
	c.Action("runner.stop", s.actionStop).Description = "Freeze dispatch queue (graceful)"
	c.Action("runner.kill", s.actionKill).Description = "Kill all running agents (hard stop)"
	c.Action("runner.poke", s.actionPoke).Description = "Drain next queued task"

	// Populate the registry before queries can be answered or the loop drains.
	s.hydrateWorkspaces()
	c.RegisterQuery(s.handleWorkspaceQuery)
	s.startRunner()

	return core.Result{OK: true}
}
// OnShutdown freezes the dispatch queue and reports success. The context is
// ignored. Only the frozen flag changes; nothing in this method stops the
// goroutine launched by startRunner.
//
//	result := service.OnShutdown(context.Background())
//	if result.OK {
//		core.Println(service.IsFrozen())
//	}
func (s *Service) OnShutdown(_ context.Context) core.Result {
s.frozen = true
return core.Result{OK: true}
}
// HandleIPCEvents reacts to agent lifecycle messages. It always returns OK;
// message types other than the three below are ignored.
//
//   - messages.AgentStarted: publishes an "agent.status" notification with
//     status "started".
//   - messages.AgentCompleted: moves the matching workspace(s) that are still
//     "running" to the reported status and clears their PID, publishes an
//     "agent.status" notification, then pokes the run loop. With an empty
//     Workspace every running workspace of the event's repo is updated.
//   - messages.PokeQueue: drains the queue immediately.
//
// Notifications go to the service registered as "mcp" and are dropped when it
// is absent or cannot send on channels.
//
//	service.HandleIPCEvents(c, messages.PokeQueue{})
//
//	service.HandleIPCEvents(c, messages.AgentCompleted{
//		Agent: "codex", Repo: "go-io", Workspace: "core/go-io/task-5", Status: "completed",
//	})
func (s *Service) HandleIPCEvents(coreApp *core.Core, msg core.Message) core.Result {
	// notify hands data to the "mcp" service, if there is a usable one.
	notify := func(channel string, data any) {
		mcp := coreApp.Service("mcp")
		if !mcp.OK {
			return
		}
		if sender, ok := mcp.Value.(channelSender); ok {
			sender.ChannelSend(context.Background(), channel, data)
		}
	}

	// totalLimit returns the configured total for a base agent, or 0 when the
	// config entry is missing or has an unexpected shape.
	totalLimit := func(agentName string) int {
		configured := coreApp.Config().Get("agents.concurrency")
		if !configured.OK {
			return 0
		}
		limits, ok := configured.Value.(map[string]ConcurrencyLimit)
		if !ok {
			return 0
		}
		if entry, found := limits[agentName]; found {
			return entry.Total
		}
		return 0
	}

	// announce publishes the agent's status together with how many agents of
	// the same base kind are running and how many are allowed.
	announce := func(status, repo, agent, workspace string) {
		base := baseAgent(agent)
		running := s.countRunningByAgent(base)
		limit := totalLimit(base)
		notify("agent.status", &AgentNotification{
			Status:    status,
			Repo:      repo,
			Agent:     agent,
			Workspace: workspace,
			Running:   running,
			Limit:     limit,
		})
	}

	switch ev := msg.(type) {
	case messages.AgentStarted:
		announce("started", ev.Repo, ev.Agent, ev.Workspace)

	case messages.AgentCompleted:
		if ev.Workspace == "" {
			// No workspace named: settle every running workspace of the repo.
			s.workspaces.Each(func(_ string, tracked *WorkspaceStatus) {
				if tracked.Repo == ev.Repo && tracked.Status == "running" {
					tracked.Status = ev.Status
					tracked.PID = 0
				}
			})
		} else if found := s.workspaces.Get(ev.Workspace); found.OK {
			if tracked, ok := found.Value.(*WorkspaceStatus); ok && tracked.Status == "running" {
				tracked.Status = ev.Status
				tracked.PID = 0
			}
		}
		// Counted after the update so the finished agent no longer shows as running.
		announce(ev.Status, ev.Repo, ev.Agent, ev.Workspace)
		s.Poke()

	case messages.PokeQueue:
		s.drainQueueAndNotify(coreApp)
	}

	return core.Result{OK: true}
}
// IsFrozen reports whether dispatch is currently frozen. The flag is read
// directly, without taking any lock.
//
//	if s.IsFrozen() { return "queue is frozen" }
func (s *Service) IsFrozen() bool {
return s.frozen
}
// Poke asks the run loop to drain the queue soon. It never blocks: if a poke
// is already waiting in the channel the new one is dropped, and before
// startRunner has created the channel the call does nothing.
//
//	s.Poke()
func (s *Service) Poke() {
	if s.pokeCh != nil {
		select {
		case s.pokeCh <- struct{}{}:
			// Wake-up queued.
		default:
			// One is already pending; that is enough.
		}
	}
}
// TrackWorkspace stores status under name in the in-memory registry and then
// removes the "pending/<repo>" reservation that actionDispatch creates for the
// same repo.
//
// status may be a *WorkspaceStatus (stored as-is), an *agentic.WorkspaceStatus
// (converted), or any other value whose JSON encoding decodes into a
// WorkspaceStatus. Nothing is stored when the registry is nil, when the
// resulting pointer is nil, or when the JSON round-trip fails.
//
//	s.TrackWorkspace("core/go-io/task-5", &WorkspaceStatus{Status: "running", Agent: "codex"})
//	s.TrackWorkspace("core/go-io/task-5", &agentic.WorkspaceStatus{Status: "running", Agent: "codex"})
func (s *Service) TrackWorkspace(name string, status any) {
	if s.workspaces == nil {
		return
	}

	var tracked *WorkspaceStatus
	switch typed := status.(type) {
	case *WorkspaceStatus:
		tracked = typed
	case *agentic.WorkspaceStatus:
		tracked = runnerWorkspaceStatusFromAgentic(typed)
	default:
		// Unknown shape: round-trip through JSON and keep whatever fields match.
		var decoded WorkspaceStatus
		if result := core.JSONUnmarshalString(core.JSONMarshalString(status), &decoded); result.OK {
			tracked = &decoded
		}
	}
	if tracked == nil {
		return
	}

	s.workspaces.Set(name, tracked)

	// Release the dispatch reservation for this repo — but never the entry
	// just stored. Without this check, tracking a workspace under its own
	// "pending/<repo>" key would store it and immediately erase it again.
	if reservation := core.Concat("pending/", tracked.Repo); reservation != name {
		s.workspaces.Delete(reservation)
	}
}
// Workspaces returns the service's workspace registry itself, not a copy, so
// callers see and can change the live entries.
//
//	s.Workspaces().Each(func(name string, workspaceStatus *WorkspaceStatus) { core.Println(name, workspaceStatus.Status) })
func (s *Service) Workspaces() *core.Registry[*WorkspaceStatus] {
return s.workspaces
}
// handleWorkspaceQuery answers WorkspaceQuery lookups; any other query type
// gets the zero Result so another handler can respond.
//
// A non-empty Name wins and returns that registry entry. Otherwise a non-empty
// Status returns the names of all workspaces in that status. With both empty
// the whole registry is returned.
//
//	result := c.QUERY(runner.WorkspaceQuery{Name: "core/go-io/task-42"})
//	result := c.QUERY(runner.WorkspaceQuery{Status: "running"})
func (s *Service) handleWorkspaceQuery(_ *core.Core, query core.Query) core.Result {
	wanted, isWorkspaceQuery := query.(WorkspaceQuery)
	switch {
	case !isWorkspaceQuery:
		return core.Result{}
	case wanted.Name != "":
		return s.workspaces.Get(wanted.Name)
	case wanted.Status == "":
		return core.Result{Value: s.workspaces, OK: true}
	}

	// Status filter: collect the names whose status matches exactly.
	var matching []string
	s.workspaces.Each(func(name string, tracked *WorkspaceStatus) {
		if tracked.Status == wanted.Status {
			matching = append(matching, name)
		}
	})
	return core.Result{Value: matching, OK: true}
}
// actionDispatch reserves a dispatch slot for options "agent" (default
// "codex") and "repo". It fails when the queue is frozen or when
// canDispatchAgent refuses the agent. On success it records a
// "pending/<repo>" placeholder with status "running" and PID -1, so the slot
// is visible in the registry before a real workspace is tracked.
func (s *Service) actionDispatch(_ context.Context, options core.Options) core.Result {
	const op = "runner.actionDispatch"

	if s.frozen {
		return core.Result{Value: core.E(op, "queue is frozen", nil), OK: false}
	}

	agent := options.String("agent")
	if agent == "" {
		agent = "codex"
	}
	repo := options.String("repo")

	// Check and reserve under one lock so two dispatches cannot both take the last slot.
	release := s.lock("runner.dispatch", s.dispatchLock)
	defer release()

	if allowed, why := s.canDispatchAgent(agent); !allowed {
		return core.Result{Value: core.E(op, core.Concat("queue at capacity: ", why), nil), OK: false}
	}

	reservation := &WorkspaceStatus{
		Status: "running",
		Agent:  agent,
		Repo:   repo,
		PID:    -1,
	}
	s.workspaces.Set(core.Concat("pending/", repo), reservation)

	return core.Result{OK: true}
}
// actionStatus summarises the registry as a map[string]int with the keys
// "running", "queued", "completed", "failed" and "total". "merged" and
// "ready-for-review" count as completed, "blocked" counts as failed, and
// workspaces in any other status are left out of every count, total included.
func (s *Service) actionStatus(_ context.Context, _ core.Options) core.Result {
	// Which reported bucket each known status lands in.
	bucketFor := map[string]string{
		"running":          "running",
		"queued":           "queued",
		"completed":        "completed",
		"merged":           "completed",
		"ready-for-review": "completed",
		"failed":           "failed",
		"blocked":          "failed",
	}

	summary := map[string]int{
		"running":   0,
		"queued":    0,
		"completed": 0,
		"failed":    0,
		"total":     0,
	}
	s.workspaces.Each(func(_ string, tracked *WorkspaceStatus) {
		bucket, known := bucketFor[tracked.Status]
		if !known {
			return
		}
		summary[bucket]++
		summary["total"]++
	})

	return core.Result{Value: summary, OK: true}
}
// actionStart unfreezes the dispatch queue and pokes the run loop so that
// waiting work is picked up without waiting for the next tick.
func (s *Service) actionStart(_ context.Context, _ core.Options) core.Result {
	const message = "dispatch started"

	s.frozen = false
	s.Poke()

	return core.Result{Value: message, OK: true}
}
// actionStop freezes the dispatch queue. Only the flag changes: running agents
// and registry entries are not touched here.
func (s *Service) actionStop(_ context.Context, _ core.Options) core.Result {
	const message = "queue frozen"

	s.frozen = true

	return core.Result{Value: message, OK: true}
}
// actionKill is the hard stop. It freezes the queue and then walks every
// workspace status path:
//
//   - "running" workspaces have their process terminated when a positive PID is
//     recorded, and are rewritten as "failed" with PID 0 both in the status
//     file and in the registry. The result of writing the status file is
//     discarded.
//   - "queued" workspaces have their directory deleted and are removed from the
//     registry; a failed delete is logged and the workspace is left in place.
//
// The returned string reports how many processes were actually terminated and
// how many queued workspaces were cleared.
func (s *Service) actionKill(_ context.Context, _ core.Options) core.Result {
	s.frozen = true

	// A standalone service has no Core; ProcessTerminate then receives nil.
	var coreApp *core.Core
	if s.ServiceRuntime != nil {
		coreApp = s.Core()
	}

	var killed, cleared int
	handledQueued := map[string]bool{}

	for _, statusFile := range agentic.WorkspaceStatusPaths() {
		dir := core.PathDir(statusFile)
		current, ok := ReadStatusResult(dir).Value.(*WorkspaceStatus)
		if !ok || current == nil {
			continue
		}

		if current.Status == "running" {
			if current.PID > 0 && agentic.ProcessTerminate(coreApp, "", current.PID) {
				killed++
			}
			// Marked failed whether or not a process was actually terminated.
			current.Status = "failed"
			current.PID = 0
			_ = WriteStatus(dir, current)
			if s.workspaces != nil {
				s.workspaces.Set(agentic.WorkspaceName(dir), current)
			}
			continue
		}

		if current.Status != "queued" {
			continue
		}

		queuedName := agentic.WorkspaceName(dir)
		if handledQueued[queuedName] {
			// The same workspace name was already processed from another status path.
			continue
		}
		handledQueued[queuedName] = true

		if removed := fs.DeleteAll(dir); !removed.OK {
			core.Warn("runner.actionKill: failed to delete queued workspace", "workspace", queuedName, "reason", core.Sprint(removed.Value))
			continue
		}
		cleared++
		if s.workspaces != nil {
			s.workspaces.Delete(queuedName)
		}
	}

	return core.Result{Value: core.Sprintf("killed %d agents, cleared %d queued", killed, cleared), OK: true}
}
// actionPoke drains the queue once, right now, and reports success.
//
// Core is looked up only when a runtime is attached, matching lock and
// actionKill: a service built with New() has a nil embedded runtime, and
// drainQueueAndNotify already accepts a nil Core (it then skips the
// QueueDrained broadcast).
func (s *Service) actionPoke(_ context.Context, _ core.Options) core.Result {
	var coreApp *core.Core
	if s.ServiceRuntime != nil {
		coreApp = s.Core()
	}
	s.drainQueueAndNotify(coreApp)
	return core.Result{OK: true}
}
// startRunner creates the one-slot poke channel, sets the initial freeze state
// and launches the run loop goroutine. The queue starts frozen unless the
// CORE_AGENT_DISPATCH environment value is exactly "1".
func (s *Service) startRunner() {
	s.pokeCh = make(chan struct{}, 1)

	// Dispatch is opt-in: anything other than "1" leaves the queue frozen.
	s.frozen = core.Env("CORE_AGENT_DISPATCH") != "1"

	go s.runLoop()
}
// runLoop drains the queue every 30 seconds and whenever a poke arrives. The
// loop has no exit path, so the goroutine running it lives until the process
// ends.
func (s *Service) runLoop() {
	tick := time.NewTicker(30 * time.Second)
	defer tick.Stop()

	for {
		// Both wake-up sources lead to the same work.
		select {
		case <-tick.C:
		case <-s.pokeCh:
		}
		s.drainQueueAndNotify(s.Core())
	}
}
// drainQueueAndNotify runs one drain pass and, when a Core is supplied,
// broadcasts messages.QueueDrained carrying the drain's result. A nil Core
// skips only the broadcast; the drain still happens.
func (s *Service) drainQueueAndNotify(coreApp *core.Core) {
	completed := s.drainQueue()
	if coreApp == nil {
		return
	}
	coreApp.ACTION(messages.QueueDrained{Completed: completed})
}
// hydrateWorkspaces fills the registry from the workspace status files,
// creating the registry first if it is missing. Unreadable or malformed
// statuses are skipped. A workspace recorded as "running" is loaded as
// "queued"; all other fields are kept as read.
func (s *Service) hydrateWorkspaces() {
	if s.workspaces == nil {
		s.workspaces = core.NewRegistry[*WorkspaceStatus]()
	}

	for _, statusFile := range agentic.WorkspaceStatusPaths() {
		dir := core.PathDir(statusFile)

		loaded := ReadStatusResult(dir)
		restored, isStatus := loaded.Value.(*WorkspaceStatus)
		if !loaded.OK || !isStatus || restored == nil {
			continue
		}

		// Demote anything that was mid-run when the status was last written.
		if restored.Status == "running" {
			restored.Status = "queued"
		}
		s.workspaces.Set(agentic.WorkspaceName(dir), restored)
	}
}
// AgentNotification is the payload HandleIPCEvents sends on the "agent.status"
// channel when an agent starts or completes.
//
//	notification := runner.AgentNotification{Status: "started", Repo: "go-io", Agent: "codex", Workspace: "core/go-io/task-5", Running: 1, Limit: 2}
type AgentNotification struct {
// Status is "started" for a start event, otherwise the completion event's status.
Status string `json:"status"`
Repo string `json:"repo"`
// Agent is the agent name exactly as carried by the event.
Agent string `json:"agent"`
Workspace string `json:"workspace"`
// Running is the running count for the agent's base name at send time.
Running int `json:"running"`
// Limit is the configured total for the agent's base name; 0 when none is configured.
Limit int `json:"limit"`
}
// WorkspaceQuery is the query type answered by the runner's workspace query
// handler. Name takes precedence over Status; with both empty the handler
// returns the whole registry.
//
//	result := c.QUERY(runner.WorkspaceQuery{Status: "running"})
type WorkspaceQuery struct {
// Name selects a single workspace by its registry key.
Name string
// Status selects the names of all workspaces whose status matches exactly.
Status string
}
// WorkspaceStatus is the runner's record of one agent workspace, as held in
// the registry and encoded to JSON with the tags below.
//
//	workspaceStatus := &runner.WorkspaceStatus{Status: "running", Agent: "codex", Repo: "go-io", PID: 12345}
type WorkspaceStatus struct {
// Status values used in this file: "running", "queued", "completed",
// "merged", "ready-for-review", "failed", "blocked".
Status string `json:"status"`
Agent string `json:"agent"`
Repo string `json:"repo"`
Org string `json:"org,omitempty"`
Task string `json:"task,omitempty"`
Branch string `json:"branch,omitempty"`
// PID is -1 on a dispatch placeholder and is reset to 0 when a workspace
// leaves "running" through a completion event or a kill.
PID int `json:"pid,omitempty"`
Question string `json:"question,omitempty"`
PRURL string `json:"pr_url,omitempty"`
StartedAt time.Time `json:"started_at"`
// Runs is presumably the number of attempts so far — it is not written in
// this view, TODO confirm.
Runs int `json:"runs"`
}