fix(runner): reserve slot on approval to prevent TOCTOU race

Runner now creates a reservation entry (PID=-1) in the workspace Registry
immediately when approving a dispatch. This prevents parallel requests
from all seeing count < limit before any spawn completes.

countRunningByAgent and countRunningByModel always count reservations:
an entry with PID < 0 skips the process-liveness check. Agentic overwrites
the reservation with the real PID via TrackWorkspace after spawn.

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-03-26 11:23:04 +00:00
parent 8911dc5f42
commit 53db749738
3 changed files with 17 additions and 5 deletions

View file

@ -506,6 +506,7 @@ func (s *PrepSubsystem) dispatch(ctx context.Context, req *mcp.CallToolRequest,
if s.ServiceRuntime != nil {
r := s.Core().Action("runner.dispatch").Run(ctx, core.NewOptions(
core.Option{Key: "agent", Value: input.Agent},
core.Option{Key: "repo", Value: input.Repo},
))
if !r.OK {
// Runner denied — queue it

View file

@ -147,7 +147,9 @@ func (s *Service) countRunningByAgent(agent string) int {
count := 0
s.workspaces.Each(func(_ string, st *WorkspaceStatus) {
if st.Status == "running" && baseAgent(st.Agent) == agent {
if st.PID > 0 && syscall.Kill(st.PID, 0) == nil {
// PID < 0 = reservation (pending spawn), always count
// PID > 0 = verify process is alive
if st.PID < 0 || (st.PID > 0 && syscall.Kill(st.PID, 0) == nil) {
count++
}
}
@ -184,7 +186,7 @@ func (s *Service) countRunningByModel(agent string) int {
count := 0
s.workspaces.Each(func(_ string, st *WorkspaceStatus) {
if st.Status == "running" && st.Agent == agent {
if st.PID > 0 && syscall.Kill(st.PID, 0) == nil {
if st.PID < 0 || (st.PID > 0 && syscall.Kill(st.PID, 0) == nil) {
count++
}
}

View file

@ -193,13 +193,12 @@ func (s *Service) actionDispatch(_ context.Context, opts core.Options) core.Resu
if s.frozen {
return core.Result{Value: "queue is frozen", OK: false}
}
// Dispatch is called by agentic via IPC — the actual spawn logic
// is delegated back to agentic which owns workspace prep + prompt building.
// Runner just gates: frozen check + concurrency check.
agent := opts.String("agent")
if agent == "" {
agent = "codex"
}
repo := opts.String("repo")
s.dispatchMu.Lock()
defer s.dispatchMu.Unlock()
@ -208,6 +207,16 @@ func (s *Service) actionDispatch(_ context.Context, opts core.Options) core.Resu
return core.Result{Value: "queued — at concurrency limit", OK: false}
}
// Reserve the slot immediately — before returning to agentic.
// Without this, parallel dispatches all see count < limit.
name := core.Concat("pending/", repo)
s.workspaces.Set(name, &WorkspaceStatus{
Status: "running",
Agent: agent,
Repo: repo,
PID: -1, // placeholder — agentic will update with real PID via TrackWorkspace
})
return core.Result{OK: true}
}