fix(runner): reserve slot on approval to prevent TOCTOU race
Runner now creates a reservation entry (PID=-1) in the workspace Registry immediately when approving a dispatch. This prevents parallel requests from all seeing count < limit before any spawn completes. Reservations (PID < 0) are always counted by countRunningByAgent and countRunningByModel. Agentic overwrites the reservation with the real PID via TrackWorkspace after spawn. Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
8911dc5f42
commit
53db749738
3 changed files with 17 additions and 5 deletions
|
|
@ -506,6 +506,7 @@ func (s *PrepSubsystem) dispatch(ctx context.Context, req *mcp.CallToolRequest,
|
|||
if s.ServiceRuntime != nil {
|
||||
r := s.Core().Action("runner.dispatch").Run(ctx, core.NewOptions(
|
||||
core.Option{Key: "agent", Value: input.Agent},
|
||||
core.Option{Key: "repo", Value: input.Repo},
|
||||
))
|
||||
if !r.OK {
|
||||
// Runner denied — queue it
|
||||
|
|
|
|||
|
|
@ -147,7 +147,9 @@ func (s *Service) countRunningByAgent(agent string) int {
|
|||
count := 0
|
||||
s.workspaces.Each(func(_ string, st *WorkspaceStatus) {
|
||||
if st.Status == "running" && baseAgent(st.Agent) == agent {
|
||||
if st.PID > 0 && syscall.Kill(st.PID, 0) == nil {
|
||||
// PID < 0 = reservation (pending spawn), always count
|
||||
// PID > 0 = verify process is alive
|
||||
if st.PID < 0 || (st.PID > 0 && syscall.Kill(st.PID, 0) == nil) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
|
@ -184,7 +186,7 @@ func (s *Service) countRunningByModel(agent string) int {
|
|||
count := 0
|
||||
s.workspaces.Each(func(_ string, st *WorkspaceStatus) {
|
||||
if st.Status == "running" && st.Agent == agent {
|
||||
if st.PID > 0 && syscall.Kill(st.PID, 0) == nil {
|
||||
if st.PID < 0 || (st.PID > 0 && syscall.Kill(st.PID, 0) == nil) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -193,13 +193,12 @@ func (s *Service) actionDispatch(_ context.Context, opts core.Options) core.Resu
|
|||
if s.frozen {
|
||||
return core.Result{Value: "queue is frozen", OK: false}
|
||||
}
|
||||
// Dispatch is called by agentic via IPC — the actual spawn logic
|
||||
// is delegated back to agentic which owns workspace prep + prompt building.
|
||||
// Runner just gates: frozen check + concurrency check.
|
||||
|
||||
agent := opts.String("agent")
|
||||
if agent == "" {
|
||||
agent = "codex"
|
||||
}
|
||||
repo := opts.String("repo")
|
||||
|
||||
s.dispatchMu.Lock()
|
||||
defer s.dispatchMu.Unlock()
|
||||
|
|
@ -208,6 +207,16 @@ func (s *Service) actionDispatch(_ context.Context, opts core.Options) core.Resu
|
|||
return core.Result{Value: "queued — at concurrency limit", OK: false}
|
||||
}
|
||||
|
||||
// Reserve the slot immediately — before returning to agentic.
|
||||
// Without this, parallel dispatches all see count < limit.
|
||||
name := core.Concat("pending/", repo)
|
||||
s.workspaces.Set(name, &WorkspaceStatus{
|
||||
Status: "running",
|
||||
Agent: agent,
|
||||
Repo: repo,
|
||||
PID: -1, // placeholder — agentic will update with real PID via TrackWorkspace
|
||||
})
|
||||
|
||||
return core.Result{OK: true}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue