agent/tests/cli/restart/Taskfile.yaml
Snider 716546d0d5 feat(agent): workspace state mirror + ghost reap + sync queue via go-store
- prep.go TrackWorkspace mirrors into queue + concurrency store groups
  (previously only registry); hydrateWorkspaces reaps filesystem ghosts
  (dead PID → failed, persisted back to status.json) so cmdStatus and
  out-of-process consumers see coherent state (RFC §15.3)
- sync.go queue read/write goes through go-store first per RFC §16.5
  ("Queue persists across restarts in db.duckdb"), file remains fallback
  for graceful degradation
- statestore.go stateStoreGet helper for go-store-first reads
- tests/cli/restart — new CLI test for RFC §15.7 "dispatch → kill →
  restart → no ghost agents" dead-PID reap flow
- 4 new statestore tests: queue group mirror, concurrency refresh,
  sync queue persistence, fs ghost reap with disk write-back

Co-Authored-By: Virgil <virgil@lethean.io>
2026-04-14 12:36:44 +01:00

71 lines
2.9 KiB
YAML

version: "3"

# RFC §15.7 — `tests/cli/restart/` validates that dispatch state survives a
# kill+restart cycle without leaving ghost agents in the registry.
# `core-agent restart` is not a command: "restart" is the lifecycle under
# test. It is simulated by pre-seeding a stale workspace whose recorded PID
# is no longer alive, then confirming `core-agent status` reports that
# workspace as failed rather than running.
tasks:
  test:
    cmds:
      - |
        bash <<'EOF'
        set -euo pipefail
        source ../_lib/run.sh

        go build -trimpath -ldflags="-s -w" -o bin/core-agent ../../../cmd/core-agent

        # Use an isolated CORE_WORKSPACE so the test cannot disturb the
        # operator's own .core/ directory.
        workspace="$(mktemp -d)"
        output="$(mktemp)"
        # Remove the scratch workspace and the capture file on every exit
        # path. The explicit failure branches below dump their diagnostics
        # to stderr before exiting, so that evidence is not lost.
        # NOTE(review): assumes ../_lib/run.sh installs no EXIT trap of its
        # own, since this one would replace it — confirm.
        trap 'rm -rf "$workspace" "$output"' EXIT
        export CORE_WORKSPACE="$workspace"
        export CORE_HOME="$workspace"
        export DIR_HOME="$workspace"

        # Obtain a PID that is no longer alive: start a trivial background
        # job and wait for it, so it has already exited and been reaped.
        # A hard-coded number could belong to a live process on hosts whose
        # PID range extends past it, which would make this test flaky.
        true &
        dead_pid=$!
        wait "$dead_pid"

        # Pre-seed a workspace with status.json showing a process that is
        # not alive — simulating the "dispatch -> kill -> restart" cycle.
        # The heredoc delimiter is unquoted so $dead_pid is expanded.
        ghost_dir="$workspace/workspace/core/go-io/task-restart"
        mkdir -p "$ghost_dir"
        cat >"$ghost_dir/status.json" <<JSON
        {
          "status": "running",
          "agent": "codex:gpt-5.4",
          "repo": "go-io",
          "org": "core",
          "task": "restart-survival",
          "branch": "agent/restart-survival",
          "pid": $dead_pid,
          "started_at": "2026-04-14T12:00:00Z",
          "updated_at": "2026-04-14T12:00:00Z",
          "runs": 1
        }
        JSON

        # Run status — the new agent process must NOT report the ghost as
        # "running"; it must mark the workspace failed (RFC §15.3 ghost
        # detection on hydrate).
        run_capture_all 0 "$output" ./bin/core-agent status
        assert_contains "core/go-io/task-restart" "$output"

        # Verify the ghost did not survive as `running`: no output line may
        # pair the workspace path with the word "running". -q keeps the
        # matched line off stdout; the full capture is dumped on failure.
        if grep -Eq 'core/go-io/task-restart.*\brunning\b' "$output"; then
          printf 'expected ghost agent reaped on restart, still reports running\n' >&2
          cat "$output" >&2
          exit 1
        fi
        # A reaped dead-PID workspace is expected to be labelled failed, so
        # that label must appear somewhere in the status output.
        assert_contains "failed" "$output"

        # The reaped status should be persisted back to disk per RFC §15.3 —
        # cross-process consumers (other tooling reading status.json) must
        # see the same coherent state.
        status_file="$ghost_dir/status.json"
        if grep -q '"status":[[:space:]]*"running"' "$status_file"; then
          printf 'expected reaped status persisted to %s\n' "$status_file" >&2
          cat "$status_file" >&2
          exit 1
        fi
        EOF