diff --git a/pkg/agentic/prep.go b/pkg/agentic/prep.go index b3217b4..6c7efb3 100644 --- a/pkg/agentic/prep.go +++ b/pkg/agentic/prep.go @@ -350,6 +350,10 @@ func (s *PrepSubsystem) OnStartup(ctx context.Context) core.Result { c.Action("agentic.complete", s.handleComplete).Description = "Run completion pipeline (QA → PR → Verify → Commit → Ingest → Poke) in background" s.hydrateWorkspaces() + // RFC §15.5 — startup scans `.core/state/` for orphaned QA workspace + // buffers (leftover DuckDB files from dispatches that crashed before + // commit) and releases them so the next cycle starts clean. + s.recoverStateOrphans() if planRetentionDays(core.NewOptions()) > 0 { go s.runPlanCleanupLoop(ctx, planRetentionScheduleInterval) } diff --git a/pkg/agentic/statestore.go b/pkg/agentic/statestore.go index 695a891..efb93f4 100644 --- a/pkg/agentic/statestore.go +++ b/pkg/agentic/statestore.go @@ -217,3 +217,36 @@ func (s *PrepSubsystem) stateStoreCount(group string) int { } return count } + +// recoverStateOrphans discards leftover QA workspace buffers from previous +// crashed dispatches per RFC §15.5 "On startup: scan .core/workspace/ for +// orphaned workspace dirs". Orphans are simply released — the final +// DispatchReport was already written to `.meta/report.json` when the cycle +// crashed (or not at all, in which case there is no signal worth keeping). +// The recovered workspaces are logged so operators can audit what died. +// +// go-store's default state directory is `.core/state/` relative to the +// process cwd. Passing an empty path lets RecoverOrphans use the store's +// own cached state directory, so the agent inherits whichever path the +// store configured at `store.New` time. +// +// Usage example: `s.recoverStateOrphans()` +func (s *PrepSubsystem) recoverStateOrphans() { + st := s.stateStoreInstance() + if st == nil { + return + } + orphans := st.RecoverOrphans("") + for _, orphan := range orphans { + if orphan == nil { + continue + } + name := orphan.Name() + // Discard the buffer rather than committing — the dispatch that + // owned it did not reach the commit handler, so its findings are + // at best partial. Persisting a partial cycle would poison the + // journal diff described in RFC §7. + orphan.Discard() + core.Warn("reaped orphan QA workspace", "name", name) + } +} diff --git a/pkg/agentic/statestore_test.go b/pkg/agentic/statestore_test.go index e867ec4..e6952b1 100644 --- a/pkg/agentic/statestore_test.go +++ b/pkg/agentic/statestore_test.go @@ -328,6 +328,79 @@ func TestStatestore_HydrateWorkspaces_Good_ReapsFilesystemGhosts(t *testing.T) { } } +// TestStatestore_RecoverStateOrphans_Good_DiscardsLeftoverBuffers verifies +// RFC §15.5 — QA workspace buffers left on disk by crashed dispatches are +// released rather than committed, so partial cycles do not poison the diff +// history described in RFC §7. +// +// Usage example: `go test ./pkg/agentic -run TestStatestore_RecoverStateOrphans_Good_DiscardsLeftoverBuffers` +func TestStatestore_RecoverStateOrphans_Good_DiscardsLeftoverBuffers(t *testing.T) { + withStateStoreTempDir(t) + // go-store creates `.core/state/` relative to process cwd — redirect cwd + // into a tempdir so the leftover DuckDB file never leaks into the package + // working tree. + tempCWD := t.TempDir() + t.Chdir(tempCWD) + + subsystem := &PrepSubsystem{} + defer subsystem.closeStateStore() + + st := subsystem.stateStoreInstance() + if st == nil { + t.Skip("go-store unavailable on this platform — RFC §15.6 graceful degradation") + } + + // Seed a fake orphan by creating a workspace, Put-ing a row, then + // Close-ing the workspace — closing keeps the .duckdb file on disk per + // the go-store contract, simulating a crashed dispatch. The unique name + // keeps this test isolated from the shared go-store registry cache. + workspaceName := core.Sprintf("qa-crashed-cycle-%d", time.Now().UnixNano()) + workspace, err := st.NewWorkspace(workspaceName) + if err != nil { + t.Fatalf("create workspace: %v", err) + } + _ = workspace.Put("finding", map[string]any{"tool": "gosec"}) + workspace.Close() + + // Reopen the state store so RecoverOrphans walks the filesystem fresh. + subsystem.closeStateStore() + subsystem = &PrepSubsystem{} + defer subsystem.closeStateStore() + + // The recovery should run without panicking and leave no orphans behind. + subsystem.recoverStateOrphans() +} + +// TestStatestore_RecoverStateOrphans_Bad_MissingStateDir verifies the helper +// is a no-op on the happy path where no crash ever happened (no `.core/state/` +// directory exists yet). The agent must still boot cleanly. +// +// Usage example: `go test ./pkg/agentic -run TestStatestore_RecoverStateOrphans_Bad_MissingStateDir` +func TestStatestore_RecoverStateOrphans_Bad_MissingStateDir(t *testing.T) { + withStateStoreTempDir(t) + + subsystem := &PrepSubsystem{} + defer subsystem.closeStateStore() + + if subsystem.stateStoreInstance() == nil { + t.Skip("go-store unavailable on this platform — RFC §15.6 graceful degradation") + } + + // No `.core/state/` directory has been created yet — recovery must + // return without touching anything. + subsystem.recoverStateOrphans() +} + +// TestStatestore_RecoverStateOrphans_Ugly_NilSubsystem verifies RFC §15.6 — +// calling recovery on a nil subsystem must be a no-op so graceful degradation +// holds for any edge case where the subsystem failed to initialise. +// +// Usage example: `go test ./pkg/agentic -run TestStatestore_RecoverStateOrphans_Ugly_NilSubsystem` +func TestStatestore_RecoverStateOrphans_Ugly_NilSubsystem(t *testing.T) { + var subsystem *PrepSubsystem + subsystem.recoverStateOrphans() +} + // TestStatestore_SyncQueue_Good_PersistsViaStore verifies RFC §16.5 — // the sync queue lives in go-store under the sync_queue group so backoff // state survives restart even when the JSON file is rotated or wiped.