fix(ax): surface atomic write failures

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-03-30 18:59:41 +00:00 committed by Snider
parent faf6b8b6fb
commit a1db312c7b
5 changed files with 74 additions and 31 deletions

View file

@ -369,10 +369,18 @@ func (s *PrepSubsystem) storeReviewOutput(repoDir, repo, reviewer, output string
core.WriteAll(r.Value, core.Concat(jsonLine, "\n"))
}
// saveRateLimitState persists rate limit info for cross-run awareness.
// saveRateLimitState writes the current rate-limit snapshot.
//
// s.saveRateLimitState(&RateLimitInfo{Limited: true, RetryAt: time.Now().Add(30 * time.Minute)})
func (s *PrepSubsystem) saveRateLimitState(info *RateLimitInfo) {
path := core.JoinPath(HomeDir(), ".core", "coderabbit-ratelimit.json")
fs.WriteAtomic(path, core.JSONMarshalString(info))
if r := fs.WriteAtomic(path, core.JSONMarshalString(info)); !r.OK {
if err, ok := r.Value.(error); ok {
core.Warn("reviewQueue: failed to persist rate limit state", "path", path, "reason", err)
return
}
core.Warn("reviewQueue: failed to persist rate limit state", "path", path)
}
}
// loadRateLimitState reads persisted rate limit info.

View file

@ -53,8 +53,8 @@ func TestReviewqueue_SaveLoadRateLimitState_Good_Roundtrip(t *testing.T) {
// but save/load use DIR_HOME. Skip if not writable.
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
info := &RateLimitInfo{
@ -79,8 +79,8 @@ func TestReviewqueue_StoreReviewOutput_Good(t *testing.T) {
// but we can verify it doesn't panic
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
assert.NotPanics(t, func() {
s.storeReviewOutput(t.TempDir(), "test-repo", "coderabbit", "No findings — LGTM")
@ -99,9 +99,9 @@ func TestReviewqueue_NoCandidates_Good(t *testing.T) {
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
codePath: root,
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
codePath: root,
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
_, out, err := s.reviewQueue(context.Background(), nil, ReviewQueueInput{DryRun: true})
@ -135,8 +135,8 @@ func TestReviewqueue_StatusFiltered_Good(t *testing.T) {
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
_, out, err := s.status(context.Background(), nil, StatusInput{})
@ -229,8 +229,8 @@ func TestReviewqueue_LoadRateLimitState_Ugly(t *testing.T) {
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
result := s.loadRateLimitState()
@ -243,8 +243,8 @@ func TestReviewqueue_BuildReviewCommand_Bad(t *testing.T) {
// Empty reviewer string — defaults to coderabbit
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
cmd, args := s.buildReviewCommand("/tmp/repo", "")
assert.Equal(t, "coderabbit", cmd)
@ -289,8 +289,8 @@ func TestReviewqueue_StoreReviewOutput_Bad(t *testing.T) {
// Empty output
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
assert.NotPanics(t, func() {
s.storeReviewOutput(t.TempDir(), "test-repo", "coderabbit", "")
@ -301,8 +301,8 @@ func TestReviewqueue_StoreReviewOutput_Ugly(t *testing.T) {
// Very large output
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
largeOutput := strings.Repeat("Finding: something is wrong on this line\n", 10000)
assert.NotPanics(t, func() {
@ -315,8 +315,8 @@ func TestReviewqueue_StoreReviewOutput_Ugly(t *testing.T) {
func TestReviewqueue_SaveRateLimitState_Good(t *testing.T) {
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
info := &RateLimitInfo{
@ -333,20 +333,38 @@ func TestReviewqueue_SaveRateLimitState_Bad(t *testing.T) {
// Save nil info
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
assert.NotPanics(t, func() {
s.saveRateLimitState(nil)
})
}
func TestReviewqueue_SaveRateLimitState_Bad_WriteFailure(t *testing.T) {
t.Setenv("CORE_HOME", "/dev/null")
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
info := &RateLimitInfo{
Limited: true,
RetryAt: time.Now().Add(5 * time.Minute).Truncate(time.Second),
Message: "write failure",
}
assert.NotPanics(t, func() {
s.saveRateLimitState(info)
})
}
func TestReviewqueue_SaveRateLimitState_Ugly(t *testing.T) {
// Save with far-future RetryAt
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
info := &RateLimitInfo{
Limited: true,
@ -364,8 +382,8 @@ func TestReviewqueue_LoadRateLimitState_Good(t *testing.T) {
// Write then load valid state
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
info := &RateLimitInfo{
@ -389,8 +407,8 @@ func TestReviewqueue_LoadRateLimitState_Bad(t *testing.T) {
// File doesn't exist — should return nil
s := &PrepSubsystem{
ServiceRuntime: core.NewServiceRuntime(testCore, AgentOptions{}),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
backoff: make(map[string]time.Time),
failCount: make(map[string]int),
}
// loadRateLimitState reads from DIR_HOME/.core/coderabbit-ratelimit.json.

View file

@ -81,8 +81,10 @@ func writeStatusResult(wsDir string, status *WorkspaceStatus) core.Result {
if r := fs.WriteAtomic(statusPath, core.JSONMarshalString(status)); !r.OK {
err, _ := r.Value.(error)
if err == nil {
core.Warn("agentic.writeStatus: failed to write status", "path", statusPath)
return core.Result{Value: core.E("writeStatus", "failed to write status", nil), OK: false}
}
core.Warn("agentic.writeStatus: failed to write status", "path", statusPath, "reason", err)
return core.Result{Value: core.E("writeStatus", "failed to write status", err), OK: false}
}
return core.Result{OK: true}

View file

@ -245,7 +245,9 @@ func (m *Subsystem) pushBranch(srcDir, branch string) error {
return nil
}
// updateStatus updates the workspace status.json.
// updateStatus rewrites status.json after a harvest decision.
//
// updateStatus(wsDir, "ready-for-review", "")
func updateStatus(wsDir, status, question string) {
r := fs.Read(agentic.WorkspaceStatusPath(wsDir))
if !r.OK {
@ -265,5 +267,12 @@ func updateStatus(wsDir, status, question string) {
} else {
delete(st, "question") // clear stale question from previous state
}
fs.WriteAtomic(agentic.WorkspaceStatusPath(wsDir), core.JSONMarshalString(st))
statusPath := agentic.WorkspaceStatusPath(wsDir)
if r := fs.WriteAtomic(statusPath, core.JSONMarshalString(st)); !r.OK {
if err, ok := r.Value.(error); ok {
core.Warn("monitor.updateStatus: failed to write status", "path", statusPath, "reason", err)
return
}
core.Warn("monitor.updateStatus: failed to write status", "path", statusPath)
}
}

View file

@ -127,6 +127,12 @@ func TestHarvest_CheckSafety_Bad_LargeFile(t *testing.T) {
assert.Contains(t, reason, "huge.txt")
}
func TestHarvest_UpdateStatus_Bad_WriteFailure(t *testing.T) {
assert.NotPanics(t, func() {
updateStatus("/dev/null/impossible", "ready-for-review", "")
})
}
func TestHarvest_HarvestWorkspace_Good(t *testing.T) {
_, wsDir := initTestRepo(t)
writeStatus(t, wsDir, "completed", "test-repo", "agent/test-task")