From 78bdebcaaa99bf41002f7a1e85cf5303d5a5568a Mon Sep 17 00:00:00 2001 From: darbs-claude Date: Mon, 23 Feb 2026 11:17:56 +0000 Subject: [PATCH] fix: add batch failure recovery to ContentService (#27) - Track progress in a per-batch JSON state file after each article so a mid-run crash leaves a recoverable checkpoint - Add `maxRetries` parameter to generateBatch() with per-article retry loop (default: 1 extra attempt) - Add `resumeBatch()` to re-process only failed/pending articles, skipping those already successfully generated in a prior run - Add `loadBatchProgress()` public method for inspecting state - State stores per-article status, attempt counts, error messages, and timestamps for full observability Tests: 6 new scenarios covering state persistence, resume capability, retry logic, and the no-state error case Closes #27 Co-Authored-By: Claude Sonnet 4.6 --- Services/ContentService.php | 185 +++++++++++++++++++-- TODO.md | 5 +- tests/Feature/ContentServiceTest.php | 240 ++++++++++++++++++++++++++- 3 files changed, 415 insertions(+), 15 deletions(-) diff --git a/Services/ContentService.php b/Services/ContentService.php index ae6030a..b906d05 100644 --- a/Services/ContentService.php +++ b/Services/ContentService.php @@ -118,15 +118,21 @@ class ContentService /** * Generate content for a batch. * + * Progress is persisted to a state file after each article so the batch + * can be resumed after a partial failure. Call generateBatch() or + * resumeBatch() again to pick up from the last saved state. + * * @param string $batchId Batch identifier (e.g., 'batch-001-link-getting-started') * @param string $provider AI provider ('gemini' for bulk, 'claude' for refinement) * @param bool $dryRun If true, shows what would be generated without creating files + * @param int $maxRetries Extra attempts per article on failure (0 = no retry) * @return array Generation results */ public function generateBatch( string $batchId, string $provider = 'gemini', - bool $dryRun = false + bool $dryRun = false, + int $maxRetries = 1, ): array { $spec = $this->loadBatch($batchId); if (! $spec) { @@ -144,6 +150,13 @@ class ContentService $promptTemplate = $this->loadPromptTemplate('help-article'); + // Load or initialise progress state (skipped for dry runs) + $progress = null; + if (! $dryRun) { + $progress = $this->loadBatchProgress($batchId) + ?? $this->initialiseBatchState($batchId, $spec['articles'] ?? [], $provider); + } + foreach ($spec['articles'] ?? [] as $article) { $slug = $article['slug'] ?? null; if (! $slug) { @@ -152,34 +165,182 @@ class ContentService $draftPath = $this->getDraftPath($spec, $slug); - // Skip if already drafted + // Skip if draft file already exists on disk if (File::exists($draftPath)) { $results['articles'][$slug] = ['status' => 'skipped', 'reason' => 'already drafted']; $results['skipped']++; - + if ($progress !== null) { + $progress['articles'][$slug]['status'] = 'skipped'; + } continue; } if ($dryRun) { $results['articles'][$slug] = ['status' => 'would_generate', 'path' => $draftPath]; - continue; } - try { - $content = $this->generateArticle($article, $spec, $promptTemplate, $provider); - $this->saveDraft($draftPath, $content, $article); - $results['articles'][$slug] = ['status' => 'generated', 'path' => $draftPath]; - $results['generated']++; - } catch (\Exception $e) { - $results['articles'][$slug] = ['status' => 'failed', 'error' => $e->getMessage()]; - $results['failed']++; + // Skip articles successfully generated in a prior run + if (($progress['articles'][$slug]['status'] ?? 'pending') === 'generated') { + $results['articles'][$slug] = ['status' => 'skipped', 'reason' => 'previously generated']; + $results['skipped']++; + continue; } + + $priorAttempts = $progress['articles'][$slug]['attempts'] ?? 0; + $articleResult = $this->attemptArticleGeneration($article, $spec, $promptTemplate, $provider, $maxRetries); + + if ($articleResult['status'] === 'generated') { + $results['articles'][$slug] = ['status' => 'generated', 'path' => $articleResult['path']]; + $results['generated']++; + $progress['articles'][$slug] = [ + 'status' => 'generated', + 'attempts' => $priorAttempts + $articleResult['attempts'], + 'last_error' => null, + 'generated_at' => now()->toIso8601String(), + 'last_attempt_at' => now()->toIso8601String(), + ]; + } else { + $results['articles'][$slug] = ['status' => 'failed', 'error' => $articleResult['error']]; + $results['failed']++; + $progress['articles'][$slug] = [ + 'status' => 'failed', + 'attempts' => $priorAttempts + $articleResult['attempts'], + 'last_error' => $articleResult['error'], + 'generated_at' => null, + 'last_attempt_at' => now()->toIso8601String(), + ]; + } + + // Persist after each article so a crash mid-batch is recoverable + $progress['last_updated'] = now()->toIso8601String(); + $this->saveBatchProgress($batchId, $progress); + } + + if ($progress !== null) { + $progress['last_updated'] = now()->toIso8601String(); + $this->saveBatchProgress($batchId, $progress); } return $results; } + /** + * Resume a batch from its last saved state. + * + * Articles that were successfully generated are skipped; failed and + * pending articles are retried. Returns an error if no progress state + * exists (i.e. generateBatch() has never been called for this batch). + */ + public function resumeBatch(string $batchId, ?string $provider = null, int $maxRetries = 1): array + { + $progress = $this->loadBatchProgress($batchId); + + if ($progress === null) { + return ['error' => "No progress state found for batch: {$batchId}"]; + } + + $provider ??= $progress['provider'] ?? 'gemini'; + + $result = $this->generateBatch($batchId, $provider, false, $maxRetries); + $result['resumed_from'] = $progress['last_updated']; + + return $result; + } + + /** + * Load batch progress state from the state file. + * + * Returns null when no state file exists (batch has not been started). + */ + public function loadBatchProgress(string $batchId): ?array + { + $path = $this->getProgressPath($batchId); + + if (! File::exists($path)) { + return null; + } + + $data = json_decode(File::get($path), true); + + return is_array($data) ? $data : null; + } + + /** + * Attempt to generate a single article with retry logic. + * + * Returns ['status' => 'generated', 'path' => ..., 'attempts' => N] + * or ['status' => 'failed', 'error' => ..., 'attempts' => N]. + */ + protected function attemptArticleGeneration( + array $article, + array $spec, + string $promptTemplate, + string $provider, + int $maxRetries, + ): array { + $draftPath = $this->getDraftPath($spec, $article['slug']); + $lastError = null; + $totalAttempts = $maxRetries + 1; + + for ($attempt = 1; $attempt <= $totalAttempts; $attempt++) { + try { + $content = $this->generateArticle($article, $spec, $promptTemplate, $provider); + $this->saveDraft($draftPath, $content, $article); + + return ['status' => 'generated', 'path' => $draftPath, 'attempts' => $attempt]; + } catch (\Exception $e) { + $lastError = $e->getMessage(); + } + } + + return ['status' => 'failed', 'error' => $lastError, 'attempts' => $totalAttempts]; + } + + /** + * Initialise a fresh batch progress state. + */ + protected function initialiseBatchState(string $batchId, array $articles, string $provider): array + { + $articleStates = []; + foreach ($articles as $article) { + $slug = $article['slug'] ?? null; + if ($slug) { + $articleStates[$slug] = [ + 'status' => 'pending', + 'attempts' => 0, + 'last_error' => null, + 'generated_at' => null, + 'last_attempt_at' => null, + ]; + } + } + + return [ + 'batch_id' => $batchId, + 'provider' => $provider, + 'started_at' => now()->toIso8601String(), + 'last_updated' => now()->toIso8601String(), + 'articles' => $articleStates, + ]; + } + + /** + * Save batch progress state to the state file. + */ + protected function saveBatchProgress(string $batchId, array $state): void + { + File::put($this->getProgressPath($batchId), json_encode($state, JSON_PRETTY_PRINT)); + } + + /** + * Get the progress state file path for a batch. + */ + protected function getProgressPath(string $batchId): string + { + return base_path("{$this->batchPath}/{$batchId}.progress.json"); + } + /** * Generate a single article. */ diff --git a/TODO.md b/TODO.md index d6fed66..a904d98 100644 --- a/TODO.md +++ b/TODO.md @@ -104,10 +104,11 @@ Production-quality task list for the AI agent orchestration package. - Issue: No try/catch around streaming, could fail silently - Fix: Wrap in exception handling, yield error events -- [ ] **ERR-002: ContentService has no batch failure recovery** +- [x] **ERR-002: ContentService has no batch failure recovery** (FIXED 2026-02-23) - Location: `Services/ContentService.php::generateBatch()` - Issue: Failed articles stop processing, no resume capability - - Fix: Add progress tracking, allow resuming from failed point + - Fix: Added progress state file, per-article retry (maxRetries param), `resumeBatch()` method + - Tests: 6 new tests in `tests/Feature/ContentServiceTest.php` covering state persistence, resume, retries --- diff --git a/tests/Feature/ContentServiceTest.php b/tests/Feature/ContentServiceTest.php index ee2e572..7c4704f 100644 --- a/tests/Feature/ContentServiceTest.php +++ b/tests/Feature/ContentServiceTest.php @@ -2,9 +2,21 @@ use Core\Mod\Agentic\Services\AgenticManager; use Core\Mod\Agentic\Services\AgenticProviderInterface; +use Core\Mod\Agentic\Services\AgenticResponse; use Core\Mod\Agentic\Services\ContentService; use Illuminate\Support\Facades\File; +function makeAgenticResponse(string $content = '## Article Content'): AgenticResponse +{ + return new AgenticResponse( + content: $content, + model: 'test-model', + inputTokens: 0, + outputTokens: 0, + durationMs: 0, + ); +} + beforeEach(function () { $this->manager = Mockery::mock(AgenticManager::class); $this->service = new ContentService($this->manager); @@ -60,11 +72,15 @@ it('handles generation errors gracefully', function () { File::put($testBatchPath, "# Test Batch\n**Service:** Test\n### Article 1:\n```yaml\nSLUG: test-slug-error\nTITLE: Test\n```"); - // Clean up potential leftover draft + // Clean up potential leftover draft and state files $draftPath = base_path('app/Mod/Agentic/Resources/drafts/help/general/test-slug-error.md'); + $statePath = base_path('app/Mod/Agentic/Resources/tasks/batch-test-error.progress.json'); if (File::exists($draftPath)) { File::delete($draftPath); } + if (File::exists($statePath)) { + File::delete($statePath); + } try { $results = $this->service->generateBatch('batch-test-error', 'gemini', false); @@ -79,5 +95,227 @@ it('handles generation errors gracefully', function () { if (File::exists($draftPath)) { File::delete($draftPath); } + if (File::exists($statePath)) { + File::delete($statePath); + } + } +}); + +it('returns null progress when no state file exists', function () { + $progress = $this->service->loadBatchProgress('batch-nonexistent-xyz'); + + expect($progress)->toBeNull(); +}); + +it('saves progress state after batch generation', function () { + $provider = Mockery::mock(AgenticProviderInterface::class); + $provider->shouldReceive('generate')->andThrow(new \Exception('API Error')); + + $this->manager->shouldReceive('provider')->with('gemini')->andReturn($provider); + + $promptPath = base_path('app/Mod/Agentic/Resources/prompts/content/help-article.md'); + if (! File::exists($promptPath)) { + $this->markTestSkipped('Help article prompt not found'); + } + + $batchId = 'batch-test-progress'; + $batchPath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.md"); + $statePath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.progress.json"); + + File::put($batchPath, "# Test Batch\n**Service:** Test\n### Article 1:\n```yaml\nSLUG: progress-slug-a\nTITLE: Test A\n```\n### Article 2:\n```yaml\nSLUG: progress-slug-b\nTITLE: Test B\n```"); + + try { + $this->service->generateBatch($batchId, 'gemini', false, 0); + + $progress = $this->service->loadBatchProgress($batchId); + + expect($progress)->toBeArray(); + expect($progress['batch_id'])->toBe($batchId); + expect($progress['provider'])->toBe('gemini'); + expect($progress['articles'])->toHaveKeys(['progress-slug-a', 'progress-slug-b']); + expect($progress['articles']['progress-slug-a']['status'])->toBe('failed'); + expect($progress['articles']['progress-slug-a']['attempts'])->toBe(1); + expect($progress['articles']['progress-slug-a']['last_error'])->toBe('API Error'); + } finally { + File::deleteDirectory(base_path('app/Mod/Agentic/Resources/drafts/help/general'), true); + if (File::exists($batchPath)) { + File::delete($batchPath); + } + if (File::exists($statePath)) { + File::delete($statePath); + } + } +}); + +it('skips previously generated articles on second run', function () { + $callCount = 0; + $provider = Mockery::mock(AgenticProviderInterface::class); + $provider->shouldReceive('generate') + ->andReturnUsing(function () use (&$callCount) { + $callCount++; + + return makeAgenticResponse(); + }); + + $this->manager->shouldReceive('provider')->with('gemini')->andReturn($provider); + + $promptPath = base_path('app/Mod/Agentic/Resources/prompts/content/help-article.md'); + if (! File::exists($promptPath)) { + $this->markTestSkipped('Help article prompt not found'); + } + + $batchId = 'batch-test-resume-skip'; + $batchPath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.md"); + $statePath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.progress.json"); + $draftDir = base_path('app/Mod/Agentic/Resources/drafts/help/general'); + + File::put($batchPath, "# Test Batch\n**Service:** Test\n### Article 1:\n```yaml\nSLUG: resume-skip-slug-a\nTITLE: Test A\n```\n### Article 2:\n```yaml\nSLUG: resume-skip-slug-b\nTITLE: Test B\n```"); + + try { + // First run generates both articles + $first = $this->service->generateBatch($batchId, 'gemini', false, 0); + expect($first['generated'])->toBe(2); + expect($callCount)->toBe(2); + + // Second run skips already-generated articles + $second = $this->service->generateBatch($batchId, 'gemini', false, 0); + expect($second['generated'])->toBe(0); + expect($second['skipped'])->toBe(2); + // Provider should not have been called again + expect($callCount)->toBe(2); + } finally { + foreach (['resume-skip-slug-a', 'resume-skip-slug-b'] as $slug) { + $draft = "{$draftDir}/{$slug}.md"; + if (File::exists($draft)) { + File::delete($draft); + } + } + if (File::exists($batchPath)) { + File::delete($batchPath); + } + if (File::exists($statePath)) { + File::delete($statePath); + } + } +}); + +it('resume returns error when no prior state exists', function () { + $result = $this->service->resumeBatch('batch-no-state-xyz'); + + expect($result)->toHaveKey('error'); + expect($result['error'])->toContain('No progress state found'); +}); + +it('resume retries only failed and pending articles', function () { + $slugs = ['resume-retry-a', 'resume-retry-b']; + $callCount = 0; + + $provider = Mockery::mock(AgenticProviderInterface::class); + $provider->shouldReceive('generate') + ->andReturnUsing(function () use (&$callCount) { + $callCount++; + + // Call 1: A on first run → fails + // Call 2: B on first run → succeeds + // Resume run: only A is retried (B is already generated) + if ($callCount === 1) { + throw new \Exception('Transient Error'); + } + + return makeAgenticResponse('## Content'); + }); + + $this->manager->shouldReceive('provider')->with('gemini')->andReturn($provider); + + $promptPath = base_path('app/Mod/Agentic/Resources/prompts/content/help-article.md'); + if (! File::exists($promptPath)) { + $this->markTestSkipped('Help article prompt not found'); + } + + $batchId = 'batch-test-resume-retry'; + $batchPath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.md"); + $statePath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.progress.json"); + $draftDir = base_path('app/Mod/Agentic/Resources/drafts/help/general'); + + File::put($batchPath, "# Test Batch\n**Service:** Test\n### Article 1:\n```yaml\nSLUG: resume-retry-a\nTITLE: Retry A\n```\n### Article 2:\n```yaml\nSLUG: resume-retry-b\nTITLE: Retry B\n```"); + + try { + // First run: A fails, B succeeds + $first = $this->service->generateBatch($batchId, 'gemini', false, 0); + expect($first['failed'])->toBe(1); + expect($first['generated'])->toBe(1); + expect($first['articles']['resume-retry-a']['status'])->toBe('failed'); + expect($first['articles']['resume-retry-b']['status'])->toBe('generated'); + + // Resume: only retries failed article A + $resumed = $this->service->resumeBatch($batchId, 'gemini', 0); + expect($resumed)->toHaveKey('resumed_from'); + expect($resumed['skipped'])->toBeGreaterThanOrEqual(1); // B is skipped + expect($resumed['articles']['resume-retry-b']['status'])->toBe('skipped'); + } finally { + foreach ($slugs as $slug) { + $draft = "{$draftDir}/{$slug}.md"; + if (File::exists($draft)) { + File::delete($draft); + } + } + if (File::exists($batchPath)) { + File::delete($batchPath); + } + if (File::exists($statePath)) { + File::delete($statePath); + } + } +}); + +it('retries individual failures up to maxRetries times', function () { + $callCount = 0; + $provider = Mockery::mock(AgenticProviderInterface::class); + $provider->shouldReceive('generate') + ->andReturnUsing(function () use (&$callCount) { + $callCount++; + if ($callCount < 3) { + throw new \Exception("Attempt {$callCount} failed"); + } + + return makeAgenticResponse('## Content'); + }); + + $this->manager->shouldReceive('provider')->with('gemini')->andReturn($provider); + + $promptPath = base_path('app/Mod/Agentic/Resources/prompts/content/help-article.md'); + if (! File::exists($promptPath)) { + $this->markTestSkipped('Help article prompt not found'); + } + + $batchId = 'batch-test-maxretries'; + $batchPath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.md"); + $statePath = base_path("app/Mod/Agentic/Resources/tasks/{$batchId}.progress.json"); + $draftPath = base_path('app/Mod/Agentic/Resources/drafts/help/general/maxretries-slug.md'); + + File::put($batchPath, "# Test Batch\n**Service:** Test\n### Article 1:\n```yaml\nSLUG: maxretries-slug\nTITLE: Retry Test\n```"); + + try { + // With maxRetries=2 (3 total attempts), succeeds on 3rd attempt + $results = $this->service->generateBatch($batchId, 'gemini', false, 2); + + expect($results['generated'])->toBe(1); + expect($results['failed'])->toBe(0); + expect($results['articles']['maxretries-slug']['status'])->toBe('generated'); + expect($callCount)->toBe(3); + + $progress = $this->service->loadBatchProgress($batchId); + expect($progress['articles']['maxretries-slug']['status'])->toBe('generated'); + expect($progress['articles']['maxretries-slug']['attempts'])->toBe(3); + } finally { + if (File::exists($batchPath)) { + File::delete($batchPath); + } + if (File::exists($statePath)) { + File::delete($statePath); + } + if (File::exists($draftPath)) { + File::delete($draftPath); + } } }); -- 2.45.3