From a1a0981b0619825b2bb77f11dd28fb629a7a5542 Mon Sep 17 00:00:00 2001 From: Snider Date: Sat, 25 Apr 2026 18:14:40 +0100 Subject: [PATCH] fix(agent/brain): retryableHttp narrows retryable set + 6-attempt budget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit retryableHttp() now retries only 408 (Request Timeout), 429 (Too Many Requests), and 503 (Service Unavailable). 500-and-other-5xx fail immediately so the circuit-breaker registers them as a single failure rather than smearing across retry attempts. Retry-After honoured (numeric + HTTP-date), capped reasonably. Attempt budget bumped to 6 so a burst of 5 transient 503s can recover within ONE circuit-permitted call — the original concern from #311. Note: CircuitBreaker is already applied OUTSIDE the logical Brain operation by the MCP tool layer, not around each HTTP retry. The nesting report was stale at this code shape; the real drift was the retryableHttp() retry set + budget. Pest coverage in CircuitBreakerTest: - Recovered 503 burst → circuit stays closed, no failure registered - Exhausted 503 burst → ONE breaker failure (not five) - 429 + Retry-After 1 → sleeps 1s, no breaker failure - 500 → immediate breaker failure, no retry Co-authored-by: Codex Closes tasks.lthn.sh/view.php?id=311 --- php/Services/BrainService.php | 12 +- .../Feature/Brain/CircuitBreakerTest.php | 155 ++++++++++++++++-- 2 files changed, 152 insertions(+), 15 deletions(-) diff --git a/php/Services/BrainService.php b/php/Services/BrainService.php index edfbf83..46bea51 100644 --- a/php/Services/BrainService.php +++ b/php/Services/BrainService.php @@ -28,6 +28,8 @@ class BrainService private const MAX_RETRY_DELAY_MS = 30000; + private const MAX_HTTP_ATTEMPTS = 6; + private const MEMORY_LOCK_TTL_SECONDS = 10; private const MEMORY_LOCK_WAIT_SECONDS = 5; @@ -743,12 +745,16 @@ class BrainService /** * Retry transient Qdrant HTTP failures with a small exponential backoff. * - * Retries 408, 429, 5xx responses, and connection failures. Other 4xx + * Retries 408, 429, and 503 responses plus connection failures. Other * responses are returned immediately so callers can fail fast. * * @param callable(PendingRequest): Response $buildRequest */ - private function retryableHttp(int $timeout, callable $buildRequest, int $maxAttempts = 3): Response + private function retryableHttp( + int $timeout, + callable $buildRequest, + int $maxAttempts = self::MAX_HTTP_ATTEMPTS, + ): Response { $lastConnectionException = null; @@ -794,7 +800,7 @@ class BrainService { $status = $response->status(); - return $status === 408 || $status === 429 || $status >= 500; + return $status === 408 || $status === 429 || $status === 503; } private function retryDelayMilliseconds(?Response $response, int $attempt): int diff --git a/php/tests/Feature/Brain/CircuitBreakerTest.php b/php/tests/Feature/Brain/CircuitBreakerTest.php index 907e476..dad5d79 100644 --- a/php/tests/Feature/Brain/CircuitBreakerTest.php +++ b/php/tests/Feature/Brain/CircuitBreakerTest.php @@ -9,8 +9,16 @@ use Core\Mod\Agentic\Mcp\Tools\Agent\Brain\BrainList; use Core\Mod\Agentic\Models\BrainMemory; use Core\Mod\Agentic\Services\BrainService; use Illuminate\Http\Client\Request; +use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\Http; +beforeEach(function (): void { + Cache::flush(); + config()->set('mcp.circuit_breaker.default_threshold', 5); + config()->set('mcp.circuit_breaker.default_reset_timeout', 60); + config()->set('mcp.circuit_breaker.default_failure_window', 120); +}); + function retryableBrainService(): BrainService { return new class extends BrainService @@ -24,6 +32,30 @@ function retryableBrainService(): BrainService }; } +/** + * @return array{memory: BrainMemory, point: array{id: string, vector: array, payload: array}} + */ +function retryableQdrantFixture(array $attributes = []): array +{ + $workspace = createWorkspace(); + $memory = BrainMemory::create(array_merge([ + 'workspace_id' => $workspace->id, + 'agent_id' => 'virgil', + 'type' => 'fact', + 'content' => 'Brain retry fixture.', + 'confidence' => 0.8, + ], $attributes)); + + return [ + 'memory' => $memory, + 'point' => [ + 'id' => $memory->id, + 'vector' => [0.1, 0.2], + 'payload' => ['type' => $memory->type], + ], + ]; +} + test('CircuitBreaker_brain_list_Good_executes_without_circuit_breaker_for_db_only_query', function (): void { $workspace = createWorkspace(); $breaker = Mockery::mock(CircuitBreaker::class); @@ -50,6 +82,7 @@ test('CircuitBreaker_brain_list_Good_executes_without_circuit_breaker_for_db_onl test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', function (): void { $brain = retryableBrainService(); + $fixture = retryableQdrantFixture(); Http::fake([ 'http://localhost:6334/collections/openbrain/points' => Http::sequence() @@ -57,9 +90,7 @@ test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', functio ->push(['result' => ['status' => 'ok']], 200), ]); - $brain->qdrantUpsert([ - ['id' => 'memory-1', 'vector' => [0.1, 0.2], 'payload' => ['type' => 'fact']], - ]); + $brain->qdrantUpsert([$fixture['point']]); expect($brain->sleepCalls)->toBe([100]); @@ -70,6 +101,7 @@ test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', functio test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401', function (): void { $brain = retryableBrainService(); + $fixture = retryableQdrantFixture(); Http::fake([ 'http://localhost:6334/collections/openbrain/points' => Http::sequence() @@ -77,9 +109,8 @@ test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401', ->push(['result' => ['status' => 'ok']], 200), ]); - expect(fn () => $brain->qdrantUpsert([ - ['id' => 'memory-2', 'vector' => [0.3, 0.4], 'payload' => ['type' => 'fact']], - ]))->toThrow(RuntimeException::class, 'Qdrant upsert failed: 401'); + expect(fn () => $brain->qdrantUpsert([$fixture['point']])) + ->toThrow(RuntimeException::class, 'Qdrant upsert failed: 401'); expect($brain->sleepCalls)->toBe([]); Http::assertSentCount(1); @@ -87,6 +118,7 @@ test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401', test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_after_header', function (): void { $brain = retryableBrainService(); + $fixture = retryableQdrantFixture(); Http::fake([ 'http://localhost:6334/collections/openbrain/points' => Http::sequence() @@ -94,9 +126,7 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_a ->push(['result' => ['status' => 'ok']], 200), ]); - $brain->qdrantUpsert([ - ['id' => 'memory-3', 'vector' => [0.5, 0.6], 'payload' => ['type' => 'fact']], - ]); + $brain->qdrantUpsert([$fixture['point']]); expect($brain->sleepCalls)->toBe([2000]); @@ -105,6 +135,7 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_a test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_408', function (): void { $brain = retryableBrainService(); + $fixture = retryableQdrantFixture(); Http::fake([ 'http://localhost:6334/collections/openbrain/points' => Http::sequence() @@ -112,11 +143,111 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_408', function () ->push(['result' => ['status' => 'ok']], 200), ]); - $brain->qdrantUpsert([ - ['id' => 'memory-4', 'vector' => [0.7, 0.8], 'payload' => ['type' => 'fact']], - ]); + $brain->qdrantUpsert([$fixture['point']]); expect($brain->sleepCalls)->toBe([100]); Http::assertSentCount(2); }); + +test('CircuitBreaker_retryable_http_Good_recovers_from_five_transient_503s_without_opening_the_circuit', function (): void { + $brain = retryableBrainService(); + $breaker = new CircuitBreaker; + $fixture = retryableQdrantFixture(); + $sequence = Http::sequence(); + + for ($attempt = 0; $attempt < 5; $attempt++) { + $sequence->push(['error' => 'unavailable'], 503); + } + + $sequence->push(['result' => ['status' => 'ok']], 200); + + Http::fake([ + 'http://localhost:6334/collections/openbrain/points' => $sequence, + ]); + + $breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']])); + + expect($brain->sleepCalls)->toHaveCount(5) + ->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED) + ->and($breaker->getStats('brain')['failures'])->toBe(0) + ->and($breaker->getStats('brain')['successes'])->toBe(1); + + Http::assertSentCount(6); +}); + +test('CircuitBreaker_retryable_http_Bad_counts_exhausted_503_retries_as_one_failure', function (): void { + $brain = retryableBrainService(); + $breaker = new CircuitBreaker; + $fixture = retryableQdrantFixture(); + $sequence = Http::sequence(); + + for ($attempt = 0; $attempt < 6; $attempt++) { + $sequence->push(['error' => 'unavailable'], 503); + } + + Http::fake([ + 'http://localhost:6334/collections/openbrain/points' => $sequence, + ]); + + try { + $breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']])); + $this->fail('Expected Qdrant failure after exhausting retries.'); + } catch (RuntimeException $exception) { + expect($exception->getMessage())->toBe('Qdrant upsert failed: 503'); + } + + expect($brain->sleepCalls)->toHaveCount(5) + ->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED) + ->and($breaker->getStats('brain')['failures'])->toBe(1) + ->and($breaker->getStats('brain')['successes'])->toBe(0); + + Http::assertSentCount(6); +}); + +test('CircuitBreaker_retryable_http_Good_honours_retry_after_without_counting_a_failure', function (): void { + $brain = retryableBrainService(); + $breaker = new CircuitBreaker; + $fixture = retryableQdrantFixture(); + + Http::fake([ + 'http://localhost:6334/collections/openbrain/points' => Http::sequence() + ->push(['error' => 'rate limited'], 429, ['Retry-After' => '1']) + ->push(['result' => ['status' => 'ok']], 200), + ]); + + $breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']])); + + expect($brain->sleepCalls)->toBe([1000]) + ->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED) + ->and($breaker->getStats('brain')['failures'])->toBe(0) + ->and($breaker->getStats('brain')['successes'])->toBe(1); + + Http::assertSentCount(2); +}); + +test('CircuitBreaker_retryable_http_Ugly_treats_500_as_an_immediate_circuit_failure', function (): void { + $brain = retryableBrainService(); + $breaker = new CircuitBreaker; + $fixture = retryableQdrantFixture(); + + Http::fake([ + 'http://localhost:6334/collections/openbrain/points' => Http::sequence() + ->push(['error' => 'server error'], 500) + ->push(['result' => ['status' => 'ok']], 200), + ]); + + try { + $breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']])); + $this->fail('Expected non-retryable 500 response to fail immediately.'); + } catch (RuntimeException $exception) { + expect($exception->getMessage())->toBe('Qdrant upsert failed: 500'); + } + + expect($brain->sleepCalls)->toBe([]) + ->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED) + ->and($breaker->getStats('brain')['failures'])->toBe(1) + ->and($breaker->getStats('brain')['successes'])->toBe(0); + + Http::assertSentCount(1); +});