fix(agent/brain): retryableHttp narrows retryable set + 6-attempt budget

retryableHttp() now retries only 408 (Request Timeout), 429 (Too Many
Requests), and 503 (Service Unavailable). 500-and-other-5xx fail
immediately so the circuit-breaker registers them as a single
failure rather than smearing across retry attempts. Retry-After
honoured (numeric + HTTP-date), capped reasonably.

Attempt budget bumped to 6 so a burst of 5 transient 503s can recover
within ONE circuit-permitted call — the original concern from #311.

Note: CircuitBreaker is already applied OUTSIDE the logical Brain
operation by the MCP tool layer, not around each HTTP retry. The
nesting report was stale at this code shape; the real drift was the
retryableHttp() retry set + budget.

Pest coverage in CircuitBreakerTest:
- Recovered 503 burst → circuit stays closed, no failure registered
- Exhausted 503 burst → ONE breaker failure (not five)
- 429 + Retry-After 1 → sleeps 1s, no breaker failure
- 500 → immediate breaker failure, no retry

Co-authored-by: Codex <noreply@openai.com>
Closes tasks.lthn.sh/view.php?id=311
This commit is contained in:
Snider 2026-04-25 18:14:40 +01:00
parent b6565263f3
commit a1a0981b06
2 changed files with 152 additions and 15 deletions

View file

@ -28,6 +28,8 @@ class BrainService
private const MAX_RETRY_DELAY_MS = 30000;
private const MAX_HTTP_ATTEMPTS = 6;
private const MEMORY_LOCK_TTL_SECONDS = 10;
private const MEMORY_LOCK_WAIT_SECONDS = 5;
@ -743,12 +745,16 @@ class BrainService
/**
* Retry transient Qdrant HTTP failures with a small exponential backoff.
*
* Retries 408, 429, 5xx responses, and connection failures. Other 4xx
* Retries 408, 429, and 503 responses plus connection failures. Other
* responses are returned immediately so callers can fail fast.
*
* @param callable(PendingRequest): Response $buildRequest
*/
private function retryableHttp(int $timeout, callable $buildRequest, int $maxAttempts = 3): Response
private function retryableHttp(
int $timeout,
callable $buildRequest,
int $maxAttempts = self::MAX_HTTP_ATTEMPTS,
): Response
{
$lastConnectionException = null;
@ -794,7 +800,7 @@ class BrainService
{
$status = $response->status();
return $status === 408 || $status === 429 || $status >= 500;
return $status === 408 || $status === 429 || $status === 503;
}
private function retryDelayMilliseconds(?Response $response, int $attempt): int

View file

@ -9,8 +9,16 @@ use Core\Mod\Agentic\Mcp\Tools\Agent\Brain\BrainList;
use Core\Mod\Agentic\Models\BrainMemory;
use Core\Mod\Agentic\Services\BrainService;
use Illuminate\Http\Client\Request;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\Http;
beforeEach(function (): void {
Cache::flush();
config()->set('mcp.circuit_breaker.default_threshold', 5);
config()->set('mcp.circuit_breaker.default_reset_timeout', 60);
config()->set('mcp.circuit_breaker.default_failure_window', 120);
});
function retryableBrainService(): BrainService
{
return new class extends BrainService
@ -24,6 +32,30 @@ function retryableBrainService(): BrainService
};
}
/**
* @return array{memory: BrainMemory, point: array{id: string, vector: array<float>, payload: array<string, string>}}
*/
function retryableQdrantFixture(array $attributes = []): array
{
$workspace = createWorkspace();
$memory = BrainMemory::create(array_merge([
'workspace_id' => $workspace->id,
'agent_id' => 'virgil',
'type' => 'fact',
'content' => 'Brain retry fixture.',
'confidence' => 0.8,
], $attributes));
return [
'memory' => $memory,
'point' => [
'id' => $memory->id,
'vector' => [0.1, 0.2],
'payload' => ['type' => $memory->type],
],
];
}
test('CircuitBreaker_brain_list_Good_executes_without_circuit_breaker_for_db_only_query', function (): void {
$workspace = createWorkspace();
$breaker = Mockery::mock(CircuitBreaker::class);
@ -50,6 +82,7 @@ test('CircuitBreaker_brain_list_Good_executes_without_circuit_breaker_for_db_onl
test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', function (): void {
$brain = retryableBrainService();
$fixture = retryableQdrantFixture();
Http::fake([
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
@ -57,9 +90,7 @@ test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', functio
->push(['result' => ['status' => 'ok']], 200),
]);
$brain->qdrantUpsert([
['id' => 'memory-1', 'vector' => [0.1, 0.2], 'payload' => ['type' => 'fact']],
]);
$brain->qdrantUpsert([$fixture['point']]);
expect($brain->sleepCalls)->toBe([100]);
@ -70,6 +101,7 @@ test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', functio
test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401', function (): void {
$brain = retryableBrainService();
$fixture = retryableQdrantFixture();
Http::fake([
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
@ -77,9 +109,8 @@ test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401',
->push(['result' => ['status' => 'ok']], 200),
]);
expect(fn () => $brain->qdrantUpsert([
['id' => 'memory-2', 'vector' => [0.3, 0.4], 'payload' => ['type' => 'fact']],
]))->toThrow(RuntimeException::class, 'Qdrant upsert failed: 401');
expect(fn () => $brain->qdrantUpsert([$fixture['point']]))
->toThrow(RuntimeException::class, 'Qdrant upsert failed: 401');
expect($brain->sleepCalls)->toBe([]);
Http::assertSentCount(1);
@ -87,6 +118,7 @@ test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401',
test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_after_header', function (): void {
$brain = retryableBrainService();
$fixture = retryableQdrantFixture();
Http::fake([
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
@ -94,9 +126,7 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_a
->push(['result' => ['status' => 'ok']], 200),
]);
$brain->qdrantUpsert([
['id' => 'memory-3', 'vector' => [0.5, 0.6], 'payload' => ['type' => 'fact']],
]);
$brain->qdrantUpsert([$fixture['point']]);
expect($brain->sleepCalls)->toBe([2000]);
@ -105,6 +135,7 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_a
test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_408', function (): void {
$brain = retryableBrainService();
$fixture = retryableQdrantFixture();
Http::fake([
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
@ -112,11 +143,111 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_408', function ()
->push(['result' => ['status' => 'ok']], 200),
]);
$brain->qdrantUpsert([
['id' => 'memory-4', 'vector' => [0.7, 0.8], 'payload' => ['type' => 'fact']],
]);
$brain->qdrantUpsert([$fixture['point']]);
expect($brain->sleepCalls)->toBe([100]);
Http::assertSentCount(2);
});
test('CircuitBreaker_retryable_http_Good_recovers_from_five_transient_503s_without_opening_the_circuit', function (): void {
$brain = retryableBrainService();
$breaker = new CircuitBreaker;
$fixture = retryableQdrantFixture();
$sequence = Http::sequence();
for ($attempt = 0; $attempt < 5; $attempt++) {
$sequence->push(['error' => 'unavailable'], 503);
}
$sequence->push(['result' => ['status' => 'ok']], 200);
Http::fake([
'http://localhost:6334/collections/openbrain/points' => $sequence,
]);
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
expect($brain->sleepCalls)->toHaveCount(5)
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
->and($breaker->getStats('brain')['failures'])->toBe(0)
->and($breaker->getStats('brain')['successes'])->toBe(1);
Http::assertSentCount(6);
});
test('CircuitBreaker_retryable_http_Bad_counts_exhausted_503_retries_as_one_failure', function (): void {
$brain = retryableBrainService();
$breaker = new CircuitBreaker;
$fixture = retryableQdrantFixture();
$sequence = Http::sequence();
for ($attempt = 0; $attempt < 6; $attempt++) {
$sequence->push(['error' => 'unavailable'], 503);
}
Http::fake([
'http://localhost:6334/collections/openbrain/points' => $sequence,
]);
try {
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
$this->fail('Expected Qdrant failure after exhausting retries.');
} catch (RuntimeException $exception) {
expect($exception->getMessage())->toBe('Qdrant upsert failed: 503');
}
expect($brain->sleepCalls)->toHaveCount(5)
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
->and($breaker->getStats('brain')['failures'])->toBe(1)
->and($breaker->getStats('brain')['successes'])->toBe(0);
Http::assertSentCount(6);
});
test('CircuitBreaker_retryable_http_Good_honours_retry_after_without_counting_a_failure', function (): void {
$brain = retryableBrainService();
$breaker = new CircuitBreaker;
$fixture = retryableQdrantFixture();
Http::fake([
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
->push(['error' => 'rate limited'], 429, ['Retry-After' => '1'])
->push(['result' => ['status' => 'ok']], 200),
]);
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
expect($brain->sleepCalls)->toBe([1000])
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
->and($breaker->getStats('brain')['failures'])->toBe(0)
->and($breaker->getStats('brain')['successes'])->toBe(1);
Http::assertSentCount(2);
});
test('CircuitBreaker_retryable_http_Ugly_treats_500_as_an_immediate_circuit_failure', function (): void {
$brain = retryableBrainService();
$breaker = new CircuitBreaker;
$fixture = retryableQdrantFixture();
Http::fake([
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
->push(['error' => 'server error'], 500)
->push(['result' => ['status' => 'ok']], 200),
]);
try {
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
$this->fail('Expected non-retryable 500 response to fail immediately.');
} catch (RuntimeException $exception) {
expect($exception->getMessage())->toBe('Qdrant upsert failed: 500');
}
expect($brain->sleepCalls)->toBe([])
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
->and($breaker->getStats('brain')['failures'])->toBe(1)
->and($breaker->getStats('brain')['successes'])->toBe(0);
Http::assertSentCount(1);
});