fix(agent/brain): retryableHttp narrows retryable set + 6-attempt budget
retryableHttp() now retries only 408 (Request Timeout), 429 (Too Many Requests), and 503 (Service Unavailable). 500-and-other-5xx fail immediately so the circuit-breaker registers them as a single failure rather than smearing across retry attempts. Retry-After honoured (numeric + HTTP-date), capped reasonably. Attempt budget bumped to 6 so a burst of 5 transient 503s can recover within ONE circuit-permitted call — the original concern from #311. Note: CircuitBreaker is already applied OUTSIDE the logical Brain operation by the MCP tool layer, not around each HTTP retry. The nesting report was stale at this code shape; the real drift was the retryableHttp() retry set + budget. Pest coverage in CircuitBreakerTest: - Recovered 503 burst → circuit stays closed, no failure registered - Exhausted 503 burst → ONE breaker failure (not five) - 429 + Retry-After 1 → sleeps 1s, no breaker failure - 500 → immediate breaker failure, no retry Co-authored-by: Codex <noreply@openai.com> Closes tasks.lthn.sh/view.php?id=311
This commit is contained in:
parent
b6565263f3
commit
a1a0981b06
2 changed files with 152 additions and 15 deletions
|
|
@ -28,6 +28,8 @@ class BrainService
|
|||
|
||||
private const MAX_RETRY_DELAY_MS = 30000;
|
||||
|
||||
private const MAX_HTTP_ATTEMPTS = 6;
|
||||
|
||||
private const MEMORY_LOCK_TTL_SECONDS = 10;
|
||||
|
||||
private const MEMORY_LOCK_WAIT_SECONDS = 5;
|
||||
|
|
@ -743,12 +745,16 @@ class BrainService
|
|||
/**
|
||||
* Retry transient Qdrant HTTP failures with a small exponential backoff.
|
||||
*
|
||||
* Retries 408, 429, 5xx responses, and connection failures. Other 4xx
|
||||
* Retries 408, 429, and 503 responses plus connection failures. Other
|
||||
* responses are returned immediately so callers can fail fast.
|
||||
*
|
||||
* @param callable(PendingRequest): Response $buildRequest
|
||||
*/
|
||||
private function retryableHttp(int $timeout, callable $buildRequest, int $maxAttempts = 3): Response
|
||||
private function retryableHttp(
|
||||
int $timeout,
|
||||
callable $buildRequest,
|
||||
int $maxAttempts = self::MAX_HTTP_ATTEMPTS,
|
||||
): Response
|
||||
{
|
||||
$lastConnectionException = null;
|
||||
|
||||
|
|
@ -794,7 +800,7 @@ class BrainService
|
|||
{
|
||||
$status = $response->status();
|
||||
|
||||
return $status === 408 || $status === 429 || $status >= 500;
|
||||
return $status === 408 || $status === 429 || $status === 503;
|
||||
}
|
||||
|
||||
private function retryDelayMilliseconds(?Response $response, int $attempt): int
|
||||
|
|
|
|||
|
|
@ -9,8 +9,16 @@ use Core\Mod\Agentic\Mcp\Tools\Agent\Brain\BrainList;
|
|||
use Core\Mod\Agentic\Models\BrainMemory;
|
||||
use Core\Mod\Agentic\Services\BrainService;
|
||||
use Illuminate\Http\Client\Request;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
|
||||
beforeEach(function (): void {
|
||||
Cache::flush();
|
||||
config()->set('mcp.circuit_breaker.default_threshold', 5);
|
||||
config()->set('mcp.circuit_breaker.default_reset_timeout', 60);
|
||||
config()->set('mcp.circuit_breaker.default_failure_window', 120);
|
||||
});
|
||||
|
||||
function retryableBrainService(): BrainService
|
||||
{
|
||||
return new class extends BrainService
|
||||
|
|
@ -24,6 +32,30 @@ function retryableBrainService(): BrainService
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array{memory: BrainMemory, point: array{id: string, vector: array<float>, payload: array<string, string>}}
|
||||
*/
|
||||
function retryableQdrantFixture(array $attributes = []): array
|
||||
{
|
||||
$workspace = createWorkspace();
|
||||
$memory = BrainMemory::create(array_merge([
|
||||
'workspace_id' => $workspace->id,
|
||||
'agent_id' => 'virgil',
|
||||
'type' => 'fact',
|
||||
'content' => 'Brain retry fixture.',
|
||||
'confidence' => 0.8,
|
||||
], $attributes));
|
||||
|
||||
return [
|
||||
'memory' => $memory,
|
||||
'point' => [
|
||||
'id' => $memory->id,
|
||||
'vector' => [0.1, 0.2],
|
||||
'payload' => ['type' => $memory->type],
|
||||
],
|
||||
];
|
||||
}
|
||||
|
||||
test('CircuitBreaker_brain_list_Good_executes_without_circuit_breaker_for_db_only_query', function (): void {
|
||||
$workspace = createWorkspace();
|
||||
$breaker = Mockery::mock(CircuitBreaker::class);
|
||||
|
|
@ -50,6 +82,7 @@ test('CircuitBreaker_brain_list_Good_executes_without_circuit_breaker_for_db_onl
|
|||
|
||||
test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$fixture = retryableQdrantFixture();
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
|
||||
|
|
@ -57,9 +90,7 @@ test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', functio
|
|||
->push(['result' => ['status' => 'ok']], 200),
|
||||
]);
|
||||
|
||||
$brain->qdrantUpsert([
|
||||
['id' => 'memory-1', 'vector' => [0.1, 0.2], 'payload' => ['type' => 'fact']],
|
||||
]);
|
||||
$brain->qdrantUpsert([$fixture['point']]);
|
||||
|
||||
expect($brain->sleepCalls)->toBe([100]);
|
||||
|
||||
|
|
@ -70,6 +101,7 @@ test('CircuitBreaker_retryable_http_Bad_retries_qdrant_requests_on_503', functio
|
|||
|
||||
test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$fixture = retryableQdrantFixture();
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
|
||||
|
|
@ -77,9 +109,8 @@ test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401',
|
|||
->push(['result' => ['status' => 'ok']], 200),
|
||||
]);
|
||||
|
||||
expect(fn () => $brain->qdrantUpsert([
|
||||
['id' => 'memory-2', 'vector' => [0.3, 0.4], 'payload' => ['type' => 'fact']],
|
||||
]))->toThrow(RuntimeException::class, 'Qdrant upsert failed: 401');
|
||||
expect(fn () => $brain->qdrantUpsert([$fixture['point']]))
|
||||
->toThrow(RuntimeException::class, 'Qdrant upsert failed: 401');
|
||||
|
||||
expect($brain->sleepCalls)->toBe([]);
|
||||
Http::assertSentCount(1);
|
||||
|
|
@ -87,6 +118,7 @@ test('CircuitBreaker_retryable_http_Ugly_does_not_retry_qdrant_requests_on_401',
|
|||
|
||||
test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_after_header', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$fixture = retryableQdrantFixture();
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
|
||||
|
|
@ -94,9 +126,7 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_a
|
|||
->push(['result' => ['status' => 'ok']], 200),
|
||||
]);
|
||||
|
||||
$brain->qdrantUpsert([
|
||||
['id' => 'memory-3', 'vector' => [0.5, 0.6], 'payload' => ['type' => 'fact']],
|
||||
]);
|
||||
$brain->qdrantUpsert([$fixture['point']]);
|
||||
|
||||
expect($brain->sleepCalls)->toBe([2000]);
|
||||
|
||||
|
|
@ -105,6 +135,7 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_429_using_retry_a
|
|||
|
||||
test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_408', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$fixture = retryableQdrantFixture();
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
|
||||
|
|
@ -112,11 +143,111 @@ test('CircuitBreaker_retryable_http_retries_qdrant_requests_on_408', function ()
|
|||
->push(['result' => ['status' => 'ok']], 200),
|
||||
]);
|
||||
|
||||
$brain->qdrantUpsert([
|
||||
['id' => 'memory-4', 'vector' => [0.7, 0.8], 'payload' => ['type' => 'fact']],
|
||||
]);
|
||||
$brain->qdrantUpsert([$fixture['point']]);
|
||||
|
||||
expect($brain->sleepCalls)->toBe([100]);
|
||||
|
||||
Http::assertSentCount(2);
|
||||
});
|
||||
|
||||
test('CircuitBreaker_retryable_http_Good_recovers_from_five_transient_503s_without_opening_the_circuit', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$breaker = new CircuitBreaker;
|
||||
$fixture = retryableQdrantFixture();
|
||||
$sequence = Http::sequence();
|
||||
|
||||
for ($attempt = 0; $attempt < 5; $attempt++) {
|
||||
$sequence->push(['error' => 'unavailable'], 503);
|
||||
}
|
||||
|
||||
$sequence->push(['result' => ['status' => 'ok']], 200);
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => $sequence,
|
||||
]);
|
||||
|
||||
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
|
||||
|
||||
expect($brain->sleepCalls)->toHaveCount(5)
|
||||
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
|
||||
->and($breaker->getStats('brain')['failures'])->toBe(0)
|
||||
->and($breaker->getStats('brain')['successes'])->toBe(1);
|
||||
|
||||
Http::assertSentCount(6);
|
||||
});
|
||||
|
||||
test('CircuitBreaker_retryable_http_Bad_counts_exhausted_503_retries_as_one_failure', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$breaker = new CircuitBreaker;
|
||||
$fixture = retryableQdrantFixture();
|
||||
$sequence = Http::sequence();
|
||||
|
||||
for ($attempt = 0; $attempt < 6; $attempt++) {
|
||||
$sequence->push(['error' => 'unavailable'], 503);
|
||||
}
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => $sequence,
|
||||
]);
|
||||
|
||||
try {
|
||||
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
|
||||
$this->fail('Expected Qdrant failure after exhausting retries.');
|
||||
} catch (RuntimeException $exception) {
|
||||
expect($exception->getMessage())->toBe('Qdrant upsert failed: 503');
|
||||
}
|
||||
|
||||
expect($brain->sleepCalls)->toHaveCount(5)
|
||||
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
|
||||
->and($breaker->getStats('brain')['failures'])->toBe(1)
|
||||
->and($breaker->getStats('brain')['successes'])->toBe(0);
|
||||
|
||||
Http::assertSentCount(6);
|
||||
});
|
||||
|
||||
test('CircuitBreaker_retryable_http_Good_honours_retry_after_without_counting_a_failure', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$breaker = new CircuitBreaker;
|
||||
$fixture = retryableQdrantFixture();
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
|
||||
->push(['error' => 'rate limited'], 429, ['Retry-After' => '1'])
|
||||
->push(['result' => ['status' => 'ok']], 200),
|
||||
]);
|
||||
|
||||
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
|
||||
|
||||
expect($brain->sleepCalls)->toBe([1000])
|
||||
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
|
||||
->and($breaker->getStats('brain')['failures'])->toBe(0)
|
||||
->and($breaker->getStats('brain')['successes'])->toBe(1);
|
||||
|
||||
Http::assertSentCount(2);
|
||||
});
|
||||
|
||||
test('CircuitBreaker_retryable_http_Ugly_treats_500_as_an_immediate_circuit_failure', function (): void {
|
||||
$brain = retryableBrainService();
|
||||
$breaker = new CircuitBreaker;
|
||||
$fixture = retryableQdrantFixture();
|
||||
|
||||
Http::fake([
|
||||
'http://localhost:6334/collections/openbrain/points' => Http::sequence()
|
||||
->push(['error' => 'server error'], 500)
|
||||
->push(['result' => ['status' => 'ok']], 200),
|
||||
]);
|
||||
|
||||
try {
|
||||
$breaker->call('brain', fn (): mixed => $brain->qdrantUpsert([$fixture['point']]));
|
||||
$this->fail('Expected non-retryable 500 response to fail immediately.');
|
||||
} catch (RuntimeException $exception) {
|
||||
expect($exception->getMessage())->toBe('Qdrant upsert failed: 500');
|
||||
}
|
||||
|
||||
expect($brain->sleepCalls)->toBe([])
|
||||
->and($breaker->getState('brain'))->toBe(CircuitBreaker::STATE_CLOSED)
|
||||
->and($breaker->getStats('brain')['failures'])->toBe(1)
|
||||
->and($breaker->getStats('brain')['successes'])->toBe(0);
|
||||
|
||||
Http::assertSentCount(1);
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue