From b7342ec819c4cb80ff16a85328d43441a71b73ab Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 19 Feb 2026 21:43:06 +0000 Subject: [PATCH] fix: only retry startServer on process exit, not timeout Distinguishes retryable failures (process exited, e.g. port conflict) from non-retryable ones (60s timeout, e.g. stuck server). Avoids 3x timeout penalty. Co-Authored-By: Virgil --- server.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/server.go b/server.go index fb31b31..4947df2 100644 --- a/server.go +++ b/server.go @@ -131,8 +131,17 @@ func startServer(binary, modelPath string, gpuLayers, ctxSize int) (*server, err return s, nil } - _ = s.stop() - lastErr = fmt.Errorf("attempt %d: %w", attempt+1, err) + // Only retry if the process actually exited (e.g. port conflict). + // A timeout means the server is stuck, not a port issue. + select { + case <-s.exited: + _ = s.stop() + lastErr = fmt.Errorf("attempt %d: %w", attempt+1, err) + continue + default: + _ = s.stop() + return nil, fmt.Errorf("rocm: llama-server not ready: %w", err) + } } return nil, fmt.Errorf("rocm: server failed after %d attempts: %w", maxAttempts, lastErr)