From c50a8e9e9b347cd77dadaeae17da923c840e008b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 19 Feb 2026 21:40:05 +0000 Subject: [PATCH] feat: retry port selection in startServer on process failure Co-Authored-By: Virgil Co-Authored-By: Claude Opus 4.6 --- backend.go | 8 +----- server.go | 76 +++++++++++++++++++++++++++++--------------------- server_test.go | 8 ++++++ 3 files changed, 53 insertions(+), 39 deletions(-) diff --git a/backend.go b/backend.go index bfeb35d..c796d2c 100644 --- a/backend.go +++ b/backend.go @@ -3,7 +3,6 @@ package rocm import ( - "fmt" "os" "path/filepath" "strings" @@ -37,12 +36,7 @@ func (b *rocmBackend) LoadModel(path string, opts ...inference.LoadOption) (infe return nil, err } - port, err := freePort() - if err != nil { - return nil, fmt.Errorf("rocm: find free port: %w", err) - } - - srv, err := startServer(binary, path, port, cfg.GPULayers, cfg.ContextLen) + srv, err := startServer(binary, path, cfg.GPULayers, cfg.ContextLen) if err != nil { return nil, err } diff --git a/server.go b/server.go index a376b26..fb31b31 100644 --- a/server.go +++ b/server.go @@ -79,51 +79,63 @@ func serverEnv() []string { } // startServer spawns llama-server and waits for it to become ready. -func startServer(binary, modelPath string, port, gpuLayers, ctxSize int) (*server, error) { +// It selects a free port automatically, retrying up to 3 times if the +// process exits during startup (e.g. port conflict). +func startServer(binary, modelPath string, gpuLayers, ctxSize int) (*server, error) { if gpuLayers < 0 { gpuLayers = 999 } - args := []string{ - "--model", modelPath, - "--host", "127.0.0.1", - "--port", strconv.Itoa(port), - "--n-gpu-layers", strconv.Itoa(gpuLayers), - } - if ctxSize > 0 { - args = append(args, "--ctx-size", strconv.Itoa(ctxSize)) - } + const maxAttempts = 3 + var lastErr error - cmd := exec.Command(binary, args...) - cmd.Env = serverEnv() + for attempt := range maxAttempts { + port, err := freePort() + if err != nil { + return nil, fmt.Errorf("rocm: find free port: %w", err) + } - if err := cmd.Start(); err != nil { - return nil, fmt.Errorf("start llama-server: %w", err) - } + args := []string{ + "--model", modelPath, + "--host", "127.0.0.1", + "--port", strconv.Itoa(port), + "--n-gpu-layers", strconv.Itoa(gpuLayers), + } + if ctxSize > 0 { + args = append(args, "--ctx-size", strconv.Itoa(ctxSize)) + } - s := &server{ - cmd: cmd, - port: port, - client: llamacpp.NewClient(fmt.Sprintf("http://127.0.0.1:%d", port)), - exited: make(chan struct{}), - } + cmd := exec.Command(binary, args...) + cmd.Env = serverEnv() - // Goroutine to detect process exit. - go func() { - s.exitErr = cmd.Wait() - close(s.exited) - }() + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("start llama-server: %w", err) + } - // Wait for the health endpoint with a 60s timeout. - ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) - defer cancel() + s := &server{ + cmd: cmd, + port: port, + client: llamacpp.NewClient(fmt.Sprintf("http://127.0.0.1:%d", port)), + exited: make(chan struct{}), + } + + go func() { + s.exitErr = cmd.Wait() + close(s.exited) + }() + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + err = s.waitReady(ctx) + cancel() + if err == nil { + return s, nil + } - if err := s.waitReady(ctx); err != nil { _ = s.stop() - return nil, fmt.Errorf("llama-server not ready: %w", err) + lastErr = fmt.Errorf("attempt %d: %w", attempt+1, err) } - return s, nil + return nil, fmt.Errorf("rocm: server failed after %d attempts: %w", maxAttempts, lastErr) } // waitReady polls the health endpoint until the server is ready. diff --git a/server_test.go b/server_test.go index 6a4e074..a3592fe 100644 --- a/server_test.go +++ b/server_test.go @@ -128,6 +128,14 @@ func TestGenerate_ServerDead(t *testing.T) { assert.ErrorContains(t, m.Err(), "server has exited") } +func TestStartServer_RetriesOnProcessExit(t *testing.T) { + // /bin/false starts successfully but exits immediately with code 1. + // startServer should retry up to 3 times, then fail. + _, err := startServer("/bin/false", "/nonexistent/model.gguf", 999, 0) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed after 3 attempts") +} + func TestChat_ServerDead(t *testing.T) { exited := make(chan struct{}) close(exited)