Adds server.go with the process lifecycle layer that manages spawning llama-server, waiting for readiness, and graceful shutdown. Includes three helper functions (findLlamaServer, freePort, serverEnv) and the full startServer/waitReady/stop lifecycle. serverEnv filters out any inherited HIP_VISIBLE_DEVICES and pins it to 0 to mask the Ryzen 9 iGPU, which crashes llama-server if left visible.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
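A minimal usage sketch of how these helpers compose from inside package rocm (illustrative only: the model path and context size are placeholders, and the snippet assumes it runs inside a function that returns error):

binary, err := findLlamaServer()
if err != nil {
	return err
}
port, err := freePort()
if err != nil {
	return err
}
// gpuLayers -1 is clamped to 999 (offload everything); 8192 is an assumed context size.
srv, err := startServer(binary, "/models/example.gguf", port, -1, 8192)
if err != nil {
	return err
}
defer srv.stop()
// srv.client now points at the llama-server instance on 127.0.0.1:<port>.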
168 lines
3.8 KiB
Go
//go:build linux && amd64

package rocm

import (
	"context"
	"fmt"
	"net"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"syscall"
	"time"

	"forge.lthn.ai/core/go-rocm/internal/llamacpp"
)

// server manages a llama-server subprocess.
type server struct {
	cmd     *exec.Cmd
	port    int
	client  *llamacpp.Client
	exited  chan struct{}
	exitErr error
}
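
// Note on the exit plumbing above: the Wait goroutine in startServer sets
// exitErr before closing exited, and close establishes a happens-before
// edge, so any code that has received from exited may read exitErr without
// extra synchronization.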

// findLlamaServer locates the llama-server binary.
// Checks ROCM_LLAMA_SERVER_PATH first, then PATH.
func findLlamaServer() (string, error) {
	if p := os.Getenv("ROCM_LLAMA_SERVER_PATH"); p != "" {
		if _, err := os.Stat(p); err != nil {
			return "", fmt.Errorf("llama-server not found at ROCM_LLAMA_SERVER_PATH=%s: %w", p, err)
		}
		return p, nil
	}
	p, err := exec.LookPath("llama-server")
	if err != nil {
		return "", fmt.Errorf("llama-server not found in PATH: %w", err)
	}
	return p, nil
}

// freePort asks the kernel for a free TCP port on localhost.
func freePort() (int, error) {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		return 0, fmt.Errorf("freePort: %w", err)
	}
	port := ln.Addr().(*net.TCPAddr).Port
	ln.Close()
	return port, nil
}
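
// Note: freePort has an inherent race: the listener is closed before
// llama-server binds the port, so another process could claim it in the
// gap. If that happens, llama-server fails to bind and waitReady surfaces
// the exit error rather than hanging.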

// serverEnv returns the environment for the llama-server subprocess.
// Filters any existing HIP_VISIBLE_DEVICES and sets it to 0 to mask the iGPU.
// This is critical: the Ryzen 9 iGPU crashes llama-server if not masked.
func serverEnv() []string {
	environ := os.Environ()
	env := make([]string, 0, len(environ)+1)
	for _, e := range environ {
		if strings.HasPrefix(e, "HIP_VISIBLE_DEVICES=") {
			continue
		}
		env = append(env, e)
	}
	env = append(env, "HIP_VISIBLE_DEVICES=0")
	return env
}
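
// Note: serverEnv assumes the discrete GPU enumerates as HIP device 0.
// On machines where the iGPU comes first in the HIP device order, the
// hard-coded 0 would select the wrong device.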

// startServer spawns llama-server and waits for it to become ready.
func startServer(binary, modelPath string, port, gpuLayers, ctxSize int) (*server, error) {
	if gpuLayers < 0 {
		gpuLayers = 999
	}
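	// (999 works as an offload-all sentinel: llama.cpp clamps --n-gpu-layers
	// to the model's actual layer count, so any oversized value offloads
	// every layer to the GPU.)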

	args := []string{
		"--model", modelPath,
		"--host", "127.0.0.1",
		"--port", strconv.Itoa(port),
		"--n-gpu-layers", strconv.Itoa(gpuLayers),
	}
	if ctxSize > 0 {
		args = append(args, "--ctx-size", strconv.Itoa(ctxSize))
	}

	cmd := exec.Command(binary, args...)
	cmd.Env = serverEnv()

	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("start llama-server: %w", err)
	}

	s := &server{
		cmd:    cmd,
		port:   port,
		client: llamacpp.NewClient(fmt.Sprintf("http://127.0.0.1:%d", port)),
		exited: make(chan struct{}),
	}

	// Goroutine to detect process exit.
	go func() {
		s.exitErr = cmd.Wait()
		close(s.exited)
	}()

	// Wait for the health endpoint with a 60s timeout.
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	if err := s.waitReady(ctx); err != nil {
		_ = s.stop()
		return nil, fmt.Errorf("llama-server not ready: %w", err)
	}

	return s, nil
}

// waitReady polls the health endpoint until the server is ready.
func (s *server) waitReady(ctx context.Context) error {
	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for llama-server: %w", ctx.Err())
		case <-s.exited:
			return fmt.Errorf("llama-server exited before becoming ready: %v", s.exitErr)
		case <-ticker.C:
			if err := s.client.Health(ctx); err == nil {
				return nil
			}
		}
	}
}
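
// Note: the three-way select above means a crash during model load surfaces
// the process exit error immediately instead of burning the full 60s budget.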

// stop sends SIGTERM and waits up to 5s, then SIGKILL.
func (s *server) stop() error {
	if s.cmd.Process == nil {
		return nil
	}

	// Already exited?
	select {
	case <-s.exited:
		return s.exitErr
	default:
	}

	// Send SIGTERM for graceful shutdown.
	if err := s.cmd.Process.Signal(syscall.SIGTERM); err != nil {
		return fmt.Errorf("sigterm llama-server: %w", err)
	}

	// Wait up to 5 seconds for clean exit.
	select {
	case <-s.exited:
		return s.exitErr
	case <-time.After(5 * time.Second):
		// Force kill.
		if err := s.cmd.Process.Kill(); err != nil {
			return fmt.Errorf("kill llama-server: %w", err)
		}
		<-s.exited
		return s.exitErr
	}
}
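
// Note: there is a narrow window in stop between the non-blocking exited
// check and Signal: if the process exits in that gap, Signal returns
// os.ErrProcessDone and stop reports it as an error even though shutdown
// effectively succeeded.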