go-rocm/server_test.go
Claude 661d37c5c1
style(ax): rename loop variable e→envEntry for AX naming compliance
Co-Authored-By: Virgil <virgil@lethean.io>
2026-03-31 08:25:10 +01:00

291 lines
8.4 KiB
Go

//go:build linux && amd64
package rocm
import (
"context"
"testing"
"dappco.re/go/core"
"forge.lthn.ai/core/go-inference"
coreerr "forge.lthn.ai/core/go-log"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestFindLlamaServer_Good_InPATH checks that findLlamaServer resolves a
// llama-server binary when no override is configured.
//
// The override variable is blanked first so that an ambient
// ROCM_LLAMA_SERVER_PATH cannot satisfy the lookup in place of PATH; the
// EmptyPATH test in this file relies on the same "empty means unset" handling.
//
// NOTE(review): this still requires llama-server to be installed on the host
// (the original author had it at /usr/local/bin/llama-server).
func TestFindLlamaServer_Good_InPATH(t *testing.T) {
	t.Setenv("ROCM_LLAMA_SERVER_PATH", "")

	path, err := findLlamaServer()
	require.NoError(t, err)
	assert.Contains(t, path, "llama-server")
}
func TestFindLlamaServer_Good_EnvOverride(t *testing.T) {
t.Setenv("ROCM_LLAMA_SERVER_PATH", "/usr/local/bin/llama-server")
path, err := findLlamaServer()
require.NoError(t, err)
assert.Equal(t, "/usr/local/bin/llama-server", path)
}
// TestFindLlamaServer_Bad_EnvPathMissing points the override variable at a
// path that does not exist and expects an error mentioning "not found".
func TestFindLlamaServer_Bad_EnvPathMissing(t *testing.T) {
	const missingBinary = "/nonexistent/llama-server"
	t.Setenv("ROCM_LLAMA_SERVER_PATH", missingBinary)

	_, lookupErr := findLlamaServer()
	assert.ErrorContains(t, lookupErr, "not found")
}
// TestFindLlamaServer_Ugly_EmptyPATH blanks both the override variable and
// PATH, leaving findLlamaServer nothing to resolve, and expects an error.
func TestFindLlamaServer_Ugly_EmptyPATH(t *testing.T) {
	// Override first, then PATH — the same order as before.
	for _, variable := range []string{"ROCM_LLAMA_SERVER_PATH", "PATH"} {
		t.Setenv(variable, "")
	}

	_, lookupErr := findLlamaServer()
	assert.Error(t, lookupErr)
}
// TestFreePort_Good checks that freePort succeeds and yields a number inside
// the valid TCP port range (1..65535).
func TestFreePort_Good(t *testing.T) {
	const highestPort = 65535

	allocated, err := freePort()
	require.NoError(t, err)

	assert.Greater(t, allocated, 0)
	assert.Less(t, allocated, highestPort+1)
}
// TestFreePort_Good_UniquePerCall calls freePort twice in a row and checks
// that every call succeeds and returns a port in the valid range.
//
// Strict inequality of the two ports is intentionally not asserted: the
// operating system is free to hand the same ephemeral port out again once it
// has been released, which would make such an assertion flaky.
func TestFreePort_Good_UniquePerCall(t *testing.T) {
	first, err := freePort()
	require.NoError(t, err)
	second, err := freePort()
	require.NoError(t, err)

	for _, port := range []int{first, second} {
		assert.Greater(t, port, 0)
		assert.Less(t, port, 65536)
	}
}
// TestFreePort_Bad_InvalidAddr documents that freePort hands out an
// unprivileged port (above 1023). Per the original note, freePort binds to
// localhost and therefore has no failing input to exercise here.
func TestFreePort_Bad_InvalidAddr(t *testing.T) {
	const lastPrivilegedPort = 1023

	allocated, err := freePort()
	require.NoError(t, err)
	assert.Greater(t, allocated, lastPrivilegedPort, "expected unprivileged port")
}
// TestFreePort_Ugly_ReturnsUsablePort checks that freePort succeeds and does
// not return port zero.
//
// NOTE(review): the test name suggests the port is re-bound, but no listener
// is opened here — only the non-zero property is verified.
func TestFreePort_Ugly_ReturnsUsablePort(t *testing.T) {
	candidate, err := freePort()
	require.NoError(t, err)

	assert.NotZero(t, candidate)
}
// TestServerEnv_Good_SetsHIPVisibleDevices collects every HIP_VISIBLE_DEVICES
// entry produced by serverEnv and expects exactly one, pinned to device 0.
func TestServerEnv_Good_SetsHIPVisibleDevices(t *testing.T) {
	const hipPrefix = "HIP_VISIBLE_DEVICES="

	var hipEntries []string
	for _, envEntry := range serverEnv() {
		if !core.HasPrefix(envEntry, hipPrefix) {
			continue
		}
		hipEntries = append(hipEntries, envEntry)
	}

	assert.Equal(t, []string{"HIP_VISIBLE_DEVICES=0"}, hipEntries)
}
// TestServerEnv_Good_FiltersExistingHIPVisibleDevices sets an ambient
// HIP_VISIBLE_DEVICES=1 and expects serverEnv to replace it, leaving a single
// HIP_VISIBLE_DEVICES=0 entry.
func TestServerEnv_Good_FiltersExistingHIPVisibleDevices(t *testing.T) {
	t.Setenv("HIP_VISIBLE_DEVICES", "1")

	expected := []string{"HIP_VISIBLE_DEVICES=0"}

	var hipEntries []string
	for _, envEntry := range serverEnv() {
		isHIP := core.HasPrefix(envEntry, "HIP_VISIBLE_DEVICES=")
		if isHIP {
			hipEntries = append(hipEntries, envEntry)
		}
	}

	assert.Equal(t, expected, hipEntries)
}
// TestServerEnv_Bad_NilEnviron checks that serverEnv returns a usable
// environment regardless of ambient state: it must not panic, must not be
// empty, and must carry the HIP_VISIBLE_DEVICES=0 entry it is documented to
// always append.
func TestServerEnv_Bad_NilEnviron(t *testing.T) {
	env := serverEnv()
	assert.NotEmpty(t, env)
	// Previously only non-emptiness was checked, which any inherited variable
	// would satisfy; pin the entry the contract actually promises.
	assert.Contains(t, env, "HIP_VISIBLE_DEVICES=0")
}
// TestServerEnv_Ugly_MultipleHIPEntries sets an ambient multi-device value
// ("2,3") and expects serverEnv to still emit exactly one
// HIP_VISIBLE_DEVICES=0 entry.
//
// NOTE(review): only a single ambient variable is set here; duplicate
// entries in the process environment are not simulated.
func TestServerEnv_Ugly_MultipleHIPEntries(t *testing.T) {
	t.Setenv("HIP_VISIBLE_DEVICES", "2,3")

	environment := serverEnv()

	var hipEntries []string
	for index := range environment {
		if core.HasPrefix(environment[index], "HIP_VISIBLE_DEVICES=") {
			hipEntries = append(hipEntries, environment[index])
		}
	}

	assert.Equal(t, []string{"HIP_VISIBLE_DEVICES=0"}, hipEntries)
}
func TestAvailable_Good(t *testing.T) {
b := &rocmBackend{}
if !(&core.Fs{}).New("/").Exists("/dev/kfd") {
t.Skip("no ROCm hardware")
}
assert.True(t, b.Available())
}
// TestAvailable_Bad_NoDevice expects Available to report false on hosts
// without /dev/kfd. On machines that do have the device the test is skipped.
func TestAvailable_Bad_NoDevice(t *testing.T) {
	rootFs := (&core.Fs{}).New("/")
	if rootFs.Exists("/dev/kfd") {
		t.Skip("ROCm device present — skip no-device bad path on this machine")
	}

	backend := new(rocmBackend)
	assert.False(t, backend.Available())
}
// TestAvailable_Ugly_NoLlamaServer checks that Available reports false when
// llama-server cannot be found.
//
// /dev/kfd cannot be fabricated in a test, so the missing-binary condition is
// forced by blanking PATH and the override variable. Whether or not the
// device exists, the expected result is the same: with kfd present the
// missing binary makes Available false, and with kfd absent it is false
// anyway. The result used to be discarded; it is now asserted.
func TestAvailable_Ugly_NoLlamaServer(t *testing.T) {
	t.Setenv("PATH", "")
	t.Setenv("ROCM_LLAMA_SERVER_PATH", "")

	b := &rocmBackend{}
	assert.False(t, b.Available())
}
// TestServerAlive_Good_Running expects alive to be true while the server's
// exited channel is still open.
func TestServerAlive_Good_Running(t *testing.T) {
	stillOpen := make(chan struct{})
	running := &server{exited: stillOpen}

	assert.True(t, running.alive())
}
// TestServerAlive_Good_Exited expects alive to be false once the exited
// channel has been closed and an exit error recorded.
func TestServerAlive_Good_Exited(t *testing.T) {
	done := make(chan struct{})
	close(done)

	stopped := &server{
		exited:  done,
		exitErr: coreerr.E("test", "process killed", nil),
	}

	assert.False(t, stopped.alive())
}
// TestServerAlive_Bad_NilExited covers a server whose exited channel is
// closed while exitErr is nil, and expects alive to report false.
//
// NOTE(review): the original note says a nil exited channel would panic and
// that exited must always be initialised; that case is not exercised here and
// should be confirmed against alive's implementation.
func TestServerAlive_Bad_NilExited(t *testing.T) {
	done := make(chan struct{})
	close(done)

	// exitErr is deliberately left at its zero value (nil).
	stopped := &server{exited: done}

	assert.False(t, stopped.alive())
}
// TestServerAlive_Ugly_ExitedAfterStart observes alive on the same server
// before and after its exited channel is closed, expecting true then false.
func TestServerAlive_Ugly_ExitedAfterStart(t *testing.T) {
	done := make(chan struct{})
	process := &server{exited: done}

	aliveBefore := process.alive()
	close(done)
	aliveAfter := process.alive()

	assert.True(t, aliveBefore)
	assert.False(t, aliveAfter)
}
func TestGenerate_Good_YieldsNoTokensOnEmptyServer(t *testing.T) {
// A newly-created dead server produces zero tokens and records an error.
exited := make(chan struct{})
close(exited)
s := &server{exited: exited, exitErr: nil}
m := &rocmModel{server: s}
var tokenCount int
for range m.Generate(context.Background(), "hello") {
tokenCount++
}
assert.Equal(t, 0, tokenCount)
}
// TestGenerate_Bad_ServerDead runs Generate against a server that exited with
// an error and expects no tokens plus an Err mentioning "server has exited".
func TestGenerate_Bad_ServerDead(t *testing.T) {
	done := make(chan struct{})
	close(done)

	deadServer := &server{exited: done, exitErr: coreerr.E("test", "process killed", nil)}
	model := &rocmModel{server: deadServer}

	tokenCount := 0
	stream := model.Generate(context.Background(), "hello")
	for range stream {
		tokenCount++
	}

	assert.Equal(t, 0, tokenCount)
	assert.ErrorContains(t, model.Err(), "server has exited")
}
// TestGenerate_Ugly_ErrClearedBetweenCalls drains one Generate call against a
// dead server and expects Err to be non-nil afterwards.
//
// NOTE(review): the name refers to Err being reset at the start of each
// Generate call, but only a single call is made, so the reset itself is not
// observed by this test.
func TestGenerate_Ugly_ErrClearedBetweenCalls(t *testing.T) {
	done := make(chan struct{})
	close(done)

	deadServer := &server{
		exited:  done,
		exitErr: coreerr.E("test", "killed", nil),
	}
	model := &rocmModel{server: deadServer}

	for range model.Generate(context.Background(), "first") {
		// Drain the stream; the tokens themselves are irrelevant.
	}

	assert.Error(t, model.Err())
}
// TestStartServer_Good_RejectsNegativeLayers starts /bin/false with
// gpuLayers=-1 and expects startServer to give up with a
// "failed after 3 attempts" error, since the process exits immediately.
//
// NOTE(review): per the original note, -1 is meant to be translated to 999
// (all layers on GPU); that translation is not asserted here.
func TestStartServer_Good_RejectsNegativeLayers(t *testing.T) {
	const (
		exitingBinary = "/bin/false"
		missingModel  = "/nonexistent/model.gguf"
	)

	_, startErr := startServer(exitingBinary, missingModel, -1, 0, 0)
	require.Error(t, startErr)
	assert.Contains(t, startErr.Error(), "failed after 3 attempts")
}
// TestStartServer_Bad_BinaryNotFound expects startServer to return an error
// when the server binary path does not exist.
func TestStartServer_Bad_BinaryNotFound(t *testing.T) {
	const (
		missingBinary = "/nonexistent/binary"
		missingModel  = "/nonexistent/model.gguf"
	)

	_, startErr := startServer(missingBinary, missingModel, 0, 0, 0)
	require.Error(t, startErr)
}
// TestStartServer_Ugly_RetriesOnProcessExit launches /bin/false, which starts
// but exits straight away with a non-zero status, and expects startServer to
// report "failed after 3 attempts".
func TestStartServer_Ugly_RetriesOnProcessExit(t *testing.T) {
	const (
		exitingBinary = "/bin/false"
		missingModel  = "/nonexistent/model.gguf"
	)

	_, startErr := startServer(exitingBinary, missingModel, 999, 0, 0)
	require.Error(t, startErr)

	message := startErr.Error()
	assert.Contains(t, message, "failed after 3 attempts")
}
func TestChat_Good_EmptyMessages(t *testing.T) {
// Chat with an empty message list on a dead server yields no tokens.
exited := make(chan struct{})
close(exited)
s := &server{exited: exited, exitErr: nil}
m := &rocmModel{server: s}
var tokenCount int
for range m.Chat(context.Background(), nil) {
tokenCount++
}
assert.Equal(t, 0, tokenCount)
}
// TestChat_Bad_ServerDead sends a single user message to a server that exited
// with an error and expects no tokens plus an Err mentioning
// "server has exited".
func TestChat_Bad_ServerDead(t *testing.T) {
	done := make(chan struct{})
	close(done)

	deadServer := &server{exited: done, exitErr: coreerr.E("test", "process killed", nil)}
	model := &rocmModel{server: deadServer}

	conversation := []inference.Message{
		{Role: "user", Content: "hello"},
	}

	tokenCount := 0
	stream := model.Chat(context.Background(), conversation)
	for range stream {
		tokenCount++
	}

	assert.Equal(t, 0, tokenCount)
	assert.ErrorContains(t, model.Err(), "server has exited")
}
// TestChat_Ugly_MultipleRolesOnDeadServer sends a system + user conversation
// to a dead server and expects Chat to return without yielding tokens while
// leaving a non-nil Err behind.
func TestChat_Ugly_MultipleRolesOnDeadServer(t *testing.T) {
	done := make(chan struct{})
	close(done)

	deadServer := &server{
		exited:  done,
		exitErr: coreerr.E("test", "killed", nil),
	}
	model := &rocmModel{server: deadServer}

	systemMessage := inference.Message{Role: "system", Content: "You are helpful."}
	userMessage := inference.Message{Role: "user", Content: "Hello"}
	conversation := []inference.Message{systemMessage, userMessage}

	tokenCount := 0
	for range model.Chat(context.Background(), conversation) {
		tokenCount++
	}

	assert.Equal(t, 0, tokenCount)
	assert.Error(t, model.Err())
}