go-rocm/backend.go
Snider aa42cff417 feat: scaffold go-rocm AMD GPU inference package
Implements inference.Backend via llama-server subprocess (llama.cpp + HIP/ROCm).
Targets RX 7800 XT (gfx1101, RDNA 3, 16GB VRAM).

Includes:
- Backend registration with build tags (linux/amd64)
- Stub backend.go with llama-server lifecycle outline
- CLAUDE.md with build instructions for llama.cpp + ROCm
- TODO.md with 5-phase task queue
- FINDINGS.md with hardware specs, VRAM budget, design rationale

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-19 19:39:40 +00:00

28 lines
953 B
Go

//go:build linux && amd64
package rocm
import "forge.lthn.ai/core/go-inference"
// rocmBackend implements inference.Backend for AMD ROCm GPUs.
// Uses llama-server (llama.cpp built with HIP) as the inference engine.
type rocmBackend struct{}
func (b *rocmBackend) Name() string { return "rocm" }
func (b *rocmBackend) Available() bool {
// TODO: Check for ROCm runtime + GPU presence
// - /dev/kfd exists (ROCm kernel driver)
// - rocm-smi detects a GPU
// - llama-server binary is findable
return false // Stub until Phase 1 implementation
}
func (b *rocmBackend) LoadModel(path string, opts ...inference.LoadOption) (inference.TextModel, error) {
// TODO: Phase 1 implementation
// 1. Find llama-server binary (PATH or configured location)
// 2. Spawn llama-server with --model path --port <free> --n-gpu-layers cfg.GPULayers
// 3. Wait for health endpoint to respond
// 4. Return rocmModel wrapping the HTTP client
return nil, nil
}