Implements inference.Backend via llama-server subprocess (llama.cpp + HIP/ROCm).
Targets RX 7800 XT (gfx1101, RDNA 3, 16GB VRAM). Includes:
- Backend registration with build tags (linux/amd64)
- Stub backend.go with llama-server lifecycle outline
- CLAUDE.md with build instructions for llama.cpp + ROCm
- TODO.md with 5-phase task queue
- FINDINGS.md with hardware specs, VRAM budget, design rationale

Co-Authored-By: Virgil <virgil@lethean.io>

25 lines · 785 B · Go
// Package rocm provides AMD ROCm GPU inference for Linux.
//
// This package implements the inference.Backend and inference.TextModel interfaces
// using llama.cpp compiled with HIP/ROCm for AMD GPUs (RDNA 3+).
//
// # Quick Start
//
//	import (
//		"forge.lthn.ai/core/go-inference"
//		_ "forge.lthn.ai/core/go-rocm" // auto-registers ROCm backend
//	)
//
//	m, err := inference.LoadModel("/path/to/model.gguf")
//	defer m.Close()
//	for tok := range m.Generate(ctx, "Hello", inference.WithMaxTokens(128)) {
//		fmt.Print(tok.Text)
//	}
//
// # Requirements
//
//   - Linux (amd64)
//   - AMD GPU with ROCm support (RDNA 2+ / gfx10xx+, tested on RDNA 3 / gfx1100)
//   - ROCm 6.x+ installed
//   - llama-server binary (from llama.cpp built with -DGGML_HIP=ON)
package rocm