Principle 1 — Predictable Names: - rocmModel.srv → rocmModel.server (struct field) - recordMetrics: met → metrics (local var) - backend.go/model.go: cfg → config (local vars) - gguf.go: tc/kc → tensorCount32/kvCount32 (v2 count reads) Principle 2 — Comments as Usage Examples: - Added concrete usage examples to all exported functions: VRAMInfo, ModelInfo, DiscoverModels, GetVRAMInfo, ROCmAvailable, LoadModel, Available, NewClient, Health, ChatComplete, Complete, ReadMetadata, FileTypeName Principle 5 — Test naming (_Good/_Bad/_Ugly): - All test functions renamed to AX-7 convention across: discover_test.go, vram_test.go, server_test.go, internal/gguf/gguf_test.go, internal/llamacpp/client_test.go, internal/llamacpp/health_test.go Also: fix go.sum missing entry for dappco.re/go/core transitive dep (pulled in by go-inference replace directive). All tests pass: go test ./... -short -count=1 Co-Authored-By: Virgil <virgil@lethean.io> Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
51 lines
1.7 KiB
Go
// Package rocm provides AMD ROCm GPU inference for Linux.
//
// This package implements the inference.Backend and inference.TextModel interfaces
// using llama.cpp compiled with HIP/ROCm for AMD GPUs (RDNA 3+).
//
// # Quick Start
//
//	import (
//		"forge.lthn.ai/core/go-inference"
//		_ "forge.lthn.ai/core/go-rocm" // auto-registers ROCm backend
//	)
//
//	m, err := inference.LoadModel("/path/to/model.gguf")
//	defer m.Close()
//	for tok := range m.Generate(ctx, "Hello", inference.WithMaxTokens(128)) {
//		fmt.Print(tok.Text)
//	}
//
// # Requirements
//
//   - Linux (amd64)
//   - AMD GPU with ROCm support (RDNA 2+ / gfx10xx+, tested on RDNA 3 / gfx1100)
//   - ROCm 6.x+ installed
//   - llama-server binary (from llama.cpp built with -DGGML_HIP=ON)
package rocm
// VRAMInfo reports GPU video memory usage in bytes.
//
// Example:
//
//	info, err := rocm.GetVRAMInfo()
//	fmt.Printf("VRAM: %d MiB used / %d MiB total", info.Used/(1024*1024), info.Total/(1024*1024))
type VRAMInfo struct {
	Total uint64 // total VRAM on the device, in bytes
	Used  uint64 // VRAM currently allocated, in bytes
	Free  uint64 // VRAM still available, in bytes
}
|
// ModelInfo describes a GGUF model file discovered on disk.
//
// Example:
//
//	models, _ := rocm.DiscoverModels("/data/lem/gguf")
//	for _, m := range models {
//		fmt.Printf("%s (%s %s, ctx=%d)\n", m.Name, m.Architecture, m.Quantisation, m.ContextLen)
//	}
type ModelInfo struct {
	Path         string // full path to the .gguf file
	Architecture string // GGUF architecture (e.g. "gemma3", "llama", "qwen2")
	Name         string // human-readable model name from GGUF metadata
	Quantisation string // quantisation level (e.g. "Q4_K_M", "Q8_0")
	Parameters   string // parameter-size label (e.g. "1B", "8B")
	FileSize     int64  // size of the file on disk, in bytes
	ContextLen   uint32 // native context window length
}