go-rocm/vram.go
Claude 41b34b6779
feat(ax): apply RFC-025 AX compliance review
Principle 1 — Predictable Names:
- rocmModel.srv → rocmModel.server (struct field)
- recordMetrics: met → metrics (local var)
- backend.go/model.go: cfg → config (local vars)
- gguf.go: tc/kc → tensorCount32/kvCount32 (v2 count reads)

Principle 2 — Comments as Usage Examples:
- Added concrete usage examples to all exported functions:
  VRAMInfo, ModelInfo, DiscoverModels, GetVRAMInfo,
  ROCmAvailable, LoadModel, Available, NewClient, Health,
  ChatComplete, Complete, ReadMetadata, FileTypeName

Principle 5 — Test naming (_Good/_Bad/_Ugly):
- All test functions renamed to AX-7 convention across:
  discover_test.go, vram_test.go, server_test.go,
  internal/gguf/gguf_test.go, internal/llamacpp/client_test.go,
  internal/llamacpp/health_test.go

Also: fix go.sum missing entry for dappco.re/go/core transitive dep
(pulled in by go-inference replace directive).

All tests pass: go test ./... -short -count=1

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:33:47 +01:00

74 lines
1.8 KiB
Go

//go:build linux && amd64
package rocm
import (
"os"
"path/filepath"
"strconv"
"strings"
coreerr "forge.lthn.ai/core/go-log"
)
// GetVRAMInfo reads VRAM usage for the discrete GPU from sysfs.
// It identifies the dGPU by selecting the card with the largest VRAM total,
// which avoids hardcoding card numbers (e.g. card0=iGPU, card1=dGPU on Ryzen).
//
// Cards whose mem_info_vram_total cannot be read or parsed are skipped. When
// no card yields a non-zero total an error is returned; if any entry failed
// to read, the first such failure is passed along as the underlying error so
// the root cause (permissions, malformed value, ...) is not lost.
//
// Free is Total - Used, clamped to zero when Used is not below Total.
//
// Note: total and used are read non-atomically from sysfs; transient
// inconsistencies are possible under heavy allocation churn.
//
//	info, err := rocm.GetVRAMInfo()
//	if err != nil {
//		return err
//	}
//	fmt.Printf("VRAM: %d MiB used / %d MiB total (free: %d MiB)",
//		info.Used/(1024*1024), info.Total/(1024*1024), info.Free/(1024*1024))
func GetVRAMInfo() (VRAMInfo, error) {
	const op = "rocm.GetVRAMInfo"

	cards, err := filepath.Glob("/sys/class/drm/card[0-9]*/device/mem_info_vram_total")
	if err != nil {
		return VRAMInfo{}, coreerr.E(op, "glob vram sysfs", err)
	}
	if len(cards) == 0 {
		return VRAMInfo{}, coreerr.E(op, "no GPU VRAM info found in sysfs", nil)
	}

	var (
		bestDir   string
		bestTotal uint64
		firstErr  error // first per-card read failure, kept for diagnostics
	)
	for _, totalPath := range cards {
		total, err := readSysfsUint64(totalPath)
		if err != nil {
			// Best effort: one unreadable card must not hide the others,
			// but remember why it failed in case nothing else works.
			if firstErr == nil {
				firstErr = err
			}
			continue
		}
		if total > bestTotal {
			bestTotal = total
			bestDir = filepath.Dir(totalPath)
		}
	}
	if bestDir == "" {
		// firstErr is nil when every entry was readable but reported zero.
		return VRAMInfo{}, coreerr.E(op, "no readable VRAM sysfs entries", firstErr)
	}

	used, err := readSysfsUint64(filepath.Join(bestDir, "mem_info_vram_used"))
	if err != nil {
		return VRAMInfo{}, coreerr.E(op, "read vram used", err)
	}

	// Guard against unsigned underflow if used momentarily exceeds total.
	free := uint64(0)
	if bestTotal > used {
		free = bestTotal - used
	}

	return VRAMInfo{
		Total: bestTotal,
		Used:  used,
		Free:  free,
	}, nil
}
// readSysfsUint64 loads the whole file at path, strips leading and trailing
// whitespace (sysfs values end with a newline) and parses what remains as a
// base-10 unsigned 64-bit integer. Read and parse errors are returned as-is.
func readSysfsUint64(path string) (uint64, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return 0, readErr
	}

	text := strings.TrimSpace(string(raw))
	value, parseErr := strconv.ParseUint(text, 10, 64)
	return value, parseErr
}