fix: clamp VRAM Free to prevent uint64 underflow
Guard against a transient sysfs inconsistency where used > total.

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
501de83d3b
commit
954c57071a
1 changed file with 9 additions and 1 deletion
10
vram.go
10
vram.go
|
|
@@ -13,6 +13,9 @@ import (
|
||||||
// GetVRAMInfo reads VRAM usage for the discrete GPU from sysfs.
|
// GetVRAMInfo reads VRAM usage for the discrete GPU from sysfs.
|
||||||
// It identifies the dGPU by selecting the card with the largest VRAM total,
|
// It identifies the dGPU by selecting the card with the largest VRAM total,
|
||||||
// which avoids hardcoding card numbers (e.g. card0=iGPU, card1=dGPU on Ryzen).
|
// which avoids hardcoding card numbers (e.g. card0=iGPU, card1=dGPU on Ryzen).
|
||||||
|
//
|
||||||
|
// Note: total and used are read non-atomically from sysfs; transient
|
||||||
|
// inconsistencies are possible under heavy allocation churn.
|
||||||
func GetVRAMInfo() (VRAMInfo, error) {
|
func GetVRAMInfo() (VRAMInfo, error) {
|
||||||
cards, err := filepath.Glob("/sys/class/drm/card[0-9]*/device/mem_info_vram_total")
|
cards, err := filepath.Glob("/sys/class/drm/card[0-9]*/device/mem_info_vram_total")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@@ -45,10 +48,15 @@ func GetVRAMInfo() (VRAMInfo, error) {
|
||||||
return VRAMInfo{}, fmt.Errorf("rocm: read vram used: %w", err)
|
return VRAMInfo{}, fmt.Errorf("rocm: read vram used: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free := uint64(0)
|
||||||
|
if bestTotal > used {
|
||||||
|
free = bestTotal - used
|
||||||
|
}
|
||||||
|
|
||||||
return VRAMInfo{
|
return VRAMInfo{
|
||||||
Total: bestTotal,
|
Total: bestTotal,
|
||||||
Used: used,
|
Used: used,
|
||||||
Free: bestTotal - used,
|
Free: free,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue