feat: add ModelInfo type and Info() to TextModel

Expose model metadata: architecture, vocab size, layer count, hidden
dimension, quantisation bits and group size.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Snider 2026-02-19 23:36:16 +00:00
parent df176765e7
commit 28f444ced4

View file

@ -74,6 +74,16 @@ type GenerateMetrics struct {
ActiveMemoryBytes uint64 // Active GPU memory after operation
}
// ModelInfo describes a loaded model: its architecture identifier together
// with its size and quantisation parameters.
type ModelInfo struct {
	// Architecture is the architecture identifier, e.g. "gemma3", "qwen3", "llama".
	Architecture string

	// VocabSize is the vocabulary size.
	VocabSize int

	// NumLayers is the number of transformer layers.
	NumLayers int

	// HiddenSize is the hidden dimension.
	HiddenSize int

	// QuantBits is the number of quantisation bits:
	// 0 = unquantised, 4 = 4-bit, 8 = 8-bit.
	QuantBits int

	// QuantGroup is the quantisation group size; 0 if unquantised.
	QuantGroup int
}
// TextModel generates text from a loaded model.
type TextModel interface {
// Generate streams tokens for the given prompt.
@ -95,6 +105,9 @@ type TextModel interface {
// ModelType returns the architecture identifier (e.g. "gemma3", "qwen3", "llama3").
ModelType() string
// Info returns metadata about the loaded model (architecture, quantisation, etc.).
Info() ModelInfo
// Metrics returns performance metrics from the last inference operation.
// Valid after Generate (iterator exhausted), Chat, Classify, or BatchGenerate.
Metrics() GenerateMetrics