feat: add ModelInfo type and Info() to TextModel

Expose model metadata: architecture, vocab size, layer count, hidden dimension, quantisation bits and group size.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 23:36:16 +00:00 · 2026-02-19 23:36:16 +00:00 · 28f444ced4
commit 28f444ced4
parent df176765e7
1 changed files with 13 additions and 0 deletions
--- a/inference.go
+++ b/inference.go
@ -74,6 +74,16 @@ type GenerateMetrics struct {
 	ActiveMemoryBytes uint64 // Active GPU memory after operation
 }

+// ModelInfo holds static metadata about a loaded model, as returned by TextModel.Info.
+type ModelInfo struct {
+	Architecture string // Architecture name, e.g. "gemma3", "qwen3", "llama"
+	VocabSize    int    // Vocabulary size
+	NumLayers    int    // Number of transformer layers
+	HiddenSize   int    // Hidden dimension
+	QuantBits    int    // Quantisation bit width (0 = unquantised, 4 = 4-bit, 8 = 8-bit)
+	QuantGroup   int    // Quantisation group size (0 if unquantised)
+}
+
 // TextModel generates text from a loaded model.
 type TextModel interface {
 	// Generate streams tokens for the given prompt.
@ -95,6 +105,9 @@ type TextModel interface {
 	// ModelType returns the architecture identifier (e.g. "gemma3", "qwen3", "llama3").
 	ModelType() string

+	// Info returns metadata about the loaded model (architecture, quantisation, etc.).
+	Info() ModelInfo
+
 	// Metrics returns performance metrics from the last inference operation.
 	// Valid after Generate (iterator exhausted), Chat, Classify, or BatchGenerate.
 	Metrics() GenerateMetrics