feat: add ModelInfo type and Info() to TextModel
Expose model metadata: architecture, vocab size, layer count, hidden dimension, quantisation bits and group size.

Co-Authored-By: Virgil <virgil@lethean.io>
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
df176765e7
commit
28f444ced4
1 changed file with 13 additions and 0 deletions
13
inference.go
13
inference.go
|
|
@@ -74,6 +74,16 @@ type GenerateMetrics struct {
|
|||
ActiveMemoryBytes uint64 // Active GPU memory after operation
|
||||
}
|
||||
|
||||
// ModelInfo describes a loaded model: its architecture, its dimensions and
// its quantisation settings.
type ModelInfo struct {
	// Architecture is the architecture name, e.g. "gemma3", "qwen3", "llama".
	Architecture string
	// VocabSize is the vocabulary size.
	VocabSize int
	// NumLayers is the number of transformer layers.
	NumLayers int
	// HiddenSize is the hidden dimension.
	HiddenSize int
	// QuantBits is the quantisation bit width: 0 = unquantised, 4 = 4-bit, 8 = 8-bit.
	QuantBits int
	// QuantGroup is the quantisation group size; 0 if unquantised.
	QuantGroup int
}
|
||||
|
||||
// TextModel generates text from a loaded model.
|
||||
type TextModel interface {
|
||||
// Generate streams tokens for the given prompt.
|
||||
|
|
@@ -95,6 +105,9 @@ type TextModel interface {
|
|||
// ModelType returns the architecture identifier (e.g. "gemma3", "qwen3", "llama3").
|
||||
ModelType() string
|
||||
|
||||
// Info returns metadata about the loaded model (architecture, quantisation, etc.).
|
||||
Info() ModelInfo
|
||||
|
||||
// Metrics returns performance metrics from the last inference operation.
|
||||
// Valid after Generate (iterator exhausted), Chat, Classify, or BatchGenerate.
|
||||
Metrics() GenerateMetrics
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue