diff --git a/inference.go b/inference.go
index 1fbf744..93eebe2 100644
--- a/inference.go
+++ b/inference.go
@@ -74,6 +74,16 @@ type GenerateMetrics struct {
 	ActiveMemoryBytes uint64 // Active GPU memory after operation
 }
 
+// ModelInfo holds metadata about a loaded model.
+type ModelInfo struct {
+	Architecture string // e.g. "gemma3", "qwen3", "llama"
+	VocabSize    int    // Vocabulary size
+	NumLayers    int    // Number of transformer layers
+	HiddenSize   int    // Hidden dimension
+	QuantBits    int    // Quantisation bits (0 = unquantised, 4 = 4-bit, 8 = 8-bit)
+	QuantGroup   int    // Quantisation group size (0 if unquantised)
+}
+
 // TextModel generates text from a loaded model.
 type TextModel interface {
 	// Generate streams tokens for the given prompt.
@@ -95,6 +105,9 @@ type TextModel interface {
 	// ModelType returns the architecture identifier (e.g. "gemma3", "qwen3", "llama3").
 	ModelType() string
 
+	// Info returns metadata about the loaded model (architecture, quantisation, etc.).
+	Info() ModelInfo
+
 	// Metrics returns performance metrics from the last inference operation.
 	// Valid after Generate (iterator exhausted), Chat, Classify, or BatchGenerate.
 	Metrics() GenerateMetrics