diff --git a/inference.go b/inference.go
index 7601f11..76f68cf 100644
--- a/inference.go
+++ b/inference.go
@@ -17,15 +17,23 @@ import (
 	"forge.lthn.ai/core/go-inference"
 )
 
+// Result holds the response text and optional inference metrics.
+// Backends that support metrics (e.g. MLX via InferenceAdapter) populate
+// Metrics; HTTP and subprocess backends leave it nil.
+type Result struct {
+	Text    string
+	Metrics *inference.GenerateMetrics
+}
+
 // Backend generates text from prompts. Implementations include HTTPBackend
 // (OpenAI-compatible API), LlamaBackend (managed llama-server process), and
 // OllamaBackend (Ollama native API).
 type Backend interface {
 	// Generate sends a single user prompt and returns the response.
-	Generate(ctx context.Context, prompt string, opts GenOpts) (string, error)
+	Generate(ctx context.Context, prompt string, opts GenOpts) (Result, error)
 	// Chat sends a multi-turn conversation and returns the response.
-	Chat(ctx context.Context, messages []Message, opts GenOpts) (string, error)
+	Chat(ctx context.Context, messages []Message, opts GenOpts) (Result, error)
 	// Name returns the backend identifier (e.g. "http", "llama", "ollama").
 	Name() string