From 5578671f90aaf82976a5fe19d441e43143369770 Mon Sep 17 00:00:00 2001
From: Snider <snider@host.uk.com>
Date: Sun, 22 Feb 2026 17:36:16 +0000
Subject: [PATCH] feat: add Result type, break Backend interface to return
 Result

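Backend.Generate and Backend.Chat now return (Result, error) instead of
(string, error). Result carries the response text in Text and, for
backends that report them, inference.GenerateMetrics in Metrics (nil
otherwise).

This is a breaking change: every Backend implementation must update its
method signatures, and callers that used the returned string must read
Result.Text instead.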

Co-Authored-By: Virgil <virgil@lethean.io>
---
 inference.go | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/inference.go b/inference.go
index 7601f11..76f68cf 100644
--- a/inference.go
+++ b/inference.go
@@ -17,15 +17,23 @@ import (
 	"forge.lthn.ai/core/go-inference"
 )
 
+// Result is what Backend.Generate and Backend.Chat return: the response text
+// plus optional inference metrics. Backends that support metrics (e.g. MLX via
+// InferenceAdapter) populate Metrics; HTTP and subprocess backends leave it nil.
+type Result struct {
+	Text    string                     // response text produced by the backend
+	Metrics *inference.GenerateMetrics // optional; nil when the backend reports no metrics
+}
+
 // Backend generates text from prompts. Implementations include HTTPBackend
 // (OpenAI-compatible API), LlamaBackend (managed llama-server process), and
 // OllamaBackend (Ollama native API).
 type Backend interface {
 	// Generate sends a single user prompt and returns the response.
-	Generate(ctx context.Context, prompt string, opts GenOpts) (string, error)
+	Generate(ctx context.Context, prompt string, opts GenOpts) (Result, error)
 
 	// Chat sends a multi-turn conversation and returns the response.
-	Chat(ctx context.Context, messages []Message, opts GenOpts) (string, error)
+	Chat(ctx context.Context, messages []Message, opts GenOpts) (Result, error)
 
 	// Name returns the backend identifier (e.g. "http", "llama", "ollama").
 	Name() string