feat: add Result type, break Backend interface to return Result

Backend.Generate and Backend.Chat now return (Result, error) instead of
(string, error). Result carries the response text and optional
inference.GenerateMetrics for backends that support them.

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-02-22 17:36:16 +00:00
parent dcf58d041d
commit 5578671f90

View file

@@ -17,15 +17,23 @@ import (
"forge.lthn.ai/core/go-inference"
)
// Result holds the response text and optional inference metrics.
// Backends that support metrics (e.g. MLX via InferenceAdapter) populate
// Metrics; HTTP and subprocess backends leave it nil.
type Result struct {
// Text is the generated response text.
Text string
// Metrics carries optional per-generation inference metrics; nil for
// backends that do not report them (HTTP and subprocess backends).
Metrics *inference.GenerateMetrics
}
// Backend generates text from prompts. Implementations include HTTPBackend
// (OpenAI-compatible API), LlamaBackend (managed llama-server process), and
// OllamaBackend (Ollama native API).
type Backend interface {
// Generate sends a single user prompt and returns the response.
Generate(ctx context.Context, prompt string, opts GenOpts) (string, error)
Generate(ctx context.Context, prompt string, opts GenOpts) (Result, error)
// Chat sends a multi-turn conversation and returns the response.
Chat(ctx context.Context, messages []Message, opts GenOpts) (string, error)
Chat(ctx context.Context, messages []Message, opts GenOpts) (Result, error)
// Name returns the backend identifier (e.g. "http", "llama", "ollama").
Name() string