go-ml/backend_http_textmodel.go

// SPDX-License-Identifier: EUPL-1.2

package ml

import (
	"context"
	"errors"
	"iter"

	"forge.lthn.ai/core/go-inference"
)

// HTTPTextModel wraps an HTTPBackend to satisfy the inference.TextModel
// interface. This enables cross-platform consistency: HTTP backends can be
// used anywhere that expects a go-inference TextModel (e.g. go-ai, go-i18n).
//
// Generate and Chat yield the entire HTTP response as a single Token since
// the OpenAI-compatible API returns complete responses (non-streaming).
type HTTPTextModel struct {
	http *HTTPBackend

	// lastErr records the outcome of the most recent Generate or Chat call
	// so callers can retrieve it via Err. Access is unsynchronised, so a
	// single HTTPTextModel should not be shared across goroutines.
	lastErr error
}

// Compile-time check: HTTPTextModel implements inference.TextModel.
var _ inference.TextModel = (*HTTPTextModel)(nil)

// NewHTTPTextModel wraps an HTTPBackend as an inference.TextModel.
func NewHTTPTextModel(backend *HTTPBackend) *HTTPTextModel {
	return &HTTPTextModel{http: backend}
}
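
// A minimal usage sketch (illustrative only: ctx, backend, and the fmt
// import come from the calling code, not from this file):
//
//	model := NewHTTPTextModel(backend)
//	for tok := range model.Generate(ctx, "Hello") {
//		fmt.Print(tok.Text)
//	}
//	if err := model.Err(); err != nil {
//		// the HTTP call failed; no tokens were yielded
//	}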

// Generate sends a single prompt to the HTTP backend and yields the entire
// response as a single Token.
func (m *HTTPTextModel) Generate(ctx context.Context, prompt string, opts ...inference.GenerateOption) iter.Seq[inference.Token] {
	return func(yield func(inference.Token) bool) {
		cfg := inference.ApplyGenerateOpts(opts)
		genOpts := GenOpts{
			Temperature: float64(cfg.Temperature),
			MaxTokens:   cfg.MaxTokens,
			Model:       m.http.Model(),
		}
		result, err := m.http.Generate(ctx, prompt, genOpts)
		if err != nil {
			m.lastErr = err
			return
		}
		m.lastErr = nil
		yield(inference.Token{Text: result.Text})
	}
}

// Chat sends a multi-turn conversation to the HTTP backend and yields the
// entire response as a single Token.
func (m *HTTPTextModel) Chat(ctx context.Context, messages []inference.Message, opts ...inference.GenerateOption) iter.Seq[inference.Token] {
	return func(yield func(inference.Token) bool) {
		cfg := inference.ApplyGenerateOpts(opts)
		genOpts := GenOpts{
			Temperature: float64(cfg.Temperature),
			MaxTokens:   cfg.MaxTokens,
			Model:       m.http.Model(),
		}
		// ml.Message is a type alias for inference.Message, so no conversion
		// is needed before handing the messages to the backend.
		result, err := m.http.Chat(ctx, messages, genOpts)
		if err != nil {
			m.lastErr = err
			return
		}
		m.lastErr = nil
		yield(inference.Token{Text: result.Text})
	}
}
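
// A sketch of a Chat call, assuming inference.Message carries the usual
// Role/Content fields (check the go-inference definition before copying):
//
//	msgs := []inference.Message{
//		{Role: "system", Content: "Answer briefly."},
//		{Role: "user", Content: "What is EUPL-1.2?"},
//	}
//	for tok := range model.Chat(ctx, msgs) {
//		fmt.Print(tok.Text)
//	}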

// Classify is not supported by HTTP backends; it always returns an error.
func (m *HTTPTextModel) Classify(_ context.Context, _ []string, _ ...inference.GenerateOption) ([]inference.ClassifyResult, error) {
	return nil, errors.New("classify not supported by HTTP backend")
}

// BatchGenerate processes multiple prompts sequentially via Generate.
// Per-prompt failures are reported through each BatchResult's Err field;
// the returned error is always nil.
func (m *HTTPTextModel) BatchGenerate(ctx context.Context, prompts []string, opts ...inference.GenerateOption) ([]inference.BatchResult, error) {
	results := make([]inference.BatchResult, len(prompts))
	for i, prompt := range prompts {
		var tokens []inference.Token
		for tok := range m.Generate(ctx, prompt, opts...) {
			tokens = append(tokens, tok)
		}
		results[i] = inference.BatchResult{
			Tokens: tokens,
			Err:    m.lastErr,
		}
	}
	return results, nil
}
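
// Because BatchGenerate itself always returns a nil error, callers should
// inspect each BatchResult individually. A sketch (log comes from the
// calling code):
//
//	results, _ := model.BatchGenerate(ctx, prompts)
//	for i, r := range results {
//		if r.Err != nil {
//			log.Printf("prompt %d failed: %v", i, r.Err)
//		}
//	}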

// ModelType returns the configured model name from the underlying
// HTTPBackend, or "http" when no model name is configured.
func (m *HTTPTextModel) ModelType() string {
	model := m.http.Model()
	if model == "" {
		return "http"
	}
	return model
}

// Info returns minimal model metadata for an HTTP backend.
func (m *HTTPTextModel) Info() inference.ModelInfo {
	return inference.ModelInfo{Architecture: "http"}
}

// Metrics returns zero metrics; HTTP backends do not track token-level
// performance.
func (m *HTTPTextModel) Metrics() inference.GenerateMetrics {
	return inference.GenerateMetrics{}
}

// Err returns the error from the last Generate or Chat call, if any.
// Because iter.Seq yields only Tokens, transport and API errors cannot
// flow through the stream itself; check Err after the sequence completes.
func (m *HTTPTextModel) Err() error {
	return m.lastErr
}

// Close is a no-op for HTTP backends; there are no resources to release.
func (m *HTTPTextModel) Close() error {
	return nil
}

// LlamaTextModel wraps a LlamaBackend as an inference.TextModel. It embeds
// HTTPTextModel for Generate/Chat but overrides ModelType and Close to
// reflect the managed llama-server process.
type LlamaTextModel struct {
	*HTTPTextModel
	llama *LlamaBackend
}

// Compile-time check: LlamaTextModel implements inference.TextModel.
var _ inference.TextModel = (*LlamaTextModel)(nil)

// NewLlamaTextModel wraps a LlamaBackend as an inference.TextModel.
func NewLlamaTextModel(backend *LlamaBackend) *LlamaTextModel {
	return &LlamaTextModel{
		HTTPTextModel: NewHTTPTextModel(backend.http),
		llama:         backend,
	}
}
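
// A sketch of the intended lifecycle (illustrative; starting the
// LlamaBackend and its llama-server process happens elsewhere):
//
//	model := NewLlamaTextModel(backend)
//	defer model.Close() // stops the managed llama-server process
//	for tok := range model.Chat(ctx, messages) {
//		fmt.Print(tok.Text)
//	}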
// ModelType returns "llama" to identify this as a managed llama-server backend.
func (m *LlamaTextModel) ModelType() string {
return "llama"
}

// Close stops the managed llama-server process.
func (m *LlamaTextModel) Close() error {
	return m.llama.Stop()
}