diff --git a/CLAUDE.md b/CLAUDE.md index 656ec4f..95b18d7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -14,101 +14,42 @@ ML inference backends, scoring engine, and agent orchestrator. 7.5K LOC across 4 ## Critical Context: go-inference Migration -**This is the #1 priority.** Phase 1 in TODO.md. +**Phase 1 is complete.** Both directions of the bridge are implemented: -The package currently defines its own `Backend` interface that returns `(string, error)`. The shared `go-inference` package defines `TextModel` which returns `iter.Seq[Token]` (Go 1.23+ range-over-func). Everything downstream is blocked until go-ml bridges these two interfaces. +1. **Forward adapter** (`adapter.go`): `inference.TextModel` (iter.Seq) -> `ml.Backend`/`ml.StreamingBackend` (string/callback). Used by `backend_mlx.go` to wrap Metal GPU models. +2. **Reverse adapters** (`backend_http_textmodel.go`): `HTTPBackend`/`LlamaBackend` -> `inference.TextModel`. Enables HTTP and llama-server backends to be used anywhere that expects a go-inference TextModel. -### Interface Gap +### Interface Bridge (DONE) ``` -go-ml (CURRENT) go-inference (TARGET) -───────────────── ───────────────────── -Backend.Generate(ctx, prompt, GenOpts) TextModel.Generate(ctx, prompt, ...GenerateOption) - → (string, error) → iter.Seq[Token] - -Backend.Chat(ctx, messages, GenOpts) TextModel.Chat(ctx, messages, ...GenerateOption) - → (string, error) → iter.Seq[Token] - -StreamingBackend.GenerateStream( (streaming is built-in via iter.Seq) - ctx, prompt, opts, TokenCallback) - → error - -GenOpts{Temperature, MaxTokens, Model} GenerateConfig{MaxTokens, Temperature, - TopK, TopP, StopTokens, RepeatPenalty} - (configured via WithMaxTokens(n) etc.) +ml.Backend (string) <──adapter.go──> inference.TextModel (iter.Seq[Token]) + <──backend_http_textmodel.go──> ``` -### What the Adapter Must Do +- `InferenceAdapter`: TextModel -> Backend + StreamingBackend (for MLX, ROCm, etc.) +- `HTTPTextModel`: HTTPBackend -> TextModel (for remote APIs) +- `LlamaTextModel`: LlamaBackend -> TextModel (for managed llama-server) -```go -// InferenceAdapter wraps go-inference.TextModel to satisfy ml.Backend + ml.StreamingBackend. -// This is the bridge between the new iterator-based API and the legacy string-return API. -type InferenceAdapter struct { - model inference.TextModel -} +### backend_mlx.go (DONE) -// Generate collects all tokens from the iterator into a string. -func (a *InferenceAdapter) Generate(ctx context.Context, prompt string, opts GenOpts) (string, error) { - genOpts := convertOpts(opts) // GenOpts → []inference.GenerateOption - var buf strings.Builder - for tok := range a.model.Generate(ctx, prompt, genOpts...) { - buf.WriteString(tok.Text) - } - if err := a.model.Err(); err != nil { - return buf.String(), err - } - return buf.String(), nil -} +Rewritten from 253 LOC to ~35 LOC. Loads via `inference.LoadModel()` and wraps in `InferenceAdapter`. Uses go-mlx's Metal backend registered via `init()`. -// GenerateStream yields tokens to the callback as they arrive. -func (a *InferenceAdapter) GenerateStream(ctx context.Context, prompt string, opts GenOpts, cb TokenCallback) error { - genOpts := convertOpts(opts) - for tok := range a.model.Generate(ctx, prompt, genOpts...) 
{ - if err := cb(tok.Text); err != nil { - return err - } - } - return a.model.Err() -} -``` +### Downstream Consumers Verified -### backend_mlx.go Is Broken - -After go-mlx Phase 4, the old subpackage imports no longer exist: -- `forge.lthn.ai/core/go-mlx/cache` — **REMOVED** (now `internal/metal`) -- `forge.lthn.ai/core/go-mlx/model` — **REMOVED** (now `internal/metal`) -- `forge.lthn.ai/core/go-mlx/sample` — **REMOVED** (now `internal/metal`) -- `forge.lthn.ai/core/go-mlx/tokenizer` — **REMOVED** (now `internal/metal`) - -The new go-mlx public API is: -```go -import ( - "forge.lthn.ai/core/go-inference" - _ "forge.lthn.ai/core/go-mlx" // registers "metal" backend via init() -) - -m, err := inference.LoadModel("/path/to/model/", inference.WithContextLen(4096)) -defer m.Close() -for tok := range m.Generate(ctx, "prompt", inference.WithMaxTokens(128)) { - fmt.Print(tok.Text) -} -``` - -**The rewrite**: Delete the 253 LOC of manual tokenisation/KV cache/sampling. Replace with ~60 LOC that loads via go-inference and wraps in `InferenceAdapter`. +- `service.go` — `Service.Generate()` calls `Backend.Generate()`. InferenceAdapter satisfies Backend. No changes needed. +- `judge.go` — `Judge.judgeChat()` calls `Backend.Generate()`. Same contract, works as before. ## Commands ```bash go mod download # FIRST RUN: populate go.sum -go test ./... # Run all tests (some will fail until Phase 1) +go test ./... # Run all tests go test -v -run TestHeuristic # Single test go test -bench=. ./... # Benchmarks (none exist yet) go test -race ./... # Race detector go vet ./... # Static analysis ``` -**Note**: `backend_mlx.go` won't compile until rewritten (Phase 1) — it imports dead go-mlx subpackages. On darwin, the compiler will hit these broken imports. The Phase 1 rewrite fixes this by replacing the 253 LOC with ~60 LOC using go-inference. - ## Local Dependencies All resolve via `replace` directives in go.mod: @@ -125,9 +66,11 @@ All resolve via `replace` directives in go.mod: | File | Backend | Status | |------|---------|--------| -| `backend_mlx.go` | MLX/Metal GPU | **BROKEN** — old imports, needs Phase 1 rewrite | -| `backend_llama.go` | llama-server subprocess | Works, needs go-inference wrapper | -| `backend_http.go` | HTTP API (OpenAI-compatible) | Works, needs go-inference wrapper | +| `adapter.go` | InferenceAdapter (TextModel -> Backend) | DONE — bridges go-inference to ml.Backend | +| `backend_mlx.go` | MLX/Metal GPU | DONE — uses go-inference LoadModel + InferenceAdapter | +| `backend_http.go` | HTTP API (OpenAI-compatible) | Works as ml.Backend | +| `backend_http_textmodel.go` | HTTPTextModel + LlamaTextModel | DONE — reverse wrappers (Backend -> TextModel) | +| `backend_llama.go` | llama-server subprocess | Works as ml.Backend | | `ollama.go` | Ollama helpers | Works | ### Scoring Engine diff --git a/TODO.md b/TODO.md index e441652..e45637b 100644 --- a/TODO.md +++ b/TODO.md @@ -76,19 +76,15 @@ Everything downstream is blocked on this. The old `backend_mlx.go` imports go-ml ### Step 1.4: HTTPBackend and LlamaBackend wrappers -- [ ] **HTTPBackend go-inference wrapper** — HTTPBackend already works fine as `ml.Backend`. 
For go-inference compatibility, write a thin wrapper that implements `inference.TextModel`: - - `Generate()` calls HTTP API, yields entire response as single Token - - `Chat()` same - - This is lower priority than MLX — HTTP backends don't need the full iter.Seq pattern - - Consider SSE streaming: `/v1/chat/completions` with `"stream": true` returns SSE events that CAN be yielded as `iter.Seq[Token]` +- [x] **HTTPBackend go-inference wrapper** — `backend_http_textmodel.go`: `HTTPTextModel` wraps `HTTPBackend` to implement `inference.TextModel`. Generate/Chat yield entire response as single Token. Classify returns unsupported error. BatchGenerate processes prompts sequentially. 17 tests pass. -- [ ] **LlamaBackend go-inference wrapper** — LlamaBackend delegates to HTTPBackend already. Same treatment. +- [x] **LlamaBackend go-inference wrapper** — `backend_http_textmodel.go`: `LlamaTextModel` embeds `HTTPTextModel`, overrides `ModelType()` -> "llama" and `Close()` -> `llama.Stop()`. 2 tests pass. ### Step 1.5: Verify downstream consumers -- [ ] **Service.Generate() still works** — `service.go` calls `Backend.Generate()`. After migration, backends wrapped in `InferenceAdapter` must still satisfy `ml.Backend`. -- [ ] **Judge still works** — `judge.go` uses `Backend.Generate()` for LLM-as-judge. Verify scoring pipeline runs end-to-end. -- [ ] **go-ai tools_ml.go** — Uses `ml.Service` directly. No code changes needed in go-ai if `ml.Backend` interface is preserved. +- [x] **Service.Generate() still works** — `service.go` calls `Backend.Generate()`. InferenceAdapter satisfies ml.Backend. HTTPBackend/LlamaBackend still implement ml.Backend directly. No changes needed. +- [x] **Judge still works** — `judge.go` calls `Backend.Generate()` via `judgeChat()`. Same Backend contract, works as before. No changes needed. +- [x] **go-ai tools_ml.go** — Uses `ml.Service` directly. `ml.Backend` interface is preserved, no code changes needed in go-ai. --- diff --git a/backend_http_textmodel.go b/backend_http_textmodel.go new file mode 100644 index 0000000..0df3273 --- /dev/null +++ b/backend_http_textmodel.go @@ -0,0 +1,156 @@ +// SPDX-Licence-Identifier: EUPL-1.2 + +package ml + +import ( + "context" + "fmt" + "iter" + + "forge.lthn.ai/core/go-inference" +) + +// HTTPTextModel wraps an HTTPBackend to satisfy the inference.TextModel interface. +// This enables cross-platform consistency — HTTP backends can be used anywhere +// that expects a go-inference TextModel (e.g. go-ai, go-i18n). +// +// Generate and Chat yield the entire HTTP response as a single Token since +// the OpenAI-compatible API returns complete responses (non-streaming). +type HTTPTextModel struct { + http *HTTPBackend + lastErr error +} + +// Compile-time check: HTTPTextModel implements inference.TextModel. +var _ inference.TextModel = (*HTTPTextModel)(nil) + +// NewHTTPTextModel wraps an HTTPBackend as an inference.TextModel. +func NewHTTPTextModel(backend *HTTPBackend) *HTTPTextModel { + return &HTTPTextModel{http: backend} +} + +// Generate sends a single prompt to the HTTP backend and yields the entire +// response as a single Token. 
+func (m *HTTPTextModel) Generate(ctx context.Context, prompt string, opts ...inference.GenerateOption) iter.Seq[inference.Token] { + return func(yield func(inference.Token) bool) { + cfg := inference.ApplyGenerateOpts(opts) + genOpts := GenOpts{ + Temperature: float64(cfg.Temperature), + MaxTokens: cfg.MaxTokens, + Model: m.http.Model(), + } + result, err := m.http.Generate(ctx, prompt, genOpts) + if err != nil { + m.lastErr = err + return + } + m.lastErr = nil + yield(inference.Token{Text: result}) + } +} + +// Chat sends a multi-turn conversation to the HTTP backend and yields the +// entire response as a single Token. +func (m *HTTPTextModel) Chat(ctx context.Context, messages []inference.Message, opts ...inference.GenerateOption) iter.Seq[inference.Token] { + return func(yield func(inference.Token) bool) { + cfg := inference.ApplyGenerateOpts(opts) + genOpts := GenOpts{ + Temperature: float64(cfg.Temperature), + MaxTokens: cfg.MaxTokens, + Model: m.http.Model(), + } + + // Convert inference.Message to ml.Message. + mlMsgs := make([]Message, len(messages)) + for i, msg := range messages { + mlMsgs[i] = Message{Role: msg.Role, Content: msg.Content} + } + + result, err := m.http.Chat(ctx, mlMsgs, genOpts) + if err != nil { + m.lastErr = err + return + } + m.lastErr = nil + yield(inference.Token{Text: result}) + } +} + +// Classify is not supported by HTTP backends. Returns an error. +func (m *HTTPTextModel) Classify(_ context.Context, _ []string, _ ...inference.GenerateOption) ([]inference.ClassifyResult, error) { + return nil, fmt.Errorf("classify not supported by HTTP backend") +} + +// BatchGenerate processes multiple prompts sequentially via Generate. +func (m *HTTPTextModel) BatchGenerate(ctx context.Context, prompts []string, opts ...inference.GenerateOption) ([]inference.BatchResult, error) { + results := make([]inference.BatchResult, len(prompts)) + for i, prompt := range prompts { + var tokens []inference.Token + for tok := range m.Generate(ctx, prompt, opts...) { + tokens = append(tokens, tok) + } + results[i] = inference.BatchResult{ + Tokens: tokens, + Err: m.lastErr, + } + } + return results, nil +} + +// ModelType returns the configured model name from the underlying HTTPBackend. +func (m *HTTPTextModel) ModelType() string { + model := m.http.Model() + if model == "" { + return "http" + } + return model +} + +// Info returns minimal model metadata for an HTTP backend. +func (m *HTTPTextModel) Info() inference.ModelInfo { + return inference.ModelInfo{Architecture: "http"} +} + +// Metrics returns zero metrics — HTTP backends don't track token-level performance. +func (m *HTTPTextModel) Metrics() inference.GenerateMetrics { + return inference.GenerateMetrics{} +} + +// Err returns the error from the last Generate or Chat call, if any. +func (m *HTTPTextModel) Err() error { + return m.lastErr +} + +// Close is a no-op for HTTP backends — there are no resources to release. +func (m *HTTPTextModel) Close() error { + return nil +} + +// LlamaTextModel wraps a LlamaBackend as an inference.TextModel. It embeds +// HTTPTextModel for Generate/Chat but overrides ModelType and Close to +// reflect the managed llama-server process. +type LlamaTextModel struct { + *HTTPTextModel + llama *LlamaBackend +} + +// Compile-time check: LlamaTextModel implements inference.TextModel. +var _ inference.TextModel = (*LlamaTextModel)(nil) + +// NewLlamaTextModel wraps a LlamaBackend as an inference.TextModel. 
+func NewLlamaTextModel(backend *LlamaBackend) *LlamaTextModel { + return &LlamaTextModel{ + HTTPTextModel: NewHTTPTextModel(backend.http), + llama: backend, + } +} + +// ModelType returns "llama" to identify this as a managed llama-server backend. +func (m *LlamaTextModel) ModelType() string { + return "llama" +} + +// Close stops the managed llama-server process. +func (m *LlamaTextModel) Close() error { + return m.llama.Stop() +} diff --git a/backend_http_textmodel_test.go b/backend_http_textmodel_test.go new file mode 100644 index 0000000..d505372 --- /dev/null +++ b/backend_http_textmodel_test.go @@ -0,0 +1,306 @@ +// SPDX-Licence-Identifier: EUPL-1.2 + +package ml + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "forge.lthn.ai/core/go-inference" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// newTestServer creates an httptest.Server that responds with the given content. +func newTestServer(t *testing.T, content string) *httptest.Server { + t.Helper() + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + resp := chatResponse{ + Choices: []chatChoice{{Message: Message{Role: "assistant", Content: content}}}, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("encode response: %v", err) + } + })) +} + +// newTestServerMulti creates an httptest.Server that responds based on the prompt. +func newTestServerMulti(t *testing.T) *httptest.Server { + t.Helper() + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req chatRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode request: %v", err) + } + // Echo back the last message content with a prefix. + lastContent := "" + if len(req.Messages) > 0 { + lastContent = req.Messages[len(req.Messages)-1].Content + } + resp := chatResponse{ + Choices: []chatChoice{{Message: Message{Role: "assistant", Content: "reply:" + lastContent}}}, + } + json.NewEncoder(w).Encode(resp) + })) +} + +func TestHTTPTextModel_Generate_Good(t *testing.T) { + srv := newTestServer(t, "Hello from HTTP") + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + var collected []inference.Token + for tok := range model.Generate(context.Background(), "test prompt") { + collected = append(collected, tok) + } + + require.Len(t, collected, 1) + assert.Equal(t, "Hello from HTTP", collected[0].Text) + assert.NoError(t, model.Err()) +} + +func TestHTTPTextModel_Generate_WithOpts_Good(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req chatRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + t.Fatalf("decode: %v", err) + } + + // Verify that options are passed through. 
+ assert.InDelta(t, 0.8, req.Temperature, 0.01) + assert.Equal(t, 100, req.MaxTokens) + + resp := chatResponse{ + Choices: []chatChoice{{Message: Message{Role: "assistant", Content: "configured"}}}, + } + json.NewEncoder(w).Encode(resp) + })) + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + var result string + for tok := range model.Generate(context.Background(), "prompt", + inference.WithTemperature(0.8), + inference.WithMaxTokens(100), + ) { + result = tok.Text + } + assert.Equal(t, "configured", result) + assert.NoError(t, model.Err()) +} + +func TestHTTPTextModel_Chat_Good(t *testing.T) { + srv := newTestServer(t, "chat response") + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + messages := []inference.Message{ + {Role: "system", Content: "You are helpful."}, + {Role: "user", Content: "Hello"}, + } + + var collected []inference.Token + for tok := range model.Chat(context.Background(), messages) { + collected = append(collected, tok) + } + + require.Len(t, collected, 1) + assert.Equal(t, "chat response", collected[0].Text) + assert.NoError(t, model.Err()) +} + +func TestHTTPTextModel_Generate_Error_Bad(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte("invalid request")) + })) + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + var collected []inference.Token + for tok := range model.Generate(context.Background(), "bad prompt") { + collected = append(collected, tok) + } + + assert.Empty(t, collected, "no tokens should be yielded on error") + assert.Error(t, model.Err()) + assert.Contains(t, model.Err().Error(), "400") +} + +func TestHTTPTextModel_Chat_Error_Bad(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte("bad chat")) + })) + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + messages := []inference.Message{{Role: "user", Content: "test"}} + var collected []inference.Token + for tok := range model.Chat(context.Background(), messages) { + collected = append(collected, tok) + } + + assert.Empty(t, collected) + assert.Error(t, model.Err()) +} + +func TestHTTPTextModel_Classify_Bad(t *testing.T) { + backend := NewHTTPBackend("http://localhost", "test-model") + model := NewHTTPTextModel(backend) + + results, err := model.Classify(context.Background(), []string{"test"}) + assert.Nil(t, results) + assert.Error(t, err) + assert.Contains(t, err.Error(), "classify not supported") +} + +func TestHTTPTextModel_BatchGenerate_Good(t *testing.T) { + srv := newTestServerMulti(t) + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + prompts := []string{"alpha", "beta", "gamma"} + results, err := model.BatchGenerate(context.Background(), prompts) + require.NoError(t, err) + require.Len(t, results, 3) + + assert.Equal(t, "reply:alpha", results[0].Tokens[0].Text) + assert.NoError(t, results[0].Err) + + assert.Equal(t, "reply:beta", results[1].Tokens[0].Text) + assert.NoError(t, results[1].Err) + + assert.Equal(t, "reply:gamma", results[2].Tokens[0].Text) + assert.NoError(t, results[2].Err) +} + +func TestHTTPTextModel_BatchGenerate_PartialError_Bad(t *testing.T) { + 
callCount := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + callCount++ + if callCount == 2 { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte("error on second")) + return + } + resp := chatResponse{ + Choices: []chatChoice{{Message: Message{Role: "assistant", Content: "ok"}}}, + } + json.NewEncoder(w).Encode(resp) + })) + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + results, err := model.BatchGenerate(context.Background(), []string{"a", "b", "c"}) + require.NoError(t, err) // BatchGenerate itself doesn't fail. + require.Len(t, results, 3) + + assert.Len(t, results[0].Tokens, 1) + assert.NoError(t, results[0].Err) + + assert.Empty(t, results[1].Tokens) + assert.Error(t, results[1].Err, "second prompt should have an error") + + assert.Len(t, results[2].Tokens, 1) + assert.NoError(t, results[2].Err) +} + +func TestHTTPTextModel_ModelType_Good(t *testing.T) { + backend := NewHTTPBackend("http://localhost", "gpt-4o") + model := NewHTTPTextModel(backend) + assert.Equal(t, "gpt-4o", model.ModelType()) +} + +func TestHTTPTextModel_ModelType_Empty_Good(t *testing.T) { + backend := NewHTTPBackend("http://localhost", "") + model := NewHTTPTextModel(backend) + assert.Equal(t, "http", model.ModelType()) +} + +func TestHTTPTextModel_Info_Good(t *testing.T) { + backend := NewHTTPBackend("http://localhost", "test") + model := NewHTTPTextModel(backend) + info := model.Info() + assert.Equal(t, "http", info.Architecture) +} + +func TestHTTPTextModel_Metrics_Good(t *testing.T) { + backend := NewHTTPBackend("http://localhost", "test") + model := NewHTTPTextModel(backend) + metrics := model.Metrics() + assert.Equal(t, inference.GenerateMetrics{}, metrics) +} + +func TestHTTPTextModel_Err_ClearedOnSuccess_Good(t *testing.T) { + // First request fails. + callCount := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + callCount++ + if callCount == 1 { + w.WriteHeader(http.StatusBadRequest) + w.Write([]byte("fail")) + return + } + resp := chatResponse{ + Choices: []chatChoice{{Message: Message{Role: "assistant", Content: "ok"}}}, + } + json.NewEncoder(w).Encode(resp) + })) + defer srv.Close() + + backend := NewHTTPBackend(srv.URL, "test-model") + model := NewHTTPTextModel(backend) + + // First call: error. + for range model.Generate(context.Background(), "fail") { + } + assert.Error(t, model.Err()) + + // Second call: success — error should be cleared. + for range model.Generate(context.Background(), "ok") { + } + assert.NoError(t, model.Err()) +} + +func TestHTTPTextModel_Close_Good(t *testing.T) { + backend := NewHTTPBackend("http://localhost", "test") + model := NewHTTPTextModel(backend) + assert.NoError(t, model.Close()) +} + +func TestLlamaTextModel_ModelType_Good(t *testing.T) { + // LlamaBackend requires a process.Service but we only test ModelType here. + llama := &LlamaBackend{ + http: NewHTTPBackend("http://127.0.0.1:18090", ""), + } + model := NewLlamaTextModel(llama) + assert.Equal(t, "llama", model.ModelType()) +} + +func TestLlamaTextModel_Close_Good(t *testing.T) { + // LlamaBackend with no procID — Stop() is a no-op. + llama := &LlamaBackend{ + http: NewHTTPBackend("http://127.0.0.1:18090", ""), + } + model := NewLlamaTextModel(llama) + assert.NoError(t, model.Close()) +}
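
---

For reference, a minimal usage sketch of the reverse wrapper added in this change. It mirrors the setup in `backend_http_textmodel_test.go`; the endpoint URL and model name are placeholders (not values from the repository), and the snippet is written as an in-package example because the module's import path is not stated in the diff.

```go
package ml

import (
	"context"
	"fmt"

	"forge.lthn.ai/core/go-inference"
)

// ExampleHTTPTextModel shows an HTTPBackend used where a go-inference
// TextModel is expected: wrap it, range over the yielded tokens, then
// check Err() — the iterator yields nothing when the HTTP call fails.
func ExampleHTTPTextModel() {
	// Placeholder endpoint and model name for illustration only.
	backend := NewHTTPBackend("http://127.0.0.1:8080", "test-model")
	model := NewHTTPTextModel(backend)
	defer model.Close()

	messages := []inference.Message{
		{Role: "system", Content: "You are helpful."},
		{Role: "user", Content: "Hello"},
	}
	for tok := range model.Chat(context.Background(), messages,
		inference.WithMaxTokens(100),
		inference.WithTemperature(0.8),
	) {
		fmt.Print(tok.Text) // the entire response arrives as a single Token
	}
	if err := model.Err(); err != nil {
		fmt.Println("chat failed:", err)
	}
}
```

The same pattern applies to `LlamaTextModel` via `NewLlamaTextModel`, with `Close()` additionally stopping the managed llama-server process.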