go-ai/ml/backend_llama.go

package ml

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"forge.lthn.ai/core/cli/pkg/log"
	"forge.lthn.ai/core/cli/pkg/process"
)

// LlamaBackend manages a llama-server process and delegates HTTP calls to it.
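//
// A minimal lifecycle sketch (illustrative only: procSvc, ctx, and the model
// path are assumed to exist elsewhere, and GenOpts is left at its zero value
// for brevity):
//
//	backend := NewLlamaBackend(procSvc, LlamaOpts{
//		ModelPath: "/models/model.gguf",
//	})
//	if err := backend.Start(ctx); err != nil {
//		// handle startup failure
//	}
//	defer backend.Stop()
//	reply, err := backend.Generate(ctx, "Hello", GenOpts{})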
type LlamaBackend struct {
	processSvc *process.Service // supervises the external llama-server process
	procID     string           // ID of the managed process; empty until Start has launched it
	port       int              // local HTTP port llama-server listens on
	http       *HTTPBackend     // delegate used for Generate and Chat requests
	modelPath  string           // path to the GGUF model file
	loraPath   string           // optional path to a GGUF LoRA adapter
	llamaPath  string           // path to the llama-server binary
}

// LlamaOpts configures the llama-server backend.
type LlamaOpts struct {
	// LlamaPath is the path to the llama-server binary.
	LlamaPath string
	// ModelPath is the path to the GGUF model file.
	ModelPath string
	// LoraPath is the optional path to a GGUF LoRA adapter file.
	LoraPath string
	// Port is the HTTP port for llama-server (default: 18090).
	Port int
}

// NewLlamaBackend creates a backend that manages a llama-server process.
// The process is not started until Start() is called.
func NewLlamaBackend(processSvc *process.Service, opts LlamaOpts) *LlamaBackend {
	if opts.Port == 0 {
		opts.Port = 18090
	}
	if opts.LlamaPath == "" {
		opts.LlamaPath = "llama-server"
	}
	baseURL := fmt.Sprintf("http://127.0.0.1:%d", opts.Port)
	return &LlamaBackend{
		processSvc: processSvc,
		port:       opts.Port,
		modelPath:  opts.ModelPath,
		loraPath:   opts.LoraPath,
		llamaPath:  opts.LlamaPath,
		http:       NewHTTPBackend(baseURL, ""),
	}
}

// Name returns "llama".
func (b *LlamaBackend) Name() string { return "llama" }

// Available checks if the llama-server is responding to health checks.
func (b *LlamaBackend) Available() bool {
	if b.procID == "" {
		return false
	}
	url := fmt.Sprintf("http://127.0.0.1:%d/health", b.port)
	client := &http.Client{Timeout: 2 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return false
	}
	resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

// Start launches the llama-server process.
func (b *LlamaBackend) Start(ctx context.Context) error {
	args := []string{
		"-m", b.modelPath,
		"--port", fmt.Sprintf("%d", b.port),
		"--host", "127.0.0.1",
	}
	if b.loraPath != "" {
		args = append(args, "--lora", b.loraPath)
	}
	proc, err := b.processSvc.StartWithOptions(ctx, process.RunOptions{
		Command: b.llamaPath,
		Args:    args,
	})
	if err != nil {
		return log.E("ml.LlamaBackend.Start", "failed to start llama-server", err)
	}
	b.procID = proc.ID
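	// Loading a large GGUF model can take a while, so poll the health endpoint
	// rather than assuming the server is ready as soon as the process exists.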
	// Wait for health check (up to 30 seconds).
	deadline := time.Now().Add(30 * time.Second)
	for time.Now().Before(deadline) {
		if b.Available() {
			return nil
		}
		time.Sleep(500 * time.Millisecond)
	}
	return log.E("ml.LlamaBackend.Start", "llama-server did not become healthy within 30s", nil)
}

// Stop terminates the llama-server process.
func (b *LlamaBackend) Stop() error {
	if b.procID == "" {
		return nil
	}
	return b.processSvc.Kill(b.procID)
}

// Generate sends a prompt to the managed llama-server.
func (b *LlamaBackend) Generate(ctx context.Context, prompt string, opts GenOpts) (string, error) {
	if !b.Available() {
		return "", log.E("ml.LlamaBackend.Generate", "llama-server not available", nil)
	}
	return b.http.Generate(ctx, prompt, opts)
}

// Chat sends a conversation to the managed llama-server.
func (b *LlamaBackend) Chat(ctx context.Context, messages []Message, opts GenOpts) (string, error) {
	if !b.Available() {
		return "", log.E("ml.LlamaBackend.Chat", "llama-server not available", nil)
	}
	return b.http.Chat(ctx, messages, opts)
}