go-ai/ml/backend_llama.go

package ml

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"forge.lthn.ai/core/cli/pkg/log"
	"forge.lthn.ai/core/cli/pkg/process"
)

// LlamaBackend manages a llama-server process and delegates HTTP calls to it.
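//
// A minimal lifecycle sketch (illustrative only: procSvc, ctx, and the model
// path are assumed to exist elsewhere, and GenOpts is left at its zero value
// for brevity):
//
//	backend := NewLlamaBackend(procSvc, LlamaOpts{
//		ModelPath: "/models/model.gguf",
//	})
//	if err := backend.Start(ctx); err != nil {
//		// handle startup failure
//	}
//	defer backend.Stop()
//	reply, err := backend.Generate(ctx, "Hello", GenOpts{})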
type LlamaBackend struct {
	processSvc *process.Service // supervises the external llama-server process
	procID     string           // ID of the managed process; empty until Start has launched it
	port       int              // local HTTP port llama-server listens on
	http       *HTTPBackend     // delegate used for Generate and Chat requests
	modelPath  string           // path to the GGUF model file
	loraPath   string           // optional path to a GGUF LoRA adapter
	llamaPath  string           // path to the llama-server binary
}

// LlamaOpts configures the llama-server backend.
type LlamaOpts struct {
	// LlamaPath is the path to the llama-server binary.
	LlamaPath string
	// ModelPath is the path to the GGUF model file.
	ModelPath string
	// LoraPath is the optional path to a GGUF LoRA adapter file.
	LoraPath string
	// Port is the HTTP port for llama-server (default: 18090).
	Port int
}

// NewLlamaBackend creates a backend that manages a llama-server process.
// The process is not started until Start() is called.
func NewLlamaBackend(processSvc *process.Service, opts LlamaOpts) *LlamaBackend {
	if opts.Port == 0 {
		opts.Port = 18090
	}
	if opts.LlamaPath == "" {
		opts.LlamaPath = "llama-server"
	}
	baseURL := fmt.Sprintf("http://127.0.0.1:%d", opts.Port)
	return &LlamaBackend{
		processSvc: processSvc,
		port:       opts.Port,
		modelPath:  opts.ModelPath,
		loraPath:   opts.LoraPath,
		llamaPath:  opts.LlamaPath,
		http:       NewHTTPBackend(baseURL, ""),
	}
}

// Name returns "llama".
func (b *LlamaBackend) Name() string { return "llama" }

// Available checks if the llama-server is responding to health checks.
func (b *LlamaBackend) Available() bool {
	if b.procID == "" {
		return false
	}
	url := fmt.Sprintf("http://127.0.0.1:%d/health", b.port)
	client := &http.Client{Timeout: 2 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return false
	}
	resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

// Start launches the llama-server process.
func (b *LlamaBackend) Start(ctx context.Context) error {
	args := []string{
		"-m", b.modelPath,
		"--port", fmt.Sprintf("%d", b.port),
		"--host", "127.0.0.1",
	}
	if b.loraPath != "" {
		args = append(args, "--lora", b.loraPath)
	}
	proc, err := b.processSvc.StartWithOptions(ctx, process.RunOptions{
		Command: b.llamaPath,
		Args:    args,
	})
	if err != nil {
		return log.E("ml.LlamaBackend.Start", "failed to start llama-server", err)
	}
	b.procID = proc.ID
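	// Loading a large GGUF model can take a while, so poll the health endpoint
	// rather than assuming the server is ready as soon as the process exists.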
	// Wait for health check (up to 30 seconds).
	deadline := time.Now().Add(30 * time.Second)
	for time.Now().Before(deadline) {
		if b.Available() {
			return nil
		}
		time.Sleep(500 * time.Millisecond)
	}
	return log.E("ml.LlamaBackend.Start", "llama-server did not become healthy within 30s", nil)
}

// Stop terminates the llama-server process.
func (b *LlamaBackend) Stop() error {
	if b.procID == "" {
		return nil
	}
	return b.processSvc.Kill(b.procID)
}

// Generate sends a prompt to the managed llama-server.
func (b *LlamaBackend) Generate(ctx context.Context, prompt string, opts GenOpts) (string, error) {
	if !b.Available() {
		return "", log.E("ml.LlamaBackend.Generate", "llama-server not available", nil)
	}
	return b.http.Generate(ctx, prompt, opts)
}

// Chat sends a conversation to the managed llama-server.
func (b *LlamaBackend) Chat(ctx context.Context, messages []Message, opts GenOpts) (string, error) {
	if !b.Available() {
		return "", log.E("ml.LlamaBackend.Chat", "llama-server not available", nil)
	}
	return b.http.Chat(ctx, messages, opts)
}