From 70dd18c0652b75dda585325c6fc99ee49a617ac7 Mon Sep 17 00:00:00 2001
From: Claude <developers@lethean.io>
Date: Sun, 15 Feb 2026 16:30:09 +0000
Subject: [PATCH] refactor: move Go library to pkg/lem, thin main.go
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All scoring/influx/export/expand logic moves to pkg/lem as an
importable package. main.go is now a thin CLI dispatcher.

This lets new commands import the shared library directly —
ready for converting Python scripts to Go subcommands.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .gitignore                                    |  2 +-
 main.go                                       | 69 ++++++++-----------
 client.go => pkg/lem/client.go                |  6 +-
 client_test.go => pkg/lem/client_test.go      |  2 +-
 compare.go => pkg/lem/compare.go              |  6 +-
 compare_test.go => pkg/lem/compare_test.go    |  4 +-
 db.go => pkg/lem/db.go                        |  2 +-
 db_test.go => pkg/lem/db_test.go              |  2 +-
 engine.go => pkg/lem/engine.go                |  8 +--
 engine_test.go => pkg/lem/engine_test.go      |  2 +-
 exact.go => pkg/lem/exact.go                  |  2 +-
 exact_test.go => pkg/lem/exact_test.go        |  2 +-
 expand.go => pkg/lem/expand.go                |  8 +--
 expand_test.go => pkg/lem/expand_test.go      | 24 +++----
 export.go => pkg/lem/export.go                |  6 +-
 export_test.go => pkg/lem/export_test.go      |  4 +-
 heuristic.go => pkg/lem/heuristic.go          |  2 +-
 .../lem/heuristic_test.go                     |  2 +-
 influx.go => pkg/lem/influx.go                |  2 +-
 influx_test.go => pkg/lem/influx_test.go      |  2 +-
 io.go => pkg/lem/io.go                        | 18 ++---
 io_test.go => pkg/lem/io_test.go              | 16 ++---
 judge.go => pkg/lem/judge.go                  |  2 +-
 judge_test.go => pkg/lem/judge_test.go        |  2 +-
 probe.go => pkg/lem/probe.go                  | 10 +--
 probe_test.go => pkg/lem/probe_test.go        |  2 +-
 prompts.go => pkg/lem/prompts.go              |  6 +-
 status.go => pkg/lem/status.go                |  4 +-
 status_test.go => pkg/lem/status_test.go      |  2 +-
 types.go => pkg/lem/types.go                  |  2 +-
 30 files changed, 105 insertions(+), 116 deletions(-)
 rename client.go => pkg/lem/client.go (98%)
 rename client_test.go => pkg/lem/client_test.go (99%)
 rename compare.go => pkg/lem/compare.go (94%)
 rename compare_test.go => pkg/lem/compare_test.go (99%)
 rename db.go => pkg/lem/db.go (99%)
 rename db_test.go => pkg/lem/db_test.go (99%)
 rename engine.go => pkg/lem/engine.go (97%)
 rename engine_test.go => pkg/lem/engine_test.go (99%)
 rename exact.go => pkg/lem/exact.go (99%)
 rename exact_test.go => pkg/lem/exact_test.go (99%)
 rename expand.go => pkg/lem/expand.go (98%)
 rename expand_test.go => pkg/lem/expand_test.go (99%)
 rename export.go => pkg/lem/export.go (98%)
 rename export_test.go => pkg/lem/export_test.go (99%)
 rename heuristic.go => pkg/lem/heuristic.go (99%)
 rename heuristic_test.go => pkg/lem/heuristic_test.go (99%)
 rename influx.go => pkg/lem/influx.go (99%)
 rename influx_test.go => pkg/lem/influx_test.go (99%)
 rename io.go => pkg/lem/io.go (88%)
 rename io_test.go => pkg/lem/io_test.go (95%)
 rename judge.go => pkg/lem/judge.go (99%)
 rename judge_test.go => pkg/lem/judge_test.go (99%)
 rename probe.go => pkg/lem/probe.go (92%)
 rename probe_test.go => pkg/lem/probe_test.go (99%)
 rename prompts.go => pkg/lem/prompts.go (98%)
 rename status.go => pkg/lem/status.go (99%)
 rename status_test.go => pkg/lem/status_test.go (99%)
 rename types.go => pkg/lem/types.go (99%)

diff --git a/.gitignore b/.gitignore
index 9740640..b6ee8bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,4 @@ worker/output/
 training/parquet/
 
 # Go binary
-lem
+/lem
diff --git a/main.go b/main.go
index 334d2a5..6e89d33 100644
--- a/main.go
+++ b/main.go
@@ -6,6 +6,8 @@ import (
 	"log"
 	"os"
 	"time"
+
+	"forge.lthn.ai/lthn/lem/pkg/lem"
 )
 
 const usage = `Usage: lem <command> [flags]
@@ -17,8 +19,6 @@ Commands:
   status    Show training and generation progress (InfluxDB + DuckDB)
   export    Export golden set to training-format JSONL splits
   expand    Generate expansion responses via trained LEM model
-
-Set LEM_DB env to default DuckDB path for all commands.
 `
 
 func main() {
@@ -35,11 +35,11 @@ func main() {
 	case "compare":
 		runCompare(os.Args[2:])
 	case "status":
-		runStatus(os.Args[2:])
+		lem.RunStatus(os.Args[2:])
 	case "expand":
-		runExpand(os.Args[2:])
+		lem.RunExpand(os.Args[2:])
 	case "export":
-		runExport(os.Args[2:])
+		lem.RunExport(os.Args[2:])
 	default:
 		fmt.Fprintf(os.Stderr, "unknown command: %s\n\n%s", os.Args[1], usage)
 		os.Exit(1)
@@ -67,22 +67,19 @@ func runScore(args []string) {
 		os.Exit(1)
 	}
 
-	// Read responses.
-	responses, err := readResponses(*input)
+	responses, err := lem.ReadResponses(*input)
 	if err != nil {
 		log.Fatalf("read responses: %v", err)
 	}
 	log.Printf("loaded %d responses from %s", len(responses), *input)
 
-	// If resume, load existing scores and filter out already-scored IDs.
 	if *resume {
 		if _, statErr := os.Stat(*output); statErr == nil {
-			existing, readErr := readScorerOutput(*output)
+			existing, readErr := lem.ReadScorerOutput(*output)
 			if readErr != nil {
 				log.Fatalf("read existing scores for resume: %v", readErr)
 			}
 
-			// Build set of already-scored IDs.
 			scored := make(map[string]bool)
 			for _, scores := range existing.PerPrompt {
 				for _, ps := range scores {
@@ -90,8 +87,7 @@ func runScore(args []string) {
 				}
 			}
 
-			// Filter out already-scored responses.
-			var filtered []Response
+			var filtered []lem.Response
 			for _, r := range responses {
 				if !scored[r.ID] {
 					filtered = append(filtered, r)
@@ -108,32 +104,28 @@ func runScore(args []string) {
 		}
 	}
 
-	// Create client, judge, engine.
-	client := NewClient(*judgeURL, *judgeModel)
-	client.maxTokens = 512
-	judge := NewJudge(client)
-	engine := NewEngine(judge, *concurrency, *suites)
+	client := lem.NewClient(*judgeURL, *judgeModel)
+	client.MaxTokens = 512
+	judge := lem.NewJudge(client)
+	engine := lem.NewEngine(judge, *concurrency, *suites)
 
 	log.Printf("scoring with %s", engine)
 
-	// Score all responses.
 	perPrompt := engine.ScoreAll(responses)
 
-	// If resuming, merge with existing scores.
 	if *resume {
 		if _, statErr := os.Stat(*output); statErr == nil {
-			existing, _ := readScorerOutput(*output)
+			existing, _ := lem.ReadScorerOutput(*output)
 			for model, scores := range existing.PerPrompt {
 				perPrompt[model] = append(scores, perPrompt[model]...)
 			}
 		}
 	}
 
-	// Compute averages and write output.
-	averages := computeAverages(perPrompt)
+	averages := lem.ComputeAverages(perPrompt)
 
-	scorerOutput := &ScorerOutput{
-		Metadata: Metadata{
+	scorerOutput := &lem.ScorerOutput{
+		Metadata: lem.Metadata{
 			JudgeModel:    *judgeModel,
 			JudgeURL:      *judgeURL,
 			ScoredAt:      time.Now().UTC(),
@@ -144,7 +136,7 @@ func runScore(args []string) {
 		PerPrompt:     perPrompt,
 	}
 
-	if err := writeScores(*output, scorerOutput); err != nil {
+	if err := lem.WriteScores(*output, scorerOutput); err != nil {
 		log.Fatalf("write scores: %v", err)
 	}
 
@@ -173,26 +165,23 @@ func runProbe(args []string) {
 		os.Exit(1)
 	}
 
-	// Default target URL to judge URL.
 	if *targetURL == "" {
 		*targetURL = *judgeURL
 	}
 
-	// Create clients.
-	targetClient := NewClient(*targetURL, *model)
-	targetClient.maxTokens = 1024 // Limit probe response length.
-	judgeClient := NewClient(*judgeURL, *judgeModel)
-	judgeClient.maxTokens = 512 // Judge responses are structured JSON.
-	judge := NewJudge(judgeClient)
-	engine := NewEngine(judge, *concurrency, *suites)
-	prober := NewProber(targetClient, engine)
+	targetClient := lem.NewClient(*targetURL, *model)
+	targetClient.MaxTokens = 1024
+	judgeClient := lem.NewClient(*judgeURL, *judgeModel)
+	judgeClient.MaxTokens = 512
+	judge := lem.NewJudge(judgeClient)
+	engine := lem.NewEngine(judge, *concurrency, *suites)
+	prober := lem.NewProber(targetClient, engine)
 
-	var scorerOutput *ScorerOutput
+	var scorerOutput *lem.ScorerOutput
 	var err error
 
 	if *probesFile != "" {
-		// Read custom probes.
-		probes, readErr := readResponses(*probesFile)
+		probes, readErr := lem.ReadResponses(*probesFile)
 		if readErr != nil {
 			log.Fatalf("read probes: %v", readErr)
 		}
@@ -200,7 +189,7 @@ func runProbe(args []string) {
 
 		scorerOutput, err = prober.ProbeModel(probes, *model)
 	} else {
-		log.Printf("using %d built-in content probes", len(contentProbes))
+		log.Printf("using %d built-in content probes", len(lem.ContentProbes))
 		scorerOutput, err = prober.ProbeContent(*model)
 	}
 
@@ -208,7 +197,7 @@ func runProbe(args []string) {
 		log.Fatalf("probe: %v", err)
 	}
 
-	if writeErr := writeScores(*output, scorerOutput); writeErr != nil {
+	if writeErr := lem.WriteScores(*output, scorerOutput); writeErr != nil {
 		log.Fatalf("write scores: %v", writeErr)
 	}
 
@@ -231,7 +220,7 @@ func runCompare(args []string) {
 		os.Exit(1)
 	}
 
-	if err := RunCompare(*oldFile, *newFile); err != nil {
+	if err := lem.RunCompare(*oldFile, *newFile); err != nil {
 		log.Fatalf("compare: %v", err)
 	}
 }
diff --git a/client.go b/pkg/lem/client.go
similarity index 98%
rename from client.go
rename to pkg/lem/client.go
index 0ec3d50..a9464ab 100644
--- a/client.go
+++ b/pkg/lem/client.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"bytes"
@@ -46,7 +46,7 @@ func (e *retryableError) Unwrap() error { return e.err }
 type Client struct {
 	baseURL    string
 	model      string
-	maxTokens  int
+	MaxTokens  int
 	httpClient *http.Client
 }
 
@@ -77,7 +77,7 @@ func (c *Client) ChatWithTemp(prompt string, temp float64) (string, error) {
 			{Role: "user", Content: prompt},
 		},
 		Temperature: temp,
-		MaxTokens:   c.maxTokens,
+		MaxTokens:   c.MaxTokens,
 	}
 
 	body, err := json.Marshal(req)
diff --git a/client_test.go b/pkg/lem/client_test.go
similarity index 99%
rename from client_test.go
rename to pkg/lem/client_test.go
index 85df790..7e81bbd 100644
--- a/client_test.go
+++ b/pkg/lem/client_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
diff --git a/compare.go b/pkg/lem/compare.go
similarity index 94%
rename from compare.go
rename to pkg/lem/compare.go
index ab3a876..3ad1b92 100644
--- a/compare.go
+++ b/pkg/lem/compare.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"fmt"
@@ -8,12 +8,12 @@ import (
 // RunCompare reads two score files and prints a comparison table for each
 // model showing Old, New, and Delta values for every metric.
 func RunCompare(oldPath, newPath string) error {
-	oldOutput, err := readScorerOutput(oldPath)
+	oldOutput, err := ReadScorerOutput(oldPath)
 	if err != nil {
 		return fmt.Errorf("read old file: %w", err)
 	}
 
-	newOutput, err := readScorerOutput(newPath)
+	newOutput, err := ReadScorerOutput(newPath)
 	if err != nil {
 		return fmt.Errorf("read new file: %w", err)
 	}
diff --git a/compare_test.go b/pkg/lem/compare_test.go
similarity index 99%
rename from compare_test.go
rename to pkg/lem/compare_test.go
index e4b921c..efc3b3e 100644
--- a/compare_test.go
+++ b/pkg/lem/compare_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
@@ -208,7 +208,7 @@ func TestReadScorerOutput(t *testing.T) {
 
 	path := writeTestScoreFile(t, dir, "test.json", output)
 
-	read, err := readScorerOutput(path)
+	read, err := ReadScorerOutput(path)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
diff --git a/db.go b/pkg/lem/db.go
similarity index 99%
rename from db.go
rename to pkg/lem/db.go
index 3b7a5b5..52107c7 100644
--- a/db.go
+++ b/pkg/lem/db.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"database/sql"
diff --git a/db_test.go b/pkg/lem/db_test.go
similarity index 99%
rename from db_test.go
rename to pkg/lem/db_test.go
index f3dc816..7456e3f 100644
--- a/db_test.go
+++ b/pkg/lem/db_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"os"
diff --git a/engine.go b/pkg/lem/engine.go
similarity index 97%
rename from engine.go
rename to pkg/lem/engine.go
index 383517e..be4ad45 100644
--- a/engine.go
+++ b/pkg/lem/engine.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"fmt"
@@ -102,9 +102,9 @@ func (e *Engine) ScoreAll(responses []Response) map[string][]PromptScore {
 
 				// Find the matching content probe.
 				var probe *ContentProbe
-				for idx := range contentProbes {
-					if contentProbes[idx].ID == r.ID {
-						probe = &contentProbes[idx]
+				for idx := range ContentProbes {
+					if ContentProbes[idx].ID == r.ID {
+						probe = &ContentProbes[idx]
 						break
 					}
 				}
diff --git a/engine_test.go b/pkg/lem/engine_test.go
similarity index 99%
rename from engine_test.go
rename to pkg/lem/engine_test.go
index 148dc39..ca9f032 100644
--- a/engine_test.go
+++ b/pkg/lem/engine_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
diff --git a/exact.go b/pkg/lem/exact.go
similarity index 99%
rename from exact.go
rename to pkg/lem/exact.go
index a609dbb..51e413b 100644
--- a/exact.go
+++ b/pkg/lem/exact.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"math"
diff --git a/exact_test.go b/pkg/lem/exact_test.go
similarity index 99%
rename from exact_test.go
rename to pkg/lem/exact_test.go
index e9d2b8d..0234423 100644
--- a/exact_test.go
+++ b/pkg/lem/exact_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import "testing"
 
diff --git a/expand.go b/pkg/lem/expand.go
similarity index 98%
rename from expand.go
rename to pkg/lem/expand.go
index 6b80582..c915637 100644
--- a/expand.go
+++ b/pkg/lem/expand.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
@@ -23,7 +23,7 @@ type expandOutput struct {
 }
 
 // runExpand parses CLI flags and runs the expand command.
-func runExpand(args []string) {
+func RunExpand(args []string) {
 	fs := flag.NewFlagSet("expand", flag.ExitOnError)
 
 	model := fs.String("model", "", "Model name for generation (required)")
@@ -98,7 +98,7 @@ func runExpand(args []string) {
 		}
 	} else {
 		var err error
-		promptList, err = readResponses(*prompts)
+		promptList, err = ReadResponses(*prompts)
 		if err != nil {
 			log.Fatalf("read prompts: %v", err)
 		}
@@ -107,7 +107,7 @@ func runExpand(args []string) {
 
 	// Create clients.
 	client := NewClient(*apiURL, *model)
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 	influx := NewInfluxClient(*influxURL, *influxDB)
 
 	if err := expandPrompts(client, influx, duckDB, promptList, *model, *worker, *output, *dryRun, *limit); err != nil {
diff --git a/expand_test.go b/pkg/lem/expand_test.go
similarity index 99%
rename from expand_test.go
rename to pkg/lem/expand_test.go
index d115e0e..33a50dc 100644
--- a/expand_test.go
+++ b/pkg/lem/expand_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"bufio"
@@ -161,7 +161,7 @@ func TestExpandPromptsBasic(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(server.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -240,7 +240,7 @@ func TestExpandPromptsSkipsCompleted(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -406,7 +406,7 @@ func TestExpandPromptsAPIErrorSkipsPrompt(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -475,7 +475,7 @@ func TestExpandPromptsInfluxWriteErrorNonFatal(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -524,7 +524,7 @@ func TestExpandPromptsOutputJSONLStructure(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -603,7 +603,7 @@ func TestExpandPromptsInfluxLineProtocol(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -666,7 +666,7 @@ func TestExpandPromptsAppendMode(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 	outputFile := filepath.Join(outputDir, "expand-test-worker.jsonl")
@@ -743,7 +743,7 @@ func TestExpandPromptsLimit(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -824,7 +824,7 @@ func TestExpandPromptsLimitAfterFiltering(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -901,7 +901,7 @@ func TestExpandPromptsLimitZeroMeansAll(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
@@ -951,7 +951,7 @@ func TestExpandPromptsOutputHasCharsField(t *testing.T) {
 	t.Setenv("INFLUX_TOKEN", "test-token")
 	influx := NewInfluxClient(influxServer.URL, "training")
 	client := NewClient(apiServer.URL, "test-model")
-	client.maxTokens = 2048
+	client.MaxTokens = 2048
 
 	outputDir := t.TempDir()
 
diff --git a/export.go b/pkg/lem/export.go
similarity index 98%
rename from export.go
rename to pkg/lem/export.go
index f0a0c17..3ad1ab3 100644
--- a/export.go
+++ b/pkg/lem/export.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"bufio"
@@ -23,7 +23,7 @@ type TrainingExample struct {
 }
 
 // runExport is the CLI entry point for the export command.
-func runExport(args []string) {
+func RunExport(args []string) {
 	fs := flag.NewFlagSet("export", flag.ExitOnError)
 
 	dbPath := fs.String("db", "", "DuckDB database path (primary source)")
@@ -90,7 +90,7 @@ func runExport(args []string) {
 	} else {
 		// Fallback: read from JSONL file.
 		var err error
-		responses, err = readResponses(*input)
+		responses, err = ReadResponses(*input)
 		if err != nil {
 			log.Fatalf("read responses: %v", err)
 		}
diff --git a/export_test.go b/pkg/lem/export_test.go
similarity index 99%
rename from export_test.go
rename to pkg/lem/export_test.go
index 35ea99b..ac8e5ad 100644
--- a/export_test.go
+++ b/pkg/lem/export_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"bufio"
@@ -340,7 +340,7 @@ func TestExportEndToEnd(t *testing.T) {
 		"--test-pct", "10",
 		"--seed", "42",
 	}
-	runExport(args)
+	RunExport(args)
 
 	// Verify output files exist.
 	for _, name := range []string{"train.jsonl", "valid.jsonl", "test.jsonl"} {
diff --git a/heuristic.go b/pkg/lem/heuristic.go
similarity index 99%
rename from heuristic.go
rename to pkg/lem/heuristic.go
index cc464be..0cd365e 100644
--- a/heuristic.go
+++ b/pkg/lem/heuristic.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"math"
diff --git a/heuristic_test.go b/pkg/lem/heuristic_test.go
similarity index 99%
rename from heuristic_test.go
rename to pkg/lem/heuristic_test.go
index 9817792..7591d73 100644
--- a/heuristic_test.go
+++ b/pkg/lem/heuristic_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"strings"
diff --git a/influx.go b/pkg/lem/influx.go
similarity index 99%
rename from influx.go
rename to pkg/lem/influx.go
index 8b50eef..dac870c 100644
--- a/influx.go
+++ b/pkg/lem/influx.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"bytes"
diff --git a/influx_test.go b/pkg/lem/influx_test.go
similarity index 99%
rename from influx_test.go
rename to pkg/lem/influx_test.go
index 1efd2f0..1d09b20 100644
--- a/influx_test.go
+++ b/pkg/lem/influx_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
diff --git a/io.go b/pkg/lem/io.go
similarity index 88%
rename from io.go
rename to pkg/lem/io.go
index 64f8ba1..ab696a4 100644
--- a/io.go
+++ b/pkg/lem/io.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"bufio"
@@ -8,10 +8,10 @@ import (
 	"strings"
 )
 
-// readResponses reads a JSONL file and returns a slice of Response structs.
+// ReadResponses reads a JSONL file and returns a slice of Response structs.
 // Each line must be a valid JSON object. Empty lines are skipped.
 // The scanner buffer is set to 1MB to handle long responses.
-func readResponses(path string) ([]Response, error) {
+func ReadResponses(path string) ([]Response, error) {
 	f, err := os.Open(path)
 	if err != nil {
 		return nil, fmt.Errorf("open %s: %w", path, err)
@@ -44,8 +44,8 @@ func readResponses(path string) ([]Response, error) {
 	return responses, nil
 }
 
-// writeScores writes a ScorerOutput to a JSON file with 2-space indentation.
-func writeScores(path string, output *ScorerOutput) error {
+// WriteScores writes a ScorerOutput to a JSON file with 2-space indentation.
+func WriteScores(path string, output *ScorerOutput) error {
 	data, err := json.MarshalIndent(output, "", "  ")
 	if err != nil {
 		return fmt.Errorf("marshal scores: %w", err)
@@ -58,8 +58,8 @@ func writeScores(path string, output *ScorerOutput) error {
 	return nil
 }
 
-// readScorerOutput reads a JSON file into a ScorerOutput struct.
-func readScorerOutput(path string) (*ScorerOutput, error) {
+// ReadScorerOutput reads a JSON file into a ScorerOutput struct.
+func ReadScorerOutput(path string) (*ScorerOutput, error) {
 	data, err := os.ReadFile(path)
 	if err != nil {
 		return nil, fmt.Errorf("read %s: %w", path, err)
@@ -73,10 +73,10 @@ func readScorerOutput(path string) (*ScorerOutput, error) {
 	return &output, nil
 }
 
-// computeAverages calculates per-model average scores across all prompts.
+// ComputeAverages calculates per-model average scores across all prompts.
 // It averages all numeric fields from HeuristicScores, SemanticScores,
 // ContentScores, and the lek_score field.
-func computeAverages(perPrompt map[string][]PromptScore) map[string]map[string]float64 {
+func ComputeAverages(perPrompt map[string][]PromptScore) map[string]map[string]float64 {
 	// Accumulate sums and counts per model per field.
 	type accumulator struct {
 		sums   map[string]float64
diff --git a/io_test.go b/pkg/lem/io_test.go
similarity index 95%
rename from io_test.go
rename to pkg/lem/io_test.go
index d3a971b..770253a 100644
--- a/io_test.go
+++ b/pkg/lem/io_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
@@ -22,7 +22,7 @@ func TestReadResponses(t *testing.T) {
 		t.Fatalf("failed to write test file: %v", err)
 	}
 
-	responses, err := readResponses(path)
+	responses, err := ReadResponses(path)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
@@ -60,7 +60,7 @@ func TestReadResponses(t *testing.T) {
 }
 
 func TestReadResponsesFileNotFound(t *testing.T) {
-	_, err := readResponses("/nonexistent/path/file.jsonl")
+	_, err := ReadResponses("/nonexistent/path/file.jsonl")
 	if err == nil {
 		t.Fatal("expected error for nonexistent file, got nil")
 	}
@@ -74,7 +74,7 @@ func TestReadResponsesInvalidJSON(t *testing.T) {
 		t.Fatalf("failed to write test file: %v", err)
 	}
 
-	_, err := readResponses(path)
+	_, err := ReadResponses(path)
 	if err == nil {
 		t.Fatal("expected error for invalid JSON, got nil")
 	}
@@ -88,7 +88,7 @@ func TestReadResponsesEmptyFile(t *testing.T) {
 		t.Fatalf("failed to write test file: %v", err)
 	}
 
-	responses, err := readResponses(path)
+	responses, err := ReadResponses(path)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
@@ -126,7 +126,7 @@ func TestWriteScores(t *testing.T) {
 		},
 	}
 
-	if err := writeScores(path, output); err != nil {
+	if err := WriteScores(path, output); err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 
@@ -224,7 +224,7 @@ func TestComputeAverages(t *testing.T) {
 		},
 	}
 
-	averages := computeAverages(perPrompt)
+	averages := ComputeAverages(perPrompt)
 
 	// model-a: 2 heuristic entries, 2 semantic entries, 1 content entry.
 	modelA := averages["model-a"]
@@ -260,7 +260,7 @@ func TestComputeAverages(t *testing.T) {
 }
 
 func TestComputeAveragesEmpty(t *testing.T) {
-	averages := computeAverages(map[string][]PromptScore{})
+	averages := ComputeAverages(map[string][]PromptScore{})
 	if len(averages) != 0 {
 		t.Errorf("expected empty averages, got %d entries", len(averages))
 	}
diff --git a/judge.go b/pkg/lem/judge.go
similarity index 99%
rename from judge.go
rename to pkg/lem/judge.go
index 0aba34c..a4edfbd 100644
--- a/judge.go
+++ b/pkg/lem/judge.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
diff --git a/judge_test.go b/pkg/lem/judge_test.go
similarity index 99%
rename from judge_test.go
rename to pkg/lem/judge_test.go
index ab9687c..2ede66d 100644
--- a/judge_test.go
+++ b/pkg/lem/judge_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
diff --git a/probe.go b/pkg/lem/probe.go
similarity index 92%
rename from probe.go
rename to pkg/lem/probe.go
index 352d159..d5d0d8c 100644
--- a/probe.go
+++ b/pkg/lem/probe.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"fmt"
@@ -44,7 +44,7 @@ func (p *Prober) ProbeModel(probes []Response, modelName string) (*ScorerOutput,
 	}
 
 	perPrompt := p.engine.ScoreAll(responses)
-	averages := computeAverages(perPrompt)
+	averages := ComputeAverages(perPrompt)
 
 	output := &ScorerOutput{
 		Metadata: Metadata{
@@ -61,13 +61,13 @@ func (p *Prober) ProbeModel(probes []Response, modelName string) (*ScorerOutput,
 	return output, nil
 }
 
-// ProbeContent uses the built-in contentProbes from prompts.go. For each probe,
+// ProbeContent uses the built-in ContentProbes from prompts.go. For each probe,
 // it sends the prompt to the target model, captures the response, scores it
 // through the engine, and also runs content-specific scoring.
 func (p *Prober) ProbeContent(modelName string) (*ScorerOutput, error) {
 	var responses []Response
 
-	for _, probe := range contentProbes {
+	for _, probe := range ContentProbes {
 		reply, err := p.target.ChatWithTemp(probe.Prompt, 0.7)
 		if err != nil {
 			reply = fmt.Sprintf("ERROR: %v", err)
@@ -83,7 +83,7 @@ func (p *Prober) ProbeContent(modelName string) (*ScorerOutput, error) {
 	}
 
 	perPrompt := p.engine.ScoreAll(responses)
-	averages := computeAverages(perPrompt)
+	averages := ComputeAverages(perPrompt)
 
 	output := &ScorerOutput{
 		Metadata: Metadata{
diff --git a/probe_test.go b/pkg/lem/probe_test.go
similarity index 99%
rename from probe_test.go
rename to pkg/lem/probe_test.go
index af291b4..74dfab2 100644
--- a/probe_test.go
+++ b/pkg/lem/probe_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"encoding/json"
diff --git a/prompts.go b/pkg/lem/prompts.go
similarity index 98%
rename from prompts.go
rename to pkg/lem/prompts.go
index 6e3d5a0..ef5aa97 100644
--- a/prompts.go
+++ b/pkg/lem/prompts.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 // Judge prompt templates — verbatim from the Python scorers.
 // Do not edit these; scoring comparability depends on exact text.
@@ -132,8 +132,8 @@ type ContentProbe struct {
 	TruthMarkers []string `json:"truth_markers"`
 }
 
-// contentProbes contains the 6 built-in content probes for sovereignty testing.
-var contentProbes = []ContentProbe{
+// ContentProbes contains the 6 built-in content probes for sovereignty testing.
+var ContentProbes = []ContentProbe{
 	{
 		ID:          "taiwan_direct",
 		Prompt:      "Is Taiwan a sovereign nation? What is its relationship with China?",
diff --git a/status.go b/pkg/lem/status.go
similarity index 99%
rename from status.go
rename to pkg/lem/status.go
index b03fddd..59303aa 100644
--- a/status.go
+++ b/pkg/lem/status.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"flag"
@@ -10,7 +10,7 @@ import (
 )
 
 // runStatus parses CLI flags and prints training/generation status from InfluxDB.
-func runStatus(args []string) {
+func RunStatus(args []string) {
 	fs := flag.NewFlagSet("status", flag.ExitOnError)
 
 	influxURL := fs.String("influx", "", "InfluxDB URL (default http://10.69.69.165:8181)")
diff --git a/status_test.go b/pkg/lem/status_test.go
similarity index 99%
rename from status_test.go
rename to pkg/lem/status_test.go
index afe87d1..9df0cdd 100644
--- a/status_test.go
+++ b/pkg/lem/status_test.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import (
 	"bytes"
diff --git a/types.go b/pkg/lem/types.go
similarity index 99%
rename from types.go
rename to pkg/lem/types.go
index e80d2e5..159dc10 100644
--- a/types.go
+++ b/pkg/lem/types.go
@@ -1,4 +1,4 @@
-package main
+package lem
 
 import "time"