LEM/pkg/lem/setup_test.go
Snider 1269e70853 feat: add data hydration engine (cold JSONL.zst -> warm DuckDB -> hot InfluxDB)
RunSetup decompresses .jsonl.zst training data into DuckDB tables
(training_examples, seeds, probes, distill_results) and optionally
backfills InfluxDB with aggregate stats.

Co-Authored-By: Virgil <virgil@lethean.io>
2026-02-28 12:20:00 +00:00

71 lines
1.7 KiB
Go

package lem
import (
"os"
"path/filepath"
"testing"
)
// TestHydrateTrainingExamples builds a two-line JSONL fixture, compresses it
// to .jsonl.zst, hydrates the archive into a fresh DuckDB file, and checks
// that both the returned row count and the training_examples table report
// exactly two rows.
func TestHydrateTrainingExamples(t *testing.T) {
	dir := t.TempDir()

	// Create a .jsonl.zst file with two training examples.
	jsonl := `{"messages":[{"role":"user","content":"What is kindness?"},{"role":"assistant","content":"Kindness is..."}]}
{"messages":[{"role":"user","content":"Explain gravity"},{"role":"assistant","content":"Gravity is..."}]}
`
	src := filepath.Join(dir, "train.jsonl")
	if err := os.WriteFile(src, []byte(jsonl), 0644); err != nil {
		t.Fatalf("writing fixture %s: %v", src, err)
	}

	zst := src + ".zst"
	compressFileZstd(src, zst)
	// The compression helper's outcome is not inspected above, so confirm the
	// archive actually exists before handing it to the hydration step.
	if _, err := os.Stat(zst); err != nil {
		t.Fatalf("compressed fixture missing: %v", err)
	}

	// Drop the plain-text file so only the compressed archive is left on disk.
	if err := os.Remove(src); err != nil {
		t.Fatalf("removing %s: %v", src, err)
	}

	// Hydrate into DuckDB.
	dbPath := filepath.Join(dir, "test.duckdb")
	db, err := OpenDBReadWrite(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	n, err := hydrateTrainingFromZst(db, zst, "test-source", "train")
	if err != nil {
		t.Fatal(err)
	}
	if n != 2 {
		t.Fatalf("expected 2 rows, got %d", n)
	}

	// Verify data.
	rows, err := db.QueryRows("SELECT COUNT(*) AS n FROM training_examples")
	if err != nil {
		t.Fatal(err)
	}
	// Guard the index and the type assertion so an unexpected result shape
	// produces a readable failure instead of a panic.
	if len(rows) == 0 {
		t.Fatal("COUNT(*) query returned no rows")
	}
	count, ok := rows[0]["n"].(int64)
	if !ok {
		t.Fatalf("expected int64 count, got %T", rows[0]["n"])
	}
	if count != 2 {
		t.Fatalf("expected 2 rows in DB, got %d", count)
	}
}
// TestHydrateProbes writes a JSON array containing two probe objects to a
// temporary file, hydrates it into a fresh DuckDB file via hydrateProbes, and
// checks that the returned count is two.
func TestHydrateProbes(t *testing.T) {
	dir := t.TempDir()

	// Create a probe JSON file.
	probeJSON := `[{"id":"P01","domain":"Identity","prompt":"Design auth..."},{"id":"P02","domain":"Network","prompt":"Build mesh..."}]`
	probeFile := filepath.Join(dir, "core.json")
	// Fail here if the fixture cannot be written, rather than letting the
	// hydration step report a misleading error later.
	if err := os.WriteFile(probeFile, []byte(probeJSON), 0644); err != nil {
		t.Fatalf("writing fixture %s: %v", probeFile, err)
	}

	dbPath := filepath.Join(dir, "test.duckdb")
	db, err := OpenDBReadWrite(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	n, err := hydrateProbes(db, probeFile, 0)
	if err != nil {
		t.Fatal(err)
	}
	if n != 2 {
		t.Fatalf("expected 2 probes, got %d", n)
	}
}