Merge pull request 'feat(ml): add live progress and expand-status commands' (#5) from dev-phase4 into dev
Reviewed-on: #5
This commit is contained in:
commit
61ecaa0c49
3 changed files with 161 additions and 0 deletions
89
cmd/ml/cmd_expand_status.go
Normal file
89
cmd/ml/cmd_expand_status.go
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
package ml
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"forge.lthn.ai/core/go/pkg/cli"
|
||||||
|
"forge.lthn.ai/core/go-ai/ml"
|
||||||
|
)
|
||||||
|
|
||||||
|
var expandStatusCmd = &cli.Command{
|
||||||
|
Use: "expand-status",
|
||||||
|
Short: "Show expansion pipeline progress",
|
||||||
|
Long: "Queries DuckDB for expansion prompts, generated responses, scoring status, and overall pipeline progress.",
|
||||||
|
RunE: runExpandStatus,
|
||||||
|
}
|
||||||
|
|
||||||
|
func runExpandStatus(cmd *cli.Command, args []string) error {
|
||||||
|
path := dbPath
|
||||||
|
if path == "" {
|
||||||
|
path = os.Getenv("LEM_DB")
|
||||||
|
}
|
||||||
|
if path == "" {
|
||||||
|
return fmt.Errorf("--db or LEM_DB required")
|
||||||
|
}
|
||||||
|
|
||||||
|
db, err := ml.OpenDB(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("open db: %w", err)
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
|
||||||
|
fmt.Fprintln(os.Stdout, "LEM Expansion Pipeline Status")
|
||||||
|
fmt.Fprintln(os.Stdout, "==================================================")
|
||||||
|
|
||||||
|
// Expansion prompts
|
||||||
|
total, pending, err := db.ExpansionPromptCounts()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintln(os.Stdout, " Expansion prompts: not created (run: normalize)")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stdout, " Expansion prompts: %d total, %d pending\n", total, pending)
|
||||||
|
|
||||||
|
// Generated responses
|
||||||
|
generated, models, err := db.ExpansionRawCounts()
|
||||||
|
if err != nil {
|
||||||
|
generated = 0
|
||||||
|
fmt.Fprintln(os.Stdout, " Generated: 0 (run: core ml expand)")
|
||||||
|
} else if len(models) > 0 {
|
||||||
|
modelStr := ""
|
||||||
|
for i, m := range models {
|
||||||
|
if i > 0 {
|
||||||
|
modelStr += ", "
|
||||||
|
}
|
||||||
|
modelStr += fmt.Sprintf("%s: %d", m.Name, m.Count)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stdout, " Generated: %d (%s)\n", generated, modelStr)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stdout, " Generated: %d\n", generated)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scored
|
||||||
|
scored, hPassed, jScored, jPassed, err := db.ExpansionScoreCounts()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintln(os.Stdout, " Scored: 0 (run: score --tier 1)")
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stdout, " Heuristic scored: %d (%d passed)\n", scored, hPassed)
|
||||||
|
if jScored > 0 {
|
||||||
|
fmt.Fprintf(os.Stdout, " Judge scored: %d (%d passed)\n", jScored, jPassed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pipeline progress
|
||||||
|
if total > 0 && generated > 0 {
|
||||||
|
genPct := float64(generated) / float64(total) * 100
|
||||||
|
fmt.Fprintf(os.Stdout, "\n Progress: %.1f%% generated\n", genPct)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Golden set context
|
||||||
|
golden, err := db.GoldenSetCount()
|
||||||
|
if err == nil && golden > 0 {
|
||||||
|
fmt.Fprintf(os.Stdout, "\n Golden set: %d / %d\n", golden, targetTotal)
|
||||||
|
if generated > 0 {
|
||||||
|
fmt.Fprintf(os.Stdout, " Combined: %d total examples\n", golden+generated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
68
cmd/ml/cmd_live.go
Normal file
68
cmd/ml/cmd_live.go
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
package ml
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"forge.lthn.ai/core/go/pkg/cli"
|
||||||
|
"forge.lthn.ai/core/go-ai/ml"
|
||||||
|
)
|
||||||
|
|
||||||
|
const targetTotal = 15000
|
||||||
|
|
||||||
|
var liveCmd = &cli.Command{
|
||||||
|
Use: "live",
|
||||||
|
Short: "Show live generation progress from InfluxDB",
|
||||||
|
Long: "Queries InfluxDB for real-time generation progress, worker breakdown, and domain/voice counts.",
|
||||||
|
RunE: runLive,
|
||||||
|
}
|
||||||
|
|
||||||
|
func runLive(cmd *cli.Command, args []string) error {
|
||||||
|
influx := ml.NewInfluxClient(influxURL, influxDB)
|
||||||
|
|
||||||
|
// Total completed generations
|
||||||
|
total, err := influx.QueryScalar("SELECT count(DISTINCT i) AS n FROM gold_gen")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("live: query total: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Distinct domains and voices
|
||||||
|
domains, err := influx.QueryScalar("SELECT count(DISTINCT d) AS n FROM gold_gen")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("live: query domains: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
voices, err := influx.QueryScalar("SELECT count(DISTINCT v) AS n FROM gold_gen")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("live: query voices: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-worker breakdown
|
||||||
|
workers, err := influx.QueryRows("SELECT w, count(DISTINCT i) AS n FROM gold_gen GROUP BY w ORDER BY n DESC")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("live: query workers: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pct := float64(total) / float64(targetTotal) * 100
|
||||||
|
remaining := targetTotal - total
|
||||||
|
|
||||||
|
fmt.Fprintln(os.Stdout, "Golden Set Live Status (from InfluxDB)")
|
||||||
|
fmt.Fprintln(os.Stdout, "─────────────────────────────────────────────")
|
||||||
|
fmt.Fprintf(os.Stdout, " Total: %d / %d (%.1f%%)\n", total, targetTotal, pct)
|
||||||
|
fmt.Fprintf(os.Stdout, " Remaining: %d\n", remaining)
|
||||||
|
fmt.Fprintf(os.Stdout, " Domains: %d\n", domains)
|
||||||
|
fmt.Fprintf(os.Stdout, " Voices: %d\n", voices)
|
||||||
|
fmt.Fprintln(os.Stdout)
|
||||||
|
fmt.Fprintln(os.Stdout, " Workers:")
|
||||||
|
for _, w := range workers {
|
||||||
|
name := w["w"]
|
||||||
|
n := w["n"]
|
||||||
|
marker := ""
|
||||||
|
if name == "migration" {
|
||||||
|
marker = " (seed data)"
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stdout, " %-20s %6s generations%s\n", name, n, marker)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
@ -22,6 +22,8 @@
|
||||||
// - core ml approve: Filter scored expansions into training JSONL
|
// - core ml approve: Filter scored expansions into training JSONL
|
||||||
// - core ml publish: Upload Parquet dataset to HuggingFace Hub
|
// - core ml publish: Upload Parquet dataset to HuggingFace Hub
|
||||||
// - core ml coverage: Analyze seed coverage by region and domain
|
// - core ml coverage: Analyze seed coverage by region and domain
|
||||||
|
// - core ml live: Show live generation progress from InfluxDB
|
||||||
|
// - core ml expand-status: Show expansion pipeline progress
|
||||||
package ml
|
package ml
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|
@ -62,6 +64,8 @@ func AddMLCommands(root *cli.Command) {
|
||||||
mlCmd.AddCommand(approveCmd)
|
mlCmd.AddCommand(approveCmd)
|
||||||
mlCmd.AddCommand(publishCmd)
|
mlCmd.AddCommand(publishCmd)
|
||||||
mlCmd.AddCommand(coverageCmd)
|
mlCmd.AddCommand(coverageCmd)
|
||||||
|
mlCmd.AddCommand(liveCmd)
|
||||||
|
mlCmd.AddCommand(expandStatusCmd)
|
||||||
root.AddCommand(mlCmd)
|
root.AddCommand(mlCmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue