feat: integrate lab dashboard as core lab serve
Port the standalone lab dashboard (lab.lthn.io) into the core CLI as pkg/lab/ with collectors, handlers, and HTML templates. The dashboard monitors machines, Docker containers, Forgejo, HuggingFace models, training runs, and InfluxDB metrics with SSE live updates. New command: core lab serve --bind :8080 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1f3a1bcc47
commit
5e9a9c2790
27 changed files with 4591 additions and 0 deletions
138
internal/cmd/lab/cmd_lab.go
Normal file
138
internal/cmd/lab/cmd_lab.go
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
package lab
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/cli"
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
"forge.lthn.ai/core/cli/pkg/lab/collector"
|
||||
"forge.lthn.ai/core/cli/pkg/lab/handler"
|
||||
)
|
||||
|
||||
// init registers the lab command tree with the core CLI at startup.
func init() {
	cli.RegisterCommands(AddLabCommands)
}
|
||||
|
||||
// labCmd is the parent "lab" command; subcommands are attached in
// AddLabCommands.
var labCmd = &cli.Command{
	Use:   "lab",
	Short: "Homelab monitoring dashboard",
	Long:  "Lab dashboard with real-time monitoring of machines, training runs, models, and services.",
}

var (
	// labBind is the HTTP listen address for "lab serve" (--bind flag).
	labBind string
)
|
||||
|
||||
// serveCmd implements "lab serve", which runs the dashboard web server.
var serveCmd = &cli.Command{
	Use:   "serve",
	Short: "Start the lab dashboard web server",
	Long:  "Starts the lab dashboard HTTP server with live-updating collectors for system stats, Docker, Forgejo, HuggingFace, InfluxDB, and more.",
	RunE:  runServe,
}

// init wires the serve command's flags.
func init() {
	serveCmd.Flags().StringVar(&labBind, "bind", ":8080", "HTTP listen address")
}
|
||||
|
||||
// AddLabCommands registers the 'lab' command and subcommands.
|
||||
func AddLabCommands(root *cli.Command) {
|
||||
labCmd.AddCommand(serveCmd)
|
||||
root.AddCommand(labCmd)
|
||||
}
|
||||
|
||||
func runServe(cmd *cli.Command, args []string) error {
|
||||
cfg := lab.LoadConfig()
|
||||
cfg.Addr = labBind
|
||||
|
||||
store := lab.NewStore()
|
||||
logger := slog.New(slog.NewJSONHandler(os.Stdout, nil))
|
||||
|
||||
// Setup collectors.
|
||||
reg := collector.NewRegistry(logger)
|
||||
reg.Register(collector.NewSystem(cfg, store), 60*time.Second)
|
||||
reg.Register(collector.NewPrometheus(cfg.PrometheusURL, store),
|
||||
time.Duration(cfg.PrometheusInterval)*time.Second)
|
||||
reg.Register(collector.NewHuggingFace(cfg.HFAuthor, store),
|
||||
time.Duration(cfg.HFInterval)*time.Second)
|
||||
reg.Register(collector.NewDocker(store),
|
||||
time.Duration(cfg.DockerInterval)*time.Second)
|
||||
|
||||
if cfg.ForgeToken != "" {
|
||||
reg.Register(collector.NewForgejo(cfg.ForgeURL, cfg.ForgeToken, store),
|
||||
time.Duration(cfg.ForgeInterval)*time.Second)
|
||||
}
|
||||
|
||||
reg.Register(collector.NewTraining(cfg, store),
|
||||
time.Duration(cfg.TrainingInterval)*time.Second)
|
||||
reg.Register(collector.NewServices(store), 60*time.Second)
|
||||
|
||||
if cfg.InfluxToken != "" {
|
||||
reg.Register(collector.NewInfluxDB(cfg, store),
|
||||
time.Duration(cfg.InfluxInterval)*time.Second)
|
||||
}
|
||||
|
||||
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
|
||||
defer cancel()
|
||||
reg.Start(ctx)
|
||||
defer reg.Stop()
|
||||
|
||||
// Setup HTTP handlers.
|
||||
web := handler.NewWebHandler(store)
|
||||
api := handler.NewAPIHandler(store)
|
||||
|
||||
mux := http.NewServeMux()
|
||||
|
||||
// Web pages.
|
||||
mux.HandleFunc("GET /", web.Dashboard)
|
||||
mux.HandleFunc("GET /models", web.Models)
|
||||
mux.HandleFunc("GET /training", web.Training)
|
||||
mux.HandleFunc("GET /dataset", web.Dataset)
|
||||
mux.HandleFunc("GET /golden-set", func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, "/dataset", http.StatusMovedPermanently)
|
||||
})
|
||||
mux.HandleFunc("GET /runs", func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, "/training", http.StatusMovedPermanently)
|
||||
})
|
||||
mux.HandleFunc("GET /agents", web.Agents)
|
||||
mux.HandleFunc("GET /services", web.Services)
|
||||
|
||||
// SSE for live updates.
|
||||
mux.HandleFunc("GET /events", web.Events)
|
||||
|
||||
// JSON API.
|
||||
mux.HandleFunc("GET /api/status", api.Status)
|
||||
mux.HandleFunc("GET /api/models", api.Models)
|
||||
mux.HandleFunc("GET /api/training", api.Training)
|
||||
mux.HandleFunc("GET /api/dataset", api.GoldenSet)
|
||||
mux.HandleFunc("GET /api/golden-set", api.GoldenSet)
|
||||
mux.HandleFunc("GET /api/runs", api.Runs)
|
||||
mux.HandleFunc("GET /api/agents", api.Agents)
|
||||
mux.HandleFunc("GET /api/services", api.Services)
|
||||
mux.HandleFunc("GET /health", api.Health)
|
||||
|
||||
srv := &http.Server{
|
||||
Addr: cfg.Addr,
|
||||
Handler: mux,
|
||||
ReadTimeout: 5 * time.Second,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
}
|
||||
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
logger.Info("shutting down")
|
||||
shutCtx, shutCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer shutCancel()
|
||||
srv.Shutdown(shutCtx)
|
||||
}()
|
||||
|
||||
logger.Info("lab dashboard starting", "addr", cfg.Addr)
|
||||
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
@ -44,6 +44,7 @@ import (
|
|||
_ "forge.lthn.ai/core/cli/internal/cmd/gitcmd"
|
||||
_ "forge.lthn.ai/core/cli/internal/cmd/go"
|
||||
_ "forge.lthn.ai/core/cli/internal/cmd/help"
|
||||
_ "forge.lthn.ai/core/cli/internal/cmd/lab"
|
||||
_ "forge.lthn.ai/core/cli/internal/cmd/mcpcmd"
|
||||
_ "forge.lthn.ai/core/cli/internal/cmd/ml"
|
||||
_ "forge.lthn.ai/core/cli/internal/cmd/monitor"
|
||||
|
|
|
|||
82
pkg/lab/collector/collector.go
Normal file
82
pkg/lab/collector/collector.go
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Collector interface {
|
||||
Name() string
|
||||
Collect(ctx context.Context) error
|
||||
}
|
||||
|
||||
type Registry struct {
|
||||
mu sync.Mutex
|
||||
entries []entry
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
type entry struct {
|
||||
c Collector
|
||||
interval time.Duration
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
func NewRegistry(logger *slog.Logger) *Registry {
|
||||
return &Registry{logger: logger}
|
||||
}
|
||||
|
||||
func (r *Registry) Register(c Collector, interval time.Duration) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.entries = append(r.entries, entry{c: c, interval: interval})
|
||||
}
|
||||
|
||||
func (r *Registry) Start(ctx context.Context) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
for i := range r.entries {
|
||||
e := &r.entries[i]
|
||||
cctx, cancel := context.WithCancel(ctx)
|
||||
e.cancel = cancel
|
||||
go r.run(cctx, e.c, e.interval)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Registry) run(ctx context.Context, c Collector, interval time.Duration) {
|
||||
r.logger.Info("collector started", "name", c.Name(), "interval", interval)
|
||||
|
||||
// Run immediately on start.
|
||||
if err := c.Collect(ctx); err != nil {
|
||||
r.logger.Warn("collector error", "name", c.Name(), "err", err)
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
r.logger.Info("collector stopped", "name", c.Name())
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := c.Collect(ctx); err != nil {
|
||||
r.logger.Warn("collector error", "name", c.Name(), "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Registry) Stop() {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
|
||||
for _, e := range r.entries {
|
||||
if e.cancel != nil {
|
||||
e.cancel()
|
||||
}
|
||||
}
|
||||
}
|
||||
94
pkg/lab/collector/docker.go
Normal file
94
pkg/lab/collector/docker.go
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
type Docker struct {
|
||||
store *lab.Store
|
||||
}
|
||||
|
||||
func NewDocker(s *lab.Store) *Docker {
|
||||
return &Docker{store: s}
|
||||
}
|
||||
|
||||
func (d *Docker) Name() string { return "docker" }
|
||||
|
||||
func (d *Docker) Collect(ctx context.Context) error {
|
||||
client := &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
|
||||
return net.Dial("unix", "/var/run/docker.sock")
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", "http://docker/containers/json?all=true", nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
d.store.SetError("docker", err)
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var containers []struct {
|
||||
Names []string `json:"Names"`
|
||||
Image string `json:"Image"`
|
||||
State string `json:"State"`
|
||||
Status string `json:"Status"`
|
||||
Created int64 `json:"Created"`
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(resp.Body).Decode(&containers); err != nil {
|
||||
d.store.SetError("docker", err)
|
||||
return err
|
||||
}
|
||||
|
||||
var result []lab.Container
|
||||
for _, c := range containers {
|
||||
name := ""
|
||||
if len(c.Names) > 0 {
|
||||
name = c.Names[0]
|
||||
if len(name) > 0 && name[0] == '/' {
|
||||
name = name[1:]
|
||||
}
|
||||
}
|
||||
|
||||
created := time.Unix(c.Created, 0)
|
||||
uptime := ""
|
||||
if c.State == "running" {
|
||||
d := time.Since(created)
|
||||
days := int(d.Hours()) / 24
|
||||
hours := int(d.Hours()) % 24
|
||||
if days > 0 {
|
||||
uptime = fmt.Sprintf("%dd %dh", days, hours)
|
||||
} else {
|
||||
uptime = fmt.Sprintf("%dh %dm", hours, int(d.Minutes())%60)
|
||||
}
|
||||
}
|
||||
|
||||
result = append(result, lab.Container{
|
||||
Name: name,
|
||||
Status: c.State,
|
||||
Image: c.Image,
|
||||
Uptime: uptime,
|
||||
Created: created,
|
||||
})
|
||||
}
|
||||
|
||||
d.store.SetContainers(result)
|
||||
d.store.SetError("docker", nil)
|
||||
return nil
|
||||
}
|
||||
130
pkg/lab/collector/forgejo.go
Normal file
130
pkg/lab/collector/forgejo.go
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
type Forgejo struct {
|
||||
url string
|
||||
token string
|
||||
store *lab.Store
|
||||
}
|
||||
|
||||
func NewForgejo(forgeURL, token string, s *lab.Store) *Forgejo {
|
||||
return &Forgejo{url: forgeURL, token: token, store: s}
|
||||
}
|
||||
|
||||
func (f *Forgejo) Name() string { return "forgejo" }
|
||||
|
||||
func (f *Forgejo) Collect(ctx context.Context) error {
|
||||
if f.token == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
commits, err := f.recentActivity(ctx)
|
||||
if err != nil {
|
||||
f.store.SetError("forgejo", err)
|
||||
return err
|
||||
}
|
||||
|
||||
f.store.SetCommits(commits)
|
||||
f.store.SetError("forgejo", nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// forgeRepo is the subset of a Forgejo repository object this collector
// reads from the search API.
type forgeRepo struct {
	FullName  string    `json:"full_name"`
	UpdatedAt time.Time `json:"updated_at"`
}

// forgeCommit is the subset of a Forgejo commit object this collector
// reads from the commits API.
type forgeCommit struct {
	SHA    string `json:"sha"`
	Commit struct {
		Message string `json:"message"`
		Author  struct {
			Name string    `json:"name"`
			Date time.Time `json:"date"`
		} `json:"author"`
	} `json:"commit"`
}
|
||||
|
||||
func (f *Forgejo) recentActivity(ctx context.Context) ([]lab.Commit, error) {
|
||||
// Get recently updated repos
|
||||
repos, err := f.apiGet(ctx, "/api/v1/repos/search?sort=updated&order=desc&limit=5")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var repoList []forgeRepo
|
||||
if err := json.Unmarshal(repos, &repoList); err != nil {
|
||||
// The search API wraps in {"data": [...], "ok": true}
|
||||
var wrapped struct {
|
||||
Data []forgeRepo `json:"data"`
|
||||
}
|
||||
if err2 := json.Unmarshal(repos, &wrapped); err2 != nil {
|
||||
return nil, err
|
||||
}
|
||||
repoList = wrapped.Data
|
||||
}
|
||||
|
||||
var commits []lab.Commit
|
||||
for _, repo := range repoList {
|
||||
if len(commits) >= 10 {
|
||||
break
|
||||
}
|
||||
data, err := f.apiGet(ctx, fmt.Sprintf("/api/v1/repos/%s/commits?limit=2", repo.FullName))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
var fc []forgeCommit
|
||||
if err := json.Unmarshal(data, &fc); err != nil {
|
||||
continue
|
||||
}
|
||||
for _, c := range fc {
|
||||
msg := c.Commit.Message
|
||||
if len(msg) > 80 {
|
||||
msg = msg[:77] + "..."
|
||||
}
|
||||
commits = append(commits, lab.Commit{
|
||||
SHA: c.SHA[:8],
|
||||
Message: msg,
|
||||
Author: c.Commit.Author.Name,
|
||||
Repo: repo.FullName,
|
||||
Timestamp: c.Commit.Author.Date,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return commits, nil
|
||||
}
|
||||
|
||||
func (f *Forgejo) apiGet(ctx context.Context, path string) (json.RawMessage, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", f.url+path, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Authorization", "token "+f.token)
|
||||
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("forgejo %s returned %d", path, resp.StatusCode)
|
||||
}
|
||||
|
||||
var raw json.RawMessage
|
||||
if err := json.NewDecoder(resp.Body).Decode(&raw); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return raw, nil
|
||||
}
|
||||
55
pkg/lab/collector/huggingface.go
Normal file
55
pkg/lab/collector/huggingface.go
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
type HuggingFace struct {
|
||||
author string
|
||||
store *lab.Store
|
||||
}
|
||||
|
||||
func NewHuggingFace(author string, s *lab.Store) *HuggingFace {
|
||||
return &HuggingFace{author: author, store: s}
|
||||
}
|
||||
|
||||
func (h *HuggingFace) Name() string { return "huggingface" }
|
||||
|
||||
func (h *HuggingFace) Collect(ctx context.Context) error {
|
||||
u := fmt.Sprintf("https://huggingface.co/api/models?author=%s&sort=downloads&direction=-1&limit=20", h.author)
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
h.store.SetError("huggingface", err)
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
err := fmt.Errorf("HuggingFace API returned %d", resp.StatusCode)
|
||||
h.store.SetError("huggingface", err)
|
||||
return err
|
||||
}
|
||||
|
||||
var models []lab.HFModel
|
||||
if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
|
||||
h.store.SetError("huggingface", err)
|
||||
return err
|
||||
}
|
||||
|
||||
h.store.SetModels(models)
|
||||
h.store.SetError("huggingface", nil)
|
||||
return nil
|
||||
}
|
||||
354
pkg/lab/collector/influxdb.go
Normal file
354
pkg/lab/collector/influxdb.go
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
// InfluxDB pulls benchmark series, live training statuses, golden-set
// progress, and dataset stats from an InfluxDB 3 instance into the store.
type InfluxDB struct {
	cfg   *lab.Config
	store *lab.Store
}

// NewInfluxDB returns a collector reading connection details
// (URL, token, database) from cfg.
func NewInfluxDB(cfg *lab.Config, s *lab.Store) *InfluxDB {
	return &InfluxDB{cfg: cfg, store: s}
}

// Name implements Collector.
func (i *InfluxDB) Name() string { return "influxdb" }
|
||||
|
||||
// Collect pulls every InfluxDB-backed dashboard dataset in one pass:
// benchmark series (training loss, content/capability/judge scores),
// live training-run statuses, golden-set generation progress, and
// dataset table stats. Each section tolerates its own query failure and
// is simply skipped, so Collect always returns nil (or exits early when
// InfluxDB is not configured); per-query errors are recorded by query()
// against the store's "influxdb" key.
func (i *InfluxDB) Collect(ctx context.Context) error {
	if i.cfg.InfluxURL == "" || i.cfg.InfluxToken == "" {
		return nil
	}

	data := lab.BenchmarkData{
		Loss:            make(map[string][]lab.LossPoint),
		Content:         make(map[string][]lab.ContentPoint),
		Capability:      make(map[string][]lab.CapabilityPoint),
		CapabilityJudge: make(map[string][]lab.CapabilityJudgePoint),
		UpdatedAt:       time.Now(),
	}

	// Collect all run identifiers from each measurement.
	runSet := map[string]lab.BenchmarkRun{}

	// Training loss data.
	if rows, err := i.query(ctx, "SELECT run_id, model, iteration, loss, loss_type, learning_rate, iterations_per_sec, tokens_per_sec FROM training_loss ORDER BY run_id, iteration"); err == nil {
		for _, row := range rows {
			rid := jsonStr(row["run_id"])
			mdl := jsonStr(row["model"])
			if rid == "" {
				continue
			}
			// Re-assigning per row is fine: same rid always maps to the
			// same run descriptor.
			runSet[rid] = lab.BenchmarkRun{RunID: rid, Model: mdl, Type: "training"}
			data.Loss[rid] = append(data.Loss[rid], lab.LossPoint{
				Iteration:    jsonInt(row["iteration"]),
				Loss:         jsonFloat(row["loss"]),
				LossType:     jsonStr(row["loss_type"]),
				LearningRate: jsonFloat(row["learning_rate"]),
				TokensPerSec: jsonFloat(row["tokens_per_sec"]),
			})
		}
	}

	// Content scores.
	if rows, err := i.query(ctx, "SELECT run_id, model, label, dimension, score, iteration, has_kernel FROM content_score ORDER BY run_id, iteration, dimension"); err == nil {
		for _, row := range rows {
			rid := jsonStr(row["run_id"])
			mdl := jsonStr(row["model"])
			if rid == "" {
				continue
			}
			// Don't let a content-only run overwrite a training entry.
			if _, ok := runSet[rid]; !ok {
				runSet[rid] = lab.BenchmarkRun{RunID: rid, Model: mdl, Type: "content"}
			}
			// has_kernel arrives as a stringified boolean.
			hk := jsonStr(row["has_kernel"])
			data.Content[rid] = append(data.Content[rid], lab.ContentPoint{
				Label:     jsonStr(row["label"]),
				Dimension: jsonStr(row["dimension"]),
				Score:     jsonFloat(row["score"]),
				Iteration: jsonInt(row["iteration"]),
				HasKernel: hk == "true" || hk == "True",
			})
		}
	}

	// Capability scores.
	if rows, err := i.query(ctx, "SELECT run_id, model, label, category, accuracy, correct, total, iteration FROM capability_score ORDER BY run_id, iteration, category"); err == nil {
		for _, row := range rows {
			rid := jsonStr(row["run_id"])
			mdl := jsonStr(row["model"])
			if rid == "" {
				continue
			}
			if _, ok := runSet[rid]; !ok {
				runSet[rid] = lab.BenchmarkRun{RunID: rid, Model: mdl, Type: "capability"}
			}
			data.Capability[rid] = append(data.Capability[rid], lab.CapabilityPoint{
				Label:     jsonStr(row["label"]),
				Category:  jsonStr(row["category"]),
				Accuracy:  jsonFloat(row["accuracy"]),
				Correct:   jsonInt(row["correct"]),
				Total:     jsonInt(row["total"]),
				Iteration: jsonInt(row["iteration"]),
			})
		}
	}

	// Capability judge scores (0-10 per probe).
	if rows, err := i.query(ctx, "SELECT run_id, model, label, probe_id, category, reasoning, correctness, clarity, avg, iteration FROM capability_judge ORDER BY run_id, iteration, probe_id"); err == nil {
		for _, row := range rows {
			rid := jsonStr(row["run_id"])
			if rid == "" {
				continue
			}
			data.CapabilityJudge[rid] = append(data.CapabilityJudge[rid], lab.CapabilityJudgePoint{
				Label:       jsonStr(row["label"]),
				ProbeID:     jsonStr(row["probe_id"]),
				Category:    jsonStr(row["category"]),
				Reasoning:   jsonFloat(row["reasoning"]),
				Correctness: jsonFloat(row["correctness"]),
				Clarity:     jsonFloat(row["clarity"]),
				Avg:         jsonFloat(row["avg"]),
				Iteration:   jsonInt(row["iteration"]),
			})
		}
	}

	// Build sorted runs list.
	for _, r := range runSet {
		data.Runs = append(data.Runs, r)
	}
	// Sort by model, then run ID. NOTE: the closure's i, j parameters
	// shadow the method receiver i inside this func literal.
	sort.Slice(data.Runs, func(i, j int) bool {
		return data.Runs[i].Model < data.Runs[j].Model || (data.Runs[i].Model == data.Runs[j].Model && data.Runs[i].RunID < data.Runs[j].RunID)
	})

	i.store.SetBenchmarks(data)

	// Live training run statuses.
	var runStatuses []lab.TrainingRunStatus
	if rows, err := i.query(ctx, "SELECT model, run_id, status, iteration, total_iters, pct FROM training_status ORDER BY time DESC LIMIT 50"); err == nil {
		// Deduplicate: keep only the latest status per run_id.
		seen := map[string]bool{}
		for _, row := range rows {
			rid := jsonStr(row["run_id"])
			if rid == "" || seen[rid] {
				continue
			}
			seen[rid] = true
			rs := lab.TrainingRunStatus{
				Model:      jsonStr(row["model"]),
				RunID:      rid,
				Status:     jsonStr(row["status"]),
				Iteration:  jsonInt(row["iteration"]),
				TotalIters: jsonInt(row["total_iters"]),
				Pct:        jsonFloat(row["pct"]),
			}
			// Find latest loss for this run from already-collected data.
			// Walk backwards and take the most recent train/val points;
			// stop early once both are found.
			if lossPoints, ok := data.Loss[rid]; ok {
				for j := len(lossPoints) - 1; j >= 0; j-- {
					if lossPoints[j].LossType == "train" && rs.LastLoss == 0 {
						rs.LastLoss = lossPoints[j].Loss
						rs.TokensSec = lossPoints[j].TokensPerSec
					}
					if lossPoints[j].LossType == "val" && rs.ValLoss == 0 {
						rs.ValLoss = lossPoints[j].Loss
					}
					if rs.LastLoss > 0 && rs.ValLoss > 0 {
						break
					}
				}
			}
			runStatuses = append(runStatuses, rs)
		}
	}
	i.store.SetTrainingRuns(runStatuses)

	// Golden set data explorer — query gold_gen (real-time per-generation records).
	gs := lab.GoldenSetSummary{TargetTotal: 15000, UpdatedAt: time.Now()}

	// Try real-time gold_gen first (populated by lem_generate.py directly).
	if rows, err := i.query(ctx, "SELECT count(DISTINCT i) AS total, count(DISTINCT d) AS domains, count(DISTINCT v) AS voices, avg(gen_time) AS avg_t, avg(chars) AS avg_c FROM gold_gen"); err == nil && len(rows) > 0 {
		r := rows[0]
		total := jsonInt(r["total"])
		if total > 0 {
			gs.Available = true
			gs.TotalExamples = total
			gs.Domains = jsonInt(r["domains"])
			gs.Voices = jsonInt(r["voices"])
			gs.AvgGenTime = jsonFloat(r["avg_t"])
			gs.AvgResponseChars = jsonFloat(r["avg_c"])
			gs.CompletionPct = float64(total) / float64(gs.TargetTotal) * 100
		}
	}

	// Fallback to pipeline.py metrics if gold_gen isn't populated.
	if !gs.Available {
		if rows, err := i.query(ctx, "SELECT total_examples, domains, voices, avg_gen_time, avg_response_chars, completion_pct FROM golden_set_stats ORDER BY time DESC LIMIT 1"); err == nil && len(rows) > 0 {
			r := rows[0]
			gs.Available = true
			gs.TotalExamples = jsonInt(r["total_examples"])
			gs.Domains = jsonInt(r["domains"])
			gs.Voices = jsonInt(r["voices"])
			gs.AvgGenTime = jsonFloat(r["avg_gen_time"])
			gs.AvgResponseChars = jsonFloat(r["avg_response_chars"])
			gs.CompletionPct = jsonFloat(r["completion_pct"])
		}
	}

	if gs.Available {
		// Per-domain from gold_gen.
		if rows, err := i.query(ctx, "SELECT d, count(DISTINCT i) AS n, avg(gen_time) AS avg_t FROM gold_gen GROUP BY d ORDER BY n DESC"); err == nil && len(rows) > 0 {
			for _, r := range rows {
				gs.DomainStats = append(gs.DomainStats, lab.DomainStat{
					Domain:     jsonStr(r["d"]),
					Count:      jsonInt(r["n"]),
					AvgGenTime: jsonFloat(r["avg_t"]),
				})
			}
		}
		// Fallback to pipeline stats.
		if len(gs.DomainStats) == 0 {
			if rows, err := i.query(ctx, "SELECT DISTINCT domain, count, avg_gen_time FROM golden_set_domain ORDER BY count DESC"); err == nil {
				for _, r := range rows {
					gs.DomainStats = append(gs.DomainStats, lab.DomainStat{
						Domain:     jsonStr(r["domain"]),
						Count:      jsonInt(r["count"]),
						AvgGenTime: jsonFloat(r["avg_gen_time"]),
					})
				}
			}
		}

		// Per-voice from gold_gen.
		if rows, err := i.query(ctx, "SELECT v, count(DISTINCT i) AS n, avg(chars) AS avg_c, avg(gen_time) AS avg_t FROM gold_gen GROUP BY v ORDER BY n DESC"); err == nil && len(rows) > 0 {
			for _, r := range rows {
				gs.VoiceStats = append(gs.VoiceStats, lab.VoiceStat{
					Voice:      jsonStr(r["v"]),
					Count:      jsonInt(r["n"]),
					AvgChars:   jsonFloat(r["avg_c"]),
					AvgGenTime: jsonFloat(r["avg_t"]),
				})
			}
		}
		// Fallback.
		if len(gs.VoiceStats) == 0 {
			if rows, err := i.query(ctx, "SELECT DISTINCT voice, count, avg_chars, avg_gen_time FROM golden_set_voice ORDER BY count DESC"); err == nil {
				for _, r := range rows {
					gs.VoiceStats = append(gs.VoiceStats, lab.VoiceStat{
						Voice:      jsonStr(r["voice"]),
						Count:      jsonInt(r["count"]),
						AvgChars:   jsonFloat(r["avg_chars"]),
						AvgGenTime: jsonFloat(r["avg_gen_time"]),
					})
				}
			}
		}
	}
	// Worker activity.
	// NOTE(review): last_seen is selected but never read here — the
	// WorkerStat only records worker and count; confirm whether the
	// column can be dropped from the query.
	if rows, err := i.query(ctx, "SELECT w, count(DISTINCT i) AS n, max(time) AS last_seen FROM gold_gen GROUP BY w ORDER BY n DESC"); err == nil {
		for _, r := range rows {
			gs.Workers = append(gs.Workers, lab.WorkerStat{
				Worker: jsonStr(r["w"]),
				Count:  jsonInt(r["n"]),
			})
		}
	}

	i.store.SetGoldenSet(gs)

	// Dataset stats (from DuckDB, pushed as dataset_stats measurement).
	ds := lab.DatasetSummary{UpdatedAt: time.Now()}
	if rows, err := i.query(ctx, "SELECT table, rows FROM dataset_stats ORDER BY rows DESC"); err == nil && len(rows) > 0 {
		ds.Available = true
		for _, r := range rows {
			ds.Tables = append(ds.Tables, lab.DatasetTable{
				Name: jsonStr(r["table"]),
				Rows: jsonInt(r["rows"]),
			})
		}
	}
	i.store.SetDataset(ds)

	i.store.SetError("influxdb", nil)
	return nil
}
|
||||
|
||||
func (i *InfluxDB) query(ctx context.Context, sql string) ([]map[string]any, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
body := fmt.Sprintf(`{"db":%q,"q":%q}`, i.cfg.InfluxDB, sql)
|
||||
req, err := http.NewRequestWithContext(ctx, "POST", i.cfg.InfluxURL+"/api/v3/query_sql", strings.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+i.cfg.InfluxToken)
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
i.store.SetError("influxdb", err)
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
err := fmt.Errorf("influxdb query returned %d", resp.StatusCode)
|
||||
i.store.SetError("influxdb", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var rows []map[string]any
|
||||
if err := json.NewDecoder(resp.Body).Decode(&rows); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rows, nil
|
||||
}
|
||||
|
||||
// JSON value helpers — InfluxDB 3 returns typed JSON values.
|
||||
|
||||
func jsonStr(v any) string {
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
if s, ok := v.(string); ok {
|
||||
return s
|
||||
}
|
||||
return fmt.Sprintf("%v", v)
|
||||
}
|
||||
|
||||
func jsonFloat(v any) float64 {
|
||||
if v == nil {
|
||||
return 0
|
||||
}
|
||||
switch n := v.(type) {
|
||||
case float64:
|
||||
return n
|
||||
case json.Number:
|
||||
f, _ := n.Float64()
|
||||
return f
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func jsonInt(v any) int {
|
||||
if v == nil {
|
||||
return 0
|
||||
}
|
||||
switch n := v.(type) {
|
||||
case float64:
|
||||
return int(n)
|
||||
case json.Number:
|
||||
i, _ := n.Int64()
|
||||
return int(i)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
104
pkg/lab/collector/prometheus.go
Normal file
104
pkg/lab/collector/prometheus.go
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
// Prometheus queries agent metrics from a Prometheus server and
// publishes an AgentSummary to the store.
type Prometheus struct {
	url   string
	store *lab.Store
}

// NewPrometheus returns a collector querying the server at promURL.
func NewPrometheus(promURL string, s *lab.Store) *Prometheus {
	return &Prometheus{url: promURL, store: s}
}

// Name implements Collector.
func (p *Prometheus) Name() string { return "prometheus" }
|
||||
|
||||
func (p *Prometheus) Collect(ctx context.Context) error {
|
||||
// Machine stats are handled by the system collector (direct /proc + SSH).
|
||||
// This collector only queries agent metrics from Prometheus.
|
||||
agents := lab.AgentSummary{}
|
||||
if v, err := p.query(ctx, "agents_registered_total"); err == nil && v != nil {
|
||||
agents.RegisteredTotal = int(*v)
|
||||
agents.Available = true
|
||||
}
|
||||
if v, err := p.query(ctx, "agents_queue_pending"); err == nil && v != nil {
|
||||
agents.QueuePending = int(*v)
|
||||
}
|
||||
if v, err := p.query(ctx, "agents_tasks_completed_total"); err == nil && v != nil {
|
||||
agents.TasksCompleted = int(*v)
|
||||
}
|
||||
if v, err := p.query(ctx, "agents_tasks_failed_total"); err == nil && v != nil {
|
||||
agents.TasksFailed = int(*v)
|
||||
}
|
||||
if v, err := p.query(ctx, "agents_capabilities_count"); err == nil && v != nil {
|
||||
agents.Capabilities = int(*v)
|
||||
}
|
||||
if v, err := p.query(ctx, "agents_heartbeat_age_seconds"); err == nil && v != nil {
|
||||
agents.HeartbeatAge = *v
|
||||
}
|
||||
if v, err := p.query(ctx, "agents_exporter_up"); err == nil && v != nil {
|
||||
agents.ExporterUp = *v > 0
|
||||
}
|
||||
|
||||
p.store.SetAgents(agents)
|
||||
p.store.SetError("prometheus", nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// promResponse mirrors the subset of the Prometheus instant-query
// response this collector reads: the status plus the result values.
type promResponse struct {
	Status string `json:"status"`
	Data   struct {
		ResultType string `json:"resultType"`
		Result     []struct {
			// Value is the [timestamp, "value"] pair; the sample value
			// is the second element, encoded as a JSON string.
			Value [2]json.RawMessage `json:"value"`
		} `json:"result"`
	} `json:"data"`
}
|
||||
|
||||
func (p *Prometheus) query(ctx context.Context, promql string) (*float64, error) {
|
||||
u := fmt.Sprintf("%s/api/v1/query?query=%s", p.url, url.QueryEscape(promql))
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
client := &http.Client{Timeout: 5 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
p.store.SetError("prometheus", err)
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var pr promResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&pr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if pr.Status != "success" || len(pr.Data.Result) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var valStr string
|
||||
if err := json.Unmarshal(pr.Data.Result[0].Value[1], &valStr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
val, err := strconv.ParseFloat(valStr, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &val, nil
|
||||
}
|
||||
107
pkg/lab/collector/services.go
Normal file
107
pkg/lab/collector/services.go
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
type Services struct {
|
||||
store *lab.Store
|
||||
services []lab.Service
|
||||
}
|
||||
|
||||
func NewServices(s *lab.Store) *Services {
|
||||
return &Services{
|
||||
store: s,
|
||||
services: []lab.Service{
|
||||
// Source Control
|
||||
{Name: "Forgejo (primary)", URL: "https://forge.lthn.io", Category: "Source Control", Machine: "m3-ultra", Icon: "git"},
|
||||
{Name: "Forgejo (dev)", URL: "https://dev.lthn.io", Category: "Source Control", Machine: "snider-linux", Icon: "git"},
|
||||
{Name: "Forgejo (QA)", URL: "https://qa.lthn.io", Category: "Source Control", Machine: "gateway", Icon: "git"},
|
||||
{Name: "Forgejo (devops)", URL: "https://devops.lthn.io", Category: "Source Control", Machine: "gateway", Icon: "git"},
|
||||
{Name: "Forgejo Pages", URL: "https://host-uk.pages.lthn.io", Category: "Source Control", Machine: "snider-linux", Icon: "web"},
|
||||
|
||||
// CI/CD
|
||||
{Name: "Woodpecker CI", URL: "https://ci.lthn.io", Category: "CI/CD", Machine: "snider-linux", Icon: "ci"},
|
||||
|
||||
// Monitoring
|
||||
{Name: "Grafana", URL: "https://grafana.lthn.io", Category: "Monitoring", Machine: "snider-linux", Icon: "chart"},
|
||||
{Name: "Traefik Dashboard", URL: "https://traefik.lthn.io", Category: "Monitoring", Machine: "snider-linux", Icon: "route"},
|
||||
{Name: "Portainer", URL: "https://portainer.lthn.io", Category: "Monitoring", Machine: "snider-linux", Icon: "container"},
|
||||
{Name: "MantisBT", URL: "https://bugs.lthn.io", Category: "Monitoring", Machine: "snider-linux", Icon: "bug"},
|
||||
|
||||
// AI & Models
|
||||
{Name: "Ollama API", URL: "https://ollama.lthn.io", Category: "AI", Machine: "snider-linux", Icon: "ai"},
|
||||
{Name: "AnythingLLM", URL: "https://anythingllm.lthn.io", Category: "AI", Machine: "snider-linux", Icon: "ai"},
|
||||
{Name: "Argilla", URL: "https://argilla.lthn.io", Category: "AI", Machine: "snider-linux", Icon: "data"},
|
||||
{Name: "Lab Helper API", URL: "http://10.69.69.108:9800", Category: "AI", Machine: "m3-ultra", Icon: "api"},
|
||||
{Name: "Lab Dashboard", URL: "https://lab.lthn.io", Category: "AI", Machine: "snider-linux", Icon: "web"},
|
||||
|
||||
// Media & Content
|
||||
{Name: "Jellyfin", URL: "https://media.lthn.io", Category: "Media", Machine: "m3-ultra", Icon: "media"},
|
||||
{Name: "Immich Photos", URL: "https://photos.lthn.io", Category: "Media", Machine: "m3-ultra", Icon: "photo"},
|
||||
|
||||
// Social
|
||||
{Name: "Mastodon", URL: "https://fedi.lthn.io", Category: "Social", Machine: "snider-linux", Icon: "social"},
|
||||
{Name: "Mixpost", URL: "https://social.lthn.io", Category: "Social", Machine: "snider-linux", Icon: "social"},
|
||||
|
||||
// i18n
|
||||
{Name: "Weblate", URL: "https://i18n.lthn.io", Category: "Translation", Machine: "snider-linux", Icon: "i18n"},
|
||||
|
||||
// Infra
|
||||
{Name: "dAppCo.re CDN", URL: "https://dappco.re", Category: "Infrastructure", Machine: "snider-linux", Icon: "cdn"},
|
||||
{Name: "lthn.ai Landing", URL: "https://lthn.ai", Category: "Infrastructure", Machine: "snider-linux", Icon: "web"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Services) Name() string { return "services" }
|
||||
|
||||
func (s *Services) Collect(ctx context.Context) error {
|
||||
client := &http.Client{
|
||||
Timeout: 5 * time.Second,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
return http.ErrUseLastResponse // don't follow redirects
|
||||
},
|
||||
}
|
||||
|
||||
for i := range s.services {
|
||||
s.services[i].Status = checkHealth(ctx, client, s.services[i].URL)
|
||||
}
|
||||
|
||||
result := make([]lab.Service, len(s.services))
|
||||
copy(result, s.services)
|
||||
s.store.SetServices(result)
|
||||
s.store.SetError("services", nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkHealth(ctx context.Context, client *http.Client, url string) string {
|
||||
// Try HEAD first, fall back to GET if HEAD fails.
|
||||
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
|
||||
if err != nil {
|
||||
return "unavailable"
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
// Retry with GET (some servers reject HEAD).
|
||||
req2, _ := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if req2 == nil {
|
||||
return "unavailable"
|
||||
}
|
||||
resp, err = client.Do(req2)
|
||||
if err != nil {
|
||||
return "unavailable"
|
||||
}
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < 500 {
|
||||
return "ok"
|
||||
}
|
||||
return "unavailable"
|
||||
}
|
||||
374
pkg/lab/collector/system.go
Normal file
374
pkg/lab/collector/system.go
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
type System struct {
|
||||
store *lab.Store
|
||||
cfg *lab.Config
|
||||
}
|
||||
|
||||
func NewSystem(cfg *lab.Config, s *lab.Store) *System {
|
||||
return &System{store: s, cfg: cfg}
|
||||
}
|
||||
|
||||
func (s *System) Name() string { return "system" }
|
||||
|
||||
func (s *System) Collect(ctx context.Context) error {
|
||||
var machines []lab.Machine
|
||||
|
||||
// Collect local machine stats.
|
||||
local := s.collectLocal()
|
||||
machines = append(machines, local)
|
||||
|
||||
// Collect M3 Ultra stats via SSH.
|
||||
if s.cfg.M3Host != "" {
|
||||
m3 := s.collectM3(ctx)
|
||||
machines = append(machines, m3)
|
||||
}
|
||||
|
||||
s.store.SetMachines(machines)
|
||||
s.store.SetError("system", nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Local (snider-linux)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// procPath returns the path to a proc file, preferring /host/proc (Docker mount) over /proc.
|
||||
func procPath(name string) string {
|
||||
hp := "/host/proc/" + name
|
||||
if _, err := os.Stat(hp); err == nil {
|
||||
return hp
|
||||
}
|
||||
return "/proc/" + name
|
||||
}
|
||||
|
||||
func (s *System) collectLocal() lab.Machine {
|
||||
m := lab.Machine{
|
||||
Name: "snider-linux",
|
||||
Host: "localhost",
|
||||
Status: lab.StatusOK,
|
||||
CPUCores: runtime.NumCPU(),
|
||||
}
|
||||
|
||||
// Load average
|
||||
if data, err := os.ReadFile(procPath("loadavg")); err == nil {
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) > 0 {
|
||||
m.Load1, _ = strconv.ParseFloat(fields[0], 64)
|
||||
}
|
||||
}
|
||||
|
||||
// Memory from host /proc/meminfo
|
||||
if f, err := os.Open(procPath("meminfo")); err == nil {
|
||||
defer f.Close()
|
||||
var memTotal, memAvail float64
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.HasPrefix(line, "MemTotal:") {
|
||||
memTotal = parseMemInfoKB(line)
|
||||
} else if strings.HasPrefix(line, "MemAvailable:") {
|
||||
memAvail = parseMemInfoKB(line)
|
||||
}
|
||||
}
|
||||
if memTotal > 0 {
|
||||
m.MemTotalGB = memTotal / 1024 / 1024
|
||||
m.MemUsedGB = (memTotal - memAvail) / 1024 / 1024
|
||||
m.MemUsedPct = (1.0 - memAvail/memTotal) * 100
|
||||
}
|
||||
}
|
||||
|
||||
// Disk — use host root mount if available
|
||||
diskTarget := "/"
|
||||
if _, err := os.Stat("/host/root"); err == nil {
|
||||
diskTarget = "/host/root"
|
||||
}
|
||||
if out, err := exec.Command("df", "-BG", diskTarget).Output(); err == nil {
|
||||
lines := strings.Split(strings.TrimSpace(string(out)), "\n")
|
||||
if len(lines) >= 2 {
|
||||
fields := strings.Fields(lines[1])
|
||||
if len(fields) >= 5 {
|
||||
m.DiskTotalGB = parseGB(fields[1])
|
||||
m.DiskUsedGB = parseGB(fields[2])
|
||||
pct := strings.TrimSuffix(fields[4], "%")
|
||||
m.DiskUsedPct, _ = strconv.ParseFloat(pct, 64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GPU via sysfs (works inside Docker with /host/drm mount)
|
||||
s.collectGPUSysfs(&m)
|
||||
|
||||
// Uptime
|
||||
if data, err := os.ReadFile(procPath("uptime")); err == nil {
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) > 0 {
|
||||
if secs, err := strconv.ParseFloat(fields[0], 64); err == nil {
|
||||
m.Uptime = formatDuration(time.Duration(secs * float64(time.Second)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func (s *System) collectGPUSysfs(m *lab.Machine) {
|
||||
// Try sysfs paths: /host/sys (Docker mount of /sys) or /sys (native)
|
||||
drmBase := "/host/sys/class/drm"
|
||||
if _, err := os.Stat(drmBase); err != nil {
|
||||
drmBase = "/sys/class/drm"
|
||||
}
|
||||
|
||||
// Find the discrete GPU (largest VRAM) — card0 may be integrated
|
||||
gpuDev := ""
|
||||
var bestTotal float64
|
||||
for _, card := range []string{"card0", "card1", "card2"} {
|
||||
p := fmt.Sprintf("%s/%s/device/mem_info_vram_total", drmBase, card)
|
||||
if data, err := os.ReadFile(p); err == nil {
|
||||
val, _ := strconv.ParseFloat(strings.TrimSpace(string(data)), 64)
|
||||
if val > bestTotal {
|
||||
bestTotal = val
|
||||
gpuDev = fmt.Sprintf("%s/%s/device", drmBase, card)
|
||||
}
|
||||
}
|
||||
}
|
||||
if gpuDev == "" {
|
||||
return
|
||||
}
|
||||
|
||||
m.GPUName = "AMD Radeon RX 7800 XT"
|
||||
m.GPUVRAMTotal = bestTotal / 1024 / 1024 / 1024
|
||||
|
||||
if data, err := os.ReadFile(gpuDev + "/mem_info_vram_used"); err == nil {
|
||||
val, _ := strconv.ParseFloat(strings.TrimSpace(string(data)), 64)
|
||||
m.GPUVRAMUsed = val / 1024 / 1024 / 1024
|
||||
}
|
||||
if m.GPUVRAMTotal > 0 {
|
||||
m.GPUVRAMPct = m.GPUVRAMUsed / m.GPUVRAMTotal * 100
|
||||
}
|
||||
|
||||
// Temperature — find hwmon under the device
|
||||
matches, _ := filepath.Glob(gpuDev + "/hwmon/hwmon*/temp1_input")
|
||||
if len(matches) > 0 {
|
||||
if data, err := os.ReadFile(matches[0]); err == nil {
|
||||
val, _ := strconv.ParseFloat(strings.TrimSpace(string(data)), 64)
|
||||
m.GPUTemp = int(val / 1000) // millidegrees to degrees
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// M3 Ultra (via SSH)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func (s *System) collectM3(ctx context.Context) lab.Machine {
|
||||
m := lab.Machine{
|
||||
Name: "m3-ultra",
|
||||
Host: s.cfg.M3Host,
|
||||
Status: lab.StatusUnavailable,
|
||||
GPUName: "Apple M3 Ultra (80 cores)",
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, "ssh",
|
||||
"-o", "ConnectTimeout=5",
|
||||
"-o", "BatchMode=yes",
|
||||
"-i", s.cfg.M3SSHKey,
|
||||
fmt.Sprintf("%s@%s", s.cfg.M3User, s.cfg.M3Host),
|
||||
"printf '===CPU===\\n'; sysctl -n hw.ncpu; sysctl -n vm.loadavg; printf '===MEM===\\n'; sysctl -n hw.memsize; vm_stat; printf '===DISK===\\n'; df -k /; printf '===UPTIME===\\n'; uptime",
|
||||
)
|
||||
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return m
|
||||
}
|
||||
|
||||
m.Status = lab.StatusOK
|
||||
s.parseM3Output(&m, string(out))
|
||||
return m
|
||||
}
|
||||
|
||||
func (s *System) parseM3Output(m *lab.Machine, output string) {
|
||||
sections := splitSections(output)
|
||||
|
||||
// CPU
|
||||
if cpu, ok := sections["CPU"]; ok {
|
||||
lines := strings.Split(strings.TrimSpace(cpu), "\n")
|
||||
if len(lines) >= 1 {
|
||||
m.CPUCores, _ = strconv.Atoi(strings.TrimSpace(lines[0]))
|
||||
}
|
||||
if len(lines) >= 2 {
|
||||
// "{ 8.22 4.56 4.00 }"
|
||||
loadStr := strings.Trim(strings.TrimSpace(lines[1]), "{ }")
|
||||
fields := strings.Fields(loadStr)
|
||||
if len(fields) >= 1 {
|
||||
m.Load1, _ = strconv.ParseFloat(fields[0], 64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Memory
|
||||
if mem, ok := sections["MEM"]; ok {
|
||||
lines := strings.Split(strings.TrimSpace(mem), "\n")
|
||||
if len(lines) >= 1 {
|
||||
bytes, _ := strconv.ParseFloat(strings.TrimSpace(lines[0]), 64)
|
||||
m.MemTotalGB = bytes / 1024 / 1024 / 1024
|
||||
}
|
||||
// Parse vm_stat: page size 16384, look for free/active/inactive/wired/speculative/compressor
|
||||
var pageSize float64 = 16384
|
||||
var free, active, inactive, speculative, wired, compressor float64
|
||||
for _, line := range lines[1:] {
|
||||
if strings.Contains(line, "page size of") {
|
||||
// "Mach Virtual Memory Statistics: (page size of 16384 bytes)"
|
||||
for _, word := range strings.Fields(line) {
|
||||
if v, err := strconv.ParseFloat(word, 64); err == nil && v > 1000 {
|
||||
pageSize = v
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
val := parseVMStatLine(line)
|
||||
switch {
|
||||
case strings.HasPrefix(line, "Pages free:"):
|
||||
free = val
|
||||
case strings.HasPrefix(line, "Pages active:"):
|
||||
active = val
|
||||
case strings.HasPrefix(line, "Pages inactive:"):
|
||||
inactive = val
|
||||
case strings.HasPrefix(line, "Pages speculative:"):
|
||||
speculative = val
|
||||
case strings.HasPrefix(line, "Pages wired"):
|
||||
wired = val
|
||||
case strings.HasPrefix(line, "Pages occupied by compressor:"):
|
||||
compressor = val
|
||||
}
|
||||
}
|
||||
usedPages := active + wired + compressor
|
||||
totalPages := free + active + inactive + speculative + wired + compressor
|
||||
if totalPages > 0 && m.MemTotalGB > 0 {
|
||||
m.MemUsedGB = usedPages * pageSize / 1024 / 1024 / 1024
|
||||
m.MemUsedPct = m.MemUsedGB / m.MemTotalGB * 100
|
||||
}
|
||||
}
|
||||
|
||||
// Disk
|
||||
if disk, ok := sections["DISK"]; ok {
|
||||
lines := strings.Split(strings.TrimSpace(disk), "\n")
|
||||
if len(lines) >= 2 {
|
||||
fields := strings.Fields(lines[1])
|
||||
if len(fields) >= 5 {
|
||||
totalKB, _ := strconv.ParseFloat(fields[1], 64)
|
||||
usedKB, _ := strconv.ParseFloat(fields[2], 64)
|
||||
m.DiskTotalGB = totalKB / 1024 / 1024
|
||||
m.DiskUsedGB = usedKB / 1024 / 1024
|
||||
if m.DiskTotalGB > 0 {
|
||||
m.DiskUsedPct = m.DiskUsedGB / m.DiskTotalGB * 100
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Uptime — "13:20 up 3 days, 1:09, 3 users, load averages: ..."
|
||||
if up, ok := sections["UPTIME"]; ok {
|
||||
line := strings.TrimSpace(up)
|
||||
if idx := strings.Index(line, "up "); idx >= 0 {
|
||||
rest := line[idx+3:]
|
||||
// Split on ", " and take parts until we hit one containing "user"
|
||||
parts := strings.Split(rest, ", ")
|
||||
var uptimeParts []string
|
||||
for _, p := range parts {
|
||||
if strings.Contains(p, "user") || strings.Contains(p, "load") {
|
||||
break
|
||||
}
|
||||
uptimeParts = append(uptimeParts, p)
|
||||
}
|
||||
m.Uptime = strings.TrimSpace(strings.Join(uptimeParts, ", "))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func splitSections(output string) map[string]string {
|
||||
sections := make(map[string]string)
|
||||
var current string
|
||||
var buf strings.Builder
|
||||
for _, line := range strings.Split(output, "\n") {
|
||||
if strings.HasPrefix(line, "===") && strings.HasSuffix(line, "===") {
|
||||
if current != "" {
|
||||
sections[current] = buf.String()
|
||||
buf.Reset()
|
||||
}
|
||||
current = strings.Trim(line, "=")
|
||||
} else if current != "" {
|
||||
buf.WriteString(line)
|
||||
buf.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
if current != "" {
|
||||
sections[current] = buf.String()
|
||||
}
|
||||
return sections
|
||||
}
|
||||
|
||||
func parseVMStatLine(line string) float64 {
|
||||
// "Pages free: 2266867."
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) < 2 {
|
||||
return 0
|
||||
}
|
||||
val := strings.TrimSpace(strings.TrimSuffix(strings.TrimSpace(parts[1]), "."))
|
||||
f, _ := strconv.ParseFloat(val, 64)
|
||||
return f
|
||||
}
|
||||
|
||||
func parseMemInfoKB(line string) float64 {
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 2 {
|
||||
return 0
|
||||
}
|
||||
v, _ := strconv.ParseFloat(fields[1], 64)
|
||||
return v
|
||||
}
|
||||
|
||||
func parseGB(s string) float64 {
|
||||
s = strings.TrimSuffix(s, "G")
|
||||
v, _ := strconv.ParseFloat(s, 64)
|
||||
return v
|
||||
}
|
||||
|
||||
func parseBytesGB(line string) float64 {
|
||||
// "GPU[0] : VRAM Total Memory (B): 17163091968"
|
||||
parts := strings.Split(line, ":")
|
||||
if len(parts) < 3 {
|
||||
return 0
|
||||
}
|
||||
val := strings.TrimSpace(parts[len(parts)-1])
|
||||
bytes, _ := strconv.ParseFloat(val, 64)
|
||||
return bytes / 1024 / 1024 / 1024
|
||||
}
|
||||
|
||||
func formatDuration(d time.Duration) string {
|
||||
days := int(d.Hours()) / 24
|
||||
hours := int(d.Hours()) % 24
|
||||
if days > 0 {
|
||||
return fmt.Sprintf("%dd %dh", days, hours)
|
||||
}
|
||||
return fmt.Sprintf("%dh %dm", hours, int(d.Minutes())%60)
|
||||
}
|
||||
123
pkg/lab/collector/training.go
Normal file
123
pkg/lab/collector/training.go
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
type Training struct {
|
||||
cfg *lab.Config
|
||||
store *lab.Store
|
||||
}
|
||||
|
||||
func NewTraining(cfg *lab.Config, s *lab.Store) *Training {
|
||||
return &Training{cfg: cfg, store: s}
|
||||
}
|
||||
|
||||
func (t *Training) Name() string { return "training" }
|
||||
|
||||
func (t *Training) Collect(ctx context.Context) error {
|
||||
summary := lab.TrainingSummary{
|
||||
GoldTarget: 15000,
|
||||
}
|
||||
|
||||
// Fetch from M3 lab-helper API
|
||||
if t.cfg.M3APIURL != "" {
|
||||
t.fetchM3API(ctx, &summary)
|
||||
}
|
||||
|
||||
// Parse local intercept JSONL files
|
||||
interceptDir := t.cfg.TrainingDataDir
|
||||
if interceptDir != "" {
|
||||
count, lastTime := countJSONLFiles(filepath.Join(interceptDir, "command-intercepts"))
|
||||
summary.InterceptCount = count
|
||||
summary.LastIntercept = lastTime
|
||||
}
|
||||
|
||||
// Count QA sessions
|
||||
sessDir := filepath.Join(t.cfg.TrainingDataDir, "qa-epic-verification", "sessions")
|
||||
if entries, err := os.ReadDir(sessDir); err == nil {
|
||||
summary.SessionCount = len(entries)
|
||||
}
|
||||
|
||||
t.store.SetTraining(summary)
|
||||
t.store.SetError("training", nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
type m3TrainingResponse struct {
|
||||
GoldGenerated int `json:"gold_generated"`
|
||||
GoldTarget int `json:"gold_target"`
|
||||
GoldPercent float64 `json:"gold_percent"`
|
||||
SeedsComplete int `json:"seeds_complete"`
|
||||
GGUFCount int `json:"gguf_count"`
|
||||
GGUFFiles []string `json:"gguf_files"`
|
||||
AdapterCount int `json:"adapter_count"`
|
||||
}
|
||||
|
||||
func (t *Training) fetchM3API(ctx context.Context, summary *lab.TrainingSummary) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", t.cfg.M3APIURL+"/api/training", nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
t.store.SetError("m3-api", err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var data m3TrainingResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
summary.GoldGenerated = data.GoldGenerated
|
||||
summary.GoldAvailable = true
|
||||
summary.GoldPercent = data.GoldPercent
|
||||
summary.GGUFCount = data.GGUFCount
|
||||
summary.GGUFFiles = data.GGUFFiles
|
||||
summary.AdapterCount = data.AdapterCount
|
||||
t.store.SetError("m3-api", nil)
|
||||
}
|
||||
|
||||
func countJSONLFiles(dir string) (int, time.Time) {
|
||||
var total int
|
||||
var lastTime time.Time
|
||||
|
||||
files, err := filepath.Glob(filepath.Join(dir, "*.jsonl"))
|
||||
if err != nil {
|
||||
return 0, lastTime
|
||||
}
|
||||
|
||||
for _, f := range files {
|
||||
file, err := os.Open(f)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
total++
|
||||
var ev struct {
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
}
|
||||
if json.Unmarshal(scanner.Bytes(), &ev) == nil && ev.Timestamp.After(lastTime) {
|
||||
lastTime = ev.Timestamp
|
||||
}
|
||||
}
|
||||
file.Close()
|
||||
}
|
||||
|
||||
return total, lastTime
|
||||
}
|
||||
84
pkg/lab/config.go
Normal file
84
pkg/lab/config.go
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
package lab
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
Addr string
|
||||
|
||||
PrometheusURL string
|
||||
PrometheusInterval int
|
||||
|
||||
ForgeURL string
|
||||
ForgeToken string
|
||||
ForgeInterval int
|
||||
|
||||
HFAuthor string
|
||||
HFInterval int
|
||||
|
||||
M3Host string
|
||||
M3User string
|
||||
M3SSHKey string
|
||||
M3APIURL string
|
||||
M3Interval int
|
||||
|
||||
TrainingDataDir string
|
||||
TrainingInterval int
|
||||
|
||||
DockerInterval int
|
||||
|
||||
InfluxURL string
|
||||
InfluxToken string
|
||||
InfluxDB string
|
||||
InfluxInterval int
|
||||
}
|
||||
|
||||
func LoadConfig() *Config {
|
||||
return &Config{
|
||||
Addr: env("ADDR", ":8080"),
|
||||
|
||||
PrometheusURL: env("PROMETHEUS_URL", "http://prometheus:9090"),
|
||||
PrometheusInterval: envInt("PROMETHEUS_INTERVAL", 15),
|
||||
|
||||
ForgeURL: env("FORGE_URL", "https://forge.lthn.io"),
|
||||
ForgeToken: env("FORGE_TOKEN", ""),
|
||||
ForgeInterval: envInt("FORGE_INTERVAL", 60),
|
||||
|
||||
HFAuthor: env("HF_AUTHOR", "lthn"),
|
||||
HFInterval: envInt("HF_INTERVAL", 300),
|
||||
|
||||
M3Host: env("M3_HOST", "10.69.69.108"),
|
||||
M3User: env("M3_USER", "claude"),
|
||||
M3SSHKey: env("M3_SSH_KEY", "/root/.ssh/id_ed25519"),
|
||||
M3APIURL: env("M3_API_URL", "http://10.69.69.108:9800"),
|
||||
M3Interval: envInt("M3_INTERVAL", 30),
|
||||
|
||||
TrainingDataDir: env("TRAINING_DATA_DIR", "/data/training"),
|
||||
TrainingInterval: envInt("TRAINING_INTERVAL", 60),
|
||||
|
||||
DockerInterval: envInt("DOCKER_INTERVAL", 30),
|
||||
|
||||
InfluxURL: env("INFLUX_URL", "http://localhost:8181"),
|
||||
InfluxToken: env("INFLUX_TOKEN", ""),
|
||||
InfluxDB: env("INFLUX_DB", "training"),
|
||||
InfluxInterval: envInt("INFLUX_INTERVAL", 60),
|
||||
}
|
||||
}
|
||||
|
||||
func env(key, fallback string) string {
|
||||
if v := os.Getenv(key); v != "" {
|
||||
return v
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
|
||||
func envInt(key string, fallback int) int {
|
||||
if v := os.Getenv(key); v != "" {
|
||||
if n, err := strconv.Atoi(v); err == nil {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
65
pkg/lab/handler/api.go
Normal file
65
pkg/lab/handler/api.go
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
type APIHandler struct {
|
||||
store *lab.Store
|
||||
}
|
||||
|
||||
func NewAPIHandler(s *lab.Store) *APIHandler {
|
||||
return &APIHandler{store: s}
|
||||
}
|
||||
|
||||
type apiResponse struct {
|
||||
Status string `json:"status"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
Data any `json:"data"`
|
||||
}
|
||||
|
||||
func (h *APIHandler) writeJSON(w http.ResponseWriter, data any) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(apiResponse{
|
||||
Status: "ok",
|
||||
UpdatedAt: time.Now(),
|
||||
Data: data,
|
||||
})
|
||||
}
|
||||
|
||||
func (h *APIHandler) Status(w http.ResponseWriter, r *http.Request) {
|
||||
h.writeJSON(w, h.store.Overview())
|
||||
}
|
||||
|
||||
func (h *APIHandler) Models(w http.ResponseWriter, r *http.Request) {
|
||||
h.writeJSON(w, h.store.GetModels())
|
||||
}
|
||||
|
||||
func (h *APIHandler) Training(w http.ResponseWriter, r *http.Request) {
|
||||
h.writeJSON(w, h.store.GetTraining())
|
||||
}
|
||||
|
||||
func (h *APIHandler) Agents(w http.ResponseWriter, r *http.Request) {
|
||||
h.writeJSON(w, h.store.GetAgents())
|
||||
}
|
||||
|
||||
func (h *APIHandler) Services(w http.ResponseWriter, r *http.Request) {
|
||||
h.writeJSON(w, h.store.GetServices())
|
||||
}
|
||||
|
||||
func (h *APIHandler) GoldenSet(w http.ResponseWriter, r *http.Request) {
|
||||
h.writeJSON(w, h.store.GetGoldenSet())
|
||||
}
|
||||
|
||||
func (h *APIHandler) Runs(w http.ResponseWriter, r *http.Request) {
|
||||
h.writeJSON(w, h.store.GetBenchmarks())
|
||||
}
|
||||
|
||||
func (h *APIHandler) Health(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]string{"status": "ok"})
|
||||
}
|
||||
626
pkg/lab/handler/chart.go
Normal file
626
pkg/lab/handler/chart.go
Normal file
|
|
@ -0,0 +1,626 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"forge.lthn.ai/core/cli/pkg/lab"
|
||||
)
|
||||
|
||||
const (
|
||||
chartW = 760
|
||||
chartH = 280
|
||||
marginTop = 25
|
||||
marginRight = 20
|
||||
marginBot = 35
|
||||
marginLeft = 55
|
||||
plotW = chartW - marginLeft - marginRight
|
||||
plotH = chartH - marginTop - marginBot
|
||||
)
|
||||
|
||||
var dimensionColors = map[string]string{
|
||||
"ccp_compliance": "#f87171",
|
||||
"truth_telling": "#4ade80",
|
||||
"engagement": "#fbbf24",
|
||||
"axiom_integration": "#60a5fa",
|
||||
"sovereignty_reasoning": "#c084fc",
|
||||
"emotional_register": "#fb923c",
|
||||
}
|
||||
|
||||
func getDimColor(dim string) string {
|
||||
if c, ok := dimensionColors[dim]; ok {
|
||||
return c
|
||||
}
|
||||
return "#8888a0"
|
||||
}
|
||||
|
||||
// LossChart generates an SVG line chart for training loss data.
|
||||
func LossChart(points []lab.LossPoint) template.HTML {
|
||||
if len(points) == 0 {
|
||||
return template.HTML(`<div class="empty">No training loss data</div>`)
|
||||
}
|
||||
|
||||
// Separate val and train loss.
|
||||
var valPts, trainPts []lab.LossPoint
|
||||
for _, p := range points {
|
||||
switch p.LossType {
|
||||
case "val":
|
||||
valPts = append(valPts, p)
|
||||
case "train":
|
||||
trainPts = append(trainPts, p)
|
||||
}
|
||||
}
|
||||
|
||||
// Find data bounds.
|
||||
allPts := append(valPts, trainPts...)
|
||||
xMin, xMax := float64(allPts[0].Iteration), float64(allPts[0].Iteration)
|
||||
yMin, yMax := allPts[0].Loss, allPts[0].Loss
|
||||
for _, p := range allPts {
|
||||
x := float64(p.Iteration)
|
||||
if x < xMin {
|
||||
xMin = x
|
||||
}
|
||||
if x > xMax {
|
||||
xMax = x
|
||||
}
|
||||
if p.Loss < yMin {
|
||||
yMin = p.Loss
|
||||
}
|
||||
if p.Loss > yMax {
|
||||
yMax = p.Loss
|
||||
}
|
||||
}
|
||||
|
||||
// Add padding to Y range.
|
||||
yRange := yMax - yMin
|
||||
if yRange < 0.1 {
|
||||
yRange = 0.1
|
||||
}
|
||||
yMin = yMin - yRange*0.1
|
||||
yMax = yMax + yRange*0.1
|
||||
if xMax == xMin {
|
||||
xMax = xMin + 1
|
||||
}
|
||||
|
||||
scaleX := func(v float64) float64 { return marginLeft + (v-xMin)/(xMax-xMin)*plotW }
|
||||
scaleY := func(v float64) float64 { return marginTop + (1-(v-yMin)/(yMax-yMin))*plotH }
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString(fmt.Sprintf(`<svg viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" style="width:100%%;max-width:%dpx">`, chartW, chartH, chartW))
|
||||
sb.WriteString(fmt.Sprintf(`<rect width="%d" height="%d" fill="#12121a" rx="8"/>`, chartW, chartH))
|
||||
|
||||
// Grid lines.
|
||||
nGridY := 5
|
||||
for i := 0; i <= nGridY; i++ {
|
||||
y := marginTop + float64(i)*plotH/float64(nGridY)
|
||||
val := yMax - float64(i)*(yMax-yMin)/float64(nGridY)
|
||||
sb.WriteString(fmt.Sprintf(`<line x1="%d" y1="%.0f" x2="%d" y2="%.0f" stroke="#1e1e2e" stroke-width="1"/>`, marginLeft, y, chartW-marginRight, y))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%d" y="%.0f" fill="#8888a0" font-size="10" text-anchor="end" dominant-baseline="middle">%.2f</text>`, marginLeft-6, y, val))
|
||||
}
|
||||
|
||||
// X axis labels.
|
||||
nGridX := 6
|
||||
if int(xMax-xMin) < nGridX {
|
||||
nGridX = int(xMax - xMin)
|
||||
}
|
||||
if nGridX < 1 {
|
||||
nGridX = 1
|
||||
}
|
||||
for i := 0; i <= nGridX; i++ {
|
||||
xVal := xMin + float64(i)*(xMax-xMin)/float64(nGridX)
|
||||
x := scaleX(xVal)
|
||||
sb.WriteString(fmt.Sprintf(`<line x1="%.0f" y1="%d" x2="%.0f" y2="%d" stroke="#1e1e2e" stroke-width="1"/>`, x, marginTop, x, marginTop+plotH))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%.0f" y="%d" fill="#8888a0" font-size="10" text-anchor="middle">%d</text>`, x, chartH-8, int(xVal)))
|
||||
}
|
||||
|
||||
// Draw train loss line (dimmed).
|
||||
if len(trainPts) > 1 {
|
||||
sort.Slice(trainPts, func(i, j int) bool { return trainPts[i].Iteration < trainPts[j].Iteration })
|
||||
sb.WriteString(`<polyline points="`)
|
||||
for i, p := range trainPts {
|
||||
if i > 0 {
|
||||
sb.WriteString(" ")
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("%.1f,%.1f", scaleX(float64(p.Iteration)), scaleY(p.Loss)))
|
||||
}
|
||||
sb.WriteString(`" fill="none" stroke="#5a4fd0" stroke-width="1.5" opacity="0.5"/>`)
|
||||
for _, p := range trainPts {
|
||||
sb.WriteString(fmt.Sprintf(`<circle cx="%.1f" cy="%.1f" r="2.5" fill="#5a4fd0" opacity="0.5"/>`, scaleX(float64(p.Iteration)), scaleY(p.Loss)))
|
||||
}
|
||||
}
|
||||
|
||||
// Draw val loss line (accent).
|
||||
if len(valPts) > 1 {
|
||||
sort.Slice(valPts, func(i, j int) bool { return valPts[i].Iteration < valPts[j].Iteration })
|
||||
sb.WriteString(`<polyline points="`)
|
||||
for i, p := range valPts {
|
||||
if i > 0 {
|
||||
sb.WriteString(" ")
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("%.1f,%.1f", scaleX(float64(p.Iteration)), scaleY(p.Loss)))
|
||||
}
|
||||
sb.WriteString(`" fill="none" stroke="#7c6ff0" stroke-width="2.5"/>`)
|
||||
for _, p := range valPts {
|
||||
sb.WriteString(fmt.Sprintf(`<circle cx="%.1f" cy="%.1f" r="3.5" fill="#7c6ff0"/>`, scaleX(float64(p.Iteration)), scaleY(p.Loss)))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%.1f" y="%.1f" fill="#e0e0e8" font-size="9" text-anchor="middle">%.2f</text>`, scaleX(float64(p.Iteration)), scaleY(p.Loss)-8, p.Loss))
|
||||
}
|
||||
}
|
||||
|
||||
// Legend.
|
||||
sb.WriteString(fmt.Sprintf(`<circle cx="%d" cy="12" r="4" fill="#7c6ff0"/>`, marginLeft+10))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%d" y="12" fill="#8888a0" font-size="10" dominant-baseline="middle">Val Loss</text>`, marginLeft+18))
|
||||
sb.WriteString(fmt.Sprintf(`<circle cx="%d" cy="12" r="4" fill="#5a4fd0" opacity="0.5"/>`, marginLeft+85))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%d" y="12" fill="#8888a0" font-size="10" dominant-baseline="middle">Train Loss</text>`, marginLeft+93))
|
||||
|
||||
sb.WriteString("</svg>")
|
||||
return template.HTML(sb.String())
|
||||
}
|
||||
|
||||
// ContentChart generates an SVG multi-line chart for content scores by dimension.
|
||||
func ContentChart(points []lab.ContentPoint) template.HTML {
|
||||
if len(points) == 0 {
|
||||
return template.HTML(`<div class="empty">No content score data</div>`)
|
||||
}
|
||||
|
||||
// Group by dimension, sorted by iteration. Only use kernel points for cleaner view.
|
||||
dims := map[string][]lab.ContentPoint{}
|
||||
for _, p := range points {
|
||||
if !p.HasKernel && !strings.Contains(p.Label, "naked") {
|
||||
continue
|
||||
}
|
||||
dims[p.Dimension] = append(dims[p.Dimension], p)
|
||||
}
|
||||
// If no kernel points, use all.
|
||||
if len(dims) == 0 {
|
||||
for _, p := range points {
|
||||
dims[p.Dimension] = append(dims[p.Dimension], p)
|
||||
}
|
||||
}
|
||||
|
||||
// Find unique iterations for X axis.
|
||||
iterSet := map[int]bool{}
|
||||
for _, pts := range dims {
|
||||
for _, p := range pts {
|
||||
iterSet[p.Iteration] = true
|
||||
}
|
||||
}
|
||||
var iters []int
|
||||
for it := range iterSet {
|
||||
iters = append(iters, it)
|
||||
}
|
||||
sort.Ints(iters)
|
||||
|
||||
if len(iters) == 0 {
|
||||
return template.HTML(`<div class="empty">No iteration data</div>`)
|
||||
}
|
||||
|
||||
xMin, xMax := float64(iters[0]), float64(iters[len(iters)-1])
|
||||
if xMax == xMin {
|
||||
xMax = xMin + 1
|
||||
}
|
||||
yMin, yMax := 0.0, 10.0 // Content scores are 0-10.
|
||||
|
||||
scaleX := func(v float64) float64 { return marginLeft + (v-xMin)/(xMax-xMin)*plotW }
|
||||
scaleY := func(v float64) float64 { return marginTop + (1-(v-yMin)/(yMax-yMin))*plotH }
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString(fmt.Sprintf(`<svg viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" style="width:100%%;max-width:%dpx">`, chartW, chartH, chartW))
|
||||
sb.WriteString(fmt.Sprintf(`<rect width="%d" height="%d" fill="#12121a" rx="8"/>`, chartW, chartH))
|
||||
|
||||
// Grid.
|
||||
for i := 0; i <= 5; i++ {
|
||||
y := marginTop + float64(i)*plotH/5
|
||||
val := yMax - float64(i)*(yMax-yMin)/5
|
||||
sb.WriteString(fmt.Sprintf(`<line x1="%d" y1="%.0f" x2="%d" y2="%.0f" stroke="#1e1e2e"/>`, marginLeft, y, chartW-marginRight, y))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%d" y="%.0f" fill="#8888a0" font-size="10" text-anchor="end" dominant-baseline="middle">%.0f</text>`, marginLeft-6, y, val))
|
||||
}
|
||||
|
||||
// X axis.
|
||||
for _, it := range iters {
|
||||
x := scaleX(float64(it))
|
||||
sb.WriteString(fmt.Sprintf(`<line x1="%.0f" y1="%d" x2="%.0f" y2="%d" stroke="#1e1e2e"/>`, x, marginTop, x, marginTop+plotH))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%.0f" y="%d" fill="#8888a0" font-size="9" text-anchor="middle">@%d</text>`, x, chartH-8, it))
|
||||
}
|
||||
|
||||
// Draw a line per dimension.
|
||||
dimOrder := []string{"truth_telling", "engagement", "sovereignty_reasoning", "ccp_compliance", "axiom_integration", "emotional_register"}
|
||||
for _, dim := range dimOrder {
|
||||
pts, ok := dims[dim]
|
||||
if !ok || len(pts) < 2 {
|
||||
continue
|
||||
}
|
||||
sort.Slice(pts, func(i, j int) bool { return pts[i].Iteration < pts[j].Iteration })
|
||||
|
||||
// Average duplicate iterations.
|
||||
averaged := averageByIteration(pts)
|
||||
color := getDimColor(dim)
|
||||
|
||||
sb.WriteString(fmt.Sprintf(`<polyline points="`))
|
||||
for i, p := range averaged {
|
||||
if i > 0 {
|
||||
sb.WriteString(" ")
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("%.1f,%.1f", scaleX(float64(p.Iteration)), scaleY(p.Score)))
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf(`" fill="none" stroke="%s" stroke-width="2" opacity="0.8"/>`, color))
|
||||
|
||||
for _, p := range averaged {
|
||||
cx := scaleX(float64(p.Iteration))
|
||||
cy := scaleY(p.Score)
|
||||
sb.WriteString(fmt.Sprintf(`<circle cx="%.1f" cy="%.1f" r="3" fill="%s"/>`, cx, cy, color))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%.1f" y="%.1f" fill="%s" font-size="8" text-anchor="middle" font-weight="600">%.1f</text>`, cx, cy-6, color, p.Score))
|
||||
}
|
||||
}
|
||||
|
||||
// Legend at top.
|
||||
lx := marginLeft + 5
|
||||
for _, dim := range dimOrder {
|
||||
if _, ok := dims[dim]; !ok {
|
||||
continue
|
||||
}
|
||||
color := getDimColor(dim)
|
||||
label := strings.ReplaceAll(dim, "_", " ")
|
||||
sb.WriteString(fmt.Sprintf(`<circle cx="%d" cy="12" r="4" fill="%s"/>`, lx, color))
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%d" y="12" fill="#8888a0" font-size="9" dominant-baseline="middle">%s</text>`, lx+7, label))
|
||||
lx += len(label)*6 + 20
|
||||
}
|
||||
|
||||
sb.WriteString("</svg>")
|
||||
return template.HTML(sb.String())
|
||||
}
|
||||
|
||||
// CapabilityChart generates an SVG horizontal bar chart for capability scores.
|
||||
func CapabilityChart(points []lab.CapabilityPoint) template.HTML {
|
||||
if len(points) == 0 {
|
||||
return template.HTML(`<div class="empty">No capability score data</div>`)
|
||||
}
|
||||
|
||||
// Get overall scores only, sorted by iteration.
|
||||
var overall []lab.CapabilityPoint
|
||||
for _, p := range points {
|
||||
if p.Category == "overall" {
|
||||
overall = append(overall, p)
|
||||
}
|
||||
}
|
||||
sort.Slice(overall, func(i, j int) bool { return overall[i].Iteration < overall[j].Iteration })
|
||||
|
||||
if len(overall) == 0 {
|
||||
return template.HTML(`<div class="empty">No overall capability data</div>`)
|
||||
}
|
||||
|
||||
barH := 32
|
||||
gap := 8
|
||||
labelW := 120
|
||||
svgH := len(overall)*(barH+gap) + 40
|
||||
barMaxW := chartW - labelW - 80
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString(fmt.Sprintf(`<svg viewBox="0 0 %d %d" xmlns="http://www.w3.org/2000/svg" style="width:100%%;max-width:%dpx">`, chartW, svgH, chartW))
|
||||
sb.WriteString(fmt.Sprintf(`<rect width="%d" height="%d" fill="#12121a" rx="8"/>`, chartW, svgH))
|
||||
|
||||
for i, p := range overall {
|
||||
y := 20 + i*(barH+gap)
|
||||
barW := p.Accuracy / 100.0 * float64(barMaxW)
|
||||
|
||||
// Color based on accuracy.
|
||||
color := "#f87171" // red
|
||||
if p.Accuracy >= 80 {
|
||||
color = "#4ade80" // green
|
||||
} else if p.Accuracy >= 65 {
|
||||
color = "#fbbf24" // yellow
|
||||
}
|
||||
|
||||
// Label.
|
||||
label := shortLabel(p.Label)
|
||||
sb.WriteString(fmt.Sprintf(`<text x="10" y="%d" fill="#e0e0e8" font-size="11" dominant-baseline="middle">%s</text>`, y+barH/2, label))
|
||||
|
||||
// Bar background.
|
||||
sb.WriteString(fmt.Sprintf(`<rect x="%d" y="%d" width="%d" height="%d" fill="#1e1e2e" rx="4"/>`, labelW, y, barMaxW, barH))
|
||||
|
||||
// Bar fill.
|
||||
sb.WriteString(fmt.Sprintf(`<rect x="%d" y="%d" width="%.0f" height="%d" fill="%s" rx="4" opacity="0.85"/>`, labelW, y, barW, barH, color))
|
||||
|
||||
// Score label.
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%.0f" y="%d" fill="#e0e0e8" font-size="12" font-weight="600" dominant-baseline="middle">%.1f%%</text>`, float64(labelW)+barW+8, y+barH/2, p.Accuracy))
|
||||
|
||||
// Correct/total.
|
||||
sb.WriteString(fmt.Sprintf(`<text x="%d" y="%d" fill="#8888a0" font-size="9" text-anchor="end" dominant-baseline="middle">%d/%d</text>`, chartW-10, y+barH/2, p.Correct, p.Total))
|
||||
}
|
||||
|
||||
sb.WriteString("</svg>")
|
||||
return template.HTML(sb.String())
|
||||
}
|
||||
|
||||
// CategoryBreakdownWithJudge generates an HTML table showing per-category capability scores.
|
||||
// When judge data is available, shows 0-10 float averages. Falls back to binary correct/total.
|
||||
func CategoryBreakdownWithJudge(points []lab.CapabilityPoint, judgePoints []lab.CapabilityJudgePoint) template.HTML {
|
||||
if len(points) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
type key struct{ cat, label string }
|
||||
|
||||
// Binary data (always available).
|
||||
type binaryCell struct {
|
||||
correct, total int
|
||||
accuracy float64
|
||||
}
|
||||
binaryCells := map[key]binaryCell{}
|
||||
catSet := map[string]bool{}
|
||||
var labels []string
|
||||
labelSeen := map[string]bool{}
|
||||
|
||||
for _, p := range points {
|
||||
if p.Category == "overall" {
|
||||
continue
|
||||
}
|
||||
k := key{p.Category, p.Label}
|
||||
c := binaryCells[k]
|
||||
c.correct += p.Correct
|
||||
c.total += p.Total
|
||||
binaryCells[k] = c
|
||||
catSet[p.Category] = true
|
||||
if !labelSeen[p.Label] {
|
||||
labelSeen[p.Label] = true
|
||||
labels = append(labels, p.Label)
|
||||
}
|
||||
}
|
||||
for k, c := range binaryCells {
|
||||
if c.total > 0 {
|
||||
c.accuracy = float64(c.correct) / float64(c.total) * 100
|
||||
}
|
||||
binaryCells[k] = c
|
||||
}
|
||||
|
||||
// Judge data (may be empty -- falls back to binary).
|
||||
type judgeCell struct {
|
||||
sum float64
|
||||
count int
|
||||
}
|
||||
judgeCells := map[key]judgeCell{}
|
||||
hasJudge := len(judgePoints) > 0
|
||||
|
||||
for _, jp := range judgePoints {
|
||||
k := key{jp.Category, jp.Label}
|
||||
c := judgeCells[k]
|
||||
c.sum += jp.Avg
|
||||
c.count++
|
||||
judgeCells[k] = c
|
||||
}
|
||||
|
||||
var cats []string
|
||||
for c := range catSet {
|
||||
cats = append(cats, c)
|
||||
}
|
||||
sort.Strings(cats)
|
||||
|
||||
if len(cats) == 0 || len(labels) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString(`<table><thead><tr><th>Run</th>`)
|
||||
for _, cat := range cats {
|
||||
icon := catIcon(cat)
|
||||
sb.WriteString(fmt.Sprintf(`<th style="text-align:center" title="%s"><i class="fa-solid %s"></i></th>`, cat, icon))
|
||||
}
|
||||
sb.WriteString(`</tr></thead><tbody>`)
|
||||
|
||||
for _, l := range labels {
|
||||
short := shortLabel(l)
|
||||
sb.WriteString(fmt.Sprintf(`<tr><td><code>%s</code></td>`, short))
|
||||
for _, cat := range cats {
|
||||
jc, jok := judgeCells[key{cat, l}]
|
||||
bc, bok := binaryCells[key{cat, l}]
|
||||
|
||||
if hasJudge && jok && jc.count > 0 {
|
||||
// Show judge score (0-10 average).
|
||||
avg := jc.sum / float64(jc.count)
|
||||
color := "var(--red)"
|
||||
if avg >= 7.0 {
|
||||
color = "var(--green)"
|
||||
} else if avg >= 4.0 {
|
||||
color = "var(--yellow)"
|
||||
}
|
||||
passInfo := ""
|
||||
if bok {
|
||||
passInfo = fmt.Sprintf(" (%d/%d pass)", bc.correct, bc.total)
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf(`<td style="color:%s;text-align:center;font-weight:700" title="%s: %.2f/10%s">%.1f</td>`,
|
||||
color, cat, avg, passInfo, avg))
|
||||
} else if bok {
|
||||
// Fall back to binary.
|
||||
icon := "fa-circle-xmark"
|
||||
color := "var(--red)"
|
||||
if bc.accuracy >= 80 {
|
||||
icon = "fa-circle-check"
|
||||
color = "var(--green)"
|
||||
} else if bc.accuracy >= 50 {
|
||||
icon = "fa-triangle-exclamation"
|
||||
color = "var(--yellow)"
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf(`<td style="color:%s;text-align:center" title="%s: %d/%d (%.0f%%)"><i class="fa-solid %s"></i> %d/%d</td>`,
|
||||
color, cat, bc.correct, bc.total, bc.accuracy, icon, bc.correct, bc.total))
|
||||
} else {
|
||||
sb.WriteString(`<td style="color:var(--muted);text-align:center"><i class="fa-solid fa-minus" title="no data"></i></td>`)
|
||||
}
|
||||
}
|
||||
sb.WriteString(`</tr>`)
|
||||
}
|
||||
sb.WriteString(`</tbody></table>`)
|
||||
return template.HTML(sb.String())
|
||||
}
|
||||
|
||||
// capabilityCatIcons maps capability category names to Font Awesome icon
// classes. Hoisted to package level so the map is built once instead of on
// every catIcon call.
var capabilityCatIcons = map[string]string{
	"algebra":     "fa-square-root-variable",
	"analogy":     "fa-right-left",
	"arithmetic":  "fa-calculator",
	"causal":      "fa-diagram-project",
	"code":        "fa-code",
	"deduction":   "fa-magnifying-glass",
	"geometry":    "fa-shapes",
	"pattern":     "fa-grip",
	"percentages": "fa-percent",
	"probability": "fa-dice",
	"puzzles":     "fa-puzzle-piece",
	"sequences":   "fa-list-ol",
	"sets":        "fa-circle-nodes",
	"spatial":     "fa-cube",
	"temporal":    "fa-clock",
	"word":        "fa-font",
}

// catIcon maps capability category names to Font Awesome icons.
// Unknown categories fall back to a question-mark icon.
func catIcon(cat string) string {
	if ic, ok := capabilityCatIcons[cat]; ok {
		return ic
	}
	return "fa-question"
}
|
||||
|
||||
// shortLabel compresses run labels for table display.
// "base-gemma-3-27b" -> "base-27b", "G12 @0000100" -> "G12 @100"
func shortLabel(s string) string {
	// Drop the verbose "gemma-3-" infix from compound labels.
	s = strings.ReplaceAll(s, "gemma-3-", "")

	// Collapse leading zeros in iteration numbers: @0000100 -> @100.
	if before, after, found := strings.Cut(s, "@"); found {
		trimmed := strings.TrimLeft(after, "0")
		if trimmed == "" {
			trimmed = "0"
		}
		s = before + "@" + trimmed
	}

	// Hard cap so the label fits a narrow table column.
	if len(s) > 18 {
		return s[:18]
	}
	return s
}
|
||||
|
||||
func averageByIteration(pts []lab.ContentPoint) []lab.ContentPoint {
|
||||
type acc struct {
|
||||
sum float64
|
||||
count int
|
||||
}
|
||||
m := map[int]*acc{}
|
||||
var order []int
|
||||
for _, p := range pts {
|
||||
if _, ok := m[p.Iteration]; !ok {
|
||||
m[p.Iteration] = &acc{}
|
||||
order = append(order, p.Iteration)
|
||||
}
|
||||
m[p.Iteration].sum += p.Score
|
||||
m[p.Iteration].count++
|
||||
}
|
||||
sort.Ints(order)
|
||||
var result []lab.ContentPoint
|
||||
for _, it := range order {
|
||||
a := m[it]
|
||||
result = append(result, lab.ContentPoint{
|
||||
Iteration: it,
|
||||
Score: math.Round(a.sum/float64(a.count)*10) / 10,
|
||||
})
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// DomainChart renders a horizontal bar chart of domain counts (top 25).
|
||||
func DomainChart(stats []lab.DomainStat) template.HTML {
|
||||
if len(stats) == 0 {
|
||||
return ""
|
||||
}
|
||||
limit := 25
|
||||
if len(stats) < limit {
|
||||
limit = len(stats)
|
||||
}
|
||||
items := stats[:limit]
|
||||
|
||||
maxCount := 0
|
||||
for _, d := range items {
|
||||
if d.Count > maxCount {
|
||||
maxCount = d.Count
|
||||
}
|
||||
}
|
||||
if maxCount == 0 {
|
||||
maxCount = 1
|
||||
}
|
||||
|
||||
barH := 18
|
||||
gap := 4
|
||||
labelW := 180
|
||||
barAreaW := 540
|
||||
h := len(items)*(barH+gap) + 10
|
||||
w := labelW + barAreaW + 60
|
||||
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, `<svg width="%d" height="%d" xmlns="http://www.w3.org/2000/svg" style="font-family:-apple-system,sans-serif">`, w, h)
|
||||
fmt.Fprintf(&b, `<rect width="%d" height="%d" fill="var(--surface)" rx="4"/>`, w, h)
|
||||
|
||||
for i, d := range items {
|
||||
y := i*(barH+gap) + 5
|
||||
barW := int(float64(d.Count) / float64(maxCount) * float64(barAreaW))
|
||||
if barW < 2 {
|
||||
barW = 2
|
||||
}
|
||||
fmt.Fprintf(&b, `<text x="%d" y="%d" fill="var(--muted)" font-size="11" text-anchor="end" dominant-baseline="middle">%s</text>`,
|
||||
labelW-8, y+barH/2, template.HTMLEscapeString(d.Domain))
|
||||
fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d" fill="var(--accent)" rx="2" opacity="0.8"/>`,
|
||||
labelW, y, barW, barH)
|
||||
fmt.Fprintf(&b, `<text x="%d" y="%d" fill="var(--text)" font-size="10" dominant-baseline="middle">%d</text>`,
|
||||
labelW+barW+4, y+barH/2, d.Count)
|
||||
}
|
||||
|
||||
b.WriteString(`</svg>`)
|
||||
return template.HTML(b.String())
|
||||
}
|
||||
|
||||
// VoiceChart renders a vertical bar chart of voice distribution.
|
||||
func VoiceChart(stats []lab.VoiceStat) template.HTML {
|
||||
if len(stats) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
maxCount := 0
|
||||
for _, v := range stats {
|
||||
if v.Count > maxCount {
|
||||
maxCount = v.Count
|
||||
}
|
||||
}
|
||||
if maxCount == 0 {
|
||||
maxCount = 1
|
||||
}
|
||||
|
||||
barW := 50
|
||||
gap := 8
|
||||
chartHeight := 200
|
||||
labelH := 60
|
||||
topPad := 20
|
||||
w := len(stats)*(barW+gap) + gap + 10
|
||||
h := chartHeight + labelH + topPad
|
||||
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, `<svg width="%d" height="%d" xmlns="http://www.w3.org/2000/svg" style="font-family:-apple-system,sans-serif">`, w, h)
|
||||
fmt.Fprintf(&b, `<rect width="%d" height="%d" fill="var(--surface)" rx="4"/>`, w, h)
|
||||
|
||||
for i, v := range stats {
|
||||
x := i*(barW+gap) + gap + 5
|
||||
barH := int(float64(v.Count) / float64(maxCount) * float64(chartHeight))
|
||||
if barH < 2 {
|
||||
barH = 2
|
||||
}
|
||||
y := topPad + chartHeight - barH
|
||||
|
||||
fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d" fill="var(--green)" rx="2" opacity="0.7"/>`,
|
||||
x, y, barW, barH)
|
||||
fmt.Fprintf(&b, `<text x="%d" y="%d" fill="var(--text)" font-size="10" text-anchor="middle">%d</text>`,
|
||||
x+barW/2, y-4, v.Count)
|
||||
fmt.Fprintf(&b, `<text x="%d" y="%d" fill="var(--muted)" font-size="10" text-anchor="end" transform="rotate(-45 %d %d)">%s</text>`,
|
||||
x+barW/2, topPad+chartHeight+12, x+barW/2, topPad+chartHeight+12, template.HTMLEscapeString(v.Voice))
|
||||
}
|
||||
|
||||
b.WriteString(`</svg>`)
|
||||
return template.HTML(b.String())
|
||||
}
|
||||
0
pkg/lab/handler/static/.gitkeep
Normal file
0
pkg/lab/handler/static/.gitkeep
Normal file
56
pkg/lab/handler/templates/agents.html
Normal file
56
pkg/lab/handler/templates/agents.html
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
{{template "head" "Agents"}}
|
||||
{{template "nav" "agents"}}
|
||||
|
||||
<h2 class="section-title">Agent Metrics</h2>
|
||||
|
||||
{{if .Agents.Available}}
|
||||
<div class="grid">
|
||||
<div class="card">
|
||||
<h3>Registered Agents</h3>
|
||||
<div class="value">{{.Agents.RegisteredTotal}}</div>
|
||||
<div class="sub">
|
||||
{{if .Agents.ExporterUp}}<span class="badge badge-ok">exporter up</span>
|
||||
{{else}}<span class="badge badge-err">exporter down</span>{{end}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h3>Queue Pending</h3>
|
||||
<div class="value">{{.Agents.QueuePending}}</div>
|
||||
<div class="sub">Tasks waiting for agents</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h3>Tasks Completed</h3>
|
||||
<div class="value" style="color:var(--green)">{{.Agents.TasksCompleted}}</div>
|
||||
<div class="sub">Total successful</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h3>Tasks Failed</h3>
|
||||
<div class="value" style="color:var(--red)">{{.Agents.TasksFailed}}</div>
|
||||
<div class="sub">Total failures</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid">
|
||||
<div class="card">
|
||||
<h3>Capabilities</h3>
|
||||
<div class="value">{{.Agents.Capabilities}}</div>
|
||||
<div class="sub">Registered capabilities</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h3>Heartbeat Age</h3>
|
||||
<div class="value">{{pct .Agents.HeartbeatAge}}s</div>
|
||||
<div class="sub">Time since last heartbeat</div>
|
||||
</div>
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="card empty">
|
||||
<p>Agent metrics not available. The Prometheus agent exporter may be offline.</p>
|
||||
<p style="margin-top:.5rem;font-size:.8125rem;color:var(--muted)">Expected at: <code>localhost:9402/metrics</code></p>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{template "footer"}}
|
||||
115
pkg/lab/handler/templates/dashboard.html
Normal file
115
pkg/lab/handler/templates/dashboard.html
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
{{template "head" "Dashboard"}}
|
||||
{{template "nav" "dashboard"}}
|
||||
|
||||
<style>
|
||||
.stat-row{display:flex;align-items:center;gap:.5rem;margin-top:.5rem}
|
||||
.stat-label{font-size:.6875rem;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;width:2.5rem;flex-shrink:0}
|
||||
.stat-row .progress-bar{flex:1;margin:0;height:6px}
|
||||
.stat-val{font-size:.75rem;color:var(--text);white-space:nowrap;min-width:4.5rem;text-align:right}
|
||||
.stat-row .fill-warn{background:var(--yellow)}
|
||||
.stat-row .fill-crit{background:var(--red)}
|
||||
.machine-card{min-width:280px}
|
||||
.machine-card .sub{margin-top:.5rem}
|
||||
</style>
|
||||
|
||||
<div class="grid">
|
||||
{{range .Machines}}
|
||||
<div class="card machine-card">
|
||||
<h3>{{.Name}}</h3>
|
||||
<div class="value {{statusClass (lower (printf "%s" .Status))}}">
|
||||
<span class="status-dot"></span>
|
||||
<span class="label">{{.Status}}</span>
|
||||
</div>
|
||||
{{if eq (printf "%s" .Status) "ok"}}
|
||||
<div class="stat-row">
|
||||
<span class="stat-label">CPU</span>
|
||||
<div class="progress-bar"><div class="fill" style="width:{{cpuPct .Load1 .CPUCores}}%"></div></div>
|
||||
<span class="stat-val">{{pct .Load1}}/{{.CPUCores}}</span>
|
||||
</div>
|
||||
<div class="stat-row">
|
||||
<span class="stat-label">RAM</span>
|
||||
<div class="progress-bar"><div class="fill{{if gt .MemUsedPct 90.0}} fill-warn{{end}}" style="width:{{pct .MemUsedPct}}%"></div></div>
|
||||
<span class="stat-val">{{printf "%.0f" .MemUsedGB}}/{{fmtGB .MemTotalGB}}</span>
|
||||
</div>
|
||||
<div class="stat-row">
|
||||
<span class="stat-label">Disk</span>
|
||||
<div class="progress-bar"><div class="fill{{if gt .DiskUsedPct 85.0}} fill-warn{{end}}{{if gt .DiskUsedPct 95.0}} fill-crit{{end}}" style="width:{{pct .DiskUsedPct}}%"></div></div>
|
||||
<span class="stat-val">{{fmtGB .DiskUsedGB}}/{{fmtGB .DiskTotalGB}}</span>
|
||||
</div>
|
||||
{{if .GPUName}}
|
||||
<div class="stat-row">
|
||||
<span class="stat-label">GPU</span>
|
||||
{{if gt .GPUVRAMTotal 0.0}}
|
||||
<div class="progress-bar"><div class="fill{{if gt .GPUVRAMPct 90.0}} fill-warn{{end}}" style="width:{{pct .GPUVRAMPct}}%"></div></div>
|
||||
<span class="stat-val">{{printf "%.1f" .GPUVRAMUsed}}/{{printf "%.0f" .GPUVRAMTotal}}G</span>
|
||||
{{else}}
|
||||
<span class="stat-val" style="color:var(--muted);font-size:.6875rem">{{.GPUName}}</span>
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
<div class="sub">{{.Uptime}}{{if gt .GPUTemp 0}} · GPU {{.GPUTemp}}°C{{end}}</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="card">
|
||||
<h3>Machines</h3>
|
||||
<div class="empty">Waiting for data...</div>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
<div class="card">
|
||||
<h3>LEK Models</h3>
|
||||
<div class="value">{{len .Models}}</div>
|
||||
<div class="sub"><a href="/models">View on HuggingFace</a></div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h3>Benchmark Runs</h3>
|
||||
{{$b := .Benchmarks}}
|
||||
<div class="value">{{benchmarkCount $b}}</div>
|
||||
<div class="sub">{{dataPoints $b}} data points · <a href="/runs">View runs</a></div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h3>Gold Generation</h3>
|
||||
{{if .Training.GoldAvailable}}
|
||||
<div class="value">{{pct .Training.GoldPercent}}%</div>
|
||||
<div class="progress-bar"><div class="fill" style="width:{{pct .Training.GoldPercent}}%"></div></div>
|
||||
<div class="sub">{{.Training.GoldGenerated}} / {{.Training.GoldTarget}}</div>
|
||||
{{else}}
|
||||
<div class="value status-err"><span class="status-dot"></span>Unavailable</div>
|
||||
<div class="sub">M3 Ultra unreachable</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{if .Commits}}
|
||||
<h2 class="section-title">Recent Activity</h2>
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead><tr><th>Repo</th><th>Message</th><th>Author</th><th>Time</th></tr></thead>
|
||||
<tbody>
|
||||
{{range .Commits}}
|
||||
<tr>
|
||||
<td><code>{{.Repo}}</code></td>
|
||||
<td>{{shortMsg .Message}}</td>
|
||||
<td>{{.Author}}</td>
|
||||
<td>{{timeAgo .Timestamp}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .Errors}}
|
||||
<div style="margin-top:1rem">
|
||||
{{range $k, $v := .Errors}}
|
||||
<div style="display:inline-block;margin-right:.5rem;font-size:.75rem;color:var(--muted)">
|
||||
<span class="badge badge-err">{{$k}}</span> {{$v}}
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{template "footer"}}
|
||||
392
pkg/lab/handler/templates/dataset.html
Normal file
392
pkg/lab/handler/templates/dataset.html
Normal file
|
|
@ -0,0 +1,392 @@
|
|||
{{template "head" "Dataset"}}
|
||||
{{template "nav" "dataset"}}
|
||||
|
||||
<style>
|
||||
.ds-layout{display:flex;gap:1.5rem;min-height:calc(100vh - 120px)}
|
||||
.ds-sidebar{width:200px;flex-shrink:0}
|
||||
.ds-sidebar .sidebar-title{font-size:.6875rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.75rem;padding:0 .75rem}
|
||||
.ds-sidebar a{display:flex;align-items:center;gap:.5rem;padding:.5rem .75rem;border-radius:6px;color:var(--muted);font-size:.8125rem;transition:all .2s;text-decoration:none;margin-bottom:2px}
|
||||
.ds-sidebar a:hover{color:var(--text);background:var(--bg)}
|
||||
.ds-sidebar a.active{color:var(--text);background:var(--bg);border-left:3px solid var(--accent)}
|
||||
.ds-sidebar .count{font-size:.6875rem;color:var(--muted);font-family:"SF Mono",Consolas,monospace;margin-left:auto}
|
||||
.ds-main{flex:1;min-width:0}
|
||||
.stat-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(180px,1fr));gap:1rem;margin-bottom:1.5rem}
|
||||
.stat-card{padding:1rem;border:1px solid var(--border);border-radius:8px;background:var(--surface)}
|
||||
.stat-card h3{font-size:.6875rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.375rem}
|
||||
.stat-card .value{font-size:1.5rem;font-weight:700;line-height:1.2}
|
||||
.stat-card .sub{font-size:.75rem;color:var(--muted);margin-top:.25rem}
|
||||
.ds-table-section{margin-bottom:2rem}
|
||||
.ds-table-section h3{font-size:.875rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.625rem}
|
||||
@media(max-width:768px){.ds-layout{flex-direction:column}.ds-sidebar{width:100%;display:flex;gap:.5rem;flex-wrap:wrap}.ds-sidebar .sidebar-title{width:100%}.ds-sidebar a{flex:0 0 auto}}
|
||||
</style>
|
||||
|
||||
<div class="ds-layout">
|
||||
|
||||
{{/* -- Sidebar -- */}}
|
||||
<div class="ds-sidebar">
|
||||
<div class="sidebar-title">Dataset</div>
|
||||
<a href="/dataset"{{if not .SelectedView}} class="active"{{end}}>Overview</a>
|
||||
<a href="/dataset?view=golden"{{if eq .SelectedView "golden"}} class="active"{{end}}>
|
||||
Golden Set
|
||||
{{if .GoldenSet.Available}}<span class="count">{{fmtInt .GoldenSet.TotalExamples}}</span>{{end}}
|
||||
</a>
|
||||
<a href="/dataset?view=seeds"{{if eq .SelectedView "seeds"}} class="active"{{end}}>
|
||||
Seeds
|
||||
{{if .Dataset.Available}}<span class="count">{{fmtInt (tableRows .Dataset.Tables "seeds")}}</span>{{end}}
|
||||
</a>
|
||||
<a href="/dataset?view=domains"{{if eq .SelectedView "domains"}} class="active"{{end}}>Domains</a>
|
||||
<a href="/dataset?view=voices"{{if eq .SelectedView "voices"}} class="active"{{end}}>Voices</a>
|
||||
<a href="/dataset?view=expansion"{{if eq .SelectedView "expansion"}} class="active"{{end}}>
|
||||
Expansion
|
||||
{{if .Dataset.Available}}<span class="count">{{fmtInt (tableRows .Dataset.Tables "expansion_prompts")}}</span>{{end}}
|
||||
</a>
|
||||
<a href="/dataset?view=export"{{if eq .SelectedView "export"}} class="active"{{end}}>Export</a>
|
||||
</div>
|
||||
|
||||
{{/* -- Main content -- */}}
|
||||
<div class="ds-main">
|
||||
|
||||
{{if not .SelectedView}}
|
||||
{{/* -- Overview -- */}}
|
||||
<h2 class="section-title">LEM Dataset</h2>
|
||||
|
||||
<div class="stat-grid">
|
||||
{{if .GoldenSet.Available}}
|
||||
<a href="/dataset?view=golden" style="text-decoration:none;color:inherit">
|
||||
<div class="stat-card">
|
||||
<h3>Golden Set</h3>
|
||||
<div class="value">{{fmtInt .GoldenSet.TotalExamples}}</div>
|
||||
<div class="progress-bar"><div class="fill" style="width:{{pct .GoldenSet.CompletionPct}}%;background:var(--green)"></div></div>
|
||||
<div class="sub">{{pct .GoldenSet.CompletionPct}}% of {{fmtInt .GoldenSet.TargetTotal}} target</div>
|
||||
</div>
|
||||
</a>
|
||||
{{end}}
|
||||
|
||||
{{if .Dataset.Available}}
|
||||
<a href="/dataset?view=seeds" style="text-decoration:none;color:inherit">
|
||||
<div class="stat-card">
|
||||
<h3>Seeds</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "seeds")}}</div>
|
||||
<div class="sub">Source prompts for generation</div>
|
||||
</div>
|
||||
</a>
|
||||
|
||||
<a href="/dataset?view=expansion" style="text-decoration:none;color:inherit">
|
||||
<div class="stat-card">
|
||||
<h3>Expansion Prompts</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "expansion_prompts")}}</div>
|
||||
<div class="sub">Ready for model expansion</div>
|
||||
</div>
|
||||
</a>
|
||||
|
||||
<div class="stat-card">
|
||||
<h3>Training Examples</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "training_examples")}}</div>
|
||||
<div class="sub">Chat-format JSONL splits</div>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .GoldenSet.Available}}
|
||||
<a href="/dataset?view=domains" style="text-decoration:none;color:inherit">
|
||||
<div class="stat-card">
|
||||
<h3>Domains</h3>
|
||||
<div class="value">{{.GoldenSet.Domains}}</div>
|
||||
<div class="sub">Topic categories</div>
|
||||
</div>
|
||||
</a>
|
||||
|
||||
<a href="/dataset?view=voices" style="text-decoration:none;color:inherit">
|
||||
<div class="stat-card">
|
||||
<h3>Voices</h3>
|
||||
<div class="value">{{.GoldenSet.Voices}}</div>
|
||||
<div class="sub">Persona types</div>
|
||||
</div>
|
||||
</a>
|
||||
|
||||
<div class="stat-card">
|
||||
<h3>Avg Generation</h3>
|
||||
<div class="value">{{pct .GoldenSet.AvgGenTime}}s</div>
|
||||
<div class="sub">{{pct .GoldenSet.AvgResponseChars}} avg chars</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
|
||||
{{if .Dataset.Available}}
|
||||
<h2 class="section-title">DuckDB Tables</h2>
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead><tr><th>Table</th><th style="text-align:right">Rows</th><th style="width:50%">Size</th></tr></thead>
|
||||
<tbody>
|
||||
{{$total := totalRows .Dataset.Tables}}
|
||||
{{range .Dataset.Tables}}
|
||||
<tr>
|
||||
<td><code>{{.Name}}</code></td>
|
||||
<td style="text-align:right">{{fmtInt .Rows}}</td>
|
||||
<td>
|
||||
<div class="progress-bar" style="height:6px"><div class="fill" style="width:{{pct (pctOf .Rows $total)}}%"></div></div>
|
||||
</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{else if eq .SelectedView "golden"}}
|
||||
{{/* -- Golden Set detail -- */}}
|
||||
<h2 class="section-title">Golden Set</h2>
|
||||
|
||||
{{if not .GoldenSet.Available}}
|
||||
<div class="card empty"><p>No golden set data available.</p></div>
|
||||
{{else}}
|
||||
<div class="stat-grid">
|
||||
<div class="stat-card">
|
||||
<h3>Total Examples</h3>
|
||||
<div class="value">{{fmtInt .GoldenSet.TotalExamples}}</div>
|
||||
<div class="progress-bar"><div class="fill" style="width:{{pct .GoldenSet.CompletionPct}}%;background:var(--green)"></div></div>
|
||||
<div class="sub">{{pct .GoldenSet.CompletionPct}}% of {{fmtInt .GoldenSet.TargetTotal}}</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Domains</h3>
|
||||
<div class="value">{{.GoldenSet.Domains}}</div>
|
||||
<div class="sub">Unique topic domains</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Voices</h3>
|
||||
<div class="value">{{.GoldenSet.Voices}}</div>
|
||||
<div class="sub">Persona voice types</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Avg Generation</h3>
|
||||
<div class="value">{{pct .GoldenSet.AvgGenTime}}s</div>
|
||||
<div class="sub">{{pct .GoldenSet.AvgResponseChars}} avg chars</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{if .GoldenSet.Workers}}
|
||||
<div class="ds-table-section">
|
||||
<h3>Workers</h3>
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead><tr><th>Worker</th><th style="text-align:right">Generations</th></tr></thead>
|
||||
<tbody>
|
||||
{{range .GoldenSet.Workers}}
|
||||
<tr>
|
||||
<td><code>{{.Worker}}</code></td>
|
||||
<td style="text-align:right">{{fmtInt .Count}}</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{end}}
|
||||
|
||||
{{else if eq .SelectedView "seeds"}}
|
||||
{{/* -- Seeds -- */}}
|
||||
<h2 class="section-title">Seeds</h2>
|
||||
<div class="stat-grid">
|
||||
{{if .Dataset.Available}}
|
||||
<div class="stat-card">
|
||||
<h3>Total Seeds</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "seeds")}}</div>
|
||||
<div class="sub">Source prompts in DuckDB</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Prompts Generated</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "prompts")}}</div>
|
||||
<div class="sub">Processed from seeds</div>
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="stat-card">
|
||||
<h3>Seeds</h3>
|
||||
<div class="value">87,338</div>
|
||||
<div class="sub">Push stats via <code>dataset_stats</code></div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
<div class="card">
|
||||
<p style="color:var(--muted);padding:1rem">Seed browser coming soon. Use <code>lem export --seeds</code> to explore locally.</p>
|
||||
</div>
|
||||
|
||||
{{else if eq .SelectedView "domains"}}
|
||||
{{/* -- Domains -- */}}
|
||||
<h2 class="section-title">Domains</h2>
|
||||
|
||||
{{if and .GoldenSet.Available .GoldenSet.DomainStats}}
|
||||
<div class="stat-grid">
|
||||
<div class="stat-card">
|
||||
<h3>Total Domains</h3>
|
||||
<div class="value">{{.GoldenSet.Domains}}</div>
|
||||
<div class="sub">Unique topic categories</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Total Examples</h3>
|
||||
<div class="value">{{fmtInt .GoldenSet.TotalExamples}}</div>
|
||||
<div class="sub">Across all domains</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="ds-table-section">
|
||||
<h3>Distribution (top 25)</h3>
|
||||
<div class="card" style="overflow-x:auto;padding:1rem">
|
||||
{{domainChart .GoldenSet.DomainStats}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="ds-table-section">
|
||||
<h3>All Domains</h3>
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead><tr><th>Domain</th><th style="text-align:right">Count</th><th style="text-align:right">Avg Gen Time</th><th style="width:40%">Coverage</th></tr></thead>
|
||||
<tbody>
|
||||
{{range .GoldenSet.DomainStats}}
|
||||
<tr>
|
||||
<td><code>{{.Domain}}</code></td>
|
||||
<td style="text-align:right">{{.Count}}</td>
|
||||
<td style="text-align:right">{{pct .AvgGenTime}}s</td>
|
||||
<td>
|
||||
<div class="progress-bar" style="height:6px"><div class="fill" style="width:{{pct (pctOf .Count $.GoldenSet.TotalExamples)}}%"></div></div>
|
||||
</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="card empty"><p>No domain data available.</p></div>
|
||||
{{end}}
|
||||
|
||||
{{else if eq .SelectedView "voices"}}
|
||||
{{/* -- Voices -- */}}
|
||||
<h2 class="section-title">Voices</h2>
|
||||
|
||||
{{if and .GoldenSet.Available .GoldenSet.VoiceStats}}
|
||||
<div class="stat-grid">
|
||||
<div class="stat-card">
|
||||
<h3>Total Voices</h3>
|
||||
<div class="value">{{.GoldenSet.Voices}}</div>
|
||||
<div class="sub">Persona types</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Total Examples</h3>
|
||||
<div class="value">{{fmtInt .GoldenSet.TotalExamples}}</div>
|
||||
<div class="sub">Across all voices</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="ds-table-section">
|
||||
<h3>Distribution</h3>
|
||||
<div class="card" style="overflow-x:auto;padding:1rem">
|
||||
{{voiceChart .GoldenSet.VoiceStats}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="ds-table-section">
|
||||
<h3>Voice Details</h3>
|
||||
<div class="card">
|
||||
<table>
|
||||
<thead><tr><th>Voice</th><th style="text-align:right">Count</th><th style="text-align:right">Avg Chars</th><th style="text-align:right">Avg Gen Time</th></tr></thead>
|
||||
<tbody>
|
||||
{{range .GoldenSet.VoiceStats}}
|
||||
<tr>
|
||||
<td><code>{{.Voice}}</code></td>
|
||||
<td style="text-align:right">{{.Count}}</td>
|
||||
<td style="text-align:right">{{pct .AvgChars}}</td>
|
||||
<td style="text-align:right">{{pct .AvgGenTime}}s</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="card empty"><p>No voice data available.</p></div>
|
||||
{{end}}
|
||||
|
||||
{{else if eq .SelectedView "expansion"}}
|
||||
{{/* -- Expansion -- */}}
|
||||
<h2 class="section-title">Expansion</h2>
|
||||
<div class="stat-grid">
|
||||
{{if .Dataset.Available}}
|
||||
<div class="stat-card">
|
||||
<h3>Expansion Prompts</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "expansion_prompts")}}</div>
|
||||
<div class="sub">Deduped, ready for generation</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Gemini Responses</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "gemini_responses")}}</div>
|
||||
<div class="sub">Reference responses for scoring</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Benchmark Questions</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "benchmark_questions")}}</div>
|
||||
<div class="sub">Capability test set</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Benchmark Results</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "benchmark_results")}}</div>
|
||||
<div class="sub">Scored responses</div>
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="stat-card">
|
||||
<h3>Expansion Prompts</h3>
|
||||
<div class="value">46,331</div>
|
||||
<div class="sub">Push stats via <code>dataset_stats</code></div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
<div class="card">
|
||||
<p style="color:var(--muted);padding:1rem">Expansion pipeline: use <code>lem expand</code> to generate responses from trained models, then <code>lem score</code> to filter by quality.</p>
|
||||
</div>
|
||||
|
||||
{{else if eq .SelectedView "export"}}
|
||||
{{/* -- Export -- */}}
|
||||
<h2 class="section-title">Export</h2>
|
||||
<div class="stat-grid">
|
||||
{{if .Dataset.Available}}
|
||||
<div class="stat-card">
|
||||
<h3>Training Examples</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "training_examples")}}</div>
|
||||
<div class="sub">Chat-format JSONL</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h3>Validations</h3>
|
||||
<div class="value">{{fmtInt (tableRows .Dataset.Tables "validations")}}</div>
|
||||
<div class="sub">Quality checks</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
<div class="card">
|
||||
<p style="color:var(--muted);padding:1rem">Export formats:</p>
|
||||
<table>
|
||||
<thead><tr><th>Format</th><th>Command</th><th>Use</th></tr></thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><code>JSONL (MLX)</code></td>
|
||||
<td><code>lem export --format jsonl</code></td>
|
||||
<td>MLX LoRA training (train/valid/test splits)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>Parquet</code></td>
|
||||
<td><code>lem export --format parquet</code></td>
|
||||
<td>HuggingFace dataset upload</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>CSV</code></td>
|
||||
<td><code>lem export --format csv</code></td>
|
||||
<td>Spreadsheet analysis</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
{{end}}
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{template "footer"}}
|
||||
108
pkg/lab/handler/templates/golden-set.html
Normal file
108
pkg/lab/handler/templates/golden-set.html
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
{{/* golden-set.html — Golden Set explorer page.
     Renders progress, domain/voice distributions, and per-worker generation counts
     from .GoldenSet (populated from InfluxDB; .GoldenSet.Available gates the page).
     fmtInt, pct, pctOf, voiceChart, domainChart are custom funcs registered by the
     handler's template FuncMap (not template builtins). */}}
{{template "head" "Golden Set"}}
{{template "nav" "golden-set"}}

<h2 class="section-title">LEM Golden Set Explorer</h2>

{{if not .GoldenSet.Available}}
<div class="card"><div class="empty">No golden set data available. Run <code>pipeline.py metrics</code> to push stats to InfluxDB.</div></div>
{{else}}

{{/* Top-line summary cards: completion progress, domain/voice counts, generation averages. */}}
<div class="grid">
<div class="card">
<h3>Progress</h3>
<div class="value">{{fmtInt .GoldenSet.TotalExamples}} / {{fmtInt .GoldenSet.TargetTotal}}</div>
<div class="progress-bar"><div class="fill" style="width:{{pct .GoldenSet.CompletionPct}}%"></div></div>
<div class="sub">{{pct .GoldenSet.CompletionPct}}% complete</div>
</div>

<div class="card">
<h3>Domains</h3>
<div class="value">{{.GoldenSet.Domains}}</div>
<div class="sub">Unique topic domains</div>
</div>

<div class="card">
<h3>Voices</h3>
<div class="value">{{.GoldenSet.Voices}}</div>
<div class="sub">Persona voice types</div>
</div>

<div class="card">
<h3>Avg Generation</h3>
{{/* pct is also used here as a generic number formatter, not only for percentages. */}}
<div class="value">{{pct .GoldenSet.AvgGenTime}}s</div>
<div class="sub">{{pct .GoldenSet.AvgResponseChars}} avg chars per response</div>
</div>
</div>

{{/* Per-worker generation counts (only when worker stats were pushed). */}}
{{if .GoldenSet.Workers}}
<h2 class="section-title">Workers</h2>
<div class="card">
<table>
<thead><tr><th>Worker</th><th style="text-align:right">Generations</th></tr></thead>
<tbody>
{{range .GoldenSet.Workers}}
<tr>
<td><code>{{.Worker}}</code></td>
<td style="text-align:right">{{.Count}}</td>
</tr>
{{end}}
</tbody>
</table>
</div>
{{end}}

{{/* Voice distribution bar chart (voiceChart returns pre-rendered markup). */}}
{{if .GoldenSet.VoiceStats}}
<h2 class="section-title">Voice Distribution</h2>
<div class="card" style="overflow-x:auto;padding:1rem">
{{voiceChart .GoldenSet.VoiceStats}}
</div>
{{end}}

{{/* Domain chart plus a full table; the Coverage bar is each domain's share of
     TotalExamples via pctOf. $ reaches the page-level dot from inside the range. */}}
{{if .GoldenSet.DomainStats}}
<h2 class="section-title">Domain Breakdown (top 25)</h2>
<div class="card" style="overflow-x:auto;padding:1rem">
{{domainChart .GoldenSet.DomainStats}}
</div>

<h2 class="section-title">All Domains</h2>
<div class="card">
<table>
<thead><tr><th>Domain</th><th style="text-align:right">Count</th><th style="text-align:right">Avg Gen Time</th><th style="width:40%">Coverage</th></tr></thead>
<tbody>
{{range .GoldenSet.DomainStats}}
<tr>
<td><code>{{.Domain}}</code></td>
<td style="text-align:right">{{.Count}}</td>
<td style="text-align:right">{{pct .AvgGenTime}}s</td>
<td>
<div class="progress-bar" style="height:6px"><div class="fill" style="width:{{pct (pctOf .Count $.GoldenSet.TotalExamples)}}%"></div></div>
</td>
</tr>
{{end}}
</tbody>
</table>
</div>
{{end}}

{{/* Per-voice detail table (same data as the chart above, tabular form). */}}
{{if .GoldenSet.VoiceStats}}
<h2 class="section-title">Voice Details</h2>
<div class="card">
<table>
<thead><tr><th>Voice</th><th style="text-align:right">Count</th><th style="text-align:right">Avg Chars</th><th style="text-align:right">Avg Gen Time</th></tr></thead>
<tbody>
{{range .GoldenSet.VoiceStats}}
<tr>
<td><code>{{.Voice}}</code></td>
<td style="text-align:right">{{.Count}}</td>
<td style="text-align:right">{{pct .AvgChars}}</td>
<td style="text-align:right">{{pct .AvgGenTime}}s</td>
</tr>
{{end}}
</tbody>
</table>
</div>
{{end}}

{{end}}

{{template "footer"}}
|
||||
103
pkg/lab/handler/templates/layout.html
Normal file
103
pkg/lab/handler/templates/layout.html
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
{{/* layout.html — shared layout partials used by every page:
     "head"   opens the document (doc head + all global CSS); dot is the page title string.
     "nav"    renders the top bar and opens <div class="container">; dot is the active page key.
     "footer" closes the container and document, and installs the SSE auto-refresh script.
     Pages must call head, then nav, then footer so the open/close tags pair up. */}}
{{define "head"}}<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
{{/* Dot is the page title passed by the caller, e.g. {{template "head" "Models"}}. */}}
<title>{{.}} - LEM.Lab</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.7.2/css/all.min.css" integrity="sha512-Evv84Mr4kqVGRNSgIGL/F/aIDqQb7xQ2vcrdIwxfjThSH8CSR7PBEakCr51Ck+w+/U6swU2Im1vVX0SVk9ABhg==" crossorigin="anonymous" referrerpolicy="no-referrer"/>
<style>
*{margin:0;padding:0;box-sizing:border-box}
:root{--bg:#0a0a0f;--surface:#12121a;--border:#1e1e2e;--text:#e0e0e8;--muted:#8888a0;--accent:#7c6ff0;--accent-dim:#5a4fd0;--green:#4ade80;--red:#f87171;--yellow:#fbbf24}
body{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif;background:var(--bg);color:var(--text);min-height:100vh;line-height:1.6;font-size:.9375rem}
a{color:var(--accent);text-decoration:none;transition:color .2s}
a:hover{color:var(--green)}
nav{display:flex;align-items:center;gap:1.5rem;padding:.75rem 1.5rem;border-bottom:1px solid var(--border);background:var(--surface)}
nav .logo{font-size:1.25rem;font-weight:700;letter-spacing:-.02em}
nav .logo span{color:var(--accent)}
nav .links{display:flex;gap:.25rem}
nav .links a{padding:.375rem .75rem;border-radius:6px;font-size:.8125rem;color:var(--muted);transition:all .2s}
nav .links a:hover,nav .links a.active{color:var(--text);background:var(--bg)}
.container{max-width:1600px;margin:0 auto;padding:1.5rem}
.grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(260px,1fr));gap:1rem;margin-bottom:1.5rem}
.card{padding:1.25rem;border:1px solid var(--border);border-radius:8px;background:var(--surface)}
.card h3{font-size:.8125rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.5rem}
.card .value{font-size:1.75rem;font-weight:700;line-height:1.2}
.card .sub{font-size:.8125rem;color:var(--muted);margin-top:.25rem}
.status-dot{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:.375rem}
.status-ok .status-dot{background:var(--green)}
.status-warn .status-dot{background:var(--yellow)}
.status-err .status-dot{background:var(--red)}
.status-ok .label{color:var(--green)}
.status-warn .label{color:var(--yellow)}
.status-err .label{color:var(--red)}
.progress-bar{width:100%;height:8px;background:var(--border);border-radius:4px;overflow:hidden;margin:.5rem 0}
.progress-bar .fill{height:100%;background:var(--accent);border-radius:4px;transition:width .5s}
table{width:100%;border-collapse:collapse;font-size:.8125rem}
th{text-align:left;color:var(--muted);font-weight:600;padding:.5rem .75rem;border-bottom:1px solid var(--border);text-transform:uppercase;letter-spacing:.05em;font-size:.75rem}
td{padding:.5rem .75rem;border-bottom:1px solid var(--border)}
tr:last-child td{border-bottom:none}
code{font-family:"SF Mono",Consolas,monospace;font-size:.75rem;background:var(--bg);padding:.125rem .375rem;border-radius:4px;border:1px solid var(--border)}
.badge{display:inline-block;padding:.125rem .5rem;border-radius:4px;font-size:.6875rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em}
.badge-ok{background:rgba(74,222,128,.15);color:var(--green)}
.badge-err{background:rgba(248,113,113,.15);color:var(--red)}
.badge-info{background:rgba(124,111,240,.15);color:var(--accent)}
.empty{text-align:center;padding:2rem;color:var(--muted)}
.section-title{font-size:1rem;font-weight:600;margin-bottom:1rem;color:var(--text)}
footer{text-align:center;padding:1rem;color:var(--muted);font-size:.75rem;border-top:1px solid var(--border);margin-top:2rem}
@media(max-width:640px){.grid{grid-template-columns:1fr}nav{flex-wrap:wrap;gap:.75rem}}
</style>
</head>
<body>{{end}}

{{/* "nav": dot is the active page key (e.g. "models"); the matching link gets class="active".
     NOTE(review): pages also invoke nav with keys that have no link here ("golden-set", "runs"),
     so those pages render with no highlighted tab — confirm whether that is intended. */}}
{{define "nav"}}
<nav>
<div class="logo">LEM<span>.Lab</span></div>
<div class="links">
<a href="/"{{if eq . "dashboard"}} class="active"{{end}}>Dashboard</a>
<a href="/models"{{if eq . "models"}} class="active"{{end}}>Models</a>
<a href="/training"{{if eq . "training"}} class="active"{{end}}>Training</a>
<a href="/dataset"{{if eq . "dataset"}} class="active"{{end}}>Dataset</a>
<a href="/agents"{{if eq . "agents"}} class="active"{{end}}>Agents</a>
<a href="/services"{{if eq . "services"}} class="active"{{end}}>Services</a>
</div>
</nav>
<div class="container">{{end}}

{{/* "footer": closes the container opened by "nav", then installs a small SSE client:
     on any message from /events it debounces 500ms, re-fetches the current page, and
     swaps in the fresh .container. On connection error it backs off 5s and reconnects. */}}
{{define "footer"}}
</div>
<footer>LEM.Lab · live · <a href="https://forge.lthn.io/agentic">forge.lthn.io</a></footer>
<script>
// SSE live update: fetches same-origin page on data change, swaps container content.
// Safe: only fetches from same origin (our own server), no user input involved.
(function(){
var es, timer;
function connect(){
es = new EventSource('/events');
es.onmessage = function(){
// Debounce bursts of events into a single refresh.
clearTimeout(timer);
timer = setTimeout(refresh, 500);
};
es.onerror = function(){
// Drop the broken stream and retry after a fixed 5s backoff.
es.close();
setTimeout(connect, 5000);
};
}
function refresh(){
fetch(location.href).then(function(r){ return r.text(); }).then(function(html){
var doc = new DOMParser().parseFromString(html, 'text/html');
var fresh = doc.querySelector('.container');
var current = document.querySelector('.container');
if(fresh && current){
// Save active tab before replacing DOM.
var activeTab = document.querySelector('.chart-panel.active');
var tabName = activeTab ? activeTab.getAttribute('data-tab') : null;
current.replaceWith(fresh);
// Restore active tab after DOM swap.
// showTab is defined by page-level scripts (if any); guarded so pages without tabs are unaffected.
if(tabName && typeof showTab === 'function') showTab(tabName);
}
});
}
connect();
})();
</script>
</body></html>{{end}}
|
||||
29
pkg/lab/handler/templates/models.html
Normal file
29
pkg/lab/handler/templates/models.html
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
{{/* models.html — HuggingFace model listing page.
     Renders .Models (collected from the HuggingFace API; the empty-state text notes a
     5-minute refresh cadence) as a table of ID, downloads, likes, pipeline tag, and age.
     timeAgo is a custom func from the handler's template FuncMap. */}}
{{template "head" "Models"}}
{{template "nav" "models"}}

<h2 class="section-title">LEK Models on HuggingFace</h2>

{{if .Models}}
<div class="card">
<table>
<thead><tr><th>Model</th><th>Downloads</th><th>Likes</th><th>Pipeline</th><th>Updated</th></tr></thead>
<tbody>
{{range .Models}}
<tr>
{{/* Model ID doubles as the link path on huggingface.co. */}}
<td><a href="https://huggingface.co/{{.ModelID}}" target="_blank">{{.ModelID}}</a></td>
<td>{{.Downloads}}</td>
<td>{{.Likes}}</td>
{{/* Pipeline tag is optional on the HF side; show a dash when absent. */}}
<td>{{if .PipelineTag}}<span class="badge badge-info">{{.PipelineTag}}</span>{{else}}-{{end}}</td>
<td>{{timeAgo .LastModified}}</td>
</tr>
{{end}}
</tbody>
</table>
</div>
{{else}}
<div class="card empty">
<p>No models loaded yet. HuggingFace data refreshes every 5 minutes.</p>
</div>
{{end}}

{{template "footer"}}
|
||||
113
pkg/lab/handler/templates/runs.html
Normal file
113
pkg/lab/handler/templates/runs.html
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
{{/* runs.html — per-run training benchmark page.
     Iterates .Benchmarks.Runs and, for each run ID, conditionally renders summary stats
     and charts for three data families keyed by run ID: Loss, Content scores, and
     Capability scores. hasKey/hasContentKey/hasCapKey test map membership and
     getLoss/getContent/getCap/getCapJudge fetch the series; lossChart/contentChart/
     capabilityChart/categoryBreakdown emit pre-rendered chart markup. All are custom
     funcs from the handler's FuncMap. */}}
{{template "head" "Runs"}}
{{template "nav" "runs"}}

<style>
.run-section{margin-bottom:2.5rem}
.run-header{display:flex;align-items:center;gap:.75rem;margin-bottom:1rem;padding-bottom:.5rem;border-bottom:1px solid var(--border)}
.run-header h2{font-size:1.125rem;font-weight:700;color:var(--text);margin:0}
.run-header .model-badge{font-size:.6875rem;font-weight:600;text-transform:uppercase;letter-spacing:.05em;padding:.2rem .6rem;border-radius:4px;background:rgba(124,111,240,.15);color:var(--accent)}
.run-header .run-id{font-size:.75rem;color:var(--muted);font-family:"SF Mono",Consolas,monospace}
.chart-container{margin-bottom:1.25rem}
.chart-container h3{font-size:.8125rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.625rem}
.chart-card{border:1px solid var(--border);border-radius:8px;padding:1rem;background:var(--surface);overflow-x:auto}
.run-summary{display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));gap:.75rem;margin-bottom:1.25rem}
.run-stat{padding:.75rem 1rem;border:1px solid var(--border);border-radius:8px;background:var(--surface)}
.run-stat .label{font-size:.6875rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.25rem}
.run-stat .value{font-size:1.5rem;font-weight:700;line-height:1.2}
.run-stat .sub{font-size:.75rem;color:var(--muted);margin-top:.125rem}
</style>

<h2 class="section-title">Training Runs</h2>

{{/* Bind the benchmark bundle once; $b is referenced throughout the range below. */}}
{{$b := .Benchmarks}}

{{if not $b.Runs}}
<div class="card empty">
<p>No benchmark data available. InfluxDB data refreshes every 60 seconds.</p>
</div>
{{else}}

{{range $b.Runs}}
{{/* Capture run-scoped values before nested pipelines change the dot. */}}
{{$rid := .RunID}}
{{$mdl := .Model}}

{{/* id={{$rid}} makes each run section deep-linkable via URL fragment. */}}
<div class="run-section" id="{{$rid}}">
<div class="run-header">
<h2>{{$mdl}}</h2>
<span class="model-badge">{{.Type}}</span>
<span class="run-id">{{$rid}}</span>
</div>

{{/* Summary stats: one card per data family present for this run. */}}
<div class="run-summary">
{{if hasKey $b.Loss $rid}}
{{$loss := getLoss $b.Loss $rid}}
<div class="run-stat">
<div class="label">Loss Points</div>
<div class="value">{{len $loss}}</div>
<div class="sub">val + train</div>
</div>
{{end}}

{{if hasContentKey $b.Content $rid}}
{{$content := getContent $b.Content $rid}}
<div class="run-stat">
<div class="label">Content Scores</div>
<div class="value">{{len $content}}</div>
<div class="sub">dimension scores</div>
</div>
{{end}}

{{if hasCapKey $b.Capability $rid}}
{{$cap := getCap $b.Capability $rid}}
<div class="run-stat">
<div class="label">Capability Tests</div>
<div class="value">{{len $cap}}</div>
<div class="sub">benchmark points</div>
</div>
{{end}}
</div>

{{/* Training Loss Chart */}}
{{if hasKey $b.Loss $rid}}
<div class="chart-container">
<h3>Training Loss Curve</h3>
<div class="chart-card">
{{lossChart (getLoss $b.Loss $rid)}}
</div>
</div>
{{end}}

{{/* Content Score Chart */}}
{{if hasContentKey $b.Content $rid}}
<div class="chart-container">
<h3>Content Scores by Dimension</h3>
<div class="chart-card">
{{contentChart (getContent $b.Content $rid)}}
</div>
</div>
{{end}}

{{/* Capability Chart — capability data yields two charts: the benchmark curve and a
     per-category breakdown that also folds in judge scores (CapabilityJudge). */}}
{{if hasCapKey $b.Capability $rid}}
<div class="chart-container">
<h3>Capability Benchmark</h3>
<div class="chart-card">
{{capabilityChart (getCap $b.Capability $rid)}}
</div>
</div>

<div class="chart-container">
<h3>Category Breakdown</h3>
<div class="chart-card">
{{categoryBreakdown (getCap $b.Capability $rid) (getCapJudge $b.CapabilityJudge $rid)}}
</div>
</div>
{{end}}

</div>
{{end}}

{{end}}

{{template "footer"}}
|
||||
65
pkg/lab/handler/templates/services.html
Normal file
65
pkg/lab/handler/templates/services.html
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
{{/* services.html — internal-services status page.
     Renders .Services as a summary strip (total / ok / degraded / unavailable) followed
     by category sections of service cards. countStatus, categories, and filterCat are
     custom funcs from the handler's FuncMap: countStatus tallies services by status
     string, categories yields the distinct category names, filterCat selects the
     services in one category. .Status is also used directly as a CSS class on the dot
     (ok / degraded / unavailable / unchecked — all four are styled below). */}}
{{template "head" "Services"}}
{{template "nav" "services"}}

<h2 class="section-title">Internal Services</h2>

<style>
.svc-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:1rem;margin-bottom:2rem}
.svc-card{padding:1rem 1.25rem;border:1px solid var(--border);border-radius:8px;background:var(--surface);display:flex;align-items:center;gap:1rem;transition:border-color .2s}
.svc-card:hover{border-color:var(--accent-dim)}
.svc-dot{width:10px;height:10px;border-radius:50%;flex-shrink:0}
.svc-dot.ok{background:var(--green)}
.svc-dot.degraded{background:var(--yellow)}
.svc-dot.unavailable{background:var(--red)}
.svc-dot.unchecked{background:var(--muted)}
.svc-info{flex:1;min-width:0}
.svc-name{font-weight:600;font-size:.875rem}
.svc-name a{color:var(--text)}
.svc-name a:hover{color:var(--accent)}
.svc-meta{font-size:.75rem;color:var(--muted)}
.svc-cat-title{font-size:.875rem;font-weight:600;color:var(--accent);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.75rem;padding-bottom:.375rem;border-bottom:1px solid var(--border)}
.svc-section{margin-bottom:1.5rem}
.svc-summary{display:flex;gap:1.5rem;margin-bottom:1.5rem;flex-wrap:wrap}
.svc-stat{font-size:.8125rem;color:var(--muted)}
.svc-stat strong{font-size:1.25rem;color:var(--text);display:block}
</style>

{{$services := .Services}}

{{/* Summary strip: total plus per-status counts ("unchecked" is not summarized here). */}}
<div class="svc-summary">
<div class="svc-stat">
<strong>{{len $services}}</strong>
Total Services
</div>
<div class="svc-stat">
<strong style="color:var(--green)">{{countStatus $services "ok"}}</strong>
Online
</div>
<div class="svc-stat">
<strong style="color:var(--yellow)">{{countStatus $services "degraded"}}</strong>
Degraded
</div>
<div class="svc-stat">
<strong style="color:var(--red)">{{countStatus $services "unavailable"}}</strong>
Offline
</div>
</div>

{{/* One section per category; inside, one card per service in that category. */}}
{{range categories $services}}
<div class="svc-section">
<div class="svc-cat-title">{{.}}</div>
<div class="svc-grid">
{{range filterCat $services .}}
<div class="svc-card">
<div class="svc-dot {{.Status}}"></div>
<div class="svc-info">
<div class="svc-name"><a href="{{.URL}}" target="_blank">{{.Name}}</a></div>
<div class="svc-meta">{{.Machine}} · {{.URL}}</div>
</div>
</div>
{{end}}
</div>
</div>
{{end}}

{{template "footer"}}
|
||||
278
pkg/lab/handler/templates/training.html
Normal file
278
pkg/lab/handler/templates/training.html
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
{{template "head" "Training"}}
|
||||
{{template "nav" "training"}}
|
||||
|
||||
<style>
|
||||
.training-layout{display:flex;gap:1.5rem;min-height:calc(100vh - 120px)}
|
||||
.training-sidebar{width:220px;flex-shrink:0}
|
||||
.training-sidebar .sidebar-title{font-size:.6875rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.75rem;padding:0 .75rem}
|
||||
.training-sidebar a{display:flex;align-items:center;gap:.5rem;padding:.625rem .75rem;border-radius:6px;color:var(--muted);font-size:.8125rem;transition:all .2s;text-decoration:none;margin-bottom:2px}
|
||||
.training-sidebar a:hover{color:var(--text);background:var(--bg)}
|
||||
.training-sidebar a.active{color:var(--text);background:var(--bg);border-left:3px solid var(--accent)}
|
||||
.training-sidebar .model-name{font-weight:600;flex:1;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}
|
||||
.training-sidebar .badge{font-size:.5625rem;padding:.0625rem .375rem}
|
||||
.training-main{flex:1;min-width:0}
|
||||
.overview-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(280px,1fr));gap:1rem;margin-bottom:1.5rem}
|
||||
.model-card{padding:1.25rem;border:1px solid var(--border);border-radius:8px;background:var(--surface);cursor:pointer;transition:border-color .2s}
|
||||
.model-card:hover{border-color:var(--accent-dim)}
|
||||
.model-card h3{font-size:1rem;font-weight:700;margin-bottom:.5rem;display:flex;align-items:center;gap:.5rem}
|
||||
.model-card .run-id{font-size:.6875rem;color:var(--muted);font-family:"SF Mono",Consolas,monospace}
|
||||
.model-card .stats{display:grid;grid-template-columns:1fr 1fr;gap:.5rem;margin-top:.75rem}
|
||||
.model-card .stat-label{font-size:.6875rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em}
|
||||
.model-card .stat-value{font-size:1.125rem;font-weight:700}
|
||||
.detail-header{display:flex;align-items:center;gap:.75rem;margin-bottom:1.5rem;padding-bottom:.75rem;border-bottom:1px solid var(--border)}
|
||||
.detail-header h2{font-size:1.25rem;font-weight:700;margin:0}
|
||||
.detail-stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));gap:.75rem;margin-bottom:1.5rem}
|
||||
.detail-stat{padding:.75rem 1rem;border:1px solid var(--border);border-radius:8px;background:var(--surface)}
|
||||
.detail-stat .label{font-size:.6875rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.25rem}
|
||||
.detail-stat .value{font-size:1.5rem;font-weight:700;line-height:1.2}
|
||||
.detail-stat .sub{font-size:.75rem;color:var(--muted);margin-top:.125rem}
|
||||
.run-section{margin-bottom:2rem;padding-bottom:1.5rem;border-bottom:1px solid var(--border)}
|
||||
.run-section:last-child{border-bottom:none}
|
||||
.run-header{display:flex;align-items:center;gap:.5rem;margin-bottom:1rem}
|
||||
.run-header h3{font-size:.9375rem;font-weight:700;margin:0}
|
||||
.run-header .run-id{font-size:.6875rem;color:var(--muted);font-family:"SF Mono",Consolas,monospace}
|
||||
.chart-section{margin-bottom:1.5rem}
|
||||
.chart-section h4{font-size:.8125rem;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.05em;margin-bottom:.5rem}
|
||||
.chart-card{border:1px solid var(--border);border-radius:8px;padding:1rem;background:var(--surface);overflow-x:auto}
|
||||
.chart-tabs{display:flex;gap:2px;margin-bottom:1rem;border-bottom:1px solid var(--border);padding-bottom:0}
|
||||
.chart-tabs button{background:none;border:none;padding:.5rem 1rem;font-size:.8125rem;font-weight:600;color:var(--muted);cursor:pointer;border-bottom:2px solid transparent;transition:all .2s;font-family:inherit}
|
||||
.chart-tabs button:hover{color:var(--text)}
|
||||
.chart-tabs button.active{color:var(--accent);border-bottom-color:var(--accent)}
|
||||
.chart-panel{display:none}
|
||||
.chart-panel.active{display:block}
|
||||
@media(max-width:768px){.training-layout{flex-direction:column}.training-sidebar{width:100%;display:flex;gap:.5rem;flex-wrap:wrap}.training-sidebar .sidebar-title{width:100%}.training-sidebar a{flex:0 0 auto}}
|
||||
</style>
|
||||
|
||||
<div class="training-layout">
|
||||
|
||||
{{/* -- Sidebar -- */}}
|
||||
<div class="training-sidebar">
|
||||
<div class="sidebar-title">Models</div>
|
||||
<a href="/training"{{if not .SelectedModel}} class="active"{{end}}>
|
||||
<span class="model-name">Overview</span>
|
||||
</a>
|
||||
{{range .ModelGroups}}
|
||||
<a href="/training?model={{.Model}}"{{if eq $.SelectedModel .Model}} class="active"{{end}}>
|
||||
<span class="model-name">{{.Model}}</span>
|
||||
<span class="badge {{statusBadge .BestStatus}}">{{.BestStatus}}</span>
|
||||
</a>
|
||||
{{end}}
|
||||
</div>
|
||||
|
||||
{{/* -- Main content -- */}}
|
||||
<div class="training-main">
|
||||
|
||||
{{if not .SelectedModel}}
|
||||
{{/* -- Overview: all models -- */}}
|
||||
<h2 class="section-title">LEM Training</h2>
|
||||
|
||||
{{/* -- Scoring progress summary -- */}}
|
||||
{{if .ModelGroups}}
|
||||
<div class="detail-stats" style="margin-bottom:1.5rem">
|
||||
<div class="detail-stat">
|
||||
<div class="label">Models</div>
|
||||
<div class="value">{{.ScoredModels}} / {{len .ModelGroups}}</div>
|
||||
<div class="sub">scored</div>
|
||||
</div>
|
||||
<div class="detail-stat">
|
||||
<div class="label">Scoring Runs</div>
|
||||
<div class="value">{{.TotalScoringRuns}}</div>
|
||||
<div class="sub">content + capability</div>
|
||||
</div>
|
||||
<div class="detail-stat">
|
||||
<div class="label">Data Points</div>
|
||||
<div class="value">{{fmtInt .TotalDataPoints}}</div>
|
||||
<div class="sub">across all benchmarks</div>
|
||||
</div>
|
||||
{{if gt .UnscoredModels 0}}
|
||||
<div class="detail-stat" style="border-color:var(--accent-dim)">
|
||||
<div class="label">Awaiting Scoring</div>
|
||||
<div class="value" style="color:var(--accent)">{{.UnscoredModels}}</div>
|
||||
<div class="sub">{{.UnscoredNames}}</div>
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="detail-stat" style="border-color:var(--green)">
|
||||
<div class="label">Status</div>
|
||||
<div class="value" style="color:var(--green)">Done</div>
|
||||
<div class="sub">all models scored</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .ModelGroups}}
|
||||
<div class="overview-grid">
|
||||
{{range .ModelGroups}}
|
||||
<a href="/training?model={{.Model}}" style="text-decoration:none;color:inherit">
|
||||
<div class="model-card">
|
||||
<h3>
|
||||
{{.Model}}
|
||||
<span class="badge {{statusBadge .BestStatus}}">{{.BestStatus}}</span>
|
||||
</h3>
|
||||
{{if .HasTraining}}
|
||||
{{range .TrainingRuns}}
|
||||
<div class="sub" style="margin-bottom:.375rem"><i class="fa-solid fa-database" style="color:var(--accent)"></i> {{runLabel .RunID}}</div>
|
||||
<div class="progress-bar"><div class="fill" style="width:{{pct .Pct}}%;{{if eq .Status "complete"}}background:var(--green){{end}}"></div></div>
|
||||
<div class="sub">{{.Iteration}} / {{.TotalIters}} iters ({{pct .Pct}}%)</div>
|
||||
<div class="stats">
|
||||
{{if gt .LastLoss 0.0}}
|
||||
<div>
|
||||
<div class="stat-label">Train Loss</div>
|
||||
<div class="stat-value">{{fmtFloat .LastLoss 3}}</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{if gt .ValLoss 0.0}}
|
||||
<div>
|
||||
<div class="stat-label">Val Loss</div>
|
||||
<div class="stat-value">{{fmtFloat .ValLoss 3}}</div>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
{{break}}
|
||||
{{end}}
|
||||
{{else}}
|
||||
<div class="sub" style="margin-top:.5rem">{{len .BenchmarkRuns}} benchmark run{{if gt (len .BenchmarkRuns) 1}}s{{end}}</div>
|
||||
{{if .HasCapability}}<div class="sub"><i class="fa-solid fa-flask"></i> Capability probes scored</div>{{end}}
|
||||
{{if .HasContent}}<div class="sub"><i class="fa-solid fa-chart-bar"></i> Content scores available</div>{{end}}
|
||||
{{end}}
|
||||
</div>
|
||||
</a>
|
||||
{{end}}
|
||||
</div>
|
||||
{{else}}
|
||||
<div class="card empty">
|
||||
<p>No training or benchmark data. InfluxDB refreshes every 60 seconds.</p>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{else}}
|
||||
{{/* -- Detail view: single model -- */}}
|
||||
{{$sel := .SelectedModel}}
|
||||
{{$b := .Benchmarks}}
|
||||
{{$found := false}}
|
||||
|
||||
{{range .ModelGroups}}
|
||||
{{if eq .Model $sel}}
|
||||
|
||||
<div class="detail-header">
|
||||
<h2>{{.Model}}</h2>
|
||||
<span class="badge {{statusBadge .BestStatus}}">{{.BestStatus}}</span>
|
||||
</div>
|
||||
|
||||
{{/* Training run status cards */}}
|
||||
{{if .TrainingRuns}}
|
||||
<div class="detail-stats">
|
||||
{{range .TrainingRuns}}
|
||||
<div class="detail-stat">
|
||||
<div class="label">{{.RunID}}</div>
|
||||
<div class="value">{{pct .Pct}}%</div>
|
||||
<div class="sub">{{.Iteration}} / {{.TotalIters}} · {{.Status}}</div>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{/* Show latest loss stats from most recent run */}}
|
||||
{{with index .TrainingRuns 0}}
|
||||
{{if gt .LastLoss 0.0}}
|
||||
<div class="detail-stat">
|
||||
<div class="label">Train Loss</div>
|
||||
<div class="value">{{fmtFloat .LastLoss 3}}</div>
|
||||
<div class="sub">latest</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{if gt .ValLoss 0.0}}
|
||||
<div class="detail-stat">
|
||||
<div class="label">Val Loss</div>
|
||||
<div class="value">{{fmtFloat .ValLoss 3}}</div>
|
||||
<div class="sub">latest</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{if gt .TokensSec 0.0}}
|
||||
<div class="detail-stat">
|
||||
<div class="label">Tokens/sec</div>
|
||||
<div class="value">{{fmtFloat .TokensSec 0}}</div>
|
||||
<div class="sub">throughput</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{end}}
|
||||
</div>
|
||||
|
||||
{{/* Progress bars for in-progress training runs only */}}
|
||||
{{range .TrainingRuns}}
|
||||
{{if ne .Status "complete"}}
|
||||
<div style="margin-bottom:1rem">
|
||||
<div class="sub" style="margin-bottom:.25rem"><strong>{{.RunID}}</strong></div>
|
||||
<div class="progress-bar"><div class="fill" style="width:{{pct .Pct}}%"></div></div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{end}}
|
||||
{{end}}
|
||||
|
||||
{{/* All benchmark runs for this model -- collect data for tabs */}}
|
||||
{{$runs := runsForModel $b $sel}}
|
||||
|
||||
{{/* Tabbed charts */}}
|
||||
<div class="chart-tabs" id="chartTabs">
|
||||
{{if anyContent $runs $b.Content}}<button class="active" onclick="showTab('content')"><i class="fa-solid fa-chart-line"></i> Content</button>{{end}}
|
||||
{{if anyCap $runs $b.Capability}}<button onclick="showTab('capability')"><i class="fa-solid fa-flask"></i> Capability</button>{{end}}
|
||||
{{if anyCap $runs $b.Capability}}<button onclick="showTab('categories')"><i class="fa-solid fa-table-cells"></i> Categories</button>{{end}}
|
||||
{{if anyLoss $runs $b.Loss}}<button onclick="showTab('loss')"><i class="fa-solid fa-chart-area"></i> Loss</button>{{end}}
|
||||
</div>
|
||||
|
||||
{{range $runs}}
|
||||
{{$rid := .RunID}}
|
||||
{{if hasContentKey $b.Content $rid}}
|
||||
<div class="chart-panel active" data-tab="content">
|
||||
<div class="chart-card">
|
||||
{{contentChart (getContent $b.Content $rid)}}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{if hasCapKey $b.Capability $rid}}
|
||||
<div class="chart-panel" data-tab="capability">
|
||||
<div class="chart-card">
|
||||
{{capabilityChart (getCap $b.Capability $rid)}}
|
||||
</div>
|
||||
</div>
|
||||
<div class="chart-panel" data-tab="categories">
|
||||
<div class="chart-card">
|
||||
{{categoryBreakdown (getCap $b.Capability $rid) (getCapJudge $b.CapabilityJudge $rid)}}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{if hasKey $b.Loss $rid}}
|
||||
<div class="chart-panel" data-tab="loss">
|
||||
<div class="chart-card">
|
||||
{{lossChart (getLoss $b.Loss $rid)}}
|
||||
</div>
|
||||
</div>
|
||||
{{end}}
|
||||
{{end}}
|
||||
|
||||
<script>
|
||||
function showTab(name){
|
||||
document.querySelectorAll('.chart-panel').forEach(function(p){p.classList.remove('active')});
|
||||
document.querySelectorAll('.chart-tabs button').forEach(function(b){b.classList.remove('active')});
|
||||
document.querySelectorAll('[data-tab="'+name+'"]').forEach(function(p){p.classList.add('active')});
|
||||
document.querySelectorAll('.chart-tabs button[onclick*="\'"+name+"\'"]').forEach(function(b){b.classList.add('active')});
|
||||
}
|
||||
(function(){
|
||||
var tabs=document.getElementById('chartTabs');
|
||||
if(!tabs)return;
|
||||
var first=tabs.querySelector('button');
|
||||
if(first&&!tabs.querySelector('button.active')){first.classList.add('active');first.click()}
|
||||
})();
|
||||
</script>
|
||||
|
||||
{{if and (not .TrainingRuns) (not $runs)}}
|
||||
<div class="card empty"><p>No data for this model yet.</p></div>
|
||||
{{end}}
|
||||
|
||||
{{end}}
|
||||
{{end}}
|
||||
|
||||
{{end}}
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{template "footer"}}
|
||||
501
pkg/lab/handler/web.go
Normal file
501
pkg/lab/handler/web.go
Normal file
|
|
@ -0,0 +1,501 @@
|
|||
package handler
|
||||
|
||||
import (
	"bytes"
	"embed"
	"fmt"
	"html/template"
	"net/http"
	"sort"
	"strconv"
	"strings"
	"time"

	"forge.lthn.ai/core/cli/pkg/lab"
)
|
||||
|
||||
//go:embed templates/*
|
||||
var templateFS embed.FS
|
||||
|
||||
//go:embed static/*
|
||||
var StaticFS embed.FS
|
||||
|
||||
type WebHandler struct {
|
||||
store *lab.Store
|
||||
tmpl *template.Template
|
||||
}
|
||||
|
||||
func NewWebHandler(s *lab.Store) *WebHandler {
|
||||
funcMap := template.FuncMap{
|
||||
"timeAgo": func(t time.Time) string {
|
||||
if t.IsZero() {
|
||||
return "never"
|
||||
}
|
||||
d := time.Since(t)
|
||||
switch {
|
||||
case d < time.Minute:
|
||||
return "just now"
|
||||
case d < time.Hour:
|
||||
return fmt.Sprintf("%dm ago", int(d.Minutes()))
|
||||
case d < 24*time.Hour:
|
||||
return fmt.Sprintf("%dh ago", int(d.Hours()))
|
||||
default:
|
||||
days := int(d.Hours()) / 24
|
||||
if days == 1 {
|
||||
return "1 day ago"
|
||||
}
|
||||
return fmt.Sprintf("%d days ago", days)
|
||||
}
|
||||
},
|
||||
"pct": func(v float64) string {
|
||||
return fmt.Sprintf("%.1f", v)
|
||||
},
|
||||
"statusClass": func(s string) string {
|
||||
switch s {
|
||||
case "ok", "running":
|
||||
return "status-ok"
|
||||
case "degraded":
|
||||
return "status-warn"
|
||||
default:
|
||||
return "status-err"
|
||||
}
|
||||
},
|
||||
"shortMsg": func(s string) string {
|
||||
if i := strings.IndexByte(s, '\n'); i > 0 {
|
||||
s = s[:i]
|
||||
}
|
||||
if len(s) > 72 {
|
||||
return s[:69] + "..."
|
||||
}
|
||||
return s
|
||||
},
|
||||
"lower": strings.ToLower,
|
||||
"cpuPct": func(load float64, cores int) string {
|
||||
if cores <= 0 {
|
||||
return "0"
|
||||
}
|
||||
pct := load / float64(cores) * 100
|
||||
if pct > 100 {
|
||||
pct = 100
|
||||
}
|
||||
return fmt.Sprintf("%.0f", pct)
|
||||
},
|
||||
"fmtGB": func(v float64) string {
|
||||
if v >= 1000 {
|
||||
return fmt.Sprintf("%.1fT", v/1024)
|
||||
}
|
||||
return fmt.Sprintf("%.0fG", v)
|
||||
},
|
||||
"countStatus": func(services []lab.Service, status string) int {
|
||||
n := 0
|
||||
for _, s := range services {
|
||||
if s.Status == status {
|
||||
n++
|
||||
}
|
||||
}
|
||||
return n
|
||||
},
|
||||
"categories": func(services []lab.Service) []string {
|
||||
seen := map[string]bool{}
|
||||
var cats []string
|
||||
for _, s := range services {
|
||||
if !seen[s.Category] {
|
||||
seen[s.Category] = true
|
||||
cats = append(cats, s.Category)
|
||||
}
|
||||
}
|
||||
return cats
|
||||
},
|
||||
"filterCat": func(services []lab.Service, cat string) []lab.Service {
|
||||
var out []lab.Service
|
||||
for _, s := range services {
|
||||
if s.Category == cat {
|
||||
out = append(out, s)
|
||||
}
|
||||
}
|
||||
return out
|
||||
},
|
||||
"lossChart": LossChart,
|
||||
"contentChart": ContentChart,
|
||||
"capabilityChart": CapabilityChart,
|
||||
"categoryBreakdown": CategoryBreakdownWithJudge,
|
||||
"hasKey": func(m map[string][]lab.LossPoint, key string) bool {
|
||||
_, ok := m[key]
|
||||
return ok
|
||||
},
|
||||
"hasContentKey": func(m map[string][]lab.ContentPoint, key string) bool {
|
||||
_, ok := m[key]
|
||||
return ok
|
||||
},
|
||||
"hasCapKey": func(m map[string][]lab.CapabilityPoint, key string) bool {
|
||||
_, ok := m[key]
|
||||
return ok
|
||||
},
|
||||
"anyContent": func(runs []lab.BenchmarkRun, m map[string][]lab.ContentPoint) bool {
|
||||
for _, r := range runs {
|
||||
if _, ok := m[r.RunID]; ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
},
|
||||
"anyCap": func(runs []lab.BenchmarkRun, m map[string][]lab.CapabilityPoint) bool {
|
||||
for _, r := range runs {
|
||||
if _, ok := m[r.RunID]; ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
},
|
||||
"anyLoss": func(runs []lab.BenchmarkRun, m map[string][]lab.LossPoint) bool {
|
||||
for _, r := range runs {
|
||||
if _, ok := m[r.RunID]; ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
},
|
||||
"getLoss": func(m map[string][]lab.LossPoint, key string) []lab.LossPoint {
|
||||
return m[key]
|
||||
},
|
||||
"getContent": func(m map[string][]lab.ContentPoint, key string) []lab.ContentPoint {
|
||||
return m[key]
|
||||
},
|
||||
"getCap": func(m map[string][]lab.CapabilityPoint, key string) []lab.CapabilityPoint {
|
||||
return m[key]
|
||||
},
|
||||
"getCapJudge": func(m map[string][]lab.CapabilityJudgePoint, key string) []lab.CapabilityJudgePoint {
|
||||
return m[key]
|
||||
},
|
||||
"runTypeIcon": func(t string) string {
|
||||
switch t {
|
||||
case "training":
|
||||
return "loss"
|
||||
case "content":
|
||||
return "content"
|
||||
case "capability":
|
||||
return "cap"
|
||||
default:
|
||||
return "data"
|
||||
}
|
||||
},
|
||||
"domainChart": DomainChart,
|
||||
"voiceChart": VoiceChart,
|
||||
"pctOf": func(part, total int) float64 {
|
||||
if total == 0 {
|
||||
return 0
|
||||
}
|
||||
return float64(part) / float64(total) * 100
|
||||
},
|
||||
"fmtInt": func(n int) string {
|
||||
if n < 1000 {
|
||||
return fmt.Sprintf("%d", n)
|
||||
}
|
||||
return fmt.Sprintf("%d,%03d", n/1000, n%1000)
|
||||
},
|
||||
"tableRows": func(tables []lab.DatasetTable, name string) int {
|
||||
for _, t := range tables {
|
||||
if t.Name == name {
|
||||
return t.Rows
|
||||
}
|
||||
}
|
||||
return 0
|
||||
},
|
||||
"totalRows": func(tables []lab.DatasetTable) int {
|
||||
total := 0
|
||||
for _, t := range tables {
|
||||
total += t.Rows
|
||||
}
|
||||
return total
|
||||
},
|
||||
"fmtFloat": func(v float64, prec int) string {
|
||||
return fmt.Sprintf("%.*f", prec, v)
|
||||
},
|
||||
"statusColor": func(s string) string {
|
||||
switch s {
|
||||
case "complete":
|
||||
return "var(--green)"
|
||||
case "training", "fusing":
|
||||
return "var(--accent)"
|
||||
case "failed", "fuse_failed":
|
||||
return "var(--red)"
|
||||
default:
|
||||
return "var(--muted)"
|
||||
}
|
||||
},
|
||||
"statusBadge": func(s string) string {
|
||||
switch s {
|
||||
case "complete":
|
||||
return "badge-ok"
|
||||
case "training", "fusing":
|
||||
return "badge-info"
|
||||
default:
|
||||
return "badge-err"
|
||||
}
|
||||
},
|
||||
"runLabel": func(s string) string {
|
||||
// Make run IDs like "15k-1b@0001000" more readable.
|
||||
s = strings.ReplaceAll(s, "gemma-3-", "")
|
||||
s = strings.ReplaceAll(s, "gemma3-", "")
|
||||
// Strip leading zeros after @.
|
||||
if idx := strings.Index(s, "@"); idx >= 0 {
|
||||
prefix := s[:idx+1]
|
||||
num := strings.TrimLeft(s[idx+1:], "0")
|
||||
if num == "" {
|
||||
num = "0"
|
||||
}
|
||||
s = prefix + num
|
||||
}
|
||||
return s
|
||||
},
|
||||
"normModel": func(s string) string {
|
||||
return strings.ReplaceAll(s, "gemma3-", "gemma-3-")
|
||||
},
|
||||
"runsForModel": func(b lab.BenchmarkData, modelName string) []lab.BenchmarkRun {
|
||||
normRun := func(s string) string {
|
||||
s = strings.ReplaceAll(s, "gemma3-", "gemma-3-")
|
||||
s = strings.TrimPrefix(s, "baseline-")
|
||||
return s
|
||||
}
|
||||
target := normRun(modelName)
|
||||
var out []lab.BenchmarkRun
|
||||
for _, r := range b.Runs {
|
||||
if normRun(r.Model) == target {
|
||||
out = append(out, r)
|
||||
}
|
||||
}
|
||||
return out
|
||||
},
|
||||
"benchmarkCount": func(b lab.BenchmarkData) int {
|
||||
return len(b.Runs)
|
||||
},
|
||||
"dataPoints": func(b lab.BenchmarkData) int {
|
||||
n := 0
|
||||
for _, v := range b.Loss {
|
||||
n += len(v)
|
||||
}
|
||||
for _, v := range b.Content {
|
||||
n += len(v)
|
||||
}
|
||||
for _, v := range b.Capability {
|
||||
n += len(v)
|
||||
}
|
||||
return n
|
||||
},
|
||||
}
|
||||
|
||||
tmpl := template.Must(
|
||||
template.New("").Funcs(funcMap).ParseFS(templateFS, "templates/*.html"),
|
||||
)
|
||||
|
||||
return &WebHandler{store: s, tmpl: tmpl}
|
||||
}
|
||||
|
||||
func (h *WebHandler) Dashboard(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path != "/" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
ov := h.store.Overview()
|
||||
b := h.store.GetBenchmarks()
|
||||
h.render(w, "dashboard.html", map[string]any{
|
||||
"Machines": ov.Machines,
|
||||
"Agents": ov.Agents,
|
||||
"Training": ov.Training,
|
||||
"Models": ov.Models,
|
||||
"Commits": ov.Commits,
|
||||
"Errors": ov.Errors,
|
||||
"Benchmarks": b,
|
||||
})
|
||||
}
|
||||
|
||||
func (h *WebHandler) Models(w http.ResponseWriter, r *http.Request) {
|
||||
h.render(w, "models.html", map[string]any{
|
||||
"Models": h.store.GetModels(),
|
||||
})
|
||||
}
|
||||
|
||||
// ModelGroup gathers all runs and data for a single model name.
|
||||
type ModelGroup struct {
|
||||
Model string
|
||||
TrainingRuns []lab.TrainingRunStatus
|
||||
BenchmarkRuns []lab.BenchmarkRun
|
||||
HasTraining bool
|
||||
HasContent bool
|
||||
HasCapability bool
|
||||
BestStatus string // best training status: complete > training > pending
|
||||
}
|
||||
|
||||
func buildModelGroups(runs []lab.TrainingRunStatus, benchmarks lab.BenchmarkData) []ModelGroup {
|
||||
groups := map[string]*ModelGroup{}
|
||||
|
||||
// Normalise model names: gemma3-12b -> gemma-3-12b, baseline-gemma-3-12b -> gemma-3-12b.
|
||||
norm := func(s string) string {
|
||||
s = strings.ReplaceAll(s, "gemma3-", "gemma-3-")
|
||||
s = strings.TrimPrefix(s, "baseline-")
|
||||
return s
|
||||
}
|
||||
|
||||
// Training runs.
|
||||
for _, r := range runs {
|
||||
key := norm(r.Model)
|
||||
g, ok := groups[key]
|
||||
if !ok {
|
||||
g = &ModelGroup{Model: key}
|
||||
groups[key] = g
|
||||
}
|
||||
g.TrainingRuns = append(g.TrainingRuns, r)
|
||||
g.HasTraining = true
|
||||
if r.Status == "complete" || (g.BestStatus != "complete" && r.Status == "training") {
|
||||
g.BestStatus = r.Status
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmark runs.
|
||||
for _, r := range benchmarks.Runs {
|
||||
key := norm(r.Model)
|
||||
g, ok := groups[key]
|
||||
if !ok {
|
||||
g = &ModelGroup{Model: key}
|
||||
groups[key] = g
|
||||
}
|
||||
g.BenchmarkRuns = append(g.BenchmarkRuns, r)
|
||||
switch r.Type {
|
||||
case "content":
|
||||
g.HasContent = true
|
||||
case "capability":
|
||||
g.HasCapability = true
|
||||
case "training":
|
||||
g.HasTraining = true
|
||||
}
|
||||
}
|
||||
|
||||
// Sort: models with training first, then alphabetical.
|
||||
var result []ModelGroup
|
||||
for _, g := range groups {
|
||||
if g.BestStatus == "" {
|
||||
g.BestStatus = "scored"
|
||||
}
|
||||
result = append(result, *g)
|
||||
}
|
||||
sort.Slice(result, func(i, j int) bool {
|
||||
if result[i].HasTraining != result[j].HasTraining {
|
||||
return result[i].HasTraining
|
||||
}
|
||||
return result[i].Model < result[j].Model
|
||||
})
|
||||
return result
|
||||
}
|
||||
|
||||
func (h *WebHandler) Training(w http.ResponseWriter, r *http.Request) {
|
||||
selectedModel := r.URL.Query().Get("model")
|
||||
benchmarks := h.store.GetBenchmarks()
|
||||
trainingRuns := h.store.GetTrainingRuns()
|
||||
groups := buildModelGroups(trainingRuns, benchmarks)
|
||||
|
||||
// Compute scoring progress from model groups.
|
||||
var scoredModels, totalScoringRuns, totalDataPoints int
|
||||
var unscoredNames []string
|
||||
for _, g := range groups {
|
||||
if g.HasContent || g.HasCapability {
|
||||
scoredModels++
|
||||
} else {
|
||||
unscoredNames = append(unscoredNames, g.Model)
|
||||
}
|
||||
totalScoringRuns += len(g.BenchmarkRuns)
|
||||
}
|
||||
for _, v := range benchmarks.Loss {
|
||||
totalDataPoints += len(v)
|
||||
}
|
||||
for _, v := range benchmarks.Content {
|
||||
totalDataPoints += len(v)
|
||||
}
|
||||
for _, v := range benchmarks.Capability {
|
||||
totalDataPoints += len(v)
|
||||
}
|
||||
|
||||
h.render(w, "training.html", map[string]any{
|
||||
"Training": h.store.GetTraining(),
|
||||
"TrainingRuns": trainingRuns,
|
||||
"Benchmarks": benchmarks,
|
||||
"ModelGroups": groups,
|
||||
"Containers": h.store.GetContainers(),
|
||||
"SelectedModel": selectedModel,
|
||||
"ScoredModels": scoredModels,
|
||||
"TotalScoringRuns": totalScoringRuns,
|
||||
"TotalDataPoints": totalDataPoints,
|
||||
"UnscoredModels": len(unscoredNames),
|
||||
"UnscoredNames": strings.Join(unscoredNames, ", "),
|
||||
})
|
||||
}
|
||||
|
||||
func (h *WebHandler) Agents(w http.ResponseWriter, r *http.Request) {
|
||||
h.render(w, "agents.html", map[string]any{
|
||||
"Agents": h.store.GetAgents(),
|
||||
})
|
||||
}
|
||||
|
||||
func (h *WebHandler) Services(w http.ResponseWriter, r *http.Request) {
|
||||
h.render(w, "services.html", map[string]any{
|
||||
"Services": h.store.GetServices(),
|
||||
})
|
||||
}
|
||||
|
||||
func (h *WebHandler) Dataset(w http.ResponseWriter, r *http.Request) {
|
||||
view := r.URL.Query().Get("view")
|
||||
h.render(w, "dataset.html", map[string]any{
|
||||
"GoldenSet": h.store.GetGoldenSet(),
|
||||
"Dataset": h.store.GetDataset(),
|
||||
"SelectedView": view,
|
||||
})
|
||||
}
|
||||
|
||||
func (h *WebHandler) GoldenSet(w http.ResponseWriter, r *http.Request) {
|
||||
h.render(w, "dataset.html", map[string]any{
|
||||
"GoldenSet": h.store.GetGoldenSet(),
|
||||
"Dataset": h.store.GetDataset(),
|
||||
"SelectedView": "",
|
||||
})
|
||||
}
|
||||
|
||||
func (h *WebHandler) Runs(w http.ResponseWriter, r *http.Request) {
|
||||
b := h.store.GetBenchmarks()
|
||||
h.render(w, "runs.html", map[string]any{
|
||||
"Benchmarks": b,
|
||||
})
|
||||
}
|
||||
|
||||
// Events is an SSE endpoint that pushes "update" events when store data changes.
|
||||
func (h *WebHandler) Events(w http.ResponseWriter, r *http.Request) {
|
||||
flusher, ok := w.(http.Flusher)
|
||||
if !ok {
|
||||
http.Error(w, "streaming not supported", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "text/event-stream")
|
||||
w.Header().Set("Cache-Control", "no-cache")
|
||||
w.Header().Set("Connection", "keep-alive")
|
||||
|
||||
ch := h.store.Subscribe()
|
||||
defer h.store.Unsubscribe(ch)
|
||||
|
||||
// Send initial keepalive.
|
||||
fmt.Fprintf(w, ": connected\n\n")
|
||||
flusher.Flush()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ch:
|
||||
fmt.Fprintf(w, "data: update\n\n")
|
||||
flusher.Flush()
|
||||
case <-r.Context().Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *WebHandler) render(w http.ResponseWriter, name string, data any) {
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
if err := h.tmpl.ExecuteTemplate(w, name, data); err != nil {
|
||||
http.Error(w, "template error: "+err.Error(), 500)
|
||||
}
|
||||
}
|
||||
219
pkg/lab/model.go
Normal file
219
pkg/lab/model.go
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
package lab
|
||||
|
||||
import "time"
|
||||
|
||||
type Status string
|
||||
|
||||
const (
|
||||
StatusOK Status = "ok"
|
||||
StatusDegraded Status = "degraded"
|
||||
StatusUnavailable Status = "unavailable"
|
||||
)
|
||||
|
||||
type Overview struct {
|
||||
UpdatedAt time.Time
|
||||
Machines []Machine
|
||||
Agents AgentSummary
|
||||
Training TrainingSummary
|
||||
Models []HFModel
|
||||
Commits []Commit
|
||||
Errors map[string]string
|
||||
}
|
||||
|
||||
type Machine struct {
|
||||
Name string
|
||||
Host string
|
||||
Status Status
|
||||
Load1 float64
|
||||
MemUsedPct float64
|
||||
Containers []Container
|
||||
// Extended stats
|
||||
CPUCores int
|
||||
MemTotalGB float64
|
||||
MemUsedGB float64
|
||||
DiskTotalGB float64
|
||||
DiskUsedGB float64
|
||||
DiskUsedPct float64
|
||||
GPUName string
|
||||
GPUVRAMTotal float64 // GB, 0 if not applicable
|
||||
GPUVRAMUsed float64
|
||||
GPUVRAMPct float64
|
||||
GPUTemp int // Celsius, 0 if unavailable
|
||||
Uptime string
|
||||
}
|
||||
|
||||
type Container struct {
|
||||
Name string
|
||||
Status string
|
||||
Image string
|
||||
Uptime string
|
||||
Created time.Time
|
||||
}
|
||||
|
||||
type AgentSummary struct {
|
||||
Available bool
|
||||
RegisteredTotal int
|
||||
QueuePending int
|
||||
TasksCompleted int
|
||||
TasksFailed int
|
||||
Capabilities int
|
||||
HeartbeatAge float64
|
||||
ExporterUp bool
|
||||
}
|
||||
|
||||
type TrainingSummary struct {
|
||||
GoldGenerated int
|
||||
GoldTarget int
|
||||
GoldPercent float64
|
||||
GoldAvailable bool
|
||||
InterceptCount int
|
||||
SessionCount int
|
||||
LastIntercept time.Time
|
||||
GGUFCount int
|
||||
GGUFFiles []string
|
||||
AdapterCount int
|
||||
}
|
||||
|
||||
type HFModel struct {
|
||||
ModelID string `json:"modelId"`
|
||||
Author string `json:"author"`
|
||||
Downloads int `json:"downloads"`
|
||||
Likes int `json:"likes"`
|
||||
Tags []string `json:"tags"`
|
||||
PipelineTag string `json:"pipeline_tag"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
LastModified time.Time `json:"lastModified"`
|
||||
}
|
||||
|
||||
type Commit struct {
|
||||
SHA string
|
||||
Message string
|
||||
Author string
|
||||
Repo string
|
||||
Timestamp time.Time
|
||||
}
|
||||
|
||||
type Service struct {
|
||||
Name string
|
||||
URL string
|
||||
Category string
|
||||
Machine string
|
||||
Icon string
|
||||
Status string // ok, degraded, unavailable, unchecked
|
||||
}
|
||||
|
||||
// Dataset stats from DuckDB (pushed to InfluxDB as dataset_stats).
|
||||
|
||||
type DatasetTable struct {
|
||||
Name string
|
||||
Rows int
|
||||
}
|
||||
|
||||
type DatasetSummary struct {
|
||||
Available bool
|
||||
Tables []DatasetTable
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
|
||||
// Golden set data explorer types.
|
||||
|
||||
type GoldenSetSummary struct {
|
||||
Available bool
|
||||
TotalExamples int
|
||||
TargetTotal int
|
||||
CompletionPct float64
|
||||
Domains int
|
||||
Voices int
|
||||
AvgGenTime float64
|
||||
AvgResponseChars float64
|
||||
DomainStats []DomainStat
|
||||
VoiceStats []VoiceStat
|
||||
Workers []WorkerStat
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
|
||||
type WorkerStat struct {
|
||||
Worker string
|
||||
Count int
|
||||
LastSeen time.Time
|
||||
}
|
||||
|
||||
type DomainStat struct {
|
||||
Domain string
|
||||
Count int
|
||||
AvgGenTime float64
|
||||
}
|
||||
|
||||
type VoiceStat struct {
|
||||
Voice string
|
||||
Count int
|
||||
AvgChars float64
|
||||
AvgGenTime float64
|
||||
}
|
||||
|
||||
// Live training run status (from InfluxDB training_status measurement).
|
||||
|
||||
type TrainingRunStatus struct {
|
||||
Model string
|
||||
RunID string
|
||||
Status string // training, fusing, complete, failed
|
||||
Iteration int
|
||||
TotalIters int
|
||||
Pct float64
|
||||
LastLoss float64 // most recent train loss
|
||||
ValLoss float64 // most recent val loss
|
||||
TokensSec float64 // most recent tokens/sec
|
||||
}
|
||||
|
||||
// Benchmark data types for training run viewer.
|
||||
|
||||
type BenchmarkRun struct {
|
||||
RunID string
|
||||
Model string
|
||||
Type string // "content", "capability", "training"
|
||||
}
|
||||
|
||||
type LossPoint struct {
|
||||
Iteration int
|
||||
Loss float64
|
||||
LossType string // "val" or "train"
|
||||
LearningRate float64
|
||||
TokensPerSec float64
|
||||
}
|
||||
|
||||
type ContentPoint struct {
|
||||
Label string
|
||||
Dimension string
|
||||
Score float64
|
||||
Iteration int
|
||||
HasKernel bool
|
||||
}
|
||||
|
||||
type CapabilityPoint struct {
|
||||
Label string
|
||||
Category string
|
||||
Accuracy float64
|
||||
Correct int
|
||||
Total int
|
||||
Iteration int
|
||||
}
|
||||
|
||||
type CapabilityJudgePoint struct {
|
||||
Label string
|
||||
ProbeID string
|
||||
Category string
|
||||
Reasoning float64
|
||||
Correctness float64
|
||||
Clarity float64
|
||||
Avg float64
|
||||
Iteration int
|
||||
}
|
||||
|
||||
type BenchmarkData struct {
|
||||
Runs []BenchmarkRun
|
||||
Loss map[string][]LossPoint
|
||||
Content map[string][]ContentPoint
|
||||
Capability map[string][]CapabilityPoint
|
||||
CapabilityJudge map[string][]CapabilityJudgePoint
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
275
pkg/lab/store.go
Normal file
275
pkg/lab/store.go
Normal file
|
|
@ -0,0 +1,275 @@
|
|||
package lab
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Store struct {
|
||||
mu sync.RWMutex
|
||||
|
||||
// SSE subscriber channels -- notified on any data change.
|
||||
subMu sync.Mutex
|
||||
subs map[chan struct{}]struct{}
|
||||
|
||||
machines []Machine
|
||||
machinesAt time.Time
|
||||
|
||||
agents AgentSummary
|
||||
agentsAt time.Time
|
||||
|
||||
training TrainingSummary
|
||||
trainingAt time.Time
|
||||
|
||||
models []HFModel
|
||||
modelsAt time.Time
|
||||
|
||||
commits []Commit
|
||||
commitsAt time.Time
|
||||
|
||||
containers []Container
|
||||
containersAt time.Time
|
||||
|
||||
services []Service
|
||||
servicesAt time.Time
|
||||
|
||||
benchmarks BenchmarkData
|
||||
benchmarksAt time.Time
|
||||
|
||||
goldenSet GoldenSetSummary
|
||||
goldenSetAt time.Time
|
||||
|
||||
trainingRuns []TrainingRunStatus
|
||||
trainingRunsAt time.Time
|
||||
|
||||
dataset DatasetSummary
|
||||
datasetAt time.Time
|
||||
|
||||
errors map[string]string
|
||||
}
|
||||
|
||||
func NewStore() *Store {
|
||||
return &Store{
|
||||
subs: make(map[chan struct{}]struct{}),
|
||||
errors: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
// Subscribe returns a channel that receives a signal on every data update.
|
||||
// Call Unsubscribe when done to avoid leaks.
|
||||
func (s *Store) Subscribe() chan struct{} {
|
||||
ch := make(chan struct{}, 1)
|
||||
s.subMu.Lock()
|
||||
s.subs[ch] = struct{}{}
|
||||
s.subMu.Unlock()
|
||||
return ch
|
||||
}
|
||||
|
||||
// Unsubscribe removes a subscriber channel.
|
||||
func (s *Store) Unsubscribe(ch chan struct{}) {
|
||||
s.subMu.Lock()
|
||||
delete(s.subs, ch)
|
||||
s.subMu.Unlock()
|
||||
}
|
||||
|
||||
// notify sends a non-blocking signal to all subscribers.
|
||||
func (s *Store) notify() {
|
||||
s.subMu.Lock()
|
||||
defer s.subMu.Unlock()
|
||||
for ch := range s.subs {
|
||||
select {
|
||||
case ch <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) SetMachines(m []Machine) {
|
||||
s.mu.Lock()
|
||||
s.machines = m
|
||||
s.machinesAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) SetAgents(a AgentSummary) {
|
||||
s.mu.Lock()
|
||||
s.agents = a
|
||||
s.agentsAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) SetTraining(t TrainingSummary) {
|
||||
s.mu.Lock()
|
||||
s.training = t
|
||||
s.trainingAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) SetModels(m []HFModel) {
|
||||
s.mu.Lock()
|
||||
s.models = m
|
||||
s.modelsAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) SetCommits(c []Commit) {
|
||||
s.mu.Lock()
|
||||
s.commits = c
|
||||
s.commitsAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) SetContainers(c []Container) {
|
||||
s.mu.Lock()
|
||||
s.containers = c
|
||||
s.containersAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) SetError(collector string, err error) {
|
||||
s.mu.Lock()
|
||||
if err != nil {
|
||||
s.errors[collector] = err.Error()
|
||||
} else {
|
||||
delete(s.errors, collector)
|
||||
}
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) Overview() Overview {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
errCopy := make(map[string]string, len(s.errors))
|
||||
for k, v := range s.errors {
|
||||
errCopy[k] = v
|
||||
}
|
||||
|
||||
// Merge containers into the first machine (snider-linux / local Docker host).
|
||||
machines := make([]Machine, len(s.machines))
|
||||
copy(machines, s.machines)
|
||||
if len(machines) > 0 {
|
||||
machines[0].Containers = s.containers
|
||||
}
|
||||
|
||||
return Overview{
|
||||
UpdatedAt: time.Now(),
|
||||
Machines: machines,
|
||||
Agents: s.agents,
|
||||
Training: s.training,
|
||||
Models: s.models,
|
||||
Commits: s.commits,
|
||||
Errors: errCopy,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) GetModels() []HFModel {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.models
|
||||
}
|
||||
|
||||
func (s *Store) GetTraining() TrainingSummary {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.training
|
||||
}
|
||||
|
||||
func (s *Store) GetAgents() AgentSummary {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.agents
|
||||
}
|
||||
|
||||
func (s *Store) GetContainers() []Container {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.containers
|
||||
}
|
||||
|
||||
func (s *Store) SetServices(svc []Service) {
|
||||
s.mu.Lock()
|
||||
s.services = svc
|
||||
s.servicesAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) GetServices() []Service {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.services
|
||||
}
|
||||
|
||||
func (s *Store) SetBenchmarks(b BenchmarkData) {
|
||||
s.mu.Lock()
|
||||
s.benchmarks = b
|
||||
s.benchmarksAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) GetBenchmarks() BenchmarkData {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.benchmarks
|
||||
}
|
||||
|
||||
func (s *Store) SetGoldenSet(g GoldenSetSummary) {
|
||||
s.mu.Lock()
|
||||
s.goldenSet = g
|
||||
s.goldenSetAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) GetGoldenSet() GoldenSetSummary {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.goldenSet
|
||||
}
|
||||
|
||||
func (s *Store) SetTrainingRuns(runs []TrainingRunStatus) {
|
||||
s.mu.Lock()
|
||||
s.trainingRuns = runs
|
||||
s.trainingRunsAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) GetTrainingRuns() []TrainingRunStatus {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.trainingRuns
|
||||
}
|
||||
|
||||
func (s *Store) SetDataset(d DatasetSummary) {
|
||||
s.mu.Lock()
|
||||
s.dataset = d
|
||||
s.datasetAt = time.Now()
|
||||
s.mu.Unlock()
|
||||
s.notify()
|
||||
}
|
||||
|
||||
func (s *Store) GetDataset() DatasetSummary {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.dataset
|
||||
}
|
||||
|
||||
func (s *Store) GetErrors() map[string]string {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
errCopy := make(map[string]string, len(s.errors))
|
||||
for k, v := range s.errors {
|
||||
errCopy[k] = v
|
||||
}
|
||||
return errCopy
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue