go/pkg/lab/collector/prometheus.go
Claude 5e9a9c2790
Some checks failed
Security Scan / Go Vulnerability Check (push) Has been cancelled
Security Scan / Secret Detection (push) Has been cancelled
Security Scan / Dependency & Config Scan (push) Has been cancelled
feat: integrate lab dashboard as core lab serve
Port the standalone lab dashboard (lab.lthn.io) into the core CLI as
pkg/lab/ with collectors, handlers, and HTML templates. The dashboard
monitors machines, Docker containers, Forgejo, HuggingFace models,
training runs, and InfluxDB metrics with SSE live updates.

New command: core lab serve --bind :8080

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 05:53:52 +00:00

104 lines
2.5 KiB
Go

package collector
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"strconv"
"time"
"forge.lthn.ai/core/cli/pkg/lab"
)
type Prometheus struct {
url string
store *lab.Store
}
func NewPrometheus(promURL string, s *lab.Store) *Prometheus {
return &Prometheus{url: promURL, store: s}
}
func (p *Prometheus) Name() string { return "prometheus" }
func (p *Prometheus) Collect(ctx context.Context) error {
// Machine stats are handled by the system collector (direct /proc + SSH).
// This collector only queries agent metrics from Prometheus.
agents := lab.AgentSummary{}
if v, err := p.query(ctx, "agents_registered_total"); err == nil && v != nil {
agents.RegisteredTotal = int(*v)
agents.Available = true
}
if v, err := p.query(ctx, "agents_queue_pending"); err == nil && v != nil {
agents.QueuePending = int(*v)
}
if v, err := p.query(ctx, "agents_tasks_completed_total"); err == nil && v != nil {
agents.TasksCompleted = int(*v)
}
if v, err := p.query(ctx, "agents_tasks_failed_total"); err == nil && v != nil {
agents.TasksFailed = int(*v)
}
if v, err := p.query(ctx, "agents_capabilities_count"); err == nil && v != nil {
agents.Capabilities = int(*v)
}
if v, err := p.query(ctx, "agents_heartbeat_age_seconds"); err == nil && v != nil {
agents.HeartbeatAge = *v
}
if v, err := p.query(ctx, "agents_exporter_up"); err == nil && v != nil {
agents.ExporterUp = *v > 0
}
p.store.SetAgents(agents)
p.store.SetError("prometheus", nil)
return nil
}
type promResponse struct {
Status string `json:"status"`
Data struct {
ResultType string `json:"resultType"`
Result []struct {
Value [2]json.RawMessage `json:"value"`
} `json:"result"`
} `json:"data"`
}
func (p *Prometheus) query(ctx context.Context, promql string) (*float64, error) {
u := fmt.Sprintf("%s/api/v1/query?query=%s", p.url, url.QueryEscape(promql))
req, err := http.NewRequestWithContext(ctx, "GET", u, nil)
if err != nil {
return nil, err
}
client := &http.Client{Timeout: 5 * time.Second}
resp, err := client.Do(req)
if err != nil {
p.store.SetError("prometheus", err)
return nil, err
}
defer resp.Body.Close()
var pr promResponse
if err := json.NewDecoder(resp.Body).Decode(&pr); err != nil {
return nil, err
}
if pr.Status != "success" || len(pr.Data.Result) == 0 {
return nil, nil
}
var valStr string
if err := json.Unmarshal(pr.Data.Result[0].Value[1], &valStr); err != nil {
return nil, err
}
val, err := strconv.ParseFloat(valStr, 64)
if err != nil {
return nil, err
}
return &val, nil
}