feat(process): expose health probe failure reasons

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-04-04 00:15:20 +00:00
parent 5142114e89
commit 214cf4cfa8
3 changed files with 70 additions and 5 deletions

View file

@ -3,8 +3,10 @@ package process
import (
"context"
"fmt"
"io"
"net"
"net/http"
"strings"
"sync"
"time"
@ -116,21 +118,40 @@ func (h *HealthServer) Addr() string {
// WaitForHealth reports whether the health endpoint at addr answered with
// HTTP 200 before timeoutMs milliseconds elapsed. It is a convenience wrapper
// around ProbeHealth that discards the failure reason.
func WaitForHealth(addr string, timeoutMs int) bool {
	healthy, _ := ProbeHealth(addr, timeoutMs)
	return healthy
}
// ProbeHealth polls http://<addr>/health until it responds with 200 OK or the
// timeout (in milliseconds) expires.
//
// It returns (true, "") as soon as a probe succeeds. Otherwise it returns
// false together with the last observed failure reason: the trimmed response
// body of the last non-200 answer, that answer's status line when the body is
// empty, or the transport error text when the request itself failed. If no
// probe was attempted at all (for example timeoutMs <= 0) the reason is
// "health check timed out".
func ProbeHealth(addr string, timeoutMs int) (bool, string) {
	const (
		// pollInterval is the pause between two consecutive probes.
		pollInterval = 200 * time.Millisecond
		// maxReasonBytes bounds how much of an unhealthy response body is read,
		// so a misbehaving endpoint cannot force an unbounded allocation.
		maxReasonBytes = 4 << 10
	)

	deadline := time.Now().Add(time.Duration(timeoutMs) * time.Millisecond)
	url := fmt.Sprintf("http://%s/health", addr)
	client := &http.Client{Timeout: 2 * time.Second}

	var lastReason string
	for time.Now().Before(deadline) {
		resp, err := client.Get(url)
		if err != nil {
			lastReason = err.Error()
		} else {
			// Best effort: a failed or truncated read only degrades the reason
			// text, which falls back to the status line below when empty.
			body, _ := io.ReadAll(io.LimitReader(resp.Body, maxReasonBytes))
			_ = resp.Body.Close()

			if resp.StatusCode == http.StatusOK {
				return true, ""
			}

			lastReason = strings.TrimSpace(string(body))
			if lastReason == "" {
				lastReason = resp.Status
			}
		}

		// Never sleep past the deadline: once the remaining time is shorter
		// than the poll interval, wait only for what is left.
		wait := time.Until(deadline)
		if wait <= 0 {
			break
		}
		if wait > pollInterval {
			wait = pollInterval
		}
		time.Sleep(wait)
	}

	if lastReason == "" {
		lastReason = "health check timed out"
	}
	return false, lastReason
}

View file

@ -140,13 +140,14 @@ func (p *ProcessProvider) Describe() []api.RouteDescription {
Method: "GET",
Path: "/daemons/:code/:daemon/health",
Summary: "Check daemon health",
Description: "Probes the daemon's health endpoint and returns the result.",
Description: "Probes the daemon's health endpoint and returns the result, including a failure reason when unhealthy.",
Tags: []string{"process"},
Response: map[string]any{
"type": "object",
"properties": map[string]any{
"healthy": map[string]any{"type": "boolean"},
"address": map[string]any{"type": "string"},
"reason": map[string]any{"type": "string"},
},
},
},
@ -232,18 +233,22 @@ func (p *ProcessProvider) healthCheck(c *gin.Context) {
return
}
healthy := process.WaitForHealth(entry.Health, 2000)
healthy, reason := process.ProbeHealth(entry.Health, 2000)
result := map[string]any{
"healthy": healthy,
"address": entry.Health,
}
if !healthy && reason != "" {
result["reason"] = reason
}
// Emit health event
p.emitEvent("process.daemon.health", map[string]any{
"code": code,
"daemon": daemon,
"healthy": healthy,
"reason": reason,
})
statusCode := http.StatusOK

View file

@ -6,6 +6,8 @@ import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
goapi "dappco.re/go/core/api"
@ -84,6 +86,43 @@ func TestProcessProvider_GetDaemon_Bad(t *testing.T) {
assert.Equal(t, http.StatusNotFound, w.Code)
}
// TestProcessProvider_HealthCheck_Bad registers a daemon whose health endpoint
// always answers 503 with a text body, then checks that the provider's health
// route responds with 503 and reports healthy=false, the probed address, and
// the upstream body as the failure reason.
func TestProcessProvider_HealthCheck_Bad(t *testing.T) {
	reg := newTestRegistry(t.TempDir())

	// Stand-in daemon health endpoint that always reports failure.
	unhealthy := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusServiceUnavailable)
		_, _ = w.Write([]byte("upstream health check failed"))
	})
	backend := httptest.NewServer(unhealthy)
	defer backend.Close()

	// The registry stores a bare host:port, so drop the scheme from the URL.
	addr := strings.TrimPrefix(backend.URL, "http://")

	entry := process.DaemonEntry{
		Code:   "test",
		Daemon: "broken",
		PID:    os.Getpid(),
		Health: addr,
	}
	require.NoError(t, reg.Register(entry))

	router := setupRouter(processapi.NewProvider(reg, nil))

	rec := httptest.NewRecorder()
	req, _ := http.NewRequest("GET", "/api/process/daemons/test/broken/health", nil)
	router.ServeHTTP(rec, req)

	assert.Equal(t, http.StatusServiceUnavailable, rec.Code)

	var envelope goapi.Response[map[string]any]
	decodeErr := json.Unmarshal(rec.Body.Bytes(), &envelope)
	require.NoError(t, decodeErr)
	require.True(t, envelope.Success)

	payload := envelope.Data
	assert.Equal(t, false, payload["healthy"])
	assert.Equal(t, addr, payload["address"])
	assert.Equal(t, "upstream health check failed", payload["reason"])
}
func TestProcessProvider_RegistersAsRouteGroup_Good(t *testing.T) {
p := processapi.NewProvider(nil, nil)