// go/pkg/process/supervisor_test.go

package process

import (
	"context"
	"fmt"
	"sync/atomic"
	"testing"
	"time"
)

// TestSupervisor_GoFunc_Good registers a single Go-function unit, starts the
// supervisor and checks that the unit is reported as running with type
// "goroutine", and that its function was invoked at least once.
func TestSupervisor_GoFunc_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	var count atomic.Int32
	sup.RegisterFunc(GoSpec{
		Name: "counter",
		Func: func(ctx context.Context) error {
			count.Add(1)
			// Block until ctx is cancelled.
			<-ctx.Done()
			return nil
		},
		Restart: RestartPolicy{Delay: 10 * time.Millisecond, MaxRestarts: -1},
	})

	sup.Start()
	// Give the unit time to be launched before inspecting it.
	time.Sleep(50 * time.Millisecond)

	status, err := sup.Status("counter")
	if err != nil {
		// Stop explicitly: t.Fatal would otherwise skip the Stop call below
		// and leave the supervisor started.
		sup.Stop()
		t.Fatal(err)
	}
	if !status.Running {
		t.Error("expected counter to be running")
	}
	if status.Type != "goroutine" {
		t.Errorf("expected type goroutine, got %s", status.Type)
	}

	sup.Stop()

	// The invocation count is checked after Stop, as in the original test.
	if c := count.Load(); c < 1 {
		t.Errorf("expected counter >= 1, got %d", c)
	}
}

// TestSupervisor_GoFunc_Restart_Good registers a function that returns an
// error on its first three invocations and afterwards blocks until ctx is
// cancelled. It expects the supervisor to report at least three restarts and
// a running unit once the function stops failing.
func TestSupervisor_GoFunc_Restart_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	var runs atomic.Int32
	sup.RegisterFunc(GoSpec{
		Name: "crasher",
		Func: func(ctx context.Context) error {
			n := runs.Add(1)
			if n <= 3 {
				return fmt.Errorf("crash #%d", n)
			}
			// After 3 crashes, stay running until ctx is cancelled.
			<-ctx.Done()
			return nil
		},
		Restart: RestartPolicy{Delay: 5 * time.Millisecond, MaxRestarts: -1},
	})

	sup.Start()
	// Deferred so the supervisor is stopped even when the test aborts early.
	defer sup.Stop()

	// Wait for the restarts to happen.
	time.Sleep(200 * time.Millisecond)

	status, err := sup.Status("crasher")
	if err != nil {
		t.Fatal(err)
	}
	if status.RestartCount < 3 {
		t.Errorf("expected at least 3 restarts, got %d", status.RestartCount)
	}
	if !status.Running {
		t.Error("expected crasher to be running after recovering")
	}
}

// TestSupervisor_GoFunc_MaxRestarts_Good registers a function that always
// returns an error with MaxRestarts set to 2 and expects the unit to be
// reported as not running, with a bounded restart count.
func TestSupervisor_GoFunc_MaxRestarts_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	sup.RegisterFunc(GoSpec{
		Name: "limited",
		Func: func(ctx context.Context) error {
			return fmt.Errorf("always fail")
		},
		Restart: RestartPolicy{Delay: 5 * time.Millisecond, MaxRestarts: 2},
	})

	sup.Start()
	// Deferred so the supervisor is stopped even when the test aborts early.
	defer sup.Stop()

	time.Sleep(200 * time.Millisecond)

	status, err := sup.Status("limited")
	if err != nil {
		t.Fatal(err)
	}
	if status.Running {
		t.Error("expected limited to have stopped after max restarts")
	}
	// One initial run plus 2 restarts gives at most three exits, so
	// RestartCount must not exceed 3.
	// NOTE(review): only the upper bound is asserted here; the exact counting
	// rule lives in the supervisor implementation — confirm before tightening.
	if status.RestartCount > 3 {
		t.Errorf("expected restartCount <= 3, got %d", status.RestartCount)
	}
}

// TestSupervisor_GoFunc_Panic_Good registers a function that panics on its
// first invocation and blocks until ctx is cancelled on later ones. It
// expects the unit to be running afterwards and the function to have been
// invoked at least twice.
func TestSupervisor_GoFunc_Panic_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	var runs atomic.Int32
	sup.RegisterFunc(GoSpec{
		Name: "panicker",
		Func: func(ctx context.Context) error {
			// Only the very first invocation panics.
			if runs.Add(1) == 1 {
				panic("boom")
			}
			<-ctx.Done()
			return nil
		},
		Restart: RestartPolicy{Delay: 5 * time.Millisecond, MaxRestarts: 3},
	})

	sup.Start()
	// Deferred so the supervisor is stopped even when the test aborts early.
	defer sup.Stop()

	time.Sleep(100 * time.Millisecond)

	status, err := sup.Status("panicker")
	if err != nil {
		t.Fatal(err)
	}
	if !status.Running {
		t.Error("expected panicker to recover and be running")
	}
	if got := runs.Load(); got < 2 {
		t.Errorf("expected at least 2 runs (1 panic + 1 recovery), got %d", got)
	}
}

// TestSupervisor_Statuses_Good starts two function units and expects
// Statuses to return exactly two entries, both marked as running.
func TestSupervisor_Statuses_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	// Both units share the same body: block until ctx is cancelled.
	waitForCancel := func(ctx context.Context) error {
		<-ctx.Done()
		return nil
	}
	for _, unit := range []string{"a", "b"} {
		sup.RegisterFunc(GoSpec{
			Name:    unit,
			Func:    waitForCancel,
			Restart: RestartPolicy{MaxRestarts: -1},
		})
	}

	sup.Start()
	time.Sleep(50 * time.Millisecond)

	all := sup.Statuses()
	if n := len(all); n != 2 {
		t.Errorf("expected 2 statuses, got %d", n)
	}
	if bothUp := all["a"].Running && all["b"].Running; !bothUp {
		t.Error("expected both units running")
	}

	sup.Stop()
}

// TestSupervisor_UnitNames_Good registers two function units without
// starting the supervisor and expects UnitNames to return two entries.
func TestSupervisor_UnitNames_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	// Shared unit body: block until ctx is cancelled.
	idle := func(ctx context.Context) error {
		<-ctx.Done()
		return nil
	}
	sup.RegisterFunc(GoSpec{Name: "alpha", Func: idle})
	sup.RegisterFunc(GoSpec{Name: "beta", Func: idle})

	if got := len(sup.UnitNames()); got != 2 {
		t.Errorf("expected 2 names, got %d", got)
	}
}

// TestSupervisor_Status_Bad expects Status to return an error when asked
// about a unit name that was never registered.
func TestSupervisor_Status_Bad(t *testing.T) {
	sup := NewSupervisor(nil)

	if _, err := sup.Status("nonexistent"); err != nil {
		return // an error is the expected outcome
	}
	t.Error("expected error for nonexistent unit")
}

// TestSupervisor_Restart_Good starts a long-running function unit, calls
// Restart on it and expects the function to have been invoked at least twice
// afterwards.
func TestSupervisor_Restart_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	var runs atomic.Int32
	sup.RegisterFunc(GoSpec{
		Name: "restartable",
		Func: func(ctx context.Context) error {
			runs.Add(1)
			// Block until ctx is cancelled.
			<-ctx.Done()
			return nil
		},
		Restart: RestartPolicy{Delay: 5 * time.Millisecond, MaxRestarts: -1},
	})

	sup.Start()
	// Deferred so the supervisor is stopped even if Restart fails and the
	// test aborts through t.Fatal.
	defer sup.Stop()

	time.Sleep(50 * time.Millisecond)

	if err := sup.Restart("restartable"); err != nil {
		t.Fatal(err)
	}
	time.Sleep(100 * time.Millisecond)

	// Load once so the reported value is the one that was compared.
	if got := runs.Load(); got < 2 {
		t.Errorf("expected at least 2 runs after restart, got %d", got)
	}
}

// TestSupervisor_Restart_Bad expects Restart to return an error for a unit
// name that was never registered.
func TestSupervisor_Restart_Bad(t *testing.T) {
	sup := NewSupervisor(nil)

	if err := sup.Restart("nonexistent"); err == nil {
		t.Error("expected error for nonexistent unit")
	}
}

// TestSupervisor_StopUnit_Good starts a long-running function unit, stops it
// with StopUnit and expects Status to report it as no longer running.
func TestSupervisor_StopUnit_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	sup.RegisterFunc(GoSpec{
		Name: "stoppable",
		Func: func(ctx context.Context) error {
			// Block until ctx is cancelled.
			<-ctx.Done()
			return nil
		},
		Restart: RestartPolicy{Delay: 5 * time.Millisecond, MaxRestarts: -1},
	})

	sup.Start()
	// Deferred so the supervisor is stopped even when the test aborts through
	// t.Fatal below.
	defer sup.Stop()

	time.Sleep(50 * time.Millisecond)

	if err := sup.StopUnit("stoppable"); err != nil {
		t.Fatal(err)
	}
	time.Sleep(100 * time.Millisecond)

	status, err := sup.Status("stoppable")
	if err != nil {
		t.Fatal(err)
	}
	if status.Running {
		t.Error("expected unit to be stopped")
	}
}

// TestSupervisor_StopUnit_Bad expects StopUnit to return an error for a unit
// name that was never registered.
func TestSupervisor_StopUnit_Bad(t *testing.T) {
	sup := NewSupervisor(nil)

	if stopErr := sup.StopUnit("nonexistent"); stopErr != nil {
		return // an error is the expected outcome
	}
	t.Error("expected error for nonexistent unit")
}

// TestSupervisor_StartIdempotent_Good calls Start three times in a row and
// expects the registered function to have been invoked exactly once.
func TestSupervisor_StartIdempotent_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	var invocations atomic.Int32
	spec := GoSpec{
		Name: "once",
		Func: func(ctx context.Context) error {
			invocations.Add(1)
			<-ctx.Done()
			return nil
		},
	}
	sup.RegisterFunc(spec)

	// Only the first call should have any effect; the other two are expected
	// to be no-ops.
	for i := 0; i < 3; i++ {
		sup.Start()
	}

	time.Sleep(50 * time.Millisecond)

	if got := invocations.Load(); got != 1 {
		t.Errorf("expected exactly 1 run, got %d", got)
	}

	sup.Stop()
}

// TestSupervisor_NoRestart_Good registers a function that returns
// immediately with MaxRestarts set to 0 and expects it to run exactly once
// and to be reported as not running afterwards.
func TestSupervisor_NoRestart_Good(t *testing.T) {
	sup := NewSupervisor(nil)

	var runs atomic.Int32
	sup.RegisterFunc(GoSpec{
		Name: "oneshot",
		Func: func(ctx context.Context) error {
			runs.Add(1)
			return nil // Exit immediately
		},
		Restart: RestartPolicy{Delay: 5 * time.Millisecond, MaxRestarts: 0},
	})

	sup.Start()
	// Deferred so the supervisor is stopped even when the test aborts early.
	defer sup.Stop()

	time.Sleep(100 * time.Millisecond)

	status, err := sup.Status("oneshot")
	if err != nil {
		t.Fatal(err)
	}
	if status.Running {
		t.Error("expected oneshot to not be running")
	}
	// The function should run once (initial run) and never be restarted.
	// NOTE(review): the original comment states RestartCount ends up as 1
	// after the initial run exits; that is not asserted here — confirm
	// against the supervisor implementation.
	if got := runs.Load(); got != 1 {
		t.Errorf("expected exactly 1 run, got %d", got)
	}
}

// TestSupervisor_Register_Ugly expects Register to panic when a DaemonSpec
// is registered on a supervisor that was constructed with a nil argument.
func TestSupervisor_Register_Ugly(t *testing.T) {
	sup := NewSupervisor(nil)

	spec := DaemonSpec{
		Name:       "will-panic",
		RunOptions: RunOptions{Command: "echo"},
	}

	// Run Register inside a closure so the panic can be captured as a value.
	panicked := func() (caught bool) {
		defer func() { caught = recover() != nil }()
		sup.Register(spec)
		return false
	}()

	if !panicked {
		t.Error("expected panic when registering process daemon without service")
	}
}