From d9f3b726f2da238c5d53ff36d45986241ecb1ed7 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 9 Feb 2026 10:36:23 +0000 Subject: [PATCH] feat(agentci): package dispatch system for multi-agent deployment Config-driven agent targets replace hardcoded map so new agents can be added via CLI instead of recompiling. Includes setup script for bootstrapping agent machines and CLI commands for management. - Add pkg/agentci with config types and CRUD (LoadAgents, SaveAgent, etc.) - Add CLI: core ai agent {add,list,status,logs,setup,remove} - Add scripts/agent-setup.sh (SSH bootstrap: dirs, cron, prereq check) - Headless loads agents from ~/.core/config.yaml - Dispatch ticket includes forgejo_user for dynamic clone URLs - agent-runner.sh reads username from ticket JSON, not hardcoded Co-Authored-By: Claude Opus 4.6 --- internal/cmd/ai/cmd_agent.go | 332 +++++++++++++++++++++++++++++ internal/cmd/ai/cmd_commands.go | 3 + internal/core-ide/headless.go | 14 +- pkg/agentci/config.go | 80 +++++++ pkg/jobrunner/handlers/dispatch.go | 2 + scripts/agent-runner.sh | 6 +- scripts/agent-setup.sh | 86 ++++++++ 7 files changed, 519 insertions(+), 4 deletions(-) create mode 100644 internal/cmd/ai/cmd_agent.go create mode 100644 pkg/agentci/config.go create mode 100755 scripts/agent-setup.sh diff --git a/internal/cmd/ai/cmd_agent.go b/internal/cmd/ai/cmd_agent.go new file mode 100644 index 00000000..8d325ae7 --- /dev/null +++ b/internal/cmd/ai/cmd_agent.go @@ -0,0 +1,332 @@ +package ai + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/host-uk/core/pkg/agentci" + "github.com/host-uk/core/pkg/cli" + "github.com/host-uk/core/pkg/config" +) + +// AddAgentCommands registers the 'agent' subcommand group under 'ai'. 
+func AddAgentCommands(parent *cli.Command) {
+	agentCmd := &cli.Command{
+		Use:   "agent",
+		Short: "Manage AgentCI dispatch targets",
+	}
+
+	agentCmd.AddCommand(agentAddCmd())
+	agentCmd.AddCommand(agentListCmd())
+	agentCmd.AddCommand(agentStatusCmd())
+	agentCmd.AddCommand(agentLogsCmd())
+	agentCmd.AddCommand(agentSetupCmd())
+	agentCmd.AddCommand(agentRemoveCmd())
+
+	parent.AddCommand(agentCmd)
+}
+
+// loadConfig opens the core configuration via config.New.
+func loadConfig() (*config.Config, error) {
+	return config.New()
+}
+
+// lookupAgent loads the configuration and returns the entry stored under name.
+// The boolean is false when no such agent exists; the error reports a failure
+// to load the configuration or to list the agents.
+func lookupAgent(name string) (agentci.AgentConfig, bool, error) {
+	cfg, err := loadConfig()
+	if err != nil {
+		return agentci.AgentConfig{}, false, err
+	}
+
+	agents, err := agentci.ListAgents(cfg)
+	if err != nil {
+		return agentci.AgentConfig{}, false, err
+	}
+
+	ac, ok := agents[name]
+	return ac, ok, nil
+}
+
+// agentSSHCommand builds (but does not start) an ssh invocation that runs
+// remoteCmd on host, accepting unknown host keys on first contact and giving
+// up on the connection attempt after timeoutSecs seconds.
+func agentSSHCommand(host string, timeoutSecs int, remoteCmd string) *exec.Cmd {
+	return exec.Command("ssh",
+		"-o", "StrictHostKeyChecking=accept-new",
+		"-o", fmt.Sprintf("ConnectTimeout=%d", timeoutSecs),
+		host, remoteCmd)
+}
+
+// agentAddCmd returns the 'add' command: it verifies SSH connectivity to the
+// host and then stores the agent, marked active, in the config.
+func agentAddCmd() *cli.Command {
+	cmd := &cli.Command{
+		Use:   "add <name> <user@host>",
+		Short: "Add an agent to the config",
+		Args:  cli.ExactArgs(2),
+		RunE: func(cmd *cli.Command, args []string) error {
+			name, host := args[0], args[1]
+			if name == "" {
+				return fmt.Errorf("agent name must not be empty")
+			}
+			// ssh would parse a host beginning with '-' as a command-line option.
+			if host == "" || strings.HasPrefix(host, "-") {
+				return fmt.Errorf("invalid host %q", host)
+			}
+
+			// Both flags are registered below, so the lookups cannot fail.
+			forgejoUser, _ := cmd.Flags().GetString("forgejo-user")
+			if forgejoUser == "" {
+				forgejoUser = name
+			}
+			queueDir, _ := cmd.Flags().GetString("queue-dir")
+			if queueDir == "" {
+				queueDir = "/home/claude/ai-work/queue"
+			}
+
+			// Test SSH connectivity before anything is written to the config.
+			fmt.Printf("Testing SSH to %s... ", host)
+			out, err := agentSSHCommand(host, 10, "echo ok").CombinedOutput()
+			if err != nil {
+				fmt.Println(errorStyle.Render("FAILED"))
+				// Keep the underlying error: ssh may fail without printing anything.
+				if msg := strings.TrimSpace(string(out)); msg != "" {
+					return fmt.Errorf("SSH failed: %w: %s", err, msg)
+				}
+				return fmt.Errorf("SSH failed: %w", err)
+			}
+			fmt.Println(successStyle.Render("OK"))
+
+			cfg, err := loadConfig()
+			if err != nil {
+				return err
+			}
+
+			ac := agentci.AgentConfig{
+				Host:        host,
+				QueueDir:    queueDir,
+				ForgejoUser: forgejoUser,
+				Active:      true,
+			}
+			if err := agentci.SaveAgent(cfg, name, ac); err != nil {
+				return err
+			}
+
+			fmt.Printf("Agent %s added (%s)\n", successStyle.Render(name), host)
+			return nil
+		},
+	}
+	cmd.Flags().String("forgejo-user", "", "Forgejo username (defaults to agent name)")
+	cmd.Flags().String("queue-dir", "", "Remote queue directory (default: /home/claude/ai-work/queue)")
+	return cmd
+}
+
+// agentListCmd returns the 'list' command: it prints every configured agent
+// together with the number of queued tickets, obtained with one SSH call per
+// agent. An unreachable agent shows "-" in the QUEUE column.
+func agentListCmd() *cli.Command {
+	return &cli.Command{
+		Use:   "list",
+		Short: "List configured agents",
+		RunE: func(cmd *cli.Command, args []string) error {
+			cfg, err := loadConfig()
+			if err != nil {
+				return err
+			}
+
+			agents, err := agentci.ListAgents(cfg)
+			if err != nil {
+				return err
+			}
+
+			if len(agents) == 0 {
+				fmt.Println(dimStyle.Render("No agents configured. Use 'core ai agent add' to add one."))
+				return nil
+			}
+
+			table := cli.NewTable("NAME", "HOST", "FORGEJO USER", "ACTIVE", "QUEUE")
+			for name, ac := range agents {
+				active := dimStyle.Render("no")
+				if ac.Active {
+					active = successStyle.Render("yes")
+				}
+
+				// Quick SSH check for queue depth; on failure the placeholder stays.
+				queue := dimStyle.Render("-")
+				out, err := agentSSHCommand(ac.Host, 5,
+					fmt.Sprintf("ls %s/ticket-*.json 2>/dev/null | wc -l", ac.QueueDir),
+				).Output()
+				if err == nil {
+					queue = strings.TrimSpace(string(out))
+				}
+
+				table.AddRow(name, ac.Host, ac.ForgejoUser, active, queue)
+			}
+			table.Render()
+			return nil
+		},
+	}
+}
+
+// agentStatusCmd returns the 'status' command: it runs a small shell script on
+// the agent that reports queue/active/done ticket counts and whether the
+// runner lock belongs to a live process.
+func agentStatusCmd() *cli.Command {
+	return &cli.Command{
+		Use:   "status <name>",
+		Short: "Check agent status via SSH",
+		Args:  cli.ExactArgs(1),
+		RunE: func(cmd *cli.Command, args []string) error {
+			name := args[0]
+			ac, ok, err := lookupAgent(name)
+			if err != nil {
+				return err
+			}
+			if !ok {
+				return fmt.Errorf("agent %q not found", name)
+			}
+
+			// NOTE(review): the paths below are fixed under ~/ai-work and do not
+			// use ac.QueueDir, unlike 'list' — confirm this is intended.
+			script := `
+				echo "=== Queue ==="
+				ls ~/ai-work/queue/ticket-*.json 2>/dev/null | wc -l
+				echo "=== Active ==="
+				ls ~/ai-work/active/ticket-*.json 2>/dev/null || echo "none"
+				echo "=== Done ==="
+				ls ~/ai-work/done/ticket-*.json 2>/dev/null | wc -l
+				echo "=== Lock ==="
+				if [ -f ~/ai-work/.runner.lock ]; then
+					PID=$(cat ~/ai-work/.runner.lock)
+					if kill -0 "$PID" 2>/dev/null; then
+						echo "RUNNING (PID $PID)"
+					else
+						echo "STALE (PID $PID)"
+					fi
+				else
+					echo "IDLE"
+				fi
+			`
+
+			sshCmd := agentSSHCommand(ac.Host, 10, script)
+			sshCmd.Stdout = os.Stdout
+			sshCmd.Stderr = os.Stderr
+			return sshCmd.Run()
+		},
+	}
+}
+
+// agentLogsCmd returns the 'logs' command: it tails ~/ai-work/logs/runner.log
+// on the agent, optionally following new output.
+func agentLogsCmd() *cli.Command {
+	cmd := &cli.Command{
+		Use:   "logs <name>",
+		Short: "Stream agent runner logs",
+		Args:  cli.ExactArgs(1),
+		RunE: func(cmd *cli.Command, args []string) error {
+			name := args[0]
+			// Both flags are registered below, so the lookups cannot fail.
+			follow, _ := cmd.Flags().GetBool("follow")
+			lines, _ := cmd.Flags().GetInt("lines")
+			if lines < 0 {
+				return fmt.Errorf("--lines must not be negative (got %d)", lines)
+			}
+
+			ac, ok, err := lookupAgent(name)
+			if err != nil {
+				return err
+			}
+			if !ok {
+				return fmt.Errorf("agent %q not found", name)
+			}
+
+			tailCmd := fmt.Sprintf("tail -n %d ~/ai-work/logs/runner.log", lines)
+			if follow {
+				tailCmd = fmt.Sprintf("tail -f -n %d ~/ai-work/logs/runner.log", lines)
+			}
+
+			sshCmd := agentSSHCommand(ac.Host, 10, tailCmd)
+			sshCmd.Stdout = os.Stdout
+			sshCmd.Stderr = os.Stderr
+			sshCmd.Stdin = os.Stdin
+			return sshCmd.Run()
+		},
+	}
+	cmd.Flags().BoolP("follow", "f", false, "Follow log output")
+	cmd.Flags().IntP("lines", "n", 50, "Number of lines to show")
+	return cmd
+}
+
+// agentSetupCmd returns the 'setup' command: it locates agent-setup.sh and
+// runs it locally with the agent's host as its only argument.
+func agentSetupCmd() *cli.Command {
+	return &cli.Command{
+		Use:   "setup <name>",
+		Short: "Bootstrap agent machine (create dirs, copy runner, install cron)",
+		Args:  cli.ExactArgs(1),
+		RunE: func(cmd *cli.Command, args []string) error {
+			name := args[0]
+			ac, ok, err := lookupAgent(name)
+			if err != nil {
+				return err
+			}
+			if !ok {
+				return fmt.Errorf("agent %q not found — use 'core ai agent add' first", name)
+			}
+
+			// Find the setup script relative to the binary or in known locations.
+			scriptPath := findSetupScript()
+			if scriptPath == "" {
+				return fmt.Errorf("agent-setup.sh not found — expected in scripts/ directory")
+			}
+
+			fmt.Printf("Setting up %s on %s...\n", name, ac.Host)
+			setupCmd := exec.Command("bash", scriptPath, ac.Host)
+			setupCmd.Stdout = os.Stdout
+			setupCmd.Stderr = os.Stderr
+			if err := setupCmd.Run(); err != nil {
+				return fmt.Errorf("setup failed: %w", err)
+			}
+
+			fmt.Println(successStyle.Render("Setup complete!"))
+			return nil
+		},
+	}
+}
+
+// agentRemoveCmd returns the 'remove' command: it deletes the named agent from
+// the config. Nothing on the agent machine is touched.
+func agentRemoveCmd() *cli.Command {
+	return &cli.Command{
+		Use:   "remove <name>",
+		Short: "Remove an agent from config",
+		Args:  cli.ExactArgs(1),
+		RunE: func(cmd *cli.Command, args []string) error {
+			name := args[0]
+			cfg, err := loadConfig()
+			if err != nil {
+				return err
+			}
+
+			if err := agentci.RemoveAgent(cfg, name); err != nil {
+				return err
+			}
+
+			fmt.Printf("Agent %s removed.\n", name)
+			return nil
+		},
+	}
+}
+
+// findSetupScript looks for agent-setup.sh in common locations and returns the
+// first path that exists: <exe dir>/scripts, <exe dir>/../scripts, then
+// <working dir>/scripts. It returns "" when none is found.
+func findSetupScript() string {
+	var candidates []string
+
+	// Relative to executable. Best effort: if the path is unknown, skip these.
+	if exe, _ := os.Executable(); exe != "" {
+		dir := filepath.Dir(exe)
+		candidates = append(candidates,
+			filepath.Join(dir, "scripts", "agent-setup.sh"),
+			filepath.Join(dir, "..", "scripts", "agent-setup.sh"),
+		)
+	}
+
+	// Working directory. Best effort as above.
+	if cwd, _ := os.Getwd(); cwd != "" {
+		candidates = append(candidates, filepath.Join(cwd, "scripts", "agent-setup.sh"))
+	}
+
+	for _, c := range candidates {
+		if _, err := os.Stat(c); err == nil {
+			return c
+		}
+	}
+	return ""
+}
+
diff --git a/internal/cmd/ai/cmd_commands.go b/internal/cmd/ai/cmd_commands.go
index 1cf7dade..68c31624 100644
--- a/internal/cmd/ai/cmd_commands.go
+++ b/internal/cmd/ai/cmd_commands.go
@@ -66,6 +66,9 @@ func initCommands() {
 
 	// Add metrics subcommand (core ai metrics)
 	addMetricsCommand(aiCmd)
+
+	// Add agent management commands (core ai agent ...)
+	AddAgentCommands(aiCmd)
 }
 
 // AddAICommands registers the 'ai' command and all subcommands.
diff --git a/internal/core-ide/headless.go b/internal/core-ide/headless.go
index 929b6b61..1ac22efa 100644
--- a/internal/core-ide/headless.go
+++ b/internal/core-ide/headless.go
@@ -11,7 +11,9 @@ import (
 	"syscall"
 	"time"
 
+	"github.com/host-uk/core/pkg/agentci"
 	"github.com/host-uk/core/pkg/cli"
+	"github.com/host-uk/core/pkg/config"
 	"github.com/host-uk/core/pkg/forge"
 	"github.com/host-uk/core/pkg/jobrunner"
 	forgejosource "github.com/host-uk/core/pkg/jobrunner/forgejo"
@@ -65,10 +67,17 @@ func startHeadless() {
 	enableAutoMerge := handlers.NewEnableAutoMergeHandler(forgeClient)
 	tickParent := handlers.NewTickParentHandler(forgeClient)
 
-	// Agent dispatch — maps Forgejo usernames to SSH targets.
-	agentTargets := map[string]handlers.AgentTarget{
-		"darbs-claude": {Host: "claude@192.168.0.201", QueueDir: "/home/claude/ai-work/queue"},
-	}
+	// Agent dispatch — load targets from ~/.core/config.yaml.
+	// A load failure is not fatal (the runner starts with no targets), but it
+	// is logged so a broken config does not silently disable dispatch.
+	agentTargets := map[string]handlers.AgentTarget{}
+	if cfg, cfgErr := config.New(); cfgErr != nil {
+		log.Printf("Agent dispatch: cannot load config: %v", cfgErr)
+	} else if loaded, loadErr := agentci.LoadAgents(cfg); loadErr != nil {
+		log.Printf("Agent dispatch: cannot load agents: %v", loadErr)
+	} else if loaded != nil {
+		agentTargets = loaded
+	}
+	log.Printf("Loaded %d agent targets", len(agentTargets))
 	dispatch := handlers.NewDispatchHandler(forgeClient, forgeURL, forgeToken, agentTargets)
 
 	// Build poller
diff --git a/pkg/agentci/config.go b/pkg/agentci/config.go
new file mode 100644
index 00000000..bf3d13b3
--- /dev/null
+++ b/pkg/agentci/config.go
@@ -0,0 +1,80 @@
+// Package agentci provides configuration and management for AgentCI dispatch targets.
+package agentci
+
+import (
+	"fmt"
+
+	"github.com/host-uk/core/pkg/config"
+	"github.com/host-uk/core/pkg/jobrunner/handlers"
+)
+
+// AgentConfig represents a single agent machine in the config file.
+type AgentConfig struct {
+	// Host is the SSH destination of the agent machine; LoadAgents requires it.
+	Host string `yaml:"host" mapstructure:"host"`
+	// QueueDir is the remote directory tickets are queued in.
+	QueueDir string `yaml:"queue_dir" mapstructure:"queue_dir"`
+	// ForgejoUser is the Forgejo account associated with this agent.
+	ForgejoUser string `yaml:"forgejo_user" mapstructure:"forgejo_user"`
+	// Active controls whether LoadAgents includes the agent in its result.
+	Active bool `yaml:"active" mapstructure:"active"`
+}
+
+// defaultQueueDir is the remote queue path used when an agent entry has none.
+const defaultQueueDir = "/home/claude/ai-work/queue"
+
+// validAgentName reports whether name can serve as one segment of a config key.
+// Config keys are dot-separated paths ("agentci.agents.<name>"), so a name
+// containing a dot would be split into nested keys instead of naming one agent.
+func validAgentName(name string) bool {
+	if name == "" {
+		return false
+	}
+	for _, r := range name {
+		if r == '.' {
+			return false
+		}
+	}
+	return true
+}
+
+// LoadAgents reads agent targets from config and returns a map suitable for the dispatch handler.
+// Inactive agents are skipped. Each target is keyed by the agent's Forgejo
+// username, falling back to the config name when none is set.
+// Returns an empty map (not an error) if no agents are configured, and an
+// error if an active agent has no host or two active agents share a key.
+func LoadAgents(cfg *config.Config) (map[string]handlers.AgentTarget, error) {
+	var agents map[string]AgentConfig
+	if err := cfg.Get("agentci.agents", &agents); err != nil {
+		// No config is fine — just no agents.
+		return map[string]handlers.AgentTarget{}, nil
+	}
+
+	targets := make(map[string]handlers.AgentTarget, len(agents))
+	for name, ac := range agents {
+		if !ac.Active {
+			continue
+		}
+		if ac.Host == "" {
+			return nil, fmt.Errorf("agent %q: host is required", name)
+		}
+
+		// The map this replaces was keyed by Forgejo username, and tickets carry
+		// the assignee as forgejo_user, so key by ForgejoUser rather than by the
+		// config name. NOTE(review): confirm against DispatchHandler's lookup.
+		key := ac.ForgejoUser
+		if key == "" {
+			key = name
+		}
+		if _, dup := targets[key]; dup {
+			return nil, fmt.Errorf("agent %q: forgejo user %q is already used by another active agent", name, key)
+		}
+
+		queueDir := ac.QueueDir
+		if queueDir == "" {
+			queueDir = defaultQueueDir
+		}
+		targets[key] = handlers.AgentTarget{
+			Host:     ac.Host,
+			QueueDir: queueDir,
+		}
+	}
+
+	return targets, nil
+}
+
+// SaveAgent writes an agent config entry to the config file under
+// agentci.agents.<name>. It rejects an empty name, a name containing a dot,
+// and an entry without a host.
+func SaveAgent(cfg *config.Config, name string, ac AgentConfig) error {
+	if !validAgentName(name) {
+		return fmt.Errorf("invalid agent name %q: must be non-empty and must not contain '.'", name)
+	}
+	if ac.Host == "" {
+		return fmt.Errorf("agent %q: host is required", name)
+	}
+
+	key := fmt.Sprintf("agentci.agents.%s", name)
+	return cfg.Set(key, map[string]any{
+		"host":         ac.Host,
+		"queue_dir":    ac.QueueDir,
+		"forgejo_user": ac.ForgejoUser,
+		"active":       ac.Active,
+	})
+}
+
+// RemoveAgent removes an agent from the config file.
+// It returns an error if the agent list cannot be read or name is not in it.
+func RemoveAgent(cfg *config.Config, name string) error {
+	var agents map[string]AgentConfig
+	if err := cfg.Get("agentci.agents", &agents); err != nil {
+		return fmt.Errorf("no agents configured: %w", err)
+	}
+	if _, ok := agents[name]; !ok {
+		return fmt.Errorf("agent %q not found", name)
+	}
+	delete(agents, name)
+	return cfg.Set("agentci.agents", agents)
+}
+
+// ListAgents returns all configured agents (active and inactive).
+func ListAgents(cfg *config.Config) (map[string]AgentConfig, error) {
+	var agents map[string]AgentConfig
+	if err := cfg.Get("agentci.agents", &agents); err != nil {
+		return map[string]AgentConfig{}, nil // a failed lookup is reported as "no agents", not as an error
+	}
+	return agents, nil
+}
diff --git a/pkg/jobrunner/handlers/dispatch.go b/pkg/jobrunner/handlers/dispatch.go
index c32340fa..1fb99d8f 100644
--- a/pkg/jobrunner/handlers/dispatch.go
+++ b/pkg/jobrunner/handlers/dispatch.go
@@ -32,6 +32,7 @@ type DispatchTicket struct {
 	EpicNumber int    `json:"epic_number"`
 	ForgeURL   string `json:"forge_url"`
 	ForgeToken string `json:"forge_token"`
+	ForgeUser  string `json:"forgejo_user"` // serialized as "forgejo_user"; agent-runner.sh reads it for the clone URL
 	CreatedAt  string `json:"created_at"`
 }
 
@@ -91,6 +92,7 @@ func (h *DispatchHandler) Execute(ctx context.Context, signal *jobrunner.Pipelin
 		EpicNumber: signal.EpicNumber,
 		ForgeURL:   h.forgeURL,
 		ForgeToken: h.token,
+		ForgeUser:  signal.Assignee, // the signal's assignee becomes the ticket's forgejo_user
 		CreatedAt:  time.Now().UTC().Format(time.RFC3339),
 	}
 
diff --git a/scripts/agent-runner.sh b/scripts/agent-runner.sh
index f99009ae..46a6ca4d 100755
--- a/scripts/agent-runner.sh
+++ b/scripts/agent-runner.sh
@@ -71,7 +71,11 @@ JOB_DIR="$WORK_DIR/jobs/${REPO_OWNER}-${REPO_NAME}-${ISSUE_NUM}"
 REPO_DIR="$JOB_DIR/$REPO_NAME"
 mkdir -p "$JOB_DIR"
 
-CLONE_URL="https://darbs-claude:${FORGE_TOKEN}@${FORGE_URL#https://}/${REPO_OWNER}/${REPO_NAME}.git"
+FORGEJO_USER=$(jq -r '.forgejo_user // empty' "$TICKET_FILE") # empty when the ticket has no forgejo_user (or it is null)
+if [ -z "$FORGEJO_USER" ]; then
+    FORGEJO_USER="$(hostname -s)-$(whoami)" # fallback: <short hostname>-<login user>
+fi
+CLONE_URL="https://${FORGEJO_USER}:${FORGE_TOKEN}@${FORGE_URL#https://}/${REPO_OWNER}/${REPO_NAME}.git"
 
 if [ -d "$REPO_DIR/.git" ]; then
     echo "$(date -Iseconds) Updating existing clone..."
diff --git a/scripts/agent-setup.sh b/scripts/agent-setup.sh
new file mode 100755
index 00000000..70ecacc7
--- /dev/null
+++ b/scripts/agent-setup.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# agent-setup.sh — Bootstrap an AgentCI agent machine via SSH.
+#
+# Usage: agent-setup.sh <user@host>
+#
+# Creates work directories, copies agent-runner.sh, installs cron,
+# and verifies prerequisites.
+set -euo pipefail
+
+HOST="${1:?Usage: agent-setup.sh <user@host>}"
+# ssh and scp would parse a host beginning with '-' as an option.
+case "$HOST" in
+    -*)
+        echo "ERROR: invalid host: $HOST" >&2
+        exit 1
+        ;;
+esac
+
+# An array keeps every option a separate word without unquoted word splitting.
+SSH_OPTS=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10)
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+RUNNER_SCRIPT="${SCRIPT_DIR}/agent-runner.sh"
+
+if [ ! -f "$RUNNER_SCRIPT" ]; then
+    echo "ERROR: agent-runner.sh not found at $RUNNER_SCRIPT"
+    exit 1
+fi
+
+echo "=== AgentCI Setup: $HOST ==="
+
+# --- 1. Test SSH ---
+echo -n "Testing SSH... "
+if ! ssh "${SSH_OPTS[@]}" "$HOST" "echo ok" >/dev/null 2>&1; then
+    echo "FAILED — cannot reach $HOST"
+    exit 1
+fi
+echo "OK"
+
+# --- 2. Create directories ---
+# Directories are listed explicitly: brace expansion is a bash feature and the
+# remote login shell may not support it.
+echo -n "Creating directories... "
+ssh "${SSH_OPTS[@]}" "$HOST" \
+    "mkdir -p ~/ai-work/queue ~/ai-work/active ~/ai-work/done ~/ai-work/logs ~/ai-work/jobs"
+echo "OK"
+
+# --- 3. Copy runner script ---
+echo -n "Copying agent-runner.sh... "
+scp "${SSH_OPTS[@]}" "$RUNNER_SCRIPT" "${HOST}:~/ai-work/agent-runner.sh"
+ssh "${SSH_OPTS[@]}" "$HOST" "chmod +x ~/ai-work/agent-runner.sh"
+echo "OK"
+
+# --- 4. Install cron (idempotent) ---
+# CRON_LINE is expanded locally and reaches the remote shell inside single
+# quotes, so it must not itself contain a single quote.
+echo -n "Installing cron... "
+CRON_LINE="*/5 * * * * ~/ai-work/agent-runner.sh >> ~/ai-work/logs/runner.log 2>&1"
+ssh "${SSH_OPTS[@]}" "$HOST" "
+    if crontab -l 2>/dev/null | grep -qF 'agent-runner.sh'; then
+        echo 'already installed'
+    else
+        (crontab -l 2>/dev/null; echo '$CRON_LINE') | crontab -
+        echo 'installed'
+    fi
+"
+
+# --- 5. Verify prerequisites ---
+# Missing tools only produce a warning; setup continues.
+echo "Checking prerequisites..."
+MISSING=""
+for tool in jq git claude; do
+    if ssh "${SSH_OPTS[@]}" "$HOST" "command -v $tool" >/dev/null 2>&1; then
+        echo "  $tool: OK"
+    else
+        echo "  $tool: MISSING"
+        MISSING="$MISSING $tool"
+    fi
+done
+
+if [ -n "$MISSING" ]; then
+    echo ""
+    echo "WARNING: Missing tools:$MISSING"
+    echo "Install them before the agent can process tickets."
+fi
+
+# --- 6. Round-trip test ---
+# Write, read back and remove a probe file in the queue directory. The name
+# does not match ticket-*.json. The probe is removed even when the read fails,
+# and '|| true' keeps 'set -e' from aborting before FAILED is printed.
+echo -n "Round-trip test... "
+TEST_FILE="queue/test-setup-$(date +%s).json"
+RESULT=$(ssh "${SSH_OPTS[@]}" "$HOST" \
+    "echo '{\"test\":true}' > ~/ai-work/$TEST_FILE && cat ~/ai-work/$TEST_FILE; rm -f ~/ai-work/$TEST_FILE" || true)
+if [ "$RESULT" = '{"test":true}' ]; then
+    echo "OK"
+else
+    echo "FAILED"
+    exit 1
+fi
+
+echo ""
+echo "=== Setup complete ==="
+echo "Agent queue:  $HOST:~/ai-work/queue/"
+echo "Runner log:   $HOST:~/ai-work/logs/runner.log"