fix(agentci): restore security helpers and map attack vectors

Co-Authored-By: Virgil <virgil@lethean.io>
2026-03-24 13:15:55 +00:00 · 2026-03-24 13:15:55 +00:00 · a54bd834ff
commit a54bd834ff
parent e93f5673ef
4 changed files with 589 additions and 5 deletions
--- a/agentci/security.go
+++ b/agentci/security.go
@ -2,6 +2,7 @@ package agentci

 import (
 	"os/exec"
+	"path"
 	"path/filepath"
 	"regexp"
 	"strings"
@ -15,13 +16,134 @@ var safeNameRegex = regexp.MustCompile(`^[a-zA-Z0-9\-\_\.]+$`)
 // Returns filepath.Base of the input after validation.
 func SanitizePath(input string) (string, error) {
 	base := filepath.Base(input)
-	if !safeNameRegex.MatchString(base) {
-		return "", coreerr.E("agentci.SanitizePath", "invalid characters in path element: "+input, nil)
+	safeBase, err := ValidatePathElement(base)
+	if err != nil {
+		return "", coreerr.E("agentci.SanitizePath", "invalid path element", err)
 	}
-	if base == "." || base == ".." || base == "/" {
-		return "", coreerr.E("agentci.SanitizePath", "invalid path element: "+base, nil)
+	return safeBase, nil
+}
+
+// ValidatePathElement validates a single filesystem or URL path element.
+func ValidatePathElement(input string) (string, error) {
+	if input == "" {
+		return "", coreerr.E("agentci.ValidatePathElement", "path element is empty", nil)
 	}
-	return base, nil
+	if strings.TrimSpace(input) != input {
+		return "", coreerr.E("agentci.ValidatePathElement", "path element has leading or trailing whitespace", nil)
+	}
+	if input == "." || input == ".." {
+		return "", coreerr.E("agentci.ValidatePathElement", "invalid path element: "+input, nil)
+	}
+	if strings.ContainsAny(input, `/\`) || filepath.IsAbs(input) {
+		return "", coreerr.E("agentci.ValidatePathElement", "path element must not contain path separators", nil)
+	}
+	if !safeNameRegex.MatchString(input) {
+		return "", coreerr.E("agentci.ValidatePathElement", "invalid characters in path element: "+input, nil)
+	}
+
+	return input, nil
+}
+
+// ResolvePathWithinRoot returns a validated name and an absolute path constrained to the given root.
+func ResolvePathWithinRoot(root, name string) (string, string, error) {
+	if root == "" {
+		return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "root is empty", nil)
+	}
+
+	safeName, err := ValidatePathElement(name)
+	if err != nil {
+		return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "invalid path element", err)
+	}
+
+	rootAbs, err := filepath.Abs(root)
+	if err != nil {
+		return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "resolve root path", err)
+	}
+	rootAbs = filepath.Clean(rootAbs)
+
+	resolved := filepath.Clean(filepath.Join(rootAbs, safeName))
+	rel, err := filepath.Rel(rootAbs, resolved)
+	if err != nil {
+		return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "compute relative path", err)
+	}
+	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "resolved path escapes root", nil)
+	}
+
+	return safeName, resolved, nil
+}
+
+// ValidateRemoteDir validates a remote POSIX directory path for SSH command usage.
+func ValidateRemoteDir(input string) (string, error) {
+	if input == "" {
+		return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir is empty", nil)
+	}
+	if strings.TrimSpace(input) != input {
+		return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir has leading or trailing whitespace", nil)
+	}
+	if strings.ContainsAny(input, "\x00\r\n") {
+		return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir contains control characters", nil)
+	}
+	if input == "/" || input == "~" {
+		return input, nil
+	}
+
+	var (
+		parts []string
+		rest  string
+	)
+	switch {
+	case strings.HasPrefix(input, "/"):
+		rest = strings.TrimPrefix(input, "/")
+	case strings.HasPrefix(input, "~/"):
+		rest = strings.TrimPrefix(input, "~/")
+	default:
+		return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir must be absolute or home-relative", nil)
+	}
+
+	for _, part := range strings.Split(rest, "/") {
+		if part == "" {
+			continue
+		}
+		safePart, err := ValidatePathElement(part)
+		if err != nil {
+			return "", coreerr.E("agentci.ValidateRemoteDir", "invalid remote dir segment", err)
+		}
+		parts = append(parts, safePart)
+	}
+
+	switch {
+	case strings.HasPrefix(input, "/"):
+		if len(parts) == 0 {
+			return "/", nil
+		}
+		return "/" + strings.Join(parts, "/"), nil
+	default:
+		if len(parts) == 0 {
+			return "~", nil
+		}
+		return "~/" + strings.Join(parts, "/"), nil
+	}
+}
+
+// JoinRemotePath joins a validated remote directory with validated path elements.
+func JoinRemotePath(base string, elems ...string) (string, error) {
+	safeBase, err := ValidateRemoteDir(base)
+	if err != nil {
+		return "", coreerr.E("agentci.JoinRemotePath", "invalid base path", err)
+	}
+
+	segments := make([]string, 0, len(elems)+1)
+	segments = append(segments, safeBase)
+	for _, elem := range elems {
+		safeElem, err := ValidatePathElement(elem)
+		if err != nil {
+			return "", coreerr.E("agentci.JoinRemotePath", "invalid path element", err)
+		}
+		segments = append(segments, safeElem)
+	}
+
+	return path.Join(segments...), nil
 }

 // EscapeShellArg wraps a string in single quotes for safe remote shell insertion.
--- a/agentci/security_test.go
+++ b/agentci/security_test.go
@ -3,6 +3,7 @@
 package agentci

 import (
+	"path/filepath"
 	"testing"

 	"github.com/stretchr/testify/assert"
@ -56,6 +57,134 @@ func TestSanitizePath_Bad(t *testing.T) {
 	}
 }

+func TestValidatePathElement_Good(t *testing.T) {
+	tests := []struct {
+		input string
+		want  string
+	}{
+		{"simple", "simple"},
+		{"with-dash", "with-dash"},
+		{"with_underscore", "with_underscore"},
+		{"with.dot", "with.dot"},
+		{"ticket-host-uk-core-1.json", "ticket-host-uk-core-1.json"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			got, err := ValidatePathElement(tt.input)
+			require.NoError(t, err)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestValidatePathElement_Bad(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+	}{
+		{"empty", ""},
+		{"space", "has space"},
+		{"leading space", " name"},
+		{"slash", "org/repo"},
+		{"backslash", `org\\repo`},
+		{"traversal", ".."},
+		{"absolute", "/tmp"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			_, err := ValidatePathElement(tt.input)
+			assert.Error(t, err)
+		})
+	}
+}
+
+func TestResolvePathWithinRoot_Good(t *testing.T) {
+	root := t.TempDir()
+
+	gotName, gotPath, err := ResolvePathWithinRoot(root, "plugin-one")
+	require.NoError(t, err)
+	assert.Equal(t, "plugin-one", gotName)
+	assert.Equal(t, filepath.Join(root, "plugin-one"), gotPath)
+}
+
+func TestResolvePathWithinRoot_Bad(t *testing.T) {
+	root := t.TempDir()
+
+	_, _, err := ResolvePathWithinRoot(root, "../escape")
+	require.Error(t, err)
+}
+
+func TestValidateRemoteDir_Good(t *testing.T) {
+	tests := []struct {
+		input string
+		want  string
+	}{
+		{"/tmp/queue", "/tmp/queue"},
+		{"/tmp//queue/", "/tmp/queue"},
+		{"~/ai-work/queue", "~/ai-work/queue"},
+		{"~/ai-work//queue/", "~/ai-work/queue"},
+		{"~", "~"},
+		{"/", "/"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.input, func(t *testing.T) {
+			got, err := ValidateRemoteDir(tt.input)
+			require.NoError(t, err)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestValidateRemoteDir_Bad(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+	}{
+		{"empty", ""},
+		{"relative", "queue"},
+		{"shell metachar", "/tmp/queue; touch /tmp/pwned"},
+		{"space", "/tmp/queue name"},
+		{"traversal", "~/../queue"},
+		{"newline", "/tmp/queue\nother"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			_, err := ValidateRemoteDir(tt.input)
+			assert.Error(t, err)
+		})
+	}
+}
+
+func TestJoinRemotePath_Good(t *testing.T) {
+	tests := []struct {
+		name string
+		base string
+		elem string
+		want string
+	}{
+		{"absolute", "/tmp/queue", "ticket.json", "/tmp/queue/ticket.json"},
+		{"home relative", "~/ai-work/queue", "ticket.json", "~/ai-work/queue/ticket.json"},
+		{"home root", "~", ".env.ticket", "~/.env.ticket"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := JoinRemotePath(tt.base, tt.elem)
+			require.NoError(t, err)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestJoinRemotePath_Bad(t *testing.T) {
+	_, err := JoinRemotePath("/tmp/queue", "../ticket.json")
+	assert.Error(t, err)
+}
+
 func TestEscapeShellArg_Good(t *testing.T) {
 	tests := []struct {
 		input string
--- a/docs/plans/2026-03-24-security-attack-vector-mapping.md
+++ b/docs/plans/2026-03-24-security-attack-vector-mapping.md
@ -0,0 +1,199 @@
+# [scan] Security attack vector mapping — Implementation Plan
+
+> For Codex agent: this document is the issue body to execute next.
+
+## Goal
+Map every external input entry point across `core/go-scm` and produce a complete attack-vector matrix with evidence and validation status.
+
+## Scope
+- Repository: `core/go-scm`
+- Language: Go
+- Branch: `agent/create-an-implementation-plan-for-this-s`
+- Target files: all non-test Go source files
+
+## 1) Every file to scan
+
+Use this exact list as the scan set.
+
+- `agentci/clotho.go`
+- `agentci/config.go`
+- `agentci/security.go`
+- `cmd/collect/cmd.go`
+- `cmd/collect/cmd_bitcointalk.go`
+- `cmd/collect/cmd_dispatch.go`
+- `cmd/collect/cmd_excavate.go`
+- `cmd/collect/cmd_github.go`
+- `cmd/collect/cmd_market.go`
+- `cmd/collect/cmd_papers.go`
+- `cmd/collect/cmd_process.go`
+- `cmd/forge/cmd_auth.go`
+- `cmd/forge/cmd_config.go`
+- `cmd/forge/cmd_forge.go`
+- `cmd/forge/cmd_issues.go`
+- `cmd/forge/cmd_labels.go`
+- `cmd/forge/cmd_migrate.go`
+- `cmd/forge/cmd_orgs.go`
+- `cmd/forge/cmd_prs.go`
+- `cmd/forge/cmd_repos.go`
+- `cmd/forge/cmd_status.go`
+- `cmd/forge/cmd_sync.go`
+- `cmd/forge/helpers.go`
+- `cmd/gitea/cmd_config.go`
+- `cmd/gitea/cmd_gitea.go`
+- `cmd/gitea/cmd_issues.go`
+- `cmd/gitea/cmd_mirror.go`
+- `cmd/gitea/cmd_prs.go`
+- `cmd/gitea/cmd_repos.go`
+- `cmd/gitea/cmd_sync.go`
+- `cmd/scm/cmd_compile.go`
+- `cmd/scm/cmd_export.go`
+- `cmd/scm/cmd_index.go`
+- `cmd/scm/cmd_scm.go`
+- `collect/bitcointalk.go`
+- `collect/collect.go`
+- `collect/events.go`
+- `collect/excavate.go`
+- `collect/github.go`
+- `collect/market.go`
+- `collect/papers.go`
+- `collect/process.go`
+- `collect/ratelimit.go`
+- `collect/state.go`
+- `forge/client.go`
+- `forge/config.go`
+- `forge/issues.go`
+- `forge/labels.go`
+- `forge/meta.go`
+- `forge/orgs.go`
+- `forge/prs.go`
+- `forge/repos.go`
+- `forge/webhooks.go`
+- `git/git.go`
+- `git/service.go`
+- `gitea/client.go`
+- `gitea/config.go`
+- `gitea/issues.go`
+- `gitea/meta.go`
+- `gitea/repos.go`
+- `jobrunner/forgejo/signals.go`
+- `jobrunner/forgejo/source.go`
+- `jobrunner/handlers/completion.go`
+- `jobrunner/handlers/dispatch.go`
+- `jobrunner/handlers/enable_auto_merge.go`
+- `jobrunner/handlers/publish_draft.go`
+- `jobrunner/handlers/resolve_threads.go`
+- `jobrunner/handlers/send_fix_command.go`
+- `jobrunner/handlers/tick_parent.go`
+- `jobrunner/journal.go`
+- `jobrunner/poller.go`
+- `jobrunner/types.go`
+- `locales/embed.go`
+- `manifest/compile.go`
+- `manifest/loader.go`
+- `manifest/manifest.go`
+- `manifest/sign.go`
+- `marketplace/builder.go`
+- `marketplace/discovery.go`
+- `marketplace/installer.go`
+- `marketplace/marketplace.go`
+- `pkg/api/embed.go`
+- `pkg/api/provider.go`
+- `plugin/config.go`
+- `plugin/installer.go`
+- `plugin/loader.go`
+- `plugin/manifest.go`
+- `plugin/plugin.go`
+- `plugin/registry.go`
+- `repos/gitstate.go`
+- `repos/kbconfig.go`
+- `repos/registry.go`
+- `repos/workconfig.go`
+
+## 2) What to look for in each file
+
+### A) External input entry points
+- CLI entry arguments and flags
+- CLI env-config precedence (`config`, `FORGE_*`, `GITEA_*`)
+- HTTP request handlers, webhook payloads, request headers, params, and body
+- Filesystem paths and filenames supplied as config/flags/env/user values
+- External URLs, repo names, owner/org names, branch names
+- SSH command inputs and key material
+- Collector source inputs (search terms, query terms, package names, IDs)
+- Job signals/labels/checklists/issue fields/command payloads
+
+### B) Validation gaps to record
+- Missing/weak allowlists for path segments and IDs
+- Missing canonicalisation of URLs/paths/refs before use
+- Incomplete enum/type validation (state, status, action)
+- Unsafe conversions/casts without bounds checks
+- Insufficient escaping for shell, file, JSON, or command composition
+- Trusting external API responses without schema checks
+- Ignoring error paths that should halt processing
+
+### C) Injection vectors
+- OS command construction
+- Path traversal / arbitrary file write/read
+- Log forging / token leak via logging
+- SSRF via configurable URLs and webhooks
+- Template/HTML injection through rendered output paths
+- SQL-like or LDAP-like interpolation (where backend uses query strings)
+- Git command argument injection and branch/ref injection
+- Header injection in HTTP clients/servers
+
+### D) Race condition risks
+- Shared mutable state in handlers and services
+- Journal writes/read/write-back without coordination
+- Async polling loops sharing token/counter/state
+- Map/slice writes from worker goroutines
+- TOCTOU around file presence/load/read-modify-write sequences
+- Cache + state refresh races under concurrent polling
+
+## 3) Required output format for findings
+
+Each finding row must contain all columns in this exact order:
+
+`file:line | input source | flows into | validation | attack vector`
+
+Example format:
+
+`collect/github.go:142 | cmd flag `--org` (string) | buildGitHubCollector(config) -> net/http request URL | domain allowlist absent | SSRF + data exfil by domain override`
+
+### Minimal per-row capture fields
+- `file:line` (primary function where input first enters)
+- `input source` (flag/env/body/path/header/id)
+- `flows into` (target function/call chain)
+- `validation` (what checks currently exist, if any)
+- `attack vector` (confidentiality/integrity/availability risk)
+
+### Evidence fields (optional but preferred)
+- CWE id
+- Repro path
+- severity (`low`/`medium`/`high`)
+- confidence (`high`/`med`/`low`)
+- mitigation candidate
+
+## 4) Where to write the report
+
+Primary report file to produce:
+
+- `docs/security/scan-attack-vector-mapping-report.md`
+
+This file must contain:
+1. The completed matrix in the required format above
+2. Deduplicated list of attack vectors with severity
+3. Verification status (`mapped`, `validated`, `open`
+)
+4. A final summary by subsystem and risk priority
+
+This issue body should be replaced with a pointer plus a short runbook:
+- "Execution plan in `docs/plans/2026-03-24-security-attack-vector-mapping.md`, results in `docs/security/scan-attack-vector-mapping-report.md`."
+
+## Execution order
+
+- [x] 1. Create report file and add header + schema columns
+- [x] 2. Scan files in package order listed above
+- [x] 3. For each file, capture every external input entry point and map to sinks
+- [x] 4. Populate one row per mapped flow in the required `file:line | ...` format
+- [x] 5. Cross-check for duplicates and deduplicate by identical sink and attack vector
+- [x] 6. Add severity and validation gap notes per row
+- [x] 7. Finalise summary + high-priority follow-up list
--- a/docs/security/scan-attack-vector-mapping-report.md
+++ b/docs/security/scan-attack-vector-mapping-report.md
@ -0,0 +1,134 @@
+# Security Attack Vector Mapping Report
+
+Execution plan in `docs/plans/2026-03-24-security-attack-vector-mapping.md`, results in `docs/security/scan-attack-vector-mapping-report.md`.
+
+- Issue: `[scan] Security attack vector mapping`
+- Repo: `core/go-scm`
+- Branch: `agent/create-an-implementation-plan-for-this-s`
+- Date: `2026-03-24`
+- Agent: `Codex`
+
+## Findings Matrix
+
+Status meanings:
+- `open`: validation gap or exploitable flow remains.
+- `validated`: explicit hardening exists and was verified in code.
+- `mapped`: flow is real, but the current risk is limited or mostly delegated to downstream tools.
+
+```text
+file:line | input source | flows into | validation | attack vector | severity | confidence | verification
+agentci/config.go:34 | config file `agentci.agents.*` (`host`, `queue_dir`, `forgejo_user`, models, runner) | `LoadAgents()` -> `Spinner.FindByForgejoUser()` -> `jobrunner/handlers.DispatchHandler` SSH target selection | only presence/default checks; queue dir is validated later, host is never allowlisted | SSH target manipulation / unauthorised agent routing if config is attacker-controlled | medium | med | mapped
+cmd/collect/cmd.go:56 | CLI flag `--output` | `newConfig()` -> `collect.NewConfigWithMedium()` -> `cfg.OutputDir` and `.collect-state.json` | no canonicalisation or root policy | arbitrary local file write/read root for all collectors and processors | low | high | open
+collect/bitcointalk.go:64 | positional `<topic-id>` via `cmd/collect/cmd_bitcointalk.go` | `BitcoinTalkCollector.Collect()` -> `filepath.Join(cfg.OutputDir, "bitcointalk", topicID, "posts")` -> `EnsureDir`/`Write` | only non-empty topic ID required | path traversal / arbitrary file write | high | high | open
+collect/bitcointalk.go:89 | positional `<topic-id>` via `cmd/collect/cmd_bitcointalk.go` | `fmt.Sprintf("https://bitcointalk.org/index.php?topic=%s.%d", topicID, offset)` -> `http.NewRequestWithContext` | no query escaping or numeric check | malformed request / same-host request steering | medium | med | open
+collect/github.go:123 | CLI `org/repo`, `--org`, or `excavate <project>` fallback org | `exec.CommandContext("gh", "repo", "list", g.Org, ...)` | no org format validation; `exec.CommandContext` blocks shell metachar expansion | scope expansion / excessive API enumeration; low injection surface | low | med | mapped
+collect/github.go:179 | CLI `org/repo` and GitHub repo names from `gh repo list` | `filepath.Join(cfg.OutputDir, "github", g.Org, repo, "issues")` -> `EnsureDir`/`Write` | no path-segment validation on `g.Org` or `repo` | path traversal / arbitrary file write | high | high | open
+collect/github.go:238 | CLI `org/repo` and GitHub repo names from `gh repo list` | `filepath.Join(cfg.OutputDir, "github", g.Org, repo, "pulls")` -> `EnsureDir`/`Write` | no path-segment validation on `g.Org` or `repo` | path traversal / arbitrary file write | high | high | open
+collect/github.go:284 | GitHub API `issue.body` / `pr.body` | `formatIssueMarkdown()` -> markdown files under `cfg.Output` | no sanitisation before persistence | stored markdown/HTML/prompt injection in downstream renderers or LLM ingestion | medium | high | open
+collect/market.go:84 | CLI `<coin>` via `cmd/collect/cmd_market.go` | `filepath.Join(cfg.OutputDir, "market", m.CoinID)` -> `EnsureDir` | only non-empty `CoinID` check | path traversal / arbitrary file write | high | high | open
+collect/market.go:132 | CLI `<coin>` via `cmd/collect/cmd_market.go` | `fmt.Sprintf("%s/coins/%s", coinGeckoBaseURL, m.CoinID)` -> `fetchJSON()` | no path escaping or allowlist for `CoinID` | same-host endpoint injection / SSRF-style path steering | medium | high | open
+collect/market.go:184 | CLI `<coin>` and `--from` | `fmt.Sprintf("%s/coins/%s/market_chart?vs_currency=usd&days=%s", ...)` -> `fetchJSON()` | `FromDate` parsing bounds `days`; `CoinID` remains unescaped | same-host endpoint injection / excessive upstream query shaping | medium | high | open
+collect/papers.go:113 | CLI `--query` | `url.QueryEscape(p.Query)` -> IACR search request | query is escaped before request construction | query injection blocked; residual risk is upstream content trust only | low | high | validated
+collect/papers.go:145 | IACR HTML `href` values parsed from remote search results | `parseIACREntry()` -> derived `ppr.ID` -> `filepath.Join(baseDir, ppr.ID+".md")` | no filename sanitisation on remote-derived ID | path traversal / arbitrary file write if upstream HTML is hostile or compromised | high | med | open
+collect/papers.go:199 | CLI `--query` and `--category` | `url.QueryEscape(...)` -> arXiv API request, then `arxivEntryToPaper()` replaces `/` and `:` in IDs | query and category are escaped; arXiv IDs are partially canonicalised | request injection largely blocked; residual risk is remote content trust | low | high | validated
+collect/process.go:50 | CLI `<dir>` via `cmd/collect/cmd_process.go` | `cfg.Output.List(p.Dir)` and `cfg.Output.Read(filepath.Join(p.Dir, name))` | no root containment or canonicalisation of `p.Dir` | arbitrary local read/list when using `io.Local` medium | high | high | open
+collect/process.go:55 | CLI `<source>` via `cmd/collect/cmd_process.go` | `filepath.Join(cfg.OutputDir, "processed", p.Source)` -> `EnsureDir`/`Write` | no path validation on `p.Source` | path traversal / arbitrary file write | high | high | open
+collect/process.go:214 | source HTML `href` attributes from files under `p.Dir` | `nodeToMarkdown()` -> `[text](href)` persisted to markdown | no scheme allowlist or URL sanitisation | stored `javascript:` / `data:` link injection | medium | high | open
+collect/ratelimit.go:44 | concurrent caller-controlled source keys | `RateLimiter.Wait()` -> shared `last[source]` map | mutex released before slot reservation; no second eligibility check after sleep | burst bypass / API quota exhaustion / availability degradation | medium | high | open
+forge/config.go:40 | config file `forge.url`, `forge.token`, env `FORGE_URL`, `FORGE_TOKEN`, CLI overrides | `ResolveConfig()` -> `forge.New()` -> Forgejo SDK client | token required, URL not allowlisted or canonicalised | SSRF / credential exfiltration to attacker-controlled Forgejo endpoint | high | high | open
+cmd/forge/cmd_migrate.go:46 | CLI `<clone-url>`, `--service`, `--token`, `--org` | `runMigrate()` -> `forge.Client.MigrateRepo()` with `CloneAddr` and `AuthToken` | repo name is derived from URL tail only; clone URL itself is not constrained | server-side clone SSRF / internal repo import / credential misuse | high | high | open
+cmd/forge/cmd_sync.go:95 | CLI repo args and `--base-path` | `syncRepoNameFromArg()` + `ResolvePathWithinRoot()` -> `git -C <localPath>` / `git push` | repo names validated via `ValidatePathElement`; local path confined to base path | path traversal mitigated; residual risk limited to untrusted Forge URL config | low | high | validated
+gitea/config.go:40 | config file `gitea.url`, `gitea.token`, env `GITEA_URL`, `GITEA_TOKEN`, CLI overrides | `ResolveConfig()` -> `gitea.New()` -> Gitea SDK client | token required, URL not allowlisted or canonicalised | SSRF / credential exfiltration to attacker-controlled Gitea endpoint | high | high | open
+cmd/gitea/cmd_mirror.go:45 | CLI `<github-owner/repo>` and optional GitHub token | `runMirror()` -> fixed `https://github.com/<owner>/<repo>.git` -> `gitea.CreateMirror()` | owner/repo are split, but not allowlisted; source token may be forwarded | server-side clone of attacker-chosen repo and token relay to remote Gitea | high | med | open
+cmd/gitea/cmd_sync.go:95 | CLI repo args and `--base-path` | `repoNameFromArg()` + `ResolvePathWithinRoot()` -> `git -C <localPath>` / `git push` | repo names validated via `ValidatePathElement`; local path confined to base path | path traversal mitigated; residual risk limited to untrusted Gitea URL config | low | high | validated
+jobrunner/forgejo/signals.go:16 | epic issue body checklist text | `parseEpicChildren()` -> child issue numbers -> `pollRepo()` dispatch decisions | regex only checks `#<digits>`; no repository or ownership cross-check | workflow spoofing / unintended child issue dispatch | medium | high | open
+jobrunner/forgejo/signals.go:36 | PR body text | `findLinkedPR()` -> child issue association -> downstream handler execution | first `#<n>` match wins; no keyword, owner, or repo validation | PR-body spoofing causes wrong issue state transitions | medium | high | open
+jobrunner/forgejo/source.go:72 | handler `result.Error` strings | `Report()` -> Forgejo issue comment code block | no sanitisation, truncation, or mention filtering | comment injection / mention spam / log forging of pipeline output | medium | high | open
+jobrunner/handlers/dispatch.go:89 | agent config `queue_dir` plus `signal.RepoOwner` / `signal.RepoName` | `Execute()` -> remote queue path building and Forgejo label/comment operations | remote dir is validated; repo owner/name still pass through `SanitizePath(filepath.Base(...))` | retargeting/confusion if a forged signal supplies slash-containing names | medium | med | open
+jobrunner/handlers/dispatch.go:151 | issue title/body, assignee, repo metadata from `PipelineSignal` | `DispatchTicket` JSON -> remote agent queue file | JSON encoding only; content is not filtered, signed, or policy-checked | prompt injection / remote agent control-surface abuse | high | high | open
+jobrunner/handlers/dispatch.go:212 | local Forge token | `.env.<ticketID>` content -> remote worker filesystem | remote path validated and file mode set to `0600` after transfer | credential exposure on remote worker host, snapshots, or backups | medium | high | mapped
+jobrunner/handlers/dispatch.go:255 | runtime `reason` strings and raw `signal.RepoOwner` / `signal.RepoName` | `failDispatch()` -> labels and issue comments on Forgejo | no sanitisation; fail path uses raw signal repo fields | comment injection and repo-retargeting if upstream signal is forged | medium | med | open
+jobrunner/handlers/completion.go:58 | agent completion `signal.Message` / `signal.Error` | `CreateIssueComment()` on child issue | no sanitisation, truncation, or mention filtering | stored comment injection / mention spam / log forging | medium | high | open
+jobrunner/journal.go:130 | `signal.RepoOwner` / `signal.RepoName` | `Append()` -> `<base>/<owner>/<repo>/<date>.jsonl` | strict component regex, absolute-prefix containment check, and mutexed writes | path traversal blocked; safe write pattern verified | low | high | validated
+git/service.go:72 | concurrent query/task caller inputs `Paths` and service calls | `handleQuery()` / `handleTask()` -> shared `s.lastStatus` cache | no mutex or atomic protection around `lastStatus` | data race / stale or torn state under concurrent queries | medium | high | open
+cmd/scm/cmd_compile.go:54 | CLI `--sign-key` hex string | `hex.DecodeString()` -> `manifest.Compile()` -> `manifest.Sign()` -> `ed25519.Sign()` | hex decoding only; no private key length check | panic-triggered DoS via invalid Ed25519 private key length | high | high | open
+cmd/scm/cmd_export.go:28 | CLI `--dir` | `io.NewSandboxed(dir)` -> `manifest.LoadCompiled()` / `manifest.Load()` -> stdout | sandboxed medium confines access to the chosen root | local file disclosure limited to caller-selected directory | low | high | mapped
+cmd/scm/cmd_index.go:39 | CLI `--dir`, `--output`, `--base-url`, `--org` | `marketplace.Builder.BuildFromDirs()` / `marketplace.WriteIndex()` | no path or URL validation beyond OS errors | arbitrary local read/write and untrusted repo URL generation in produced index | medium | high | open
+pkg/api/provider.go:202 | HTTP query params `q` and `category` | `listMarketplace()` -> in-memory `Index.Search()` / `ByCategory()` | no server-side escaping needed; no filesystem or outbound request sink | low-risk search-scope manipulation only | low | high | mapped
+pkg/api/provider.go:466 | HTTP path param `:code` | `marketplaceCodeParam()` -> install/update/remove lookups | `PathUnescape()` + `ValidatePathElement()` | path traversal blocked for marketplace code paths | low | high | validated
+pkg/api/provider.go:309 | HTTP JSON body `public_key` | `hex.DecodeString()` -> `manifest.Verify()` -> `ed25519.Verify()` | hex decoding only; no public key length check | panic-triggered DoS via malformed Ed25519 public key | high | high | open
+pkg/api/provider.go:346 | HTTP JSON body `private_key` | `hex.DecodeString()` -> `manifest.Sign()` -> `ed25519.Sign()` | hex decoding only; no private key length check | panic-triggered DoS via malformed Ed25519 private key | high | high | open
+marketplace/installer.go:50 | marketplace index `Module.Code` and `Module.Repo` | `resolveModulePath()` -> `git clone <repo> <dest>` -> manifest load/store | module code is now confined with `ResolvePathWithinRoot`; repo URL is unvalidated | arbitrary git transport / SSRF / local-path clone from untrusted marketplace index | high | high | open
+marketplace/installer.go:195 | marketplace index `Module.SignKey` | `hex.DecodeString()` -> `manifest.LoadVerified()` -> `ed25519.Verify()` | hex decoding only; no public key length check | panic-triggered DoS during install/update from malformed marketplace metadata | high | high | open
+plugin/installer.go:181 | plugin source `org/repo@version` | `ParseSource()` -> registry name/path selection -> clone arguments | org and repo validated as path elements; version only checked for non-empty string | branch/ref abuse and unexpected checkout target selection | medium | med | mapped
+plugin/installer.go:165 | plugin `@version` suffix | `gh repo clone <org/repo> <dest> -- --branch <version>` | `exec.CommandContext` blocks shell injection; no ref allowlist | malicious ref names can force unexpected refs/tags or failure loops | medium | med | open
+plugin/loader.go:51 | caller-supplied plugin name | `filepath.Join(baseDir, name, "plugin.json")` -> `LoadManifest()` | no validation on `name` | path traversal / arbitrary local file read | high | high | open
+repos/registry.go:66 | caller-supplied registry path | `io.Medium.Read(path)` -> YAML parse | no path validation | arbitrary local file read via caller-chosen registry path | medium | high | open
+repos/registry.go:81 | YAML `base_path` and per-repo `path` values | expanded `Repo.Path` -> later `Exists()`, `IsGitRepo()`, git config probing | tilde expansion only; no root confinement | workspace escape / probing of arbitrary directories from untrusted registry | medium | high | open
+repos/kbconfig.go:69 | caller-supplied workspace root | `.core/kb.yaml` read/write | no canonicalisation beyond join | arbitrary local read/write under caller-selected root | low | high | mapped
+repos/kbconfig.go:110 | KB config `Wiki.Remote`, `Wiki.Dir`, and caller-supplied `repoName` | `WikiRepoURL()` and `WikiLocalPath()` | no validation of remote URL, local subdir, or repo name | path traversal in local wiki path and clone redirection to attacker-chosen remote | medium | high | open
+```
+
+## Deduplicated Attack Vectors
+
+- `high` Path traversal and arbitrary local read/write from unsanitised path joins in `collect/`, `plugin/loader.go`, `repos/registry.go`, and `repos/kbconfig.go`.
+- `high` SSRF, server-side clone, and credential exfiltration via unvalidated Forgejo/Gitea base URLs, migration clone URLs, and marketplace repo URLs.
+- `high` Panic-triggered availability loss from unchecked Ed25519 key lengths in CLI and HTTP signing/verification flows.
+- `high` Prompt injection and remote worker influence via raw issue title/body content written into dispatch tickets.
+- `medium` Stored markdown/comment injection via GitHub bodies, processed HTML links, jobrunner error comments, and completion messages.
+- `medium` Workflow spoofing from weak issue/PR text parsing in `jobrunner/forgejo/signals.go`.
+- `medium` Branch/ref steering and unexpected checkout behaviour in plugin installation.
+- `medium` Concurrency races in `collect/ratelimit.go` and `git/service.go`.
+- `low` Broad CLI-selected filesystem scope in `cmd/collect`, `cmd/scm export`, and `repos/kbconfig.go` remains expected but should not be delegated to lower-trust callers.
+- `low` Verified mitigations exist for journal path safety, marketplace code path validation, and sync local-path confinement.
+
+## Verification Status
+
+- Total mapped flows: `50`
+- `open`: `37`
+- `validated`: `6`
+- `mapped`: `7`
+- High severity: `18`
+- Medium severity: `20`
+- Low severity: `12`
+
+## Subsystem Summary
+
+- `P1 collect/`: the highest concentration of exploitable issues is in collector output path construction and persistence of raw third-party content. `collect/github.go`, `collect/market.go`, `collect/bitcointalk.go`, `collect/papers.go`, and `collect/process.go` all need path and content hardening.
+- `P1 forge/gitea/marketplace/pkg/api`: base URL trust, migration/mirror clone sources, and Ed25519 key handling are the main confidentiality and availability risks. These are central control-plane paths and should be fixed before widening API exposure.
+- `P2 jobrunner/`: epic/PR text parsing, remote dispatch ticket generation, and comment reporting create workflow spoofing and prompt-injection surfaces. The journal implementation is the notable exception and is already hardened.
+- `P2 workspace/plugin/repos/git`: plugin loading, registry path handling, wiki path generation, and shared git-service state carry medium-risk filesystem and race-condition exposure.
+- `P3 validated controls`: `jobrunner/journal.go`, `cmd/forge/cmd_sync.go`, `cmd/gitea/cmd_sync.go`, and `pkg/api/provider.go` marketplace code normalisation are solid reference implementations for future hardening work.
+
+## High-Priority Follow-Up
+
+- Replace every collector and loader `filepath.Join(..., userValue, ...)` call with strict path-element validation before directory creation or file reads/writes.
+- Add scheme, host, and canonicalisation allowlists for Forgejo/Gitea base URLs, migration clone URLs, marketplace repo URLs, and wiki remotes.
+- Reject invalid Ed25519 key lengths before calling `ed25519.Sign` or `ed25519.Verify` in CLI, HTTP, and installer code paths.
+- Treat dispatch tickets as untrusted agent prompts: strip or isolate raw issue bodies, add provenance metadata, and avoid shipping long-lived Forge tokens to remote workers where possible.
+- Harden `jobrunner/forgejo` linking logic so epic children and PR references must match explicit repo/issue relationships, not bare `#123` text.
+- Sanitize or encode user-visible comments and stored markdown links before writing them to Forgejo or output files.
+- Fix `collect.RateLimiter.Wait` and `git.Service.lastStatus` with proper reservation and synchronisation.
+
+## Scan Coverage
+
+The full scan set from the issue plan was reviewed in package order. Files below either map directly to the matrix above or are forward-only wrappers/helpers with no additional unique external entry point beyond the flows already captured.
+
+- `agentci`: `clotho.go` (strategy helper only), `config.go` (matrix), `security.go` (validation utilities only).
+- `cmd/collect`: `cmd.go` (matrix), `cmd_bitcointalk.go`, `cmd_excavate.go`, `cmd_github.go`, `cmd_market.go`, `cmd_papers.go`, `cmd_process.go` (all covered by `collect/*` rows), `cmd_dispatch.go` (local event enum validation only; no command execution sink).
+- `cmd/forge`: `cmd_auth.go` and `cmd_config.go` feed the `forge/config.go` row; `cmd_migrate.go` and `cmd_sync.go` are in the matrix; `cmd_forge.go`, `cmd_issues.go`, `cmd_labels.go`, `cmd_orgs.go`, `cmd_prs.go`, `cmd_repos.go`, `cmd_status.go`, `helpers.go` are CLI/API wrappers only.
+- `cmd/gitea`: `cmd_config.go` feeds the `gitea/config.go` row; `cmd_mirror.go` and `cmd_sync.go` are in the matrix; `cmd_gitea.go`, `cmd_issues.go`, `cmd_prs.go`, `cmd_repos.go` are CLI/API wrappers only.
+- `cmd/scm`: `cmd_compile.go`, `cmd_export.go`, `cmd_index.go` are in the matrix; `cmd_scm.go` is a root command wrapper only.
+- `collect`: `bitcointalk.go`, `collect.go`, `github.go`, `market.go`, `papers.go`, `process.go`, `ratelimit.go` are in the matrix; `events.go` and `excavate.go` add no new sink beyond already-mapped config and collector flows; `state.go` is subsumed by the shared `--output` path row.
+- `forge`: `client.go`, `config.go`, `repos.go` are in the matrix; `issues.go`, `labels.go`, `meta.go`, `orgs.go`, `prs.go`, `webhooks.go` forward typed inputs into the SDK without additional local command/path construction.
+- `git`: `service.go` is in the matrix; `git.go` is command-wrapper logic without additional external entry surface beyond caller-supplied repo paths.
+- `gitea`: `client.go`, `config.go`, `repos.go` are in the matrix; `issues.go` and `meta.go` are SDK wrappers only.
+- `jobrunner/forgejo`: `signals.go` and `source.go` are in the matrix.
+- `jobrunner/handlers`: `completion.go` and `dispatch.go` are in the matrix; `enable_auto_merge.go`, `publish_draft.go`, `resolve_threads.go`, `send_fix_command.go`, and `tick_parent.go` consume already-mapped `PipelineSignal` fields and emit fixed API calls/messages only.
+- `jobrunner`: `journal.go` is in the matrix; `poller.go` and `types.go` define orchestration and data structures without introducing new external input parsing.
+- `locales`: `embed.go` contains no runtime external input handling.
+- `manifest`: `compile.go` and `sign.go` are covered through `cmd/scm` and `pkg/api/provider.go`; `loader.go` and `manifest.go` parse fixed project-local files only.
+- `marketplace`: `builder.go` and `installer.go` are in the matrix; `discovery.go` and `marketplace.go` are read-only helpers around already-mapped local filesystem or in-memory index flows.
+- `pkg/api`: `provider.go` is in the matrix; `embed.go` contains no external input handling.
+- `plugin`: `installer.go`, `loader.go`, and `registry.go` are in the matrix; `config.go`, `manifest.go`, and `plugin.go` define data structures and validation helpers only.
+- `repos`: `kbconfig.go` and `registry.go` are in the matrix; `gitstate.go` and `workconfig.go` are config/struct helpers without new sink construction.