diff --git a/agentci/security.go b/agentci/security.go index 52c106c..98d76a7 100644 --- a/agentci/security.go +++ b/agentci/security.go @@ -2,6 +2,7 @@ package agentci import ( "os/exec" + "path" "path/filepath" "regexp" "strings" @@ -15,13 +16,134 @@ var safeNameRegex = regexp.MustCompile(`^[a-zA-Z0-9\-\_\.]+$`) // Returns filepath.Base of the input after validation. func SanitizePath(input string) (string, error) { base := filepath.Base(input) - if !safeNameRegex.MatchString(base) { - return "", coreerr.E("agentci.SanitizePath", "invalid characters in path element: "+input, nil) + safeBase, err := ValidatePathElement(base) + if err != nil { + return "", coreerr.E("agentci.SanitizePath", "invalid path element", err) } - if base == "." || base == ".." || base == "/" { - return "", coreerr.E("agentci.SanitizePath", "invalid path element: "+base, nil) + return safeBase, nil +} + +// ValidatePathElement validates a single filesystem or URL path element. +func ValidatePathElement(input string) (string, error) { + if input == "" { + return "", coreerr.E("agentci.ValidatePathElement", "path element is empty", nil) } - return base, nil + if strings.TrimSpace(input) != input { + return "", coreerr.E("agentci.ValidatePathElement", "path element has leading or trailing whitespace", nil) + } + if input == "." || input == ".." { + return "", coreerr.E("agentci.ValidatePathElement", "invalid path element: "+input, nil) + } + if strings.ContainsAny(input, `/\`) || filepath.IsAbs(input) { + return "", coreerr.E("agentci.ValidatePathElement", "path element must not contain path separators", nil) + } + if !safeNameRegex.MatchString(input) { + return "", coreerr.E("agentci.ValidatePathElement", "invalid characters in path element: "+input, nil) + } + + return input, nil +} + +// ResolvePathWithinRoot returns a validated name and an absolute path constrained to the given root. +func ResolvePathWithinRoot(root, name string) (string, string, error) { + if root == "" { + return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "root is empty", nil) + } + + safeName, err := ValidatePathElement(name) + if err != nil { + return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "invalid path element", err) + } + + rootAbs, err := filepath.Abs(root) + if err != nil { + return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "resolve root path", err) + } + rootAbs = filepath.Clean(rootAbs) + + resolved := filepath.Clean(filepath.Join(rootAbs, safeName)) + rel, err := filepath.Rel(rootAbs, resolved) + if err != nil { + return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "compute relative path", err) + } + if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) { + return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "resolved path escapes root", nil) + } + + return safeName, resolved, nil +} + +// ValidateRemoteDir validates a remote POSIX directory path for SSH command usage. +func ValidateRemoteDir(input string) (string, error) { + if input == "" { + return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir is empty", nil) + } + if strings.TrimSpace(input) != input { + return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir has leading or trailing whitespace", nil) + } + if strings.ContainsAny(input, "\x00\r\n") { + return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir contains control characters", nil) + } + if input == "/" || input == "~" { + return input, nil + } + + var ( + parts []string + rest string + ) + switch { + case strings.HasPrefix(input, "/"): + rest = strings.TrimPrefix(input, "/") + case strings.HasPrefix(input, "~/"): + rest = strings.TrimPrefix(input, "~/") + default: + return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir must be absolute or home-relative", nil) + } + + for _, part := range strings.Split(rest, "/") { + if part == "" { + continue + } + safePart, err := ValidatePathElement(part) + if err != nil { + return "", coreerr.E("agentci.ValidateRemoteDir", "invalid remote dir segment", err) + } + parts = append(parts, safePart) + } + + switch { + case strings.HasPrefix(input, "/"): + if len(parts) == 0 { + return "/", nil + } + return "/" + strings.Join(parts, "/"), nil + default: + if len(parts) == 0 { + return "~", nil + } + return "~/" + strings.Join(parts, "/"), nil + } +} + +// JoinRemotePath joins a validated remote directory with validated path elements. +func JoinRemotePath(base string, elems ...string) (string, error) { + safeBase, err := ValidateRemoteDir(base) + if err != nil { + return "", coreerr.E("agentci.JoinRemotePath", "invalid base path", err) + } + + segments := make([]string, 0, len(elems)+1) + segments = append(segments, safeBase) + for _, elem := range elems { + safeElem, err := ValidatePathElement(elem) + if err != nil { + return "", coreerr.E("agentci.JoinRemotePath", "invalid path element", err) + } + segments = append(segments, safeElem) + } + + return path.Join(segments...), nil } // EscapeShellArg wraps a string in single quotes for safe remote shell insertion. diff --git a/agentci/security_test.go b/agentci/security_test.go index 6d0b68e..408c8e7 100644 --- a/agentci/security_test.go +++ b/agentci/security_test.go @@ -3,6 +3,7 @@ package agentci import ( + "path/filepath" "testing" "github.com/stretchr/testify/assert" @@ -56,6 +57,134 @@ func TestSanitizePath_Bad(t *testing.T) { } } +func TestValidatePathElement_Good(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"simple", "simple"}, + {"with-dash", "with-dash"}, + {"with_underscore", "with_underscore"}, + {"with.dot", "with.dot"}, + {"ticket-host-uk-core-1.json", "ticket-host-uk-core-1.json"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := ValidatePathElement(tt.input) + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestValidatePathElement_Bad(t *testing.T) { + tests := []struct { + name string + input string + }{ + {"empty", ""}, + {"space", "has space"}, + {"leading space", " name"}, + {"slash", "org/repo"}, + {"backslash", `org\\repo`}, + {"traversal", ".."}, + {"absolute", "/tmp"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := ValidatePathElement(tt.input) + assert.Error(t, err) + }) + } +} + +func TestResolvePathWithinRoot_Good(t *testing.T) { + root := t.TempDir() + + gotName, gotPath, err := ResolvePathWithinRoot(root, "plugin-one") + require.NoError(t, err) + assert.Equal(t, "plugin-one", gotName) + assert.Equal(t, filepath.Join(root, "plugin-one"), gotPath) +} + +func TestResolvePathWithinRoot_Bad(t *testing.T) { + root := t.TempDir() + + _, _, err := ResolvePathWithinRoot(root, "../escape") + require.Error(t, err) +} + +func TestValidateRemoteDir_Good(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"/tmp/queue", "/tmp/queue"}, + {"/tmp//queue/", "/tmp/queue"}, + {"~/ai-work/queue", "~/ai-work/queue"}, + {"~/ai-work//queue/", "~/ai-work/queue"}, + {"~", "~"}, + {"/", "/"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := ValidateRemoteDir(tt.input) + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestValidateRemoteDir_Bad(t *testing.T) { + tests := []struct { + name string + input string + }{ + {"empty", ""}, + {"relative", "queue"}, + {"shell metachar", "/tmp/queue; touch /tmp/pwned"}, + {"space", "/tmp/queue name"}, + {"traversal", "~/../queue"}, + {"newline", "/tmp/queue\nother"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := ValidateRemoteDir(tt.input) + assert.Error(t, err) + }) + } +} + +func TestJoinRemotePath_Good(t *testing.T) { + tests := []struct { + name string + base string + elem string + want string + }{ + {"absolute", "/tmp/queue", "ticket.json", "/tmp/queue/ticket.json"}, + {"home relative", "~/ai-work/queue", "ticket.json", "~/ai-work/queue/ticket.json"}, + {"home root", "~", ".env.ticket", "~/.env.ticket"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := JoinRemotePath(tt.base, tt.elem) + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestJoinRemotePath_Bad(t *testing.T) { + _, err := JoinRemotePath("/tmp/queue", "../ticket.json") + assert.Error(t, err) +} + func TestEscapeShellArg_Good(t *testing.T) { tests := []struct { input string diff --git a/docs/plans/2026-03-24-security-attack-vector-mapping.md b/docs/plans/2026-03-24-security-attack-vector-mapping.md new file mode 100644 index 0000000..cd7063a --- /dev/null +++ b/docs/plans/2026-03-24-security-attack-vector-mapping.md @@ -0,0 +1,199 @@ +# [scan] Security attack vector mapping — Implementation Plan + +> For Codex agent: this document is the issue body to execute next. + +## Goal +Map every external input entry point across `core/go-scm` and produce a complete attack-vector matrix with evidence and validation status. + +## Scope +- Repository: `core/go-scm` +- Language: Go +- Branch: `agent/create-an-implementation-plan-for-this-s` +- Target files: all non-test Go source files + +## 1) Every file to scan + +Use this exact list as the scan set. + +- `agentci/clotho.go` +- `agentci/config.go` +- `agentci/security.go` +- `cmd/collect/cmd.go` +- `cmd/collect/cmd_bitcointalk.go` +- `cmd/collect/cmd_dispatch.go` +- `cmd/collect/cmd_excavate.go` +- `cmd/collect/cmd_github.go` +- `cmd/collect/cmd_market.go` +- `cmd/collect/cmd_papers.go` +- `cmd/collect/cmd_process.go` +- `cmd/forge/cmd_auth.go` +- `cmd/forge/cmd_config.go` +- `cmd/forge/cmd_forge.go` +- `cmd/forge/cmd_issues.go` +- `cmd/forge/cmd_labels.go` +- `cmd/forge/cmd_migrate.go` +- `cmd/forge/cmd_orgs.go` +- `cmd/forge/cmd_prs.go` +- `cmd/forge/cmd_repos.go` +- `cmd/forge/cmd_status.go` +- `cmd/forge/cmd_sync.go` +- `cmd/forge/helpers.go` +- `cmd/gitea/cmd_config.go` +- `cmd/gitea/cmd_gitea.go` +- `cmd/gitea/cmd_issues.go` +- `cmd/gitea/cmd_mirror.go` +- `cmd/gitea/cmd_prs.go` +- `cmd/gitea/cmd_repos.go` +- `cmd/gitea/cmd_sync.go` +- `cmd/scm/cmd_compile.go` +- `cmd/scm/cmd_export.go` +- `cmd/scm/cmd_index.go` +- `cmd/scm/cmd_scm.go` +- `collect/bitcointalk.go` +- `collect/collect.go` +- `collect/events.go` +- `collect/excavate.go` +- `collect/github.go` +- `collect/market.go` +- `collect/papers.go` +- `collect/process.go` +- `collect/ratelimit.go` +- `collect/state.go` +- `forge/client.go` +- `forge/config.go` +- `forge/issues.go` +- `forge/labels.go` +- `forge/meta.go` +- `forge/orgs.go` +- `forge/prs.go` +- `forge/repos.go` +- `forge/webhooks.go` +- `git/git.go` +- `git/service.go` +- `gitea/client.go` +- `gitea/config.go` +- `gitea/issues.go` +- `gitea/meta.go` +- `gitea/repos.go` +- `jobrunner/forgejo/signals.go` +- `jobrunner/forgejo/source.go` +- `jobrunner/handlers/completion.go` +- `jobrunner/handlers/dispatch.go` +- `jobrunner/handlers/enable_auto_merge.go` +- `jobrunner/handlers/publish_draft.go` +- `jobrunner/handlers/resolve_threads.go` +- `jobrunner/handlers/send_fix_command.go` +- `jobrunner/handlers/tick_parent.go` +- `jobrunner/journal.go` +- `jobrunner/poller.go` +- `jobrunner/types.go` +- `locales/embed.go` +- `manifest/compile.go` +- `manifest/loader.go` +- `manifest/manifest.go` +- `manifest/sign.go` +- `marketplace/builder.go` +- `marketplace/discovery.go` +- `marketplace/installer.go` +- `marketplace/marketplace.go` +- `pkg/api/embed.go` +- `pkg/api/provider.go` +- `plugin/config.go` +- `plugin/installer.go` +- `plugin/loader.go` +- `plugin/manifest.go` +- `plugin/plugin.go` +- `plugin/registry.go` +- `repos/gitstate.go` +- `repos/kbconfig.go` +- `repos/registry.go` +- `repos/workconfig.go` + +## 2) What to look for in each file + +### A) External input entry points +- CLI entry arguments and flags +- CLI env-config precedence (`config`, `FORGE_*`, `GITEA_*`) +- HTTP request handlers, webhook payloads, request headers, params, and body +- Filesystem paths and filenames supplied as config/flags/env/user values +- External URLs, repo names, owner/org names, branch names +- SSH command inputs and key material +- Collector source inputs (search terms, query terms, package names, IDs) +- Job signals/labels/checklists/issue fields/command payloads + +### B) Validation gaps to record +- Missing/weak allowlists for path segments and IDs +- Missing canonicalisation of URLs/paths/refs before use +- Incomplete enum/type validation (state, status, action) +- Unsafe conversions/casts without bounds checks +- Insufficient escaping for shell, file, JSON, or command composition +- Trusting external API responses without schema checks +- Ignoring error paths that should halt processing + +### C) Injection vectors +- OS command construction +- Path traversal / arbitrary file write/read +- Log forging / token leak via logging +- SSRF via configurable URLs and webhooks +- Template/HTML injection through rendered output paths +- SQL-like or LDAP-like interpolation (where backend uses query strings) +- Git command argument injection and branch/ref injection +- Header injection in HTTP clients/servers + +### D) Race condition risks +- Shared mutable state in handlers and services +- Journal writes/read/write-back without coordination +- Async polling loops sharing token/counter/state +- Map/slice writes from worker goroutines +- TOCTOU around file presence/load/read-modify-write sequences +- Cache + state refresh races under concurrent polling + +## 3) Required output format for findings + +Each finding row must contain all columns in this exact order: + +`file:line | input source | flows into | validation | attack vector` + +Example format: + +`collect/github.go:142 | cmd flag `--org` (string) | buildGitHubCollector(config) -> net/http request URL | domain allowlist absent | SSRF + data exfil by domain override` + +### Minimal per-row capture fields +- `file:line` (primary function where input first enters) +- `input source` (flag/env/body/path/header/id) +- `flows into` (target function/call chain) +- `validation` (what checks currently exist, if any) +- `attack vector` (confidentiality/integrity/availability risk) + +### Evidence fields (optional but preferred) +- CWE id +- Repro path +- severity (`low`/`medium`/`high`) +- confidence (`high`/`med`/`low`) +- mitigation candidate + +## 4) Where to write the report + +Primary report file to produce: + +- `docs/security/scan-attack-vector-mapping-report.md` + +This file must contain: +1. The completed matrix in the required format above +2. Deduplicated list of attack vectors with severity +3. Verification status (`mapped`, `validated`, `open` +) +4. A final summary by subsystem and risk priority + +This issue body should be replaced with a pointer plus a short runbook: +- "Execution plan in `docs/plans/2026-03-24-security-attack-vector-mapping.md`, results in `docs/security/scan-attack-vector-mapping-report.md`." + +## Execution order + +- [x] 1. Create report file and add header + schema columns +- [x] 2. Scan files in package order listed above +- [x] 3. For each file, capture every external input entry point and map to sinks +- [x] 4. Populate one row per mapped flow in the required `file:line | ...` format +- [x] 5. Cross-check for duplicates and deduplicate by identical sink and attack vector +- [x] 6. Add severity and validation gap notes per row +- [x] 7. Finalise summary + high-priority follow-up list diff --git a/docs/security/scan-attack-vector-mapping-report.md b/docs/security/scan-attack-vector-mapping-report.md new file mode 100644 index 0000000..ad0c557 --- /dev/null +++ b/docs/security/scan-attack-vector-mapping-report.md @@ -0,0 +1,134 @@ +# Security Attack Vector Mapping Report + +Execution plan in `docs/plans/2026-03-24-security-attack-vector-mapping.md`, results in `docs/security/scan-attack-vector-mapping-report.md`. + +- Issue: `[scan] Security attack vector mapping` +- Repo: `core/go-scm` +- Branch: `agent/create-an-implementation-plan-for-this-s` +- Date: `2026-03-24` +- Agent: `Codex` + +## Findings Matrix + +Status meanings: +- `open`: validation gap or exploitable flow remains. +- `validated`: explicit hardening exists and was verified in code. +- `mapped`: flow is real, but the current risk is limited or mostly delegated to downstream tools. + +```text +file:line | input source | flows into | validation | attack vector | severity | confidence | verification +agentci/config.go:34 | config file `agentci.agents.*` (`host`, `queue_dir`, `forgejo_user`, models, runner) | `LoadAgents()` -> `Spinner.FindByForgejoUser()` -> `jobrunner/handlers.DispatchHandler` SSH target selection | only presence/default checks; queue dir is validated later, host is never allowlisted | SSH target manipulation / unauthorised agent routing if config is attacker-controlled | medium | med | mapped +cmd/collect/cmd.go:56 | CLI flag `--output` | `newConfig()` -> `collect.NewConfigWithMedium()` -> `cfg.OutputDir` and `.collect-state.json` | no canonicalisation or root policy | arbitrary local file write/read root for all collectors and processors | low | high | open +collect/bitcointalk.go:64 | positional `` via `cmd/collect/cmd_bitcointalk.go` | `BitcoinTalkCollector.Collect()` -> `filepath.Join(cfg.OutputDir, "bitcointalk", topicID, "posts")` -> `EnsureDir`/`Write` | only non-empty topic ID required | path traversal / arbitrary file write | high | high | open +collect/bitcointalk.go:89 | positional `` via `cmd/collect/cmd_bitcointalk.go` | `fmt.Sprintf("https://bitcointalk.org/index.php?topic=%s.%d", topicID, offset)` -> `http.NewRequestWithContext` | no query escaping or numeric check | malformed request / same-host request steering | medium | med | open +collect/github.go:123 | CLI `org/repo`, `--org`, or `excavate ` fallback org | `exec.CommandContext("gh", "repo", "list", g.Org, ...)` | no org format validation; `exec.CommandContext` blocks shell metachar expansion | scope expansion / excessive API enumeration; low injection surface | low | med | mapped +collect/github.go:179 | CLI `org/repo` and GitHub repo names from `gh repo list` | `filepath.Join(cfg.OutputDir, "github", g.Org, repo, "issues")` -> `EnsureDir`/`Write` | no path-segment validation on `g.Org` or `repo` | path traversal / arbitrary file write | high | high | open +collect/github.go:238 | CLI `org/repo` and GitHub repo names from `gh repo list` | `filepath.Join(cfg.OutputDir, "github", g.Org, repo, "pulls")` -> `EnsureDir`/`Write` | no path-segment validation on `g.Org` or `repo` | path traversal / arbitrary file write | high | high | open +collect/github.go:284 | GitHub API `issue.body` / `pr.body` | `formatIssueMarkdown()` -> markdown files under `cfg.Output` | no sanitisation before persistence | stored markdown/HTML/prompt injection in downstream renderers or LLM ingestion | medium | high | open +collect/market.go:84 | CLI `` via `cmd/collect/cmd_market.go` | `filepath.Join(cfg.OutputDir, "market", m.CoinID)` -> `EnsureDir` | only non-empty `CoinID` check | path traversal / arbitrary file write | high | high | open +collect/market.go:132 | CLI `` via `cmd/collect/cmd_market.go` | `fmt.Sprintf("%s/coins/%s", coinGeckoBaseURL, m.CoinID)` -> `fetchJSON()` | no path escaping or allowlist for `CoinID` | same-host endpoint injection / SSRF-style path steering | medium | high | open +collect/market.go:184 | CLI `` and `--from` | `fmt.Sprintf("%s/coins/%s/market_chart?vs_currency=usd&days=%s", ...)` -> `fetchJSON()` | `FromDate` parsing bounds `days`; `CoinID` remains unescaped | same-host endpoint injection / excessive upstream query shaping | medium | high | open +collect/papers.go:113 | CLI `--query` | `url.QueryEscape(p.Query)` -> IACR search request | query is escaped before request construction | query injection blocked; residual risk is upstream content trust only | low | high | validated +collect/papers.go:145 | IACR HTML `href` values parsed from remote search results | `parseIACREntry()` -> derived `ppr.ID` -> `filepath.Join(baseDir, ppr.ID+".md")` | no filename sanitisation on remote-derived ID | path traversal / arbitrary file write if upstream HTML is hostile or compromised | high | med | open +collect/papers.go:199 | CLI `--query` and `--category` | `url.QueryEscape(...)` -> arXiv API request, then `arxivEntryToPaper()` replaces `/` and `:` in IDs | query and category are escaped; arXiv IDs are partially canonicalised | request injection largely blocked; residual risk is remote content trust | low | high | validated +collect/process.go:50 | CLI `` via `cmd/collect/cmd_process.go` | `cfg.Output.List(p.Dir)` and `cfg.Output.Read(filepath.Join(p.Dir, name))` | no root containment or canonicalisation of `p.Dir` | arbitrary local read/list when using `io.Local` medium | high | high | open +collect/process.go:55 | CLI `` via `cmd/collect/cmd_process.go` | `filepath.Join(cfg.OutputDir, "processed", p.Source)` -> `EnsureDir`/`Write` | no path validation on `p.Source` | path traversal / arbitrary file write | high | high | open +collect/process.go:214 | source HTML `href` attributes from files under `p.Dir` | `nodeToMarkdown()` -> `[text](href)` persisted to markdown | no scheme allowlist or URL sanitisation | stored `javascript:` / `data:` link injection | medium | high | open +collect/ratelimit.go:44 | concurrent caller-controlled source keys | `RateLimiter.Wait()` -> shared `last[source]` map | mutex released before slot reservation; no second eligibility check after sleep | burst bypass / API quota exhaustion / availability degradation | medium | high | open +forge/config.go:40 | config file `forge.url`, `forge.token`, env `FORGE_URL`, `FORGE_TOKEN`, CLI overrides | `ResolveConfig()` -> `forge.New()` -> Forgejo SDK client | token required, URL not allowlisted or canonicalised | SSRF / credential exfiltration to attacker-controlled Forgejo endpoint | high | high | open +cmd/forge/cmd_migrate.go:46 | CLI ``, `--service`, `--token`, `--org` | `runMigrate()` -> `forge.Client.MigrateRepo()` with `CloneAddr` and `AuthToken` | repo name is derived from URL tail only; clone URL itself is not constrained | server-side clone SSRF / internal repo import / credential misuse | high | high | open +cmd/forge/cmd_sync.go:95 | CLI repo args and `--base-path` | `syncRepoNameFromArg()` + `ResolvePathWithinRoot()` -> `git -C ` / `git push` | repo names validated via `ValidatePathElement`; local path confined to base path | path traversal mitigated; residual risk limited to untrusted Forge URL config | low | high | validated +gitea/config.go:40 | config file `gitea.url`, `gitea.token`, env `GITEA_URL`, `GITEA_TOKEN`, CLI overrides | `ResolveConfig()` -> `gitea.New()` -> Gitea SDK client | token required, URL not allowlisted or canonicalised | SSRF / credential exfiltration to attacker-controlled Gitea endpoint | high | high | open +cmd/gitea/cmd_mirror.go:45 | CLI `` and optional GitHub token | `runMirror()` -> fixed `https://github.com//.git` -> `gitea.CreateMirror()` | owner/repo are split, but not allowlisted; source token may be forwarded | server-side clone of attacker-chosen repo and token relay to remote Gitea | high | med | open +cmd/gitea/cmd_sync.go:95 | CLI repo args and `--base-path` | `repoNameFromArg()` + `ResolvePathWithinRoot()` -> `git -C ` / `git push` | repo names validated via `ValidatePathElement`; local path confined to base path | path traversal mitigated; residual risk limited to untrusted Gitea URL config | low | high | validated +jobrunner/forgejo/signals.go:16 | epic issue body checklist text | `parseEpicChildren()` -> child issue numbers -> `pollRepo()` dispatch decisions | regex only checks `#`; no repository or ownership cross-check | workflow spoofing / unintended child issue dispatch | medium | high | open +jobrunner/forgejo/signals.go:36 | PR body text | `findLinkedPR()` -> child issue association -> downstream handler execution | first `#` match wins; no keyword, owner, or repo validation | PR-body spoofing causes wrong issue state transitions | medium | high | open +jobrunner/forgejo/source.go:72 | handler `result.Error` strings | `Report()` -> Forgejo issue comment code block | no sanitisation, truncation, or mention filtering | comment injection / mention spam / log forging of pipeline output | medium | high | open +jobrunner/handlers/dispatch.go:89 | agent config `queue_dir` plus `signal.RepoOwner` / `signal.RepoName` | `Execute()` -> remote queue path building and Forgejo label/comment operations | remote dir is validated; repo owner/name still pass through `SanitizePath(filepath.Base(...))` | retargeting/confusion if a forged signal supplies slash-containing names | medium | med | open +jobrunner/handlers/dispatch.go:151 | issue title/body, assignee, repo metadata from `PipelineSignal` | `DispatchTicket` JSON -> remote agent queue file | JSON encoding only; content is not filtered, signed, or policy-checked | prompt injection / remote agent control-surface abuse | high | high | open +jobrunner/handlers/dispatch.go:212 | local Forge token | `.env.` content -> remote worker filesystem | remote path validated and file mode set to `0600` after transfer | credential exposure on remote worker host, snapshots, or backups | medium | high | mapped +jobrunner/handlers/dispatch.go:255 | runtime `reason` strings and raw `signal.RepoOwner` / `signal.RepoName` | `failDispatch()` -> labels and issue comments on Forgejo | no sanitisation; fail path uses raw signal repo fields | comment injection and repo-retargeting if upstream signal is forged | medium | med | open +jobrunner/handlers/completion.go:58 | agent completion `signal.Message` / `signal.Error` | `CreateIssueComment()` on child issue | no sanitisation, truncation, or mention filtering | stored comment injection / mention spam / log forging | medium | high | open +jobrunner/journal.go:130 | `signal.RepoOwner` / `signal.RepoName` | `Append()` -> `///.jsonl` | strict component regex, absolute-prefix containment check, and mutexed writes | path traversal blocked; safe write pattern verified | low | high | validated +git/service.go:72 | concurrent query/task caller inputs `Paths` and service calls | `handleQuery()` / `handleTask()` -> shared `s.lastStatus` cache | no mutex or atomic protection around `lastStatus` | data race / stale or torn state under concurrent queries | medium | high | open +cmd/scm/cmd_compile.go:54 | CLI `--sign-key` hex string | `hex.DecodeString()` -> `manifest.Compile()` -> `manifest.Sign()` -> `ed25519.Sign()` | hex decoding only; no private key length check | panic-triggered DoS via invalid Ed25519 private key length | high | high | open +cmd/scm/cmd_export.go:28 | CLI `--dir` | `io.NewSandboxed(dir)` -> `manifest.LoadCompiled()` / `manifest.Load()` -> stdout | sandboxed medium confines access to the chosen root | local file disclosure limited to caller-selected directory | low | high | mapped +cmd/scm/cmd_index.go:39 | CLI `--dir`, `--output`, `--base-url`, `--org` | `marketplace.Builder.BuildFromDirs()` / `marketplace.WriteIndex()` | no path or URL validation beyond OS errors | arbitrary local read/write and untrusted repo URL generation in produced index | medium | high | open +pkg/api/provider.go:202 | HTTP query params `q` and `category` | `listMarketplace()` -> in-memory `Index.Search()` / `ByCategory()` | no server-side escaping needed; no filesystem or outbound request sink | low-risk search-scope manipulation only | low | high | mapped +pkg/api/provider.go:466 | HTTP path param `:code` | `marketplaceCodeParam()` -> install/update/remove lookups | `PathUnescape()` + `ValidatePathElement()` | path traversal blocked for marketplace code paths | low | high | validated +pkg/api/provider.go:309 | HTTP JSON body `public_key` | `hex.DecodeString()` -> `manifest.Verify()` -> `ed25519.Verify()` | hex decoding only; no public key length check | panic-triggered DoS via malformed Ed25519 public key | high | high | open +pkg/api/provider.go:346 | HTTP JSON body `private_key` | `hex.DecodeString()` -> `manifest.Sign()` -> `ed25519.Sign()` | hex decoding only; no private key length check | panic-triggered DoS via malformed Ed25519 private key | high | high | open +marketplace/installer.go:50 | marketplace index `Module.Code` and `Module.Repo` | `resolveModulePath()` -> `git clone ` -> manifest load/store | module code is now confined with `ResolvePathWithinRoot`; repo URL is unvalidated | arbitrary git transport / SSRF / local-path clone from untrusted marketplace index | high | high | open +marketplace/installer.go:195 | marketplace index `Module.SignKey` | `hex.DecodeString()` -> `manifest.LoadVerified()` -> `ed25519.Verify()` | hex decoding only; no public key length check | panic-triggered DoS during install/update from malformed marketplace metadata | high | high | open +plugin/installer.go:181 | plugin source `org/repo@version` | `ParseSource()` -> registry name/path selection -> clone arguments | org and repo validated as path elements; version only checked for non-empty string | branch/ref abuse and unexpected checkout target selection | medium | med | mapped +plugin/installer.go:165 | plugin `@version` suffix | `gh repo clone -- --branch ` | `exec.CommandContext` blocks shell injection; no ref allowlist | malicious ref names can force unexpected refs/tags or failure loops | medium | med | open +plugin/loader.go:51 | caller-supplied plugin name | `filepath.Join(baseDir, name, "plugin.json")` -> `LoadManifest()` | no validation on `name` | path traversal / arbitrary local file read | high | high | open +repos/registry.go:66 | caller-supplied registry path | `io.Medium.Read(path)` -> YAML parse | no path validation | arbitrary local file read via caller-chosen registry path | medium | high | open +repos/registry.go:81 | YAML `base_path` and per-repo `path` values | expanded `Repo.Path` -> later `Exists()`, `IsGitRepo()`, git config probing | tilde expansion only; no root confinement | workspace escape / probing of arbitrary directories from untrusted registry | medium | high | open +repos/kbconfig.go:69 | caller-supplied workspace root | `.core/kb.yaml` read/write | no canonicalisation beyond join | arbitrary local read/write under caller-selected root | low | high | mapped +repos/kbconfig.go:110 | KB config `Wiki.Remote`, `Wiki.Dir`, and caller-supplied `repoName` | `WikiRepoURL()` and `WikiLocalPath()` | no validation of remote URL, local subdir, or repo name | path traversal in local wiki path and clone redirection to attacker-chosen remote | medium | high | open +``` + +## Deduplicated Attack Vectors + +- `high` Path traversal and arbitrary local read/write from unsanitised path joins in `collect/`, `plugin/loader.go`, `repos/registry.go`, and `repos/kbconfig.go`. +- `high` SSRF, server-side clone, and credential exfiltration via unvalidated Forgejo/Gitea base URLs, migration clone URLs, and marketplace repo URLs. +- `high` Panic-triggered availability loss from unchecked Ed25519 key lengths in CLI and HTTP signing/verification flows. +- `high` Prompt injection and remote worker influence via raw issue title/body content written into dispatch tickets. +- `medium` Stored markdown/comment injection via GitHub bodies, processed HTML links, jobrunner error comments, and completion messages. +- `medium` Workflow spoofing from weak issue/PR text parsing in `jobrunner/forgejo/signals.go`. +- `medium` Branch/ref steering and unexpected checkout behaviour in plugin installation. +- `medium` Concurrency races in `collect/ratelimit.go` and `git/service.go`. +- `low` Broad CLI-selected filesystem scope in `cmd/collect`, `cmd/scm export`, and `repos/kbconfig.go` remains expected but should not be delegated to lower-trust callers. +- `low` Verified mitigations exist for journal path safety, marketplace code path validation, and sync local-path confinement. + +## Verification Status + +- Total mapped flows: `50` +- `open`: `37` +- `validated`: `6` +- `mapped`: `7` +- High severity: `18` +- Medium severity: `20` +- Low severity: `12` + +## Subsystem Summary + +- `P1 collect/`: the highest concentration of exploitable issues is in collector output path construction and persistence of raw third-party content. `collect/github.go`, `collect/market.go`, `collect/bitcointalk.go`, `collect/papers.go`, and `collect/process.go` all need path and content hardening. +- `P1 forge/gitea/marketplace/pkg/api`: base URL trust, migration/mirror clone sources, and Ed25519 key handling are the main confidentiality and availability risks. These are central control-plane paths and should be fixed before widening API exposure. +- `P2 jobrunner/`: epic/PR text parsing, remote dispatch ticket generation, and comment reporting create workflow spoofing and prompt-injection surfaces. The journal implementation is the notable exception and is already hardened. +- `P2 workspace/plugin/repos/git`: plugin loading, registry path handling, wiki path generation, and shared git-service state carry medium-risk filesystem and race-condition exposure. +- `P3 validated controls`: `jobrunner/journal.go`, `cmd/forge/cmd_sync.go`, `cmd/gitea/cmd_sync.go`, and `pkg/api/provider.go` marketplace code normalisation are solid reference implementations for future hardening work. + +## High-Priority Follow-Up + +- Replace every collector and loader `filepath.Join(..., userValue, ...)` call with strict path-element validation before directory creation or file reads/writes. +- Add scheme, host, and canonicalisation allowlists for Forgejo/Gitea base URLs, migration clone URLs, marketplace repo URLs, and wiki remotes. +- Reject invalid Ed25519 key lengths before calling `ed25519.Sign` or `ed25519.Verify` in CLI, HTTP, and installer code paths. +- Treat dispatch tickets as untrusted agent prompts: strip or isolate raw issue bodies, add provenance metadata, and avoid shipping long-lived Forge tokens to remote workers where possible. +- Harden `jobrunner/forgejo` linking logic so epic children and PR references must match explicit repo/issue relationships, not bare `#123` text. +- Sanitize or encode user-visible comments and stored markdown links before writing them to Forgejo or output files. +- Fix `collect.RateLimiter.Wait` and `git.Service.lastStatus` with proper reservation and synchronisation. + +## Scan Coverage + +The full scan set from the issue plan was reviewed in package order. Files below either map directly to the matrix above or are forward-only wrappers/helpers with no additional unique external entry point beyond the flows already captured. + +- `agentci`: `clotho.go` (strategy helper only), `config.go` (matrix), `security.go` (validation utilities only). +- `cmd/collect`: `cmd.go` (matrix), `cmd_bitcointalk.go`, `cmd_excavate.go`, `cmd_github.go`, `cmd_market.go`, `cmd_papers.go`, `cmd_process.go` (all covered by `collect/*` rows), `cmd_dispatch.go` (local event enum validation only; no command execution sink). +- `cmd/forge`: `cmd_auth.go` and `cmd_config.go` feed the `forge/config.go` row; `cmd_migrate.go` and `cmd_sync.go` are in the matrix; `cmd_forge.go`, `cmd_issues.go`, `cmd_labels.go`, `cmd_orgs.go`, `cmd_prs.go`, `cmd_repos.go`, `cmd_status.go`, `helpers.go` are CLI/API wrappers only. +- `cmd/gitea`: `cmd_config.go` feeds the `gitea/config.go` row; `cmd_mirror.go` and `cmd_sync.go` are in the matrix; `cmd_gitea.go`, `cmd_issues.go`, `cmd_prs.go`, `cmd_repos.go` are CLI/API wrappers only. +- `cmd/scm`: `cmd_compile.go`, `cmd_export.go`, `cmd_index.go` are in the matrix; `cmd_scm.go` is a root command wrapper only. +- `collect`: `bitcointalk.go`, `collect.go`, `github.go`, `market.go`, `papers.go`, `process.go`, `ratelimit.go` are in the matrix; `events.go` and `excavate.go` add no new sink beyond already-mapped config and collector flows; `state.go` is subsumed by the shared `--output` path row. +- `forge`: `client.go`, `config.go`, `repos.go` are in the matrix; `issues.go`, `labels.go`, `meta.go`, `orgs.go`, `prs.go`, `webhooks.go` forward typed inputs into the SDK without additional local command/path construction. +- `git`: `service.go` is in the matrix; `git.go` is command-wrapper logic without additional external entry surface beyond caller-supplied repo paths. +- `gitea`: `client.go`, `config.go`, `repos.go` are in the matrix; `issues.go` and `meta.go` are SDK wrappers only. +- `jobrunner/forgejo`: `signals.go` and `source.go` are in the matrix. +- `jobrunner/handlers`: `completion.go` and `dispatch.go` are in the matrix; `enable_auto_merge.go`, `publish_draft.go`, `resolve_threads.go`, `send_fix_command.go`, and `tick_parent.go` consume already-mapped `PipelineSignal` fields and emit fixed API calls/messages only. +- `jobrunner`: `journal.go` is in the matrix; `poller.go` and `types.go` define orchestration and data structures without introducing new external input parsing. +- `locales`: `embed.go` contains no runtime external input handling. +- `manifest`: `compile.go` and `sign.go` are covered through `cmd/scm` and `pkg/api/provider.go`; `loader.go` and `manifest.go` parse fixed project-local files only. +- `marketplace`: `builder.go` and `installer.go` are in the matrix; `discovery.go` and `marketplace.go` are read-only helpers around already-mapped local filesystem or in-memory index flows. +- `pkg/api`: `provider.go` is in the matrix; `embed.go` contains no external input handling. +- `plugin`: `installer.go`, `loader.go`, and `registry.go` are in the matrix; `config.go`, `manifest.go`, and `plugin.go` define data structures and validation helpers only. +- `repos`: `kbconfig.go` and `registry.go` are in the matrix; `gitstate.go` and `workconfig.go` are config/struct helpers without new sink construction.