fix(agentci): restore security helpers and map attack vectors

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-03-24 13:15:55 +00:00
parent e93f5673ef
commit a54bd834ff
4 changed files with 589 additions and 5 deletions

View file

@ -2,6 +2,7 @@ package agentci
import (
"os/exec"
"path"
"path/filepath"
"regexp"
"strings"
@ -15,13 +16,134 @@ var safeNameRegex = regexp.MustCompile(`^[a-zA-Z0-9\-\_\.]+$`)
// Returns filepath.Base of the input after validation.
func SanitizePath(input string) (string, error) {
base := filepath.Base(input)
if !safeNameRegex.MatchString(base) {
return "", coreerr.E("agentci.SanitizePath", "invalid characters in path element: "+input, nil)
safeBase, err := ValidatePathElement(base)
if err != nil {
return "", coreerr.E("agentci.SanitizePath", "invalid path element", err)
}
if base == "." || base == ".." || base == "/" {
return "", coreerr.E("agentci.SanitizePath", "invalid path element: "+base, nil)
return safeBase, nil
}
// ValidatePathElement validates a single filesystem or URL path element.
func ValidatePathElement(input string) (string, error) {
if input == "" {
return "", coreerr.E("agentci.ValidatePathElement", "path element is empty", nil)
}
return base, nil
if strings.TrimSpace(input) != input {
return "", coreerr.E("agentci.ValidatePathElement", "path element has leading or trailing whitespace", nil)
}
if input == "." || input == ".." {
return "", coreerr.E("agentci.ValidatePathElement", "invalid path element: "+input, nil)
}
if strings.ContainsAny(input, `/\`) || filepath.IsAbs(input) {
return "", coreerr.E("agentci.ValidatePathElement", "path element must not contain path separators", nil)
}
if !safeNameRegex.MatchString(input) {
return "", coreerr.E("agentci.ValidatePathElement", "invalid characters in path element: "+input, nil)
}
return input, nil
}
// ResolvePathWithinRoot returns a validated name and an absolute path constrained to the given root.
func ResolvePathWithinRoot(root, name string) (string, string, error) {
if root == "" {
return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "root is empty", nil)
}
safeName, err := ValidatePathElement(name)
if err != nil {
return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "invalid path element", err)
}
rootAbs, err := filepath.Abs(root)
if err != nil {
return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "resolve root path", err)
}
rootAbs = filepath.Clean(rootAbs)
resolved := filepath.Clean(filepath.Join(rootAbs, safeName))
rel, err := filepath.Rel(rootAbs, resolved)
if err != nil {
return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "compute relative path", err)
}
if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
return "", "", coreerr.E("agentci.ResolvePathWithinRoot", "resolved path escapes root", nil)
}
return safeName, resolved, nil
}
// ValidateRemoteDir validates a remote POSIX directory path for SSH command usage.
func ValidateRemoteDir(input string) (string, error) {
if input == "" {
return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir is empty", nil)
}
if strings.TrimSpace(input) != input {
return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir has leading or trailing whitespace", nil)
}
if strings.ContainsAny(input, "\x00\r\n") {
return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir contains control characters", nil)
}
if input == "/" || input == "~" {
return input, nil
}
var (
parts []string
rest string
)
switch {
case strings.HasPrefix(input, "/"):
rest = strings.TrimPrefix(input, "/")
case strings.HasPrefix(input, "~/"):
rest = strings.TrimPrefix(input, "~/")
default:
return "", coreerr.E("agentci.ValidateRemoteDir", "remote dir must be absolute or home-relative", nil)
}
for _, part := range strings.Split(rest, "/") {
if part == "" {
continue
}
safePart, err := ValidatePathElement(part)
if err != nil {
return "", coreerr.E("agentci.ValidateRemoteDir", "invalid remote dir segment", err)
}
parts = append(parts, safePart)
}
switch {
case strings.HasPrefix(input, "/"):
if len(parts) == 0 {
return "/", nil
}
return "/" + strings.Join(parts, "/"), nil
default:
if len(parts) == 0 {
return "~", nil
}
return "~/" + strings.Join(parts, "/"), nil
}
}
// JoinRemotePath joins a validated remote directory with validated path elements.
func JoinRemotePath(base string, elems ...string) (string, error) {
safeBase, err := ValidateRemoteDir(base)
if err != nil {
return "", coreerr.E("agentci.JoinRemotePath", "invalid base path", err)
}
segments := make([]string, 0, len(elems)+1)
segments = append(segments, safeBase)
for _, elem := range elems {
safeElem, err := ValidatePathElement(elem)
if err != nil {
return "", coreerr.E("agentci.JoinRemotePath", "invalid path element", err)
}
segments = append(segments, safeElem)
}
return path.Join(segments...), nil
}
// EscapeShellArg wraps a string in single quotes for safe remote shell insertion.

View file

@ -3,6 +3,7 @@
package agentci
import (
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
@ -56,6 +57,134 @@ func TestSanitizePath_Bad(t *testing.T) {
}
}
func TestValidatePathElement_Good(t *testing.T) {
tests := []struct {
input string
want string
}{
{"simple", "simple"},
{"with-dash", "with-dash"},
{"with_underscore", "with_underscore"},
{"with.dot", "with.dot"},
{"ticket-host-uk-core-1.json", "ticket-host-uk-core-1.json"},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
got, err := ValidatePathElement(tt.input)
require.NoError(t, err)
assert.Equal(t, tt.want, got)
})
}
}
func TestValidatePathElement_Bad(t *testing.T) {
tests := []struct {
name string
input string
}{
{"empty", ""},
{"space", "has space"},
{"leading space", " name"},
{"slash", "org/repo"},
{"backslash", `org\\repo`},
{"traversal", ".."},
{"absolute", "/tmp"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := ValidatePathElement(tt.input)
assert.Error(t, err)
})
}
}
func TestResolvePathWithinRoot_Good(t *testing.T) {
root := t.TempDir()
gotName, gotPath, err := ResolvePathWithinRoot(root, "plugin-one")
require.NoError(t, err)
assert.Equal(t, "plugin-one", gotName)
assert.Equal(t, filepath.Join(root, "plugin-one"), gotPath)
}
func TestResolvePathWithinRoot_Bad(t *testing.T) {
root := t.TempDir()
_, _, err := ResolvePathWithinRoot(root, "../escape")
require.Error(t, err)
}
func TestValidateRemoteDir_Good(t *testing.T) {
tests := []struct {
input string
want string
}{
{"/tmp/queue", "/tmp/queue"},
{"/tmp//queue/", "/tmp/queue"},
{"~/ai-work/queue", "~/ai-work/queue"},
{"~/ai-work//queue/", "~/ai-work/queue"},
{"~", "~"},
{"/", "/"},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
got, err := ValidateRemoteDir(tt.input)
require.NoError(t, err)
assert.Equal(t, tt.want, got)
})
}
}
func TestValidateRemoteDir_Bad(t *testing.T) {
tests := []struct {
name string
input string
}{
{"empty", ""},
{"relative", "queue"},
{"shell metachar", "/tmp/queue; touch /tmp/pwned"},
{"space", "/tmp/queue name"},
{"traversal", "~/../queue"},
{"newline", "/tmp/queue\nother"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := ValidateRemoteDir(tt.input)
assert.Error(t, err)
})
}
}
func TestJoinRemotePath_Good(t *testing.T) {
tests := []struct {
name string
base string
elem string
want string
}{
{"absolute", "/tmp/queue", "ticket.json", "/tmp/queue/ticket.json"},
{"home relative", "~/ai-work/queue", "ticket.json", "~/ai-work/queue/ticket.json"},
{"home root", "~", ".env.ticket", "~/.env.ticket"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := JoinRemotePath(tt.base, tt.elem)
require.NoError(t, err)
assert.Equal(t, tt.want, got)
})
}
}
func TestJoinRemotePath_Bad(t *testing.T) {
_, err := JoinRemotePath("/tmp/queue", "../ticket.json")
assert.Error(t, err)
}
func TestEscapeShellArg_Good(t *testing.T) {
tests := []struct {
input string

View file

@ -0,0 +1,199 @@
# [scan] Security attack vector mapping — Implementation Plan
> For Codex agent: this document is the issue body to execute next.
## Goal
Map every external input entry point across `core/go-scm` and produce a complete attack-vector matrix with evidence and validation status.
## Scope
- Repository: `core/go-scm`
- Language: Go
- Branch: `agent/create-an-implementation-plan-for-this-s`
- Target files: all non-test Go source files
## 1) Every file to scan
Use this exact list as the scan set.
- `agentci/clotho.go`
- `agentci/config.go`
- `agentci/security.go`
- `cmd/collect/cmd.go`
- `cmd/collect/cmd_bitcointalk.go`
- `cmd/collect/cmd_dispatch.go`
- `cmd/collect/cmd_excavate.go`
- `cmd/collect/cmd_github.go`
- `cmd/collect/cmd_market.go`
- `cmd/collect/cmd_papers.go`
- `cmd/collect/cmd_process.go`
- `cmd/forge/cmd_auth.go`
- `cmd/forge/cmd_config.go`
- `cmd/forge/cmd_forge.go`
- `cmd/forge/cmd_issues.go`
- `cmd/forge/cmd_labels.go`
- `cmd/forge/cmd_migrate.go`
- `cmd/forge/cmd_orgs.go`
- `cmd/forge/cmd_prs.go`
- `cmd/forge/cmd_repos.go`
- `cmd/forge/cmd_status.go`
- `cmd/forge/cmd_sync.go`
- `cmd/forge/helpers.go`
- `cmd/gitea/cmd_config.go`
- `cmd/gitea/cmd_gitea.go`
- `cmd/gitea/cmd_issues.go`
- `cmd/gitea/cmd_mirror.go`
- `cmd/gitea/cmd_prs.go`
- `cmd/gitea/cmd_repos.go`
- `cmd/gitea/cmd_sync.go`
- `cmd/scm/cmd_compile.go`
- `cmd/scm/cmd_export.go`
- `cmd/scm/cmd_index.go`
- `cmd/scm/cmd_scm.go`
- `collect/bitcointalk.go`
- `collect/collect.go`
- `collect/events.go`
- `collect/excavate.go`
- `collect/github.go`
- `collect/market.go`
- `collect/papers.go`
- `collect/process.go`
- `collect/ratelimit.go`
- `collect/state.go`
- `forge/client.go`
- `forge/config.go`
- `forge/issues.go`
- `forge/labels.go`
- `forge/meta.go`
- `forge/orgs.go`
- `forge/prs.go`
- `forge/repos.go`
- `forge/webhooks.go`
- `git/git.go`
- `git/service.go`
- `gitea/client.go`
- `gitea/config.go`
- `gitea/issues.go`
- `gitea/meta.go`
- `gitea/repos.go`
- `jobrunner/forgejo/signals.go`
- `jobrunner/forgejo/source.go`
- `jobrunner/handlers/completion.go`
- `jobrunner/handlers/dispatch.go`
- `jobrunner/handlers/enable_auto_merge.go`
- `jobrunner/handlers/publish_draft.go`
- `jobrunner/handlers/resolve_threads.go`
- `jobrunner/handlers/send_fix_command.go`
- `jobrunner/handlers/tick_parent.go`
- `jobrunner/journal.go`
- `jobrunner/poller.go`
- `jobrunner/types.go`
- `locales/embed.go`
- `manifest/compile.go`
- `manifest/loader.go`
- `manifest/manifest.go`
- `manifest/sign.go`
- `marketplace/builder.go`
- `marketplace/discovery.go`
- `marketplace/installer.go`
- `marketplace/marketplace.go`
- `pkg/api/embed.go`
- `pkg/api/provider.go`
- `plugin/config.go`
- `plugin/installer.go`
- `plugin/loader.go`
- `plugin/manifest.go`
- `plugin/plugin.go`
- `plugin/registry.go`
- `repos/gitstate.go`
- `repos/kbconfig.go`
- `repos/registry.go`
- `repos/workconfig.go`
## 2) What to look for in each file
### A) External input entry points
- CLI entry arguments and flags
- CLI env-config precedence (`config`, `FORGE_*`, `GITEA_*`)
- HTTP request handlers, webhook payloads, request headers, params, and body
- Filesystem paths and filenames supplied as config/flags/env/user values
- External URLs, repo names, owner/org names, branch names
- SSH command inputs and key material
- Collector source inputs (search terms, query terms, package names, IDs)
- Job signals/labels/checklists/issue fields/command payloads
### B) Validation gaps to record
- Missing/weak allowlists for path segments and IDs
- Missing canonicalisation of URLs/paths/refs before use
- Incomplete enum/type validation (state, status, action)
- Unsafe conversions/casts without bounds checks
- Insufficient escaping for shell, file, JSON, or command composition
- Trusting external API responses without schema checks
- Ignoring error paths that should halt processing
### C) Injection vectors
- OS command construction
- Path traversal / arbitrary file write/read
- Log forging / token leak via logging
- SSRF via configurable URLs and webhooks
- Template/HTML injection through rendered output paths
- SQL-like or LDAP-like interpolation (where backend uses query strings)
- Git command argument injection and branch/ref injection
- Header injection in HTTP clients/servers
### D) Race condition risks
- Shared mutable state in handlers and services
- Journal writes/read/write-back without coordination
- Async polling loops sharing token/counter/state
- Map/slice writes from worker goroutines
- TOCTOU around file presence/load/read-modify-write sequences
- Cache + state refresh races under concurrent polling
## 3) Required output format for findings
Each finding row must contain all columns in this exact order:
`file:line | input source | flows into | validation | attack vector`
Example format:
`collect/github.go:142 | cmd flag `--org` (string) | buildGitHubCollector(config) -> net/http request URL | domain allowlist absent | SSRF + data exfil by domain override`
### Minimal per-row capture fields
- `file:line` (primary function where input first enters)
- `input source` (flag/env/body/path/header/id)
- `flows into` (target function/call chain)
- `validation` (what checks currently exist, if any)
- `attack vector` (confidentiality/integrity/availability risk)
### Evidence fields (optional but preferred)
- CWE id
- Repro path
- severity (`low`/`medium`/`high`)
- confidence (`high`/`med`/`low`)
- mitigation candidate
## 4) Where to write the report
Primary report file to produce:
- `docs/security/scan-attack-vector-mapping-report.md`
This file must contain:
1. The completed matrix in the required format above
2. Deduplicated list of attack vectors with severity
3. Verification status (`mapped`, `validated`, `open`
)
4. A final summary by subsystem and risk priority
This issue body should be replaced with a pointer plus a short runbook:
- "Execution plan in `docs/plans/2026-03-24-security-attack-vector-mapping.md`, results in `docs/security/scan-attack-vector-mapping-report.md`."
## Execution order
- [x] 1. Create report file and add header + schema columns
- [x] 2. Scan files in package order listed above
- [x] 3. For each file, capture every external input entry point and map to sinks
- [x] 4. Populate one row per mapped flow in the required `file:line | ...` format
- [x] 5. Cross-check for duplicates and deduplicate by identical sink and attack vector
- [x] 6. Add severity and validation gap notes per row
- [x] 7. Finalise summary + high-priority follow-up list

View file

@ -0,0 +1,134 @@
# Security Attack Vector Mapping Report
Execution plan in `docs/plans/2026-03-24-security-attack-vector-mapping.md`, results in `docs/security/scan-attack-vector-mapping-report.md`.
- Issue: `[scan] Security attack vector mapping`
- Repo: `core/go-scm`
- Branch: `agent/create-an-implementation-plan-for-this-s`
- Date: `2026-03-24`
- Agent: `Codex`
## Findings Matrix
Status meanings:
- `open`: validation gap or exploitable flow remains.
- `validated`: explicit hardening exists and was verified in code.
- `mapped`: flow is real, but the current risk is limited or mostly delegated to downstream tools.
```text
file:line | input source | flows into | validation | attack vector | severity | confidence | verification
agentci/config.go:34 | config file `agentci.agents.*` (`host`, `queue_dir`, `forgejo_user`, models, runner) | `LoadAgents()` -> `Spinner.FindByForgejoUser()` -> `jobrunner/handlers.DispatchHandler` SSH target selection | only presence/default checks; queue dir is validated later, host is never allowlisted | SSH target manipulation / unauthorised agent routing if config is attacker-controlled | medium | med | mapped
cmd/collect/cmd.go:56 | CLI flag `--output` | `newConfig()` -> `collect.NewConfigWithMedium()` -> `cfg.OutputDir` and `.collect-state.json` | no canonicalisation or root policy | arbitrary local file write/read root for all collectors and processors | low | high | open
collect/bitcointalk.go:64 | positional `<topic-id>` via `cmd/collect/cmd_bitcointalk.go` | `BitcoinTalkCollector.Collect()` -> `filepath.Join(cfg.OutputDir, "bitcointalk", topicID, "posts")` -> `EnsureDir`/`Write` | only non-empty topic ID required | path traversal / arbitrary file write | high | high | open
collect/bitcointalk.go:89 | positional `<topic-id>` via `cmd/collect/cmd_bitcointalk.go` | `fmt.Sprintf("https://bitcointalk.org/index.php?topic=%s.%d", topicID, offset)` -> `http.NewRequestWithContext` | no query escaping or numeric check | malformed request / same-host request steering | medium | med | open
collect/github.go:123 | CLI `org/repo`, `--org`, or `excavate <project>` fallback org | `exec.CommandContext("gh", "repo", "list", g.Org, ...)` | no org format validation; `exec.CommandContext` blocks shell metachar expansion | scope expansion / excessive API enumeration; low injection surface | low | med | mapped
collect/github.go:179 | CLI `org/repo` and GitHub repo names from `gh repo list` | `filepath.Join(cfg.OutputDir, "github", g.Org, repo, "issues")` -> `EnsureDir`/`Write` | no path-segment validation on `g.Org` or `repo` | path traversal / arbitrary file write | high | high | open
collect/github.go:238 | CLI `org/repo` and GitHub repo names from `gh repo list` | `filepath.Join(cfg.OutputDir, "github", g.Org, repo, "pulls")` -> `EnsureDir`/`Write` | no path-segment validation on `g.Org` or `repo` | path traversal / arbitrary file write | high | high | open
collect/github.go:284 | GitHub API `issue.body` / `pr.body` | `formatIssueMarkdown()` -> markdown files under `cfg.Output` | no sanitisation before persistence | stored markdown/HTML/prompt injection in downstream renderers or LLM ingestion | medium | high | open
collect/market.go:84 | CLI `<coin>` via `cmd/collect/cmd_market.go` | `filepath.Join(cfg.OutputDir, "market", m.CoinID)` -> `EnsureDir` | only non-empty `CoinID` check | path traversal / arbitrary file write | high | high | open
collect/market.go:132 | CLI `<coin>` via `cmd/collect/cmd_market.go` | `fmt.Sprintf("%s/coins/%s", coinGeckoBaseURL, m.CoinID)` -> `fetchJSON()` | no path escaping or allowlist for `CoinID` | same-host endpoint injection / SSRF-style path steering | medium | high | open
collect/market.go:184 | CLI `<coin>` and `--from` | `fmt.Sprintf("%s/coins/%s/market_chart?vs_currency=usd&days=%s", ...)` -> `fetchJSON()` | `FromDate` parsing bounds `days`; `CoinID` remains unescaped | same-host endpoint injection / excessive upstream query shaping | medium | high | open
collect/papers.go:113 | CLI `--query` | `url.QueryEscape(p.Query)` -> IACR search request | query is escaped before request construction | query injection blocked; residual risk is upstream content trust only | low | high | validated
collect/papers.go:145 | IACR HTML `href` values parsed from remote search results | `parseIACREntry()` -> derived `ppr.ID` -> `filepath.Join(baseDir, ppr.ID+".md")` | no filename sanitisation on remote-derived ID | path traversal / arbitrary file write if upstream HTML is hostile or compromised | high | med | open
collect/papers.go:199 | CLI `--query` and `--category` | `url.QueryEscape(...)` -> arXiv API request, then `arxivEntryToPaper()` replaces `/` and `:` in IDs | query and category are escaped; arXiv IDs are partially canonicalised | request injection largely blocked; residual risk is remote content trust | low | high | validated
collect/process.go:50 | CLI `<dir>` via `cmd/collect/cmd_process.go` | `cfg.Output.List(p.Dir)` and `cfg.Output.Read(filepath.Join(p.Dir, name))` | no root containment or canonicalisation of `p.Dir` | arbitrary local read/list when using `io.Local` medium | high | high | open
collect/process.go:55 | CLI `<source>` via `cmd/collect/cmd_process.go` | `filepath.Join(cfg.OutputDir, "processed", p.Source)` -> `EnsureDir`/`Write` | no path validation on `p.Source` | path traversal / arbitrary file write | high | high | open
collect/process.go:214 | source HTML `href` attributes from files under `p.Dir` | `nodeToMarkdown()` -> `[text](href)` persisted to markdown | no scheme allowlist or URL sanitisation | stored `javascript:` / `data:` link injection | medium | high | open
collect/ratelimit.go:44 | concurrent caller-controlled source keys | `RateLimiter.Wait()` -> shared `last[source]` map | mutex released before slot reservation; no second eligibility check after sleep | burst bypass / API quota exhaustion / availability degradation | medium | high | open
forge/config.go:40 | config file `forge.url`, `forge.token`, env `FORGE_URL`, `FORGE_TOKEN`, CLI overrides | `ResolveConfig()` -> `forge.New()` -> Forgejo SDK client | token required, URL not allowlisted or canonicalised | SSRF / credential exfiltration to attacker-controlled Forgejo endpoint | high | high | open
cmd/forge/cmd_migrate.go:46 | CLI `<clone-url>`, `--service`, `--token`, `--org` | `runMigrate()` -> `forge.Client.MigrateRepo()` with `CloneAddr` and `AuthToken` | repo name is derived from URL tail only; clone URL itself is not constrained | server-side clone SSRF / internal repo import / credential misuse | high | high | open
cmd/forge/cmd_sync.go:95 | CLI repo args and `--base-path` | `syncRepoNameFromArg()` + `ResolvePathWithinRoot()` -> `git -C <localPath>` / `git push` | repo names validated via `ValidatePathElement`; local path confined to base path | path traversal mitigated; residual risk limited to untrusted Forge URL config | low | high | validated
gitea/config.go:40 | config file `gitea.url`, `gitea.token`, env `GITEA_URL`, `GITEA_TOKEN`, CLI overrides | `ResolveConfig()` -> `gitea.New()` -> Gitea SDK client | token required, URL not allowlisted or canonicalised | SSRF / credential exfiltration to attacker-controlled Gitea endpoint | high | high | open
cmd/gitea/cmd_mirror.go:45 | CLI `<github-owner/repo>` and optional GitHub token | `runMirror()` -> fixed `https://github.com/<owner>/<repo>.git` -> `gitea.CreateMirror()` | owner/repo are split, but not allowlisted; source token may be forwarded | server-side clone of attacker-chosen repo and token relay to remote Gitea | high | med | open
cmd/gitea/cmd_sync.go:95 | CLI repo args and `--base-path` | `repoNameFromArg()` + `ResolvePathWithinRoot()` -> `git -C <localPath>` / `git push` | repo names validated via `ValidatePathElement`; local path confined to base path | path traversal mitigated; residual risk limited to untrusted Gitea URL config | low | high | validated
jobrunner/forgejo/signals.go:16 | epic issue body checklist text | `parseEpicChildren()` -> child issue numbers -> `pollRepo()` dispatch decisions | regex only checks `#<digits>`; no repository or ownership cross-check | workflow spoofing / unintended child issue dispatch | medium | high | open
jobrunner/forgejo/signals.go:36 | PR body text | `findLinkedPR()` -> child issue association -> downstream handler execution | first `#<n>` match wins; no keyword, owner, or repo validation | PR-body spoofing causes wrong issue state transitions | medium | high | open
jobrunner/forgejo/source.go:72 | handler `result.Error` strings | `Report()` -> Forgejo issue comment code block | no sanitisation, truncation, or mention filtering | comment injection / mention spam / log forging of pipeline output | medium | high | open
jobrunner/handlers/dispatch.go:89 | agent config `queue_dir` plus `signal.RepoOwner` / `signal.RepoName` | `Execute()` -> remote queue path building and Forgejo label/comment operations | remote dir is validated; repo owner/name still pass through `SanitizePath(filepath.Base(...))` | retargeting/confusion if a forged signal supplies slash-containing names | medium | med | open
jobrunner/handlers/dispatch.go:151 | issue title/body, assignee, repo metadata from `PipelineSignal` | `DispatchTicket` JSON -> remote agent queue file | JSON encoding only; content is not filtered, signed, or policy-checked | prompt injection / remote agent control-surface abuse | high | high | open
jobrunner/handlers/dispatch.go:212 | local Forge token | `.env.<ticketID>` content -> remote worker filesystem | remote path validated and file mode set to `0600` after transfer | credential exposure on remote worker host, snapshots, or backups | medium | high | mapped
jobrunner/handlers/dispatch.go:255 | runtime `reason` strings and raw `signal.RepoOwner` / `signal.RepoName` | `failDispatch()` -> labels and issue comments on Forgejo | no sanitisation; fail path uses raw signal repo fields | comment injection and repo-retargeting if upstream signal is forged | medium | med | open
jobrunner/handlers/completion.go:58 | agent completion `signal.Message` / `signal.Error` | `CreateIssueComment()` on child issue | no sanitisation, truncation, or mention filtering | stored comment injection / mention spam / log forging | medium | high | open
jobrunner/journal.go:130 | `signal.RepoOwner` / `signal.RepoName` | `Append()` -> `<base>/<owner>/<repo>/<date>.jsonl` | strict component regex, absolute-prefix containment check, and mutexed writes | path traversal blocked; safe write pattern verified | low | high | validated
git/service.go:72 | concurrent query/task caller inputs `Paths` and service calls | `handleQuery()` / `handleTask()` -> shared `s.lastStatus` cache | no mutex or atomic protection around `lastStatus` | data race / stale or torn state under concurrent queries | medium | high | open
cmd/scm/cmd_compile.go:54 | CLI `--sign-key` hex string | `hex.DecodeString()` -> `manifest.Compile()` -> `manifest.Sign()` -> `ed25519.Sign()` | hex decoding only; no private key length check | panic-triggered DoS via invalid Ed25519 private key length | high | high | open
cmd/scm/cmd_export.go:28 | CLI `--dir` | `io.NewSandboxed(dir)` -> `manifest.LoadCompiled()` / `manifest.Load()` -> stdout | sandboxed medium confines access to the chosen root | local file disclosure limited to caller-selected directory | low | high | mapped
cmd/scm/cmd_index.go:39 | CLI `--dir`, `--output`, `--base-url`, `--org` | `marketplace.Builder.BuildFromDirs()` / `marketplace.WriteIndex()` | no path or URL validation beyond OS errors | arbitrary local read/write and untrusted repo URL generation in produced index | medium | high | open
pkg/api/provider.go:202 | HTTP query params `q` and `category` | `listMarketplace()` -> in-memory `Index.Search()` / `ByCategory()` | no server-side escaping needed; no filesystem or outbound request sink | low-risk search-scope manipulation only | low | high | mapped
pkg/api/provider.go:466 | HTTP path param `:code` | `marketplaceCodeParam()` -> install/update/remove lookups | `PathUnescape()` + `ValidatePathElement()` | path traversal blocked for marketplace code paths | low | high | validated
pkg/api/provider.go:309 | HTTP JSON body `public_key` | `hex.DecodeString()` -> `manifest.Verify()` -> `ed25519.Verify()` | hex decoding only; no public key length check | panic-triggered DoS via malformed Ed25519 public key | high | high | open
pkg/api/provider.go:346 | HTTP JSON body `private_key` | `hex.DecodeString()` -> `manifest.Sign()` -> `ed25519.Sign()` | hex decoding only; no private key length check | panic-triggered DoS via malformed Ed25519 private key | high | high | open
marketplace/installer.go:50 | marketplace index `Module.Code` and `Module.Repo` | `resolveModulePath()` -> `git clone <repo> <dest>` -> manifest load/store | module code is now confined with `ResolvePathWithinRoot`; repo URL is unvalidated | arbitrary git transport / SSRF / local-path clone from untrusted marketplace index | high | high | open
marketplace/installer.go:195 | marketplace index `Module.SignKey` | `hex.DecodeString()` -> `manifest.LoadVerified()` -> `ed25519.Verify()` | hex decoding only; no public key length check | panic-triggered DoS during install/update from malformed marketplace metadata | high | high | open
plugin/installer.go:181 | plugin source `org/repo@version` | `ParseSource()` -> registry name/path selection -> clone arguments | org and repo validated as path elements; version only checked for non-empty string | branch/ref abuse and unexpected checkout target selection | medium | med | mapped
plugin/installer.go:165 | plugin `@version` suffix | `gh repo clone <org/repo> <dest> -- --branch <version>` | `exec.CommandContext` blocks shell injection; no ref allowlist | malicious ref names can force unexpected refs/tags or failure loops | medium | med | open
plugin/loader.go:51 | caller-supplied plugin name | `filepath.Join(baseDir, name, "plugin.json")` -> `LoadManifest()` | no validation on `name` | path traversal / arbitrary local file read | high | high | open
repos/registry.go:66 | caller-supplied registry path | `io.Medium.Read(path)` -> YAML parse | no path validation | arbitrary local file read via caller-chosen registry path | medium | high | open
repos/registry.go:81 | YAML `base_path` and per-repo `path` values | expanded `Repo.Path` -> later `Exists()`, `IsGitRepo()`, git config probing | tilde expansion only; no root confinement | workspace escape / probing of arbitrary directories from untrusted registry | medium | high | open
repos/kbconfig.go:69 | caller-supplied workspace root | `.core/kb.yaml` read/write | no canonicalisation beyond join | arbitrary local read/write under caller-selected root | low | high | mapped
repos/kbconfig.go:110 | KB config `Wiki.Remote`, `Wiki.Dir`, and caller-supplied `repoName` | `WikiRepoURL()` and `WikiLocalPath()` | no validation of remote URL, local subdir, or repo name | path traversal in local wiki path and clone redirection to attacker-chosen remote | medium | high | open
```
## Deduplicated Attack Vectors
- `high` Path traversal and arbitrary local read/write from unsanitised path joins in `collect/`, `plugin/loader.go`, `repos/registry.go`, and `repos/kbconfig.go`.
- `high` SSRF, server-side clone, and credential exfiltration via unvalidated Forgejo/Gitea base URLs, migration clone URLs, and marketplace repo URLs.
- `high` Panic-triggered availability loss from unchecked Ed25519 key lengths in CLI and HTTP signing/verification flows.
- `high` Prompt injection and remote worker influence via raw issue title/body content written into dispatch tickets.
- `medium` Stored markdown/comment injection via GitHub bodies, processed HTML links, jobrunner error comments, and completion messages.
- `medium` Workflow spoofing from weak issue/PR text parsing in `jobrunner/forgejo/signals.go`.
- `medium` Branch/ref steering and unexpected checkout behaviour in plugin installation.
- `medium` Concurrency races in `collect/ratelimit.go` and `git/service.go`.
- `low` Broad CLI-selected filesystem scope in `cmd/collect`, `cmd/scm export`, and `repos/kbconfig.go` remains expected but should not be delegated to lower-trust callers.
- `low` Verified mitigations exist for journal path safety, marketplace code path validation, and sync local-path confinement.
## Verification Status
- Total mapped flows: `50`
- `open`: `37`
- `validated`: `6`
- `mapped`: `7`
- High severity: `18`
- Medium severity: `20`
- Low severity: `12`
## Subsystem Summary
- `P1 collect/`: the highest concentration of exploitable issues is in collector output path construction and persistence of raw third-party content. `collect/github.go`, `collect/market.go`, `collect/bitcointalk.go`, `collect/papers.go`, and `collect/process.go` all need path and content hardening.
- `P1 forge/gitea/marketplace/pkg/api`: base URL trust, migration/mirror clone sources, and Ed25519 key handling are the main confidentiality and availability risks. These are central control-plane paths and should be fixed before widening API exposure.
- `P2 jobrunner/`: epic/PR text parsing, remote dispatch ticket generation, and comment reporting create workflow spoofing and prompt-injection surfaces. The journal implementation is the notable exception and is already hardened.
- `P2 workspace/plugin/repos/git`: plugin loading, registry path handling, wiki path generation, and shared git-service state carry medium-risk filesystem and race-condition exposure.
- `P3 validated controls`: `jobrunner/journal.go`, `cmd/forge/cmd_sync.go`, `cmd/gitea/cmd_sync.go`, and `pkg/api/provider.go` marketplace code normalisation are solid reference implementations for future hardening work.
## High-Priority Follow-Up
- Replace every collector and loader `filepath.Join(..., userValue, ...)` call with strict path-element validation before directory creation or file reads/writes.
- Add scheme, host, and canonicalisation allowlists for Forgejo/Gitea base URLs, migration clone URLs, marketplace repo URLs, and wiki remotes.
- Reject invalid Ed25519 key lengths before calling `ed25519.Sign` or `ed25519.Verify` in CLI, HTTP, and installer code paths.
- Treat dispatch tickets as untrusted agent prompts: strip or isolate raw issue bodies, add provenance metadata, and avoid shipping long-lived Forge tokens to remote workers where possible.
- Harden `jobrunner/forgejo` linking logic so epic children and PR references must match explicit repo/issue relationships, not bare `#123` text.
- Sanitize or encode user-visible comments and stored markdown links before writing them to Forgejo or output files.
- Fix `collect.RateLimiter.Wait` and `git.Service.lastStatus` with proper reservation and synchronisation.
## Scan Coverage
The full scan set from the issue plan was reviewed in package order. Files below either map directly to the matrix above or are forward-only wrappers/helpers with no additional unique external entry point beyond the flows already captured.
- `agentci`: `clotho.go` (strategy helper only), `config.go` (matrix), `security.go` (validation utilities only).
- `cmd/collect`: `cmd.go` (matrix), `cmd_bitcointalk.go`, `cmd_excavate.go`, `cmd_github.go`, `cmd_market.go`, `cmd_papers.go`, `cmd_process.go` (all covered by `collect/*` rows), `cmd_dispatch.go` (local event enum validation only; no command execution sink).
- `cmd/forge`: `cmd_auth.go` and `cmd_config.go` feed the `forge/config.go` row; `cmd_migrate.go` and `cmd_sync.go` are in the matrix; `cmd_forge.go`, `cmd_issues.go`, `cmd_labels.go`, `cmd_orgs.go`, `cmd_prs.go`, `cmd_repos.go`, `cmd_status.go`, `helpers.go` are CLI/API wrappers only.
- `cmd/gitea`: `cmd_config.go` feeds the `gitea/config.go` row; `cmd_mirror.go` and `cmd_sync.go` are in the matrix; `cmd_gitea.go`, `cmd_issues.go`, `cmd_prs.go`, `cmd_repos.go` are CLI/API wrappers only.
- `cmd/scm`: `cmd_compile.go`, `cmd_export.go`, `cmd_index.go` are in the matrix; `cmd_scm.go` is a root command wrapper only.
- `collect`: `bitcointalk.go`, `collect.go`, `github.go`, `market.go`, `papers.go`, `process.go`, `ratelimit.go` are in the matrix; `events.go` and `excavate.go` add no new sink beyond already-mapped config and collector flows; `state.go` is subsumed by the shared `--output` path row.
- `forge`: `client.go`, `config.go`, `repos.go` are in the matrix; `issues.go`, `labels.go`, `meta.go`, `orgs.go`, `prs.go`, `webhooks.go` forward typed inputs into the SDK without additional local command/path construction.
- `git`: `service.go` is in the matrix; `git.go` is command-wrapper logic without additional external entry surface beyond caller-supplied repo paths.
- `gitea`: `client.go`, `config.go`, `repos.go` are in the matrix; `issues.go` and `meta.go` are SDK wrappers only.
- `jobrunner/forgejo`: `signals.go` and `source.go` are in the matrix.
- `jobrunner/handlers`: `completion.go` and `dispatch.go` are in the matrix; `enable_auto_merge.go`, `publish_draft.go`, `resolve_threads.go`, `send_fix_command.go`, and `tick_parent.go` consume already-mapped `PipelineSignal` fields and emit fixed API calls/messages only.
- `jobrunner`: `journal.go` is in the matrix; `poller.go` and `types.go` define orchestration and data structures without introducing new external input parsing.
- `locales`: `embed.go` contains no runtime external input handling.
- `manifest`: `compile.go` and `sign.go` are covered through `cmd/scm` and `pkg/api/provider.go`; `loader.go` and `manifest.go` parse fixed project-local files only.
- `marketplace`: `builder.go` and `installer.go` are in the matrix; `discovery.go` and `marketplace.go` are read-only helpers around already-mapped local filesystem or in-memory index flows.
- `pkg/api`: `provider.go` is in the matrix; `embed.go` contains no external input handling.
- `plugin`: `installer.go`, `loader.go`, and `registry.go` are in the matrix; `config.go`, `manifest.go`, and `plugin.go` define data structures and validation helpers only.
- `repos`: `kbconfig.go` and `registry.go` are in the matrix; `gitstate.go` and `workconfig.go` are config/struct helpers without new sink construction.