cli/internal/cmd/docs/cmd_scan.go

142 lines
3.4 KiB
Go
Raw Normal View History

package docs
import (
"io/fs"
"os"
"path/filepath"
"strings"
feat(mcp): add workspace root validation to prevent path traversal (#100) * feat(mcp): add workspace root validation to prevent path traversal - Add workspaceRoot field to Service for restricting file operations - Add WithWorkspaceRoot() option for configuring the workspace directory - Add validatePath() helper to check paths are within workspace - Apply validation to all file operation handlers - Default to current working directory for security - Add comprehensive tests for path validation Closes #82 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor: move CLI commands from pkg/ to internal/cmd/ - Move 18 CLI command packages to internal/cmd/ (not externally importable) - Keep 16 library packages in pkg/ (externally importable) - Update all import paths throughout codebase - Cleaner separation between CLI logic and reusable libraries CLI commands moved: ai, ci, dev, docs, doctor, gitcmd, go, monitor, php, pkgcmd, qa, sdk, security, setup, test, updater, vm, workspace Libraries remaining: agentic, build, cache, cli, container, devops, errors, framework, git, i18n, io, log, mcp, process, release, repos Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor(mcp): use pkg/io Medium for sandboxed file operations Replace manual path validation with pkg/io.Medium for all file operations. This delegates security (path traversal, symlink bypass) to the sandboxed local.Medium implementation. Changes: - Add io.NewSandboxed() for creating sandboxed Medium instances - Refactor MCP Service to use io.Medium instead of direct os.* calls - Remove validatePath and resolvePathWithSymlinks functions - Update tests to verify Medium-based behaviour Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: correct import path and workflow references - Fix pkg/io/io.go import from core-gui to core - Update CI workflows to use internal/cmd/updater path Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(security): address CodeRabbit review issues for path validation - pkg/io/local: add symlink resolution and boundary-aware containment - Reject absolute paths in sandboxed Medium - Use filepath.EvalSymlinks to prevent symlink bypass attacks - Fix prefix check to prevent /tmp/root matching /tmp/root2 - pkg/mcp: fix resolvePath to validate and return errors - Changed resolvePath from (string) to (string, error) - Update deleteFile, renameFile, listDirectory, fileExists to handle errors - Changed New() to return (*Service, error) instead of *Service - Properly propagate option errors instead of silently discarding - pkg/io: wrap errors with E() helper for consistent context - Copy() and MockMedium.Read() now use coreerr.E() - tests: rename to use _Good/_Bad/_Ugly suffixes per coding guidelines - Fix hardcoded /tmp in TestPath to use t.TempDir() - Add TestResolvePath_Bad_SymlinkTraversal test Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * style: fix gofmt formatting Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * style: fix gofmt formatting across all files Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 21:59:34 +00:00
"github.com/host-uk/core/internal/cmd/workspace"
"github.com/host-uk/core/pkg/cli"
"github.com/host-uk/core/pkg/i18n"
feat: Batch implementation of Gemini issues (#176) * feat(help): Add CLI help command Fixes #136 * chore: remove binary * feat(mcp): Add TCP transport Fixes #126 * feat(io): Migrate pkg/mcp to use Medium abstraction Fixes #103 * chore(io): Migrate internal/cmd/docs/* to Medium abstraction Fixes #113 * chore(io): Migrate internal/cmd/dev/* to Medium abstraction Fixes #114 * chore(io): Migrate internal/cmd/setup/* to Medium abstraction * chore(io): Complete migration of internal/cmd/dev/* to Medium abstraction * chore(io): Migrate internal/cmd/sdk, pkgcmd, and workspace to Medium abstraction * style: fix formatting in internal/variants Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor(io): simplify local Medium implementation Rewrote to match the simpler TypeScript pattern: - path() sanitizes and returns string directly - Each method calls path() once - No complex symlink validation - Less code, less attack surface Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * test(mcp): update sandboxing tests for simplified Medium The simplified io/local.Medium implementation: - Sanitizes .. to . (no error, path is cleaned) - Allows absolute paths through (caller validates if needed) - Follows symlinks (no traversal blocking) Update tests to match this simplified behavior. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(updater): resolve PkgVersion duplicate declaration Remove var PkgVersion from updater.go since go generate creates const PkgVersion in version.go. Track version.go in git to ensure builds work without running go generate first. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 04:20:18 +00:00
"github.com/host-uk/core/pkg/io"
"github.com/host-uk/core/pkg/repos"
)
// RepoDocInfo holds documentation info for a repo
type RepoDocInfo struct {
Name string
Path string
HasDocs bool
Readme string
ClaudeMd string
Changelog string
DocsFiles []string // All files in docs/ directory (recursive)
}
func loadRegistry(registryPath string) (*repos.Registry, string, error) {
var reg *repos.Registry
var err error
var registryDir string
if registryPath != "" {
reg, err = repos.LoadRegistry(registryPath)
if err != nil {
return nil, "", cli.Wrap(err, i18n.T("i18n.fail.load", "registry"))
}
registryDir = filepath.Dir(registryPath)
} else {
registryPath, err = repos.FindRegistry()
if err == nil {
reg, err = repos.LoadRegistry(registryPath)
if err != nil {
return nil, "", cli.Wrap(err, i18n.T("i18n.fail.load", "registry"))
}
registryDir = filepath.Dir(registryPath)
} else {
cwd, _ := os.Getwd()
reg, err = repos.ScanDirectory(cwd)
if err != nil {
return nil, "", cli.Wrap(err, i18n.T("i18n.fail.scan", "directory"))
}
registryDir = cwd
}
}
// Load workspace config to respect packages_dir
wsConfig, err := workspace.LoadConfig(registryDir)
if err != nil {
return nil, "", cli.Wrap(err, i18n.T("i18n.fail.load", "workspace config"))
}
basePath := registryDir
if wsConfig != nil && wsConfig.PackagesDir != "" && wsConfig.PackagesDir != "./packages" {
pkgDir := wsConfig.PackagesDir
feat: git command, build improvements, and go fmt git-aware (#74) * feat(go): make go fmt git-aware by default - By default, only check changed Go files (modified, staged, untracked) - Add --all flag to check all files (previous behaviour) - Reduces noise when running fmt on large codebases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(build): minimal output by default, add missing i18n - Default output now shows single line: "Success Built N artifacts (dir)" - Add --verbose/-v flag to show full detailed output - Add all missing i18n translations for build commands - Errors still show failure reason in minimal mode Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat: add root-level `core git` command - Create pkg/gitcmd with git workflow commands as root menu - Export command builders from pkg/dev (AddCommitCommand, etc.) - Commands available under both `core git` and `core dev` for compatibility - Git commands: health, commit, push, pull, work, sync, apply - GitHub orchestration stays in dev: issues, reviews, ci, impact Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(qa): add docblock coverage checking Implement docblock/docstring coverage analysis for Go code: - New `core qa docblock` command to check coverage - Shows compact file:line list when under threshold - Integrate with `core go qa` as a default check - Add --docblock-threshold flag (default 80%) The checker uses Go AST parsing to find exported symbols (functions, types, consts, vars) without documentation. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: address CodeRabbit review feedback - Fix doc comment: "status" → "health" in gitcmd package - Implement --check flag for `core go fmt` (exits non-zero if files need formatting) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * docs: add docstrings for 100% coverage Add documentation comments to all exported symbols: - pkg/build: ProjectType constants - pkg/cli: LogLevel, RenderStyle, TableStyle - pkg/framework: ServiceFor, MustServiceFor, Core.Core - pkg/git: GitError.Error, GitError.Unwrap - pkg/i18n: Handler Match/Handle methods - pkg/log: Level constants - pkg/mcp: Tool input/output types - pkg/php: Service constants, QA types, service methods - pkg/process: ServiceError.Error - pkg/repos: RepoType constants - pkg/setup: ChangeType, ChangeCategory constants - pkg/workspace: AddWorkspaceCommands Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * chore: standardize line endings to LF Add .gitattributes to enforce LF line endings for all text files. Normalize all existing files to use Unix-style line endings. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: address CodeRabbit review feedback - cmd_format.go: validate --check/--fix mutual exclusivity, capture stderr - cmd_docblock.go: return error instead of os.Exit(1) for proper error handling Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: address CodeRabbit review feedback (round 2) - linuxkit.go: propagate state update errors, handle cmd.Wait() errors in waitForExit - mcp.go: guard against empty old_string in editDiff to prevent runaway edits - cmd_docblock.go: log parse errors instead of silently skipping Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 10:48:44 +00:00
// Expand ~
if strings.HasPrefix(pkgDir, "~/") {
home, _ := os.UserHomeDir()
pkgDir = filepath.Join(home, pkgDir[2:])
}
if !filepath.IsAbs(pkgDir) {
pkgDir = filepath.Join(registryDir, pkgDir)
}
basePath = pkgDir
// Update repo paths if they were relative to registry
// This ensures consistency when packages_dir overrides the default
reg.BasePath = basePath
for _, repo := range reg.Repos {
repo.Path = filepath.Join(basePath, repo.Name)
}
}
return reg, basePath, nil
}
func scanRepoDocs(repo *repos.Repo) RepoDocInfo {
info := RepoDocInfo{
Name: repo.Name,
Path: repo.Path,
}
// Check for README.md
readme := filepath.Join(repo.Path, "README.md")
feat: Batch implementation of Gemini issues (#176) * feat(help): Add CLI help command Fixes #136 * chore: remove binary * feat(mcp): Add TCP transport Fixes #126 * feat(io): Migrate pkg/mcp to use Medium abstraction Fixes #103 * chore(io): Migrate internal/cmd/docs/* to Medium abstraction Fixes #113 * chore(io): Migrate internal/cmd/dev/* to Medium abstraction Fixes #114 * chore(io): Migrate internal/cmd/setup/* to Medium abstraction * chore(io): Complete migration of internal/cmd/dev/* to Medium abstraction * chore(io): Migrate internal/cmd/sdk, pkgcmd, and workspace to Medium abstraction * style: fix formatting in internal/variants Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor(io): simplify local Medium implementation Rewrote to match the simpler TypeScript pattern: - path() sanitizes and returns string directly - Each method calls path() once - No complex symlink validation - Less code, less attack surface Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * test(mcp): update sandboxing tests for simplified Medium The simplified io/local.Medium implementation: - Sanitizes .. to . (no error, path is cleaned) - Allows absolute paths through (caller validates if needed) - Follows symlinks (no traversal blocking) Update tests to match this simplified behavior. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(updater): resolve PkgVersion duplicate declaration Remove var PkgVersion from updater.go since go generate creates const PkgVersion in version.go. Track version.go in git to ensure builds work without running go generate first. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 04:20:18 +00:00
if io.Local.IsFile(readme) {
info.Readme = readme
info.HasDocs = true
}
// Check for CLAUDE.md
claudeMd := filepath.Join(repo.Path, "CLAUDE.md")
feat: Batch implementation of Gemini issues (#176) * feat(help): Add CLI help command Fixes #136 * chore: remove binary * feat(mcp): Add TCP transport Fixes #126 * feat(io): Migrate pkg/mcp to use Medium abstraction Fixes #103 * chore(io): Migrate internal/cmd/docs/* to Medium abstraction Fixes #113 * chore(io): Migrate internal/cmd/dev/* to Medium abstraction Fixes #114 * chore(io): Migrate internal/cmd/setup/* to Medium abstraction * chore(io): Complete migration of internal/cmd/dev/* to Medium abstraction * chore(io): Migrate internal/cmd/sdk, pkgcmd, and workspace to Medium abstraction * style: fix formatting in internal/variants Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor(io): simplify local Medium implementation Rewrote to match the simpler TypeScript pattern: - path() sanitizes and returns string directly - Each method calls path() once - No complex symlink validation - Less code, less attack surface Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * test(mcp): update sandboxing tests for simplified Medium The simplified io/local.Medium implementation: - Sanitizes .. to . (no error, path is cleaned) - Allows absolute paths through (caller validates if needed) - Follows symlinks (no traversal blocking) Update tests to match this simplified behavior. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(updater): resolve PkgVersion duplicate declaration Remove var PkgVersion from updater.go since go generate creates const PkgVersion in version.go. Track version.go in git to ensure builds work without running go generate first. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 04:20:18 +00:00
if io.Local.IsFile(claudeMd) {
info.ClaudeMd = claudeMd
info.HasDocs = true
}
// Check for CHANGELOG.md
changelog := filepath.Join(repo.Path, "CHANGELOG.md")
feat: Batch implementation of Gemini issues (#176) * feat(help): Add CLI help command Fixes #136 * chore: remove binary * feat(mcp): Add TCP transport Fixes #126 * feat(io): Migrate pkg/mcp to use Medium abstraction Fixes #103 * chore(io): Migrate internal/cmd/docs/* to Medium abstraction Fixes #113 * chore(io): Migrate internal/cmd/dev/* to Medium abstraction Fixes #114 * chore(io): Migrate internal/cmd/setup/* to Medium abstraction * chore(io): Complete migration of internal/cmd/dev/* to Medium abstraction * chore(io): Migrate internal/cmd/sdk, pkgcmd, and workspace to Medium abstraction * style: fix formatting in internal/variants Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor(io): simplify local Medium implementation Rewrote to match the simpler TypeScript pattern: - path() sanitizes and returns string directly - Each method calls path() once - No complex symlink validation - Less code, less attack surface Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * test(mcp): update sandboxing tests for simplified Medium The simplified io/local.Medium implementation: - Sanitizes .. to . (no error, path is cleaned) - Allows absolute paths through (caller validates if needed) - Follows symlinks (no traversal blocking) Update tests to match this simplified behavior. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(updater): resolve PkgVersion duplicate declaration Remove var PkgVersion from updater.go since go generate creates const PkgVersion in version.go. Track version.go in git to ensure builds work without running go generate first. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 04:20:18 +00:00
if io.Local.IsFile(changelog) {
info.Changelog = changelog
info.HasDocs = true
}
// Recursively scan docs/ directory for .md files
docsDir := filepath.Join(repo.Path, "docs")
feat: Batch implementation of Gemini issues (#176) * feat(help): Add CLI help command Fixes #136 * chore: remove binary * feat(mcp): Add TCP transport Fixes #126 * feat(io): Migrate pkg/mcp to use Medium abstraction Fixes #103 * chore(io): Migrate internal/cmd/docs/* to Medium abstraction Fixes #113 * chore(io): Migrate internal/cmd/dev/* to Medium abstraction Fixes #114 * chore(io): Migrate internal/cmd/setup/* to Medium abstraction * chore(io): Complete migration of internal/cmd/dev/* to Medium abstraction * chore(io): Migrate internal/cmd/sdk, pkgcmd, and workspace to Medium abstraction * style: fix formatting in internal/variants Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor(io): simplify local Medium implementation Rewrote to match the simpler TypeScript pattern: - path() sanitizes and returns string directly - Each method calls path() once - No complex symlink validation - Less code, less attack surface Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * test(mcp): update sandboxing tests for simplified Medium The simplified io/local.Medium implementation: - Sanitizes .. to . (no error, path is cleaned) - Allows absolute paths through (caller validates if needed) - Follows symlinks (no traversal blocking) Update tests to match this simplified behavior. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(updater): resolve PkgVersion duplicate declaration Remove var PkgVersion from updater.go since go generate creates const PkgVersion in version.go. Track version.go in git to ensure builds work without running go generate first. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 04:20:18 +00:00
// Check if directory exists by listing it
if _, err := io.Local.List(docsDir); err == nil {
_ = filepath.WalkDir(docsDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return nil
}
// Skip plans/ directory
if d.IsDir() && d.Name() == "plans" {
return filepath.SkipDir
}
// Skip non-markdown files
if d.IsDir() || !strings.HasSuffix(d.Name(), ".md") {
return nil
}
// Get relative path from docs/
relPath, _ := filepath.Rel(docsDir, path)
info.DocsFiles = append(info.DocsFiles, relPath)
info.HasDocs = true
return nil
})
}
return info
}