Compare commits


5 commits

Author SHA1 Message Date
Snider
a77024aad4 feat(collect): add local directory collection
Add `borg collect local` command to collect files from the local
filesystem into a DataNode.

Features:
- Walks directory tree (defaults to CWD)
- Respects .gitignore patterns by default
- Excludes hidden files by default (--hidden to include)
- Custom exclude patterns via --exclude flag
- Output formats: datanode, tim, trix, stim
- Compression: none, gz, xz

Examples:
  borg collect local
  borg collect local ./src --output src.tar.xz --compression xz
  borg collect local . --format stim --password secret

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 07:12:10 +00:00
Snider
eae9de0cf6
Merge pull request #18 from Snider/dependabot/go_modules/golang.org/x/crypto-0.45.0
Bump golang.org/x/crypto from 0.44.0 to 0.45.0
2026-02-02 06:43:32 +00:00
Snider
6e38c4f3a6
Merge pull request #112 from Snider/copilot/combine-prs-into-one-update
[WIP] Combine multiple PRs into a single squash commit
2026-02-02 06:35:39 +00:00
copilot-swe-agent[bot]
c26d841b1b Initial plan 2026-02-02 05:36:04 +00:00
dependabot[bot]
b94ffbab5e
Bump golang.org/x/crypto from 0.44.0 to 0.45.0
Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.44.0 to 0.45.0.
- [Commits](https://github.com/golang/crypto/compare/v0.44.0...v0.45.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.45.0
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-20 02:44:02 +00:00
15 changed files with 438 additions and 935 deletions

View file

@@ -1,10 +1,10 @@
package cmd
import (
"fmt"
"io"
"io/fs"
"net/url"
"os"
"strings"
@@ -18,105 +18,98 @@ import (
"github.com/spf13/cobra"
)
var githubAllCmd = NewGithubAllCmd()
var allCmd = NewAllCmd()
func NewGithubAllCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "all <owner/repo>",
Short: "Collect all resources from a GitHub repository",
Long: `Collect all resources from a GitHub repository, including code, issues, and pull requests.`,
func NewAllCmd() *cobra.Command {
allCmd := &cobra.Command{
Use: "all [url]",
Short: "Collect all resources from a URL",
Long: `Collect all resources from a URL, dispatching to the appropriate collector based on the URL type.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
repoPath := args[0]
parts := strings.Split(repoPath, "/")
if len(parts) != 2 {
return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
}
owner, repo := parts[0], parts[1]
outputFile, err := cmd.Flags().GetString("output")
if err != nil {
return fmt.Errorf("error getting output flag: %w", err)
}
format, err := cmd.Flags().GetString("format")
if err != nil {
return fmt.Errorf("error getting format flag: %w", err)
}
compression, err := cmd.Flags().GetString("compression")
if err != nil {
return fmt.Errorf("error getting compression flag: %w", err)
}
password, err := cmd.Flags().GetString("password")
if err != nil {
return fmt.Errorf("error getting password flag: %w", err)
}
collectIssues, err := cmd.Flags().GetBool("issues")
if err != nil {
return fmt.Errorf("error getting issues flag: %w", err)
}
collectPRs, err := cmd.Flags().GetBool("prs")
if err != nil {
return fmt.Errorf("error getting prs flag: %w", err)
}
collectCode, err := cmd.Flags().GetBool("code")
if err != nil {
return fmt.Errorf("error getting code flag: %w", err)
}
url := args[0]
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
password, _ := cmd.Flags().GetString("password")
if format != "datanode" && format != "tim" && format != "trix" {
return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
}
allDataNodes := datanode.New()
owner, err := parseGithubOwner(url)
if err != nil {
return err
}
repos, err := GithubClient.GetPublicRepos(cmd.Context(), owner)
if err != nil {
return err
}
prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
prompter.Start()
defer prompter.Stop()
if collectCode {
var progressWriter io.Writer
if prompter.IsInteractive() {
bar := ui.NewProgressBar(-1, "Cloning repository")
progressWriter = ui.NewProgressWriter(bar)
}
cloner := vcs.NewGitCloner()
repoURL := fmt.Sprintf("https://github.com/%s/%s.git", owner, repo)
var progressWriter io.Writer
if prompter.IsInteractive() {
bar := ui.NewProgressBar(len(repos), "Cloning repositories")
progressWriter = ui.NewProgressWriter(bar)
}
cloner := vcs.NewGitCloner()
allDataNodes := datanode.New()
for _, repoURL := range repos {
dn, err := cloner.CloneGitRepository(repoURL, progressWriter)
if err != nil {
return fmt.Errorf("error cloning repository: %w", err)
// Log the error and continue
fmt.Fprintln(cmd.ErrOrStderr(), "Error cloning repository:", err)
continue
}
if mergeErr := mergeDataNodes(allDataNodes, dn, "code"); mergeErr != nil {
return fmt.Errorf("error merging code datanode: %w", mergeErr)
}
}
// This is not an efficient way to merge datanodes, but it's the only way for now
// A better approach would be to add a Merge method to the DataNode
repoName := strings.TrimSuffix(repoURL, ".git")
parts := strings.Split(repoName, "/")
repoName = parts[len(parts)-1]
client := github.NewGithubClient()
if collectIssues {
dn, err := client.GetIssues(cmd.Context(), owner, repo)
err = dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
if err != nil {
return err
}
if !de.IsDir() {
err := func() error {
file, err := dn.Open(path)
if err != nil {
return err
}
defer file.Close()
data, err := io.ReadAll(file)
if err != nil {
return err
}
allDataNodes.AddData(repoName+"/"+path, data)
return nil
}()
if err != nil {
return err
}
}
return nil
})
if err != nil {
return fmt.Errorf("error getting issues: %w", err)
}
if mergeErr := mergeDataNodes(allDataNodes, dn, ""); mergeErr != nil {
return fmt.Errorf("error merging issues datanode: %w", mergeErr)
}
}
if collectPRs {
dn, err := client.GetPullRequests(cmd.Context(), owner, repo)
if err != nil {
return fmt.Errorf("error getting pull requests: %w", err)
}
if mergeErr := mergeDataNodes(allDataNodes, dn, ""); mergeErr != nil {
return fmt.Errorf("error merging pull requests datanode: %w", mergeErr)
fmt.Fprintln(cmd.ErrOrStderr(), "Error walking datanode:", err)
continue
}
}
var data []byte
if format == "tim" {
t, err := tim.FromDataNode(allDataNodes)
tim, err := tim.FromDataNode(allDataNodes)
if err != nil {
return fmt.Errorf("error creating tim: %w", err)
}
data, err = t.ToTar()
data, err = tim.ToTar()
if err != nil {
return fmt.Errorf("error serializing tim: %w", err)
}
@@ -137,67 +130,49 @@ func NewGithubAllCmd() *cobra.Command {
return fmt.Errorf("error compressing data: %w", err)
}
if outputFile == "" {
outputFile = fmt.Sprintf("%s-all.%s", repo, format)
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
return fmt.Errorf("error writing DataNode to file: %w", err)
}
fmt.Fprintln(cmd.OutOrStdout(), "All resources saved to", outputFile)
fmt.Fprintln(cmd.OutOrStdout(), "All repositories saved to", outputFile)
return nil
},
}
cmd.Flags().String("output", "", "Output file for the DataNode")
cmd.Flags().String("format", "datanode", "Output format (datanode, tim, or trix)")
cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
cmd.Flags().String("password", "", "Password for encryption")
cmd.Flags().Bool("issues", true, "Collect issues")
cmd.Flags().Bool("prs", true, "Collect pull requests")
cmd.Flags().Bool("code", true, "Collect code")
return cmd
allCmd.PersistentFlags().String("output", "all.dat", "Output file for the DataNode")
allCmd.PersistentFlags().String("format", "datanode", "Output format (datanode, tim, or trix)")
allCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
allCmd.PersistentFlags().String("password", "", "Password for encryption")
return allCmd
}
func GetGithubAllCmd() *cobra.Command {
return githubAllCmd
func GetAllCmd() *cobra.Command {
return allCmd
}
func init() {
collectGithubCmd.AddCommand(GetGithubAllCmd())
RootCmd.AddCommand(GetAllCmd())
}
func mergeDataNodes(dest *datanode.DataNode, src *datanode.DataNode, prefix string) error {
return src.Walk(".", func(path string, de fs.DirEntry, err error) error {
if err != nil {
return err
}
if !de.IsDir() {
err := func() error {
file, err := src.Open(path)
if err != nil {
return err
}
defer file.Close()
data, err := io.ReadAll(file)
if err != nil {
return err
}
destPath := path
if prefix != "" {
destPath = prefix + "/" + path
}
dest.AddData(destPath, data)
return nil
}()
if err != nil {
return err
}
}
return nil
})
func parseGithubOwner(u string) (string, error) {
owner, _, err := github.ParseRepoFromURL(u)
if err == nil {
return owner, nil
}
parsedURL, err := url.Parse(u)
if err != nil {
return "", fmt.Errorf("invalid URL: %w", err)
}
path := strings.Trim(parsedURL.Path, "/")
if path == "" {
return "", fmt.Errorf("invalid owner URL: %s", u)
}
parts := strings.Split(path, "/")
if len(parts) != 1 || parts[0] == "" {
return "", fmt.Errorf("invalid owner URL: %s", u)
}
return parts[0], nil
}
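A quick sketch of how the new parseGithubOwner helper is expected to behave for owner-only URLs (illustrative test, not part of the diff; it assumes the fallback path is taken when github.ParseRepoFromURL rejects a URL without a repository segment):

package cmd

import "testing"

// Illustrative only: exercises parseGithubOwner's fallback parsing of a bare
// owner/organisation URL, and its rejection of a URL with no owner path.
func TestParseGithubOwnerSketch(t *testing.T) {
	owner, err := parseGithubOwner("https://github.com/Snider")
	if err != nil || owner != "Snider" {
		t.Fatalf("expected owner %q, got %q (err: %v)", "Snider", owner, err)
	}
	if _, err := parseGithubOwner("https://github.com/"); err == nil {
		t.Fatal("expected an error for a URL with no owner path")
	}
}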

View file

@@ -42,7 +42,7 @@ func TestAllCmd_Good(t *testing.T) {
}()
rootCmd := NewRootCmd()
rootCmd.AddCommand(GetGithubAllCmd())
rootCmd.AddCommand(GetAllCmd())
// Execute command
out := filepath.Join(t.TempDir(), "out")
@@ -75,7 +75,7 @@ func TestAllCmd_Bad(t *testing.T) {
}()
rootCmd := NewRootCmd()
rootCmd.AddCommand(GetGithubAllCmd())
rootCmd.AddCommand(GetAllCmd())
// Execute command
out := filepath.Join(t.TempDir(), "out")
@@ -104,7 +104,7 @@ func TestAllCmd_Ugly(t *testing.T) {
}()
rootCmd := NewRootCmd()
rootCmd.AddCommand(GetGithubAllCmd())
rootCmd.AddCommand(GetAllCmd())
// Execute command
out := filepath.Join(t.TempDir(), "out")

View file

@@ -1,84 +0,0 @@
package cmd
import (
"fmt"
"os"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/github"
"github.com/Snider/Borg/pkg/ui"
"github.com/spf13/cobra"
)
// NewCollectGithubIssuesCmd creates a new cobra command for collecting github issues.
func NewCollectGithubIssuesCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "issues <owner/repo>",
Short: "Collect issues from a GitHub repository",
Long: `Collect all issues from a GitHub repository and store them in a DataNode.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
repoPath := args[0]
parts := strings.Split(repoPath, "/")
if len(parts) != 2 {
return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
}
owner, repo := parts[0], parts[1]
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
if format != "datanode" {
return fmt.Errorf("invalid format: %s (must be 'datanode')", format)
}
if compression != "none" && compression != "gz" && compression != "xz" {
return fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
}
prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
prompter.Start()
defer prompter.Stop()
client := github.NewGithubClient()
dn, err := client.GetIssues(cmd.Context(), owner, repo)
if err != nil {
return fmt.Errorf("error getting issues: %w", err)
}
data, err := dn.ToTar()
if err != nil {
return fmt.Errorf("error serializing DataNode: %w", err)
}
compressedData, err := compress.Compress(data, compression)
if err != nil {
return fmt.Errorf("error compressing data: %w", err)
}
if outputFile == "" {
outputFile = "issues." + format
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
return fmt.Errorf("error writing DataNode to file: %w", err)
}
fmt.Fprintln(cmd.OutOrStdout(), "Issues saved to", outputFile)
return nil
},
}
cmd.Flags().String("output", "", "Output file for the DataNode")
cmd.Flags().String("format", "datanode", "Output format (datanode)")
cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
return cmd
}
func init() {
GetCollectGithubCmd().AddCommand(NewCollectGithubIssuesCmd())
}

View file

@@ -1,53 +0,0 @@
package cmd
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/Snider/Borg/pkg/github"
"github.com/stretchr/testify/assert"
)
func TestCollectGithubIssuesCmd(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/repos/owner/repo/issues" {
w.Header().Set("Content-Type", "application/json")
issues := []github.Issue{
{Number: 1, Title: "Issue 1", CommentsURL: "http://" + r.Host + "/repos/owner/repo/issues/1/comments"},
}
json.NewEncoder(w).Encode(issues)
} else if r.URL.Path == "/repos/owner/repo/issues/1/comments" {
w.Header().Set("Content-Type", "application/json")
w.Write([]byte("[]"))
} else {
http.NotFound(w, r)
}
}))
defer server.Close()
originalNewAuthenticatedClient := github.NewAuthenticatedClient
github.NewAuthenticatedClient = func(ctx context.Context) *http.Client {
return server.Client()
}
defer func() {
github.NewAuthenticatedClient = originalNewAuthenticatedClient
}()
cmd := NewCollectGithubIssuesCmd()
var out bytes.Buffer
cmd.SetOut(&out)
cmd.SetErr(&out)
cmd.SetArgs([]string{"owner/repo", "--output", "issues.dat"})
err := cmd.Execute()
assert.NoError(t, err)
_, err = os.Stat("issues.dat")
assert.NoError(t, err)
os.Remove("issues.dat")
}

View file

@@ -1,84 +0,0 @@
package cmd
import (
"fmt"
"os"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/github"
"github.com/Snider/Borg/pkg/ui"
"github.com/spf13/cobra"
)
// NewCollectGithubPrsCmd creates a new cobra command for collecting github pull requests.
func NewCollectGithubPrsCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "prs <owner/repo>",
Short: "Collect pull requests from a GitHub repository",
Long: `Collect all pull requests from a GitHub repository and store them in a DataNode.`,
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
repoPath := args[0]
parts := strings.Split(repoPath, "/")
if len(parts) != 2 {
return fmt.Errorf("invalid repository path: %s (must be in the format <owner>/<repo>)", repoPath)
}
owner, repo := parts[0], parts[1]
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
if format != "datanode" {
return fmt.Errorf("invalid format: %s (must be 'datanode')", format)
}
if compression != "none" && compression != "gz" && compression != "xz" {
return fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
}
prompter := ui.NewNonInteractivePrompter(ui.GetVCSQuote)
prompter.Start()
defer prompter.Stop()
client := github.NewGithubClient()
dn, err := client.GetPullRequests(cmd.Context(), owner, repo)
if err != nil {
return fmt.Errorf("error getting pull requests: %w", err)
}
data, err := dn.ToTar()
if err != nil {
return fmt.Errorf("error serializing DataNode: %w", err)
}
compressedData, err := compress.Compress(data, compression)
if err != nil {
return fmt.Errorf("error compressing data: %w", err)
}
if outputFile == "" {
outputFile = "prs." + format
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
return fmt.Errorf("error writing DataNode to file: %w", err)
}
fmt.Fprintln(cmd.OutOrStdout(), "Pull requests saved to", outputFile)
return nil
},
}
cmd.Flags().String("output", "", "Output file for the DataNode")
cmd.Flags().String("format", "datanode", "Output format (datanode)")
cmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
return cmd
}
func init() {
GetCollectGithubCmd().AddCommand(NewCollectGithubPrsCmd())
}

View file

@@ -1,64 +0,0 @@
package cmd
import (
"bytes"
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/Snider/Borg/pkg/github"
"github.com/stretchr/testify/assert"
)
func TestCollectGithubPrsCmd(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/repos/owner/repo/pulls" {
w.Header().Set("Content-Type", "application/json")
prs := []github.PullRequest{
{
Number: 1, Title: "PR 1",
DiffURL: "http://" + r.Host + "/repos/owner/repo/pulls/1.diff",
Links: struct {
Comments struct{ Href string `json:"href"` } `json:"comments"`
ReviewComments struct{ Href string `json:"href"` } `json:"review_comments"`
}{
ReviewComments: struct{ Href string `json:"href"` }{Href: "http://" + r.Host + "/repos/owner/repo/pulls/1/comments"},
},
},
}
json.NewEncoder(w).Encode(prs)
} else if r.URL.Path == "/repos/owner/repo/pulls/1.diff" {
w.Write([]byte("diff --git a/file b/file"))
} else if r.URL.Path == "/repos/owner/repo/pulls/1/comments" {
w.Header().Set("Content-Type", "application/json")
w.Write([]byte("[]"))
} else {
http.NotFound(w, r)
}
}))
defer server.Close()
originalNewAuthenticatedClient := github.NewAuthenticatedClient
github.NewAuthenticatedClient = func(ctx context.Context) *http.Client {
return server.Client()
}
defer func() {
github.NewAuthenticatedClient = originalNewAuthenticatedClient
}()
cmd := NewCollectGithubPrsCmd()
var out bytes.Buffer
cmd.SetOut(&out)
cmd.SetErr(&out)
cmd.SetArgs([]string{"owner/repo", "--output", "prs.dat"})
err := cmd.Execute()
assert.NoError(t, err)
_, err = os.Stat("prs.dat")
assert.NoError(t, err)
os.Remove("prs.dat")
}

333
cmd/collect_local.go Normal file
View file

@@ -0,0 +1,333 @@
package cmd
import (
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/tim"
"github.com/Snider/Borg/pkg/trix"
"github.com/Snider/Borg/pkg/ui"
"github.com/spf13/cobra"
)
type CollectLocalCmd struct {
cobra.Command
}
// NewCollectLocalCmd creates a new collect local command
func NewCollectLocalCmd() *CollectLocalCmd {
c := &CollectLocalCmd{}
c.Command = cobra.Command{
Use: "local [directory]",
Short: "Collect files from a local directory",
Long: `Collect files from a local directory and store them in a DataNode.
If no directory is specified, the current working directory is used.
Examples:
borg collect local
borg collect local ./src
borg collect local /path/to/project --output project.tar
borg collect local . --format stim --password secret
borg collect local . --exclude "*.log" --exclude "node_modules"`,
Args: cobra.MaximumNArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
directory := "."
if len(args) > 0 {
directory = args[0]
}
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
password, _ := cmd.Flags().GetString("password")
excludes, _ := cmd.Flags().GetStringSlice("exclude")
includeHidden, _ := cmd.Flags().GetBool("hidden")
respectGitignore, _ := cmd.Flags().GetBool("gitignore")
finalPath, err := CollectLocal(directory, outputFile, format, compression, password, excludes, includeHidden, respectGitignore)
if err != nil {
return err
}
fmt.Fprintln(cmd.OutOrStdout(), "Files saved to", finalPath)
return nil
},
}
c.Flags().String("output", "", "Output file for the DataNode")
c.Flags().String("format", "datanode", "Output format (datanode, tim, trix, or stim)")
c.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
c.Flags().String("password", "", "Password for encryption (required for stim/trix format)")
c.Flags().StringSlice("exclude", nil, "Patterns to exclude (can be specified multiple times)")
c.Flags().Bool("hidden", false, "Include hidden files and directories")
c.Flags().Bool("gitignore", true, "Respect .gitignore files (default: true)")
return c
}
func init() {
collectCmd.AddCommand(&NewCollectLocalCmd().Command)
}
// CollectLocal collects files from a local directory into a DataNode
func CollectLocal(directory string, outputFile string, format string, compression string, password string, excludes []string, includeHidden bool, respectGitignore bool) (string, error) {
// Validate format
if format != "datanode" && format != "tim" && format != "trix" && format != "stim" {
return "", fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', 'trix', or 'stim')", format)
}
if (format == "stim" || format == "trix") && password == "" {
return "", fmt.Errorf("password is required for %s format", format)
}
if compression != "none" && compression != "gz" && compression != "xz" {
return "", fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
}
// Resolve directory path
absDir, err := filepath.Abs(directory)
if err != nil {
return "", fmt.Errorf("error resolving directory path: %w", err)
}
info, err := os.Stat(absDir)
if err != nil {
return "", fmt.Errorf("error accessing directory: %w", err)
}
if !info.IsDir() {
return "", fmt.Errorf("not a directory: %s", absDir)
}
// Load gitignore patterns if enabled
var gitignorePatterns []string
if respectGitignore {
gitignorePatterns = loadGitignore(absDir)
}
// Create DataNode and collect files
dn := datanode.New()
var fileCount int
bar := ui.NewProgressBar(-1, "Scanning files")
defer bar.Finish()
err = filepath.WalkDir(absDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
// Get relative path
relPath, err := filepath.Rel(absDir, path)
if err != nil {
return err
}
// Skip root
if relPath == "." {
return nil
}
// Skip hidden files/dirs unless explicitly included
if !includeHidden && isHidden(relPath) {
if d.IsDir() {
return filepath.SkipDir
}
return nil
}
// Check gitignore patterns
if respectGitignore && matchesGitignore(relPath, d.IsDir(), gitignorePatterns) {
if d.IsDir() {
return filepath.SkipDir
}
return nil
}
// Check exclude patterns
if matchesExclude(relPath, excludes) {
if d.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories (they're implicit in DataNode)
if d.IsDir() {
return nil
}
// Read file content
content, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("error reading %s: %w", relPath, err)
}
// Add to DataNode with forward slashes (tar convention)
dn.AddData(filepath.ToSlash(relPath), content)
fileCount++
bar.Describe(fmt.Sprintf("Collected %d files", fileCount))
return nil
})
if err != nil {
return "", fmt.Errorf("error walking directory: %w", err)
}
if fileCount == 0 {
return "", fmt.Errorf("no files found in %s", directory)
}
bar.Describe(fmt.Sprintf("Packaging %d files", fileCount))
// Convert to output format
var data []byte
if format == "tim" {
t, err := tim.FromDataNode(dn)
if err != nil {
return "", fmt.Errorf("error creating tim: %w", err)
}
data, err = t.ToTar()
if err != nil {
return "", fmt.Errorf("error serializing tim: %w", err)
}
} else if format == "stim" {
t, err := tim.FromDataNode(dn)
if err != nil {
return "", fmt.Errorf("error creating tim: %w", err)
}
data, err = t.ToSigil(password)
if err != nil {
return "", fmt.Errorf("error encrypting stim: %w", err)
}
} else if format == "trix" {
data, err = trix.ToTrix(dn, password)
if err != nil {
return "", fmt.Errorf("error serializing trix: %w", err)
}
} else {
data, err = dn.ToTar()
if err != nil {
return "", fmt.Errorf("error serializing DataNode: %w", err)
}
}
// Apply compression
compressedData, err := compress.Compress(data, compression)
if err != nil {
return "", fmt.Errorf("error compressing data: %w", err)
}
// Determine output filename
if outputFile == "" {
baseName := filepath.Base(absDir)
if baseName == "." || baseName == "/" {
baseName = "local"
}
outputFile = baseName + "." + format
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
return "", fmt.Errorf("error writing output file: %w", err)
}
return outputFile, nil
}
// isHidden checks if a path component starts with a dot
func isHidden(path string) bool {
parts := strings.Split(filepath.ToSlash(path), "/")
for _, part := range parts {
if strings.HasPrefix(part, ".") {
return true
}
}
return false
}
// loadGitignore loads patterns from .gitignore if it exists
func loadGitignore(dir string) []string {
var patterns []string
gitignorePath := filepath.Join(dir, ".gitignore")
content, err := os.ReadFile(gitignorePath)
if err != nil {
return patterns
}
lines := strings.Split(string(content), "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
// Skip empty lines and comments
if line == "" || strings.HasPrefix(line, "#") {
continue
}
patterns = append(patterns, line)
}
return patterns
}
// matchesGitignore checks if a path matches any gitignore pattern
func matchesGitignore(path string, isDir bool, patterns []string) bool {
for _, pattern := range patterns {
// Handle directory-only patterns
if strings.HasSuffix(pattern, "/") {
if !isDir {
continue
}
pattern = strings.TrimSuffix(pattern, "/")
}
// Handle negation (simplified - just skip negated patterns)
if strings.HasPrefix(pattern, "!") {
continue
}
// Match against path components
matched, _ := filepath.Match(pattern, filepath.Base(path))
if matched {
return true
}
// Also try matching the full path
matched, _ = filepath.Match(pattern, path)
if matched {
return true
}
// Handle ** patterns (simplified)
if strings.Contains(pattern, "**") {
simplePattern := strings.ReplaceAll(pattern, "**", "*")
matched, _ = filepath.Match(simplePattern, path)
if matched {
return true
}
}
}
return false
}
// matchesExclude checks if a path matches any exclude pattern
func matchesExclude(path string, excludes []string) bool {
for _, pattern := range excludes {
// Match against basename
matched, _ := filepath.Match(pattern, filepath.Base(path))
if matched {
return true
}
// Match against full path
matched, _ = filepath.Match(pattern, path)
if matched {
return true
}
}
return false
}
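For callers that want this behaviour from Go rather than the CLI, the file also exports CollectLocal. A minimal usage sketch follows (assuming the cmd package is importable at github.com/Snider/Borg/cmd, an inference from the module path used elsewhere in the diff):

package main

import (
	"fmt"
	"log"

	"github.com/Snider/Borg/cmd" // assumed import path for the cmd package
)

func main() {
	// Collect ./src into src.tim.xz: tim format, xz compression, no password,
	// extra exclude patterns, hidden files skipped, .gitignore respected.
	out, err := cmd.CollectLocal(
		"./src",
		"src.tim.xz",
		"tim",
		"xz",
		"",
		[]string{"*.log", "node_modules"},
		false,
		true,
	)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("saved to", out)
}

On success CollectLocal returns the path it wrote; when outputFile is empty it derives a name from the directory and the chosen format/compression, as shown in the code above.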

Binary file not shown.

2
go.mod
View file

@@ -60,7 +60,7 @@ require (
github.com/wailsapp/go-webview2 v1.0.22 // indirect
github.com/wailsapp/mimetype v1.4.1 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
golang.org/x/crypto v0.44.0 // indirect
golang.org/x/crypto v0.45.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/term v0.37.0 // indirect
golang.org/x/text v0.31.0 // indirect

4
go.sum
View file

@@ -155,8 +155,8 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=

View file

@@ -8,7 +8,6 @@ import (
"os"
"strings"
"github.com/Snider/Borg/pkg/datanode"
"golang.org/x/oauth2"
)
@@ -19,8 +18,6 @@ type Repo struct {
// GithubClient is an interface for interacting with the Github API.
type GithubClient interface {
GetPublicRepos(ctx context.Context, userOrOrg string) ([]string, error)
GetIssues(ctx context.Context, owner, repo string) (*datanode.DataNode, error)
GetPullRequests(ctx context.Context, owner, repo string) (*datanode.DataNode, error)
}
// NewGithubClient creates a new GithubClient.
@@ -28,9 +25,7 @@ func NewGithubClient() GithubClient {
return &githubClient{}
}
type githubClient struct {
apiURL string
}
type githubClient struct{}
// NewAuthenticatedClient creates a new authenticated http client.
var NewAuthenticatedClient = func(ctx context.Context) *http.Client {

View file

@@ -1,156 +0,0 @@
package github
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"time"
"github.com/Snider/Borg/pkg/datanode"
)
type Issue struct {
Number int `json:"number"`
Title string `json:"title"`
Body string `json:"body"`
State string `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
Labels []struct {
Name string `json:"name"`
} `json:"labels"`
CommentsURL string `json:"comments_url"`
}
type Comment struct {
Body string `json:"body"`
CreatedAt time.Time `json:"created_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
}
func (g *githubClient) GetIssues(ctx context.Context, owner, repo string) (*datanode.DataNode, error) {
dn := datanode.New()
client := NewAuthenticatedClient(ctx)
apiURL := "https://api.github.com"
if g.apiURL != "" {
apiURL = g.apiURL
}
url := fmt.Sprintf("%s/repos/%s/%s/issues", apiURL, owner, repo)
var allIssues []Issue
for url != "" {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Borg-Data-Collector")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, fmt.Errorf("failed to fetch issues: %s", resp.Status)
}
var issues []Issue
if err := json.NewDecoder(resp.Body).Decode(&issues); err != nil {
return nil, err
}
allIssues = append(allIssues, issues...)
linkHeader := resp.Header.Get("Link")
url = g.findNextURL(linkHeader)
}
for _, issue := range allIssues {
var markdown strings.Builder
markdown.WriteString(fmt.Sprintf("# Issue %d: %s\n\n", issue.Number, issue.Title))
markdown.WriteString(fmt.Sprintf("**Author**: %s\n", issue.User.Login))
markdown.WriteString(fmt.Sprintf("**State**: %s\n", issue.State))
markdown.WriteString(fmt.Sprintf("**Created**: %s\n", issue.CreatedAt.Format(time.RFC1123)))
markdown.WriteString(fmt.Sprintf("**Updated**: %s\n\n", issue.UpdatedAt.Format(time.RFC1123)))
if len(issue.Labels) > 0 {
markdown.WriteString("**Labels**:\n")
for _, label := range issue.Labels {
markdown.WriteString(fmt.Sprintf("- %s\n", label.Name))
}
markdown.WriteString("\n")
}
markdown.WriteString("## Body\n\n")
markdown.WriteString(issue.Body)
markdown.WriteString("\n\n")
// Fetch comments
comments, err := g.getComments(ctx, issue.CommentsURL)
if err != nil {
return nil, err
}
if len(comments) > 0 {
markdown.WriteString("## Comments\n\n")
for _, comment := range comments {
markdown.WriteString(fmt.Sprintf("**%s** commented on %s:\n\n", comment.User.Login, comment.CreatedAt.Format(time.RFC1123)))
markdown.WriteString(comment.Body)
markdown.WriteString("\n\n---\n\n")
}
}
filename := fmt.Sprintf("issues/%d.md", issue.Number)
dn.AddData(filename, []byte(markdown.String()))
}
// Add an index file
index, err := json.MarshalIndent(allIssues, "", " ")
if err != nil {
return nil, err
}
dn.AddData("issues/INDEX.json", index)
return dn, nil
}
func (g *githubClient) getComments(ctx context.Context, url string) ([]Comment, error) {
client := NewAuthenticatedClient(ctx)
var allComments []Comment
for url != "" {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Borg-Data-Collector")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, fmt.Errorf("failed to fetch comments: %s", resp.Status)
}
var comments []Comment
if err := json.NewDecoder(resp.Body).Decode(&comments); err != nil {
return nil, err
}
allComments = append(allComments, comments...)
linkHeader := resp.Header.Get("Link")
url = g.findNextURL(linkHeader)
}
return allComments, nil
}

View file

@@ -1,67 +0,0 @@
package github
import (
"context"
"encoding/json"
"io/fs"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
func TestGetIssues(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/repos/owner/repo/issues" {
w.Header().Set("Content-Type", "application/json")
issues := []Issue{
{Number: 1, Title: "Issue 1", CommentsURL: "http://" + r.Host + "/repos/owner/repo/issues/1/comments"},
{Number: 2, Title: "Issue 2", CommentsURL: "http://" + r.Host + "/repos/owner/repo/issues/2/comments"},
}
json.NewEncoder(w).Encode(issues)
} else if r.URL.Path == "/repos/owner/repo/issues/1/comments" {
w.Header().Set("Content-Type", "application/json")
comments := []Comment{
{Body: "Comment 1"},
}
json.NewEncoder(w).Encode(comments)
} else if r.URL.Path == "/repos/owner/repo/issues/2/comments" {
w.Header().Set("Content-Type", "application/json")
w.Write([]byte("[]"))
} else {
http.NotFound(w, r)
}
}))
defer server.Close()
originalNewAuthenticatedClient := NewAuthenticatedClient
NewAuthenticatedClient = func(ctx context.Context) *http.Client {
return server.Client()
}
defer func() {
NewAuthenticatedClient = originalNewAuthenticatedClient
}()
client := &githubClient{apiURL: server.URL}
dn, err := client.GetIssues(context.Background(), "owner", "repo")
assert.NoError(t, err)
assert.NotNil(t, dn)
expectedFiles := []string{
"issues/1.md",
"issues/2.md",
"issues/INDEX.json",
}
actualFiles := []string{}
dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
if !de.IsDir() {
actualFiles = append(actualFiles, path)
}
return nil
})
assert.ElementsMatch(t, expectedFiles, actualFiles)
}

View file

@@ -1,201 +0,0 @@
package github
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/Snider/Borg/pkg/datanode"
)
type PullRequest struct {
Number int `json:"number"`
Title string `json:"title"`
Body string `json:"body"`
State string `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
MergedAt time.Time `json:"merged_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
Labels []struct {
Name string `json:"name"`
} `json:"labels"`
Links struct {
Comments struct {
Href string `json:"href"`
} `json:"comments"`
ReviewComments struct {
Href string `json:"href"`
} `json:"review_comments"`
} `json:"_links"`
DiffURL string `json:"diff_url"`
}
type ReviewComment struct {
Body string `json:"body"`
Path string `json:"path"`
CreatedAt time.Time `json:"created_at"`
User struct {
Login string `json:"login"`
} `json:"user"`
}
func (g *githubClient) GetPullRequests(ctx context.Context, owner, repo string) (*datanode.DataNode, error) {
dn := datanode.New()
client := NewAuthenticatedClient(ctx)
apiURL := "https://api.github.com"
if g.apiURL != "" {
apiURL = g.apiURL
}
// Get both open and closed pull requests
url := fmt.Sprintf("%s/repos/%s/%s/pulls?state=all", apiURL, owner, repo)
var allPRs []PullRequest
for url != "" {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Borg-Data-Collector")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, fmt.Errorf("failed to fetch pull requests: %s", resp.Status)
}
var prs []PullRequest
if err := json.NewDecoder(resp.Body).Decode(&prs); err != nil {
return nil, err
}
allPRs = append(allPRs, prs...)
linkHeader := resp.Header.Get("Link")
url = g.findNextURL(linkHeader)
}
for _, pr := range allPRs {
var markdown strings.Builder
markdown.WriteString(fmt.Sprintf("# PR %d: %s\n\n", pr.Number, pr.Title))
markdown.WriteString(fmt.Sprintf("**Author**: %s\n", pr.User.Login))
markdown.WriteString(fmt.Sprintf("**State**: %s\n", pr.State))
markdown.WriteString(fmt.Sprintf("**Created**: %s\n", pr.CreatedAt.Format(time.RFC1123)))
markdown.WriteString(fmt.Sprintf("**Updated**: %s\n", pr.UpdatedAt.Format(time.RFC1123)))
if !pr.MergedAt.IsZero() {
markdown.WriteString(fmt.Sprintf("**Merged**: %s\n", pr.MergedAt.Format(time.RFC1123)))
}
markdown.WriteString(fmt.Sprintf("\n**[View Diff](%s)**\n\n", pr.DiffURL))
if len(pr.Labels) > 0 {
markdown.WriteString("**Labels**:\n")
for _, label := range pr.Labels {
markdown.WriteString(fmt.Sprintf("- %s\n", label.Name))
}
markdown.WriteString("\n")
}
markdown.WriteString("## Body\n\n")
markdown.WriteString(pr.Body)
markdown.WriteString("\n\n")
// Fetch diff
diff, err := g.getDiff(ctx, pr.DiffURL)
if err != nil {
return nil, fmt.Errorf("failed to get diff for PR #%d: %w", pr.Number, err)
}
dn.AddData(fmt.Sprintf("pulls/%d.diff", pr.Number), diff)
// Fetch review comments
reviewComments, err := g.getReviewComments(ctx, pr.Links.ReviewComments.Href)
if err != nil {
return nil, err
}
if len(reviewComments) > 0 {
markdown.WriteString("## Review Comments\n\n")
for _, comment := range reviewComments {
markdown.WriteString(fmt.Sprintf("**%s** commented on `%s` at %s:\n\n", comment.User.Login, comment.Path, comment.CreatedAt.Format(time.RFC1123)))
markdown.WriteString(comment.Body)
markdown.WriteString("\n\n---\n\n")
}
}
filename := fmt.Sprintf("pulls/%d.md", pr.Number)
dn.AddData(filename, []byte(markdown.String()))
}
// Add an index file
index, err := json.MarshalIndent(allPRs, "", " ")
if err != nil {
return nil, err
}
dn.AddData("pulls/INDEX.json", index)
return dn, nil
}
func (g *githubClient) getReviewComments(ctx context.Context, url string) ([]ReviewComment, error) {
client := NewAuthenticatedClient(ctx)
var allComments []ReviewComment
for url != "" {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Borg-Data-Collector")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return nil, fmt.Errorf("failed to fetch review comments: %s", resp.Status)
}
var comments []ReviewComment
if err := json.NewDecoder(resp.Body).Decode(&comments); err != nil {
return nil, err
}
allComments = append(allComments, comments...)
linkHeader := resp.Header.Get("Link")
url = g.findNextURL(linkHeader)
}
return allComments, nil
}
func (g *githubClient) getDiff(ctx context.Context, url string) ([]byte, error) {
client := NewAuthenticatedClient(ctx)
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Borg-Data-Collector")
req.Header.Set("Accept", "application/vnd.github.v3.diff")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("failed to fetch diff: %s", resp.Status)
}
return io.ReadAll(resp.Body)
}

View file

@@ -1,91 +0,0 @@
package github
import (
"context"
"encoding/json"
"io/fs"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
func TestGetPullRequests(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/repos/owner/repo/pulls" {
w.Header().Set("Content-Type", "application/json")
prs := []PullRequest{
{
Number: 1, Title: "PR 1",
DiffURL: "http://" + r.Host + "/repos/owner/repo/pulls/1.diff",
Links: struct {
Comments struct{ Href string `json:"href"` } `json:"comments"`
ReviewComments struct{ Href string `json:"href"` } `json:"review_comments"`
}{
ReviewComments: struct{ Href string `json:"href"` }{Href: "http://" + r.Host + "/repos/owner/repo/pulls/1/comments"},
},
},
{
Number: 2, Title: "PR 2",
DiffURL: "http://" + r.Host + "/repos/owner/repo/pulls/2.diff",
Links: struct {
Comments struct{ Href string `json:"href"` } `json:"comments"`
ReviewComments struct{ Href string `json:"href"` } `json:"review_comments"`
}{
ReviewComments: struct{ Href string `json:"href"` }{Href: "http://" + r.Host + "/repos/owner/repo/pulls/2/comments"},
},
},
}
json.NewEncoder(w).Encode(prs)
} else if r.URL.Path == "/repos/owner/repo/pulls/1.diff" {
w.Write([]byte("diff --git a/file b/file"))
} else if r.URL.Path == "/repos/owner/repo/pulls/1/comments" {
w.Header().Set("Content-Type", "application/json")
comments := []ReviewComment{
{Body: "Review Comment 1"},
}
json.NewEncoder(w).Encode(comments)
} else if r.URL.Path == "/repos/owner/repo/pulls/2.diff" {
w.Write([]byte("diff --git a/file2 b/file2"))
} else if r.URL.Path == "/repos/owner/repo/pulls/2/comments" {
w.Header().Set("Content-Type", "application/json")
w.Write([]byte("[]"))
} else {
http.NotFound(w, r)
}
}))
defer server.Close()
originalNewAuthenticatedClient := NewAuthenticatedClient
NewAuthenticatedClient = func(ctx context.Context) *http.Client {
return server.Client()
}
defer func() {
NewAuthenticatedClient = originalNewAuthenticatedClient
}()
client := &githubClient{apiURL: server.URL}
dn, err := client.GetPullRequests(context.Background(), "owner", "repo")
assert.NoError(t, err)
assert.NotNil(t, dn)
expectedFiles := []string{
"pulls/1.md",
"pulls/1.diff",
"pulls/2.md",
"pulls/2.diff",
"pulls/INDEX.json",
}
actualFiles := []string{}
dn.Walk(".", func(path string, de fs.DirEntry, err error) error {
if !de.IsDir() {
actualFiles = append(actualFiles, path)
}
return nil
})
assert.ElementsMatch(t, expectedFiles, actualFiles)
}