diff --git a/README.md b/README.md index 31544f5..a1f734a 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ # Borg Data Collector -As the name might sugest; this pkg collects information and stores it in a Cube file or passes it on; -comes as both a cli tool and a usable package for your go project with a clean export only top level interface. +Assimilate all the data!!! \ +No, seriously, what do you need to download? PWA? A GitHub repo, or every repository they have? A website? Build artefacts? Malware? + +That's why I made Borg, to download and contain sets of data into explorable collections, to reuse later; ATM there is only Zuul, erm, Tar, but that’s all I need right now~ Custom rootFS distroless image and, of course, Nanites (sec ops tooling) to come, but if you want to use and work on a Web3 malware analysis tool, patches welcome (non-sarcastically). + +Oh, Calling Trekkies, the status messages below, you know they are wrong, don't you? It hurts a little? Good, you, you're the one... \ +Take part in Open Source, make us smirk with amusement and make the CLI crack more smirks. 
## Borg Status Scratch Pad diff --git a/cmd/collect_github_repo.go b/cmd/collect_github_repo.go index b4f873d..75b7dff 100644 --- a/cmd/collect_github_repo.go +++ b/cmd/collect_github_repo.go @@ -3,9 +3,13 @@ package cmd import ( "fmt" "os" + "path/filepath" + "strings" "github.com/Snider/Borg/pkg/vcs" + "github.com/Snider/Borg/pkg/github" + "github.com/Snider/Borg/pkg/vcs" "github.com/spf13/cobra" ) @@ -15,33 +19,90 @@ var collectGithubRepoCmd = &cobra.Command{ Short: "Collect a single Git repository", Long: `Collect a single Git repository and store it in a DataNode.`, Args: cobra.ExactArgs(1), +// collectGitCmd represents the collect git command +var collectGitCmd = &cobra.Command{ + Use: "git", + Short: "Collect one or more Git repositories", + Long: `Collect a single Git repository from a URL, or all public repositories from a GitHub user/organization.`, Run: func(cmd *cobra.Command, args []string) { - repoURL := args[0] - outputFile, _ := cmd.Flags().GetString("output") + repoURL, _ := cmd.Flags().GetString("uri") + user, _ := cmd.Flags().GetString("user") + output, _ := cmd.Flags().GetString("output") - dn, err := vcs.CloneGitRepository(repoURL) - if err != nil { - fmt.Printf("Error cloning repository: %v\n", err) - return + if (repoURL == "" && user == "") || (repoURL != "" && user != "") { + fmt.Println("Error: You must specify either --uri or --user, but not both.") + os.Exit(1) } - data, err := dn.ToTar() - if err != nil { - fmt.Printf("Error serializing DataNode: %v\n", err) - return - } + if user != "" { + // User specified, collect all their repos + fmt.Printf("Fetching public repositories for %s...\n", user) + repos, err := github.GetPublicRepos(user) + if err != nil { + fmt.Printf("Error fetching repositories: %v\n", err) + return + } + fmt.Printf("Found %d repositories. 
Cloning...\n\n", len(repos)) - err = os.WriteFile(outputFile, data, 0644) - if err != nil { - fmt.Printf("Error writing DataNode to file: %v\n", err) - return - } + // Ensure output directory exists + err = os.MkdirAll(output, 0755) + if err != nil { + fmt.Printf("Error creating output directory: %v\n", err) + return + } - fmt.Printf("Repository saved to %s\n", outputFile) + for _, repo := range repos { + fmt.Printf("Cloning %s...\n", repo) + dn, err := vcs.CloneGitRepository(repo) + if err != nil { + fmt.Printf(" Error cloning: %v\n", err) + continue + } + + data, err := dn.ToTar() + if err != nil { + fmt.Printf(" Error serializing: %v\n", err) + continue + } + + repoName := strings.TrimSuffix(filepath.Base(repo), ".git") + outputFile := filepath.Join(output, fmt.Sprintf("%s.dat", repoName)) + err = os.WriteFile(outputFile, data, 0644) + if err != nil { + fmt.Printf(" Error writing file: %v\n", err) + continue + } + fmt.Printf(" Successfully saved to %s\n", outputFile) + } + fmt.Println("\nCollection complete.") + + } else { + // Single repository URL specified + dn, err := vcs.CloneGitRepository(repoURL) + if err != nil { + fmt.Printf("Error cloning repository: %v\n", err) + return + } + + data, err := dn.ToTar() + if err != nil { + fmt.Printf("Error serializing DataNode: %v\n", err) + return + } + + err = os.WriteFile(output, data, 0644) + if err != nil { + fmt.Printf("Error writing DataNode to file: %v\n", err) + return + } + fmt.Printf("Repository saved to %s\n", output) + } }, } func init() { collectGithubCmd.AddCommand(collectGithubRepoCmd) - collectGithubRepoCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode") + collectGithubRepoCmd.Flags().String("uri", "", "URL of the Git repository to collect") + collectGitCmd.Flags().String("user", "", "GitHub user or organization to collect all repositories from") + collectGitCmd.Flags().String("output", "repo.dat", "Output file (for --uri) or directory (for --user)") } diff --git 
a/cmd/root.go b/cmd/root.go index d409cbe..a95ea23 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -8,7 +8,7 @@ import ( // rootCmd represents the base command when called without any subcommands var rootCmd = &cobra.Command{ - Use: "borg-data-collector", + Use: "github.com/Snider/Borg", Short: "A tool for collecting and managing data.", Long: `Borg Data Collector is a command-line tool for cloning Git repositories, packaging their contents into a single file, and managing the data within.`, @@ -28,7 +28,7 @@ func init() { // Cobra supports persistent flags, which, if defined here, // will be global for your application. - // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.borg-data-collector.yaml)") + // rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.github.com/Snider/Borg.yaml)") // Cobra also supports local flags, which will only run // when this action is called directly. diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..b9e4da9 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,7 @@ +# Borg Data Collector Documentation + +This directory contains the documentation for the Borg Data Collector. + +## Table of Contents + +- [Usage](usage.md) diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..52feff0 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,39 @@ +# Usage + +This document explains how to use the Borg Data Collector. + +## `collect git` + +The `collect git` command is used to clone a git repository and store it in a DataNode. + +### Collect a single repository + +```bash +borg collect git --uri https://github.com/torvalds/linux.git --output linux.dat +``` + +### Collect all repositories for a user + +```bash +borg collect git --user torvalds --output /path/to/output/dir +``` + +## `collect website` + +The `collect website` command is used to crawl a website and store it in a DataNode. 
+ +### Example + +```bash +borg collect website --uri https://tldp.org/ +``` + +## `serve` + +The `serve` command is used to serve a DataNode file. + +### Example + +```bash +borg serve --file linux.borg +``` diff --git a/examples/collect_git.sh b/examples/collect_git.sh new file mode 100644 index 0000000..b7fd1d5 --- /dev/null +++ b/examples/collect_git.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Example of how to use the 'collect git' command. + +# This will clone a single git repository and store it in a DataNode. +borg collect git --uri https://github.com/torvalds/linux.git --output linux.dat + +# This will clone all public repositories for a user and store them in a directory. +borg collect git --user torvalds --output /tmp/borg-repos diff --git a/examples/collect_website.sh b/examples/collect_website.sh new file mode 100644 index 0000000..5f96616 --- /dev/null +++ b/examples/collect_website.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# Example of how to use the 'collect website' command. + +# This will crawl the specified website and store it in a DataNode. +borg collect website --uri https://tldp.org/ diff --git a/examples/serve.sh b/examples/serve.sh new file mode 100644 index 0000000..3aa4c9c --- /dev/null +++ b/examples/serve.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# Example of how to use the 'serve' command. + +# This will serve the specified DataNode file. 
+borg serve --file linux.borg diff --git a/go.mod b/go.mod index 1fdfae3..9747d63 100644 --- a/go.mod +++ b/go.mod @@ -32,7 +32,7 @@ require ( github.com/spf13/pflag v1.0.10 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect golang.org/x/crypto v0.43.0 // indirect - golang.org/x/mod v0.29.0 // indirect + golang.org/x/net v0.46.0 // indirect golang.org/x/sys v0.37.0 // indirect golang.org/x/term v0.36.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect diff --git a/go.sum b/go.sum index e2688d7..461f127 100644 --- a/go.sum +++ b/go.sum @@ -35,16 +35,8 @@ github.com/go-git/go-git/v5 v5.16.3 h1:Z8BtvxZ09bYm/yYNgPKCzgWtaRqDTgIKRgIRHBfU6 github.com/go-git/go-git/v5 v5.16.3/go.mod h1:4Ge4alE/5gPs30F2H1esi2gPd69R0C39lolkucHBOp8= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/go-github/v39 v39.2.0 h1:rNNM311XtPOz5rDdsJXAp2o8F67X9FnROXTvto3aSnQ= -github.com/google/go-github/v39 v39.2.0/go.mod h1:C1s8C5aCC9L+JXIYpJM5GYytdX52vC1bLvHEF1IhBrE= -github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= -github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap 
v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= @@ -94,22 +86,14 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= -golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= -golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -121,15 +105,10 @@ golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/go.work b/go.work new file mode 100644 index 0000000..0eda0de --- /dev/null +++ b/go.work @@ -0,0 +1,3 
@@ +go 1.24.3 + +use . diff --git a/pkg/github/github.go b/pkg/github/github.go index f28553b..c7cf8f4 100644 --- a/pkg/github/github.go +++ b/pkg/github/github.go @@ -11,7 +11,15 @@ type Repo struct { } func GetPublicRepos(userOrOrg string) ([]string, error) { - resp, err := http.Get(fmt.Sprintf("https://api.github.com/users/%s/repos", userOrOrg)) + return GetPublicReposWithAPIURL("https://api.github.com", userOrOrg) +} + +func GetPublicReposWithAPIURL(apiURL, userOrOrg string) ([]string, error) { + if userOrOrg == "" { + return nil, fmt.Errorf("user or organization cannot be empty") + } + + resp, err := http.Get(fmt.Sprintf("%s/users/%s/repos", apiURL, userOrOrg)) if err != nil { return nil, err } @@ -19,7 +27,7 @@ func GetPublicRepos(userOrOrg string) ([]string, error) { if resp.StatusCode != http.StatusOK { // Try organization endpoint - resp, err = http.Get(fmt.Sprintf("https://api.github.com/orgs/%s/repos", userOrOrg)) + resp, err = http.Get(fmt.Sprintf("%s/orgs/%s/repos", apiURL, userOrOrg)) if err != nil { return nil, err } diff --git a/pkg/github/github_test.go b/pkg/github/github_test.go new file mode 100644 index 0000000..00e952b --- /dev/null +++ b/pkg/github/github_test.go @@ -0,0 +1,42 @@ +package github + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestGetPublicRepos_Good(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`[{"clone_url": "https://github.com/good/repo.git"}]`)) + })) + defer server.Close() + + repos, err := GetPublicReposWithAPIURL(server.URL, "good") + if err != nil { + t.Errorf("Expected no error, got %v", err) + } + if len(repos) != 1 || repos[0] != "https://github.com/good/repo.git" { + t.Errorf("Expected one repo, got %v", repos) + } +} + +func TestGetPublicRepos_Bad(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + 
w.WriteHeader(http.StatusNotFound) + })) + defer server.Close() + + _, err := GetPublicReposWithAPIURL(server.URL, "bad") + if err == nil { + t.Errorf("Expected an error, got nil") + } +} + +func TestGetPublicRepos_Ugly(t *testing.T) { + _, err := GetPublicReposWithAPIURL("http://localhost", "") + if err == nil { + t.Errorf("Expected an error for empty user/org, got nil") + } +}