This commit is contained in:
Snider 2026-02-12 09:45:48 +00:00 committed by GitHub
commit 70218a2f36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 650 additions and 0 deletions

View file

@ -104,6 +104,9 @@ borg collect github repo <url> # Clone repository
borg collect github repos <owner> # Clone all repos from user/org
borg collect website <url> --depth 2 # Crawl website
borg collect pwa --uri <url> # Download PWA
borg collect npm <package> # Collect npm package
borg collect cargo <package> # Collect cargo crate
borg collect go <module> # Collect Go module
# Compilation
borg compile -f Borgfile -o out.tim # Plain TIM

61
cmd/collect_cargo.go Normal file
View file

@ -0,0 +1,61 @@
package cmd
import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/collect"
"github.com/spf13/cobra"
)
// collectCargoCmd is the package-level `collect cargo` command instance.
// It is built once by NewCollectCargoCmd when package variables are
// initialized, attached to the collect command in init, and exposed through
// GetCollectCargoCmd.
var collectCargoCmd = NewCollectCargoCmd()
// init attaches the cargo subcommand to the parent collect command.
func init() {
	parent := GetCollectCmd()
	parent.AddCommand(GetCollectCargoCmd())
}
// GetCollectCargoCmd returns the shared package-level `collect cargo` command
// (the collectCargoCmd variable) rather than building a new one.
func GetCollectCargoCmd() *cobra.Command {
return collectCargoCmd
}
// NewCollectCargoCmd builds a fresh `collect cargo` command.
//
// The command takes exactly one argument, the crate name. It collects the
// crate with collect.NewCargoCollector, serializes the resulting DataNode with
// ToTar and writes the bytes to the path given by --output, or to
// "<crate>.dat" when that flag is empty. On success it prints the destination
// to the command's output stream.
func NewCollectCargoCmd() *cobra.Command {
	run := func(cmd *cobra.Command, args []string) error {
		crate := args[0]

		target, err := cmd.Flags().GetString("output")
		if err != nil {
			return fmt.Errorf("could not get output flag: %w", err)
		}

		node, err := collect.NewCargoCollector().Collect(crate)
		if err != nil {
			return fmt.Errorf("error collecting cargo package: %w", err)
		}

		payload, err := node.ToTar()
		if err != nil {
			return fmt.Errorf("error serializing DataNode: %w", err)
		}

		// Fall back to a name derived from the crate when no path was given.
		if target == "" {
			target = crate + ".dat"
		}
		if writeErr := os.WriteFile(target, payload, 0644); writeErr != nil {
			return fmt.Errorf("error writing cargo package to file: %w", writeErr)
		}

		fmt.Fprintln(cmd.OutOrStdout(), "Cargo package saved to", target)
		return nil
	}

	command := &cobra.Command{
		Use:   "cargo [package]",
		Short: "Collect a single cargo package",
		Long:  `Collect a single cargo package and store it in a DataNode.`,
		Args:  cobra.ExactArgs(1),
		RunE:  run,
	}
	command.PersistentFlags().String("output", "", "Output file for the DataNode")
	return command
}

61
cmd/collect_go.go Normal file
View file

@ -0,0 +1,61 @@
package cmd
import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/collect"
"github.com/spf13/cobra"
)
// collectGoCmd is the package-level `collect go` command instance.
// It is built once by NewCollectGoCmd when package variables are initialized,
// attached to the collect command in init, and exposed through
// GetCollectGoCmd.
var collectGoCmd = NewCollectGoCmd()
// init attaches the go subcommand to the parent collect command.
func init() {
	parent := GetCollectCmd()
	parent.AddCommand(GetCollectGoCmd())
}
// GetCollectGoCmd returns the shared package-level `collect go` command
// (the collectGoCmd variable) rather than building a new one.
func GetCollectGoCmd() *cobra.Command {
return collectGoCmd
}
// NewCollectGoCmd builds a fresh `collect go` command.
//
// The command takes exactly one argument, the module path. It collects the
// module with collect.NewGoCollector, serializes the resulting DataNode with
// ToTar and writes the bytes to the file named by --output. On success it
// prints the destination to the command's output stream.
//
// When --output is empty the file name is derived from the module path with
// every '/' replaced by '-' and ".dat" appended (for example
// "golang.org/x/text" becomes "golang.org-x-text.dat"). Module paths normally
// contain '/', so using the raw path would point into a directory that
// usually does not exist, and the write would fail only after the whole
// module had already been downloaded.
func NewCollectGoCmd() *cobra.Command {
	collectGoCmd := &cobra.Command{
		Use:   "go [module]",
		Short: "Collect a single Go module",
		Long:  `Collect a single Go module and store it in a DataNode.`,
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			modulePath := args[0]
			outputFile, err := cmd.Flags().GetString("output")
			if err != nil {
				return fmt.Errorf("could not get output flag: %w", err)
			}

			collector := collect.NewGoCollector()
			dn, err := collector.Collect(modulePath)
			if err != nil {
				return fmt.Errorf("error collecting go module: %w", err)
			}

			data, err := dn.ToTar()
			if err != nil {
				return fmt.Errorf("error serializing DataNode: %w", err)
			}

			if outputFile == "" {
				// Flatten path separators so the default name is a plain
				// file in the current directory. Done byte-wise to avoid
				// needing an import this file does not already have.
				flat := []byte(modulePath)
				for i, b := range flat {
					if b == '/' {
						flat[i] = '-'
					}
				}
				outputFile = string(flat) + ".dat"
			}

			if err := os.WriteFile(outputFile, data, 0644); err != nil {
				return fmt.Errorf("error writing go module to file: %w", err)
			}

			fmt.Fprintln(cmd.OutOrStdout(), "Go module saved to", outputFile)
			return nil
		},
	}
	collectGoCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
	return collectGoCmd
}

61
cmd/collect_npm.go Normal file
View file

@ -0,0 +1,61 @@
package cmd
import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/collect"
"github.com/spf13/cobra"
)
// collectNpmCmd is the package-level `collect npm` command instance.
// It is built once by NewCollectNpmCmd when package variables are initialized,
// attached to the collect command in init, and exposed through
// GetCollectNpmCmd.
var collectNpmCmd = NewCollectNpmCmd()
// init attaches the npm subcommand to the parent collect command.
func init() {
	parent := GetCollectCmd()
	parent.AddCommand(GetCollectNpmCmd())
}
// GetCollectNpmCmd returns the shared package-level `collect npm` command
// (the collectNpmCmd variable) rather than building a new one.
func GetCollectNpmCmd() *cobra.Command {
return collectNpmCmd
}
// NewCollectNpmCmd builds a fresh `collect npm` command.
//
// The command takes exactly one argument, the package name. It collects the
// package with collect.NewNPMCollector, serializes the resulting DataNode with
// ToTar and writes the bytes to the file named by --output. On success it
// prints the destination to the command's output stream.
//
// When --output is empty the file name is derived from the package name with
// every '/' replaced by '-' and ".dat" appended (for example "@angular/cli"
// becomes "@angular-cli.dat"). Scoped package names contain '/', so using the
// raw name would point into a directory that usually does not exist, and the
// write would fail only after the package had already been downloaded.
func NewCollectNpmCmd() *cobra.Command {
	collectNpmCmd := &cobra.Command{
		Use:   "npm [package]",
		Short: "Collect a single npm package",
		Long:  `Collect a single npm package and store it in a DataNode.`,
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			packageName := args[0]
			outputFile, err := cmd.Flags().GetString("output")
			if err != nil {
				return fmt.Errorf("could not get output flag: %w", err)
			}

			collector := collect.NewNPMCollector()
			dn, err := collector.Collect(packageName)
			if err != nil {
				return fmt.Errorf("error collecting npm package: %w", err)
			}

			data, err := dn.ToTar()
			if err != nil {
				return fmt.Errorf("error serializing DataNode: %w", err)
			}

			if outputFile == "" {
				// Flatten the scope separator so the default name is a plain
				// file in the current directory. Done byte-wise to avoid
				// needing an import this file does not already have.
				flat := []byte(packageName)
				for i, b := range flat {
					if b == '/' {
						flat[i] = '-'
					}
				}
				outputFile = string(flat) + ".dat"
			}

			if err := os.WriteFile(outputFile, data, 0644); err != nil {
				return fmt.Errorf("error writing npm package to file: %w", err)
			}

			fmt.Fprintln(cmd.OutOrStdout(), "NPM package saved to", outputFile)
			return nil
		},
	}
	collectNpmCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
	return collectNpmCmd
}

View file

@ -21,11 +21,17 @@ Subcommands:
- `borg collect github repos <org-or-user> [--output <file>] [--format ...] [--compression ...]`
- `borg collect website <url> [--depth N] [--output <file>] [--format ...] [--compression ...]`
- `borg collect pwa --uri <url> [--output <file>] [--format ...] [--compression ...]`
- `borg collect npm <package-name> [--output <file>]`
- `borg collect cargo <crate-name> [--output <file>]`
- `borg collect go <module-name> [--output <file>]`
Examples:
- `borg collect github repo https://github.com/Snider/Borg --output borg.dat`
- `borg collect website https://example.com --depth 1 --output site.dat`
- `borg collect pwa --uri https://squoosh.app --output squoosh.dat`
- `borg collect npm @angular/cli --output angular-cli.dat`
- `borg collect cargo serde --output serde.dat`
- `borg collect go golang.org/x/text --output go-text.dat`
### all

114
pkg/collect/cargo.go Normal file
View file

@ -0,0 +1,114 @@
package collect
import (
"encoding/json"
"fmt"
"io"
"net/http"
"github.com/Snider/Borg/pkg/datanode"
)
// CargoRegistryURL is the base URL for the cargo registry.
// fetchCrateMetadata appends "/crates/<name>" to it. Crate downloads do not
// use this constant: fetchAndAddCrate prefixes the dl_path taken from the
// metadata with the bare "https://crates.io" host instead.
const CargoRegistryURL = "https://crates.io/api/v1"
// CargoCollector is a collector for cargo packages.
type CargoCollector struct {
// client issues every HTTP request made by the collector. The field is
// unexported, so it can only be replaced from inside this package.
client *http.Client
}
// NewCargoCollector returns a CargoCollector backed by its own zero-value
// http.Client.
func NewCargoCollector() *CargoCollector {
	collector := new(CargoCollector)
	collector.client = &http.Client{}
	return collector
}
// Collect downloads a crate from the registry into a new DataNode.
//
// The node receives the indented crate metadata as "metadata.json" plus one
// "<version>.crate" entry for every version listed in that metadata. Any
// failed step aborts the collection and returns a nil node with a wrapped
// error.
func (c *CargoCollector) Collect(crateName string) (*datanode.DataNode, error) {
	crate, err := c.fetchCrateMetadata(crateName)
	if err != nil {
		return nil, fmt.Errorf("could not fetch crate metadata: %w", err)
	}

	node := datanode.New()

	encoded, err := json.MarshalIndent(crate, "", " ")
	if err != nil {
		return nil, fmt.Errorf("could not marshal metadata: %w", err)
	}
	node.AddData("metadata.json", encoded)

	for i := range crate.Versions {
		v := crate.Versions[i]
		if fetchErr := c.fetchAndAddCrate(node, v.DlPath, v.Num+".crate"); fetchErr != nil {
			return nil, fmt.Errorf("could not fetch crate for version %s: %w", v.Num, fetchErr)
		}
	}
	return node, nil
}
// fetchCrateMetadata requests "<CargoRegistryURL>/crates/<crateName>" and
// decodes the JSON response into a CargoCrate. The request carries an explicit
// User-Agent header. A non-200 response is reported as a "bad status" error.
func (c *CargoCollector) fetchCrateMetadata(crateName string) (*CargoCrate, error) {
	endpoint := CargoRegistryURL + "/crates/" + crateName

	request, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("User-Agent", "git/oxide-0.38.0")

	response, err := c.client.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("bad status: %s", response.Status)
	}

	decoded := new(CargoCrate)
	if decodeErr := json.NewDecoder(response.Body).Decode(decoded); decodeErr != nil {
		return nil, decodeErr
	}
	return decoded, nil
}
// fetchAndAddCrate downloads "https://crates.io" + downloadURL and stores the
// whole response body in dn under filename. The request carries an explicit
// User-Agent header. A non-200 response is reported as a "bad status" error
// and nothing is added to dn.
func (c *CargoCollector) fetchAndAddCrate(dn *datanode.DataNode, downloadURL, filename string) error {
	request, err := http.NewRequest(http.MethodGet, "https://crates.io"+downloadURL, nil)
	if err != nil {
		return err
	}
	request.Header.Set("User-Agent", "git/oxide-0.38.0")

	response, err := c.client.Do(request)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return fmt.Errorf("bad status: %s", response.Status)
	}

	payload, readErr := io.ReadAll(response.Body)
	if readErr != nil {
		return readErr
	}
	dn.AddData(filename, payload)
	return nil
}
// CargoCrate represents the metadata for a cargo crate.
// It is the decode target for the registry response in fetchCrateMetadata.
// Only the fields declared here are kept, so the "metadata.json" that Collect
// re-marshals from this value contains just these fields.
type CargoCrate struct {
// Crate holds the crate-level fields (JSON key "crate").
Crate CargoCrateData `json:"crate"`
// Versions lists the crate's versions (JSON key "versions"); Collect
// downloads one archive per entry.
Versions []CargoVersionData `json:"versions"`
}
// CargoCrateData represents the crate-level part of the registry metadata
// (the "crate" object of the response).
type CargoCrateData struct {
// Name is the crate name (JSON key "name").
Name string `json:"name"`
}
// CargoVersionData represents the metadata for a specific version of a cargo crate.
type CargoVersionData struct {
// Num is the version string (JSON key "num"); Collect uses it to name the
// stored "<Num>.crate" entry.
Num string `json:"num"`
// DlPath is the download path (JSON key "dl_path") that fetchAndAddCrate
// appends to "https://crates.io".
DlPath string `json:"dl_path"`
}

50
pkg/collect/cargo_test.go Normal file
View file

@ -0,0 +1,50 @@
package collect
import (
"bytes"
"io"
"net/http"
"strings"
"testing"
)
// TestCargoCollector_Collect runs Collect against canned registry responses
// and checks that the metadata file and the single listed version are stored.
func TestCargoCollector_Collect(t *testing.T) {
	canned := make(map[string]*http.Response, 2)

	// Metadata response listing exactly one version.
	canned["https://crates.io/api/v1/crates/monero-rs"] = &http.Response{
		StatusCode: http.StatusOK,
		Body: io.NopCloser(strings.NewReader(`{
"crate": {
"name": "monero-rs"
},
"versions": [
{
"num": "0.1.0",
"dl_path": "/api/v1/crates/monero-rs/0.1.0/download"
}
]
}`)),
	}

	// Download response for that version.
	canned["https://crates.io/api/v1/crates/monero-rs/0.1.0/download"] = &http.Response{
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(bytes.NewReader([]byte("crate content"))),
	}

	transport := &mockHTTPClient{responses: canned}
	collector := &CargoCollector{client: &http.Client{Transport: transport}}

	node, err := collector.Collect("monero-rs")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if _, statErr := node.Stat("metadata.json"); statErr != nil {
		t.Errorf("expected metadata.json to exist")
	}
	if _, statErr := node.Stat("0.1.0.crate"); statErr != nil {
		t.Errorf("expected 0.1.0.crate to exist")
	}
}

81
pkg/collect/go.go Normal file
View file

@ -0,0 +1,81 @@
package collect
import (
"fmt"
"io"
"net/http"
"strings"
"github.com/Snider/Borg/pkg/datanode"
)
// GoProxyURL is the base URL for the Go module proxy.
// fetchModuleVersions appends "/<module>/@v/list" and fetchAndAddSource
// appends "/<module>/@v/<version>.zip" to it.
const GoProxyURL = "https://proxy.golang.org"
// GoCollector is a collector for Go modules.
type GoCollector struct {
// client issues every HTTP request made by the collector. The field is
// unexported, so it can only be replaced from inside this package.
client *http.Client
}
// NewGoCollector returns a GoCollector that sends its requests through
// http.DefaultClient.
func NewGoCollector() *GoCollector {
	collector := new(GoCollector)
	collector.client = http.DefaultClient
	return collector
}
// Collect downloads every version returned by fetchModuleVersions for
// modulePath into a new DataNode, one source archive per version. The first
// failed download aborts the collection and returns a nil node with a wrapped
// error.
func (c *GoCollector) Collect(modulePath string) (*datanode.DataNode, error) {
	list, err := c.fetchModuleVersions(modulePath)
	if err != nil {
		return nil, fmt.Errorf("could not fetch module versions: %w", err)
	}

	node := datanode.New()
	for i := 0; i < len(list); i++ {
		if fetchErr := c.fetchAndAddSource(node, modulePath, list[i]); fetchErr != nil {
			return nil, fmt.Errorf("could not fetch source for version %s: %w", list[i], fetchErr)
		}
	}
	return node, nil
}
// fetchModuleVersions requests "<GoProxyURL>/<modulePath>/@v/list" and returns
// the versions found in the response, in the order they appear.
//
// The body is treated as whitespace-separated version strings. Blank entries
// are dropped: splitting a newline-terminated (or empty) body on "\n" would
// produce an empty version, which Collect would turn into a request for
// "@v/.zip" and fail the whole collection. A body with no versions yields an
// empty slice and a nil error.
//
// A non-200 response is reported as a "bad status" error.
func (c *GoCollector) fetchModuleVersions(modulePath string) ([]string, error) {
	resp, err := c.client.Get(fmt.Sprintf("%s/%s/@v/list", GoProxyURL, modulePath))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("bad status: %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// strings.Fields splits on runs of whitespace, so "\n", "\r\n" and a
	// trailing newline never produce empty version strings.
	return strings.Fields(string(body)), nil
}
// fetchAndAddSource downloads "<GoProxyURL>/<modulePath>/@v/<version>.zip" and
// stores the whole response body in dn as "<version>.zip". A non-200 response
// is reported as a "bad status" error and nothing is added to dn.
func (c *GoCollector) fetchAndAddSource(dn *datanode.DataNode, modulePath, version string) error {
	archiveName := version + ".zip"
	archiveURL := GoProxyURL + "/" + modulePath + "/@v/" + archiveName

	response, err := c.client.Get(archiveURL)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return fmt.Errorf("bad status: %s", response.Status)
	}

	payload, readErr := io.ReadAll(response.Body)
	if readErr != nil {
		return readErr
	}
	dn.AddData(archiveName, payload)
	return nil
}

52
pkg/collect/go_test.go Normal file
View file

@ -0,0 +1,52 @@
package collect
import (
"bytes"
"io"
"net/http"
"strings"
"testing"
)
// mockGoHTTPClient is a test double used as an http.Client Transport. It
// serves canned responses keyed by the full request URL.
type mockGoHTTPClient struct {
	responses map[string]*http.Response
}

// RoundTrip returns the canned response registered for the request URL. A URL
// with no entry yields a nil response together with a nil error.
func (c *mockGoHTTPClient) RoundTrip(req *http.Request) (*http.Response, error) {
	key := req.URL.String()
	if canned, found := c.responses[key]; found {
		return canned, nil
	}
	return nil, nil
}
// TestGoCollector_Collect runs Collect against canned proxy responses and
// checks that an archive is stored for each of the two listed versions.
func TestGoCollector_Collect(t *testing.T) {
	canned := make(map[string]*http.Response, 3)

	// Version list naming two versions.
	canned["https://proxy.golang.org/github.com/monero-ecosystem/go-monero/@v/list"] = &http.Response{
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(strings.NewReader("v0.1.0\nv0.2.0")),
	}

	// One archive per listed version.
	canned["https://proxy.golang.org/github.com/monero-ecosystem/go-monero/@v/v0.1.0.zip"] = &http.Response{
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(bytes.NewReader([]byte("zip content v0.1.0"))),
	}
	canned["https://proxy.golang.org/github.com/monero-ecosystem/go-monero/@v/v0.2.0.zip"] = &http.Response{
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(bytes.NewReader([]byte("zip content v0.2.0"))),
	}

	transport := &mockGoHTTPClient{responses: canned}
	collector := &GoCollector{client: &http.Client{Transport: transport}}

	node, err := collector.Collect("github.com/monero-ecosystem/go-monero")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if _, statErr := node.Stat("v0.1.0.zip"); statErr != nil {
		t.Errorf("expected v0.1.0.zip to exist")
	}
	if _, statErr := node.Stat("v0.2.0.zip"); statErr != nil {
		t.Errorf("expected v0.2.0.zip to exist")
	}
}

104
pkg/collect/npm.go Normal file
View file

@ -0,0 +1,104 @@
package collect
import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"github.com/Snider/Borg/pkg/datanode"
)
// NPMRegistryURL is the base URL for the npm registry.
// fetchPackageMetadata appends "/<package>" to it. Tarballs are not fetched
// relative to this constant: fetchAndAddTarball uses the URL it is handed,
// which Collect takes from the package metadata.
const NPMRegistryURL = "https://registry.npmjs.org"
// NPMCollector is a collector for npm packages.
type NPMCollector struct {
// client issues every HTTP request made by the collector. The field is
// unexported, so it can only be replaced from inside this package.
client *http.Client
}
// NewNPMCollector returns an NPMCollector that sends its requests through
// http.DefaultClient.
func NewNPMCollector() *NPMCollector {
	collector := new(NPMCollector)
	collector.client = http.DefaultClient
	return collector
}
// Collect downloads an npm package from the registry into a new DataNode.
//
// The node receives the indented package metadata as "metadata.json" plus one
// "<version>.tgz" entry for every version whose tarball could be fetched. A
// metadata failure aborts the collection; a tarball failure is only logged,
// so the returned node may hold metadata without some or all tarballs.
func (c *NPMCollector) Collect(packageName string) (*datanode.DataNode, error) {
	pkg, err := c.fetchPackageMetadata(packageName)
	if err != nil {
		return nil, fmt.Errorf("could not fetch package metadata: %w", err)
	}

	node := datanode.New()

	encoded, err := json.MarshalIndent(pkg, "", " ")
	if err != nil {
		return nil, fmt.Errorf("could not marshal metadata: %w", err)
	}
	node.AddData("metadata.json", encoded)

	for ver, info := range pkg.Versions {
		tarballErr := c.fetchAndAddTarball(node, info.Dist.Tarball, ver+".tgz")
		if tarballErr == nil {
			continue
		}
		// It is a valid use case to only collect metadata
		log.Printf("could not fetch tarball for version %s: %v", ver, tarballErr)
	}
	return node, nil
}
// fetchAndAddTarball downloads url and stores the whole response body in dn
// under filename. A non-200 response is reported as a "bad status" error and
// nothing is added to dn.
func (c *NPMCollector) fetchAndAddTarball(dn *datanode.DataNode, url, filename string) error {
	response, err := c.client.Get(url)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return fmt.Errorf("bad status: %s", response.Status)
	}

	payload, readErr := c.readBody(response.Body)
	if readErr != nil {
		return readErr
	}
	dn.AddData(filename, payload)
	return nil
}
// fetchPackageMetadata requests "<NPMRegistryURL>/<packageName>" and decodes
// the JSON response into an NPMPackage. A non-200 response is reported as a
// "bad status" error.
func (c *NPMCollector) fetchPackageMetadata(packageName string) (*NPMPackage, error) {
	endpoint := NPMRegistryURL + "/" + packageName

	response, err := c.client.Get(endpoint)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("bad status: %s", response.Status)
	}

	decoded := new(NPMPackage)
	if decodeErr := json.NewDecoder(response.Body).Decode(decoded); decodeErr != nil {
		return nil, decodeErr
	}
	return decoded, nil
}
// readBody reads body to the end and returns everything read from it.
func (c *NPMCollector) readBody(body io.Reader) ([]byte, error) {
	content, err := io.ReadAll(body)
	return content, err
}
// NPMPackage represents the metadata for an npm package.
// It is the decode target for the registry response in fetchPackageMetadata.
// Only the fields declared here are kept, so the "metadata.json" that Collect
// re-marshals from this value contains just these fields.
type NPMPackage struct {
// Name is the package name (JSON key "name").
Name string `json:"name"`
// Versions maps a version string to its metadata (JSON key "versions");
// Collect uses each key to name the stored "<version>.tgz" entry.
Versions map[string]NPMVersionData `json:"versions"`
}
// NPMVersionData represents the metadata for a specific version of an npm package.
type NPMVersionData struct {
// Dist holds the distribution details of the version (JSON key "dist").
Dist struct {
// Tarball is the URL (JSON key "tarball") that Collect passes to
// fetchAndAddTarball to download this version.
Tarball string `json:"tarball"`
} `json:"dist"`
}

57
pkg/collect/npm_test.go Normal file
View file

@ -0,0 +1,57 @@
package collect
import (
"bytes"
"io"
"net/http"
"strings"
"testing"
)
// mockHTTPClient is a test double used as an http.Client Transport. It serves
// canned responses keyed by the full request URL.
type mockHTTPClient struct {
	responses map[string]*http.Response
}

// RoundTrip returns the canned response registered for the request URL. A URL
// with no entry yields a nil response together with a nil error.
func (c *mockHTTPClient) RoundTrip(req *http.Request) (*http.Response, error) {
	key := req.URL.String()
	if canned, found := c.responses[key]; found {
		return canned, nil
	}
	return nil, nil
}
// TestNPMCollector_Collect runs Collect against canned registry responses and
// checks that the metadata file and the single listed version are stored.
func TestNPMCollector_Collect(t *testing.T) {
	canned := make(map[string]*http.Response, 2)

	// Metadata response listing exactly one version.
	canned["https://registry.npmjs.org/@monero-project/monero-ts"] = &http.Response{
		StatusCode: http.StatusOK,
		Body: io.NopCloser(strings.NewReader(`{
"name": "@monero-project/monero-ts",
"versions": {
"1.0.0": {
"dist": {
"tarball": "https://registry.npmjs.org/@monero-project/monero-ts/-/monero-ts-1.0.0.tgz"
}
}
}
}`)),
	}

	// Tarball response for that version.
	canned["https://registry.npmjs.org/@monero-project/monero-ts/-/monero-ts-1.0.0.tgz"] = &http.Response{
		StatusCode: http.StatusOK,
		Body:       io.NopCloser(bytes.NewReader([]byte("tarball content"))),
	}

	transport := &mockHTTPClient{responses: canned}
	collector := &NPMCollector{client: &http.Client{Transport: transport}}

	node, err := collector.Collect("@monero-project/monero-ts")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if _, statErr := node.Stat("metadata.json"); statErr != nil {
		t.Errorf("expected metadata.json to exist")
	}
	if _, statErr := node.Stat("1.0.0.tgz"); statErr != nil {
		t.Errorf("expected 1.0.0.tgz to exist")
	}
}