From 4ae69ae74c78298aa76d7cfdec02ff3dced3bc24 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 12:23:25 +0000 Subject: [PATCH 1/4] feat: Add documentation and examples This change adds a 'docs/README.md' file with command-line documentation and an 'examples' directory containing scripts to demonstrate the tool's usage. It also includes a Go program to inspect the contents of a DataNode. --- docs/README.md | 97 +++++++++++++++++++++++++++++++++ examples/collect_github_repo.sh | 8 +++ examples/collect_pwa.sh | 8 +++ examples/collect_website.sh | 8 +++ examples/inspect_datanode.go | 43 +++++++++++++++ 5 files changed, 164 insertions(+) create mode 100644 docs/README.md create mode 100755 examples/collect_github_repo.sh create mode 100755 examples/collect_pwa.sh create mode 100755 examples/collect_website.sh create mode 100644 examples/inspect_datanode.go diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..ca38516 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,97 @@ +# Borg Data Collector + +Borg Data Collector is a command-line tool for collecting and managing data from various sources. + +## Commands + +### `collect` + +This command is used to collect resources from different sources and store them in a DataNode. + +#### `collect github repo` + +Collects a single Git repository and stores it in a DataNode. + +**Usage:** +``` +borg collect github repo [repository-url] [flags] +``` + +**Flags:** +- `--output string`: Output file for the DataNode (default "repo.dat") + +**Example:** +``` +./borg collect github repo https://github.com/Snider/Borg --output borg.dat +``` + +#### `collect website` + +Collects a single website and stores it in a DataNode. + +**Usage:** +``` +borg collect website [url] [flags] +``` + +**Flags:** +- `--output string`: Output file for the DataNode (default "website.dat") +- `--depth int`: Recursion depth for downloading (default 2) + +**Example:** +``` +./borg collect website https://google.com --output website.dat --depth 1 +``` + +#### `collect pwa` + +Collects a single PWA and stores it in a DataNode. + +**Usage:** +``` +borg collect pwa [flags] +``` + +**Flags:** +- `--uri string`: The URI of the PWA to collect +- `--output string`: Output file for the DataNode (default "pwa.dat") + +**Example:** +``` +./borg collect pwa --uri https://squoosh.app --output squoosh.dat +``` + +### `serve` + +Serves the contents of a packaged DataNode file using a static file server. + +**Usage:** +``` +borg serve [file] [flags] +``` + +**Flags:** +- `--port string`: Port to serve the DataNode on (default "8080") + +**Example:** +``` +./borg serve squoosh.dat --port 8888 +``` + +## Inspecting a DataNode + +The `examples` directory contains a Go program that can be used to inspect the contents of a `.dat` file. + +**Usage:** +``` +go run examples/inspect_datanode.go +``` + +**Example:** +``` +# First, create a .dat file +./borg collect github repo https://github.com/Snider/Borg --output borg.dat + +# Then, inspect it +go run examples/inspect_datanode.go borg.dat +``` diff --git a/examples/collect_github_repo.sh b/examples/collect_github_repo.sh new file mode 100755 index 0000000..a07b047 --- /dev/null +++ b/examples/collect_github_repo.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Example of using the 'borg collect github repo' command. + +# This script clones the specified Git repository and saves it as a .dat file. +# The main executable 'borg' is built from the project's root. 
+# Make sure you have built the project by running 'go build -o borg main.go' in the root directory. + +./borg collect github repo https://github.com/Snider/Borg --output borg.dat diff --git a/examples/collect_pwa.sh b/examples/collect_pwa.sh new file mode 100755 index 0000000..bbcc53b --- /dev/null +++ b/examples/collect_pwa.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Example of using the 'borg collect pwa' command. + +# This script downloads the specified PWA and saves it as a .dat file. +# The main executable 'borg' is built from the project's root. +# Make sure you have built the project by running 'go build -o borg main.go' in the root directory. + +./borg collect pwa --uri https://squoosh.app --output squoosh.dat diff --git a/examples/collect_website.sh b/examples/collect_website.sh new file mode 100755 index 0000000..fa80dd1 --- /dev/null +++ b/examples/collect_website.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Example of using the 'borg collect website' command. + +# This script crawls the specified website and saves it as a .dat file. +# The main executable 'borg' is built from the project's root. +# Make sure you have built the project by running 'go build -o borg main.go' in the root directory. + +./borg collect website https://google.com --output website.dat --depth 1 diff --git a/examples/inspect_datanode.go b/examples/inspect_datanode.go new file mode 100644 index 0000000..9c97ede --- /dev/null +++ b/examples/inspect_datanode.go @@ -0,0 +1,43 @@ +package main + +import ( + "fmt" + "io/fs" + "os" + + "github.com/Snider/Borg/pkg/datanode" +) + +func main() { + if len(os.Args) != 2 { + fmt.Println("Usage: go run inspect_datanode.go ") + os.Exit(1) + } + + datFile := os.Args[1] + + data, err := os.ReadFile(datFile) + if err != nil { + fmt.Printf("Error reading .dat file: %v\n", err) + os.Exit(1) + } + + dn, err := datanode.FromTar(data) + if err != nil { + fmt.Printf("Error creating DataNode from tarball: %v\n", err) + os.Exit(1) + } + + fmt.Printf("Contents of %s:\n", datFile) + err = dn.Walk(".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + fmt.Println(path) + return nil + }) + if err != nil { + fmt.Printf("Error walking DataNode: %v\n", err) + os.Exit(1) + } +} From 4e5257ce4a01ce90e987ae377d405eb1d556fb63 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 12:39:46 +0000 Subject: [PATCH 2/4] feat: Add Terminal Isolation Matrix feature This change introduces the "Terminal Isolation Matrix", a new output format that creates a runc-compatible container bundle. This allows for the collected files to be run in an isolated environment. A --format flag has been added to all collect commands to support this new format. 
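
As a sketch of the intended call path, the flow below mirrors what the collect commands now do internally, assuming the `pkg/matrix` API introduced in this patch (`FromDataNode`, `ToTar`) together with the existing `datanode.FromTar`; the input and output file names are illustrative only:

```
package main

import (
	"fmt"
	"os"

	"github.com/Snider/Borg/pkg/datanode"
	"github.com/Snider/Borg/pkg/matrix"
)

func main() {
	// Load a previously collected DataNode tarball (illustrative path).
	raw, err := os.ReadFile("repo.dat")
	if err != nil {
		fmt.Printf("Error reading DataNode: %v\n", err)
		os.Exit(1)
	}

	dn, err := datanode.FromTar(raw)
	if err != nil {
		fmt.Printf("Error decoding DataNode: %v\n", err)
		os.Exit(1)
	}

	// Wrap the DataNode in a runc bundle: config.json plus a rootfs/ tree.
	m, err := matrix.FromDataNode(dn)
	if err != nil {
		fmt.Printf("Error creating matrix: %v\n", err)
		os.Exit(1)
	}

	out, err := m.ToTar()
	if err != nil {
		fmt.Printf("Error serializing matrix: %v\n", err)
		os.Exit(1)
	}

	if err := os.WriteFile("repo.matrix", out, 0644); err != nil {
		fmt.Printf("Error writing matrix: %v\n", err)
		os.Exit(1)
	}
}
```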
--- cmd/collect_github_repo.go | 25 ++++- cmd/collect_pwa.go | 27 +++++- cmd/collect_website.go | 27 +++++- docs/README.md | 27 ++++++ examples/create_matrix.sh | 8 ++ pkg/matrix/config.go | 190 +++++++++++++++++++++++++++++++++++++ pkg/matrix/matrix.go | 116 ++++++++++++++++++++++ 7 files changed, 406 insertions(+), 14 deletions(-) create mode 100755 examples/create_matrix.sh create mode 100644 pkg/matrix/config.go create mode 100644 pkg/matrix/matrix.go diff --git a/cmd/collect_github_repo.go b/cmd/collect_github_repo.go index e66f7aa..e48e505 100644 --- a/cmd/collect_github_repo.go +++ b/cmd/collect_github_repo.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/Snider/Borg/pkg/matrix" "github.com/Snider/Borg/pkg/ui" "github.com/Snider/Borg/pkg/vcs" @@ -19,6 +20,7 @@ var collectGithubRepoCmd = &cobra.Command{ Run: func(cmd *cobra.Command, args []string) { repoURL := args[0] outputFile, _ := cmd.Flags().GetString("output") + format, _ := cmd.Flags().GetString("format") bar := ui.NewProgressBar(-1, "Cloning repository") defer bar.Finish() @@ -29,10 +31,24 @@ var collectGithubRepoCmd = &cobra.Command{ return } - data, err := dn.ToTar() - if err != nil { - fmt.Printf("Error serializing DataNode: %v\n", err) - return + var data []byte + if format == "matrix" { + matrix, err := matrix.FromDataNode(dn) + if err != nil { + fmt.Printf("Error creating matrix: %v\n", err) + return + } + data, err = matrix.ToTar() + if err != nil { + fmt.Printf("Error serializing matrix: %v\n", err) + return + } + } else { + data, err = dn.ToTar() + if err != nil { + fmt.Printf("Error serializing DataNode: %v\n", err) + return + } } err = os.WriteFile(outputFile, data, 0644) @@ -48,4 +64,5 @@ var collectGithubRepoCmd = &cobra.Command{ func init() { collectGithubCmd.AddCommand(collectGithubRepoCmd) collectGithubRepoCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode") + collectGithubRepoCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)") } diff --git a/cmd/collect_pwa.go b/cmd/collect_pwa.go index 000ac53..7c371ff 100644 --- a/cmd/collect_pwa.go +++ b/cmd/collect_pwa.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/Snider/Borg/pkg/matrix" "github.com/Snider/Borg/pkg/pwa" "github.com/Snider/Borg/pkg/ui" @@ -21,6 +22,7 @@ Example: Run: func(cmd *cobra.Command, args []string) { pwaURL, _ := cmd.Flags().GetString("uri") outputFile, _ := cmd.Flags().GetString("output") + format, _ := cmd.Flags().GetString("format") if pwaURL == "" { fmt.Println("Error: uri is required") @@ -42,13 +44,27 @@ Example: return } - pwaData, err := dn.ToTar() - if err != nil { - fmt.Printf("Error converting PWA to bytes: %v\n", err) - return + var data []byte + if format == "matrix" { + matrix, err := matrix.FromDataNode(dn) + if err != nil { + fmt.Printf("Error creating matrix: %v\n", err) + return + } + data, err = matrix.ToTar() + if err != nil { + fmt.Printf("Error serializing matrix: %v\n", err) + return + } + } else { + data, err = dn.ToTar() + if err != nil { + fmt.Printf("Error serializing DataNode: %v\n", err) + return + } } - err = os.WriteFile(outputFile, pwaData, 0644) + err = os.WriteFile(outputFile, data, 0644) if err != nil { fmt.Printf("Error writing PWA to file: %v\n", err) return @@ -62,4 +78,5 @@ func init() { collectCmd.AddCommand(collectPWACmd) collectPWACmd.Flags().String("uri", "", "The URI of the PWA to collect") collectPWACmd.Flags().String("output", "pwa.dat", "Output file for the DataNode") + collectPWACmd.Flags().String("format", "datanode", 
"Output format (datanode or matrix)") } diff --git a/cmd/collect_website.go b/cmd/collect_website.go index b11803f..1a964af 100644 --- a/cmd/collect_website.go +++ b/cmd/collect_website.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/Snider/Borg/pkg/matrix" "github.com/Snider/Borg/pkg/ui" "github.com/Snider/Borg/pkg/website" @@ -20,6 +21,7 @@ var collectWebsiteCmd = &cobra.Command{ websiteURL := args[0] outputFile, _ := cmd.Flags().GetString("output") depth, _ := cmd.Flags().GetInt("depth") + format, _ := cmd.Flags().GetString("format") bar := ui.NewProgressBar(-1, "Crawling website") defer bar.Finish() @@ -30,13 +32,27 @@ var collectWebsiteCmd = &cobra.Command{ return } - websiteData, err := dn.ToTar() - if err != nil { - fmt.Printf("Error converting website to bytes: %v\n", err) - return + var data []byte + if format == "matrix" { + matrix, err := matrix.FromDataNode(dn) + if err != nil { + fmt.Printf("Error creating matrix: %v\n", err) + return + } + data, err = matrix.ToTar() + if err != nil { + fmt.Printf("Error serializing matrix: %v\n", err) + return + } + } else { + data, err = dn.ToTar() + if err != nil { + fmt.Printf("Error serializing DataNode: %v\n", err) + return + } } - err = os.WriteFile(outputFile, websiteData, 0644) + err = os.WriteFile(outputFile, data, 0644) if err != nil { fmt.Printf("Error writing website to file: %v\n", err) return @@ -50,4 +66,5 @@ func init() { collectCmd.AddCommand(collectWebsiteCmd) collectWebsiteCmd.PersistentFlags().String("output", "website.dat", "Output file for the DataNode") collectWebsiteCmd.PersistentFlags().Int("depth", 2, "Recursion depth for downloading") + collectWebsiteCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)") } diff --git a/docs/README.md b/docs/README.md index ca38516..aea826a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -19,6 +19,7 @@ borg collect github repo [repository-url] [flags] **Flags:** - `--output string`: Output file for the DataNode (default "repo.dat") +- `--format string`: Output format (datanode or matrix) (default "datanode") **Example:** ``` @@ -37,6 +38,7 @@ borg collect website [url] [flags] **Flags:** - `--output string`: Output file for the DataNode (default "website.dat") - `--depth int`: Recursion depth for downloading (default 2) +- `--format string`: Output format (datanode or matrix) (default "datanode") **Example:** ``` @@ -55,6 +57,7 @@ borg collect pwa [flags] **Flags:** - `--uri string`: The URI of the PWA to collect - `--output string`: Output file for the DataNode (default "pwa.dat") +- `--format string`: Output format (datanode or matrix) (default "datanode") **Example:** ``` @@ -78,6 +81,30 @@ borg serve [file] [flags] ./borg serve squoosh.dat --port 8888 ``` +## Terminal Isolation Matrix + +The `matrix` format creates a `runc` compatible bundle. This bundle can be executed by `runc` to create a container with the collected files. This is useful for creating isolated environments for testing or analysis. + +To create a Matrix, use the `--format matrix` flag with any of the `collect` subcommands. 
+ +**Example:** +``` +./borg collect github repo https://github.com/Snider/Borg --output borg.matrix --format matrix +``` + +You can then execute the Matrix with `runc`: +``` +# Create a directory for the bundle +mkdir borg-bundle + +# Unpack the matrix into the bundle directory +tar -xf borg.matrix -C borg-bundle + +# Run the bundle +cd borg-bundle +runc run borg +``` + ## Inspecting a DataNode The `examples` directory contains a Go program that can be used to inspect the contents of a `.dat` file. diff --git a/examples/create_matrix.sh b/examples/create_matrix.sh new file mode 100755 index 0000000..db19b48 --- /dev/null +++ b/examples/create_matrix.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Example of using the 'borg collect' command with the '--format matrix' flag. + +# This script clones the specified Git repository and saves it as a .matrix file. +# The main executable 'borg' is built from the project's root. +# Make sure you have built the project by running 'go build -o borg main.go' in the root directory. + +./borg collect github repo https://github.com/Snider/Borg --output borg.matrix --format matrix diff --git a/pkg/matrix/config.go b/pkg/matrix/config.go new file mode 100644 index 0000000..3ba19f3 --- /dev/null +++ b/pkg/matrix/config.go @@ -0,0 +1,190 @@ +package matrix + +import ( + "encoding/json" +) + +// This is the default runc spec, generated by `runc spec`. +const defaultConfigJSON = `{ + "ociVersion": "1.2.1", + "process": { + "terminal": true, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ + "sh" + ], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "noNewPrivileges": true + }, + "root": { + "path": "rootfs", + "readonly": true + }, + "hostname": "runc", + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid":5 + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ] + }, + { + "destination": "/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "linux": { + "resources": { + "devices": [ + { + "allow": false, + "access": "rwm" + } + ] + }, + "namespaces": [ + { + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + }, + { + "type": "cgroup" + } + ], + "maskedPaths": [ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + 
"/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/proc/scsi" + ], + "readonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +}` + +// defaultConfig returns the default runc spec. +func defaultConfig() (map[string]interface{}, error) { + var spec map[string]interface{} + err := json.Unmarshal([]byte(defaultConfigJSON), &spec) + if err != nil { + return nil, err + } + return spec, nil +} diff --git a/pkg/matrix/matrix.go b/pkg/matrix/matrix.go new file mode 100644 index 0000000..3738577 --- /dev/null +++ b/pkg/matrix/matrix.go @@ -0,0 +1,116 @@ +package matrix + +import ( + "archive/tar" + "bytes" + "encoding/json" + "io/fs" + + "github.com/Snider/Borg/pkg/datanode" +) + +// TerminalIsolationMatrix represents a runc bundle. +type TerminalIsolationMatrix struct { + Config []byte + RootFS *datanode.DataNode +} + +// New creates a new, empty TerminalIsolationMatrix. +func New() (*TerminalIsolationMatrix, error) { + // Use the default runc spec as a starting point. + // This can be customized later. + spec, err := defaultConfig() + if err != nil { + return nil, err + } + + specBytes, err := json.Marshal(spec) + if err != nil { + return nil, err + } + + return &TerminalIsolationMatrix{ + Config: specBytes, + RootFS: datanode.New(), + }, nil +} + +// FromDataNode creates a new TerminalIsolationMatrix from a DataNode. +func FromDataNode(dn *datanode.DataNode) (*TerminalIsolationMatrix, error) { + m, err := New() + if err != nil { + return nil, err + } + m.RootFS = dn + return m, nil +} + +// ToTar serializes the TerminalIsolationMatrix to a tarball. +func (m *TerminalIsolationMatrix) ToTar() ([]byte, error) { + buf := new(bytes.Buffer) + tw := tar.NewWriter(buf) + + // Add the config.json file. + hdr := &tar.Header{ + Name: "config.json", + Mode: 0600, + Size: int64(len(m.Config)), + } + if err := tw.WriteHeader(hdr); err != nil { + return nil, err + } + if _, err := tw.Write(m.Config); err != nil { + return nil, err + } + + // Add the rootfs files. + err := m.RootFS.Walk(".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if d.IsDir() { + return nil + } + + file, err := m.RootFS.Open(path) + if err != nil { + return err + } + defer file.Close() + + info, err := file.Stat() + if err != nil { + return err + } + + hdr := &tar.Header{ + Name: "rootfs/" + path, + Mode: 0600, + Size: info.Size(), + } + if err := tw.WriteHeader(hdr); err != nil { + return err + } + + buf := new(bytes.Buffer) + if _, err := buf.ReadFrom(file); err != nil { + return err + } + + if _, err := tw.Write(buf.Bytes()); err != nil { + return err + } + + return nil + }) + if err != nil { + return nil, err + } + + if err := tw.Close(); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} From 92843876cda177c8f54c561b4c1a74eeb1067d90 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 13:03:48 +0000 Subject: [PATCH 3/4] feat: Enhance serve command to support Matrix files This change enhances the 'serve' command to support serving files directly from a Terminal Isolation Matrix. It introduces a new 'pkg/tarfs' package that provides an http.FileSystem implementation for tar archives, allowing for a "passthrough" server that serves files directly from the Matrix bundle. 
--- cmd/serve.go | 28 +++++++--- docs/README.md | 6 ++- examples/serve_matrix.sh | 12 +++++ pkg/tarfs/tarfs.go | 110 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+), 9 deletions(-) create mode 100755 examples/serve_matrix.sh create mode 100644 pkg/tarfs/tarfs.go diff --git a/cmd/serve.go b/cmd/serve.go index 40dd400..531e719 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -4,8 +4,10 @@ import ( "fmt" "net/http" "os" + "strings" "github.com/Snider/Borg/pkg/datanode" + "github.com/Snider/Borg/pkg/tarfs" "github.com/spf13/cobra" ) @@ -17,22 +19,32 @@ var serveCmd = &cobra.Command{ Long: `Serves the contents of a packaged PWA file using a static file server.`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { - pwaFile := args[0] + dataFile := args[0] port, _ := cmd.Flags().GetString("port") - pwaData, err := os.ReadFile(pwaFile) + data, err := os.ReadFile(dataFile) if err != nil { - fmt.Printf("Error reading PWA file: %v\n", err) + fmt.Printf("Error reading data file: %v\n", err) return } - dn, err := datanode.FromTar(pwaData) - if err != nil { - fmt.Printf("Error creating DataNode from tarball: %v\n", err) - return + var fs http.FileSystem + if strings.HasSuffix(dataFile, ".matrix") { + fs, err = tarfs.New(data) + if err != nil { + fmt.Printf("Error creating TarFS from matrix tarball: %v\n", err) + return + } + } else { + dn, err := datanode.FromTar(data) + if err != nil { + fmt.Printf("Error creating DataNode from tarball: %v\n", err) + return + } + fs = http.FS(dn) } - http.Handle("/", http.FileServer(http.FS(dn))) + http.Handle("/", http.FileServer(fs)) fmt.Printf("Serving PWA on http://localhost:%s\n", port) err = http.ListenAndServe(":"+port, nil) diff --git a/docs/README.md b/docs/README.md index aea826a..fe82825 100644 --- a/docs/README.md +++ b/docs/README.md @@ -66,7 +66,7 @@ borg collect pwa [flags] ### `serve` -Serves the contents of a packaged DataNode file using a static file server. +Serves the contents of a packaged DataNode or Terminal Isolation Matrix file using a static file server. **Usage:** ``` @@ -78,7 +78,11 @@ borg serve [file] [flags] **Example:** ``` +# Serve a DataNode ./borg serve squoosh.dat --port 8888 + +# Serve a Terminal Isolation Matrix +./borg serve borg.matrix --port 9999 ``` ## Terminal Isolation Matrix diff --git a/examples/serve_matrix.sh b/examples/serve_matrix.sh new file mode 100755 index 0000000..2ecf613 --- /dev/null +++ b/examples/serve_matrix.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Example of using the 'borg serve' command with a .matrix file. + +# This script serves the contents of a .matrix file using a static file server. +# The main executable 'borg' is built from the project's root. +# Make sure you have built the project by running 'go build -o borg main.go' in the root directory. + +# First, create a .matrix file +./borg collect github repo https://github.com/Snider/Borg --output borg.matrix --format matrix + +# Then, serve it +./borg serve borg.matrix --port 9999 diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go new file mode 100644 index 0000000..9a4b440 --- /dev/null +++ b/pkg/tarfs/tarfs.go @@ -0,0 +1,110 @@ +package tarfs + +import ( + "archive/tar" + "bytes" + "io" + "net/http" + "os" + "path" + "strings" + "time" +) + +// TarFS is a http.FileSystem that serves files from a tar archive. +type TarFS struct { + files map[string]*tar.Header + data []byte +} + +// New creates a new TarFS from a tar archive. 
+func New(data []byte) (*TarFS, error) { + fs := &TarFS{ + files: make(map[string]*tar.Header), + data: data, + } + + tr := tar.NewReader(bytes.NewReader(data)) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + + if strings.HasPrefix(hdr.Name, "rootfs/") { + fs.files[strings.TrimPrefix(hdr.Name, "rootfs/")] = hdr + } + } + + return fs, nil +} + +// Open opens a file from the tar archive. +func (fs *TarFS) Open(name string) (http.File, error) { + name = strings.TrimPrefix(name, "/") + if hdr, ok := fs.files[name]; ok { + // This is a bit inefficient, but it's the simplest way to + // get the file content without pre-indexing everything. + tr := tar.NewReader(bytes.NewReader(fs.data)) + for { + h, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + if h.Name == hdr.Name { + return &tarFile{ + header: hdr, + content: tr, + modTime: hdr.ModTime, + }, nil + } + } + } + + return nil, os.ErrNotExist +} + +// tarFile is a http.File that represents a file in a tar archive. +type tarFile struct { + header *tar.Header + content io.Reader + modTime time.Time +} + +func (f *tarFile) Close() error { return nil } +func (f *tarFile) Read(p []byte) (int, error) { return f.content.Read(p) } +func (f *tarFile) Seek(offset int64, whence int) (int64, error) { + return 0, io.ErrUnexpectedEOF +} + +func (f *tarFile) Readdir(count int) ([]os.FileInfo, error) { + return nil, os.ErrInvalid +} + +func (f *tarFile) Stat() (os.FileInfo, error) { + return &tarFileInfo{ + name: path.Base(f.header.Name), + size: f.header.Size, + modTime: f.modTime, + }, nil +} + +// tarFileInfo is a os.FileInfo that represents a file in a tar archive. +type tarFileInfo struct { + name string + size int64 + modTime time.Time +} + +func (i *tarFileInfo) Name() string { return i.name } +func (i *tarFileInfo) Size() int64 { return i.size } +func (i *tarFileInfo) Mode() os.FileMode { return 0444 } +func (i *tarFileInfo) ModTime() time.Time { return i.modTime } +func (i *tarFileInfo) IsDir() bool { return false } +func (i *tarFileInfo) Sys() interface{} { return nil } From 5a864a905946b251db3323e7bc0a227a9a4d03fc Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sun, 2 Nov 2025 13:27:04 +0000 Subject: [PATCH 4/4] feat: Add optional compression to collect commands This change introduces optional compression to the `collect` commands. Users can now specify `--compression` with `gz` or `xz` to compress the output. The `serve` command has also been enhanced to transparently decompress and serve these files. 
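
A minimal round trip through the new `pkg/compress` helpers, assuming the API added in this patch (`Compress` takes a format string, `Decompress` detects gzip or xz from the magic bytes); the payload is illustrative only:

```
package main

import (
	"bytes"
	"fmt"
	"os"

	"github.com/Snider/Borg/pkg/compress"
)

func main() {
	payload := []byte("example DataNode tarball bytes")

	// "gz" and "xz" produce compressed output; any other format returns the input unchanged.
	compressed, err := compress.Compress(payload, "gz")
	if err != nil {
		fmt.Printf("Error compressing data: %v\n", err)
		os.Exit(1)
	}

	// Decompress sniffs the header, so callers never need to track which format was used.
	restored, err := compress.Decompress(compressed)
	if err != nil {
		fmt.Printf("Error decompressing data: %v\n", err)
		os.Exit(1)
	}

	fmt.Println("Round trip intact:", bytes.Equal(payload, restored))
}
```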
--- cmd/collect_github_repo.go | 20 +++++++++-- cmd/collect_pwa.go | 20 +++++++++-- cmd/collect_website.go | 20 +++++++++-- cmd/serve.go | 9 ++++- docs/README.md | 20 +++++++++++ examples/compress_datanode.sh | 8 +++++ examples/inspect_datanode.go | 9 ++++- go.mod | 1 + go.sum | 2 ++ pkg/compress/compress.go | 64 +++++++++++++++++++++++++++++++++++ pkg/tarfs/tarfs.go | 44 ++++++++++-------------- 11 files changed, 182 insertions(+), 35 deletions(-) create mode 100755 examples/compress_datanode.sh create mode 100644 pkg/compress/compress.go diff --git a/cmd/collect_github_repo.go b/cmd/collect_github_repo.go index e48e505..237d9af 100644 --- a/cmd/collect_github_repo.go +++ b/cmd/collect_github_repo.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/Snider/Borg/pkg/compress" "github.com/Snider/Borg/pkg/matrix" "github.com/Snider/Borg/pkg/ui" "github.com/Snider/Borg/pkg/vcs" @@ -21,6 +22,7 @@ var collectGithubRepoCmd = &cobra.Command{ repoURL := args[0] outputFile, _ := cmd.Flags().GetString("output") format, _ := cmd.Flags().GetString("format") + compression, _ := cmd.Flags().GetString("compression") bar := ui.NewProgressBar(-1, "Cloning repository") defer bar.Finish() @@ -51,7 +53,20 @@ var collectGithubRepoCmd = &cobra.Command{ } } - err = os.WriteFile(outputFile, data, 0644) + compressedData, err := compress.Compress(data, compression) + if err != nil { + fmt.Printf("Error compressing data: %v\n", err) + return + } + + if outputFile == "" { + outputFile = "repo." + format + if compression != "none" { + outputFile += "." + compression + } + } + + err = os.WriteFile(outputFile, compressedData, 0644) if err != nil { fmt.Printf("Error writing DataNode to file: %v\n", err) return @@ -63,6 +78,7 @@ var collectGithubRepoCmd = &cobra.Command{ func init() { collectGithubCmd.AddCommand(collectGithubRepoCmd) - collectGithubRepoCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode") + collectGithubRepoCmd.PersistentFlags().String("output", "", "Output file for the DataNode") collectGithubRepoCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)") + collectGithubRepoCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)") } diff --git a/cmd/collect_pwa.go b/cmd/collect_pwa.go index 7c371ff..6dd57af 100644 --- a/cmd/collect_pwa.go +++ b/cmd/collect_pwa.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/Snider/Borg/pkg/compress" "github.com/Snider/Borg/pkg/matrix" "github.com/Snider/Borg/pkg/pwa" "github.com/Snider/Borg/pkg/ui" @@ -23,6 +24,7 @@ Example: pwaURL, _ := cmd.Flags().GetString("uri") outputFile, _ := cmd.Flags().GetString("output") format, _ := cmd.Flags().GetString("format") + compression, _ := cmd.Flags().GetString("compression") if pwaURL == "" { fmt.Println("Error: uri is required") @@ -64,7 +66,20 @@ Example: } } - err = os.WriteFile(outputFile, data, 0644) + compressedData, err := compress.Compress(data, compression) + if err != nil { + fmt.Printf("Error compressing data: %v\n", err) + return + } + + if outputFile == "" { + outputFile = "pwa." + format + if compression != "none" { + outputFile += "." 
+ compression + } + } + + err = os.WriteFile(outputFile, compressedData, 0644) if err != nil { fmt.Printf("Error writing PWA to file: %v\n", err) return @@ -77,6 +92,7 @@ Example: func init() { collectCmd.AddCommand(collectPWACmd) collectPWACmd.Flags().String("uri", "", "The URI of the PWA to collect") - collectPWACmd.Flags().String("output", "pwa.dat", "Output file for the DataNode") + collectPWACmd.Flags().String("output", "", "Output file for the DataNode") collectPWACmd.Flags().String("format", "datanode", "Output format (datanode or matrix)") + collectPWACmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)") } diff --git a/cmd/collect_website.go b/cmd/collect_website.go index 1a964af..b3f7c37 100644 --- a/cmd/collect_website.go +++ b/cmd/collect_website.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "github.com/Snider/Borg/pkg/compress" "github.com/Snider/Borg/pkg/matrix" "github.com/Snider/Borg/pkg/ui" "github.com/Snider/Borg/pkg/website" @@ -22,6 +23,7 @@ var collectWebsiteCmd = &cobra.Command{ outputFile, _ := cmd.Flags().GetString("output") depth, _ := cmd.Flags().GetInt("depth") format, _ := cmd.Flags().GetString("format") + compression, _ := cmd.Flags().GetString("compression") bar := ui.NewProgressBar(-1, "Crawling website") defer bar.Finish() @@ -52,7 +54,20 @@ var collectWebsiteCmd = &cobra.Command{ } } - err = os.WriteFile(outputFile, data, 0644) + compressedData, err := compress.Compress(data, compression) + if err != nil { + fmt.Printf("Error compressing data: %v\n", err) + return + } + + if outputFile == "" { + outputFile = "website." + format + if compression != "none" { + outputFile += "." + compression + } + } + + err = os.WriteFile(outputFile, compressedData, 0644) if err != nil { fmt.Printf("Error writing website to file: %v\n", err) return @@ -64,7 +79,8 @@ var collectWebsiteCmd = &cobra.Command{ func init() { collectCmd.AddCommand(collectWebsiteCmd) - collectWebsiteCmd.PersistentFlags().String("output", "website.dat", "Output file for the DataNode") + collectWebsiteCmd.PersistentFlags().String("output", "", "Output file for the DataNode") collectWebsiteCmd.PersistentFlags().Int("depth", 2, "Recursion depth for downloading") collectWebsiteCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)") + collectWebsiteCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)") } diff --git a/cmd/serve.go b/cmd/serve.go index 531e719..87e225f 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -6,6 +6,7 @@ import ( "os" "strings" + "github.com/Snider/Borg/pkg/compress" "github.com/Snider/Borg/pkg/datanode" "github.com/Snider/Borg/pkg/tarfs" @@ -22,12 +23,18 @@ var serveCmd = &cobra.Command{ dataFile := args[0] port, _ := cmd.Flags().GetString("port") - data, err := os.ReadFile(dataFile) + rawData, err := os.ReadFile(dataFile) if err != nil { fmt.Printf("Error reading data file: %v\n", err) return } + data, err := compress.Decompress(rawData) + if err != nil { + fmt.Printf("Error decompressing data: %v\n", err) + return + } + var fs http.FileSystem if strings.HasSuffix(dataFile, ".matrix") { fs, err = tarfs.New(data) diff --git a/docs/README.md b/docs/README.md index fe82825..4151a29 100644 --- a/docs/README.md +++ b/docs/README.md @@ -20,6 +20,7 @@ borg collect github repo [repository-url] [flags] **Flags:** - `--output string`: Output file for the DataNode (default "repo.dat") - `--format string`: Output format (datanode or matrix) (default "datanode") +- `--compression string`: Compression 
format (none, gz, or xz) (default "none") **Example:** ``` @@ -39,6 +40,7 @@ borg collect website [url] [flags] - `--output string`: Output file for the DataNode (default "website.dat") - `--depth int`: Recursion depth for downloading (default 2) - `--format string`: Output format (datanode or matrix) (default "datanode") +- `--compression string`: Compression format (none, gz, or xz) (default "none") **Example:** ``` @@ -58,6 +60,7 @@ borg collect pwa [flags] - `--uri string`: The URI of the PWA to collect - `--output string`: Output file for the DataNode (default "pwa.dat") - `--format string`: Output format (datanode or matrix) (default "datanode") +- `--compression string`: Compression format (none, gz, or xz) (default "none") **Example:** ``` @@ -85,6 +88,23 @@ borg serve [file] [flags] ./borg serve borg.matrix --port 9999 ``` +## Compression + +All `collect` commands support optional compression. The following compression formats are available: + +- `none`: No compression (default) +- `gz`: Gzip compression +- `xz`: XZ compression + +To use compression, specify the desired format with the `--compression` flag. The output filename will be automatically updated with the appropriate extension (e.g., `.gz`, `.xz`). + +**Example:** +``` +./borg collect github repo https://github.com/Snider/Borg --compression gz +``` + +The `serve` command can transparently serve compressed files. + ## Terminal Isolation Matrix The `matrix` format creates a `runc` compatible bundle. This bundle can be executed by `runc` to create a container with the collected files. This is useful for creating isolated environments for testing or analysis. diff --git a/examples/compress_datanode.sh b/examples/compress_datanode.sh new file mode 100755 index 0000000..a5b95df --- /dev/null +++ b/examples/compress_datanode.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Example of using the 'borg collect' command with the '--compression' flag. + +# This script clones the specified Git repository and saves it as a compressed .dat file. +# The main executable 'borg' is built from the project's root. +# Make sure you have built the project by running 'go build -o borg main.go' in the root directory. 
+ +./borg collect github repo https://github.com/Snider/Borg --compression gz diff --git a/examples/inspect_datanode.go b/examples/inspect_datanode.go index 9c97ede..a6c67bf 100644 --- a/examples/inspect_datanode.go +++ b/examples/inspect_datanode.go @@ -5,6 +5,7 @@ import ( "io/fs" "os" + "github.com/Snider/Borg/pkg/compress" "github.com/Snider/Borg/pkg/datanode" ) @@ -16,12 +17,18 @@ func main() { datFile := os.Args[1] - data, err := os.ReadFile(datFile) + rawData, err := os.ReadFile(datFile) if err != nil { fmt.Printf("Error reading .dat file: %v\n", err) os.Exit(1) } + data, err := compress.Decompress(rawData) + if err != nil { + fmt.Printf("Error decompressing data: %v\n", err) + os.Exit(1) + } + dn, err := datanode.FromTar(data) if err != nil { fmt.Printf("Error creating DataNode from tarball: %v\n", err) diff --git a/go.mod b/go.mod index 2b473d4..ca335d6 100644 --- a/go.mod +++ b/go.mod @@ -33,6 +33,7 @@ require ( github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/skeema/knownhosts v1.3.1 // indirect github.com/spf13/pflag v1.0.10 // indirect + github.com/ulikunitz/xz v0.5.15 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect golang.org/x/crypto v0.43.0 // indirect golang.org/x/sys v0.37.0 // indirect diff --git a/go.sum b/go.sum index 693b03f..12ad192 100644 --- a/go.sum +++ b/go.sum @@ -94,6 +94,8 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY= +github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= diff --git a/pkg/compress/compress.go b/pkg/compress/compress.go new file mode 100644 index 0000000..07e4d28 --- /dev/null +++ b/pkg/compress/compress.go @@ -0,0 +1,64 @@ +package compress + +import ( + "bytes" + "compress/gzip" + "io" + + "github.com/ulikunitz/xz" +) + +// Compress compresses data using the specified format. +func Compress(data []byte, format string) ([]byte, error) { + var buf bytes.Buffer + var writer io.WriteCloser + var err error + + switch format { + case "gz": + writer = gzip.NewWriter(&buf) + case "xz": + writer, err = xz.NewWriter(&buf) + if err != nil { + return nil, err + } + default: + return data, nil + } + + _, err = writer.Write(data) + if err != nil { + return nil, err + } + + err = writer.Close() + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// Decompress decompresses data, detecting the format automatically. 
+func Decompress(data []byte) ([]byte, error) { + // Check for gzip header + if len(data) > 2 && data[0] == 0x1f && data[1] == 0x8b { + reader, err := gzip.NewReader(bytes.NewReader(data)) + if err != nil { + return nil, err + } + defer reader.Close() + return io.ReadAll(reader) + } + + // Check for xz header + if len(data) > 6 && data[0] == 0xfd && data[1] == '7' && data[2] == 'z' && data[3] == 'X' && data[4] == 'Z' && data[5] == 0x00 { + reader, err := xz.NewReader(bytes.NewReader(data)) + if err != nil { + return nil, err + } + return io.ReadAll(reader) + } + + return data, nil +} diff --git a/pkg/tarfs/tarfs.go b/pkg/tarfs/tarfs.go index 9a4b440..6abbee4 100644 --- a/pkg/tarfs/tarfs.go +++ b/pkg/tarfs/tarfs.go @@ -13,15 +13,13 @@ import ( // TarFS is a http.FileSystem that serves files from a tar archive. type TarFS struct { - files map[string]*tar.Header - data []byte + files map[string]*tarFile } // New creates a new TarFS from a tar archive. func New(data []byte) (*TarFS, error) { fs := &TarFS{ - files: make(map[string]*tar.Header), - data: data, + files: make(map[string]*tarFile), } tr := tar.NewReader(bytes.NewReader(data)) @@ -35,7 +33,15 @@ func New(data []byte) (*TarFS, error) { } if strings.HasPrefix(hdr.Name, "rootfs/") { - fs.files[strings.TrimPrefix(hdr.Name, "rootfs/")] = hdr + content, err := io.ReadAll(tr) + if err != nil { + return nil, err + } + fs.files[strings.TrimPrefix(hdr.Name, "rootfs/")] = &tarFile{ + header: hdr, + content: bytes.NewReader(content), + modTime: hdr.ModTime, + } } } @@ -45,26 +51,10 @@ func New(data []byte) (*TarFS, error) { // Open opens a file from the tar archive. func (fs *TarFS) Open(name string) (http.File, error) { name = strings.TrimPrefix(name, "/") - if hdr, ok := fs.files[name]; ok { - // This is a bit inefficient, but it's the simplest way to - // get the file content without pre-indexing everything. - tr := tar.NewReader(bytes.NewReader(fs.data)) - for { - h, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - if h.Name == hdr.Name { - return &tarFile{ - header: hdr, - content: tr, - modTime: hdr.ModTime, - }, nil - } - } + if file, ok := fs.files[name]; ok { + // Reset the reader to the beginning of the file + file.content.Seek(0, 0) + return file, nil } return nil, os.ErrNotExist @@ -73,14 +63,14 @@ func (fs *TarFS) Open(name string) (http.File, error) { // tarFile is a http.File that represents a file in a tar archive. type tarFile struct { header *tar.Header - content io.Reader + content *bytes.Reader modTime time.Time } func (f *tarFile) Close() error { return nil } func (f *tarFile) Read(p []byte) (int, error) { return f.content.Read(p) } func (f *tarFile) Seek(offset int64, whence int) (int64, error) { - return 0, io.ErrUnexpectedEOF + return f.content.Seek(offset, whence) } func (f *tarFile) Readdir(count int) ([]os.FileInfo, error) {