feat: Add optional compression to collect commands

This change introduces optional compression to the `collect` commands. Users can now specify `--compression` with `gz` or `xz` to compress the output. The `serve` command has also been enhanced to transparently decompress and serve these files.
This commit is contained in:
google-labs-jules[bot] 2025-11-02 13:27:04 +00:00
parent 92843876cd
commit 5a864a9059
11 changed files with 182 additions and 35 deletions

View file

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/matrix"
"github.com/Snider/Borg/pkg/ui"
"github.com/Snider/Borg/pkg/vcs"
@ -21,6 +22,7 @@ var collectGithubRepoCmd = &cobra.Command{
repoURL := args[0]
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
bar := ui.NewProgressBar(-1, "Cloning repository")
defer bar.Finish()
@ -51,7 +53,20 @@ var collectGithubRepoCmd = &cobra.Command{
}
}
err = os.WriteFile(outputFile, data, 0644)
compressedData, err := compress.Compress(data, compression)
if err != nil {
fmt.Printf("Error compressing data: %v\n", err)
return
}
if outputFile == "" {
outputFile = "repo." + format
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
fmt.Printf("Error writing DataNode to file: %v\n", err)
return
@ -63,6 +78,7 @@ var collectGithubRepoCmd = &cobra.Command{
func init() {
collectGithubCmd.AddCommand(collectGithubRepoCmd)
collectGithubRepoCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode")
collectGithubRepoCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
collectGithubRepoCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)")
collectGithubRepoCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
}

View file

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/matrix"
"github.com/Snider/Borg/pkg/pwa"
"github.com/Snider/Borg/pkg/ui"
@ -23,6 +24,7 @@ Example:
pwaURL, _ := cmd.Flags().GetString("uri")
outputFile, _ := cmd.Flags().GetString("output")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
if pwaURL == "" {
fmt.Println("Error: uri is required")
@ -64,7 +66,20 @@ Example:
}
}
err = os.WriteFile(outputFile, data, 0644)
compressedData, err := compress.Compress(data, compression)
if err != nil {
fmt.Printf("Error compressing data: %v\n", err)
return
}
if outputFile == "" {
outputFile = "pwa." + format
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
fmt.Printf("Error writing PWA to file: %v\n", err)
return
@ -77,6 +92,7 @@ Example:
func init() {
collectCmd.AddCommand(collectPWACmd)
collectPWACmd.Flags().String("uri", "", "The URI of the PWA to collect")
collectPWACmd.Flags().String("output", "pwa.dat", "Output file for the DataNode")
collectPWACmd.Flags().String("output", "", "Output file for the DataNode")
collectPWACmd.Flags().String("format", "datanode", "Output format (datanode or matrix)")
collectPWACmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
}

View file

@ -4,6 +4,7 @@ import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/matrix"
"github.com/Snider/Borg/pkg/ui"
"github.com/Snider/Borg/pkg/website"
@ -22,6 +23,7 @@ var collectWebsiteCmd = &cobra.Command{
outputFile, _ := cmd.Flags().GetString("output")
depth, _ := cmd.Flags().GetInt("depth")
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
bar := ui.NewProgressBar(-1, "Crawling website")
defer bar.Finish()
@ -52,7 +54,20 @@ var collectWebsiteCmd = &cobra.Command{
}
}
err = os.WriteFile(outputFile, data, 0644)
compressedData, err := compress.Compress(data, compression)
if err != nil {
fmt.Printf("Error compressing data: %v\n", err)
return
}
if outputFile == "" {
outputFile = "website." + format
if compression != "none" {
outputFile += "." + compression
}
}
err = os.WriteFile(outputFile, compressedData, 0644)
if err != nil {
fmt.Printf("Error writing website to file: %v\n", err)
return
@ -64,7 +79,8 @@ var collectWebsiteCmd = &cobra.Command{
func init() {
collectCmd.AddCommand(collectWebsiteCmd)
collectWebsiteCmd.PersistentFlags().String("output", "website.dat", "Output file for the DataNode")
collectWebsiteCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
collectWebsiteCmd.PersistentFlags().Int("depth", 2, "Recursion depth for downloading")
collectWebsiteCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)")
collectWebsiteCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
}

View file

@ -6,6 +6,7 @@ import (
"os"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/tarfs"
@ -22,12 +23,18 @@ var serveCmd = &cobra.Command{
dataFile := args[0]
port, _ := cmd.Flags().GetString("port")
data, err := os.ReadFile(dataFile)
rawData, err := os.ReadFile(dataFile)
if err != nil {
fmt.Printf("Error reading data file: %v\n", err)
return
}
data, err := compress.Decompress(rawData)
if err != nil {
fmt.Printf("Error decompressing data: %v\n", err)
return
}
var fs http.FileSystem
if strings.HasSuffix(dataFile, ".matrix") {
fs, err = tarfs.New(data)

View file

@ -20,6 +20,7 @@ borg collect github repo [repository-url] [flags]
**Flags:**
- `--output string`: Output file for the DataNode (defaults to `repo.<format>`, plus a `.<compression>` extension when compression is enabled)
- `--format string`: Output format (datanode or matrix) (default "datanode")
- `--compression string`: Compression format (none, gz, or xz) (default "none")
**Example:**
```
@ -39,6 +40,7 @@ borg collect website [url] [flags]
- `--output string`: Output file for the DataNode (defaults to `website.<format>`, plus a `.<compression>` extension when compression is enabled)
- `--depth int`: Recursion depth for downloading (default 2)
- `--format string`: Output format (datanode or matrix) (default "datanode")
- `--compression string`: Compression format (none, gz, or xz) (default "none")
**Example:**
```
@ -58,6 +60,7 @@ borg collect pwa [flags]
- `--uri string`: The URI of the PWA to collect
- `--output string`: Output file for the DataNode (defaults to `pwa.<format>`, plus a `.<compression>` extension when compression is enabled)
- `--format string`: Output format (datanode or matrix) (default "datanode")
- `--compression string`: Compression format (none, gz, or xz) (default "none")
**Example:**
```
@ -85,6 +88,23 @@ borg serve [file] [flags]
./borg serve borg.matrix --port 9999
```
## Compression
All `collect` commands support optional compression. The following compression formats are available:
- `none`: No compression (default)
- `gz`: Gzip compression
- `xz`: XZ compression
To use compression, specify the desired format with the `--compression` flag. When no `--output` file is given, the automatically generated output filename includes the matching extension (e.g., `.gz`, `.xz`); an explicitly specified `--output` filename is used verbatim.
**Example:**
```
./borg collect github repo https://github.com/Snider/Borg --compression gz
```
The `serve` command can transparently serve compressed files.
## Terminal Isolation Matrix
The `matrix` format creates a `runc` compatible bundle. This bundle can be executed by `runc` to create a container with the collected files. This is useful for creating isolated environments for testing or analysis.

8
examples/compress_datanode.sh Executable file
View file

@ -0,0 +1,8 @@
#!/bin/bash
# Example of using the 'borg collect' command with the '--compression' flag.
# This script clones the specified Git repository and saves it as a compressed .dat file.
# The main executable 'borg' is built from the project's root.
# Make sure you have built the project by running 'go build -o borg main.go' in the root directory.

# Fail fast on errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Abort with a helpful message if the binary has not been built yet.
if [ ! -x ./borg ]; then
    echo "Error: ./borg not found. Build it first with: go build -o borg main.go" >&2
    exit 1
fi

./borg collect github repo https://github.com/Snider/Borg --compression gz

View file

@ -5,6 +5,7 @@ import (
"io/fs"
"os"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/datanode"
)
@ -16,12 +17,18 @@ func main() {
datFile := os.Args[1]
data, err := os.ReadFile(datFile)
rawData, err := os.ReadFile(datFile)
if err != nil {
fmt.Printf("Error reading .dat file: %v\n", err)
os.Exit(1)
}
data, err := compress.Decompress(rawData)
if err != nil {
fmt.Printf("Error decompressing data: %v\n", err)
os.Exit(1)
}
dn, err := datanode.FromTar(data)
if err != nil {
fmt.Printf("Error creating DataNode from tarball: %v\n", err)

1
go.mod
View file

@ -33,6 +33,7 @@ require (
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/skeema/knownhosts v1.3.1 // indirect
github.com/spf13/pflag v1.0.10 // indirect
github.com/ulikunitz/xz v0.5.15 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
golang.org/x/crypto v0.43.0 // indirect
golang.org/x/sys v0.37.0 // indirect

2
go.sum
View file

@ -94,6 +94,8 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY=
github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=
github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=

64
pkg/compress/compress.go Normal file
View file

@ -0,0 +1,64 @@
package compress
import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"

	"github.com/ulikunitz/xz"
)
// Compress compresses data using the specified format.
// Compress compresses data using the specified format.
//
// Supported formats are "gz" (gzip), "xz", and "none" (or the empty
// string), which returns the input unchanged. Any other format is
// rejected with an error: previously an unknown format silently fell
// through to a no-op, while callers still appended the bogus format
// string as a file extension — producing e.g. an uncompressed
// "repo.datanode.bzip2".
func Compress(data []byte, format string) ([]byte, error) {
	var buf bytes.Buffer
	var writer io.WriteCloser
	var err error
	switch format {
	case "", "none":
		// No compression requested; hand the input back as-is.
		return data, nil
	case "gz":
		writer = gzip.NewWriter(&buf)
	case "xz":
		writer, err = xz.NewWriter(&buf)
		if err != nil {
			return nil, err
		}
	default:
		return nil, fmt.Errorf("unsupported compression format %q (want none, gz, or xz)", format)
	}
	if _, err = writer.Write(data); err != nil {
		return nil, err
	}
	// Close flushes buffered data and writes the stream trailer;
	// skipping it would truncate the output.
	if err = writer.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// Decompress decompresses data, detecting the format automatically.
// Decompress inspects data for a known compression signature and, when
// one is present, returns the decompressed payload. Input without a
// recognized signature is returned untouched, so callers may pass
// uncompressed data freely.
func Decompress(data []byte) ([]byte, error) {
	// Magic-byte sniffing: gzip streams start with 0x1f 0x8b, xz
	// streams with the 6-byte sequence 0xfd '7' 'z' 'X' 'Z' 0x00.
	isGzip := len(data) > 2 && data[0] == 0x1f && data[1] == 0x8b
	isXz := len(data) > 6 &&
		data[0] == 0xfd && data[1] == '7' && data[2] == 'z' &&
		data[3] == 'X' && data[4] == 'Z' && data[5] == 0x00

	switch {
	case isGzip:
		r, err := gzip.NewReader(bytes.NewReader(data))
		if err != nil {
			return nil, err
		}
		defer r.Close()
		return io.ReadAll(r)
	case isXz:
		r, err := xz.NewReader(bytes.NewReader(data))
		if err != nil {
			return nil, err
		}
		return io.ReadAll(r)
	default:
		// No known signature: treat as plain, uncompressed data.
		return data, nil
	}
}

View file

@ -13,15 +13,13 @@ import (
// TarFS is a http.FileSystem that serves files from a tar archive.
type TarFS struct {
files map[string]*tar.Header
data []byte
files map[string]*tarFile
}
// New creates a new TarFS from a tar archive.
func New(data []byte) (*TarFS, error) {
fs := &TarFS{
files: make(map[string]*tar.Header),
data: data,
files: make(map[string]*tarFile),
}
tr := tar.NewReader(bytes.NewReader(data))
@ -35,7 +33,15 @@ func New(data []byte) (*TarFS, error) {
}
if strings.HasPrefix(hdr.Name, "rootfs/") {
fs.files[strings.TrimPrefix(hdr.Name, "rootfs/")] = hdr
content, err := io.ReadAll(tr)
if err != nil {
return nil, err
}
fs.files[strings.TrimPrefix(hdr.Name, "rootfs/")] = &tarFile{
header: hdr,
content: bytes.NewReader(content),
modTime: hdr.ModTime,
}
}
}
@ -45,26 +51,10 @@ func New(data []byte) (*TarFS, error) {
// Open opens a file from the tar archive.
func (fs *TarFS) Open(name string) (http.File, error) {
name = strings.TrimPrefix(name, "/")
if hdr, ok := fs.files[name]; ok {
// This is a bit inefficient, but it's the simplest way to
// get the file content without pre-indexing everything.
tr := tar.NewReader(bytes.NewReader(fs.data))
for {
h, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
if h.Name == hdr.Name {
return &tarFile{
header: hdr,
content: tr,
modTime: hdr.ModTime,
}, nil
}
}
if file, ok := fs.files[name]; ok {
// Reset the reader to the beginning of the file
file.content.Seek(0, 0)
return file, nil
}
return nil, os.ErrNotExist
@ -73,14 +63,14 @@ func (fs *TarFS) Open(name string) (http.File, error) {
// tarFile is a http.File that represents a file in a tar archive.
type tarFile struct {
header *tar.Header
content io.Reader
content *bytes.Reader
modTime time.Time
}
func (f *tarFile) Close() error { return nil }
func (f *tarFile) Read(p []byte) (int, error) { return f.content.Read(p) }
func (f *tarFile) Seek(offset int64, whence int) (int64, error) {
return 0, io.ErrUnexpectedEOF
return f.content.Seek(offset, whence)
}
func (f *tarFile) Readdir(count int) ([]os.FileInfo, error) {