feat: Add optional compression to collect commands
This change introduces optional compression to the `collect` commands. Users can now specify `--compression` with `gz` or `xz` to compress the output. The `serve` command has also been enhanced to transparently decompress and serve these files.
This commit is contained in:
parent
92843876cd
commit
5a864a9059
11 changed files with 182 additions and 35 deletions
|
|
@ -4,6 +4,7 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/Snider/Borg/pkg/compress"
|
||||
"github.com/Snider/Borg/pkg/matrix"
|
||||
"github.com/Snider/Borg/pkg/ui"
|
||||
"github.com/Snider/Borg/pkg/vcs"
|
||||
|
|
@ -21,6 +22,7 @@ var collectGithubRepoCmd = &cobra.Command{
|
|||
repoURL := args[0]
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
format, _ := cmd.Flags().GetString("format")
|
||||
compression, _ := cmd.Flags().GetString("compression")
|
||||
|
||||
bar := ui.NewProgressBar(-1, "Cloning repository")
|
||||
defer bar.Finish()
|
||||
|
|
@ -51,7 +53,20 @@ var collectGithubRepoCmd = &cobra.Command{
|
|||
}
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, data, 0644)
|
||||
compressedData, err := compress.Compress(data, compression)
|
||||
if err != nil {
|
||||
fmt.Printf("Error compressing data: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
if outputFile == "" {
|
||||
outputFile = "repo." + format
|
||||
if compression != "none" {
|
||||
outputFile += "." + compression
|
||||
}
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, compressedData, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing DataNode to file: %v\n", err)
|
||||
return
|
||||
|
|
@ -63,6 +78,7 @@ var collectGithubRepoCmd = &cobra.Command{
|
|||
|
||||
func init() {
|
||||
collectGithubCmd.AddCommand(collectGithubRepoCmd)
|
||||
collectGithubRepoCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode")
|
||||
collectGithubRepoCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
|
||||
collectGithubRepoCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)")
|
||||
collectGithubRepoCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/Snider/Borg/pkg/compress"
|
||||
"github.com/Snider/Borg/pkg/matrix"
|
||||
"github.com/Snider/Borg/pkg/pwa"
|
||||
"github.com/Snider/Borg/pkg/ui"
|
||||
|
|
@ -23,6 +24,7 @@ Example:
|
|||
pwaURL, _ := cmd.Flags().GetString("uri")
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
format, _ := cmd.Flags().GetString("format")
|
||||
compression, _ := cmd.Flags().GetString("compression")
|
||||
|
||||
if pwaURL == "" {
|
||||
fmt.Println("Error: uri is required")
|
||||
|
|
@ -64,7 +66,20 @@ Example:
|
|||
}
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, data, 0644)
|
||||
compressedData, err := compress.Compress(data, compression)
|
||||
if err != nil {
|
||||
fmt.Printf("Error compressing data: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
if outputFile == "" {
|
||||
outputFile = "pwa." + format
|
||||
if compression != "none" {
|
||||
outputFile += "." + compression
|
||||
}
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, compressedData, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing PWA to file: %v\n", err)
|
||||
return
|
||||
|
|
@ -77,6 +92,7 @@ Example:
|
|||
func init() {
|
||||
collectCmd.AddCommand(collectPWACmd)
|
||||
collectPWACmd.Flags().String("uri", "", "The URI of the PWA to collect")
|
||||
collectPWACmd.Flags().String("output", "pwa.dat", "Output file for the DataNode")
|
||||
collectPWACmd.Flags().String("output", "", "Output file for the DataNode")
|
||||
collectPWACmd.Flags().String("format", "datanode", "Output format (datanode or matrix)")
|
||||
collectPWACmd.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import (
|
|||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/Snider/Borg/pkg/compress"
|
||||
"github.com/Snider/Borg/pkg/matrix"
|
||||
"github.com/Snider/Borg/pkg/ui"
|
||||
"github.com/Snider/Borg/pkg/website"
|
||||
|
|
@ -22,6 +23,7 @@ var collectWebsiteCmd = &cobra.Command{
|
|||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
depth, _ := cmd.Flags().GetInt("depth")
|
||||
format, _ := cmd.Flags().GetString("format")
|
||||
compression, _ := cmd.Flags().GetString("compression")
|
||||
|
||||
bar := ui.NewProgressBar(-1, "Crawling website")
|
||||
defer bar.Finish()
|
||||
|
|
@ -52,7 +54,20 @@ var collectWebsiteCmd = &cobra.Command{
|
|||
}
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, data, 0644)
|
||||
compressedData, err := compress.Compress(data, compression)
|
||||
if err != nil {
|
||||
fmt.Printf("Error compressing data: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
if outputFile == "" {
|
||||
outputFile = "website." + format
|
||||
if compression != "none" {
|
||||
outputFile += "." + compression
|
||||
}
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, compressedData, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing website to file: %v\n", err)
|
||||
return
|
||||
|
|
@ -64,7 +79,8 @@ var collectWebsiteCmd = &cobra.Command{
|
|||
|
||||
func init() {
|
||||
collectCmd.AddCommand(collectWebsiteCmd)
|
||||
collectWebsiteCmd.PersistentFlags().String("output", "website.dat", "Output file for the DataNode")
|
||||
collectWebsiteCmd.PersistentFlags().String("output", "", "Output file for the DataNode")
|
||||
collectWebsiteCmd.PersistentFlags().Int("depth", 2, "Recursion depth for downloading")
|
||||
collectWebsiteCmd.PersistentFlags().String("format", "datanode", "Output format (datanode or matrix)")
|
||||
collectWebsiteCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import (
|
|||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/Snider/Borg/pkg/compress"
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
"github.com/Snider/Borg/pkg/tarfs"
|
||||
|
||||
|
|
@ -22,12 +23,18 @@ var serveCmd = &cobra.Command{
|
|||
dataFile := args[0]
|
||||
port, _ := cmd.Flags().GetString("port")
|
||||
|
||||
data, err := os.ReadFile(dataFile)
|
||||
rawData, err := os.ReadFile(dataFile)
|
||||
if err != nil {
|
||||
fmt.Printf("Error reading data file: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
data, err := compress.Decompress(rawData)
|
||||
if err != nil {
|
||||
fmt.Printf("Error decompressing data: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
var fs http.FileSystem
|
||||
if strings.HasSuffix(dataFile, ".matrix") {
|
||||
fs, err = tarfs.New(data)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ borg collect github repo [repository-url] [flags]
|
|||
**Flags:**
|
||||
- `--output string`: Output file for the DataNode (default: derived from the format and compression flags, e.g. "repo.datanode" or "repo.datanode.gz")
|
||||
- `--format string`: Output format (datanode or matrix) (default "datanode")
|
||||
- `--compression string`: Compression format (none, gz, or xz) (default "none")
|
||||
|
||||
**Example:**
|
||||
```
|
||||
|
|
@ -39,6 +40,7 @@ borg collect website [url] [flags]
|
|||
- `--output string`: Output file for the DataNode (default: derived from the format and compression flags, e.g. "website.datanode" or "website.datanode.gz")
|
||||
- `--depth int`: Recursion depth for downloading (default 2)
|
||||
- `--format string`: Output format (datanode or matrix) (default "datanode")
|
||||
- `--compression string`: Compression format (none, gz, or xz) (default "none")
|
||||
|
||||
**Example:**
|
||||
```
|
||||
|
|
@ -58,6 +60,7 @@ borg collect pwa [flags]
|
|||
- `--uri string`: The URI of the PWA to collect
|
||||
- `--output string`: Output file for the DataNode (default: derived from the format and compression flags, e.g. "pwa.datanode" or "pwa.datanode.gz")
|
||||
- `--format string`: Output format (datanode or matrix) (default "datanode")
|
||||
- `--compression string`: Compression format (none, gz, or xz) (default "none")
|
||||
|
||||
**Example:**
|
||||
```
|
||||
|
|
@ -85,6 +88,23 @@ borg serve [file] [flags]
|
|||
./borg serve borg.matrix --port 9999
|
||||
```
|
||||
|
||||
## Compression
|
||||
|
||||
All `collect` commands support optional compression. The following compression formats are available:
|
||||
|
||||
- `none`: No compression (default)
|
||||
- `gz`: Gzip compression
|
||||
- `xz`: XZ compression
|
||||
|
||||
To use compression, specify the desired format with the `--compression` flag. The output filename will be automatically updated with the appropriate extension (e.g., `.gz`, `.xz`).
|
||||
|
||||
**Example:**
|
||||
```
|
||||
./borg collect github repo https://github.com/Snider/Borg --compression gz
|
||||
```
|
||||
|
||||
The `serve` command can transparently serve compressed files.
|
||||
|
||||
## Terminal Isolation Matrix
|
||||
|
||||
The `matrix` format creates a `runc` compatible bundle. This bundle can be executed by `runc` to create a container with the collected files. This is useful for creating isolated environments for testing or analysis.
|
||||
|
|
|
|||
8
examples/compress_datanode.sh
Executable file
8
examples/compress_datanode.sh
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
#!/bin/bash
# Example of using the 'borg collect' command with the '--compression' flag.

# This script clones the specified Git repository and writes the result as a
# gzip-compressed output file. NOTE(review): when --output is not given, the
# filename appears to be derived from the --format and --compression flags
# (e.g. "repo.datanode.gz") — confirm against the collect command defaults.
# The main executable 'borg' is built from the project's root.
# Make sure you have built the project by running 'go build -o borg main.go' in the root directory.

./borg collect github repo https://github.com/Snider/Borg --compression gz
|
||||
|
|
@ -5,6 +5,7 @@ import (
|
|||
"io/fs"
|
||||
"os"
|
||||
|
||||
"github.com/Snider/Borg/pkg/compress"
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
)
|
||||
|
||||
|
|
@ -16,12 +17,18 @@ func main() {
|
|||
|
||||
datFile := os.Args[1]
|
||||
|
||||
data, err := os.ReadFile(datFile)
|
||||
rawData, err := os.ReadFile(datFile)
|
||||
if err != nil {
|
||||
fmt.Printf("Error reading .dat file: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
data, err := compress.Decompress(rawData)
|
||||
if err != nil {
|
||||
fmt.Printf("Error decompressing data: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
dn, err := datanode.FromTar(data)
|
||||
if err != nil {
|
||||
fmt.Printf("Error creating DataNode from tarball: %v\n", err)
|
||||
|
|
|
|||
1
go.mod
1
go.mod
|
|
@ -33,6 +33,7 @@ require (
|
|||
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
|
||||
github.com/skeema/knownhosts v1.3.1 // indirect
|
||||
github.com/spf13/pflag v1.0.10 // indirect
|
||||
github.com/ulikunitz/xz v0.5.15 // indirect
|
||||
github.com/xanzy/ssh-agent v0.3.3 // indirect
|
||||
golang.org/x/crypto v0.43.0 // indirect
|
||||
golang.org/x/sys v0.37.0 // indirect
|
||||
|
|
|
|||
2
go.sum
2
go.sum
|
|
@ -94,6 +94,8 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
|
|||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/ulikunitz/xz v0.5.15 h1:9DNdB5s+SgV3bQ2ApL10xRc35ck0DuIX/isZvIk+ubY=
|
||||
github.com/ulikunitz/xz v0.5.15/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||
github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM=
|
||||
github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
|
|
|
|||
64
pkg/compress/compress.go
Normal file
64
pkg/compress/compress.go
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
package compress
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
|
||||
"github.com/ulikunitz/xz"
|
||||
)
|
||||
|
||||
// Compress compresses data using the specified format.
|
||||
func Compress(data []byte, format string) ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
var writer io.WriteCloser
|
||||
var err error
|
||||
|
||||
switch format {
|
||||
case "gz":
|
||||
writer = gzip.NewWriter(&buf)
|
||||
case "xz":
|
||||
writer, err = xz.NewWriter(&buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
default:
|
||||
return data, nil
|
||||
}
|
||||
|
||||
_, err = writer.Write(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = writer.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// Decompress decompresses data, detecting the format automatically.
|
||||
func Decompress(data []byte) ([]byte, error) {
|
||||
// Check for gzip header
|
||||
if len(data) > 2 && data[0] == 0x1f && data[1] == 0x8b {
|
||||
reader, err := gzip.NewReader(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer reader.Close()
|
||||
return io.ReadAll(reader)
|
||||
}
|
||||
|
||||
// Check for xz header
|
||||
if len(data) > 6 && data[0] == 0xfd && data[1] == '7' && data[2] == 'z' && data[3] == 'X' && data[4] == 'Z' && data[5] == 0x00 {
|
||||
reader, err := xz.NewReader(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return io.ReadAll(reader)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
|
@ -13,15 +13,13 @@ import (
|
|||
|
||||
// TarFS is a http.FileSystem that serves files from a tar archive.
|
||||
type TarFS struct {
|
||||
files map[string]*tar.Header
|
||||
data []byte
|
||||
files map[string]*tarFile
|
||||
}
|
||||
|
||||
// New creates a new TarFS from a tar archive.
|
||||
func New(data []byte) (*TarFS, error) {
|
||||
fs := &TarFS{
|
||||
files: make(map[string]*tar.Header),
|
||||
data: data,
|
||||
files: make(map[string]*tarFile),
|
||||
}
|
||||
|
||||
tr := tar.NewReader(bytes.NewReader(data))
|
||||
|
|
@ -35,7 +33,15 @@ func New(data []byte) (*TarFS, error) {
|
|||
}
|
||||
|
||||
if strings.HasPrefix(hdr.Name, "rootfs/") {
|
||||
fs.files[strings.TrimPrefix(hdr.Name, "rootfs/")] = hdr
|
||||
content, err := io.ReadAll(tr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fs.files[strings.TrimPrefix(hdr.Name, "rootfs/")] = &tarFile{
|
||||
header: hdr,
|
||||
content: bytes.NewReader(content),
|
||||
modTime: hdr.ModTime,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -45,26 +51,10 @@ func New(data []byte) (*TarFS, error) {
|
|||
// Open opens a file from the tar archive.
|
||||
func (fs *TarFS) Open(name string) (http.File, error) {
|
||||
name = strings.TrimPrefix(name, "/")
|
||||
if hdr, ok := fs.files[name]; ok {
|
||||
// This is a bit inefficient, but it's the simplest way to
|
||||
// get the file content without pre-indexing everything.
|
||||
tr := tar.NewReader(bytes.NewReader(fs.data))
|
||||
for {
|
||||
h, err := tr.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if h.Name == hdr.Name {
|
||||
return &tarFile{
|
||||
header: hdr,
|
||||
content: tr,
|
||||
modTime: hdr.ModTime,
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
if file, ok := fs.files[name]; ok {
|
||||
// Reset the reader to the beginning of the file
|
||||
file.content.Seek(0, 0)
|
||||
return file, nil
|
||||
}
|
||||
|
||||
return nil, os.ErrNotExist
|
||||
|
|
@ -73,14 +63,14 @@ func (fs *TarFS) Open(name string) (http.File, error) {
|
|||
// tarFile is a http.File that represents a file in a tar archive.
|
||||
type tarFile struct {
|
||||
header *tar.Header
|
||||
content io.Reader
|
||||
content *bytes.Reader
|
||||
modTime time.Time
|
||||
}
|
||||
|
||||
func (f *tarFile) Close() error { return nil }
|
||||
func (f *tarFile) Read(p []byte) (int, error) { return f.content.Read(p) }
|
||||
func (f *tarFile) Seek(offset int64, whence int) (int64, error) {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
return f.content.Seek(offset, whence)
|
||||
}
|
||||
|
||||
func (f *tarFile) Readdir(count int) ([]os.FileInfo, error) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue