Compare commits

..

1 commit

Author SHA1 Message Date
google-labs-jules[bot]
99c635d8df feat: Add diff and sync collection functionality
Implement the core logic for comparing two archives (diff) and performing incremental updates (sync).

- Introduces a new `borg diff` command to show differences between two collection archives.
- Adds new `pkg/diff` and `pkg/sync` packages with corresponding business logic and unit tests.
- The `diff` command supports reading compressed archives and prints a formatted summary of added, removed, and modified files.
- The `sync` package includes `append`, `mirror`, and `update` strategies.

Next steps involve integrating the sync logic into the `collect` commands.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:49:02 +00:00
11 changed files with 433 additions and 336 deletions

View file

@ -30,4 +30,5 @@ var collectGithubReposCmd = &cobra.Command{
// init attaches the repos subcommand under `collect github` and registers its
// optional --sync flag (a path to an existing archive).
func init() {
	collectGithubCmd.AddCommand(collectGithubReposCmd)

	flags := collectGithubReposCmd.Flags()
	flags.String("sync", "", "path to an existing archive to sync with")
}

View file

@ -1,333 +0,0 @@
package cmd
import (
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/tim"
"github.com/Snider/Borg/pkg/trix"
"github.com/Snider/Borg/pkg/ui"
"github.com/spf13/cobra"
)
// CollectLocalCmd is the `collect local` subcommand. It embeds cobra.Command,
// so the usual cobra methods (Flags, Execute, ...) are available directly on it.
type CollectLocalCmd struct {
cobra.Command
}
// NewCollectLocalCmd builds the `collect local` command.
//
// The command accepts at most one positional argument (the directory to
// collect, defaulting to "."), reads its options from flags and hands
// everything to CollectLocal. On success it prints the path of the written
// archive to the command's output stream.
func NewCollectLocalCmd() *CollectLocalCmd {
	// run is the RunE handler: resolve the directory, read the flags, delegate.
	run := func(cmd *cobra.Command, args []string) error {
		directory := "."
		if len(args) > 0 {
			directory = args[0]
		}

		// Lookup errors are discarded: every flag read here is registered
		// further down in this constructor.
		opts := cmd.Flags()
		outputFile, _ := opts.GetString("output")
		format, _ := opts.GetString("format")
		compression, _ := opts.GetString("compression")
		password, _ := opts.GetString("password")
		excludes, _ := opts.GetStringSlice("exclude")
		includeHidden, _ := opts.GetBool("hidden")
		respectGitignore, _ := opts.GetBool("gitignore")

		finalPath, err := CollectLocal(directory, outputFile, format, compression, password, excludes, includeHidden, respectGitignore)
		if err != nil {
			return err
		}
		fmt.Fprintln(cmd.OutOrStdout(), "Files saved to", finalPath)
		return nil
	}

	local := &CollectLocalCmd{
		Command: cobra.Command{
			Use:   "local [directory]",
			Short: "Collect files from a local directory",
			Long: `Collect files from a local directory and store them in a DataNode.
If no directory is specified, the current working directory is used.
Examples:
borg collect local
borg collect local ./src
borg collect local /path/to/project --output project.tar
borg collect local . --format stim --password secret
borg collect local . --exclude "*.log" --exclude "node_modules"`,
			Args: cobra.MaximumNArgs(1),
			RunE: run,
		},
	}

	flags := local.Flags()
	flags.String("output", "", "Output file for the DataNode")
	flags.String("format", "datanode", "Output format (datanode, tim, trix, or stim)")
	flags.String("compression", "none", "Compression format (none, gz, or xz)")
	flags.String("password", "", "Password for encryption (required for stim/trix format)")
	flags.StringSlice("exclude", nil, "Patterns to exclude (can be specified multiple times)")
	flags.Bool("hidden", false, "Include hidden files and directories")
	flags.Bool("gitignore", true, "Respect .gitignore files (default: true)")
	return local
}
func init() {
collectCmd.AddCommand(&NewCollectLocalCmd().Command)
}
// CollectLocal gathers the files under directory into a DataNode, serializes
// it in the requested format, compresses the result and writes it to disk.
//
// Parameters:
//   - directory: root to scan; resolved to an absolute path first.
//   - outputFile: destination path. When empty, a name is derived from the
//     directory's base name plus the format (and the compression, unless "none").
//   - format: one of "datanode", "tim", "trix" or "stim".
//   - compression: one of "none", "gz" or "xz".
//   - password: must be non-empty for the "stim" and "trix" formats.
//   - excludes: glob patterns checked against each entry's base name and
//     relative path.
//   - includeHidden: when false, any entry with a dot-prefixed path component
//     is skipped.
//   - respectGitignore: when true, patterns from the .gitignore in the root
//     directory are applied (only that one file is read).
//
// It returns the path that was written. An error is returned for invalid
// options, an inaccessible or non-directory root, any walk or read failure,
// a scan that yields zero files, or a serialization/compression/write failure.
//
// Every collected file is read fully into memory before packaging.
func CollectLocal(directory string, outputFile string, format string, compression string, password string, excludes []string, includeHidden bool, respectGitignore bool) (string, error) {
	// Reject unsupported options before touching the filesystem.
	switch format {
	case "datanode", "tim", "trix", "stim":
	default:
		return "", fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', 'trix', or 'stim')", format)
	}
	if password == "" && (format == "stim" || format == "trix") {
		return "", fmt.Errorf("password is required for %s format", format)
	}
	switch compression {
	case "none", "gz", "xz":
	default:
		return "", fmt.Errorf("invalid compression: %s (must be 'none', 'gz', or 'xz')", compression)
	}

	// The root must resolve to an existing directory.
	root, err := filepath.Abs(directory)
	if err != nil {
		return "", fmt.Errorf("error resolving directory path: %w", err)
	}
	rootInfo, err := os.Stat(root)
	if err != nil {
		return "", fmt.Errorf("error accessing directory: %w", err)
	}
	if !rootInfo.IsDir() {
		return "", fmt.Errorf("not a directory: %s", root)
	}

	var ignoreRules []string
	if respectGitignore {
		ignoreRules = loadGitignore(root)
	}

	dn := datanode.New()
	collected := 0
	bar := ui.NewProgressBar(-1, "Scanning files")
	defer bar.Finish()

	// prune drops a filtered entry: a directory is skipped together with its
	// whole subtree, a file is just passed over.
	prune := func(entry fs.DirEntry) error {
		if entry.IsDir() {
			return filepath.SkipDir
		}
		return nil
	}

	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		rel, err := filepath.Rel(root, path)
		if err != nil {
			return err
		}
		if rel == "." {
			// The root itself is never an archive entry.
			return nil
		}

		// Filters run in a fixed order: hidden, gitignore, explicit excludes.
		switch {
		case !includeHidden && isHidden(rel):
			return prune(entry)
		case respectGitignore && matchesGitignore(rel, entry.IsDir(), ignoreRules):
			return prune(entry)
		case matchesExclude(rel, excludes):
			return prune(entry)
		case entry.IsDir():
			// Directories are implicit in a DataNode; only files are stored.
			return nil
		}

		content, err := os.ReadFile(path)
		if err != nil {
			return fmt.Errorf("error reading %s: %w", rel, err)
		}
		// Archive paths always use forward slashes (tar convention).
		dn.AddData(filepath.ToSlash(rel), content)
		collected++
		bar.Describe(fmt.Sprintf("Collected %d files", collected))
		return nil
	}

	if err := filepath.WalkDir(root, visit); err != nil {
		return "", fmt.Errorf("error walking directory: %w", err)
	}
	if collected == 0 {
		return "", fmt.Errorf("no files found in %s", directory)
	}
	bar.Describe(fmt.Sprintf("Packaging %d files", collected))

	// Serialize the DataNode into the requested container format.
	var data []byte
	switch format {
	case "tim":
		t, err := tim.FromDataNode(dn)
		if err != nil {
			return "", fmt.Errorf("error creating tim: %w", err)
		}
		data, err = t.ToTar()
		if err != nil {
			return "", fmt.Errorf("error serializing tim: %w", err)
		}
	case "stim":
		t, err := tim.FromDataNode(dn)
		if err != nil {
			return "", fmt.Errorf("error creating tim: %w", err)
		}
		data, err = t.ToSigil(password)
		if err != nil {
			return "", fmt.Errorf("error encrypting stim: %w", err)
		}
	case "trix":
		data, err = trix.ToTrix(dn, password)
		if err != nil {
			return "", fmt.Errorf("error serializing trix: %w", err)
		}
	default:
		data, err = dn.ToTar()
		if err != nil {
			return "", fmt.Errorf("error serializing DataNode: %w", err)
		}
	}

	packed, err := compress.Compress(data, compression)
	if err != nil {
		return "", fmt.Errorf("error compressing data: %w", err)
	}

	// Derive "<dir>.<format>[.<compression>]" when no output path was given.
	if outputFile == "" {
		name := filepath.Base(root)
		if name == "." || name == "/" {
			name = "local"
		}
		outputFile = name + "." + format
		if compression != "none" {
			outputFile += "." + compression
		}
	}

	if err := os.WriteFile(outputFile, packed, 0644); err != nil {
		return "", fmt.Errorf("error writing output file: %w", err)
	}
	return outputFile, nil
}
// isHidden reports whether any component of path begins with a dot.
//
// After normalising separators to "/", a component starts with "." exactly
// when the path itself starts with "." or a "/." sequence appears in it.
func isHidden(path string) bool {
	slashed := filepath.ToSlash(path)
	return strings.HasPrefix(slashed, ".") || strings.Contains(slashed, "/.")
}
// loadGitignore returns the patterns listed in dir/.gitignore.
//
// Each line is trimmed of surrounding whitespace; blank lines and lines
// starting with "#" are dropped. If the file cannot be read (for example
// because it does not exist) the result is nil rather than an error.
func loadGitignore(dir string) []string {
	raw, err := os.ReadFile(filepath.Join(dir, ".gitignore"))
	if err != nil {
		return nil
	}

	var rules []string
	for _, entry := range strings.Split(string(raw), "\n") {
		entry = strings.TrimSpace(entry)
		if entry != "" && !strings.HasPrefix(entry, "#") {
			rules = append(rules, entry)
		}
	}
	return rules
}
// matchesGitignore reports whether path is ignored by any of patterns.
//
// This is a simplified take on gitignore matching:
//   - a pattern ending in "/" applies to directories only (isDir must be true);
//   - negated patterns ("!...") are skipped, never used to re-include a path;
//   - each remaining pattern is tried as a glob against the base name and
//     against the whole path;
//   - a pattern containing "**" is additionally tried against the whole path
//     with every "**" collapsed to "*".
//
// Malformed glob patterns never match.
func matchesGitignore(path string, isDir bool, patterns []string) bool {
	base := filepath.Base(path)

	// globs wraps filepath.Match and treats a pattern error as "no match".
	globs := func(glob, target string) bool {
		ok, _ := filepath.Match(glob, target)
		return ok
	}

	for _, rule := range patterns {
		if strings.HasSuffix(rule, "/") {
			if !isDir {
				continue
			}
			rule = strings.TrimSuffix(rule, "/")
		}
		if strings.HasPrefix(rule, "!") {
			continue
		}

		switch {
		case globs(rule, base):
			return true
		case globs(rule, path):
			return true
		case strings.Contains(rule, "**") && globs(strings.ReplaceAll(rule, "**", "*"), path):
			return true
		}
	}
	return false
}
// matchesExclude reports whether path is hit by any exclude pattern.
//
// Every pattern is tried as a glob against the path's base name first and
// then against the full path. Malformed patterns never match.
func matchesExclude(path string, excludes []string) bool {
	name := filepath.Base(path)
	for _, glob := range excludes {
		for _, candidate := range [2]string{name, path} {
			if ok, _ := filepath.Match(glob, candidate); ok {
				return true
			}
		}
	}
	return false
}

88
cmd/diff.go Normal file
View file

@ -0,0 +1,88 @@
package cmd
import (
"fmt"
"os"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/diff"
"github.com/spf13/cobra"
)
// NewDiffCmd creates the `diff <file1> <file2>` command.
//
// Both arguments are archive paths. Each archive is read from disk,
// decompressed and parsed as a tar-backed DataNode; the two DataNodes are then
// compared and a summary of added, removed and modified paths is printed to
// the command's output stream. Any read, decompress, parse or compare failure
// is returned as a wrapped error.
func NewDiffCmd() *cobra.Command {
	// load turns an archive file on disk into a DataNode.
	load := func(path string) (*datanode.DataNode, error) {
		raw, err := os.ReadFile(path)
		if err != nil {
			return nil, fmt.Errorf("failed to read file %s: %w", path, err)
		}
		raw, err = compress.Decompress(raw)
		if err != nil {
			return nil, fmt.Errorf("failed to decompress file %s: %w", path, err)
		}
		dn, err := datanode.FromTar(raw)
		if err != nil {
			return nil, fmt.Errorf("failed to create datanode from %s: %w", path, err)
		}
		return dn, nil
	}

	run := func(cmd *cobra.Command, args []string) error {
		// The first archive is loaded completely before the second is touched.
		left, err := load(args[0])
		if err != nil {
			return err
		}
		right, err := load(args[1])
		if err != nil {
			return err
		}

		differences, err := diff.Compare(left, right)
		if err != nil {
			return fmt.Errorf("failed to compare archives: %w", err)
		}

		out := cmd.OutOrStdout()
		if len(differences.Added)+len(differences.Removed)+len(differences.Modified) == 0 {
			fmt.Fprintln(out, "No differences found.")
			return nil
		}

		// section prints one titled group of paths; empty groups are omitted.
		section := func(header, line string, files []string) {
			if len(files) == 0 {
				return
			}
			fmt.Fprintf(out, header, len(files))
			for _, file := range files {
				fmt.Fprintf(out, line, file)
			}
		}
		section("\nAdded (%d):\n", " + %s\n", differences.Added)
		section("\nRemoved (%d):\n", " - %s\n", differences.Removed)
		section("\nModified (%d):\n", " ~ %s\n", differences.Modified)
		return nil
	}

	return &cobra.Command{
		Use:   "diff <file1> <file2>",
		Short: "Compare two archives",
		Args:  cobra.ExactArgs(2),
		RunE:  run,
	}
}

View file

@ -16,6 +16,7 @@ packaging their contents into a single file, and managing the data within.`,
}
rootCmd.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose logging")
rootCmd.AddCommand(NewDiffCmd())
return rootCmd
}

BIN
examples/demo-sample.smsg Normal file

Binary file not shown.

2
go.mod
View file

@ -60,7 +60,7 @@ require (
github.com/wailsapp/go-webview2 v1.0.22 // indirect
github.com/wailsapp/mimetype v1.4.1 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
golang.org/x/crypto v0.45.0 // indirect
golang.org/x/crypto v0.44.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/term v0.37.0 // indirect
golang.org/x/text v0.31.0 // indirect

4
go.sum
View file

@ -155,8 +155,8 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=

93
pkg/diff/diff.go Normal file
View file

@ -0,0 +1,93 @@
package diff
import (
	"bytes"
	"io"
	"io/fs"
	"sort"

	"github.com/Snider/Borg/pkg/datanode"
)
// Diff represents the differences between two DataNodes.
// All three fields hold file paths and are filled by Compare(a, b).
type Diff struct {
// Added lists paths that exist in b but not in a.
Added []string
// Removed lists paths that exist in a but not in b.
Removed []string
// Modified lists paths present on both sides whose bytes differ.
Modified []string
}
// fileInfo stores content for comparison.
type fileInfo struct {
// content is the complete file body, as read with io.ReadAll in Compare.
content []byte
}
// Compare reports how DataNode b differs from DataNode a.
//
// Every non-directory entry of both DataNodes is read fully into memory and
// compared byte for byte:
//   - Added:    paths present in b but not in a;
//   - Removed:  paths present in a but not in b;
//   - Modified: paths present in both whose contents differ.
//
// Each slice is returned in ascending lexical order so callers (and the CLI
// output built from it) see the same result on every run; collecting them by
// ranging over maps alone would yield a random order.
//
// A nil *Diff and the underlying error are returned if walking, opening or
// reading any entry fails.
func Compare(a, b *datanode.DataNode) (*Diff, error) {
	filesA, err := collectFiles(a)
	if err != nil {
		return nil, err
	}
	filesB, err := collectFiles(b)
	if err != nil {
		return nil, err
	}

	diff := &Diff{}

	// Removed and modified: everything keyed off the paths of a.
	for path, infoA := range filesA {
		infoB, ok := filesB[path]
		switch {
		case !ok:
			diff.Removed = append(diff.Removed, path)
		case !bytes.Equal(infoA.content, infoB.content):
			diff.Modified = append(diff.Modified, path)
		}
	}

	// Added: paths only b knows about.
	for path := range filesB {
		if _, ok := filesA[path]; !ok {
			diff.Added = append(diff.Added, path)
		}
	}

	// Map iteration order is unspecified; sort for a stable result.
	sort.Strings(diff.Added)
	sort.Strings(diff.Removed)
	sort.Strings(diff.Modified)

	return diff, nil
}

// collectFiles reads every non-directory entry of dn into a map keyed by path.
func collectFiles(dn *datanode.DataNode) (map[string]fileInfo, error) {
	files := make(map[string]fileInfo)
	err := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		file, err := dn.Open(path)
		if err != nil {
			return err
		}
		// The defer fires when this callback returns, i.e. once per file.
		defer file.Close()

		content, err := io.ReadAll(file)
		if err != nil {
			return err
		}
		files[path] = fileInfo{content: content}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return files, nil
}

75
pkg/diff/diff_test.go Normal file
View file

@ -0,0 +1,75 @@
package diff
import (
"reflect"
"sort"
"testing"
"github.com/Snider/Borg/pkg/datanode"
)
// TestCompare_Good checks that two DataNodes holding the same paths with the
// same contents produce an empty Diff.
func TestCompare_Good(t *testing.T) {
	build := func() *datanode.DataNode {
		dn := datanode.New()
		dn.AddData("file1.txt", []byte("hello"))
		dn.AddData("file2.txt", []byte("world"))
		return dn
	}

	got, err := Compare(build(), build())
	if err != nil {
		t.Fatalf("Compare() error = %v", err)
	}

	if total := len(got.Added) + len(got.Removed) + len(got.Modified); total != 0 {
		t.Errorf("Expected no differences, but got %+v", got)
	}
}
// TestCompare_Bad checks that one removed, one modified and one added file are
// each reported in the matching Diff field.
func TestCompare_Bad(t *testing.T) {
	before := datanode.New()
	before.AddData("file1.txt", []byte("hello")) // unchanged
	before.AddData("file2.txt", []byte("world")) // missing from after
	before.AddData("file3.txt", []byte("old"))   // different bytes in after

	after := datanode.New()
	after.AddData("file1.txt", []byte("hello"))
	after.AddData("file3.txt", []byte("new"))
	after.AddData("file4.txt", []byte("added")) // only in after

	got, err := Compare(before, after)
	if err != nil {
		t.Fatalf("Compare() error = %v", err)
	}

	// Sort so the assertions do not depend on the order Compare returns paths in.
	for _, paths := range [][]string{got.Added, got.Removed, got.Modified} {
		sort.Strings(paths)
	}

	if want := []string{"file4.txt"}; !reflect.DeepEqual(got.Added, want) {
		t.Errorf("Expected Added %v, got %v", want, got.Added)
	}
	if want := []string{"file2.txt"}; !reflect.DeepEqual(got.Removed, want) {
		t.Errorf("Expected Removed %v, got %v", want, got.Removed)
	}
	if want := []string{"file3.txt"}; !reflect.DeepEqual(got.Modified, want) {
		t.Errorf("Expected Modified %v, got %v", want, got.Modified)
	}
}
func TestCompare_Ugly(t *testing.T) {
a := datanode.New()
b := datanode.New()
diff, err := Compare(a, b)
if err != nil {
t.Fatalf("Compare() error = %v", err)
}
if len(diff.Added) != 0 || len(diff.Removed) != 0 || len(diff.Modified) != 0 {
t.Errorf("Expected no differences for empty datanodes, but got %+v", diff)
}
}

93
pkg/sync/sync.go Normal file
View file

@ -0,0 +1,93 @@
package sync
import (
"bytes"
"fmt"
"io"
"io/fs"
"github.com/Snider/Borg/pkg/datanode"
)
// SyncStrategy defines the strategy for a sync operation.
// It selects how Sync(a, b, strategy) combines the files of a and b.
type SyncStrategy string
const (
// AppendStrategy keeps every file of a unchanged and adds only those files
// of b whose paths do not exist in a.
AppendStrategy SyncStrategy = "append"
// MirrorStrategy produces a result containing exactly the files of b;
// nothing from a is carried over.
MirrorStrategy SyncStrategy = "mirror"
// UpdateStrategy keeps the files of a, adds files that exist only in b, and
// takes b's content for paths present in both when the bytes differ.
UpdateStrategy SyncStrategy = "update"
)
// Sync builds a new DataNode from a and b according to strategy; neither input
// is modified through this function.
//
// All non-directory entries of both inputs are first read fully into memory.
// The result is then assembled as follows:
//   - AppendStrategy: all files of a, plus files of b whose path is not in a;
//   - MirrorStrategy: only the files of b;
//   - UpdateStrategy: all files of a, with files of b added or, where the
//     content differs, taking precedence.
//
// An error is returned if either input cannot be walked or read, or if
// strategy is not one of the three known values. The strategy is validated
// only after both inputs have been read.
//
// NOTE(review): the error messages call a the "source" and b the "target",
// while MirrorStrategy copies b; confirm which naming is intended.
func Sync(a, b *datanode.DataNode, strategy SyncStrategy) (*datanode.DataNode, error) {
	merged := datanode.New()
	fromA := make(map[string][]byte)
	fromB := make(map[string][]byte)

	// snapshot copies every non-directory entry of dn into dst, keyed by path.
	snapshot := func(dn *datanode.DataNode, dst map[string][]byte) error {
		visit := func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return err
			}
			if d.IsDir() {
				return nil
			}
			f, err := dn.Open(path)
			if err != nil {
				return err
			}
			defer f.Close()

			body, err := io.ReadAll(f)
			if err != nil {
				return err
			}
			dst[path] = body
			return nil
		}
		return dn.Walk(".", visit)
	}

	if err := snapshot(a, fromA); err != nil {
		return nil, fmt.Errorf("failed to walk source datanode: %w", err)
	}
	if err := snapshot(b, fromB); err != nil {
		return nil, fmt.Errorf("failed to walk target datanode: %w", err)
	}

	switch strategy {
	case MirrorStrategy:
		for path, content := range fromB {
			merged.AddData(path, content)
		}

	case AppendStrategy, UpdateStrategy:
		// Both strategies start from everything in a.
		for path, content := range fromA {
			merged.AddData(path, content)
		}
		for path, contentB := range fromB {
			contentA, inA := fromA[path]
			// Append never touches a path that a already has; update leaves
			// it alone only when the bytes are identical.
			if inA && (strategy == AppendStrategy || bytes.Equal(contentA, contentB)) {
				continue
			}
			merged.AddData(path, contentB)
		}

	default:
		return nil, fmt.Errorf("unknown sync strategy: %s", strategy)
	}

	return merged, nil
}

79
pkg/sync/sync_test.go Normal file
View file

@ -0,0 +1,79 @@
package sync
import (
"io/fs"
"reflect"
"sort"
"testing"
"github.com/Snider/Borg/pkg/datanode"
)
// TestSync_Append checks that AppendStrategy yields the union of both path
// sets. Only paths are asserted here, not file contents.
func TestSync_Append(t *testing.T) {
	base := datanode.New()
	base.AddData("file1.txt", []byte("hello"))
	base.AddData("file2.txt", []byte("world"))

	incoming := datanode.New()
	incoming.AddData("file1.txt", []byte("different")) // same path as in base
	incoming.AddData("file3.txt", []byte("new"))       // path unknown to base

	merged, err := Sync(base, incoming, AppendStrategy)
	if err != nil {
		t.Fatalf("Sync() error = %v", err)
	}

	assertDataNodeFiles(t, merged, []string{"file1.txt", "file2.txt", "file3.txt"})
}
// TestSync_Mirror checks that MirrorStrategy yields only the paths of the
// second DataNode.
func TestSync_Mirror(t *testing.T) {
	base := datanode.New()
	base.AddData("file1.txt", []byte("hello"))
	base.AddData("file2.txt", []byte("world"))

	incoming := datanode.New()
	incoming.AddData("file3.txt", []byte("new"))

	merged, err := Sync(base, incoming, MirrorStrategy)
	if err != nil {
		t.Fatalf("Sync() error = %v", err)
	}

	assertDataNodeFiles(t, merged, []string{"file3.txt"})
}
// TestSync_Update checks that UpdateStrategy yields the union of both path
// sets. Only paths are asserted here, not file contents.
func TestSync_Update(t *testing.T) {
	base := datanode.New()
	base.AddData("file1.txt", []byte("hello"))
	base.AddData("file2.txt", []byte("world"))

	incoming := datanode.New()
	incoming.AddData("file1.txt", []byte("updated")) // same path, new bytes
	incoming.AddData("file3.txt", []byte("new"))     // path unknown to base

	merged, err := Sync(base, incoming, UpdateStrategy)
	if err != nil {
		t.Fatalf("Sync() error = %v", err)
	}

	assertDataNodeFiles(t, merged, []string{"file1.txt", "file2.txt", "file3.txt"})
}
// assertDataNodeFiles fails the test unless dn contains exactly the file
// paths in expected, compared without regard to order.
//
// Walk errors abort the test immediately rather than being ignored. The
// expected slice is copied before sorting so the caller's slice is not
// reordered; as a side effect of that copy, a nil and an empty expected list
// are treated the same.
func assertDataNodeFiles(t *testing.T, dn *datanode.DataNode, expected []string) {
	t.Helper()

	var actual []string
	err := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// d must not be used when the walk reports an error for this entry.
			return err
		}
		if !d.IsDir() {
			actual = append(actual, path)
		}
		return nil
	})
	if err != nil {
		t.Fatalf("Walk() error = %v", err)
	}

	want := append([]string(nil), expected...)
	sort.Strings(actual)
	sort.Strings(want)

	if !reflect.DeepEqual(actual, want) {
		t.Errorf("Expected files %v, got %v", want, actual)
	}
}