diff --git a/cmd/collect_github_repos.go b/cmd/collect_github_repos.go
index dfcd315..209d476 100644
--- a/cmd/collect_github_repos.go
+++ b/cmd/collect_github_repos.go
@@ -30,4 +30,5 @@ var collectGithubReposCmd = &cobra.Command{
 
 func init() {
 	collectGithubCmd.AddCommand(collectGithubReposCmd)
+	collectGithubReposCmd.Flags().String("sync", "", "path to an existing archive to sync with")
 }
diff --git a/cmd/diff.go b/cmd/diff.go
new file mode 100644
index 0000000..4b0fc79
--- /dev/null
+++ b/cmd/diff.go
@@ -0,0 +1,99 @@
+package cmd
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/Snider/Borg/pkg/compress"
+	"github.com/Snider/Borg/pkg/datanode"
+	"github.com/Snider/Borg/pkg/diff"
+	"github.com/spf13/cobra"
+)
+
+// NewDiffCmd creates the "diff" command, which compares the contents of two
+// archives and reports the files that were added, removed, or modified.
+//
+// The command requires exactly two positional arguments: the paths of the
+// archives to compare. Each archive is read from disk, passed through
+// compress.Decompress and parsed with datanode.FromTar; the two resulting
+// DataNodes are then handed to diff.Compare. Results are written to the
+// command's output stream, and any read, decompress, parse, or compare
+// failure is returned as a wrapped error.
+func NewDiffCmd() *cobra.Command {
+	// loadArchive turns the archive at path into a DataNode. Both arguments go
+	// through this one closure so they share a single code path and produce
+	// identically worded errors.
+	loadArchive := func(path string) (*datanode.DataNode, error) {
+		raw, err := os.ReadFile(path)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read file %s: %w", path, err)
+		}
+		tarData, err := compress.Decompress(raw)
+		if err != nil {
+			return nil, fmt.Errorf("failed to decompress file %s: %w", path, err)
+		}
+		dn, err := datanode.FromTar(tarData)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create datanode from %s: %w", path, err)
+		}
+		return dn, nil
+	}
+
+	cmd := &cobra.Command{
+		// Cobra uses the first word of Use as the command name; the rest is
+		// shown in the usage line, so name the two required arguments here.
+		Use:   "diff <archive1> <archive2>",
+		Short: "Compare two archives",
+		Args:  cobra.ExactArgs(2),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			dn1, err := loadArchive(args[0])
+			if err != nil {
+				return err
+			}
+			dn2, err := loadArchive(args[1])
+			if err != nil {
+				return err
+			}
+
+			// Compare the two datanodes
+			differences, err := diff.Compare(dn1, dn2)
+			if err != nil {
+				return fmt.Errorf("failed to compare archives: %w", err)
+			}
+
+			out := cmd.OutOrStdout()
+
+			// Nothing to list: report that explicitly instead of printing nothing.
+			if len(differences.Added) == 0 && len(differences.Removed) == 0 && len(differences.Modified) == 0 {
+				fmt.Fprintln(out, "No differences found.")
+				return nil
+			}
+
+			// Each non-empty category gets a header with its count, followed by
+			// one marked line per path.
+			if len(differences.Added) > 0 {
+				fmt.Fprintf(out, "\nAdded (%d):\n", len(differences.Added))
+				for _, file := range differences.Added {
+					fmt.Fprintf(out, " + %s\n", file)
+				}
+			}
+
+			if len(differences.Removed) > 0 {
+				fmt.Fprintf(out, "\nRemoved (%d):\n", len(differences.Removed))
+				for _, file := range differences.Removed {
+					fmt.Fprintf(out, " - %s\n", file)
+				}
+			}
+
+			if len(differences.Modified) > 0 {
+				fmt.Fprintf(out, "\nModified (%d):\n", len(differences.Modified))
+				for _, file := range differences.Modified {
+					fmt.Fprintf(out, " ~ %s\n", file)
+				}
+			}
+
+			return nil
+		},
+	}
+	return cmd
+}
diff --git a/cmd/root.go b/cmd/root.go
index 9cadb27..5e4615f 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -16,6 +16,7 @@ packaging their contents into a single file, and managing the data within.`,
 	}
 
 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose logging")
+	rootCmd.AddCommand(NewDiffCmd())
 	return rootCmd
 }
 
diff --git a/pkg/diff/diff.go b/pkg/diff/diff.go
new file mode 100644
index 0000000..2116cd0
--- /dev/null
+++ b/pkg/diff/diff.go
@@ -0,0 +1,93 @@
+package diff
+
+import (
+	"bytes"
+	"io"
+	"io/fs"
+
+	"github.com/Snider/Borg/pkg/datanode"
+)
+
+// Diff represents the differences between two DataNodes.
+type Diff struct {
+	Added    []string
+	Removed  []string
+	Modified []string
+}
+
+// fileInfo stores content for comparison.
+type fileInfo struct {
+	content []byte
+}
+
+// Compare compares two DataNodes and returns a Diff object.
+func Compare(a, b *datanode.DataNode) (*Diff, error) {
+	// Paths are processed in the order Walk reported them rather than by
+	// ranging over the maps, whose iteration order is randomized; the result
+	// slices are therefore as stable as Walk's own ordering.
+	pathsA, filesA, err := collectFiles(a)
+	if err != nil {
+		return nil, err
+	}
+	pathsB, filesB, err := collectFiles(b)
+	if err != nil {
+		return nil, err
+	}
+
+	diff := &Diff{}
+
+	// Removed: present only in a. Modified: present in both with different bytes.
+	for _, path := range pathsA {
+		infoB, ok := filesB[path]
+		if !ok {
+			diff.Removed = append(diff.Removed, path)
+		} else if !bytes.Equal(filesA[path].content, infoB.content) {
+			diff.Modified = append(diff.Modified, path)
+		}
+	}
+
+	// Added: present only in b.
+	for _, path := range pathsB {
+		if _, ok := filesA[path]; !ok {
+			diff.Added = append(diff.Added, path)
+		}
+	}
+
+	return diff, nil
+}
+
+// collectFiles walks dn from "." and reads every non-directory entry into
+// memory. It returns the file paths in the order Walk visited them, a map
+// from path to content, and any error returned by Walk (the callback forwards
+// errors from Walk, Open, and io.ReadAll). Each path is listed only once.
+func collectFiles(dn *datanode.DataNode) ([]string, map[string]fileInfo, error) {
+	var paths []string
+	files := make(map[string]fileInfo)
+	err := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if d.IsDir() {
+			return nil
+		}
+		file, err := dn.Open(path)
+		if err != nil {
+			return err
+		}
+		// Deferred per callback invocation, so each file is closed before the next.
+		defer file.Close()
+		content, err := io.ReadAll(file)
+		if err != nil {
+			return err
+		}
+		if _, seen := files[path]; !seen {
+			paths = append(paths, path)
+		}
+		files[path] = fileInfo{content: content}
+		return nil
+	})
+	if err != nil {
+		return nil, nil, err
+	}
+	return paths, files, nil
+}
diff --git a/pkg/diff/diff_test.go b/pkg/diff/diff_test.go
new file mode 100644
index 0000000..9643f12
--- /dev/null
+++ b/pkg/diff/diff_test.go
@@ -0,0 +1,78 @@
+package diff
+
+import (
+	"reflect"
+	"sort"
+	"testing"
+
+	"github.com/Snider/Borg/pkg/datanode"
+)
+
+// TestCompare_Good checks that two DataNodes holding identical files produce an empty Diff.
+func TestCompare_Good(t *testing.T) {
+	a := datanode.New()
+	a.AddData("file1.txt", []byte("hello"))
+	a.AddData("file2.txt", []byte("world"))
+
+	b := datanode.New()
+	b.AddData("file1.txt", []byte("hello"))
+	b.AddData("file2.txt", []byte("world"))
+
+	diff, err := Compare(a, b)
+	if err != nil {
+		t.Fatalf("Compare() error = %v", err)
+	}
+	if len(diff.Added) != 0 || len(diff.Removed) != 0 || len(diff.Modified) != 0 {
+		t.Errorf("Expected no differences, but got %+v", diff)
+	}
+}
+
+// TestCompare_Bad checks that an added, a removed, and a modified file each land in the matching list.
+func TestCompare_Bad(t *testing.T) {
+	a := datanode.New()
+	a.AddData("file1.txt", []byte("hello"))
+	a.AddData("file2.txt", []byte("world"))
+	a.AddData("file3.txt", []byte("old"))
+
+	b := datanode.New()
+	b.AddData("file1.txt", []byte("hello"))
+	b.AddData("file3.txt", []byte("new"))
+	b.AddData("file4.txt", []byte("added"))
+
+	diff, err := Compare(a, b)
+	if err != nil {
+		t.Fatalf("Compare() error = %v", err)
+	}
+
+	sort.Strings(diff.Added)
+	sort.Strings(diff.Removed)
+	sort.Strings(diff.Modified)
+
+	expectedAdded := []string{"file4.txt"}
+	expectedRemoved := []string{"file2.txt"}
+	expectedModified := []string{"file3.txt"}
+
+	if !reflect.DeepEqual(diff.Added, expectedAdded) {
+		t.Errorf("Expected Added %v, got %v", expectedAdded, diff.Added)
+	}
+	if !reflect.DeepEqual(diff.Removed, expectedRemoved) {
+		t.Errorf("Expected Removed %v, got %v", expectedRemoved, diff.Removed)
+	}
+	if !reflect.DeepEqual(diff.Modified, expectedModified) {
+		t.Errorf("Expected Modified %v, got %v", expectedModified, diff.Modified)
+	}
+}
+
+// TestCompare_Ugly checks that comparing two empty DataNodes reports no differences.
+func TestCompare_Ugly(t *testing.T) {
+	a := datanode.New()
+	b := datanode.New()
+
+	diff, err := Compare(a, b)
+	if err != nil {
+		t.Fatalf("Compare() error = %v", err)
+	}
+	if len(diff.Added) != 0 || len(diff.Removed) != 0 || len(diff.Modified) != 0 {
+		t.Errorf("Expected no differences for empty datanodes, but got %+v", diff)
+	}
+}
diff --git a/pkg/sync/sync.go b/pkg/sync/sync.go
new file mode 100644
index 0000000..747cc7c
--- /dev/null
+++ b/pkg/sync/sync.go
@@ -0,0 +1,93 @@
+package sync
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"io/fs"
+
+	"github.com/Snider/Borg/pkg/datanode"
+)
+
+// SyncStrategy defines the strategy for a sync operation.
+type SyncStrategy string
+
+const (
+	// AppendStrategy keeps every file from a and adds files from b whose paths are not in a.
+	AppendStrategy SyncStrategy = "append"
+	// MirrorStrategy makes the result an exact copy of b; files found only in a are dropped.
+	MirrorStrategy SyncStrategy = "mirror"
+	// UpdateStrategy keeps a's files, adds b's new files, and re-adds b's version where content differs.
+	UpdateStrategy SyncStrategy = "update"
+)
+
+// Sync reads all files of a and b into memory and returns a new DataNode combined per strategy; an unknown strategy is an error.
+func Sync(a, b *datanode.DataNode, strategy SyncStrategy) (*datanode.DataNode, error) {
+	result := datanode.New()
+	filesA := make(map[string][]byte)
+	filesB := make(map[string][]byte)
+
+	// walkAndCollect reads every non-directory entry of dn into fileMap, keyed by path.
+	walkAndCollect := func(dn *datanode.DataNode, fileMap map[string][]byte) error {
+		return dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
+			if err != nil {
+				return err
+			}
+			if !d.IsDir() {
+				file, err := dn.Open(path)
+				if err != nil {
+					return err
+				}
+				defer file.Close()
+				content, err := io.ReadAll(file)
+				if err != nil {
+					return err
+				}
+				fileMap[path] = content
+			}
+			return nil
+		})
+	}
+
+	if err := walkAndCollect(a, filesA); err != nil {
+		return nil, fmt.Errorf("failed to walk source datanode: %w", err)
+	}
+	if err := walkAndCollect(b, filesB); err != nil {
+		return nil, fmt.Errorf("failed to walk target datanode: %w", err)
+	}
+
+	switch strategy {
+	case AppendStrategy:
+		// Every file of a goes in unchanged
+		for path, content := range filesA {
+			result.AddData(path, content)
+		}
+		// Files of b are added only under paths a does not have
+		for path, content := range filesB {
+			if _, exists := filesA[path]; !exists {
+				result.AddData(path, content)
+			}
+		}
+	case MirrorStrategy:
+		// Only b's files are added; a contributes nothing to the result
+		for path, content := range filesB {
+			result.AddData(path, content)
+		}
+	case UpdateStrategy:
+		// Every file of a goes in first
+		for path, content := range filesA {
+			result.AddData(path, content)
+		}
+		// b's file is added when the path is new or its bytes differ from a's
+		for path, contentB := range filesB {
+			contentA, exists := filesA[path]
+			if !exists || !bytes.Equal(contentA, contentB) {
+				result.AddData(path, contentB)
+			}
+		}
+	default:
+		return nil, fmt.Errorf("unknown sync strategy: %s", strategy)
+	}
+
+	return result, nil
+}
diff --git a/pkg/sync/sync_test.go b/pkg/sync/sync_test.go
new file mode 100644
index 0000000..e03ac39
--- /dev/null
+++ b/pkg/sync/sync_test.go
@@ -0,0 +1,92 @@
+package sync
+
+import (
+	"io/fs"
+	"reflect"
+	"sort"
+	"testing"
+
+	"github.com/Snider/Borg/pkg/datanode"
+)
+
+// TestSync_Append checks that AppendStrategy yields the union of the paths in a and b.
+func TestSync_Append(t *testing.T) {
+	a := datanode.New()
+	a.AddData("file1.txt", []byte("hello"))
+	a.AddData("file2.txt", []byte("world"))
+
+	b := datanode.New()
+	b.AddData("file1.txt", []byte("different"))
+	b.AddData("file3.txt", []byte("new"))
+
+	result, err := Sync(a, b, AppendStrategy)
+	if err != nil {
+		t.Fatalf("Sync() error = %v", err)
+	}
+
+	expectedFiles := []string{"file1.txt", "file2.txt", "file3.txt"}
+	assertDataNodeFiles(t, result, expectedFiles)
+}
+
+// TestSync_Mirror checks that MirrorStrategy yields only the paths present in b.
+func TestSync_Mirror(t *testing.T) {
+	a := datanode.New()
+	a.AddData("file1.txt", []byte("hello"))
+	a.AddData("file2.txt", []byte("world"))
+
+	b := datanode.New()
+	b.AddData("file3.txt", []byte("new"))
+
+	result, err := Sync(a, b, MirrorStrategy)
+	if err != nil {
+		t.Fatalf("Sync() error = %v", err)
+	}
+
+	expectedFiles := []string{"file3.txt"}
+	assertDataNodeFiles(t, result, expectedFiles)
+}
+
+// TestSync_Update checks that UpdateStrategy yields the union of the paths in a and b.
+func TestSync_Update(t *testing.T) {
+	a := datanode.New()
+	a.AddData("file1.txt", []byte("hello"))
+	a.AddData("file2.txt", []byte("world"))
+
+	b := datanode.New()
+	b.AddData("file1.txt", []byte("updated"))
+	b.AddData("file3.txt", []byte("new"))
+
+	result, err := Sync(a, b, UpdateStrategy)
+	if err != nil {
+		t.Fatalf("Sync() error = %v", err)
+	}
+
+	expectedFiles := []string{"file1.txt", "file2.txt", "file3.txt"}
+	assertDataNodeFiles(t, result, expectedFiles)
+}
+
+// assertDataNodeFiles reports a test error unless the non-directory paths found
+// by walking dn from "." equal expected, ignoring order (expected is sorted in
+// place). A walk error aborts the test instead of being silently dropped.
+func assertDataNodeFiles(t *testing.T, dn *datanode.DataNode, expected []string) {
+	t.Helper()
+	var actual []string
+	err := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
+		// Check err before touching d, which fs.WalkDirFunc allows to be nil on error.
+		if err != nil {
+			return err
+		}
+		if !d.IsDir() {
+			actual = append(actual, path)
+		}
+		return nil
+	})
+	if err != nil {
+		t.Fatalf("Walk() error = %v", err)
+	}
+	sort.Strings(actual)
+	sort.Strings(expected)
+	if !reflect.DeepEqual(actual, expected) {
+		t.Errorf("Expected files %v, got %v", expected, actual)
+	}
+}