diff --git a/cmd/repair.go b/cmd/repair.go
new file mode 100644
--- /dev/null
+++ b/cmd/repair.go
@@ -0,0 +1,203 @@
+package cmd
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/Snider/Borg/pkg/datanode"
+	"github.com/Snider/Borg/pkg/tim"
+	"github.com/spf13/cobra"
+)
+
+// repairDownloadTimeout bounds each download (request plus body read) so a
+// stalled source cannot hang the command indefinitely.
+const repairDownloadTimeout = 5 * time.Minute
+
+// repairCmd represents the repair command
+var repairCmd = NewRepairCmd()
+
+func init() {
+	RootCmd.AddCommand(GetRepairCmd())
+}
+
+// NewRepairCmd builds the "repair" command. The command compares every file
+// listed in the archive's manifest.json against its recorded SHA-256, fetches
+// each missing or mismatching file from <source>/<name>, and rewrites the
+// archive only when every such file was fetched and matches the manifest.
+// Encrypted .stim archives are checked but never rewritten.
+func NewRepairCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "repair [archive]",
+		Short: "Repair a corrupted archive.",
+		Long:  `Repair a corrupted archive by re-downloading missing or corrupted files from the original source.`,
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			archivePath := args[0]
+			password, _ := cmd.Flags().GetString("password")
+			source, _ := cmd.Flags().GetString("source")
+
+			if source == "" {
+				return fmt.Errorf("--source is required for repair")
+			}
+
+			cmd.Printf("Repairing %s...\n", archivePath)
+
+			content, err := os.ReadFile(archivePath)
+			if err != nil {
+				return fmt.Errorf("could not read archive: %w", err)
+			}
+
+			encrypted := strings.HasSuffix(archivePath, ".stim")
+
+			var dn *datanode.DataNode
+			if encrypted {
+				if password == "" {
+					return fmt.Errorf("password required for .stim files")
+				}
+				t, err := tim.FromSigil(content, password)
+				if err != nil {
+					return fmt.Errorf("decryption failed: %w", err)
+				}
+				dn = t.RootFS
+			} else {
+				dn, err = datanode.FromTar(content)
+				if err != nil {
+					return fmt.Errorf("archive structure is corrupt: %w", err)
+				}
+			}
+
+			manifestFile, err := dn.Open("manifest.json")
+			if err != nil {
+				return fmt.Errorf("could not open manifest: %w", err)
+			}
+			defer manifestFile.Close()
+
+			manifestBytes, err := io.ReadAll(manifestFile)
+			if err != nil {
+				return fmt.Errorf("could not read manifest: %w", err)
+			}
+
+			var manifest map[string]string
+			if err := json.Unmarshal(manifestBytes, &manifest); err != nil {
+				return fmt.Errorf("could not parse manifest: %w", err)
+			}
+
+			var corruptedFiles []string
+			for name, expectedChecksum := range manifest {
+				file, err := dn.Open(name)
+				if err != nil {
+					corruptedFiles = append(corruptedFiles, name)
+					continue
+				}
+
+				// Close right away: a defer here would keep every file open
+				// until RunE returns.
+				fileBytes, err := io.ReadAll(file)
+				file.Close()
+				if err != nil {
+					corruptedFiles = append(corruptedFiles, name)
+					continue
+				}
+
+				hash := sha256.Sum256(fileBytes)
+				actualChecksum := hex.EncodeToString(hash[:])
+
+				if actualChecksum != expectedChecksum {
+					corruptedFiles = append(corruptedFiles, name)
+				}
+			}
+			// Map iteration order is random; sort so the report and the
+			// download order are reproducible.
+			sort.Strings(corruptedFiles)
+
+			if len(corruptedFiles) == 0 {
+				cmd.Println("Archive is not corrupted.")
+				return nil
+			}
+
+			cmd.Printf("Found %d corrupted files:\n", len(corruptedFiles))
+			for _, file := range corruptedFiles {
+				cmd.Printf(" - %s\n", file)
+			}
+
+			if encrypted {
+				// The repaired archive is written back as a plain tar (see
+				// ToTar below). Doing that to a .stim path would replace the
+				// encrypted archive with unencrypted data, so stop here.
+				return fmt.Errorf("repairing encrypted .stim archives is not supported")
+			}
+
+			cmd.Println("Attempting to repair from source...")
+			client := &http.Client{Timeout: repairDownloadTimeout}
+			baseURL := strings.TrimRight(source, "/")
+			repairedCount := 0
+			for _, file := range corruptedFiles {
+				fileURL := baseURL + "/" + file
+				resp, err := client.Get(fileURL)
+				if err != nil {
+					cmd.Printf(" ✗ Could not download %s: %v\n", file, err)
+					continue
+				}
+
+				if resp.StatusCode != http.StatusOK {
+					resp.Body.Close()
+					cmd.Printf(" ✗ Could not download %s: status %s\n", file, resp.Status)
+					continue
+				}
+
+				newContent, err := io.ReadAll(resp.Body)
+				resp.Body.Close()
+				if err != nil {
+					cmd.Printf(" ✗ Could not read downloaded content for %s: %v\n", file, err)
+					continue
+				}
+
+				// Only accept content that matches the manifest. ToTar
+				// regenerates the manifest from file contents, so unverified
+				// data would otherwise be recorded as valid.
+				newHash := sha256.Sum256(newContent)
+				if hex.EncodeToString(newHash[:]) != manifest[file] {
+					cmd.Printf(" ✗ Downloaded %s does not match the manifest checksum\n", file)
+					continue
+				}
+
+				dn.ReplaceFile(file, newContent)
+				repairedCount++
+				cmd.Printf(" ✓ Repaired %s\n", file)
+			}
+
+			if repairedCount < len(corruptedFiles) {
+				return fmt.Errorf("could not repair all corrupted files")
+			}
+
+			// Save the repaired archive
+			repairedData, err := dn.ToTar()
+			if err != nil {
+				return fmt.Errorf("could not serialize repaired archive: %w", err)
+			}
+
+			if err := os.WriteFile(archivePath, repairedData, 0644); err != nil {
+				return fmt.Errorf("could not write repaired archive: %w", err)
+			}
+
+			cmd.Println("Archive repaired successfully.")
+			return nil
+		},
+	}
+	cmd.Flags().String("source", "", "Original source to compare against")
+	cmd.Flags().StringP("password", "p", "", "Password for decryption (for .stim files)")
+	return cmd
+}
+
+// GetRepairCmd returns the package-level repair command registered on RootCmd.
+func GetRepairCmd() *cobra.Command {
+	return repairCmd
+}
diff --git a/cmd/repair_test.go b/cmd/repair_test.go
new file mode 100644
--- /dev/null
+++ b/cmd/repair_test.go
@@ -0,0 +1,102 @@
+package cmd
+
+import (
+	"bytes"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/Snider/Borg/pkg/datanode"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestRepairCmd_Good(t *testing.T) {
+	// Create a temporary directory for the test
+	tempDir := t.TempDir()
+	archivePath := filepath.Join(tempDir, "test.dat")
+
+	// Create a DataNode and add a file
+	dn := datanode.New()
+	dn.AddData("hello.txt", []byte("hello world"))
+
+	// Serialize the DataNode to a tarball
+	tarball, err := dn.ToTar()
+	assert.NoError(t, err)
+
+	// Corrupt the tarball
+	corruptedTarball := bytes.Replace(tarball, []byte("hello world"), []byte("hello mars!"), 1)
+
+	// Write the corrupted tarball to the archive file
+	err = os.WriteFile(archivePath, corruptedTarball, 0644)
+	assert.NoError(t, err)
+
+	// Create a mock HTTP server to serve the correct file
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path == "/hello.txt" {
+			w.Write([]byte("hello world"))
+		} else {
+			http.NotFound(w, r)
+		}
+	}))
+	defer server.Close()
+
+	// Execute the repair command
+	cmd := NewRepairCmd()
+	b := bytes.NewBufferString("")
+	cmd.SetOut(b)
+	cmd.SetArgs([]string{archivePath, "--source", server.URL})
+	err = cmd.Execute()
+
+	// Assert that the command was successful
+	assert.NoError(t, err)
+
+	// Verify that the archive is now valid
+	verifyCmd := NewVerifyCmd()
+	b.Reset()
+	verifyCmd.SetOut(b)
+	verifyCmd.SetArgs([]string{archivePath})
+	err = verifyCmd.Execute()
+	assert.NoError(t, err)
+	output := b.String()
+	assert.Contains(t, output, "✓ Structure: valid")
+	assert.Contains(t, output, "✓ Checksums: 1/1 files OK")
+	assert.Contains(t, output, "✓ Manifest: complete")
+}
+
+func TestRepairCmd_Bad(t *testing.T) {
+	// Create a corrupted archive, as in TestRepairCmd_Good
+	tempDir := t.TempDir()
+	archivePath := filepath.Join(tempDir, "test.dat")
+
+	dn := datanode.New()
+	dn.AddData("hello.txt", []byte("hello world"))
+
+	tarball, err := dn.ToTar()
+	assert.NoError(t, err)
+
+	corruptedTarball := bytes.Replace(tarball, []byte("hello world"), []byte("hello mars!"), 1)
+	err = os.WriteFile(archivePath, corruptedTarball, 0644)
+	assert.NoError(t, err)
+
+	// Create a mock HTTP server whose content does not match the manifest
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Write([]byte("hello pluto"))
+	}))
+	defer server.Close()
+
+	// Execute the repair command
+	cmd := NewRepairCmd()
+	b := bytes.NewBufferString("")
+	cmd.SetOut(b)
+	cmd.SetErr(b)
+	cmd.SetArgs([]string{archivePath, "--source", server.URL})
+	err = cmd.Execute()
+
+	// Assert that the command failed and left the archive untouched
+	assert.Error(t, err)
+	onDisk, err := os.ReadFile(archivePath)
+	assert.NoError(t, err)
+	assert.Equal(t, corruptedTarball, onDisk)
+}
diff --git a/cmd/verify.go b/cmd/verify.go
new file mode 100644
--- /dev/null
+++ b/cmd/verify.go
@@ -0,0 +1,158 @@
+package cmd
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/fs"
+	"os"
+	"strings"
+
+	"github.com/Snider/Borg/pkg/datanode"
+	"github.com/Snider/Borg/pkg/tim"
+	"github.com/spf13/cobra"
+)
+
+// verifyCmd represents the verify command
+var verifyCmd = NewVerifyCmd()
+
+func init() {
+	RootCmd.AddCommand(GetVerifyCmd())
+}
+
+// NewVerifyCmd builds the "verify" command. It checks every file listed in
+// the archive's manifest.json against its recorded SHA-256 and reports any
+// archive file that the manifest does not list.
+func NewVerifyCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "verify [archive]",
+		Short: "Verify archive integrity and detect corruption.",
+		Long:  `Verify archive integrity and detect corruption.`,
+		Args:  cobra.ExactArgs(1),
+		RunE: func(cmd *cobra.Command, args []string) error {
+			archivePath := args[0]
+			password, _ := cmd.Flags().GetString("password")
+			cmd.Printf("Verifying %s...\n", archivePath)
+
+			content, err := os.ReadFile(archivePath)
+			if err != nil {
+				return fmt.Errorf("could not read archive: %w", err)
+			}
+
+			var dn *datanode.DataNode
+			if strings.HasSuffix(archivePath, ".stim") {
+				if password == "" {
+					return fmt.Errorf("password required for .stim files")
+				}
+				t, err := tim.FromSigil(content, password)
+				if err != nil {
+					cmd.Println("✗ Decryption: failed")
+					return fmt.Errorf("decryption failed: %w", err)
+				}
+				dn = t.RootFS
+				cmd.Println("✓ Decryption: successful")
+			} else {
+				dn, err = datanode.FromTar(content)
+				if err != nil {
+					cmd.Println("✗ Structure: invalid")
+					return fmt.Errorf("archive structure is corrupt: %w", err)
+				}
+				cmd.Println("✓ Structure: valid")
+			}
+
+			manifestFile, err := dn.Open("manifest.json")
+			if err != nil {
+				cmd.Println("✗ Checksums: missing manifest")
+				return fmt.Errorf("could not open manifest: %w", err)
+			}
+			defer manifestFile.Close()
+
+			manifestBytes, err := io.ReadAll(manifestFile)
+			if err != nil {
+				cmd.Println("✗ Checksums: unreadable manifest")
+				return fmt.Errorf("could not read manifest: %w", err)
+			}
+
+			var manifest map[string]string
+			if err := json.Unmarshal(manifestBytes, &manifest); err != nil {
+				cmd.Println("✗ Checksums: invalid manifest")
+				return fmt.Errorf("could not parse manifest: %w", err)
+			}
+
+			filesOk := 0
+			filesCorrupt := 0
+			for name, expectedChecksum := range manifest {
+				file, err := dn.Open(name)
+				if err != nil {
+					cmd.Printf("✗ Checksum mismatch: %s (missing)\n", name)
+					filesCorrupt++
+					continue
+				}
+
+				// Close right away: a defer here would keep every file open
+				// until RunE returns.
+				fileBytes, err := io.ReadAll(file)
+				file.Close()
+				if err != nil {
+					cmd.Printf("✗ Checksum mismatch: %s (unreadable)\n", name)
+					filesCorrupt++
+					continue
+				}
+
+				hash := sha256.Sum256(fileBytes)
+				actualChecksum := hex.EncodeToString(hash[:])
+
+				if actualChecksum != expectedChecksum {
+					cmd.Printf("✗ Checksum mismatch: %s\n", name)
+					cmd.Printf(" Expected: %s\n", expectedChecksum)
+					cmd.Printf(" Got: %s\n", actualChecksum)
+					filesCorrupt++
+				} else {
+					filesOk++
+				}
+			}
+
+			if filesCorrupt > 0 {
+				cmd.Printf("✗ Checksums: %d/%d files OK\n", filesOk, filesOk+filesCorrupt)
+			} else {
+				cmd.Printf("✓ Checksums: %d/%d files OK\n", filesOk, filesOk)
+			}
+
+			// Manifest completeness check
+			untrackedFiles := 0
+			walkErr := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
+				if err != nil {
+					return err
+				}
+				if !d.IsDir() && path != "manifest.json" {
+					if _, exists := manifest[path]; !exists {
+						untrackedFiles++
+						cmd.Printf("✗ Untracked file: %s\n", path)
+					}
+				}
+				return nil
+			})
+			if walkErr != nil {
+				return fmt.Errorf("error checking for untracked files: %w", walkErr)
+			}
+
+			if filesCorrupt > 0 || untrackedFiles > 0 {
+				cmd.Println("✗ Manifest: incomplete")
+				return fmt.Errorf("%d files corrupted or untracked", filesCorrupt+untrackedFiles)
+			}
+
+			cmd.Println("✓ Manifest: complete")
+			return nil
+		},
+	}
+	cmd.Flags().String("source", "", "Original source to compare against")
+	cmd.Flags().StringP("password", "p", "", "Password for decryption (for .stim files)")
+	return cmd
+}
+
+// GetVerifyCmd returns the package-level verify command registered on RootCmd.
+func GetVerifyCmd() *cobra.Command {
+	return verifyCmd
+}
diff --git a/cmd/verify_helpers.go b/cmd/verify_helpers.go
new file mode 100644
index 0000000..1d619dd
--- /dev/null
+++ b/cmd/verify_helpers.go
@@ -0,0 +1 @@
+package cmd
diff --git a/cmd/verify_test.go b/cmd/verify_test.go
new file mode 100644
index 0000000..dfa0b3f
--- /dev/null
+++ b/cmd/verify_test.go
@@ -0,0 +1,80 @@
+package cmd
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/Snider/Borg/pkg/datanode"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestVerifyCmd_Good(t *testing.T) {
+	// Create a temporary directory for the test
+	tempDir := t.TempDir()
+	archivePath := filepath.Join(tempDir, "test.dat")
+
+	// Create a DataNode and add a file
+	dn := datanode.New()
+	dn.AddData("hello.txt", []byte("hello world"))
+
+	// Serialize the DataNode to a tarball
+	tarball, err := dn.ToTar()
+	assert.NoError(t, err)
+
+	// Write the tarball to the archive file
+	err = os.WriteFile(archivePath, tarball, 0644)
+	assert.NoError(t, err)
+
+	// Execute the verify command
+	cmd := NewVerifyCmd()
+	b := bytes.NewBufferString("")
+	cmd.SetOut(b)
+	cmd.SetArgs([]string{archivePath})
+	err = cmd.Execute()
+
+	// Assert that the command was successful
+	assert.NoError(t, err)
+	output := b.String()
+	assert.Contains(t, output, "✓ Structure: valid")
+	assert.Contains(t, output, "✓ Checksums: 1/1 files OK")
+	assert.Contains(t, output, "✓ Manifest: complete")
+}
+
+func TestVerifyCmd_Bad(t *testing.T) {
+	// Create a temporary directory for the test
+	tempDir := t.TempDir()
+	archivePath := filepath.Join(tempDir, "test.dat")
+
+	// Create a DataNode and add a file
+	dn := datanode.New()
+	dn.AddData("hello.txt", []byte("hello world"))
+
+	// Serialize the DataNode to a tarball
+	tarball, err := dn.ToTar()
+	assert.NoError(t, err)
+
+	// Corrupt the tarball
+	corruptedTarball := bytes.Replace(tarball, []byte("hello world"), []byte("hello mars!"), 1)
+
+	// Write the corrupted tarball to the archive file
+	err = os.WriteFile(archivePath, corruptedTarball, 0644)
+	assert.NoError(t, err)
+
+	// Execute the verify command
+	cmd := NewVerifyCmd()
+	b := bytes.NewBufferString("")
+	cmd.SetOut(b)
+	cmd.SetErr(b)
+	cmd.SetArgs([]string{archivePath})
+	err = cmd.Execute()
+
+	// Assert that the command failed
+	assert.Error(t, err)
+	output := b.String()
+	assert.Contains(t, output, "✓ Structure: valid")
+	assert.Contains(t, output, "✗ Checksum mismatch: hello.txt")
+	assert.Contains(t, output, "✗ Checksums: 0/1 files OK")
+	assert.Contains(t, output, "✗ Manifest: incomplete")
+}
diff --git a/pkg/datanode/datanode.go b/pkg/datanode/datanode.go
index cc53da9..3254af3 100644
--- a/pkg/datanode/datanode.go
+++ b/pkg/datanode/datanode.go
@@ -3,6 +3,9 @@ package datanode
 import (
 	"archive/tar"
 	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
 	"errors"
 	"io"
 	"io/fs"
@@ -59,7 +62,18 @@ func (d *DataNode) ToTar() ([]byte, error) {
 	buf := new(bytes.Buffer)
 	tw := tar.NewWriter(buf)
 
+	manifest := make(map[string]string)
 	for _, file := range d.files {
+		if file.name == "manifest.json" {
+			continue
+		}
+		manifest[file.name] = file.checksum
+	}
+
+	for _, file := range d.files {
+		if file.name == "manifest.json" {
+			continue
+		}
 		hdr := &tar.Header{
 			Name:    file.name,
 			Mode:    0600,
@@ -74,6 +88,23 @@ func (d *DataNode) ToTar() ([]byte, error) {
 		}
 	}
 
+	manifestBytes, err := json.MarshalIndent(manifest, "", " ")
+	if err != nil {
+		return nil, err
+	}
+	hdr := &tar.Header{
+		Name:    "manifest.json",
+		Mode:    0600,
+		Size:    int64(len(manifestBytes)),
+		ModTime: time.Now(),
+	}
+	if err := tw.WriteHeader(hdr); err != nil {
+		return nil, err
+	}
+	if _, err := tw.Write(manifestBytes); err != nil {
+		return nil, err
+	}
+
 	if err := tw.Close(); err != nil {
 		return nil, err
 	}
@@ -92,10 +123,12 @@ func (d *DataNode) AddData(name string, content []byte) {
 	if strings.HasSuffix(name, "/") {
 		return
 	}
+	hash := sha256.Sum256(content)
 	d.files[name] = &dataFile{
-		name:    name,
-		content: content,
-		modTime: time.Now(),
+		name:     name,
+		content:  content,
+		modTime:  time.Now(),
+		checksum: hex.EncodeToString(hash[:]),
 	}
 }
 
@@ -296,15 +329,21 @@ func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode)
 
 // dataFile represents a file in the DataNode.
 type dataFile struct {
-	name    string
-	content []byte
-	modTime time.Time
+	name     string
+	content  []byte
+	modTime  time.Time
+	checksum string
 }
 
 func (d *dataFile) Stat() (fs.FileInfo, error) { return &dataFileInfo{file: d}, nil }
 func (d *dataFile) Read(p []byte) (int, error) { return 0, io.EOF }
 func (d *dataFile) Close() error               { return nil }
 
+// ReplaceFile replaces a file in the DataNode with new content.
+func (d *DataNode) ReplaceFile(name string, content []byte) {
+	d.AddData(name, content)
+}
+
 // dataFileInfo implements fs.FileInfo for a dataFile.
 type dataFileInfo struct{ file *dataFile }
 