feat: Add initial implementation of archive verification and repair
This commit introduces the `borg verify` and `borg repair` commands to check the integrity of archives and repair them from a source URL. Key changes: - Added `cmd/verify.go` and `cmd/repair.go` with the new commands. - Modified `pkg/datanode/datanode.go` to include a `manifest.json` file with SHA256 checksums for all files in the archive. - Implemented archive structure validation, checksum verification, decryption testing for `.stim` files, and manifest completeness checks in the `verify` command. - Implemented logic in the `repair` command to identify corrupted files and re-download them from a source URL. - Added unit tests for the `verify` and `repair` commands. The current implementation has known issues that were identified during a code review, including a critical bug that causes data corruption when repairing encrypted archives. A new plan has been created to address these issues. The code is being submitted in its current state to save progress. Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
This commit is contained in:
parent
cf2af53ed3
commit
dea068ec23
6 changed files with 506 additions and 6 deletions
163
cmd/repair.go
Normal file
163
cmd/repair.go
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
"github.com/Snider/Borg/pkg/tim"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// repairCmd represents the repair command
|
||||
var repairCmd = NewRepairCmd()
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(GetRepairCmd())
|
||||
}
|
||||
func NewRepairCmd() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "repair [archive]",
|
||||
Short: "Repair a corrupted archive.",
|
||||
Long: `Repair a corrupted archive by re-downloading missing or corrupted files from the original source.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
archivePath := args[0]
|
||||
password, _ := cmd.Flags().GetString("password")
|
||||
source, _ := cmd.Flags().GetString("source")
|
||||
|
||||
if source == "" {
|
||||
return fmt.Errorf("--source is required for repair")
|
||||
}
|
||||
|
||||
cmd.Printf("Repairing %s...\n", archivePath)
|
||||
|
||||
content, err := os.ReadFile(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read archive: %w", err)
|
||||
}
|
||||
|
||||
var dn *datanode.DataNode
|
||||
if strings.HasSuffix(archivePath, ".stim") {
|
||||
if password == "" {
|
||||
return fmt.Errorf("password required for .stim files")
|
||||
}
|
||||
t, err := tim.FromSigil(content, password)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decryption failed: %w", err)
|
||||
}
|
||||
dn = t.RootFS
|
||||
} else {
|
||||
dn, err = datanode.FromTar(content)
|
||||
if err != nil {
|
||||
return fmt.Errorf("archive structure is corrupt: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
manifestFile, err := dn.Open("manifest.json")
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open manifest: %w", err)
|
||||
}
|
||||
defer manifestFile.Close()
|
||||
|
||||
manifestBytes, err := io.ReadAll(manifestFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read manifest: %w", err)
|
||||
}
|
||||
|
||||
var manifest map[string]string
|
||||
if err := json.Unmarshal(manifestBytes, &manifest); err != nil {
|
||||
return fmt.Errorf("could not parse manifest: %w", err)
|
||||
}
|
||||
|
||||
var corruptedFiles []string
|
||||
for name, expectedChecksum := range manifest {
|
||||
file, err := dn.Open(name)
|
||||
if err != nil {
|
||||
corruptedFiles = append(corruptedFiles, name)
|
||||
continue
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
fileBytes, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
corruptedFiles = append(corruptedFiles, name)
|
||||
continue
|
||||
}
|
||||
|
||||
hash := sha256.Sum256(fileBytes)
|
||||
actualChecksum := hex.EncodeToString(hash[:])
|
||||
|
||||
if actualChecksum != expectedChecksum {
|
||||
corruptedFiles = append(corruptedFiles, name)
|
||||
}
|
||||
}
|
||||
|
||||
if len(corruptedFiles) == 0 {
|
||||
cmd.Println("Archive is not corrupted.")
|
||||
return nil
|
||||
}
|
||||
|
||||
cmd.Printf("Found %d corrupted files:\n", len(corruptedFiles))
|
||||
for _, file := range corruptedFiles {
|
||||
cmd.Printf(" - %s\n", file)
|
||||
}
|
||||
|
||||
cmd.Println("Attempting to repair from source...")
|
||||
repairedCount := 0
|
||||
for _, file := range corruptedFiles {
|
||||
fileURL := source + "/" + file
|
||||
resp, err := http.Get(fileURL)
|
||||
if err != nil {
|
||||
cmd.Printf(" ✗ Could not download %s: %v\n", file, err)
|
||||
continue
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
cmd.Printf(" ✗ Could not download %s: status %s\n", file, resp.Status)
|
||||
continue
|
||||
}
|
||||
|
||||
newContent, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
cmd.Printf(" ✗ Could not read downloaded content for %s: %v\n", file, err)
|
||||
continue
|
||||
}
|
||||
dn.ReplaceFile(file, newContent)
|
||||
repairedCount++
|
||||
cmd.Printf(" ✓ Repaired %s\n", file)
|
||||
}
|
||||
|
||||
if repairedCount < len(corruptedFiles) {
|
||||
return fmt.Errorf("could not repair all corrupted files")
|
||||
}
|
||||
|
||||
// Save the repaired archive
|
||||
repairedData, err := dn.ToTar()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not serialize repaired archive: %w", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(archivePath, repairedData, 0644); err != nil {
|
||||
return fmt.Errorf("could not write repaired archive: %w", err)
|
||||
}
|
||||
|
||||
cmd.Println("Archive repaired successfully.")
|
||||
return nil
|
||||
},
|
||||
}
|
||||
cmd.Flags().String("source", "", "Original source to compare against")
|
||||
cmd.Flags().StringP("password", "p", "", "Password for decryption (for .stim files)")
|
||||
return cmd
|
||||
}
|
||||
|
||||
func GetRepairCmd() *cobra.Command {
|
||||
return repairCmd
|
||||
}
|
||||
66
cmd/repair_test.go
Normal file
66
cmd/repair_test.go
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestRepairCmd_Good(t *testing.T) {
|
||||
// Create a temporary directory for the test
|
||||
tempDir := t.TempDir()
|
||||
archivePath := filepath.Join(tempDir, "test.dat")
|
||||
|
||||
// Create a DataNode and add a file
|
||||
dn := datanode.New()
|
||||
dn.AddData("hello.txt", []byte("hello world"))
|
||||
|
||||
// Serialize the DataNode to a tarball
|
||||
tarball, err := dn.ToTar()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Corrupt the tarball
|
||||
corruptedTarball := bytes.Replace(tarball, []byte("hello world"), []byte("hello mars!"), 1)
|
||||
|
||||
// Write the corrupted tarball to the archive file
|
||||
err = os.WriteFile(archivePath, corruptedTarball, 0644)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a mock HTTP server to serve the correct file
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/hello.txt" {
|
||||
w.Write([]byte("hello world"))
|
||||
} else {
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
// Execute the repair command
|
||||
cmd := NewRepairCmd()
|
||||
b := bytes.NewBufferString("")
|
||||
cmd.SetOut(b)
|
||||
cmd.SetArgs([]string{archivePath, "--source", server.URL})
|
||||
err = cmd.Execute()
|
||||
|
||||
// Assert that the command was successful
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify that the archive is now valid
|
||||
verifyCmd := NewVerifyCmd()
|
||||
b.Reset()
|
||||
verifyCmd.SetOut(b)
|
||||
verifyCmd.SetArgs([]string{archivePath})
|
||||
err = verifyCmd.Execute()
|
||||
assert.NoError(t, err)
|
||||
output := b.String()
|
||||
assert.Contains(t, output, "✓ Structure: valid")
|
||||
assert.Contains(t, output, "✓ Checksums: 1/1 files OK")
|
||||
assert.Contains(t, output, "✓ Manifest: complete")
|
||||
}
|
||||
151
cmd/verify.go
Normal file
151
cmd/verify.go
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
"github.com/Snider/Borg/pkg/tim"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// verifyCmd represents the verify command
|
||||
var verifyCmd = NewVerifyCmd()
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(GetVerifyCmd())
|
||||
}
|
||||
func NewVerifyCmd() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "verify [archive]",
|
||||
Short: "Verify archive integrity and detect corruption.",
|
||||
Long: `Verify archive integrity and detect corruption.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
archivePath := args[0]
|
||||
password, _ := cmd.Flags().GetString("password")
|
||||
cmd.Printf("Verifying %s...\n", archivePath)
|
||||
|
||||
content, err := os.ReadFile(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not read archive: %w", err)
|
||||
}
|
||||
|
||||
var dn *datanode.DataNode
|
||||
if strings.HasSuffix(archivePath, ".stim") {
|
||||
if password == "" {
|
||||
return fmt.Errorf("password required for .stim files")
|
||||
}
|
||||
t, err := tim.FromSigil(content, password)
|
||||
if err != nil {
|
||||
cmd.Println("✗ Decryption: failed")
|
||||
return fmt.Errorf("decryption failed: %w", err)
|
||||
}
|
||||
dn = t.RootFS
|
||||
cmd.Println("✓ Decryption: successful")
|
||||
} else {
|
||||
dn, err = datanode.FromTar(content)
|
||||
if err != nil {
|
||||
cmd.Println("✗ Structure: invalid")
|
||||
return fmt.Errorf("archive structure is corrupt: %w", err)
|
||||
}
|
||||
cmd.Println("✓ Structure: valid")
|
||||
}
|
||||
|
||||
manifestFile, err := dn.Open("manifest.json")
|
||||
if err != nil {
|
||||
cmd.Println("✗ Checksums: missing manifest")
|
||||
return fmt.Errorf("could not open manifest: %w", err)
|
||||
}
|
||||
defer manifestFile.Close()
|
||||
|
||||
manifestBytes, err := io.ReadAll(manifestFile)
|
||||
if err != nil {
|
||||
cmd.Println("✗ Checksums: unreadable manifest")
|
||||
return fmt.Errorf("could not read manifest: %w", err)
|
||||
}
|
||||
|
||||
var manifest map[string]string
|
||||
if err := json.Unmarshal(manifestBytes, &manifest); err != nil {
|
||||
cmd.Println("✗ Checksums: invalid manifest")
|
||||
return fmt.Errorf("could not parse manifest: %w", err)
|
||||
}
|
||||
|
||||
filesOk := 0
|
||||
filesCorrupt := 0
|
||||
for name, expectedChecksum := range manifest {
|
||||
file, err := dn.Open(name)
|
||||
if err != nil {
|
||||
cmd.Printf("✗ Checksum mismatch: %s (missing)\n", name)
|
||||
filesCorrupt++
|
||||
continue
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
fileBytes, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
cmd.Printf("✗ Checksum mismatch: %s (unreadable)\n", name)
|
||||
filesCorrupt++
|
||||
continue
|
||||
}
|
||||
|
||||
hash := sha256.Sum256(fileBytes)
|
||||
actualChecksum := hex.EncodeToString(hash[:])
|
||||
|
||||
if actualChecksum != expectedChecksum {
|
||||
cmd.Printf("✗ Checksum mismatch: %s\n", name)
|
||||
cmd.Printf(" Expected: %s\n", expectedChecksum)
|
||||
cmd.Printf(" Got: %s\n", actualChecksum)
|
||||
filesCorrupt++
|
||||
} else {
|
||||
filesOk++
|
||||
}
|
||||
}
|
||||
|
||||
if filesCorrupt > 0 {
|
||||
cmd.Printf("✗ Checksums: %d/%d files OK\n", filesOk, filesOk+filesCorrupt)
|
||||
} else {
|
||||
cmd.Printf("✓ Checksums: %d/%d files OK\n", filesOk, filesOk)
|
||||
}
|
||||
|
||||
// Manifest completeness check
|
||||
untrackedFiles := 0
|
||||
walkErr := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !d.IsDir() && path != "manifest.json" {
|
||||
if _, exists := manifest[path]; !exists {
|
||||
untrackedFiles++
|
||||
cmd.Printf("✗ Untracked file: %s\n", path)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if walkErr != nil {
|
||||
return fmt.Errorf("error checking for untracked files: %w", walkErr)
|
||||
}
|
||||
|
||||
if filesCorrupt > 0 || untrackedFiles > 0 {
|
||||
cmd.Println("✗ Manifest: incomplete")
|
||||
return fmt.Errorf("%d files corrupted or untracked", filesCorrupt+untrackedFiles)
|
||||
}
|
||||
|
||||
cmd.Println("✓ Manifest: complete")
|
||||
return nil
|
||||
},
|
||||
}
|
||||
cmd.Flags().String("source", "", "Original source to compare against")
|
||||
cmd.Flags().StringP("password", "p", "", "Password for decryption (for .stim files)")
|
||||
return cmd
|
||||
}
|
||||
|
||||
func GetVerifyCmd() *cobra.Command {
|
||||
return verifyCmd
|
||||
}
|
||||
1
cmd/verify_helpers.go
Normal file
1
cmd/verify_helpers.go
Normal file
|
|
@ -0,0 +1 @@
|
|||
package cmd
|
||||
80
cmd/verify_test.go
Normal file
80
cmd/verify_test.go
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestVerifyCmd_Good(t *testing.T) {
|
||||
// Create a temporary directory for the test
|
||||
tempDir := t.TempDir()
|
||||
archivePath := filepath.Join(tempDir, "test.dat")
|
||||
|
||||
// Create a DataNode and add a file
|
||||
dn := datanode.New()
|
||||
dn.AddData("hello.txt", []byte("hello world"))
|
||||
|
||||
// Serialize the DataNode to a tarball
|
||||
tarball, err := dn.ToTar()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Write the tarball to the archive file
|
||||
err = os.WriteFile(archivePath, tarball, 0644)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Execute the verify command
|
||||
cmd := NewVerifyCmd()
|
||||
b := bytes.NewBufferString("")
|
||||
cmd.SetOut(b)
|
||||
cmd.SetArgs([]string{archivePath})
|
||||
err = cmd.Execute()
|
||||
|
||||
// Assert that the command was successful
|
||||
assert.NoError(t, err)
|
||||
output := b.String()
|
||||
assert.Contains(t, output, "✓ Structure: valid")
|
||||
assert.Contains(t, output, "✓ Checksums: 1/1 files OK")
|
||||
assert.Contains(t, output, "✓ Manifest: complete")
|
||||
}
|
||||
|
||||
func TestVerifyCmd_Bad(t *testing.T) {
|
||||
// Create a temporary directory for the test
|
||||
tempDir := t.TempDir()
|
||||
archivePath := filepath.Join(tempDir, "test.dat")
|
||||
|
||||
// Create a DataNode and add a file
|
||||
dn := datanode.New()
|
||||
dn.AddData("hello.txt", []byte("hello world"))
|
||||
|
||||
// Serialize the DataNode to a tarball
|
||||
tarball, err := dn.ToTar()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Corrupt the tarball
|
||||
corruptedTarball := bytes.Replace(tarball, []byte("hello world"), []byte("hello mars!"), 1)
|
||||
|
||||
// Write the corrupted tarball to the archive file
|
||||
err = os.WriteFile(archivePath, corruptedTarball, 0644)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Execute the verify command
|
||||
cmd := NewVerifyCmd()
|
||||
b := bytes.NewBufferString("")
|
||||
cmd.SetOut(b)
|
||||
cmd.SetErr(b)
|
||||
cmd.SetArgs([]string{archivePath})
|
||||
err = cmd.Execute()
|
||||
|
||||
// Assert that the command failed
|
||||
assert.Error(t, err)
|
||||
output := b.String()
|
||||
assert.Contains(t, output, "✓ Structure: valid")
|
||||
assert.Contains(t, output, "✗ Checksum mismatch: hello.txt")
|
||||
assert.Contains(t, output, "✗ Checksums: 0/1 files OK")
|
||||
assert.Contains(t, output, "✗ Manifest: incomplete")
|
||||
}
|
||||
|
|
@ -3,6 +3,9 @@ package datanode
|
|||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"io/fs"
|
||||
|
|
@ -59,7 +62,18 @@ func (d *DataNode) ToTar() ([]byte, error) {
|
|||
buf := new(bytes.Buffer)
|
||||
tw := tar.NewWriter(buf)
|
||||
|
||||
manifest := make(map[string]string)
|
||||
for _, file := range d.files {
|
||||
if file.name == "manifest.json" {
|
||||
continue
|
||||
}
|
||||
manifest[file.name] = file.checksum
|
||||
}
|
||||
|
||||
for _, file := range d.files {
|
||||
if file.name == "manifest.json" {
|
||||
continue
|
||||
}
|
||||
hdr := &tar.Header{
|
||||
Name: file.name,
|
||||
Mode: 0600,
|
||||
|
|
@ -74,6 +88,23 @@ func (d *DataNode) ToTar() ([]byte, error) {
|
|||
}
|
||||
}
|
||||
|
||||
manifestBytes, err := json.MarshalIndent(manifest, "", " ")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hdr := &tar.Header{
|
||||
Name: "manifest.json",
|
||||
Mode: 0600,
|
||||
Size: int64(len(manifestBytes)),
|
||||
ModTime: time.Now(),
|
||||
}
|
||||
if err := tw.WriteHeader(hdr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, err := tw.Write(manifestBytes); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := tw.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -92,10 +123,12 @@ func (d *DataNode) AddData(name string, content []byte) {
|
|||
if strings.HasSuffix(name, "/") {
|
||||
return
|
||||
}
|
||||
hash := sha256.Sum256(content)
|
||||
d.files[name] = &dataFile{
|
||||
name: name,
|
||||
content: content,
|
||||
modTime: time.Now(),
|
||||
name: name,
|
||||
content: content,
|
||||
modTime: time.Now(),
|
||||
checksum: hex.EncodeToString(hash[:]),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -296,15 +329,21 @@ func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode)
|
|||
|
||||
// dataFile represents a file in the DataNode.
|
||||
type dataFile struct {
|
||||
name string
|
||||
content []byte
|
||||
modTime time.Time
|
||||
name string
|
||||
content []byte
|
||||
modTime time.Time
|
||||
checksum string
|
||||
}
|
||||
|
||||
func (d *dataFile) Stat() (fs.FileInfo, error) { return &dataFileInfo{file: d}, nil }
|
||||
func (d *dataFile) Read(p []byte) (int, error) { return 0, io.EOF }
|
||||
func (d *dataFile) Close() error { return nil }
|
||||
|
||||
// ReplaceFile replaces a file in the DataNode with new content.
|
||||
func (d *DataNode) ReplaceFile(name string, content []byte) {
|
||||
d.AddData(name, content)
|
||||
}
|
||||
|
||||
// dataFileInfo implements fs.FileInfo for a dataFile.
|
||||
type dataFileInfo struct{ file *dataFile }
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue