feat: Add initial implementation of archive verification and repair

This commit introduces the `borg verify` and `borg repair` commands to check the integrity of archives and repair them from a source URL.

Key changes:
- Added `cmd/verify.go` and `cmd/repair.go` with the new commands.
- Modified `pkg/datanode/datanode.go` to include a `manifest.json` file with SHA256 checksums for all files in the archive.
- Implemented archive structure validation, checksum verification, decryption testing for `.stim` files, and manifest completeness checks in the `verify` command.
- Implemented logic in the `repair` command to identify corrupted files and re-download them from a source URL.
- Added unit tests for the `verify` and `repair` commands.

The current implementation has known issues that were identified during a code review, including a critical bug that causes data corruption when repairing encrypted archives: `borg repair` writes the repaired archive back with `DataNode.ToTar`, so an encrypted `.stim` file is overwritten with an unencrypted tarball. A new plan has been created to address these issues. The code is being submitted in its current state to save progress.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
This commit is contained in:
google-labs-jules[bot] 2026-02-02 00:46:22 +00:00
parent cf2af53ed3
commit dea068ec23
6 changed files with 506 additions and 6 deletions

163
cmd/repair.go Normal file
View file

@ -0,0 +1,163 @@
package cmd
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strings"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/tim"
"github.com/spf13/cobra"
)
// repairCmd is the package-level instance of the repair command. It is built
// once by NewRepairCmd, registered on RootCmd in init, and handed out by
// GetRepairCmd.
var repairCmd = NewRepairCmd()
// init attaches the shared repair command to the root command so that it is
// available as a subcommand.
func init() {
	RootCmd.AddCommand(repairCmd)
}
// NewRepairCmd builds the `repair` command.
//
// The command loads the archive named by its single argument (decrypting it
// with --password when the path ends in ".stim"), compares every entry listed
// in the archive's manifest.json against its recorded SHA-256 checksum, and
// re-downloads each missing or mismatching file from <--source>/<name>.
// Downloaded content is accepted only when it hashes to the checksum recorded
// in the manifest. Once every corrupted file has been replaced, the archive is
// re-serialized with ToTar and written back to the same path.
//
// Encrypted (.stim) archives are inspected but never rewritten: the only
// serializer used here is ToTar, and writing its output over a .stim file
// would replace the encrypted container with a plain tarball.
//
// RunE returns an error when --source is missing, the archive or its manifest
// cannot be read, the archive is encrypted and needs repair, any corrupted
// file could not be repaired, or the repaired archive cannot be written.
func NewRepairCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "repair [archive]",
		Short: "Repair a corrupted archive.",
		Long:  `Repair a corrupted archive by re-downloading missing or corrupted files from the original source.`,
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			archivePath := args[0]
			password, err := cmd.Flags().GetString("password")
			if err != nil {
				return fmt.Errorf("could not read --password flag: %w", err)
			}
			source, err := cmd.Flags().GetString("source")
			if err != nil {
				return fmt.Errorf("could not read --source flag: %w", err)
			}
			if source == "" {
				return fmt.Errorf("--source is required for repair")
			}
			encrypted := strings.HasSuffix(archivePath, ".stim")

			cmd.Printf("Repairing %s...\n", archivePath)
			content, err := os.ReadFile(archivePath)
			if err != nil {
				return fmt.Errorf("could not read archive: %w", err)
			}

			var dn *datanode.DataNode
			if encrypted {
				if password == "" {
					return fmt.Errorf("password required for .stim files")
				}
				t, err := tim.FromSigil(content, password)
				if err != nil {
					return fmt.Errorf("decryption failed: %w", err)
				}
				dn = t.RootFS
			} else {
				dn, err = datanode.FromTar(content)
				if err != nil {
					return fmt.Errorf("archive structure is corrupt: %w", err)
				}
			}

			manifestFile, err := dn.Open("manifest.json")
			if err != nil {
				return fmt.Errorf("could not open manifest: %w", err)
			}
			defer manifestFile.Close()
			manifestBytes, err := io.ReadAll(manifestFile)
			if err != nil {
				return fmt.Errorf("could not read manifest: %w", err)
			}
			var manifest map[string]string
			if err := json.Unmarshal(manifestBytes, &manifest); err != nil {
				return fmt.Errorf("could not parse manifest: %w", err)
			}

			// A file is corrupted when it is missing, unreadable, or does not
			// hash to the checksum recorded for it in the manifest.
			var corruptedFiles []string
			for name, expectedChecksum := range manifest {
				file, err := dn.Open(name)
				if err != nil {
					corruptedFiles = append(corruptedFiles, name)
					continue
				}
				fileBytes, err := io.ReadAll(file)
				// Close per iteration; a defer here would keep every handle
				// open until the whole command returns.
				_ = file.Close()
				if err != nil {
					corruptedFiles = append(corruptedFiles, name)
					continue
				}
				hash := sha256.Sum256(fileBytes)
				if hex.EncodeToString(hash[:]) != expectedChecksum {
					corruptedFiles = append(corruptedFiles, name)
				}
			}

			if len(corruptedFiles) == 0 {
				cmd.Println("Archive is not corrupted.")
				return nil
			}

			cmd.Printf("Found %d corrupted files:\n", len(corruptedFiles))
			for _, file := range corruptedFiles {
				cmd.Printf(" - %s\n", file)
			}

			// The write-back below always produces a plain tarball. Doing that
			// to an encrypted archive would silently destroy the encrypted
			// container, so stop before touching anything.
			if encrypted {
				return fmt.Errorf("repairing encrypted .stim archives is not supported: the repaired archive would be written unencrypted")
			}

			cmd.Println("Attempting to repair from source...")
			// Tolerate a trailing slash on --source so the URL has no "//".
			baseURL := strings.TrimSuffix(source, "/")
			repairedCount := 0
			for _, file := range corruptedFiles {
				fileURL := baseURL + "/" + file
				resp, err := http.Get(fileURL)
				if err != nil {
					cmd.Printf(" ✗ Could not download %s: %v\n", file, err)
					continue
				}
				if resp.StatusCode != http.StatusOK {
					_ = resp.Body.Close()
					cmd.Printf(" ✗ Could not download %s: status %s\n", file, resp.Status)
					continue
				}
				newContent, err := io.ReadAll(resp.Body)
				// Close per iteration instead of deferring inside the loop.
				_ = resp.Body.Close()
				if err != nil {
					cmd.Printf(" ✗ Could not read downloaded content for %s: %v\n", file, err)
					continue
				}

				// Only accept content that matches the manifest; otherwise a
				// wrong or stale source would be stored as if it were valid.
				sum := sha256.Sum256(newContent)
				if got := hex.EncodeToString(sum[:]); got != manifest[file] {
					cmd.Printf(" ✗ Downloaded %s does not match manifest checksum (expected %s, got %s)\n", file, manifest[file], got)
					continue
				}

				dn.ReplaceFile(file, newContent)
				repairedCount++
				cmd.Printf(" ✓ Repaired %s\n", file)
			}

			if repairedCount < len(corruptedFiles) {
				return fmt.Errorf("could not repair all corrupted files")
			}

			// Save the repaired archive
			repairedData, err := dn.ToTar()
			if err != nil {
				return fmt.Errorf("could not serialize repaired archive: %w", err)
			}
			if err := os.WriteFile(archivePath, repairedData, 0644); err != nil {
				return fmt.Errorf("could not write repaired archive: %w", err)
			}

			cmd.Println("Archive repaired successfully.")
			return nil
		},
	}
	cmd.Flags().String("source", "", "Base URL of the original source to re-download corrupted files from")
	cmd.Flags().StringP("password", "p", "", "Password for decryption (for .stim files)")
	return cmd
}
// GetRepairCmd returns the package-level repair command instance (repairCmd).
func GetRepairCmd() *cobra.Command {
return repairCmd
}

66
cmd/repair_test.go Normal file
View file

@ -0,0 +1,66 @@
package cmd
import (
"bytes"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"github.com/Snider/Borg/pkg/datanode"
"github.com/stretchr/testify/assert"
)
// TestRepairCmd_Good covers the happy path of the repair command: an archive
// whose single file was altered on disk is repaired from an HTTP source and
// afterwards passes the verify command.
func TestRepairCmd_Good(t *testing.T) {
	// Build a one-file archive in memory.
	node := datanode.New()
	node.AddData("hello.txt", []byte("hello world"))
	pristine, err := node.ToTar()
	assert.NoError(t, err)

	// Swap the payload for different bytes of the same length and store the
	// damaged archive on disk.
	archivePath := filepath.Join(t.TempDir(), "test.dat")
	damaged := bytes.Replace(pristine, []byte("hello world"), []byte("hello mars!"), 1)
	err = os.WriteFile(archivePath, damaged, 0644)
	assert.NoError(t, err)

	// Serve the original content for hello.txt; everything else is a 404.
	handler := func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/hello.txt" {
			http.NotFound(w, r)
			return
		}
		w.Write([]byte("hello world"))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	// Run the repair against the mock source.
	out := bytes.NewBufferString("")
	repair := NewRepairCmd()
	repair.SetOut(out)
	repair.SetArgs([]string{archivePath, "--source", server.URL})
	err = repair.Execute()
	assert.NoError(t, err)

	// The repaired archive must now pass verification.
	out.Reset()
	verify := NewVerifyCmd()
	verify.SetOut(out)
	verify.SetArgs([]string{archivePath})
	err = verify.Execute()
	assert.NoError(t, err)

	report := out.String()
	for _, want := range []string{
		"✓ Structure: valid",
		"✓ Checksums: 1/1 files OK",
		"✓ Manifest: complete",
	} {
		assert.Contains(t, report, want)
	}
}

151
cmd/verify.go Normal file
View file

@ -0,0 +1,151 @@
package cmd
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"io/fs"
"os"
"strings"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/tim"
"github.com/spf13/cobra"
)
// verifyCmd is the package-level instance of the verify command. It is built
// once by NewVerifyCmd, registered on RootCmd in init, and handed out by
// GetVerifyCmd.
var verifyCmd = NewVerifyCmd()
// init attaches the shared verify command to the root command so that it is
// available as a subcommand.
func init() {
	RootCmd.AddCommand(verifyCmd)
}
// NewVerifyCmd builds the `verify` command.
//
// The command loads the archive named by its single argument (decrypting it
// with --password when the path ends in ".stim"), then reports, one line per
// check:
//   - whether the archive could be decrypted / parsed,
//   - whether each file listed in manifest.json is present, readable and
//     hashes to its recorded SHA-256 checksum,
//   - whether the archive contains files that the manifest does not list.
//
// RunE returns an error when the archive or manifest cannot be loaded, or
// when any file is corrupted or untracked.
func NewVerifyCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "verify [archive]",
		Short: "Verify archive integrity and detect corruption.",
		Long:  `Verify archive integrity and detect corruption.`,
		Args:  cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			archivePath := args[0]
			password, err := cmd.Flags().GetString("password")
			if err != nil {
				return fmt.Errorf("could not read --password flag: %w", err)
			}

			cmd.Printf("Verifying %s...\n", archivePath)
			content, err := os.ReadFile(archivePath)
			if err != nil {
				return fmt.Errorf("could not read archive: %w", err)
			}

			var dn *datanode.DataNode
			if strings.HasSuffix(archivePath, ".stim") {
				if password == "" {
					return fmt.Errorf("password required for .stim files")
				}
				t, err := tim.FromSigil(content, password)
				if err != nil {
					cmd.Println("✗ Decryption: failed")
					return fmt.Errorf("decryption failed: %w", err)
				}
				dn = t.RootFS
				cmd.Println("✓ Decryption: successful")
			} else {
				dn, err = datanode.FromTar(content)
				if err != nil {
					cmd.Println("✗ Structure: invalid")
					return fmt.Errorf("archive structure is corrupt: %w", err)
				}
				cmd.Println("✓ Structure: valid")
			}

			manifestFile, err := dn.Open("manifest.json")
			if err != nil {
				cmd.Println("✗ Checksums: missing manifest")
				return fmt.Errorf("could not open manifest: %w", err)
			}
			defer manifestFile.Close()
			manifestBytes, err := io.ReadAll(manifestFile)
			if err != nil {
				cmd.Println("✗ Checksums: unreadable manifest")
				return fmt.Errorf("could not read manifest: %w", err)
			}
			var manifest map[string]string
			if err := json.Unmarshal(manifestBytes, &manifest); err != nil {
				cmd.Println("✗ Checksums: invalid manifest")
				return fmt.Errorf("could not parse manifest: %w", err)
			}

			// Checksum every file the manifest lists.
			filesOk := 0
			filesCorrupt := 0
			for name, expectedChecksum := range manifest {
				file, err := dn.Open(name)
				if err != nil {
					cmd.Printf("✗ Checksum mismatch: %s (missing)\n", name)
					filesCorrupt++
					continue
				}
				fileBytes, err := io.ReadAll(file)
				// Close per iteration; a defer here would keep every handle
				// open until the whole command returns.
				_ = file.Close()
				if err != nil {
					cmd.Printf("✗ Checksum mismatch: %s (unreadable)\n", name)
					filesCorrupt++
					continue
				}
				hash := sha256.Sum256(fileBytes)
				actualChecksum := hex.EncodeToString(hash[:])
				if actualChecksum != expectedChecksum {
					cmd.Printf("✗ Checksum mismatch: %s\n", name)
					cmd.Printf(" Expected: %s\n", expectedChecksum)
					cmd.Printf(" Got: %s\n", actualChecksum)
					filesCorrupt++
					continue
				}
				filesOk++
			}
			if filesCorrupt > 0 {
				cmd.Printf("✗ Checksums: %d/%d files OK\n", filesOk, filesOk+filesCorrupt)
			} else {
				cmd.Printf("✓ Checksums: %d/%d files OK\n", filesOk, filesOk)
			}

			// Manifest completeness check: every regular file in the archive,
			// other than the manifest itself, must be listed in the manifest.
			untrackedFiles := 0
			walkErr := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
				if err != nil {
					return err
				}
				if d.IsDir() || path == "manifest.json" {
					return nil
				}
				if _, exists := manifest[path]; !exists {
					untrackedFiles++
					cmd.Printf("✗ Untracked file: %s\n", path)
				}
				return nil
			})
			if walkErr != nil {
				return fmt.Errorf("error checking for untracked files: %w", walkErr)
			}

			if filesCorrupt > 0 || untrackedFiles > 0 {
				cmd.Println("✗ Manifest: incomplete")
				return fmt.Errorf("%d files corrupted or untracked", filesCorrupt+untrackedFiles)
			}
			cmd.Println("✓ Manifest: complete")
			return nil
		},
	}
	// NOTE(review): --source is declared but never read by this command.
	cmd.Flags().String("source", "", "Original source to compare against")
	cmd.Flags().StringP("password", "p", "", "Password for decryption (for .stim files)")
	return cmd
}
// GetVerifyCmd returns the package-level verify command instance (verifyCmd).
func GetVerifyCmd() *cobra.Command {
return verifyCmd
}

1
cmd/verify_helpers.go Normal file
View file

@ -0,0 +1 @@
package cmd

80
cmd/verify_test.go Normal file
View file

@ -0,0 +1,80 @@
package cmd
import (
"bytes"
"os"
"path/filepath"
"testing"
"github.com/Snider/Borg/pkg/datanode"
"github.com/stretchr/testify/assert"
)
// TestVerifyCmd_Good checks that an untouched, freshly serialized archive
// passes every check performed by the verify command.
func TestVerifyCmd_Good(t *testing.T) {
	// Build a one-file archive in memory.
	node := datanode.New()
	node.AddData("hello.txt", []byte("hello world"))
	archive, err := node.ToTar()
	assert.NoError(t, err)

	// Store it in a per-test temporary directory.
	archivePath := filepath.Join(t.TempDir(), "test.dat")
	err = os.WriteFile(archivePath, archive, 0644)
	assert.NoError(t, err)

	// Run verify and capture what it prints.
	out := bytes.NewBufferString("")
	verify := NewVerifyCmd()
	verify.SetOut(out)
	verify.SetArgs([]string{archivePath})
	err = verify.Execute()
	assert.NoError(t, err)

	report := out.String()
	for _, want := range []string{
		"✓ Structure: valid",
		"✓ Checksums: 1/1 files OK",
		"✓ Manifest: complete",
	} {
		assert.Contains(t, report, want)
	}
}
// TestVerifyCmd_Bad checks that verify fails, and reports the mismatch, when
// a file's content inside the archive no longer matches its manifest entry.
func TestVerifyCmd_Bad(t *testing.T) {
	// Build a one-file archive in memory.
	node := datanode.New()
	node.AddData("hello.txt", []byte("hello world"))
	pristine, err := node.ToTar()
	assert.NoError(t, err)

	// Swap the payload for different bytes of the same length and store the
	// damaged archive on disk.
	archivePath := filepath.Join(t.TempDir(), "test.dat")
	damaged := bytes.Replace(pristine, []byte("hello world"), []byte("hello mars!"), 1)
	err = os.WriteFile(archivePath, damaged, 0644)
	assert.NoError(t, err)

	// Run verify, sending both normal and error output to one buffer.
	out := bytes.NewBufferString("")
	verify := NewVerifyCmd()
	verify.SetOut(out)
	verify.SetErr(out)
	verify.SetArgs([]string{archivePath})
	err = verify.Execute()

	// The command must fail and explain why.
	assert.Error(t, err)
	report := out.String()
	for _, want := range []string{
		"✓ Structure: valid",
		"✗ Checksum mismatch: hello.txt",
		"✗ Checksums: 0/1 files OK",
		"✗ Manifest: incomplete",
	} {
		assert.Contains(t, report, want)
	}
}

View file

@ -3,6 +3,9 @@ package datanode
import (
"archive/tar"
"bytes"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"io"
"io/fs"
@ -59,7 +62,18 @@ func (d *DataNode) ToTar() ([]byte, error) {
buf := new(bytes.Buffer)
tw := tar.NewWriter(buf)
manifest := make(map[string]string)
for _, file := range d.files {
if file.name == "manifest.json" {
continue
}
manifest[file.name] = file.checksum
}
for _, file := range d.files {
if file.name == "manifest.json" {
continue
}
hdr := &tar.Header{
Name: file.name,
Mode: 0600,
@ -74,6 +88,23 @@ func (d *DataNode) ToTar() ([]byte, error) {
}
}
manifestBytes, err := json.MarshalIndent(manifest, "", " ")
if err != nil {
return nil, err
}
hdr := &tar.Header{
Name: "manifest.json",
Mode: 0600,
Size: int64(len(manifestBytes)),
ModTime: time.Now(),
}
if err := tw.WriteHeader(hdr); err != nil {
return nil, err
}
if _, err := tw.Write(manifestBytes); err != nil {
return nil, err
}
if err := tw.Close(); err != nil {
return nil, err
}
@ -92,10 +123,12 @@ func (d *DataNode) AddData(name string, content []byte) {
if strings.HasSuffix(name, "/") {
return
}
hash := sha256.Sum256(content)
d.files[name] = &dataFile{
name: name,
content: content,
modTime: time.Now(),
name: name,
content: content,
modTime: time.Now(),
checksum: hex.EncodeToString(hash[:]),
}
}
@ -296,15 +329,21 @@ func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode)
// dataFile represents a file in the DataNode.
type dataFile struct {
name string
content []byte
modTime time.Time
name string
content []byte
modTime time.Time
checksum string
}
// Stat returns file information for d, wrapped in a dataFileInfo.
func (d *dataFile) Stat() (fs.FileInfo, error) {
	info := &dataFileInfo{file: d}
	return info, nil
}

// Read never copies any bytes into p; it always reports io.EOF.
func (d *dataFile) Read(p []byte) (int, error) {
	return 0, io.EOF
}

// Close does nothing and always returns nil.
func (d *dataFile) Close() error {
	return nil
}
// ReplaceFile replaces a file in the DataNode with new content.
//
// It is a thin wrapper that forwards name and content to AddData unchanged.
// NOTE(review): the visible part of AddData skips names ending in "/" and
// stores a fresh entry with a recomputed SHA-256 checksum under name, so this
// presumably also creates the file when it does not exist yet — confirm
// against AddData's full body.
func (d *DataNode) ReplaceFile(name string, content []byte) {
d.AddData(name, content)
}
// dataFileInfo adapts a dataFile to the fs.FileInfo interface.
type dataFileInfo struct {
	// file is the dataFile whose metadata is being reported.
	file *dataFile
}