Borg/pkg/failures/manager.go
google-labs-jules[bot] 46ffec7071 feat: Implement failure reporting and dead letter queue
This change introduces a new failure handling system for collection tasks.

- Created a new package `pkg/failures` to manage failure reporting, including a `Manager` to handle the lifecycle of a failure report, and `Failure` and `FailureReport` structs for storing failure data. The manager creates a `.borg-failures/<timestamp>` directory for each run, containing a `failures.json` report and a `retry.sh` script.
- Added a `borg failures` command with `show` and `clear` subcommands to manage failure reports.
- Added a `borg retry` command to retry failed collections.
- Added `--on-failure` and `--failures-dir` flags to the `collect` command.
- Refactored the `collect github repo` command to make the single-repository cloning logic reusable.
- Updated the `collect github repos` command to use the reusable cloning function and implement failure handling, including the `--on-failure=stop` and `--on-failure=prompt` options.
- Implemented failure categorization to distinguish between retryable and permanent failures.
- Implemented tracking of the number of attempts for each failed item.
- Created a placeholder file for a missing asset to fix the build.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
2026-02-02 00:53:35 +00:00

81 lines
2.1 KiB
Go

package failures
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"time"
)
// Manager handles the lifecycle of a failure report for a single run: it is
// created by NewManager, accumulates failures via RecordFailure, and writes
// its output to disk in Finalize.
type Manager struct {
// failuresDir is the root directory under which per-run directories live.
failuresDir string
// runDir is the directory created for this run; Finalize writes its files here.
runDir string
// report holds the collection name, timestamps, counters and recorded failures.
report *FailureReport
}
// NewManager creates a failure manager for the named collection and prepares
// the directory that will hold this run's output.
//
// failuresDir is the root directory for all runs; when empty it defaults to
// ".borg-failures" (relative to the working directory). A sub-directory named
// after the current local time, using the layout "2006-01-02T15-04-05", is
// created beneath it together with any missing parents.
//
// It returns an error if the run directory cannot be created.
//
// NOTE(review): directory names have one-second resolution and MkdirAll
// succeeds on an existing directory, so two managers created within the same
// second share a run directory.
func NewManager(failuresDir, collection string) (*Manager, error) {
	if failuresDir == "" {
		failuresDir = ".borg-failures"
	}

	// Take a single timestamp so the directory name and the report's Started
	// field always describe the same instant.
	started := time.Now()
	runDir := filepath.Join(failuresDir, started.Format("2006-01-02T15-04-05"))
	if err := os.MkdirAll(runDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create failures directory: %w", err)
	}

	return &Manager{
		failuresDir: failuresDir,
		runDir:      runDir,
		report: &FailureReport{
			Collection: collection,
			Started:    started,
		},
	}, nil
}
// RecordFailure appends failure to the report and increments the report's
// failed-item counter by one.
func (m *Manager) RecordFailure(failure *Failure) {
	rep := m.report
	rep.Stats.Failed++
	rep.Failures = append(rep.Failures, failure)
}
// SetTotal stores total as the number of items in this run. Finalize
// subtracts the failed count from it to derive the success count.
func (m *Manager) SetTotal(total int) {
	rep := m.report
	rep.Stats.Total = total
}
// Finalize completes the failure report and writes it to the run directory.
//
// It stamps the completion time, derives the success count as Total minus
// Failed, and then writes two files:
//
//   - failures.json: the report encoded as indented JSON.
//   - retry.sh: a bash script (mode 0755) with one
//     "borg collect github repo <url>" line per recorded failure.
//
// It returns an error if either file cannot be created or fully written. If
// failures.json fails, retry.sh is not attempted.
func (m *Manager) Finalize() error {
	m.report.Completed = time.Now()
	m.report.Stats.Success = m.report.Stats.Total - m.report.Stats.Failed

	if err := m.writeReport(); err != nil {
		return err
	}
	return m.writeRetryScript()
}

// writeReport encodes the report as indented JSON into failures.json.
func (m *Manager) writeReport() error {
	reportPath := filepath.Join(m.runDir, "failures.json")
	reportFile, err := os.Create(reportPath)
	if err != nil {
		return fmt.Errorf("failed to create failures.json: %w", err)
	}

	encoder := json.NewEncoder(reportFile)
	encoder.SetIndent("", " ")
	encodeErr := encoder.Encode(m.report)
	// Always close, and check the result: a failed Close on a file that was
	// just written can mean the data never reached disk.
	closeErr := reportFile.Close()

	if encodeErr != nil {
		return fmt.Errorf("failed to write failures.json: %w", encodeErr)
	}
	if closeErr != nil {
		return fmt.Errorf("failed to write failures.json: %w", closeErr)
	}
	return nil
}

// writeRetryScript writes retry.sh with one retry command per failure.
func (m *Manager) writeRetryScript() error {
	var retryScript strings.Builder
	retryScript.WriteString("#!/bin/bash\n\n")
	for _, failure := range m.report.Failures {
		if failure == nil {
			// RecordFailure accepts nil pointers; there is nothing to retry
			// for such an entry.
			continue
		}
		// The URL is quoted so characters such as '&', '?', ';' or spaces are
		// passed to borg verbatim rather than interpreted by the shell.
		fmt.Fprintf(&retryScript, "borg collect github repo %s\n", shellQuoteArg(failure.URL))
	}

	retryPath := filepath.Join(m.runDir, "retry.sh")
	if err := os.WriteFile(retryPath, []byte(retryScript.String()), 0755); err != nil {
		return fmt.Errorf("failed to write retry.sh: %w", err)
	}
	return nil
}

// shellQuoteArg wraps s in single quotes so the shell treats it as one
// literal word; each embedded single quote is emitted as '\''.
func shellQuoteArg(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}