diff --git a/cmd/collect_pwa.go b/cmd/collect_pwa.go index 8b5ef8c..2b5b495 100644 --- a/cmd/collect_pwa.go +++ b/cmd/collect_pwa.go @@ -5,6 +5,7 @@ import ( "os" "github.com/Snider/Borg/pkg/compress" + "github.com/Snider/Borg/pkg/hooks" "github.com/Snider/Borg/pkg/pwa" "github.com/Snider/Borg/pkg/tim" "github.com/Snider/Borg/pkg/trix" @@ -43,8 +44,21 @@ Examples: format, _ := cmd.Flags().GetString("format") compression, _ := cmd.Flags().GetString("compression") password, _ := cmd.Flags().GetString("password") + hooksFile, _ := cmd.Flags().GetString("hooks") - finalPath, err := CollectPWA(c.PWAClient, pwaURL, outputFile, format, compression, password) + // If hooks file is not specified, check for default + if hooksFile == "" { + if _, err := os.Stat(".borg-hooks.yaml"); err == nil { + hooksFile = ".borg-hooks.yaml" + } + } + + hookRunner, err := hooks.NewHookRunner(hooksFile) + if err != nil { + return err + } + + finalPath, err := CollectPWA(c.PWAClient, pwaURL, outputFile, format, compression, password, hookRunner) if err != nil { return err } @@ -57,13 +71,14 @@ Examples: c.Flags().String("format", "datanode", "Output format (datanode, tim, trix, or stim)") c.Flags().String("compression", "none", "Compression format (none, gz, or xz)") c.Flags().String("password", "", "Password for encryption (required for stim format)") + c.Flags().String("hooks", "", "Path to the .borg-hooks.yaml file") return c } func init() { collectCmd.AddCommand(&NewCollectPWACmd().Command) } -func CollectPWA(client pwa.PWAClient, pwaURL string, outputFile string, format string, compression string, password string) (string, error) { +func CollectPWA(client pwa.PWAClient, pwaURL string, outputFile string, format string, compression string, password string, hookRunner *hooks.HookRunner) (string, error) { if pwaURL == "" { return "", fmt.Errorf("url is required") } @@ -85,7 +100,7 @@ func CollectPWA(client pwa.PWAClient, pwaURL string, outputFile string, format s return "", fmt.Errorf("error finding manifest: %w", err) } bar.Describe("Downloading and packaging PWA") - dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, bar) + dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, bar, hookRunner) if err != nil { return "", fmt.Errorf("error downloading and packaging PWA: %w", err) } diff --git a/cmd/collect_website.go b/cmd/collect_website.go index 3811f32..3c85955 100644 --- a/cmd/collect_website.go +++ b/cmd/collect_website.go @@ -6,6 +6,7 @@ import ( "github.com/schollz/progressbar/v3" "github.com/Snider/Borg/pkg/compress" + "github.com/Snider/Borg/pkg/hooks" "github.com/Snider/Borg/pkg/tim" "github.com/Snider/Borg/pkg/trix" "github.com/Snider/Borg/pkg/ui" @@ -38,6 +39,19 @@ func NewCollectWebsiteCmd() *cobra.Command { format, _ := cmd.Flags().GetString("format") compression, _ := cmd.Flags().GetString("compression") password, _ := cmd.Flags().GetString("password") + hooksFile, _ := cmd.Flags().GetString("hooks") + + // If hooks file is not specified, check for default + if hooksFile == "" { + if _, err := os.Stat(".borg-hooks.yaml"); err == nil { + hooksFile = ".borg-hooks.yaml" + } + } + + hookRunner, err := hooks.NewHookRunner(hooksFile) + if err != nil { + return fmt.Errorf("failed to create hook runner: %w", err) + } if format != "datanode" && format != "tim" && format != "trix" { return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format) @@ -51,7 +65,7 @@ func NewCollectWebsiteCmd() *cobra.Command { bar = ui.NewProgressBar(-1, "Crawling website") } - dn, err := website.DownloadAndPackageWebsite(websiteURL, depth, bar) + dn, err := website.DownloadAndPackageWebsite(websiteURL, depth, bar, hookRunner) if err != nil { return fmt.Errorf("error downloading and packaging website: %w", err) } @@ -104,5 +118,6 @@ func NewCollectWebsiteCmd() *cobra.Command { collectWebsiteCmd.PersistentFlags().String("format", "datanode", "Output format (datanode, tim, or trix)") collectWebsiteCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)") collectWebsiteCmd.PersistentFlags().String("password", "", "Password for encryption") + collectWebsiteCmd.PersistentFlags().String("hooks", "", "Path to the .borg-hooks.yaml file") return collectWebsiteCmd } diff --git a/cmd/collect_website_test.go b/cmd/collect_website_test.go index 2c39674..20d54e2 100644 --- a/cmd/collect_website_test.go +++ b/cmd/collect_website_test.go @@ -1,20 +1,24 @@ package cmd import ( + "encoding/json" "fmt" + "os" "path/filepath" "strings" "testing" "github.com/Snider/Borg/pkg/datanode" + "github.com/Snider/Borg/pkg/hooks" "github.com/Snider/Borg/pkg/website" "github.com/schollz/progressbar/v3" + "github.com/stretchr/testify/require" ) func TestCollectWebsiteCmd_Good(t *testing.T) { // Mock the website downloader oldDownloadAndPackageWebsite := website.DownloadAndPackageWebsite - website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar) (*datanode.DataNode, error) { + website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) { return datanode.New(), nil } defer func() { @@ -32,10 +36,64 @@ func TestCollectWebsiteCmd_Good(t *testing.T) { } } +func TestCollectWebsiteCmd_Hooks(t *testing.T) { + // 1. Setup temp directory for test artifacts + tmpDir := t.TempDir() + + // 2. Create the hook script + scriptContent := "#!/bin/sh\ncat > " + filepath.Join(tmpDir, "hook.output") + scriptPath := filepath.Join(tmpDir, "testhook.sh") + err := os.WriteFile(scriptPath, []byte(scriptContent), 0755) + require.NoError(t, err) + + // 3. Create the hooks YAML config + hooksYAML := ` +hooks: + on_collection_complete: + - run: "` + scriptPath + `" +` + configPath := filepath.Join(tmpDir, ".borg-hooks.yaml") + err = os.WriteFile(configPath, []byte(hooksYAML), 0644) + require.NoError(t, err) + + // 4. Mock the website downloader + oldDownloadAndPackageWebsite := website.DownloadAndPackageWebsite + website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) { + dn := datanode.New() + // Manually trigger the hook that the real function would trigger + err := hookRunner.Trigger(hooks.Event{ + Event: hooks.OnCollectionComplete, + }) + require.NoError(t, err) // Use require in the mock to fail fast if the trigger fails + return dn, nil + } + defer func() { + website.DownloadAndPackageWebsite = oldDownloadAndPackageWebsite + }() + + // 5. Execute the command + rootCmd := NewRootCmd() + rootCmd.AddCommand(GetCollectCmd()) + out := filepath.Join(tmpDir, "out") + _, err = executeCommand(rootCmd, "collect", "website", "https://example.com", "--output", out, "--hooks", configPath) + require.NoError(t, err) + + // 6. Assert results + hookOutputFile := filepath.Join(tmpDir, "hook.output") + content, err := os.ReadFile(hookOutputFile) + require.NoError(t, err, "Hook output file should have been created") + + var receivedEvent hooks.Event + err = json.Unmarshal(content, &receivedEvent) + require.NoError(t, err, "Failed to unmarshal hook output") + + require.Equal(t, hooks.OnCollectionComplete, receivedEvent.Event) +} + func TestCollectWebsiteCmd_Bad(t *testing.T) { // Mock the website downloader to return an error oldDownloadAndPackageWebsite := website.DownloadAndPackageWebsite - website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar) (*datanode.DataNode, error) { + website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) { return nil, fmt.Errorf("website error") } defer func() { diff --git a/examples/collect_pwa/main.go b/examples/collect_pwa/main.go index 963ba62..f325b46 100644 --- a/examples/collect_pwa/main.go +++ b/examples/collect_pwa/main.go @@ -4,6 +4,7 @@ import ( "log" "os" + "github.com/Snider/Borg/pkg/hooks" "github.com/Snider/Borg/pkg/pwa" ) @@ -18,7 +19,12 @@ func main() { log.Fatalf("Failed to find manifest: %v", err) } - dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, nil) + hookRunner, err := hooks.NewHookRunner("") + if err != nil { + log.Fatalf("Failed to create hook runner: %v", err) + } + + dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, nil, hookRunner) if err != nil { log.Fatalf("Failed to download and package PWA: %v", err) } diff --git a/examples/collect_website/main.go b/examples/collect_website/main.go index 2e2f606..5d730f4 100644 --- a/examples/collect_website/main.go +++ b/examples/collect_website/main.go @@ -4,14 +4,20 @@ import ( "log" "os" + "github.com/Snider/Borg/pkg/hooks" "github.com/Snider/Borg/pkg/website" ) func main() { log.Println("Collecting website...") + hookRunner, err := hooks.NewHookRunner("") + if err != nil { + log.Fatalf("Failed to create hook runner: %v", err) + } + // Download and package the website. - dn, err := website.DownloadAndPackageWebsite("https://example.com", 2, nil) + dn, err := website.DownloadAndPackageWebsite("https://example.com", 2, nil, hookRunner) if err != nil { log.Fatalf("Failed to collect website: %v", err) } diff --git a/pkg/hooks/hooks.go b/pkg/hooks/hooks.go new file mode 100644 index 0000000..62dee4b --- /dev/null +++ b/pkg/hooks/hooks.go @@ -0,0 +1,126 @@ +package hooks + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + + "gopkg.in/yaml.v3" +) + +// HookEventType represents the type of a hook event. +type HookEventType string + +const ( + // OnFileCollected is triggered after each file is collected. + OnFileCollected HookEventType = "on_file_collected" + // OnURLFound is triggered when a new URL is discovered. + OnURLFound HookEventType = "on_url_found" + // OnCollectionComplete is triggered after the entire collection is done. + OnCollectionComplete HookEventType = "on_collection_complete" + // OnError is triggered when a failure occurs. + OnError HookEventType = "on_error" +) + +// Hook represents a single hook to be executed. +type Hook struct { + Pattern string `yaml:"pattern"` + Run string `yaml:"run"` +} + +// HookConfig represents the configuration for all hooks. +type HookConfig struct { + Hooks map[HookEventType][]Hook `yaml:"hooks"` +} + +// HookRunner is responsible for running hooks. +type HookRunner struct { + config *HookConfig +} + +// NewHookRunner creates a new HookRunner. +func NewHookRunner(configFile string) (*HookRunner, error) { + if configFile == "" { + return &HookRunner{config: &HookConfig{}}, nil + } + + data, err := os.ReadFile(configFile) + if err != nil { + return nil, fmt.Errorf("failed to read hook config file: %w", err) + } + + var config HookConfig + err = yaml.Unmarshal(data, &config) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal hook config: %w", err) + } + + return &HookRunner{config: &config}, nil +} + +// Event represents a hook event. +type Event struct { + Event HookEventType `json:"event"` + File string `json:"file,omitempty"` + URL string `json:"url,omitempty"` + Type string `json:"type,omitempty"` + Error string `json:"error,omitempty"` +} + +// Trigger triggers the hooks for a given event. +func (r *HookRunner) Trigger(event Event) error { + if r.config == nil { + return nil + } + + hooks, ok := r.config.Hooks[event.Event] + if !ok { + return nil + } + + for _, hook := range hooks { + if hook.Pattern != "" && event.File != "" { + matched, err := filepath.Match(hook.Pattern, filepath.Base(event.File)) + if err != nil { + return fmt.Errorf("failed to match pattern '%s' with file '%s': %w", hook.Pattern, event.File, err) + } + if !matched { + continue + } + } + + err := r.runHook(hook, event) + if err != nil { + return fmt.Errorf("failed to run hook '%s': %w", hook.Run, err) + } + } + + return nil +} + +func (r *HookRunner) runHook(hook Hook, event Event) error { + cmd := exec.Command("sh", "-c", hook.Run) + + stdin, err := cmd.StdinPipe() + if err != nil { + return fmt.Errorf("failed to get stdin pipe: %w", err) + } + + go func() { + defer stdin.Close() + err := json.NewEncoder(stdin).Encode(event) + if err != nil { + // It's hard to propagate this error, so we'll just log it + fmt.Fprintf(os.Stderr, "failed to write to hook stdin: %v\n", err) + } + }() + + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("hook execution failed: %w\n%s", err, string(output)) + } + + return nil +} diff --git a/pkg/hooks/hooks_test.go b/pkg/hooks/hooks_test.go new file mode 100644 index 0000000..74487c5 --- /dev/null +++ b/pkg/hooks/hooks_test.go @@ -0,0 +1,218 @@ +package hooks + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupTest(t *testing.T) (string, func()) { + t.Helper() + + // Create a temporary directory for test artifacts + tmpDir, err := os.MkdirTemp("", "hooks-test-") + require.NoError(t, err) + + // Create test hook script + scriptContent := "#!/bin/sh\ncat > " + filepath.Join(tmpDir, "testhook.output") + scriptPath := filepath.Join(tmpDir, "testhook.sh") + err = os.WriteFile(scriptPath, []byte(scriptContent), 0755) + require.NoError(t, err) + + // Create test hooks config + hooksYAML := ` +hooks: + on_file_collected: + - pattern: "*.pdf" + run: "` + scriptPath + `" + - pattern: "*.txt" + run: "` + scriptPath + `" + on_url_found: + - run: "` + scriptPath + `" + on_collection_complete: + - run: "` + scriptPath + `" + on_error: + - run: "` + scriptPath + `" +` + configPath := filepath.Join(tmpDir, ".borg-hooks.yaml") + err = os.WriteFile(configPath, []byte(hooksYAML), 0644) + require.NoError(t, err) + + return tmpDir, func() { + os.RemoveAll(tmpDir) + } +} + +func TestNewHookRunner(t *testing.T) { + tmpDir, cleanup := setupTest(t) + defer cleanup() + configPath := filepath.Join(tmpDir, ".borg-hooks.yaml") + + // Test with a valid config file + runner, err := NewHookRunner(configPath) + require.NoError(t, err) + assert.NotNil(t, runner) + assert.NotNil(t, runner.config) + + // Test with a non-existent file + _, err = NewHookRunner("non-existent-file.yaml") + assert.Error(t, err) + + // Test with a malformed file + malformedConfigPath := filepath.Join(tmpDir, "malformed.yaml") + err = os.WriteFile(malformedConfigPath, []byte("hooks: \n - invalid"), 0644) + require.NoError(t, err) + _, err = NewHookRunner(malformedConfigPath) + assert.Error(t, err) + + // Test with an empty config file path (should not error) + runner, err = NewHookRunner("") + require.NoError(t, err) + assert.NotNil(t, runner) + assert.NotNil(t, runner.config) +} + +func TestHookRunner_Trigger(t *testing.T) { + tmpDir, cleanup := setupTest(t) + defer cleanup() + configPath := filepath.Join(tmpDir, ".borg-hooks.yaml") + runner, err := NewHookRunner(configPath) + require.NoError(t, err) + + outputFile := filepath.Join(tmpDir, "testhook.output") + + tests := []struct { + name string + event Event + shouldTrigger bool + expectedEvent Event + }{ + { + name: "OnFileCollected - PDF Match with full path", + event: Event{ + Event: OnFileCollected, + File: "assets/document.pdf", + URL: "http://example.com/assets/document.pdf", + Type: "application/pdf", + }, + shouldTrigger: true, + expectedEvent: Event{ + Event: OnFileCollected, + File: "assets/document.pdf", + URL: "http://example.com/assets/document.pdf", + Type: "application/pdf", + }, + }, + { + name: "OnFileCollected - TXT Match with full path", + event: Event{ + Event: OnFileCollected, + File: "notes/notes.txt", + }, + shouldTrigger: true, + expectedEvent: Event{ + Event: OnFileCollected, + File: "notes/notes.txt", + }, + }, + { + name: "OnFileCollected - No Match", + event: Event{ + Event: OnFileCollected, + File: "image.jpg", + }, + shouldTrigger: false, + }, + { + name: "OnURLFound", + event: Event{ + Event: OnURLFound, + URL: "http://example.com/page2", + }, + shouldTrigger: true, + expectedEvent: Event{ + Event: OnURLFound, + URL: "http://example.com/page2", + }, + }, + { + name: "OnCollectionComplete", + event: Event{ + Event: OnCollectionComplete, + }, + shouldTrigger: true, + expectedEvent: Event{ + Event: OnCollectionComplete, + }, + }, + { + name: "OnError", + event: Event{ + Event: OnError, + Error: "something went wrong", + }, + shouldTrigger: true, + expectedEvent: Event{ + Event: OnError, + Error: "something went wrong", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Clean up previous output + _ = os.Remove(outputFile) + + err := runner.Trigger(tt.event) + require.NoError(t, err) + + if !tt.shouldTrigger { + _, err := os.Stat(outputFile) + assert.True(t, os.IsNotExist(err), "Hook should not have been triggered") + return + } + + // Verify the output file was created and contains the correct JSON + content, err := os.ReadFile(outputFile) + require.NoError(t, err) + + var receivedEvent Event + err = json.Unmarshal(content, &receivedEvent) + require.NoError(t, err) + + assert.Equal(t, tt.expectedEvent, receivedEvent) + }) + } +} + +func TestHookRunner_FailingHook(t *testing.T) { + tmpDir, cleanup := setupTest(t) + defer cleanup() + + // Create a failing script + scriptContent := "#!/bin/sh\nexit 1" + scriptPath := filepath.Join(tmpDir, "failing-hook.sh") + err := os.WriteFile(scriptPath, []byte(scriptContent), 0755) + require.NoError(t, err) + + // Create hooks config with the failing script + hooksYAML := ` +hooks: + on_error: + - run: "` + scriptPath + `" +` + configPath := filepath.Join(tmpDir, "failing-hooks.yaml") + err = os.WriteFile(configPath, []byte(hooksYAML), 0644) + require.NoError(t, err) + + runner, err := NewHookRunner(configPath) + require.NoError(t, err) + + err = runner.Trigger(Event{Event: OnError, Error: "test error"}) + assert.Error(t, err) +} diff --git a/pkg/pwa/pwa.go b/pkg/pwa/pwa.go index ce7af06..22b7aac 100644 --- a/pkg/pwa/pwa.go +++ b/pkg/pwa/pwa.go @@ -12,6 +12,7 @@ import ( "sync" "github.com/Snider/Borg/pkg/datanode" + "github.com/Snider/Borg/pkg/hooks" "github.com/schollz/progressbar/v3" "golang.org/x/net/html" ) @@ -27,7 +28,7 @@ var manifestFallbackPaths = []string{ // PWAClient is an interface for interacting with PWAs. type PWAClient interface { FindManifest(pwaURL string) (string, error) - DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar) (*datanode.DataNode, error) + DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) } // NewPWAClient creates a new PWAClient. @@ -158,13 +159,23 @@ type Manifest struct { // DownloadAndPackagePWA downloads and packages a PWA into a DataNode. // It downloads the manifest, all referenced assets, and parses HTML pages // for additional linked resources (CSS, JS, images). -func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar) (*datanode.DataNode, error) { +func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) { dn := datanode.New() var wg sync.WaitGroup var errs []error var mu sync.Mutex downloaded := make(map[string]bool) + triggerErrorHook := func(err error) { + mu.Lock() + defer mu.Unlock() + errs = append(errs, err) + hookRunner.Trigger(hooks.Event{ + Event: hooks.OnError, + Error: err.Error(), + }) + } + var downloadAndAdd func(assetURL string, parseHTML bool) downloadAndAdd = func(assetURL string, parseHTML bool) { defer wg.Done() @@ -183,33 +194,25 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr resp, err := p.client.Get(assetURL) if err != nil { - mu.Lock() - errs = append(errs, fmt.Errorf("failed to download %s: %w", assetURL, err)) - mu.Unlock() + triggerErrorHook(fmt.Errorf("failed to download %s: %w", assetURL, err)) return } defer resp.Body.Close() if resp.StatusCode < 200 || resp.StatusCode >= 300 { - mu.Lock() - errs = append(errs, fmt.Errorf("failed to download %s: status code %d", assetURL, resp.StatusCode)) - mu.Unlock() + triggerErrorHook(fmt.Errorf("failed to download %s: status code %d", assetURL, resp.StatusCode)) return } body, err := io.ReadAll(resp.Body) if err != nil { - mu.Lock() - errs = append(errs, fmt.Errorf("failed to read body of %s: %w", assetURL, err)) - mu.Unlock() + triggerErrorHook(fmt.Errorf("failed to read body of %s: %w", assetURL, err)) return } u, err := url.Parse(assetURL) if err != nil { - mu.Lock() - errs = append(errs, fmt.Errorf("failed to parse asset URL %s: %w", assetURL, err)) - mu.Unlock() + triggerErrorHook(fmt.Errorf("failed to parse asset URL %s: %w", assetURL, err)) return } @@ -218,11 +221,21 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr path = "index.html" } dn.AddData(path, body) + hookRunner.Trigger(hooks.Event{ + Event: hooks.OnFileCollected, + File: path, + URL: assetURL, + Type: resp.Header.Get("Content-Type"), + }) // Parse HTML for additional assets if parseHTML && isHTMLContent(resp.Header.Get("Content-Type"), body) { additionalAssets := p.extractAssetsFromHTML(assetURL, body) for _, asset := range additionalAssets { + hookRunner.Trigger(hooks.Event{ + Event: hooks.OnURLFound, + URL: asset, + }) mu.Lock() if !downloaded[asset] { wg.Add(1) @@ -272,6 +285,9 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr // If no start_url, use the PWA URL itself htmlPages = append(htmlPages, pwaURL) } + for _, page := range htmlPages { + hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: page}) + } // Icons for _, icon := range manifest.Icons { @@ -279,6 +295,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr iconURL, err := p.resolveURL(manifestURL, icon.Src) if err == nil { assetsToDownload = append(assetsToDownload, iconURL.String()) + hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: iconURL.String()}) } } } @@ -289,6 +306,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr screenshotURL, err := p.resolveURL(manifestURL, screenshot.Src) if err == nil { assetsToDownload = append(assetsToDownload, screenshotURL.String()) + hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: screenshotURL.String()}) } } } @@ -299,6 +317,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr shortcutURL, err := p.resolveURL(manifestURL, shortcut.URL) if err == nil { htmlPages = append(htmlPages, shortcutURL.String()) + hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: shortcutURL.String()}) } } for _, icon := range shortcut.Icons { @@ -306,6 +325,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr iconURL, err := p.resolveURL(manifestURL, icon.Src) if err == nil { assetsToDownload = append(assetsToDownload, iconURL.String()) + hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: iconURL.String()}) } } } @@ -316,6 +336,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr swURL, err := p.resolveURL(manifestURL, manifest.ServiceWorker.Src) if err == nil { assetsToDownload = append(assetsToDownload, swURL.String()) + hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: swURL.String()}) } } @@ -339,6 +360,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr if manifest.ServiceWorker.Src == "" { swURL := p.detectServiceWorker(pwaURL, dn) if swURL != "" && !downloaded[swURL] { + hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: swURL}) wg.Add(1) go downloadAndAdd(swURL, false) wg.Wait() @@ -353,6 +375,9 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr return dn, fmt.Errorf("%s", strings.Join(errStrings, "; ")) } + hookRunner.Trigger(hooks.Event{ + Event: hooks.OnCollectionComplete, + }) return dn, nil } @@ -511,6 +536,6 @@ func (m *MockPWAClient) FindManifest(pwaURL string) (string, error) { } // DownloadAndPackagePWA mocks the downloading and packaging of a PWA. -func (m *MockPWAClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar) (*datanode.DataNode, error) { +func (m *MockPWAClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) { return m.DN, m.Err } diff --git a/pkg/pwa/pwa_test.go b/pkg/pwa/pwa_test.go index 4145cd9..070cace 100644 --- a/pkg/pwa/pwa_test.go +++ b/pkg/pwa/pwa_test.go @@ -8,7 +8,9 @@ import ( "strings" "testing" + "github.com/Snider/Borg/pkg/hooks" "github.com/schollz/progressbar/v3" + "github.com/stretchr/testify/require" ) // --- Test Cases for FindManifest --- @@ -142,8 +144,10 @@ func TestDownloadAndPackagePWA_Good(t *testing.T) { defer server.Close() client := NewPWAClient() + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) bar := progressbar.NewOptions(1, progressbar.OptionSetWriter(io.Discard)) - dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", bar) + dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", bar, hookRunner) if err != nil { t.Fatalf("DownloadAndPackagePWA failed: %v", err) } @@ -158,11 +162,14 @@ func TestDownloadAndPackagePWA_Good(t *testing.T) { } func TestDownloadAndPackagePWA_Bad(t *testing.T) { + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + t.Run("Bad Manifest URL", func(t *testing.T) { server := newPWATestServer() defer server.Close() client := NewPWAClient() - _, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/nonexistent-manifest.json", nil) + _, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/nonexistent-manifest.json", nil, hookRunner) if err == nil { t.Fatal("expected an error for bad manifest url, but got none") } @@ -179,7 +186,7 @@ func TestDownloadAndPackagePWA_Bad(t *testing.T) { })) defer server.Close() client := NewPWAClient() - _, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil) + _, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner) if err == nil { t.Fatal("expected an error for asset 404, but got none") } @@ -191,6 +198,9 @@ func TestDownloadAndPackagePWA_Bad(t *testing.T) { } func TestDownloadAndPackagePWA_Ugly(t *testing.T) { + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + t.Run("Manifest with no assets", func(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") @@ -199,7 +209,7 @@ func TestDownloadAndPackagePWA_Ugly(t *testing.T) { defer server.Close() client := NewPWAClient() - dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil) + dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner) if err != nil { t.Fatalf("unexpected error for manifest with no assets: %v", err) } @@ -376,7 +386,9 @@ func TestMockPWAClient(t *testing.T) { t.Run("DownloadAndPackagePWA returns configured datanode", func(t *testing.T) { mock := NewMockPWAClient("", nil, nil) - dn, err := mock.DownloadAndPackagePWA("http://example.com", "http://example.com/manifest.json", nil) + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + dn, err := mock.DownloadAndPackagePWA("http://example.com", "http://example.com/manifest.json", nil, hookRunner) if err != nil { t.Fatalf("DownloadAndPackagePWA error = %v", err) } @@ -428,7 +440,9 @@ func TestDownloadAndPackagePWA_FullManifest(t *testing.T) { defer server.Close() client := NewPWAClient() - dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil) + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner) if err != nil { t.Fatalf("DownloadAndPackagePWA failed: %v", err) } @@ -496,7 +510,9 @@ func TestDownloadAndPackagePWA_ServiceWorker(t *testing.T) { defer server.Close() client := NewPWAClient() - dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil) + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner) if err != nil { t.Fatalf("DownloadAndPackagePWA failed: %v", err) } diff --git a/pkg/website/website.go b/pkg/website/website.go index b2bd517..8c46ac2 100644 --- a/pkg/website/website.go +++ b/pkg/website/website.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/Snider/Borg/pkg/datanode" + "github.com/Snider/Borg/pkg/hooks" "github.com/schollz/progressbar/v3" "golang.org/x/net/html" @@ -24,32 +25,34 @@ type Downloader struct { progressBar *progressbar.ProgressBar client *http.Client errors []error + hookRunner *hooks.HookRunner } // NewDownloader creates a new Downloader. -func NewDownloader(maxDepth int) *Downloader { - return NewDownloaderWithClient(maxDepth, http.DefaultClient) +func NewDownloader(maxDepth int, hookRunner *hooks.HookRunner) *Downloader { + return NewDownloaderWithClient(maxDepth, http.DefaultClient, hookRunner) } // NewDownloaderWithClient creates a new Downloader with a custom http.Client. -func NewDownloaderWithClient(maxDepth int, client *http.Client) *Downloader { +func NewDownloaderWithClient(maxDepth int, client *http.Client, hookRunner *hooks.HookRunner) *Downloader { return &Downloader{ dn: datanode.New(), visited: make(map[string]bool), maxDepth: maxDepth, client: client, errors: make([]error, 0), + hookRunner: hookRunner, } } // downloadAndPackageWebsite downloads a website and packages it into a DataNode. -func downloadAndPackageWebsite(startURL string, maxDepth int, bar *progressbar.ProgressBar) (*datanode.DataNode, error) { +func downloadAndPackageWebsite(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) { baseURL, err := url.Parse(startURL) if err != nil { return nil, err } - d := NewDownloader(maxDepth) + d := NewDownloader(maxDepth, hookRunner) d.baseURL = baseURL d.progressBar = bar d.crawl(startURL, 0) @@ -62,6 +65,9 @@ func downloadAndPackageWebsite(startURL string, maxDepth int, bar *progressbar.P return nil, fmt.Errorf("failed to download website:\n%s", strings.Join(errs, "\n")) } + d.hookRunner.Trigger(hooks.Event{ + Event: hooks.OnCollectionComplete, + }) return d.dn, nil } @@ -76,24 +82,30 @@ func (d *Downloader) crawl(pageURL string, depth int) { resp, err := d.client.Get(pageURL) if err != nil { - d.errors = append(d.errors, fmt.Errorf("Error getting %s: %w", pageURL, err)) + d.triggerErrorHook(fmt.Errorf("Error getting %s: %w", pageURL, err)) return } defer resp.Body.Close() if resp.StatusCode >= 400 { - d.errors = append(d.errors, fmt.Errorf("bad status for %s: %s", pageURL, resp.Status)) + d.triggerErrorHook(fmt.Errorf("bad status for %s: %s", pageURL, resp.Status)) return } body, err := io.ReadAll(resp.Body) if err != nil { - d.errors = append(d.errors, fmt.Errorf("Error reading body of %s: %w", pageURL, err)) + d.triggerErrorHook(fmt.Errorf("Error reading body of %s: %w", pageURL, err)) return } relPath := d.getRelativePath(pageURL) d.dn.AddData(relPath, body) + d.hookRunner.Trigger(hooks.Event{ + Event: hooks.OnFileCollected, + File: relPath, + URL: pageURL, + Type: resp.Header.Get("Content-Type"), + }) // Don't try to parse non-html content if !strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") { @@ -102,7 +114,7 @@ func (d *Downloader) crawl(pageURL string, depth int) { doc, err := html.Parse(strings.NewReader(string(body))) if err != nil { - d.errors = append(d.errors, fmt.Errorf("Error parsing HTML of %s: %w", pageURL, err)) + d.triggerErrorHook(fmt.Errorf("Error parsing HTML of %s: %w", pageURL, err)) return } @@ -115,6 +127,10 @@ func (d *Downloader) crawl(pageURL string, depth int) { if err != nil { continue } + d.hookRunner.Trigger(hooks.Event{ + Event: hooks.OnURLFound, + URL: link, + }) if d.isLocal(link) { if isAsset(link) { d.downloadAsset(link) @@ -143,24 +159,38 @@ func (d *Downloader) downloadAsset(assetURL string) { resp, err := d.client.Get(assetURL) if err != nil { - d.errors = append(d.errors, fmt.Errorf("Error getting asset %s: %w", assetURL, err)) + d.triggerErrorHook(fmt.Errorf("Error getting asset %s: %w", assetURL, err)) return } defer resp.Body.Close() if resp.StatusCode >= 400 { - d.errors = append(d.errors, fmt.Errorf("bad status for asset %s: %s", assetURL, resp.Status)) + d.triggerErrorHook(fmt.Errorf("bad status for asset %s: %s", assetURL, resp.Status)) return } body, err := io.ReadAll(resp.Body) if err != nil { - d.errors = append(d.errors, fmt.Errorf("Error reading body of asset %s: %w", assetURL, err)) + d.triggerErrorHook(fmt.Errorf("Error reading body of asset %s: %w", assetURL, err)) return } relPath := d.getRelativePath(assetURL) d.dn.AddData(relPath, body) + d.hookRunner.Trigger(hooks.Event{ + Event: hooks.OnFileCollected, + File: relPath, + URL: assetURL, + Type: resp.Header.Get("Content-Type"), + }) +} + +func (d *Downloader) triggerErrorHook(err error) { + d.errors = append(d.errors, err) + d.hookRunner.Trigger(hooks.Event{ + Event: hooks.OnError, + Error: err.Error(), + }) } func (d *Downloader) getRelativePath(pageURL string) string { diff --git a/pkg/website/website_test.go b/pkg/website/website_test.go index d3685e5..a6def33 100644 --- a/pkg/website/website_test.go +++ b/pkg/website/website_test.go @@ -10,7 +10,9 @@ import ( "testing" "time" + "github.com/Snider/Borg/pkg/hooks" "github.com/schollz/progressbar/v3" + "github.com/stretchr/testify/require" ) // --- Test Cases --- @@ -19,8 +21,11 @@ func TestDownloadAndPackageWebsite_Good(t *testing.T) { server := newWebsiteTestServer() defer server.Close() + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + bar := progressbar.NewOptions(1, progressbar.OptionSetWriter(io.Discard)) - dn, err := DownloadAndPackageWebsite(server.URL, 2, bar) + dn, err := DownloadAndPackageWebsite(server.URL, 2, bar, hookRunner) if err != nil { t.Fatalf("DownloadAndPackageWebsite failed: %v", err) } @@ -51,8 +56,11 @@ func TestDownloadAndPackageWebsite_Good(t *testing.T) { } func TestDownloadAndPackageWebsite_Bad(t *testing.T) { + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + t.Run("Invalid Start URL", func(t *testing.T) { - _, err := DownloadAndPackageWebsite("http://invalid-url", 1, nil) + _, err := DownloadAndPackageWebsite("http://invalid-url", 1, nil, hookRunner) if err == nil { t.Fatal("Expected an error for an invalid start URL, but got nil") } @@ -63,7 +71,7 @@ func TestDownloadAndPackageWebsite_Bad(t *testing.T) { http.Error(w, "Internal Server Error", http.StatusInternalServerError) })) defer server.Close() - _, err := DownloadAndPackageWebsite(server.URL, 1, nil) + _, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner) if err == nil { t.Fatal("Expected an error for a server error on the start URL, but got nil") } @@ -80,7 +88,7 @@ func TestDownloadAndPackageWebsite_Bad(t *testing.T) { })) defer server.Close() // We expect an error because the link is broken. - dn, err := DownloadAndPackageWebsite(server.URL, 1, nil) + dn, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner) if err == nil { t.Fatal("Expected an error for a broken link, but got nil") } @@ -94,12 +102,15 @@ func TestDownloadAndPackageWebsite_Bad(t *testing.T) { } func TestDownloadAndPackageWebsite_Ugly(t *testing.T) { + hookRunner, err := hooks.NewHookRunner("") + require.NoError(t, err) + t.Run("Exceed Max Depth", func(t *testing.T) { server := newWebsiteTestServer() defer server.Close() bar := progressbar.NewOptions(1, progressbar.OptionSetWriter(io.Discard)) - dn, err := DownloadAndPackageWebsite(server.URL, 1, bar) // Max depth of 1 + dn, err := DownloadAndPackageWebsite(server.URL, 1, bar, hookRunner) // Max depth of 1 if err != nil { t.Fatalf("DownloadAndPackageWebsite failed: %v", err) } @@ -122,7 +133,7 @@ func TestDownloadAndPackageWebsite_Ugly(t *testing.T) { fmt.Fprint(w, `External`) })) defer server.Close() - dn, err := DownloadAndPackageWebsite(server.URL, 1, nil) + dn, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner) if err != nil { t.Fatalf("DownloadAndPackageWebsite failed: %v", err) } @@ -156,7 +167,7 @@ func TestDownloadAndPackageWebsite_Ugly(t *testing.T) { // For now, we'll just test that it doesn't hang forever. done := make(chan bool) go func() { - _, err := DownloadAndPackageWebsite(server.URL, 1, nil) + _, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner) if err != nil && !strings.Contains(err.Error(), "context deadline exceeded") { // We expect a timeout error, but other errors are failures. t.Errorf("unexpected error: %v", err)