This commit is contained in:
Snider 2026-02-11 01:00:09 +08:00 committed by GitHub
commit 30a8aa2fa2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 575 additions and 49 deletions

View file

@ -5,6 +5,7 @@ import (
"os"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/hooks"
"github.com/Snider/Borg/pkg/pwa"
"github.com/Snider/Borg/pkg/tim"
"github.com/Snider/Borg/pkg/trix"
@ -43,8 +44,21 @@ Examples:
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
password, _ := cmd.Flags().GetString("password")
hooksFile, _ := cmd.Flags().GetString("hooks")
finalPath, err := CollectPWA(c.PWAClient, pwaURL, outputFile, format, compression, password)
// If hooks file is not specified, check for default
if hooksFile == "" {
if _, err := os.Stat(".borg-hooks.yaml"); err == nil {
hooksFile = ".borg-hooks.yaml"
}
}
hookRunner, err := hooks.NewHookRunner(hooksFile)
if err != nil {
return err
}
finalPath, err := CollectPWA(c.PWAClient, pwaURL, outputFile, format, compression, password, hookRunner)
if err != nil {
return err
}
@ -57,13 +71,14 @@ Examples:
c.Flags().String("format", "datanode", "Output format (datanode, tim, trix, or stim)")
c.Flags().String("compression", "none", "Compression format (none, gz, or xz)")
c.Flags().String("password", "", "Password for encryption (required for stim format)")
c.Flags().String("hooks", "", "Path to the .borg-hooks.yaml file")
return c
}
func init() {
collectCmd.AddCommand(&NewCollectPWACmd().Command)
}
func CollectPWA(client pwa.PWAClient, pwaURL string, outputFile string, format string, compression string, password string) (string, error) {
func CollectPWA(client pwa.PWAClient, pwaURL string, outputFile string, format string, compression string, password string, hookRunner *hooks.HookRunner) (string, error) {
if pwaURL == "" {
return "", fmt.Errorf("url is required")
}
@ -85,7 +100,7 @@ func CollectPWA(client pwa.PWAClient, pwaURL string, outputFile string, format s
return "", fmt.Errorf("error finding manifest: %w", err)
}
bar.Describe("Downloading and packaging PWA")
dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, bar)
dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, bar, hookRunner)
if err != nil {
return "", fmt.Errorf("error downloading and packaging PWA: %w", err)
}

View file

@ -6,6 +6,7 @@ import (
"github.com/schollz/progressbar/v3"
"github.com/Snider/Borg/pkg/compress"
"github.com/Snider/Borg/pkg/hooks"
"github.com/Snider/Borg/pkg/tim"
"github.com/Snider/Borg/pkg/trix"
"github.com/Snider/Borg/pkg/ui"
@ -38,6 +39,19 @@ func NewCollectWebsiteCmd() *cobra.Command {
format, _ := cmd.Flags().GetString("format")
compression, _ := cmd.Flags().GetString("compression")
password, _ := cmd.Flags().GetString("password")
hooksFile, _ := cmd.Flags().GetString("hooks")
// If hooks file is not specified, check for default
if hooksFile == "" {
if _, err := os.Stat(".borg-hooks.yaml"); err == nil {
hooksFile = ".borg-hooks.yaml"
}
}
hookRunner, err := hooks.NewHookRunner(hooksFile)
if err != nil {
return fmt.Errorf("failed to create hook runner: %w", err)
}
if format != "datanode" && format != "tim" && format != "trix" {
return fmt.Errorf("invalid format: %s (must be 'datanode', 'tim', or 'trix')", format)
@ -51,7 +65,7 @@ func NewCollectWebsiteCmd() *cobra.Command {
bar = ui.NewProgressBar(-1, "Crawling website")
}
dn, err := website.DownloadAndPackageWebsite(websiteURL, depth, bar)
dn, err := website.DownloadAndPackageWebsite(websiteURL, depth, bar, hookRunner)
if err != nil {
return fmt.Errorf("error downloading and packaging website: %w", err)
}
@ -104,5 +118,6 @@ func NewCollectWebsiteCmd() *cobra.Command {
collectWebsiteCmd.PersistentFlags().String("format", "datanode", "Output format (datanode, tim, or trix)")
collectWebsiteCmd.PersistentFlags().String("compression", "none", "Compression format (none, gz, or xz)")
collectWebsiteCmd.PersistentFlags().String("password", "", "Password for encryption")
collectWebsiteCmd.PersistentFlags().String("hooks", "", "Path to the .borg-hooks.yaml file")
return collectWebsiteCmd
}

View file

@ -1,20 +1,24 @@
package cmd
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/hooks"
"github.com/Snider/Borg/pkg/website"
"github.com/schollz/progressbar/v3"
"github.com/stretchr/testify/require"
)
func TestCollectWebsiteCmd_Good(t *testing.T) {
// Mock the website downloader
oldDownloadAndPackageWebsite := website.DownloadAndPackageWebsite
website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar) (*datanode.DataNode, error) {
website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) {
return datanode.New(), nil
}
defer func() {
@ -32,10 +36,64 @@ func TestCollectWebsiteCmd_Good(t *testing.T) {
}
}
// TestCollectWebsiteCmd_Hooks runs "collect website" with --hooks pointing at a
// temporary config and checks that the on_collection_complete hook receives the
// event as JSON on its standard input. The website downloader is replaced by a
// mock for the duration of the test.
func TestCollectWebsiteCmd_Hooks(t *testing.T) {
// 1. Setup temp directory for test artifacts
tmpDir := t.TempDir()
// 2. Create the hook script; it copies whatever arrives on stdin into hook.output
scriptContent := "#!/bin/sh\ncat > " + filepath.Join(tmpDir, "hook.output")
scriptPath := filepath.Join(tmpDir, "testhook.sh")
err := os.WriteFile(scriptPath, []byte(scriptContent), 0755)
require.NoError(t, err)
// 3. Create the hooks YAML config that registers the script for on_collection_complete
hooksYAML := `
hooks:
on_collection_complete:
- run: "` + scriptPath + `"
`
configPath := filepath.Join(tmpDir, ".borg-hooks.yaml")
err = os.WriteFile(configPath, []byte(hooksYAML), 0644)
require.NoError(t, err)
// 4. Mock the website downloader; the original is restored by the deferred func below
oldDownloadAndPackageWebsite := website.DownloadAndPackageWebsite
website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) {
dn := datanode.New()
// Manually trigger the hook that the real function would trigger
err := hookRunner.Trigger(hooks.Event{
Event: hooks.OnCollectionComplete,
})
require.NoError(t, err) // Use require in the mock to fail fast if the trigger fails
return dn, nil
}
defer func() {
website.DownloadAndPackageWebsite = oldDownloadAndPackageWebsite
}()
// 5. Execute the command with an explicit --hooks path
rootCmd := NewRootCmd()
rootCmd.AddCommand(GetCollectCmd())
out := filepath.Join(tmpDir, "out")
_, err = executeCommand(rootCmd, "collect", "website", "https://example.com", "--output", out, "--hooks", configPath)
require.NoError(t, err)
// 6. Assert results: the script must have written the JSON event it read from stdin
hookOutputFile := filepath.Join(tmpDir, "hook.output")
content, err := os.ReadFile(hookOutputFile)
require.NoError(t, err, "Hook output file should have been created")
var receivedEvent hooks.Event
err = json.Unmarshal(content, &receivedEvent)
require.NoError(t, err, "Failed to unmarshal hook output")
require.Equal(t, hooks.OnCollectionComplete, receivedEvent.Event)
}
func TestCollectWebsiteCmd_Bad(t *testing.T) {
// Mock the website downloader to return an error
oldDownloadAndPackageWebsite := website.DownloadAndPackageWebsite
website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar) (*datanode.DataNode, error) {
website.DownloadAndPackageWebsite = func(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) {
return nil, fmt.Errorf("website error")
}
defer func() {

View file

@ -4,6 +4,7 @@ import (
"log"
"os"
"github.com/Snider/Borg/pkg/hooks"
"github.com/Snider/Borg/pkg/pwa"
)
@ -18,7 +19,12 @@ func main() {
log.Fatalf("Failed to find manifest: %v", err)
}
dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, nil)
hookRunner, err := hooks.NewHookRunner("")
if err != nil {
log.Fatalf("Failed to create hook runner: %v", err)
}
dn, err := client.DownloadAndPackagePWA(pwaURL, manifestURL, nil, hookRunner)
if err != nil {
log.Fatalf("Failed to download and package PWA: %v", err)
}

View file

@ -4,14 +4,20 @@ import (
"log"
"os"
"github.com/Snider/Borg/pkg/hooks"
"github.com/Snider/Borg/pkg/website"
)
func main() {
log.Println("Collecting website...")
hookRunner, err := hooks.NewHookRunner("")
if err != nil {
log.Fatalf("Failed to create hook runner: %v", err)
}
// Download and package the website.
dn, err := website.DownloadAndPackageWebsite("https://example.com", 2, nil)
dn, err := website.DownloadAndPackageWebsite("https://example.com", 2, nil, hookRunner)
if err != nil {
log.Fatalf("Failed to collect website: %v", err)
}

126
pkg/hooks/hooks.go Normal file
View file

@ -0,0 +1,126 @@
package hooks
import (
	"bytes"
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"

	"gopkg.in/yaml.v3"
)
// HookEventType names a point in a collection run that hooks can subscribe to.
// Its string value is what appears as the "event" field of the JSON payload
// and as the key under "hooks" in the configuration file.
type HookEventType string

// Event types understood by the hook runner.
const (
	// OnFileCollected fires after each individual file has been collected.
	OnFileCollected = HookEventType("on_file_collected")
	// OnURLFound fires whenever a new URL is discovered.
	OnURLFound = HookEventType("on_url_found")
	// OnCollectionComplete fires once the whole collection has finished.
	OnCollectionComplete = HookEventType("on_collection_complete")
	// OnError fires when a failure occurs.
	OnError = HookEventType("on_error")
)
// Hook represents a single hook to be executed.
//
// Pattern is an optional filepath.Match pattern; when set, and the event
// carries a file, the hook only runs if the pattern matches the file's base
// name. Run is the command line handed to "sh -c".
type Hook struct {
Pattern string `yaml:"pattern"`
Run string `yaml:"run"`
}
// HookConfig represents the configuration for all hooks.
// It is decoded from YAML; the top-level "hooks" key maps each event type to
// the list of hooks registered for it.
type HookConfig struct {
Hooks map[HookEventType][]Hook `yaml:"hooks"`
}
// HookRunner is responsible for running hooks.
// It holds the parsed hook configuration and is created with NewHookRunner.
type HookRunner struct {
// config is the parsed configuration; NewHookRunner always sets it.
config *HookConfig
}
// NewHookRunner builds a HookRunner from the YAML file at configFile.
//
// An empty configFile is not an error: the returned runner carries an empty
// configuration, so no hooks are registered. A file that cannot be read or
// parsed yields a nil runner and a wrapped error.
func NewHookRunner(configFile string) (*HookRunner, error) {
	cfg := new(HookConfig)
	if configFile != "" {
		raw, err := os.ReadFile(configFile)
		if err != nil {
			return nil, fmt.Errorf("failed to read hook config file: %w", err)
		}
		if err := yaml.Unmarshal(raw, cfg); err != nil {
			return nil, fmt.Errorf("failed to unmarshal hook config: %w", err)
		}
	}
	return &HookRunner{config: cfg}, nil
}
// Event represents a hook event.
// It is the payload a hook command receives, JSON-encoded, on standard input.
// Every field except Event is omitted from the JSON when empty.
type Event struct {
// Event is the type of event being reported.
Event HookEventType `json:"event"`
// File is the path of the collected file; hook patterns are matched against its base name.
File string `json:"file,omitempty"`
// URL is the URL the event refers to.
URL string `json:"url,omitempty"`
// Type is the content type of the collected file.
Type string `json:"type,omitempty"`
// Error is the error message for OnError events.
Error string `json:"error,omitempty"`
}
// Trigger runs every hook registered for event.Event, in configuration order.
//
// A nil runner, or a runner without configuration, is treated as "no hooks
// configured" and returns nil, so callers that were handed a nil *HookRunner
// do not panic.
//
// A hook with a Pattern is filtered by matching that pattern (filepath.Match
// syntax) against the base name of event.File. The filter is only applied when
// event.File is non-empty; for events without a file the hook always runs.
//
// Trigger stops at the first failure, whether a malformed pattern or a hook
// that fails to run, and returns it wrapped. Hooks after the failing one are
// not executed.
func (r *HookRunner) Trigger(event Event) error {
	if r == nil || r.config == nil {
		return nil
	}

	// Indexing a nil or empty map yields a nil slice, so an event type with no
	// registered hooks simply skips the loop.
	for _, hook := range r.config.Hooks[event.Event] {
		if hook.Pattern != "" && event.File != "" {
			matched, err := filepath.Match(hook.Pattern, filepath.Base(event.File))
			if err != nil {
				return fmt.Errorf("failed to match pattern '%s' with file '%s': %w", hook.Pattern, event.File, err)
			}
			if !matched {
				continue
			}
		}

		if err := r.runHook(hook, event); err != nil {
			return fmt.Errorf("failed to run hook '%s': %w", hook.Run, err)
		}
	}
	return nil
}
// runHook executes a single hook through "sh -c" and feeds it the event as one
// line of JSON on standard input.
//
// It returns an error if the event cannot be encoded or if the command fails;
// in the latter case the command's combined stdout/stderr is appended to the
// error message.
func (r *HookRunner) runHook(hook Hook, event Event) error {
	// Encode before starting the process so an encoding failure is returned to
	// the caller instead of being printed from a background goroutine.
	var payload bytes.Buffer
	if err := json.NewEncoder(&payload).Encode(event); err != nil {
		return fmt.Errorf("failed to encode hook event: %w", err)
	}

	cmd := exec.Command("sh", "-c", hook.Run)
	// Let os/exec deliver the payload: it copies Stdin to the child, closes the
	// pipe at EOF, and waits for that copy to finish before the command
	// returns. A hook that exits without reading its input is not an error.
	cmd.Stdin = &payload

	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("hook execution failed: %w\n%s", err, string(output))
	}
	return nil
}

218
pkg/hooks/hooks_test.go Normal file
View file

@ -0,0 +1,218 @@
package hooks
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// setupTest creates a temporary directory holding a hook script (testhook.sh)
// and a hooks config (.borg-hooks.yaml) that registers the script for every
// event type. It returns the directory path and a cleanup function that
// removes the directory.
func setupTest(t *testing.T) (string, func()) {
t.Helper()
// Create a temporary directory for test artifacts
tmpDir, err := os.MkdirTemp("", "hooks-test-")
require.NoError(t, err)
// Create test hook script; it copies its standard input into testhook.output
// so tests can inspect the payload the hook received
scriptContent := "#!/bin/sh\ncat > " + filepath.Join(tmpDir, "testhook.output")
scriptPath := filepath.Join(tmpDir, "testhook.sh")
err = os.WriteFile(scriptPath, []byte(scriptContent), 0755)
require.NoError(t, err)
// Create test hooks config; on_file_collected is limited to *.pdf and *.txt,
// the other event types run the script unconditionally
hooksYAML := `
hooks:
on_file_collected:
- pattern: "*.pdf"
run: "` + scriptPath + `"
- pattern: "*.txt"
run: "` + scriptPath + `"
on_url_found:
- run: "` + scriptPath + `"
on_collection_complete:
- run: "` + scriptPath + `"
on_error:
- run: "` + scriptPath + `"
`
configPath := filepath.Join(tmpDir, ".borg-hooks.yaml")
err = os.WriteFile(configPath, []byte(hooksYAML), 0644)
require.NoError(t, err)
// The caller is responsible for invoking the returned cleanup function
return tmpDir, func() {
os.RemoveAll(tmpDir)
}
}
// TestNewHookRunner covers the four construction paths of NewHookRunner: a
// valid config file, a missing file, a malformed file, and an empty path.
func TestNewHookRunner(t *testing.T) {
	dir, cleanup := setupTest(t)
	defer cleanup()

	// A well-formed config file yields a runner with a parsed configuration.
	valid, err := NewHookRunner(filepath.Join(dir, ".borg-hooks.yaml"))
	require.NoError(t, err)
	assert.NotNil(t, valid)
	assert.NotNil(t, valid.config)

	// A path that does not exist is reported as an error.
	_, err = NewHookRunner("non-existent-file.yaml")
	assert.Error(t, err)

	// A file whose content does not decode into the config is rejected.
	badPath := filepath.Join(dir, "malformed.yaml")
	require.NoError(t, os.WriteFile(badPath, []byte("hooks: \n - invalid"), 0644))
	_, err = NewHookRunner(badPath)
	assert.Error(t, err)

	// An empty path is allowed and still produces a usable runner.
	empty, err := NewHookRunner("")
	require.NoError(t, err)
	assert.NotNil(t, empty)
	assert.NotNil(t, empty.config)
}
// TestHookRunner_Trigger fires one event of each type at a runner built from
// the setupTest configuration and checks, through the file the hook script
// writes, whether the hook ran and which payload it received.
func TestHookRunner_Trigger(t *testing.T) {
	dir, cleanup := setupTest(t)
	defer cleanup()

	runner, err := NewHookRunner(filepath.Join(dir, ".borg-hooks.yaml"))
	require.NoError(t, err)

	// The hook script writes whatever it reads from stdin to this file.
	outputFile := filepath.Join(dir, "testhook.output")

	// Events used by the cases below; a triggering case expects to read back
	// exactly the event it sent.
	pdfEvent := Event{
		Event: OnFileCollected,
		File:  "assets/document.pdf",
		URL:   "http://example.com/assets/document.pdf",
		Type:  "application/pdf",
	}
	txtEvent := Event{Event: OnFileCollected, File: "notes/notes.txt"}
	jpgEvent := Event{Event: OnFileCollected, File: "image.jpg"}
	urlEvent := Event{Event: OnURLFound, URL: "http://example.com/page2"}
	completeEvent := Event{Event: OnCollectionComplete}
	errorEvent := Event{Event: OnError, Error: "something went wrong"}

	cases := []struct {
		name          string
		event         Event
		shouldTrigger bool
		expectedEvent Event
	}{
		{name: "OnFileCollected - PDF Match with full path", event: pdfEvent, shouldTrigger: true, expectedEvent: pdfEvent},
		{name: "OnFileCollected - TXT Match with full path", event: txtEvent, shouldTrigger: true, expectedEvent: txtEvent},
		{name: "OnFileCollected - No Match", event: jpgEvent, shouldTrigger: false},
		{name: "OnURLFound", event: urlEvent, shouldTrigger: true, expectedEvent: urlEvent},
		{name: "OnCollectionComplete", event: completeEvent, shouldTrigger: true, expectedEvent: completeEvent},
		{name: "OnError", event: errorEvent, shouldTrigger: true, expectedEvent: errorEvent},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Start from a clean slate so a leftover file cannot mask a hook
			// that did not run.
			_ = os.Remove(outputFile)

			require.NoError(t, runner.Trigger(tc.event))

			if tc.shouldTrigger {
				// The hook ran: its output must decode to the expected event.
				raw, err := os.ReadFile(outputFile)
				require.NoError(t, err)

				var got Event
				require.NoError(t, json.Unmarshal(raw, &got))
				assert.Equal(t, tc.expectedEvent, got)
				return
			}

			// The hook must not have run, so no output file may exist.
			_, statErr := os.Stat(outputFile)
			assert.True(t, os.IsNotExist(statErr), "Hook should not have been triggered")
		})
	}
}
// TestHookRunner_FailingHook verifies that Trigger returns an error when the
// hook command exits with a non-zero status.
func TestHookRunner_FailingHook(t *testing.T) {
tmpDir, cleanup := setupTest(t)
defer cleanup()
// Create a failing script
scriptContent := "#!/bin/sh\nexit 1"
scriptPath := filepath.Join(tmpDir, "failing-hook.sh")
err := os.WriteFile(scriptPath, []byte(scriptContent), 0755)
require.NoError(t, err)
// Create hooks config with the failing script registered for on_error
hooksYAML := `
hooks:
on_error:
- run: "` + scriptPath + `"
`
configPath := filepath.Join(tmpDir, "failing-hooks.yaml")
err = os.WriteFile(configPath, []byte(hooksYAML), 0644)
require.NoError(t, err)
runner, err := NewHookRunner(configPath)
require.NoError(t, err)
// Triggering the event must surface the script's failure
err = runner.Trigger(Event{Event: OnError, Error: "test error"})
assert.Error(t, err)
}

View file

@ -12,6 +12,7 @@ import (
"sync"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/hooks"
"github.com/schollz/progressbar/v3"
"golang.org/x/net/html"
)
@ -27,7 +28,7 @@ var manifestFallbackPaths = []string{
// PWAClient is an interface for interacting with PWAs.
type PWAClient interface {
FindManifest(pwaURL string) (string, error)
DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar) (*datanode.DataNode, error)
DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error)
}
// NewPWAClient creates a new PWAClient.
@ -158,13 +159,23 @@ type Manifest struct {
// DownloadAndPackagePWA downloads and packages a PWA into a DataNode.
// It downloads the manifest, all referenced assets, and parses HTML pages
// for additional linked resources (CSS, JS, images).
func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar) (*datanode.DataNode, error) {
func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) {
dn := datanode.New()
var wg sync.WaitGroup
var errs []error
var mu sync.Mutex
downloaded := make(map[string]bool)
triggerErrorHook := func(err error) {
mu.Lock()
defer mu.Unlock()
errs = append(errs, err)
hookRunner.Trigger(hooks.Event{
Event: hooks.OnError,
Error: err.Error(),
})
}
var downloadAndAdd func(assetURL string, parseHTML bool)
downloadAndAdd = func(assetURL string, parseHTML bool) {
defer wg.Done()
@ -183,33 +194,25 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
resp, err := p.client.Get(assetURL)
if err != nil {
mu.Lock()
errs = append(errs, fmt.Errorf("failed to download %s: %w", assetURL, err))
mu.Unlock()
triggerErrorHook(fmt.Errorf("failed to download %s: %w", assetURL, err))
return
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
mu.Lock()
errs = append(errs, fmt.Errorf("failed to download %s: status code %d", assetURL, resp.StatusCode))
mu.Unlock()
triggerErrorHook(fmt.Errorf("failed to download %s: status code %d", assetURL, resp.StatusCode))
return
}
body, err := io.ReadAll(resp.Body)
if err != nil {
mu.Lock()
errs = append(errs, fmt.Errorf("failed to read body of %s: %w", assetURL, err))
mu.Unlock()
triggerErrorHook(fmt.Errorf("failed to read body of %s: %w", assetURL, err))
return
}
u, err := url.Parse(assetURL)
if err != nil {
mu.Lock()
errs = append(errs, fmt.Errorf("failed to parse asset URL %s: %w", assetURL, err))
mu.Unlock()
triggerErrorHook(fmt.Errorf("failed to parse asset URL %s: %w", assetURL, err))
return
}
@ -218,11 +221,21 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
path = "index.html"
}
dn.AddData(path, body)
hookRunner.Trigger(hooks.Event{
Event: hooks.OnFileCollected,
File: path,
URL: assetURL,
Type: resp.Header.Get("Content-Type"),
})
// Parse HTML for additional assets
if parseHTML && isHTMLContent(resp.Header.Get("Content-Type"), body) {
additionalAssets := p.extractAssetsFromHTML(assetURL, body)
for _, asset := range additionalAssets {
hookRunner.Trigger(hooks.Event{
Event: hooks.OnURLFound,
URL: asset,
})
mu.Lock()
if !downloaded[asset] {
wg.Add(1)
@ -272,6 +285,9 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
// If no start_url, use the PWA URL itself
htmlPages = append(htmlPages, pwaURL)
}
for _, page := range htmlPages {
hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: page})
}
// Icons
for _, icon := range manifest.Icons {
@ -279,6 +295,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
iconURL, err := p.resolveURL(manifestURL, icon.Src)
if err == nil {
assetsToDownload = append(assetsToDownload, iconURL.String())
hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: iconURL.String()})
}
}
}
@ -289,6 +306,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
screenshotURL, err := p.resolveURL(manifestURL, screenshot.Src)
if err == nil {
assetsToDownload = append(assetsToDownload, screenshotURL.String())
hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: screenshotURL.String()})
}
}
}
@ -299,6 +317,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
shortcutURL, err := p.resolveURL(manifestURL, shortcut.URL)
if err == nil {
htmlPages = append(htmlPages, shortcutURL.String())
hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: shortcutURL.String()})
}
}
for _, icon := range shortcut.Icons {
@ -306,6 +325,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
iconURL, err := p.resolveURL(manifestURL, icon.Src)
if err == nil {
assetsToDownload = append(assetsToDownload, iconURL.String())
hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: iconURL.String()})
}
}
}
@ -316,6 +336,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
swURL, err := p.resolveURL(manifestURL, manifest.ServiceWorker.Src)
if err == nil {
assetsToDownload = append(assetsToDownload, swURL.String())
hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: swURL.String()})
}
}
@ -339,6 +360,7 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
if manifest.ServiceWorker.Src == "" {
swURL := p.detectServiceWorker(pwaURL, dn)
if swURL != "" && !downloaded[swURL] {
hookRunner.Trigger(hooks.Event{Event: hooks.OnURLFound, URL: swURL})
wg.Add(1)
go downloadAndAdd(swURL, false)
wg.Wait()
@ -353,6 +375,9 @@ func (p *pwaClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progr
return dn, fmt.Errorf("%s", strings.Join(errStrings, "; "))
}
hookRunner.Trigger(hooks.Event{
Event: hooks.OnCollectionComplete,
})
return dn, nil
}
@ -511,6 +536,6 @@ func (m *MockPWAClient) FindManifest(pwaURL string) (string, error) {
}
// DownloadAndPackagePWA mocks the downloading and packaging of a PWA.
func (m *MockPWAClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar) (*datanode.DataNode, error) {
func (m *MockPWAClient) DownloadAndPackagePWA(pwaURL, manifestURL string, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) {
return m.DN, m.Err
}

View file

@ -8,7 +8,9 @@ import (
"strings"
"testing"
"github.com/Snider/Borg/pkg/hooks"
"github.com/schollz/progressbar/v3"
"github.com/stretchr/testify/require"
)
// --- Test Cases for FindManifest ---
@ -142,8 +144,10 @@ func TestDownloadAndPackagePWA_Good(t *testing.T) {
defer server.Close()
client := NewPWAClient()
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
bar := progressbar.NewOptions(1, progressbar.OptionSetWriter(io.Discard))
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", bar)
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", bar, hookRunner)
if err != nil {
t.Fatalf("DownloadAndPackagePWA failed: %v", err)
}
@ -158,11 +162,14 @@ func TestDownloadAndPackagePWA_Good(t *testing.T) {
}
func TestDownloadAndPackagePWA_Bad(t *testing.T) {
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
t.Run("Bad Manifest URL", func(t *testing.T) {
server := newPWATestServer()
defer server.Close()
client := NewPWAClient()
_, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/nonexistent-manifest.json", nil)
_, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/nonexistent-manifest.json", nil, hookRunner)
if err == nil {
t.Fatal("expected an error for bad manifest url, but got none")
}
@ -179,7 +186,7 @@ func TestDownloadAndPackagePWA_Bad(t *testing.T) {
}))
defer server.Close()
client := NewPWAClient()
_, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil)
_, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner)
if err == nil {
t.Fatal("expected an error for asset 404, but got none")
}
@ -191,6 +198,9 @@ func TestDownloadAndPackagePWA_Bad(t *testing.T) {
}
func TestDownloadAndPackagePWA_Ugly(t *testing.T) {
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
t.Run("Manifest with no assets", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
@ -199,7 +209,7 @@ func TestDownloadAndPackagePWA_Ugly(t *testing.T) {
defer server.Close()
client := NewPWAClient()
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil)
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner)
if err != nil {
t.Fatalf("unexpected error for manifest with no assets: %v", err)
}
@ -376,7 +386,9 @@ func TestMockPWAClient(t *testing.T) {
t.Run("DownloadAndPackagePWA returns configured datanode", func(t *testing.T) {
mock := NewMockPWAClient("", nil, nil)
dn, err := mock.DownloadAndPackagePWA("http://example.com", "http://example.com/manifest.json", nil)
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
dn, err := mock.DownloadAndPackagePWA("http://example.com", "http://example.com/manifest.json", nil, hookRunner)
if err != nil {
t.Fatalf("DownloadAndPackagePWA error = %v", err)
}
@ -428,7 +440,9 @@ func TestDownloadAndPackagePWA_FullManifest(t *testing.T) {
defer server.Close()
client := NewPWAClient()
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil)
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner)
if err != nil {
t.Fatalf("DownloadAndPackagePWA failed: %v", err)
}
@ -496,7 +510,9 @@ func TestDownloadAndPackagePWA_ServiceWorker(t *testing.T) {
defer server.Close()
client := NewPWAClient()
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil)
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
dn, err := client.DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json", nil, hookRunner)
if err != nil {
t.Fatalf("DownloadAndPackagePWA failed: %v", err)
}

View file

@ -8,6 +8,7 @@ import (
"strings"
"github.com/Snider/Borg/pkg/datanode"
"github.com/Snider/Borg/pkg/hooks"
"github.com/schollz/progressbar/v3"
"golang.org/x/net/html"
@ -24,32 +25,34 @@ type Downloader struct {
progressBar *progressbar.ProgressBar
client *http.Client
errors []error
hookRunner *hooks.HookRunner
}
// NewDownloader creates a new Downloader.
func NewDownloader(maxDepth int) *Downloader {
return NewDownloaderWithClient(maxDepth, http.DefaultClient)
func NewDownloader(maxDepth int, hookRunner *hooks.HookRunner) *Downloader {
return NewDownloaderWithClient(maxDepth, http.DefaultClient, hookRunner)
}
// NewDownloaderWithClient creates a new Downloader with a custom http.Client.
func NewDownloaderWithClient(maxDepth int, client *http.Client) *Downloader {
func NewDownloaderWithClient(maxDepth int, client *http.Client, hookRunner *hooks.HookRunner) *Downloader {
return &Downloader{
dn: datanode.New(),
visited: make(map[string]bool),
maxDepth: maxDepth,
client: client,
errors: make([]error, 0),
hookRunner: hookRunner,
}
}
// downloadAndPackageWebsite downloads a website and packages it into a DataNode.
func downloadAndPackageWebsite(startURL string, maxDepth int, bar *progressbar.ProgressBar) (*datanode.DataNode, error) {
func downloadAndPackageWebsite(startURL string, maxDepth int, bar *progressbar.ProgressBar, hookRunner *hooks.HookRunner) (*datanode.DataNode, error) {
baseURL, err := url.Parse(startURL)
if err != nil {
return nil, err
}
d := NewDownloader(maxDepth)
d := NewDownloader(maxDepth, hookRunner)
d.baseURL = baseURL
d.progressBar = bar
d.crawl(startURL, 0)
@ -62,6 +65,9 @@ func downloadAndPackageWebsite(startURL string, maxDepth int, bar *progressbar.P
return nil, fmt.Errorf("failed to download website:\n%s", strings.Join(errs, "\n"))
}
d.hookRunner.Trigger(hooks.Event{
Event: hooks.OnCollectionComplete,
})
return d.dn, nil
}
@ -76,24 +82,30 @@ func (d *Downloader) crawl(pageURL string, depth int) {
resp, err := d.client.Get(pageURL)
if err != nil {
d.errors = append(d.errors, fmt.Errorf("Error getting %s: %w", pageURL, err))
d.triggerErrorHook(fmt.Errorf("Error getting %s: %w", pageURL, err))
return
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
d.errors = append(d.errors, fmt.Errorf("bad status for %s: %s", pageURL, resp.Status))
d.triggerErrorHook(fmt.Errorf("bad status for %s: %s", pageURL, resp.Status))
return
}
body, err := io.ReadAll(resp.Body)
if err != nil {
d.errors = append(d.errors, fmt.Errorf("Error reading body of %s: %w", pageURL, err))
d.triggerErrorHook(fmt.Errorf("Error reading body of %s: %w", pageURL, err))
return
}
relPath := d.getRelativePath(pageURL)
d.dn.AddData(relPath, body)
d.hookRunner.Trigger(hooks.Event{
Event: hooks.OnFileCollected,
File: relPath,
URL: pageURL,
Type: resp.Header.Get("Content-Type"),
})
// Don't try to parse non-html content
if !strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") {
@ -102,7 +114,7 @@ func (d *Downloader) crawl(pageURL string, depth int) {
doc, err := html.Parse(strings.NewReader(string(body)))
if err != nil {
d.errors = append(d.errors, fmt.Errorf("Error parsing HTML of %s: %w", pageURL, err))
d.triggerErrorHook(fmt.Errorf("Error parsing HTML of %s: %w", pageURL, err))
return
}
@ -115,6 +127,10 @@ func (d *Downloader) crawl(pageURL string, depth int) {
if err != nil {
continue
}
d.hookRunner.Trigger(hooks.Event{
Event: hooks.OnURLFound,
URL: link,
})
if d.isLocal(link) {
if isAsset(link) {
d.downloadAsset(link)
@ -143,24 +159,38 @@ func (d *Downloader) downloadAsset(assetURL string) {
resp, err := d.client.Get(assetURL)
if err != nil {
d.errors = append(d.errors, fmt.Errorf("Error getting asset %s: %w", assetURL, err))
d.triggerErrorHook(fmt.Errorf("Error getting asset %s: %w", assetURL, err))
return
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
d.errors = append(d.errors, fmt.Errorf("bad status for asset %s: %s", assetURL, resp.Status))
d.triggerErrorHook(fmt.Errorf("bad status for asset %s: %s", assetURL, resp.Status))
return
}
body, err := io.ReadAll(resp.Body)
if err != nil {
d.errors = append(d.errors, fmt.Errorf("Error reading body of asset %s: %w", assetURL, err))
d.triggerErrorHook(fmt.Errorf("Error reading body of asset %s: %w", assetURL, err))
return
}
relPath := d.getRelativePath(assetURL)
d.dn.AddData(relPath, body)
d.hookRunner.Trigger(hooks.Event{
Event: hooks.OnFileCollected,
File: relPath,
URL: assetURL,
Type: resp.Header.Get("Content-Type"),
})
}
// triggerErrorHook records err as a download failure and notifies the hooks
// registered for the OnError event.
//
// If the hook itself fails, that failure is appended to d.errors alongside the
// original error rather than being discarded.
func (d *Downloader) triggerErrorHook(err error) {
	d.errors = append(d.errors, err)

	hookErr := d.hookRunner.Trigger(hooks.Event{
		Event: hooks.OnError,
		Error: err.Error(),
	})
	if hookErr != nil {
		d.errors = append(d.errors, fmt.Errorf("on_error hook failed: %w", hookErr))
	}
}
func (d *Downloader) getRelativePath(pageURL string) string {

View file

@ -10,7 +10,9 @@ import (
"testing"
"time"
"github.com/Snider/Borg/pkg/hooks"
"github.com/schollz/progressbar/v3"
"github.com/stretchr/testify/require"
)
// --- Test Cases ---
@ -19,8 +21,11 @@ func TestDownloadAndPackageWebsite_Good(t *testing.T) {
server := newWebsiteTestServer()
defer server.Close()
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
bar := progressbar.NewOptions(1, progressbar.OptionSetWriter(io.Discard))
dn, err := DownloadAndPackageWebsite(server.URL, 2, bar)
dn, err := DownloadAndPackageWebsite(server.URL, 2, bar, hookRunner)
if err != nil {
t.Fatalf("DownloadAndPackageWebsite failed: %v", err)
}
@ -51,8 +56,11 @@ func TestDownloadAndPackageWebsite_Good(t *testing.T) {
}
func TestDownloadAndPackageWebsite_Bad(t *testing.T) {
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
t.Run("Invalid Start URL", func(t *testing.T) {
_, err := DownloadAndPackageWebsite("http://invalid-url", 1, nil)
_, err := DownloadAndPackageWebsite("http://invalid-url", 1, nil, hookRunner)
if err == nil {
t.Fatal("Expected an error for an invalid start URL, but got nil")
}
@ -63,7 +71,7 @@ func TestDownloadAndPackageWebsite_Bad(t *testing.T) {
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
}))
defer server.Close()
_, err := DownloadAndPackageWebsite(server.URL, 1, nil)
_, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner)
if err == nil {
t.Fatal("Expected an error for a server error on the start URL, but got nil")
}
@ -80,7 +88,7 @@ func TestDownloadAndPackageWebsite_Bad(t *testing.T) {
}))
defer server.Close()
// We expect an error because the link is broken.
dn, err := DownloadAndPackageWebsite(server.URL, 1, nil)
dn, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner)
if err == nil {
t.Fatal("Expected an error for a broken link, but got nil")
}
@ -94,12 +102,15 @@ func TestDownloadAndPackageWebsite_Bad(t *testing.T) {
}
func TestDownloadAndPackageWebsite_Ugly(t *testing.T) {
hookRunner, err := hooks.NewHookRunner("")
require.NoError(t, err)
t.Run("Exceed Max Depth", func(t *testing.T) {
server := newWebsiteTestServer()
defer server.Close()
bar := progressbar.NewOptions(1, progressbar.OptionSetWriter(io.Discard))
dn, err := DownloadAndPackageWebsite(server.URL, 1, bar) // Max depth of 1
dn, err := DownloadAndPackageWebsite(server.URL, 1, bar, hookRunner) // Max depth of 1
if err != nil {
t.Fatalf("DownloadAndPackageWebsite failed: %v", err)
}
@ -122,7 +133,7 @@ func TestDownloadAndPackageWebsite_Ugly(t *testing.T) {
fmt.Fprint(w, `<a href="http://externalsite.com/page.html">External</a>`)
}))
defer server.Close()
dn, err := DownloadAndPackageWebsite(server.URL, 1, nil)
dn, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner)
if err != nil {
t.Fatalf("DownloadAndPackageWebsite failed: %v", err)
}
@ -156,7 +167,7 @@ func TestDownloadAndPackageWebsite_Ugly(t *testing.T) {
// For now, we'll just test that it doesn't hang forever.
done := make(chan bool)
go func() {
_, err := DownloadAndPackageWebsite(server.URL, 1, nil)
_, err := DownloadAndPackageWebsite(server.URL, 1, nil, hookRunner)
if err != nil && !strings.Contains(err.Error(), "context deadline exceeded") {
// We expect a timeout error, but other errors are failures.
t.Errorf("unexpected error: %v", err)