feat: Add recursive website downloader and progress bar
This commit introduces a new `collect website` command that recursively downloads a website to a specified depth.

- A new `pkg/website` package contains the logic for the recursive download.
- A new `pkg/ui` package provides a progress bar for long-running operations, which is used by the website downloader.
- The `collect pwa` subcommand has been restored to be PWA-specific.
This commit is contained in:
parent
bd65eefcd3
commit
8e82bada06
11 changed files with 387 additions and 47 deletions
|
|
@ -1,47 +1,16 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"borg-data-collector/pkg/vcs"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// collectCmd represents the collect command
|
||||
var collectCmd = &cobra.Command{
|
||||
Use: "collect [repository-url]",
|
||||
Short: "Collect a single repository",
|
||||
Long: `Collect a single repository and store it in a DataNode.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
repoURL := args[0]
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
|
||||
dn, err := vcs.CloneGitRepository(repoURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error cloning repository: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
data, err := dn.ToTar()
|
||||
if err != nil {
|
||||
fmt.Printf("Error serializing DataNode: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, data, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing DataNode to file: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Repository saved to %s\n", outputFile)
|
||||
},
|
||||
Use: "collect",
|
||||
Short: "Collect a resource and store it in a DataNode.",
|
||||
Long: `Collect a resource from a git repository, a website, or other URI and store it in a DataNode.`,
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(collectCmd)
|
||||
collectCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode")
|
||||
}
|
||||
|
|
|
|||
47
cmd/collect_git.go
Normal file
47
cmd/collect_git.go
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"borg-data-collector/pkg/vcs"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// collectGitCmd represents the collect git command
|
||||
var collectGitCmd = &cobra.Command{
|
||||
Use: "git [repository-url]",
|
||||
Short: "Collect a single Git repository",
|
||||
Long: `Collect a single Git repository and store it in a DataNode.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
repoURL := args[0]
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
|
||||
dn, err := vcs.CloneGitRepository(repoURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error cloning repository: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
data, err := dn.ToTar()
|
||||
if err != nil {
|
||||
fmt.Printf("Error serializing DataNode: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, data, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing DataNode to file: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Repository saved to %s\n", outputFile)
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
collectCmd.AddCommand(collectGitCmd)
|
||||
collectGitCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode")
|
||||
}
|
||||
|
|
@ -9,18 +9,18 @@ import (
|
|||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// pwaCmd represents the pwa command
|
||||
var pwaCmd = &cobra.Command{
|
||||
// collectPWACmd represents the collect pwa command
|
||||
var collectPWACmd = &cobra.Command{
|
||||
Use: "pwa [url]",
|
||||
Short: "Download a PWA from a URL",
|
||||
Long: `Downloads a Progressive Web Application (PWA) from a given URL by finding its manifest.`,
|
||||
Short: "Collect a single PWA",
|
||||
Long: `Collect a single PWA and store it in a DataNode.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
pwaURL := args[0]
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
|
||||
fmt.Println("Finding PWA manifest...")
|
||||
manifestURL, err := pwa.FindManifestURL(pwaURL)
|
||||
manifestURL, err := pwa.FindManifest(pwaURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error finding manifest: %v\n", err)
|
||||
return
|
||||
|
|
@ -36,7 +36,7 @@ var pwaCmd = &cobra.Command{
|
|||
|
||||
pwaData, err := dn.ToTar()
|
||||
if err != nil {
|
||||
fmt.Printf("Error serializing PWA data: %v\n", err)
|
||||
fmt.Printf("Error converting PWA to bytes: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -51,6 +51,6 @@ var pwaCmd = &cobra.Command{
|
|||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(pwaCmd)
|
||||
pwaCmd.PersistentFlags().String("output", "pwa.dat", "Output file for the PWA DataNode")
|
||||
collectCmd.AddCommand(collectPWACmd)
|
||||
collectPWACmd.PersistentFlags().String("output", "pwa.dat", "Output file for the DataNode")
|
||||
}
|
||||
49
cmd/collect_website.go
Normal file
49
cmd/collect_website.go
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"borg-data-collector/pkg/website"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// collectWebsiteCmd represents the collect website command
|
||||
var collectWebsiteCmd = &cobra.Command{
|
||||
Use: "website [url]",
|
||||
Short: "Collect a single website",
|
||||
Long: `Collect a single website and store it in a DataNode.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
websiteURL := args[0]
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
depth, _ := cmd.Flags().GetInt("depth")
|
||||
|
||||
dn, err := website.DownloadAndPackageWebsite(websiteURL, depth)
|
||||
if err != nil {
|
||||
fmt.Printf("Error downloading and packaging website: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
websiteData, err := dn.ToTar()
|
||||
if err != nil {
|
||||
fmt.Printf("Error converting website to bytes: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, websiteData, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing website to file: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Website saved to %s\n", outputFile)
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
collectCmd.AddCommand(collectWebsiteCmd)
|
||||
collectWebsiteCmd.PersistentFlags().String("output", "website.dat", "Output file for the DataNode")
|
||||
collectWebsiteCmd.PersistentFlags().Int("depth", 2, "Recursion depth for downloading")
|
||||
}
|
||||
4
go.mod
4
go.mod
|
|
@ -19,7 +19,10 @@ require (
|
|||
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
|
||||
github.com/kevinburke/ssh_config v1.2.0 // indirect
|
||||
github.com/leaanthony/debme v1.2.1 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/pjbgf/sha1cd v0.3.2 // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/schollz/progressbar/v3 v3.18.0 // indirect
|
||||
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
|
||||
github.com/skeema/knownhosts v1.3.1 // indirect
|
||||
github.com/spf13/pflag v1.0.10 // indirect
|
||||
|
|
@ -27,5 +30,6 @@ require (
|
|||
golang.org/x/crypto v0.43.0 // indirect
|
||||
golang.org/x/net v0.46.0 // indirect
|
||||
golang.org/x/sys v0.37.0 // indirect
|
||||
golang.org/x/term v0.36.0 // indirect
|
||||
gopkg.in/warnings.v0 v0.1.2 // indirect
|
||||
)
|
||||
|
|
|
|||
8
go.sum
8
go.sum
|
|
@ -35,11 +35,17 @@ github.com/leaanthony/debme v1.2.1 h1:9Tgwf+kjcrbMQ4WnPcEIUcQuIZYqdWftzZkBr+i/oO
|
|||
github.com/leaanthony/debme v1.2.1/go.mod h1:3V+sCm5tYAgQymvSOfYQ5Xx2JCr+OXiD9Jkw3otUjiA=
|
||||
github.com/leaanthony/slicer v1.5.0/go.mod h1:FwrApmf8gOrpzEWM2J/9Lh79tyq8KTX5AzRtwV7m4AY=
|
||||
github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||
github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4=
|
||||
github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA=
|
||||
github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
|
||||
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=
|
||||
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
|
||||
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||
|
|
@ -76,6 +82,8 @@ golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
|||
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
|
||||
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
|
||||
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ type Icon struct {
|
|||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
// FindManifestURL finds the manifest URL from a given HTML page.
|
||||
func FindManifestURL(pageURL string) (string, error) {
|
||||
// FindManifest finds the manifest URL from a given HTML page.
|
||||
func FindManifest(pageURL string) (string, error) {
|
||||
resp, err := http.Get(pageURL)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
func TestFindManifestURL(t *testing.T) {
|
||||
func TestFindManifest(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
w.Write([]byte(`
|
||||
|
|
@ -25,9 +25,9 @@ func TestFindManifestURL(t *testing.T) {
|
|||
defer server.Close()
|
||||
|
||||
expectedURL := server.URL + "/manifest.json"
|
||||
actualURL, err := FindManifestURL(server.URL)
|
||||
actualURL, err := FindManifest(server.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("FindManifestURL failed: %v", err)
|
||||
t.Fatalf("FindManifest failed: %v", err)
|
||||
}
|
||||
|
||||
if actualURL != expectedURL {
|
||||
|
|
|
|||
15
pkg/ui/progressbar.go
Normal file
15
pkg/ui/progressbar.go
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
package ui
|
||||
|
||||
import (
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
// NewProgressBar creates a new progress bar with the specified total and description.
|
||||
func NewProgressBar(total int, description string) *progressbar.ProgressBar {
|
||||
return progressbar.NewOptions(total,
|
||||
progressbar.OptionSetDescription(description),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionClearOnFinish(),
|
||||
)
|
||||
}
|
||||
166
pkg/website/website.go
Normal file
166
pkg/website/website.go
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
package website
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
"borg-data-collector/pkg/datanode"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// Downloader is a recursive website downloader.
|
||||
type Downloader struct {
|
||||
baseURL *url.URL
|
||||
dn *datanode.DataNode
|
||||
visited map[string]bool
|
||||
maxDepth int
|
||||
progressBar *progressbar.ProgressBar
|
||||
}
|
||||
|
||||
// NewDownloader creates a new Downloader.
|
||||
func NewDownloader(maxDepth int) *Downloader {
|
||||
return &Downloader{
|
||||
dn: datanode.New(),
|
||||
visited: make(map[string]bool),
|
||||
maxDepth: maxDepth,
|
||||
}
|
||||
}
|
||||
|
||||
// DownloadAndPackageWebsite downloads a website and packages it into a DataNode.
|
||||
func DownloadAndPackageWebsite(startURL string, maxDepth int) (*datanode.DataNode, error) {
|
||||
baseURL, err := url.Parse(startURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
d := NewDownloader(maxDepth)
|
||||
d.baseURL = baseURL
|
||||
|
||||
fmt.Println("Downloading website...")
|
||||
d.progressBar = progressbar.NewOptions(1, progressbar.OptionSetDescription("Downloading"))
|
||||
d.crawl(startURL, 0)
|
||||
|
||||
return d.dn, nil
|
||||
}
|
||||
|
||||
func (d *Downloader) crawl(pageURL string, depth int) {
|
||||
if depth > d.maxDepth || d.visited[pageURL] {
|
||||
return
|
||||
}
|
||||
d.visited[pageURL] = true
|
||||
d.progressBar.Add(1)
|
||||
|
||||
resp, err := http.Get(pageURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error getting %s: %v\n", pageURL, err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
fmt.Printf("Error reading body of %s: %v\n", pageURL, err)
|
||||
return
|
||||
}
|
||||
|
||||
relPath := d.getRelativePath(pageURL)
|
||||
d.dn.AddData(relPath, body)
|
||||
|
||||
doc, err := html.Parse(strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
fmt.Printf("Error parsing HTML of %s: %v\n", pageURL, err)
|
||||
return
|
||||
}
|
||||
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "href" || a.Key == "src" {
|
||||
link, err := d.resolveURL(pageURL, a.Val)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if d.isLocal(link) {
|
||||
if isAsset(link) {
|
||||
d.downloadAsset(link)
|
||||
} else {
|
||||
d.crawl(link, depth+1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
f(doc)
|
||||
}
|
||||
|
||||
func (d *Downloader) downloadAsset(assetURL string) {
|
||||
if d.visited[assetURL] {
|
||||
return
|
||||
}
|
||||
d.visited[assetURL] = true
|
||||
d.progressBar.Add(1)
|
||||
|
||||
resp, err := http.Get(assetURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error getting asset %s: %v\n", assetURL, err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
fmt.Printf("Error reading body of asset %s: %v\n", assetURL, err)
|
||||
return
|
||||
}
|
||||
|
||||
relPath := d.getRelativePath(assetURL)
|
||||
d.dn.AddData(relPath, body)
|
||||
}
|
||||
|
||||
func (d *Downloader) getRelativePath(pageURL string) string {
|
||||
u, err := url.Parse(pageURL)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimPrefix(u.Path, "/")
|
||||
}
|
||||
|
||||
func (d *Downloader) resolveURL(base, ref string) (string, error) {
|
||||
baseURL, err := url.Parse(base)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
refURL, err := url.Parse(ref)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return baseURL.ResolveReference(refURL).String(), nil
|
||||
}
|
||||
|
||||
func (d *Downloader) isLocal(pageURL string) bool {
|
||||
u, err := url.Parse(pageURL)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return u.Hostname() == d.baseURL.Hostname()
|
||||
}
|
||||
|
||||
// isAsset reports whether pageURL names a static asset (stylesheet, script,
// or image) judged by the file extension of its path. The query string and
// fragment are ignored and the extension match is case-insensitive, so
// "style.css?v=3" and "LOGO.PNG" both count as assets.
func isAsset(pageURL string) bool {
	target := pageURL
	// Match against the path only; fall back to the raw string when the URL
	// cannot be parsed.
	if u, err := url.Parse(pageURL); err == nil {
		target = u.Path
	}
	target = strings.ToLower(target)

	for _, ext := range []string{".css", ".js", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico"} {
		if strings.HasSuffix(target, ext) {
			return true
		}
	}
	return false
}
|
||||
82
pkg/website/website_test.go
Normal file
82
pkg/website/website_test.go
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
package website
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDownloadAndPackageWebsite(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/":
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
w.Write([]byte(`
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Test Website</title>
|
||||
<link rel="stylesheet" href="style.css">
|
||||
</head>
|
||||
<body>
|
||||
<h1>Hello, Website!</h1>
|
||||
<a href="/page2.html">Page 2</a>
|
||||
<img src="image.png">
|
||||
</body>
|
||||
</html>
|
||||
`))
|
||||
case "/style.css":
|
||||
w.Header().Set("Content-Type", "text/css")
|
||||
w.Write([]byte(`body { color: red; }`))
|
||||
case "/image.png":
|
||||
w.Header().Set("Content-Type", "image/png")
|
||||
w.Write([]byte("fake image data"))
|
||||
case "/page2.html":
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
w.Write([]byte(`
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Page 2</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Page 2</h1>
|
||||
<a href="/page3.html">Page 3</a>
|
||||
</body>
|
||||
</html>
|
||||
`))
|
||||
case "/page3.html":
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
w.Write([]byte(`
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Page 3</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Page 3</h1>
|
||||
</body>
|
||||
</html>
|
||||
`))
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
dn, err := DownloadAndPackageWebsite(server.URL, 2)
|
||||
if err != nil {
|
||||
t.Fatalf("DownloadAndPackageWebsite failed: %v", err)
|
||||
}
|
||||
|
||||
expectedFiles := []string{"", "style.css", "image.png", "page2.html", "page3.html"}
|
||||
for _, file := range expectedFiles {
|
||||
exists, err := dn.Exists(file)
|
||||
if err != nil {
|
||||
t.Fatalf("Exists failed for %s: %v", file, err)
|
||||
}
|
||||
if !exists {
|
||||
t.Errorf("Expected to find file %s in DataNode, but it was not found", file)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue