#2 from Snider/feature-pwa-downloader

Add PWA Download and Serve Commands
This commit is contained in:
Snider 2025-10-31 21:37:31 +00:00 committed by GitHub
commit 73b814f1de
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1369 additions and 267 deletions

View file

@ -3,10 +3,11 @@ package cmd
import (
"fmt"
"os"
"strings"
"borg-data-collector/pkg/borg"
"borg-data-collector/pkg/github"
"borg-data-collector/pkg/trix"
"borg-data-collector/pkg/vcs"
"github.com/spf13/cobra"
)
@ -15,7 +16,7 @@ import (
var allCmd = &cobra.Command{
Use: "all [user/org]",
Short: "Collect all public repositories from a user or organization",
Long: `Collect all public repositories from a user or organization and store them in a Trix cube.`,
Long: `Collect all public repositories from a user or organization and store them in a DataNode.`,
Args: cobra.ExactArgs(1),
Run: func(cmd *cobra.Command, args []string) {
fmt.Println(borg.GetRandomAssimilationMessage())
@ -26,30 +27,30 @@ var allCmd = &cobra.Command{
return
}
outputFile, _ := cmd.Flags().GetString("output")
cube, err := trix.NewCube(outputFile)
if err != nil {
fmt.Println(err)
return
}
defer cube.Close()
outputDir, _ := cmd.Flags().GetString("output")
for _, repoURL := range repos {
fmt.Printf("Cloning %s...\n", repoURL)
tempPath, err := os.MkdirTemp("", "borg-clone-*")
if err != nil {
fmt.Println(err)
return
}
defer os.RemoveAll(tempPath)
err = addRepoToCube(repoURL, cube, tempPath)
dn, err := vcs.CloneGitRepository(repoURL)
if err != nil {
fmt.Printf("Error cloning %s: %s\n", repoURL, err)
continue
}
data, err := dn.ToTar()
if err != nil {
fmt.Printf("Error serializing DataNode for %s: %v\n", repoURL, err)
continue
}
repoName := strings.Split(repoURL, "/")[len(strings.Split(repoURL, "/"))-1]
outputFile := fmt.Sprintf("%s/%s.dat", outputDir, repoName)
err = os.WriteFile(outputFile, data, 0644)
if err != nil {
fmt.Printf("Error writing DataNode for %s to file: %v\n", repoURL, err)
continue
}
}
fmt.Println(borg.GetRandomCodeLongMessage())
@ -57,5 +58,6 @@ var allCmd = &cobra.Command{
}
func init() {
collectCmd.AddCommand(allCmd)
rootCmd.AddCommand(allCmd)
allCmd.PersistentFlags().String("output", ".", "Output directory for the DataNodes")
}

View file

@ -1,53 +0,0 @@
package cmd
import (
"fmt"
"io"
"os"
"borg-data-collector/pkg/trix"
"github.com/spf13/cobra"
)
// catCmd implements "cat": it scans the entries of a Trix cube and streams the
// first entry whose name equals the requested file to standard output. Errors
// are printed to standard output; nothing is printed when no entry matches.
var catCmd = &cobra.Command{
	Use:   "cat [cube-file] [file-to-extract]",
	Short: "Extract a file from a Trix cube",
	Long:  `Extract a file from a Trix cube and print its content to standard output.`,
	Args:  cobra.ExactArgs(2),
	Run: func(cmd *cobra.Command, args []string) {
		cubePath, wanted := args[0], args[1]

		entries, handle, err := trix.Extract(cubePath)
		if err != nil {
			fmt.Println(err)
			return
		}
		defer handle.Close()

		for {
			hdr, err := entries.Next()
			switch {
			case err == io.EOF:
				// End of archive without a match: exit silently.
				return
			case err != nil:
				fmt.Println(err)
				return
			case hdr.Name != wanted:
				continue
			}

			// The reader is positioned on the matching entry's content.
			if _, err := io.Copy(os.Stdout, entries); err != nil {
				fmt.Println(err)
			}
			return
		}
	},
}
// init registers the cat command directly on the root command.
func init() {
rootCmd.AddCommand(catCmd)
}

View file

@ -1,45 +1,16 @@
package cmd
import (
"fmt"
"borg-data-collector/pkg/trix"
"github.com/spf13/cobra"
)
// collectCmd represents the collect command
var collectCmd = &cobra.Command{
Use: "collect [repository-url]",
Short: "Collect a single repository",
Long: `Collect a single repository and store it in a Trix cube.`,
Args: cobra.ExactArgs(1),
Run: func(cmd *cobra.Command, args []string) {
if len(args) < 1 {
fmt.Println("Please provide a repository URL")
return
}
repoURL := args[0]
clonePath, _ := cmd.Flags().GetString("path")
outputFile, _ := cmd.Flags().GetString("output")
cube, err := trix.NewCube(outputFile)
if err != nil {
fmt.Println(err)
return
}
defer cube.Close()
err = addRepoToCube(repoURL, cube, clonePath)
if err != nil {
fmt.Println(err)
return
}
},
Use: "collect",
Short: "Collect a resource and store it in a DataNode.",
Long: `Collect a resource from a git repository, a website, or other URI and store it in a DataNode.`,
}
func init() {
rootCmd.AddCommand(collectCmd)
collectCmd.PersistentFlags().String("path", "/tmp/borg-clone", "Path to clone the repository")
collectCmd.PersistentFlags().String("output", "borg.cube", "Output file for the Trix cube")
}

47
cmd/collect_git.go Normal file
View file

@ -0,0 +1,47 @@
package cmd
import (
"fmt"
"os"
"borg-data-collector/pkg/vcs"
"github.com/spf13/cobra"
)
// collectGitCmd implements "collect git": it clones the repository named by
// the single positional argument via vcs.CloneGitRepository, serializes the
// resulting DataNode with ToTar and writes the bytes to the --output path.
// Failures are reported on standard output and abort the command.
var collectGitCmd = &cobra.Command{
	Use:   "git [repository-url]",
	Short: "Collect a single Git repository",
	Long:  `Collect a single Git repository and store it in a DataNode.`,
	Args:  cobra.ExactArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		source := args[0]
		// The lookup error is deliberately ignored, as in the rest of this package.
		target, _ := cmd.Flags().GetString("output")

		node, err := vcs.CloneGitRepository(source)
		if err != nil {
			fmt.Printf("Error cloning repository: %v\n", err)
			return
		}

		archive, err := node.ToTar()
		if err != nil {
			fmt.Printf("Error serializing DataNode: %v\n", err)
			return
		}

		if err := os.WriteFile(target, archive, 0644); err != nil {
			fmt.Printf("Error writing DataNode to file: %v\n", err)
			return
		}

		fmt.Printf("Repository saved to %s\n", target)
	},
}
// init attaches the git subcommand to collectCmd and declares its "output"
// flag, which defaults to "repo.dat".
func init() {
	collectCmd.AddCommand(collectGitCmd)

	flags := collectGitCmd.PersistentFlags()
	flags.String("output", "repo.dat", "Output file for the DataNode")
}

56
cmd/collect_pwa.go Normal file
View file

@ -0,0 +1,56 @@
package cmd
import (
"fmt"
"os"
"borg-data-collector/pkg/pwa"
"github.com/spf13/cobra"
)
// collectPWACmd implements "collect pwa": it looks up the manifest URL of the
// page given as the single positional argument, downloads and packages the PWA
// into a DataNode, serializes it with ToTar and writes the bytes to the
// --output path. Progress and failures are reported on standard output.
var collectPWACmd = &cobra.Command{
	Use:   "pwa [url]",
	Short: "Collect a single PWA",
	Long:  `Collect a single PWA and store it in a DataNode.`,
	Args:  cobra.ExactArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		site := args[0]
		// The lookup error is deliberately ignored, as in the rest of this package.
		target, _ := cmd.Flags().GetString("output")

		fmt.Println("Finding PWA manifest...")
		manifestURL, err := pwa.FindManifest(site)
		if err != nil {
			fmt.Printf("Error finding manifest: %v\n", err)
			return
		}
		fmt.Printf("Found manifest: %s\n", manifestURL)

		fmt.Println("Downloading and packaging PWA...")
		node, err := pwa.DownloadAndPackagePWA(site, manifestURL)
		if err != nil {
			fmt.Printf("Error downloading and packaging PWA: %v\n", err)
			return
		}

		archive, err := node.ToTar()
		if err != nil {
			fmt.Printf("Error converting PWA to bytes: %v\n", err)
			return
		}

		if err := os.WriteFile(target, archive, 0644); err != nil {
			fmt.Printf("Error writing PWA to file: %v\n", err)
			return
		}

		fmt.Printf("PWA saved to %s\n", target)
	},
}
// init attaches the pwa subcommand to collectCmd and declares its "output"
// flag, which defaults to "pwa.dat".
func init() {
	collectCmd.AddCommand(collectPWACmd)

	flags := collectPWACmd.PersistentFlags()
	flags.String("output", "pwa.dat", "Output file for the DataNode")
}

49
cmd/collect_website.go Normal file
View file

@ -0,0 +1,49 @@
package cmd
import (
"fmt"
"os"
"borg-data-collector/pkg/website"
"github.com/spf13/cobra"
)
// collectWebsiteCmd implements "collect website": it passes the URL argument
// and the --depth value to website.DownloadAndPackageWebsite, serializes the
// returned DataNode with ToTar and writes the bytes to the --output path.
// Failures are reported on standard output and abort the command.
var collectWebsiteCmd = &cobra.Command{
	Use:   "website [url]",
	Short: "Collect a single website",
	Long:  `Collect a single website and store it in a DataNode.`,
	Args:  cobra.ExactArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		site := args[0]
		// Lookup errors are deliberately ignored, as in the rest of this package.
		flags := cmd.Flags()
		target, _ := flags.GetString("output")
		maxDepth, _ := flags.GetInt("depth")

		node, err := website.DownloadAndPackageWebsite(site, maxDepth)
		if err != nil {
			fmt.Printf("Error downloading and packaging website: %v\n", err)
			return
		}

		archive, err := node.ToTar()
		if err != nil {
			fmt.Printf("Error converting website to bytes: %v\n", err)
			return
		}

		if err := os.WriteFile(target, archive, 0644); err != nil {
			fmt.Printf("Error writing website to file: %v\n", err)
			return
		}

		fmt.Printf("Website saved to %s\n", target)
	},
}
// init attaches the website subcommand to collectCmd and declares its flags:
// "output" (default "website.dat") and "depth" (default 2).
func init() {
	collectCmd.AddCommand(collectWebsiteCmd)

	flags := collectWebsiteCmd.PersistentFlags()
	flags.String("output", "website.dat", "Output file for the DataNode")
	flags.Int("depth", 2, "Recursion depth for downloading")
}

View file

@ -1,41 +0,0 @@
package cmd
import (
"os"
"path/filepath"
"borg-data-collector/pkg/trix"
"github.com/go-git/go-git/v5"
)
// addRepoToCube clones repoURL into clonePath (clone progress goes to standard
// output) and adds every non-directory entry found under clonePath to cube,
// named by its path relative to clonePath.
//
// It returns the first error from cloning, walking, reading a file, or adding
// it to the cube.
func addRepoToCube(repoURL string, cube *trix.Cube, clonePath string) error {
	_, err := git.PlainClone(clonePath, false, &git.CloneOptions{
		URL:      repoURL,
		Progress: os.Stdout,
	})
	if err != nil {
		return err
	}

	return filepath.Walk(clonePath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}

		content, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		relPath, err := filepath.Rel(clonePath, path)
		if err != nil {
			return err
		}

		// Archive entry names are slash-separated regardless of the host OS.
		// The AddFile error used to be dropped; propagate it so a failed
		// write aborts the walk instead of producing a truncated cube.
		return cube.AddFile(filepath.ToSlash(relPath), content)
	})
}

View file

@ -1,56 +0,0 @@
package cmd
import (
"fmt"
"os"
"borg-data-collector/pkg/borg"
"borg-data-collector/pkg/trix"
"github.com/spf13/cobra"
)
// ingestCmd implements "ingest": it adds one file from disk to a Trix cube,
// creating the cube when the cube file does not exist and appending to it
// otherwise. Failures are printed to standard output and abort the command.
var ingestCmd = &cobra.Command{
	Use:   "ingest [cube-file] [file-to-add]",
	Short: "Add a file to a Trix cube",
	Long:  `Add a file to a Trix cube. If the cube file does not exist, it will be created.`,
	Args:  cobra.ExactArgs(2),
	Run: func(cmd *cobra.Command, args []string) {
		cubeFile := args[0]
		fileToAdd := args[1]

		var (
			cube *trix.Cube
			err  error
		)
		// The Stat result gets its own name: reusing "err" in the if
		// initializer shadowed the outer variable, so open failures were never
		// observed and a nil cube was used afterwards.
		if _, statErr := os.Stat(cubeFile); os.IsNotExist(statErr) {
			cube, err = trix.NewCube(cubeFile)
		} else {
			cube, err = trix.AppendToCube(cubeFile)
		}
		if err != nil {
			fmt.Println(err)
			return
		}
		// Closing finalizes the archive, so a failure here is reported too.
		defer func() {
			if closeErr := cube.Close(); closeErr != nil {
				fmt.Println(closeErr)
			}
		}()

		content, err := os.ReadFile(fileToAdd)
		if err != nil {
			fmt.Println(err)
			return
		}

		if err := cube.AddFile(fileToAdd, content); err != nil {
			fmt.Println(err)
			return
		}

		fmt.Println(borg.GetRandomCodeShortMessage())
	},
}
// init registers the ingest command directly on the root command.
func init() {
rootCmd.AddCommand(ingestCmd)
}

49
cmd/serve.go Normal file
View file

@ -0,0 +1,49 @@
package cmd
import (
"fmt"
"net/http"
"os"
"borg-data-collector/pkg/datanode"
"github.com/spf13/cobra"
)
// serveCmd implements "serve": it reads the file named by the single
// positional argument, rebuilds a DataNode from it with datanode.FromTar and
// serves that DataNode as a static file tree on the port given by --port.
// The handler is registered on the default HTTP mux, and the command blocks in
// ListenAndServe until the server stops.
var serveCmd = &cobra.Command{
	Use:   "serve [file]",
	Short: "Serve a packaged PWA file",
	Long:  `Serves the contents of a packaged PWA file using a static file server.`,
	Args:  cobra.ExactArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		archivePath := args[0]
		// The lookup error is deliberately ignored, as in the rest of this package.
		port, _ := cmd.Flags().GetString("port")

		archive, err := os.ReadFile(archivePath)
		if err != nil {
			fmt.Printf("Error reading PWA file: %v\n", err)
			return
		}

		node, err := datanode.FromTar(archive)
		if err != nil {
			fmt.Printf("Error creating DataNode from tarball: %v\n", err)
			return
		}

		http.Handle("/", http.FileServer(http.FS(node)))

		fmt.Printf("Serving PWA on http://localhost:%s\n", port)
		if err := http.ListenAndServe(":"+port, nil); err != nil {
			fmt.Printf("Error starting server: %v\n", err)
		}
	},
}
// init registers the serve command on the root command and declares its
// "port" flag, which defaults to "8080".
func init() {
	rootCmd.AddCommand(serveCmd)

	flags := serveCmd.PersistentFlags()
	flags.String("port", "8080", "Port to serve the PWA on")
}

11
go.mod
View file

@ -18,13 +18,18 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/leaanthony/debme v1.2.1 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/pjbgf/sha1cd v0.3.2 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/schollz/progressbar/v3 v3.18.0 // indirect
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/skeema/knownhosts v1.3.1 // indirect
github.com/spf13/pflag v1.0.10 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
golang.org/x/crypto v0.37.0 // indirect
golang.org/x/net v0.39.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/crypto v0.43.0 // indirect
golang.org/x/net v0.46.0 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/term v0.36.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
)

18
go.sum
View file

@ -31,11 +31,21 @@ github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/leaanthony/debme v1.2.1 h1:9Tgwf+kjcrbMQ4WnPcEIUcQuIZYqdWftzZkBr+i/oOc=
github.com/leaanthony/debme v1.2.1/go.mod h1:3V+sCm5tYAgQymvSOfYQ5Xx2JCr+OXiD9Jkw3otUjiA=
github.com/leaanthony/slicer v1.5.0/go.mod h1:FwrApmf8gOrpzEWM2J/9Lh79tyq8KTX5AzRtwV7m4AY=
github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4=
github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA=
github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8=
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
@ -54,9 +64,13 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -65,7 +79,11 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

317
pkg/datanode/datanode.go Normal file
View file

@ -0,0 +1,317 @@
package datanode
import (
"archive/tar"
"bytes"
"io"
"io/fs"
"os"
"path"
"sort"
"strings"
"time"
)
// DataNode is an in-memory filesystem that is compatible with fs.FS.
//
// Files live in a flat map keyed by their cleaned, slash-separated path
// relative to the root. Directories are never stored: a directory exists
// whenever some stored path has it as a prefix, and the root always exists.
type DataNode struct {
	files map[string]*dataFile
}

// Compile-time checks for the io/fs interfaces implemented in this file.
var (
	_ fs.FS          = (*DataNode)(nil)
	_ fs.ReadDirFS   = (*DataNode)(nil)
	_ fs.StatFS      = (*DataNode)(nil)
	_ fs.ReadDirFile = (*dirFile)(nil)
)

// New creates a new, empty DataNode.
func New() *DataNode {
	return &DataNode{files: make(map[string]*dataFile)}
}

// cleanName converts name into the key form used by the files map: rooted at
// the DataNode, without a leading slash, and with ".", ".." and duplicate
// slashes resolved. The root itself is represented by the empty string.
func cleanName(name string) string {
	return strings.TrimPrefix(path.Clean("/"+name), "/")
}

// FromTar creates a new DataNode from a tarball.
//
// Only regular-file entries are imported; other entry types are skipped.
// Each file keeps the modification time recorded in its tar header.
func FromTar(tarball []byte) (*DataNode, error) {
	dn := New()
	tarReader := tar.NewReader(bytes.NewReader(tarball))

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			return dn, nil
		}
		if err != nil {
			return nil, err
		}
		if header.Typeflag != tar.TypeReg {
			continue
		}

		data, err := io.ReadAll(tarReader)
		if err != nil {
			return nil, err
		}
		dn.add(header.Name, data, header.ModTime)
	}
}

// ToTar serializes the DataNode to a tarball.
//
// Entries are written in sorted name order so that the same DataNode always
// produces the same bytes.
func (d *DataNode) ToTar() ([]byte, error) {
	names := make([]string, 0, len(d.files))
	for name := range d.files {
		names = append(names, name)
	}
	sort.Strings(names)

	buf := new(bytes.Buffer)
	tw := tar.NewWriter(buf)

	for _, name := range names {
		file := d.files[name]
		hdr := &tar.Header{
			Name:    file.name,
			Mode:    0600,
			Size:    int64(len(file.content)),
			ModTime: file.modTime,
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return nil, err
		}
		if _, err := tw.Write(file.content); err != nil {
			return nil, err
		}
	}

	if err := tw.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

// AddData adds a file to the DataNode, replacing any file already stored
// under the same name. The name is cleaned first (leading "/" and "./"
// removed, ".." resolved against the root); a name that cleans to the root
// itself cannot denote a file and is ignored.
func (d *DataNode) AddData(name string, content []byte) {
	d.add(name, content, time.Now())
}

// add stores content under the cleaned form of name with the given
// modification time. It also initializes the map of a zero-value DataNode.
func (d *DataNode) add(name string, content []byte, modTime time.Time) {
	name = cleanName(name)
	if name == "" {
		return
	}
	if d.files == nil {
		d.files = make(map[string]*dataFile)
	}
	d.files[name] = &dataFile{
		name:    name,
		content: content,
		modTime: modTime,
	}
}

// isDir reports whether the cleaned name denotes the root or a prefix
// directory of at least one stored file.
func (d *DataNode) isDir(name string) bool {
	if name == "" {
		return true
	}
	prefix := name + "/"
	for p := range d.files {
		if strings.HasPrefix(p, prefix) {
			return true
		}
	}
	return false
}

// Open opens a file from the DataNode.
//
// A stored file yields a reader over its content. A directory yields a file
// whose Read fails but which supports ReadDir. Anything else yields
// fs.ErrNotExist.
func (d *DataNode) Open(name string) (fs.File, error) {
	name = cleanName(name)
	if file, ok := d.files[name]; ok {
		return &dataFileReader{file: file}, nil
	}
	if !d.isDir(name) {
		return nil, fs.ErrNotExist
	}

	entries, err := d.ReadDir(name)
	if err != nil {
		return nil, err
	}
	dirPath := name
	if dirPath == "" {
		dirPath = "."
	}
	return &dirFile{path: dirPath, modTime: time.Now(), entries: entries}, nil
}

// ReadDir reads and returns all directory entries for the named directory,
// sorted by name.
//
// It returns fs.ErrNotExist when the directory does not exist and a
// *fs.PathError wrapping fs.ErrInvalid when name refers to a regular file.
func (d *DataNode) ReadDir(name string) ([]fs.DirEntry, error) {
	name = cleanName(name)
	if _, isFile := d.files[name]; isFile {
		return nil, &fs.PathError{Op: "readdir", Path: name, Err: fs.ErrInvalid}
	}

	prefix := ""
	if name != "" {
		prefix = name + "/"
	}

	entries := []fs.DirEntry{}
	seen := make(map[string]bool)

	for p, file := range d.files {
		if !strings.HasPrefix(p, prefix) {
			continue
		}

		// Only the first path component below the directory is an entry of
		// it; deeper components belong to subdirectories.
		first, _, nested := strings.Cut(p[len(prefix):], "/")
		if seen[first] {
			continue
		}
		seen[first] = true

		if nested {
			dir := &dirInfo{name: first, modTime: time.Now()}
			entries = append(entries, fs.FileInfoToDirEntry(dir))
			continue
		}
		info, _ := file.Stat() // dataFile.Stat never fails
		entries = append(entries, fs.FileInfoToDirEntry(info))
	}

	// Only the root may legitimately be empty; any other name without
	// entries is not a directory of this DataNode.
	if len(entries) == 0 && name != "" {
		return nil, fs.ErrNotExist
	}

	// Sort for stable order in tests
	sort.Slice(entries, func(i, j int) bool {
		return entries[i].Name() < entries[j].Name()
	})

	return entries, nil
}

// Stat returns the FileInfo structure describing file.
//
// Directories report the current time as their modification time because
// they are not stored. Unknown names yield fs.ErrNotExist.
func (d *DataNode) Stat(name string) (fs.FileInfo, error) {
	name = cleanName(name)
	if file, ok := d.files[name]; ok {
		return file.Stat()
	}
	if d.isDir(name) {
		// path.Base("") is ".", which is the conventional name of the root.
		return &dirInfo{name: path.Base(name), modTime: time.Now()}, nil
	}
	return nil, fs.ErrNotExist
}

// ExistsOptions allows customizing the Exists check.
type ExistsOptions struct {
	// WantType restricts the check: fs.ModeDir matches only directories, any
	// other value (including zero) matches only non-directories.
	WantType fs.FileMode
}

// Exists returns true if the file or directory exists.
//
// Without options any kind of entry counts. With options, only the first
// element is consulted and the entry must match its WantType.
func (d *DataNode) Exists(name string, opts ...ExistsOptions) (bool, error) {
	info, err := d.Stat(name)
	if err != nil {
		if err == fs.ErrNotExist || os.IsNotExist(err) {
			return false, nil
		}
		return false, err
	}

	if len(opts) > 0 {
		wantDir := opts[0].WantType == fs.ModeDir
		if wantDir != info.IsDir() {
			return false, nil
		}
	}
	return true, nil
}

// WalkOptions allows customizing the Walk behavior.
type WalkOptions struct {
	// MaxDepth, when positive, stops descent into directories at that depth.
	MaxDepth int
	// Filter, when set, suppresses the callback for entries it rejects.
	Filter func(path string, d fs.DirEntry) bool
	// SkipErrors makes Walk ignore errors instead of passing them to the
	// callback.
	SkipErrors bool
}

// Walk recursively descends the file tree rooted at root, calling fn for each
// file or directory. Only the first element of opts is consulted.
//
// A directory rejected by Filter is not reported, but its children are still
// visited. A directory at or beyond MaxDepth is neither reported nor entered.
func (d *DataNode) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error {
	var options WalkOptions
	if len(opts) > 0 {
		options = opts[0]
	}

	return fs.WalkDir(d, root, func(p string, de fs.DirEntry, err error) error {
		if err != nil {
			if options.SkipErrors {
				return nil
			}
			return fn(p, de, err)
		}
		if options.Filter != nil && !options.Filter(p, de) {
			return nil
		}
		if options.MaxDepth > 0 {
			// NOTE(review): depth counts the "/" left after textually trimming
			// root, so a child of root "." has depth 0 while a child of a
			// named root has depth 1 - confirm which meaning is intended.
			depth := strings.Count(strings.TrimPrefix(p, root), "/")
			if de.IsDir() && depth >= options.MaxDepth {
				return fs.SkipDir
			}
		}
		return fn(p, de, nil)
	})
}

// CopyFile copies a file from the DataNode to the local filesystem.
//
// The target is created with perm if missing and truncated if it exists, so
// it ends up holding exactly the source content. An error from closing the
// target is returned when the copy itself succeeded.
func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode) (err error) {
	sourceFile, err := d.Open(sourcePath)
	if err != nil {
		return err
	}
	defer sourceFile.Close()

	targetFile, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
	if err != nil {
		return err
	}
	defer func() {
		// A failed close can mean the data never reached the disk.
		if closeErr := targetFile.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(targetFile, sourceFile)
	return err
}

// dataFile represents a file in the DataNode.
type dataFile struct {
	name    string
	content []byte
	modTime time.Time
}

// Stat returns file information backed by this dataFile.
func (d *dataFile) Stat() (fs.FileInfo, error) { return &dataFileInfo{file: d}, nil }

// Read always reports end of file; content is read through dataFileReader.
func (d *dataFile) Read(p []byte) (int, error) { return 0, io.EOF }

// Close is a no-op.
func (d *dataFile) Close() error { return nil }

// dataFileInfo implements fs.FileInfo for a dataFile.
type dataFileInfo struct{ file *dataFile }

func (d *dataFileInfo) Name() string       { return path.Base(d.file.name) }
func (d *dataFileInfo) Size() int64        { return int64(len(d.file.content)) }
func (d *dataFileInfo) Mode() fs.FileMode  { return 0444 }
func (d *dataFileInfo) ModTime() time.Time { return d.file.modTime }
func (d *dataFileInfo) IsDir() bool        { return false }
func (d *dataFileInfo) Sys() any           { return nil }

// dataFileReader implements fs.File for a dataFile.
type dataFileReader struct {
	file   *dataFile
	reader *bytes.Reader // created lazily on first Read
}

// Stat returns the information of the underlying dataFile.
func (d *dataFileReader) Stat() (fs.FileInfo, error) { return d.file.Stat() }

// Read reads the file content sequentially.
func (d *dataFileReader) Read(p []byte) (int, error) {
	if d.reader == nil {
		d.reader = bytes.NewReader(d.file.content)
	}
	return d.reader.Read(p)
}

// Close is a no-op.
func (d *dataFileReader) Close() error { return nil }

// dirInfo implements fs.FileInfo for an implicit directory.
type dirInfo struct {
	name    string
	modTime time.Time
}

func (d *dirInfo) Name() string       { return d.name }
func (d *dirInfo) Size() int64        { return 0 }
func (d *dirInfo) Mode() fs.FileMode  { return fs.ModeDir | 0555 }
func (d *dirInfo) ModTime() time.Time { return d.modTime }
func (d *dirInfo) IsDir() bool        { return true }
func (d *dirInfo) Sys() any           { return nil }

// dirFile implements fs.ReadDirFile for a directory.
type dirFile struct {
	path    string
	modTime time.Time
	entries []fs.DirEntry // snapshot taken when the directory was opened
	offset  int           // number of entries already returned by ReadDir
}

// Stat describes the directory by the last element of its path.
func (d *dirFile) Stat() (fs.FileInfo, error) {
	return &dirInfo{name: path.Base(d.path), modTime: d.modTime}, nil
}

// Read always fails because a directory has no byte content.
func (d *dirFile) Read([]byte) (int, error) {
	return 0, &fs.PathError{Op: "read", Path: d.path, Err: fs.ErrInvalid}
}

// ReadDir returns the directory entries not yet returned by earlier calls.
// With n <= 0 it returns all of them; with n > 0 it returns at most n and
// reports io.EOF once none are left.
func (d *dirFile) ReadDir(n int) ([]fs.DirEntry, error) {
	remaining := d.entries[d.offset:]
	if n <= 0 {
		d.offset = len(d.entries)
		return remaining, nil
	}
	if len(remaining) == 0 {
		return nil, io.EOF
	}
	if n > len(remaining) {
		n = len(remaining)
	}
	d.offset += n
	return remaining[:n], nil
}

// Close is a no-op.
func (d *dirFile) Close() error { return nil }

View file

@ -0,0 +1,124 @@
package datanode
import (
"io/fs"
"os"
"reflect"
"sort"
"testing"
)
// TestDataNode exercises Open, Stat, Exists, ReadDir, Walk and CopyFile on a
// DataNode holding one root-level file and two files in a "bar" directory.
func TestDataNode(t *testing.T) {
	dn := New()
	dn.AddData("foo.txt", []byte("foo"))
	dn.AddData("bar/baz.txt", []byte("baz"))
	dn.AddData("bar/qux.txt", []byte("qux"))

	// entryNames returns the sorted names of a directory listing.
	entryNames := func(entries []fs.DirEntry) []string {
		names := make([]string, 0, len(entries))
		for _, e := range entries {
			names = append(names, e.Name())
		}
		sort.Strings(names)
		return names
	}

	// Test Open
	file, err := dn.Open("foo.txt")
	if err != nil {
		t.Fatalf("Open failed: %v", err)
	}
	if err := file.Close(); err != nil {
		t.Errorf("Close failed: %v", err)
	}

	_, err = dn.Open("nonexistent.txt")
	if err == nil {
		t.Fatalf("Expected error opening nonexistent file, got nil")
	}

	// Test Stat
	info, err := dn.Stat("bar/baz.txt")
	if err != nil {
		t.Fatalf("Stat failed: %v", err)
	}
	if info.Name() != "baz.txt" {
		t.Errorf("Expected name baz.txt, got %s", info.Name())
	}
	if info.Size() != 3 {
		t.Errorf("Expected size 3, got %d", info.Size())
	}
	if info.IsDir() {
		t.Errorf("Expected baz.txt to not be a directory")
	}

	barInfo, err := dn.Stat("bar")
	if err != nil {
		t.Fatalf("Stat directory failed: %v", err)
	}
	if !barInfo.IsDir() {
		t.Errorf("Expected 'bar' to be a directory")
	}

	// Test Exists
	exists, err := dn.Exists("foo.txt")
	if err != nil || !exists {
		t.Errorf("Expected foo.txt to exist, err: %v", err)
	}
	exists, err = dn.Exists("bar")
	if err != nil || !exists {
		t.Errorf("Expected 'bar' directory to exist, err: %v", err)
	}
	exists, err = dn.Exists("nonexistent")
	if err != nil || exists {
		t.Errorf("Expected 'nonexistent' to not exist, err: %v", err)
	}

	// Test ReadDir
	entries, err := dn.ReadDir(".")
	if err != nil {
		t.Fatalf("ReadDir failed: %v", err)
	}
	expectedRootEntries := []string{"bar", "foo.txt"}
	if got := entryNames(entries); !reflect.DeepEqual(got, expectedRootEntries) {
		t.Errorf("Expected entries %v, got %v", expectedRootEntries, got)
	}

	barEntries, err := dn.ReadDir("bar")
	if err != nil {
		t.Fatalf("ReadDir('bar') failed: %v", err)
	}
	expectedBarEntries := []string{"baz.txt", "qux.txt"}
	// Compare names, not just the count, so a wrong listing is caught.
	if got := entryNames(barEntries); !reflect.DeepEqual(got, expectedBarEntries) {
		t.Errorf("Expected entries %v in 'bar', got %v", expectedBarEntries, got)
	}

	// Test Walk
	var paths []string
	err = dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		paths = append(paths, path)
		return nil
	})
	if err != nil {
		t.Fatalf("Walk failed: %v", err)
	}
	expectedPaths := []string{".", "bar", "bar/baz.txt", "bar/qux.txt", "foo.txt"}
	sort.Strings(paths)
	if !reflect.DeepEqual(paths, expectedPaths) {
		t.Errorf("Walk expected paths %v, got %v", expectedPaths, paths)
	}

	// Test CopyFile
	tmpfile, err := os.CreateTemp("", "datanode-test-")
	if err != nil {
		t.Fatalf("CreateTemp failed: %v", err)
	}
	defer os.Remove(tmpfile.Name())
	// Only the path is needed; release the handle so it is not leaked.
	if err := tmpfile.Close(); err != nil {
		t.Fatalf("Close temp file failed: %v", err)
	}

	err = dn.CopyFile("foo.txt", tmpfile.Name(), 0644)
	if err != nil {
		t.Fatalf("CopyFile failed: %v", err)
	}

	content, err := os.ReadFile(tmpfile.Name())
	if err != nil {
		t.Fatalf("ReadFile failed: %v", err)
	}
	if string(content) != "foo" {
		t.Errorf("Expected foo, got %s", string(content))
	}
}

169
pkg/pwa/pwa.go Normal file
View file

@ -0,0 +1,169 @@
package pwa
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
"borg-data-collector/pkg/datanode"
"golang.org/x/net/html"
)
// Manifest represents a simple PWA manifest structure.
// It declares only the manifest members listed below; each field is mapped to
// its JSON key by the struct tag.
type Manifest struct {
Name string `json:"name"`
ShortName string `json:"short_name"`
// StartURL is resolved against the manifest URL before it is downloaded.
StartURL string `json:"start_url"`
// Icons lists the icon assets referenced by the manifest.
Icons []Icon `json:"icons"`
}
// Icon represents an icon in the PWA manifest.
type Icon struct {
// Src is the icon location; it is resolved against the manifest URL.
Src string `json:"src"`
Sizes string `json:"sizes"`
Type string `json:"type"`
}
// FindManifest finds the manifest URL from a given HTML page.
//
// It downloads pageURL, parses the body as HTML and searches the document
// depth-first for the first <link> element that has a rel attribute equal to
// "manifest"; the first href attribute of that element is resolved against
// pageURL and returned as an absolute URL string.
//
// An error is returned when the page cannot be fetched, the server does not
// answer with 200 OK, the body cannot be parsed, no manifest link is found,
// or the href cannot be resolved.
func FindManifest(pageURL string) (string, error) {
	resp, err := http.Get(pageURL)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Do not parse an error page as if it were the site, matching the status
	// handling in downloadAndAddFile.
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("bad status: %s", resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return "", err
	}

	var manifestPath string
	var f func(*html.Node)
	f = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "link" {
			// NOTE(review): rel is compared as an exact string, so values
			// such as "Manifest" or "manifest icon" do not match - confirm
			// whether token-wise matching is wanted.
			isManifest := false
			for _, a := range n.Attr {
				if a.Key == "rel" && a.Val == "manifest" {
					isManifest = true
					break
				}
			}
			if isManifest {
				for _, a := range n.Attr {
					if a.Key == "href" {
						manifestPath = a.Val
						return // exit once found
					}
				}
			}
		}
		// Stop descending as soon as a manifest path has been recorded.
		for c := n.FirstChild; c != nil && manifestPath == ""; c = c.NextSibling {
			f(c)
		}
	}
	f(doc)

	if manifestPath == "" {
		return "", fmt.Errorf("manifest not found")
	}

	resolvedURL, err := resolveURL(pageURL, manifestPath)
	if err != nil {
		return "", fmt.Errorf("could not resolve manifest URL: %w", err)
	}

	return resolvedURL.String(), nil
}
// DownloadAndPackagePWA downloads all assets of a PWA and packages them into a DataNode.
//
// manifestURL is resolved against baseURL and fetched; its raw body is stored
// as "manifest.json". The manifest's start_url (if set) and every icon are
// resolved against the manifest URL and stored under the path given in the
// manifest. Finally the page at baseURL is stored as "index.html".
//
// A failure to fetch or parse the manifest, the start_url asset or the base
// page aborts with an error; icon failures only print a warning.
func DownloadAndPackagePWA(baseURL string, manifestURL string) (*datanode.DataNode, error) {
	manifestAbsURL, err := resolveURL(baseURL, manifestURL)
	if err != nil {
		return nil, fmt.Errorf("could not resolve manifest URL: %w", err)
	}

	resp, err := http.Get(manifestAbsURL.String())
	if err != nil {
		return nil, fmt.Errorf("could not download manifest: %w", err)
	}
	defer resp.Body.Close()

	// Reject error responses up front, so an error page is not stored as the
	// manifest and failures are reported by status instead of as a JSON parse
	// error.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("could not download manifest: bad status: %s", resp.Status)
	}

	manifestBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("could not read manifest body: %w", err)
	}

	var manifest Manifest
	if err := json.Unmarshal(manifestBody, &manifest); err != nil {
		return nil, fmt.Errorf("could not parse manifest JSON: %w", err)
	}

	dn := datanode.New()
	dn.AddData("manifest.json", manifestBody)

	if manifest.StartURL != "" {
		startURLAbs, err := resolveURL(manifestAbsURL.String(), manifest.StartURL)
		if err != nil {
			return nil, fmt.Errorf("could not resolve start_url: %w", err)
		}
		// NOTE(review): the raw start_url is used as the stored path, so a
		// value such as "/" or "/?source=pwa" does not yield a usable file
		// name - confirm how such values should be stored.
		err = downloadAndAddFile(dn, startURLAbs, manifest.StartURL)
		if err != nil {
			return nil, fmt.Errorf("failed to download start_url asset: %w", err)
		}
	}

	// Icons are optional extras: report problems but keep going.
	for _, icon := range manifest.Icons {
		iconURLAbs, err := resolveURL(manifestAbsURL.String(), icon.Src)
		if err != nil {
			fmt.Printf("Warning: could not resolve icon URL %s: %v\n", icon.Src, err)
			continue
		}
		err = downloadAndAddFile(dn, iconURLAbs, icon.Src)
		if err != nil {
			fmt.Printf("Warning: failed to download icon %s: %v\n", icon.Src, err)
		}
	}

	baseURLAbs, err := url.Parse(baseURL)
	if err != nil {
		return nil, fmt.Errorf("could not parse base URL: %w", err)
	}
	err = downloadAndAddFile(dn, baseURLAbs, "index.html")
	if err != nil {
		return nil, fmt.Errorf("failed to download base HTML: %w", err)
	}

	return dn, nil
}
// resolveURL parses base and ref as URLs and returns ref resolved against
// base. It fails with the parse error of whichever input is not a valid URL,
// checking base first.
func resolveURL(base, ref string) (*url.URL, error) {
	var (
		baseURL, refURL *url.URL
		err             error
	)
	if baseURL, err = url.Parse(base); err != nil {
		return nil, err
	}
	if refURL, err = url.Parse(ref); err != nil {
		return nil, err
	}
	return baseURL.ResolveReference(refURL), nil
}
// downloadAndAddFile fetches fileURL and stores the response body in dn under
// the cleaned form of internalPath. Any response other than 200 OK is treated
// as an error, in which case nothing is added.
func downloadAndAddFile(dn *datanode.DataNode, fileURL *url.URL, internalPath string) error {
	response, err := http.Get(fileURL.String())
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return fmt.Errorf("bad status: %s", response.Status)
	}

	body, readErr := io.ReadAll(response.Body)
	if readErr != nil {
		return readErr
	}

	storedAs := path.Clean(internalPath)
	dn.AddData(storedAs, body)
	return nil
}

122
pkg/pwa/pwa_test.go Normal file
View file

@ -0,0 +1,122 @@
package pwa
import (
"net/http"
"net/http/httptest"
"testing"
)
// TestFindManifest serves a single HTML page that links to "manifest.json"
// and checks that FindManifest returns that link resolved against the test
// server's URL.
func TestFindManifest(t *testing.T) {
// The handler answers every request path with the same page.
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
w.Write([]byte(`
<!DOCTYPE html>
<html>
<head>
<title>Test PWA</title>
<link rel="manifest" href="manifest.json">
</head>
<body>
<h1>Hello, PWA!</h1>
</body>
</html>
`))
}))
defer server.Close()
// The relative href must come back as an absolute URL on the test server.
expectedURL := server.URL + "/manifest.json"
actualURL, err := FindManifest(server.URL)
if err != nil {
t.Fatalf("FindManifest failed: %v", err)
}
if actualURL != expectedURL {
t.Errorf("Expected manifest URL %s, but got %s", expectedURL, actualURL)
}
}
// TestDownloadAndPackagePWA serves a minimal PWA (page, manifest, start page
// and one icon) and checks that DownloadAndPackagePWA stores the manifest,
// index.html and the icon in the returned DataNode.
func TestDownloadAndPackagePWA(t *testing.T) {
// Each asset is served on its own path; anything else answers 404.
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/":
w.Header().Set("Content-Type", "text/html")
w.Write([]byte(`
<!DOCTYPE html>
<html>
<head>
<title>Test PWA</title>
<link rel="manifest" href="manifest.json">
</head>
<body>
<h1>Hello, PWA!</h1>
</body>
</html>
`))
case "/manifest.json":
w.Header().Set("Content-Type", "application/json")
w.Write([]byte(`{
"name": "Test PWA",
"short_name": "TestPWA",
"start_url": "index.html",
"icons": [
{
"src": "icon.png",
"sizes": "192x192",
"type": "image/png"
}
]
}`))
case "/index.html":
w.Header().Set("Content-Type", "text/html")
w.Write([]byte(`<h1>Hello, PWA!</h1>`))
case "/icon.png":
w.Header().Set("Content-Type", "image/png")
w.Write([]byte("fake image data"))
default:
http.NotFound(w, r)
}
}))
defer server.Close()
dn, err := DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json")
if err != nil {
t.Fatalf("DownloadAndPackagePWA failed: %v", err)
}
// Only presence is checked, not the stored content.
expectedFiles := []string{"manifest.json", "index.html", "icon.png"}
for _, file := range expectedFiles {
exists, err := dn.Exists(file)
if err != nil {
t.Fatalf("Exists failed for %s: %v", file, err)
}
if !exists {
t.Errorf("Expected to find file %s in DataNode, but it was not found", file)
}
}
}
// TestResolveURL checks resolveURL against a table of base/reference pairs:
// relative references with and without a trailing slash on the base,
// root-relative references, and an absolute reference.
func TestResolveURL(t *testing.T) {
	cases := []struct {
		base, ref, want string
	}{
		{base: "http://example.com/", ref: "foo.html", want: "http://example.com/foo.html"},
		{base: "http://example.com/foo/", ref: "bar.html", want: "http://example.com/foo/bar.html"},
		{base: "http://example.com/foo", ref: "bar.html", want: "http://example.com/bar.html"},
		{base: "http://example.com/foo/", ref: "/bar.html", want: "http://example.com/bar.html"},
		{base: "http://example.com/foo", ref: "/bar.html", want: "http://example.com/bar.html"},
		{base: "http://example.com/", ref: "http://example.com/foo/bar.html", want: "http://example.com/foo/bar.html"},
	}

	for _, tc := range cases {
		resolved, err := resolveURL(tc.base, tc.ref)
		if err != nil {
			t.Errorf("resolveURL(%q, %q) returned error: %v", tc.base, tc.ref, err)
			continue
		}
		if s := resolved.String(); s != tc.want {
			t.Errorf("resolveURL(%q, %q) = %q, want %q", tc.base, tc.ref, s, tc.want)
		}
	}
}

View file

@ -1,63 +0,0 @@
package trix
import (
"archive/tar"
"os"
)
// Cube is a tar archive that is being written to a file on disk.
type Cube struct {
	writer *tar.Writer // tar stream layered over file
	file   *os.File    // underlying archive file; closed by Close
}

// tarTrailerSize is the size of the end-of-archive marker that
// tar.Writer.Close emits: two 512-byte blocks of zeros.
const tarTrailerSize = 2 * 512

// NewCube creates the file at path (truncating it if it already exists) and
// returns a Cube that writes tar entries into it.
func NewCube(path string) (*Cube, error) {
	file, err := os.Create(path)
	if err != nil {
		return nil, err
	}
	return &Cube{writer: tar.NewWriter(file), file: file}, nil
}

// AddFile appends one entry named path with the given content and mode 0600.
func (c *Cube) AddFile(path string, content []byte) error {
	hdr := &tar.Header{
		Name: path,
		Mode: 0600,
		Size: int64(len(content)),
	}
	if err := c.writer.WriteHeader(hdr); err != nil {
		return err
	}
	_, err := c.writer.Write(content)
	return err
}

// Close finishes the tar stream and closes the underlying file. The file is
// closed even when finishing the tar stream fails, so the handle is never
// leaked; the tar error takes precedence in the returned value.
func (c *Cube) Close() error {
	tarErr := c.writer.Close()
	fileErr := c.file.Close()
	if tarErr != nil {
		return tarErr
	}
	return fileErr
}

// Extract opens the archive at path for reading. The caller owns the returned
// file and must close it once done with the reader.
func Extract(path string) (*tar.Reader, *os.File, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, nil, err
	}
	return tar.NewReader(file), file, nil
}

// AppendToCube opens an existing archive so that further AddFile calls add
// entries after the ones already present.
//
// A finished tar archive ends with an all-zero trailer, and readers stop at
// it. Writing new entries after that trailer would make them unreachable, so
// the write position is placed on top of the trailer instead; Close then
// writes a fresh trailer after the new entries. The file is opened read-write
// because the tail has to be inspected.
func AppendToCube(path string) (*Cube, error) {
	file, err := os.OpenFile(path, os.O_RDWR, 0644)
	if err != nil {
		return nil, err
	}
	if err := seekToAppendOffset(file); err != nil {
		file.Close()
		return nil, err
	}
	return &Cube{writer: tar.NewWriter(file), file: file}, nil
}

// seekToAppendOffset positions file where the next tar entry must start: at
// the beginning of the end-of-archive trailer when the file ends with one,
// otherwise at the end of the file.
func seekToAppendOffset(file *os.File) error {
	const seekStart = 0 // same value as io.SeekStart

	info, err := file.Stat()
	if err != nil {
		return err
	}
	offset := info.Size()

	if offset >= tarTrailerSize {
		tail := make([]byte, tarTrailerSize)
		if _, err := file.ReadAt(tail, offset-tarTrailerSize); err != nil {
			return err
		}
		trailer := true
		for _, b := range tail {
			if b != 0 {
				trailer = false
				break
			}
		}
		if trailer {
			offset -= tarTrailerSize
		}
	}

	_, err = file.Seek(offset, seekStart)
	return err
}

15
pkg/ui/progressbar.go Normal file
View file

@ -0,0 +1,15 @@
package ui
import (
"github.com/schollz/progressbar/v3"
)
// NewProgressBar returns a progress bar for total steps labelled with
// description. The bar is 15 columns wide, shows the running count, and is
// configured to clear itself when it finishes.
func NewProgressBar(total int, description string) *progressbar.ProgressBar {
	opts := []progressbar.Option{
		progressbar.OptionSetDescription(description),
		progressbar.OptionSetWidth(15),
		progressbar.OptionShowCount(),
		progressbar.OptionClearOnFinish(),
	}
	return progressbar.NewOptions(total, opts...)
}

51
pkg/vcs/git.go Normal file
View file

@ -0,0 +1,51 @@
package vcs
import (
"os"
"path/filepath"
"borg-data-collector/pkg/datanode"
"github.com/go-git/go-git/v5"
)
// CloneGitRepository clones the Git repository at repoURL into a temporary
// directory and copies every non-directory entry found under that directory
// into a new DataNode, keyed by its path relative to the clone root. Nothing
// is filtered out of the walk. The temporary directory is removed before the
// function returns.
//
// Clone progress is written to os.Stdout. Any clone, walk or read error
// aborts the operation and is returned with a nil DataNode.
func CloneGitRepository(repoURL string) (*datanode.DataNode, error) {
	tempPath, err := os.MkdirTemp("", "borg-clone-*")
	if err != nil {
		return nil, err
	}
	defer os.RemoveAll(tempPath)

	_, err = git.PlainClone(tempPath, false, &git.CloneOptions{
		URL:      repoURL,
		Progress: os.Stdout,
	})
	if err != nil {
		return nil, err
	}

	dn := datanode.New()
	err = filepath.Walk(tempPath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		content, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		relPath, err := filepath.Rel(tempPath, path)
		if err != nil {
			return err
		}
		// filepath.Rel uses the OS separator (backslashes on Windows).
		// Callers look entries up with slash-separated names, so normalise
		// the key to keep lookups identical on every platform.
		dn.AddData(filepath.ToSlash(relPath), content)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return dn, nil
}

72
pkg/vcs/git_test.go Normal file
View file

@ -0,0 +1,72 @@
package vcs
import (
"os"
"os/exec"
"path/filepath"
"testing"
)
// TestCloneGitRepository builds a throw-away bare repository containing a
// single committed file, clones it through CloneGitRepository via a file://
// URL, and checks that the file ends up in the returned DataNode.
//
// The test shells out to the git executable and is skipped when git is not
// installed.
func TestCloneGitRepository(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skip("git executable not found in PATH")
	}

	// runGit runs one git command in dir (the current directory when dir is
	// empty) and fails the test, including git's output, if it errors.
	runGit := func(dir string, args ...string) {
		t.Helper()
		// Supply an identity and disable signing so that "git commit" works
		// on machines that have no global git configuration.
		base := []string{
			"-c", "user.name=Test",
			"-c", "user.email=test@example.com",
			"-c", "commit.gpgsign=false",
		}
		cmd := exec.Command("git", append(base, args...)...)
		cmd.Dir = dir
		if out, err := cmd.CombinedOutput(); err != nil {
			t.Fatalf("git %v failed: %v\n%s", args, err, out)
		}
	}

	// t.TempDir directories are removed automatically when the test ends.
	bareRepoPath := t.TempDir()
	runGit(bareRepoPath, "init", "--bare")

	// Clone the bare repository into a working copy to add a commit.
	clonePath := t.TempDir()
	runGit("", "clone", bareRepoPath, clonePath)

	// Create a file and commit it.
	filePath := filepath.Join(clonePath, "foo.txt")
	if err := os.WriteFile(filePath, []byte("foo"), 0644); err != nil {
		t.Fatalf("Failed to write file: %v", err)
	}
	runGit(clonePath, "add", "foo.txt")
	runGit(clonePath, "commit", "-m", "Initial commit")
	// Push whatever the current branch is called instead of assuming
	// "master"; the default branch name depends on the local git config.
	runGit(clonePath, "push", "origin", "HEAD")

	// Clone the repository using the function under test.
	dn, err := CloneGitRepository("file://" + bareRepoPath)
	if err != nil {
		t.Fatalf("CloneGitRepository failed: %v", err)
	}

	// Verify the DataNode contains the committed file.
	exists, err := dn.Exists("foo.txt")
	if err != nil {
		t.Fatalf("Exists failed: %v", err)
	}
	if !exists {
		t.Errorf("Expected to find file foo.txt in DataNode, but it was not found")
	}
}

166
pkg/website/website.go Normal file
View file

@ -0,0 +1,166 @@
package website
import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
"borg-data-collector/pkg/datanode"
"github.com/schollz/progressbar/v3"
"golang.org/x/net/html"
)
// Downloader crawls a website recursively and collects what it downloads in
// a DataNode.
type Downloader struct {
	// Crawl configuration.
	baseURL  *url.URL // start URL; links count as local when their hostname matches
	maxDepth int      // pages deeper than this are not crawled

	// Crawl state.
	visited     map[string]bool          // URLs already requested
	dn          *datanode.DataNode       // downloaded content keyed by URL path
	progressBar *progressbar.ProgressBar // advanced once per requested URL
}
// NewDownloader returns a Downloader limited to maxDepth levels of links,
// with a fresh DataNode and an empty visited set. baseURL and progressBar
// are left unset.
func NewDownloader(maxDepth int) *Downloader {
	d := new(Downloader)
	d.dn = datanode.New()
	d.visited = map[string]bool{}
	d.maxDepth = maxDepth
	return d
}
// DownloadAndPackageWebsite crawls the site starting at startURL, following
// same-host links up to maxDepth levels deep, and returns the collected
// content as a DataNode. The only error reported is a malformed startURL;
// download failures during the crawl are printed and do not fail the call.
func DownloadAndPackageWebsite(startURL string, maxDepth int) (*datanode.DataNode, error) {
	root, parseErr := url.Parse(startURL)
	if parseErr != nil {
		return nil, parseErr
	}

	dl := NewDownloader(maxDepth)
	dl.baseURL = root

	fmt.Println("Downloading website...")
	label := progressbar.OptionSetDescription("Downloading")
	dl.progressBar = progressbar.NewOptions(1, label)

	dl.crawl(startURL, 0)
	return dl.dn, nil
}
// crawl downloads pageURL, stores its body in the DataNode under the URL's
// path, and then follows every href/src attribute found in the parsed HTML
// whose target is on the same host: asset links are downloaded directly,
// everything else is crawled recursively at depth+1.
//
// The call is a no-op when depth exceeds maxDepth or pageURL was already
// visited. Failures are printed and end this branch of the crawl; they are
// not returned.
func (d *Downloader) crawl(pageURL string, depth int) {
	if depth > d.maxDepth || d.visited[pageURL] {
		return
	}
	d.visited[pageURL] = true
	d.progressBar.Add(1)

	resp, err := http.Get(pageURL)
	if err != nil {
		fmt.Printf("Error getting %s: %v\n", pageURL, err)
		return
	}
	// Read and close the body right away rather than deferring the close:
	// this function recurses, and a deferred close would keep the response
	// of every ancestor page open until its whole subtree has been crawled.
	body, err := io.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		fmt.Printf("Error reading body of %s: %v\n", pageURL, err)
		return
	}
	// Do not archive error pages (404, 500, ...) as if they were content.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		fmt.Printf("Error getting %s: unexpected status %s\n", pageURL, resp.Status)
		return
	}

	relPath := d.getRelativePath(pageURL)
	d.dn.AddData(relPath, body)

	doc, err := html.Parse(strings.NewReader(string(body)))
	if err != nil {
		fmt.Printf("Error parsing HTML of %s: %v\n", pageURL, err)
		return
	}

	// visit walks the node tree depth-first and dispatches each local link.
	var visit func(*html.Node)
	visit = func(n *html.Node) {
		if n.Type == html.ElementNode {
			for _, attr := range n.Attr {
				if attr.Key != "href" && attr.Key != "src" {
					continue
				}
				link, err := d.resolveURL(pageURL, attr.Val)
				if err != nil || !d.isLocal(link) {
					continue
				}
				if isAsset(link) {
					d.downloadAsset(link)
				} else {
					d.crawl(link, depth+1)
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			visit(c)
		}
	}
	visit(doc)
}
// downloadAsset downloads assetURL once and stores its body in the DataNode
// under the URL's path. URLs that were already visited are skipped. Failures
// are printed and the asset is left out; nothing is returned.
func (d *Downloader) downloadAsset(assetURL string) {
	if d.visited[assetURL] {
		return
	}
	d.visited[assetURL] = true
	d.progressBar.Add(1)

	resp, err := http.Get(assetURL)
	if err != nil {
		fmt.Printf("Error getting asset %s: %v\n", assetURL, err)
		return
	}
	defer resp.Body.Close()

	// Do not archive an error page (404, 500, ...) as the asset's content.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		fmt.Printf("Error getting asset %s: unexpected status %s\n", assetURL, resp.Status)
		return
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("Error reading body of asset %s: %v\n", assetURL, err)
		return
	}

	relPath := d.getRelativePath(assetURL)
	d.dn.AddData(relPath, body)
}
// getRelativePath returns the path component of pageURL with a single
// leading slash removed; query and fragment are not part of the result.
// It returns "" for the site root and for URLs that fail to parse.
func (d *Downloader) getRelativePath(pageURL string) string {
	parsed, err := url.Parse(pageURL)
	if err == nil {
		return strings.TrimPrefix(parsed.Path, "/")
	}
	return ""
}
// resolveURL resolves ref against base and returns the absolute URL as a
// string with any fragment removed. It returns an error when either input
// fails to parse.
//
// The fragment is dropped because it is never sent to the server:
// "page.html#a" and "page.html#b" name the same document, and keeping the
// fragment would make the crawler treat them as different pages and fetch
// the document again.
func (d *Downloader) resolveURL(base, ref string) (string, error) {
	baseURL, err := url.Parse(base)
	if err != nil {
		return "", err
	}
	refURL, err := url.Parse(ref)
	if err != nil {
		return "", err
	}
	// ResolveReference returns a new URL value, so clearing the fragment
	// here does not modify baseURL or refURL.
	resolved := baseURL.ResolveReference(refURL)
	resolved.Fragment = ""
	resolved.RawFragment = ""
	return resolved.String(), nil
}
// isLocal reports whether pageURL has the same hostname as the crawl's base
// URL. Scheme and port are not compared. URLs that fail to parse are not
// local.
func (d *Downloader) isLocal(pageURL string) bool {
	candidate, err := url.Parse(pageURL)
	return err == nil && candidate.Hostname() == d.baseURL.Hostname()
}
// isAsset reports whether pageURL points at a static asset (stylesheet,
// script or image), judged by the file extension of the URL's path.
//
// Only the path is inspected, so a query string or fragment neither hides an
// asset ("style.css?v=3") nor fakes one ("search?file=a.png"). The comparison
// is case-insensitive. If pageURL cannot be parsed, the raw string is tested
// instead.
func isAsset(pageURL string) bool {
	target := pageURL
	if u, err := url.Parse(pageURL); err == nil {
		target = u.Path
	}
	target = strings.ToLower(target)

	for _, ext := range []string{".css", ".js", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico"} {
		if strings.HasSuffix(target, ext) {
			return true
		}
	}
	return false
}

View file

@ -0,0 +1,82 @@
package website
import (
"net/http"
"net/http/httptest"
"testing"
)
// TestDownloadAndPackageWebsite serves a three-page site with one stylesheet
// and one image, crawls it with a depth limit of 2, and checks that every
// page and asset is present in the returned DataNode. The root page is
// looked up under the empty name.
func TestDownloadAndPackageWebsite(t *testing.T) {
	type route struct {
		contentType string
		body        string
	}
	// Path -> canned response; any other path yields a 404.
	routes := map[string]route{
		"/": {contentType: "text/html", body: `
<!DOCTYPE html>
<html>
<head>
<title>Test Website</title>
<link rel="stylesheet" href="style.css">
</head>
<body>
<h1>Hello, Website!</h1>
<a href="/page2.html">Page 2</a>
<img src="image.png">
</body>
</html>
`},
		"/style.css": {contentType: "text/css", body: `body { color: red; }`},
		"/image.png": {contentType: "image/png", body: "fake image data"},
		"/page2.html": {contentType: "text/html", body: `
<!DOCTYPE html>
<html>
<head>
<title>Page 2</title>
</head>
<body>
<h1>Page 2</h1>
<a href="/page3.html">Page 3</a>
</body>
</html>
`},
		"/page3.html": {contentType: "text/html", body: `
<!DOCTYPE html>
<html>
<head>
<title>Page 3</title>
</head>
<body>
<h1>Page 3</h1>
</body>
</html>
`},
	}

	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		hit, ok := routes[r.URL.Path]
		if !ok {
			http.NotFound(w, r)
			return
		}
		w.Header().Set("Content-Type", hit.contentType)
		w.Write([]byte(hit.body))
	}))
	defer ts.Close()

	dn, err := DownloadAndPackageWebsite(ts.URL, 2)
	if err != nil {
		t.Fatalf("DownloadAndPackageWebsite failed: %v", err)
	}

	for _, name := range []string{"", "style.css", "image.png", "page2.html", "page3.html"} {
		found, err := dn.Exists(name)
		if err != nil {
			t.Fatalf("Exists failed for %s: %v", name, err)
		}
		if !found {
			t.Errorf("Expected to find file %s in DataNode, but it was not found", name)
		}
	}
}