diff --git a/cmd/all.go b/cmd/all.go index dcb3dac..1924aa9 100644 --- a/cmd/all.go +++ b/cmd/all.go @@ -3,10 +3,11 @@ package cmd import ( "fmt" "os" + "strings" "borg-data-collector/pkg/borg" "borg-data-collector/pkg/github" - "borg-data-collector/pkg/trix" + "borg-data-collector/pkg/vcs" "github.com/spf13/cobra" ) @@ -15,7 +16,7 @@ import ( var allCmd = &cobra.Command{ Use: "all [user/org]", Short: "Collect all public repositories from a user or organization", - Long: `Collect all public repositories from a user or organization and store them in a Trix cube.`, + Long: `Collect all public repositories from a user or organization and store them in a DataNode.`, Args: cobra.ExactArgs(1), Run: func(cmd *cobra.Command, args []string) { fmt.Println(borg.GetRandomAssimilationMessage()) @@ -26,30 +27,30 @@ var allCmd = &cobra.Command{ return } - outputFile, _ := cmd.Flags().GetString("output") - - cube, err := trix.NewCube(outputFile) - if err != nil { - fmt.Println(err) - return - } - defer cube.Close() + outputDir, _ := cmd.Flags().GetString("output") for _, repoURL := range repos { fmt.Printf("Cloning %s...\n", repoURL) - tempPath, err := os.MkdirTemp("", "borg-clone-*") - if err != nil { - fmt.Println(err) - return - } - defer os.RemoveAll(tempPath) - - err = addRepoToCube(repoURL, cube, tempPath) + dn, err := vcs.CloneGitRepository(repoURL) if err != nil { fmt.Printf("Error cloning %s: %s\n", repoURL, err) continue } + + data, err := dn.ToTar() + if err != nil { + fmt.Printf("Error serializing DataNode for %s: %v\n", repoURL, err) + continue + } + + repoName := strings.Split(repoURL, "/")[len(strings.Split(repoURL, "/"))-1] + outputFile := fmt.Sprintf("%s/%s.dat", outputDir, repoName) + err = os.WriteFile(outputFile, data, 0644) + if err != nil { + fmt.Printf("Error writing DataNode for %s to file: %v\n", repoURL, err) + continue + } } fmt.Println(borg.GetRandomCodeLongMessage()) @@ -57,5 +58,6 @@ var allCmd = &cobra.Command{ } func init() { - collectCmd.AddCommand(allCmd) + rootCmd.AddCommand(allCmd) + allCmd.PersistentFlags().String("output", ".", "Output directory for the DataNodes") } diff --git a/cmd/cat.go b/cmd/cat.go deleted file mode 100644 index 9efce67..0000000 --- a/cmd/cat.go +++ /dev/null @@ -1,53 +0,0 @@ -package cmd - -import ( - "fmt" - "io" - "os" - - "borg-data-collector/pkg/trix" - - "github.com/spf13/cobra" -) - -// catCmd represents the cat command -var catCmd = &cobra.Command{ - Use: "cat [cube-file] [file-to-extract]", - Short: "Extract a file from a Trix cube", - Long: `Extract a file from a Trix cube and print its content to standard output.`, - Args: cobra.ExactArgs(2), - Run: func(cmd *cobra.Command, args []string) { - cubeFile := args[0] - fileToExtract := args[1] - - reader, file, err := trix.Extract(cubeFile) - if err != nil { - fmt.Println(err) - return - } - defer file.Close() - - for { - hdr, err := reader.Next() - if err == io.EOF { - break - } - if err != nil { - fmt.Println(err) - return - } - - if hdr.Name == fileToExtract { - if _, err := io.Copy(os.Stdout, reader); err != nil { - fmt.Println(err) - return - } - return - } - } - }, -} - -func init() { - rootCmd.AddCommand(catCmd) -} diff --git a/cmd/collect.go b/cmd/collect.go index 33fcbe9..57960b2 100644 --- a/cmd/collect.go +++ b/cmd/collect.go @@ -1,45 +1,16 @@ package cmd import ( - "fmt" - - "borg-data-collector/pkg/trix" - "github.com/spf13/cobra" ) // collectCmd represents the collect command var collectCmd = &cobra.Command{ - Use: "collect [repository-url]", - Short: "Collect a single repository", - Long: `Collect a single repository and store it in a Trix cube.`, - Args: cobra.ExactArgs(1), - Run: func(cmd *cobra.Command, args []string) { - if len(args) < 1 { - fmt.Println("Please provide a repository URL") - return - } - repoURL := args[0] - clonePath, _ := cmd.Flags().GetString("path") - outputFile, _ := cmd.Flags().GetString("output") - - cube, err := trix.NewCube(outputFile) - if err != nil { - fmt.Println(err) - return - } - defer cube.Close() - - err = addRepoToCube(repoURL, cube, clonePath) - if err != nil { - fmt.Println(err) - return - } - }, + Use: "collect", + Short: "Collect a resource and store it in a DataNode.", + Long: `Collect a resource from a git repository, a website, or other URI and store it in a DataNode.`, } func init() { rootCmd.AddCommand(collectCmd) - collectCmd.PersistentFlags().String("path", "/tmp/borg-clone", "Path to clone the repository") - collectCmd.PersistentFlags().String("output", "borg.cube", "Output file for the Trix cube") } diff --git a/cmd/collect_git.go b/cmd/collect_git.go new file mode 100644 index 0000000..2fcd450 --- /dev/null +++ b/cmd/collect_git.go @@ -0,0 +1,47 @@ +package cmd + +import ( + "fmt" + "os" + + "borg-data-collector/pkg/vcs" + + "github.com/spf13/cobra" +) + +// collectGitCmd represents the collect git command +var collectGitCmd = &cobra.Command{ + Use: "git [repository-url]", + Short: "Collect a single Git repository", + Long: `Collect a single Git repository and store it in a DataNode.`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + repoURL := args[0] + outputFile, _ := cmd.Flags().GetString("output") + + dn, err := vcs.CloneGitRepository(repoURL) + if err != nil { + fmt.Printf("Error cloning repository: %v\n", err) + return + } + + data, err := dn.ToTar() + if err != nil { + fmt.Printf("Error serializing DataNode: %v\n", err) + return + } + + err = os.WriteFile(outputFile, data, 0644) + if err != nil { + fmt.Printf("Error writing DataNode to file: %v\n", err) + return + } + + fmt.Printf("Repository saved to %s\n", outputFile) + }, +} + +func init() { + collectCmd.AddCommand(collectGitCmd) + collectGitCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode") +} diff --git a/cmd/collect_pwa.go b/cmd/collect_pwa.go new file mode 100644 index 0000000..d7b5402 --- /dev/null +++ b/cmd/collect_pwa.go @@ -0,0 +1,56 @@ +package cmd + +import ( + "fmt" + "os" + + "borg-data-collector/pkg/pwa" + + "github.com/spf13/cobra" +) + +// collectPWACmd represents the collect pwa command +var collectPWACmd = &cobra.Command{ + Use: "pwa [url]", + Short: "Collect a single PWA", + Long: `Collect a single PWA and store it in a DataNode.`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + pwaURL := args[0] + outputFile, _ := cmd.Flags().GetString("output") + + fmt.Println("Finding PWA manifest...") + manifestURL, err := pwa.FindManifest(pwaURL) + if err != nil { + fmt.Printf("Error finding manifest: %v\n", err) + return + } + fmt.Printf("Found manifest: %s\n", manifestURL) + + fmt.Println("Downloading and packaging PWA...") + dn, err := pwa.DownloadAndPackagePWA(pwaURL, manifestURL) + if err != nil { + fmt.Printf("Error downloading and packaging PWA: %v\n", err) + return + } + + pwaData, err := dn.ToTar() + if err != nil { + fmt.Printf("Error converting PWA to bytes: %v\n", err) + return + } + + err = os.WriteFile(outputFile, pwaData, 0644) + if err != nil { + fmt.Printf("Error writing PWA to file: %v\n", err) + return + } + + fmt.Printf("PWA saved to %s\n", outputFile) + }, +} + +func init() { + collectCmd.AddCommand(collectPWACmd) + collectPWACmd.PersistentFlags().String("output", "pwa.dat", "Output file for the DataNode") +} diff --git a/cmd/collect_website.go b/cmd/collect_website.go new file mode 100644 index 0000000..90911a2 --- /dev/null +++ b/cmd/collect_website.go @@ -0,0 +1,49 @@ +package cmd + +import ( + "fmt" + "os" + + "borg-data-collector/pkg/website" + + "github.com/spf13/cobra" +) + +// collectWebsiteCmd represents the collect website command +var collectWebsiteCmd = &cobra.Command{ + Use: "website [url]", + Short: "Collect a single website", + Long: `Collect a single website and store it in a DataNode.`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + websiteURL := args[0] + outputFile, _ := cmd.Flags().GetString("output") + depth, _ := cmd.Flags().GetInt("depth") + + dn, err := website.DownloadAndPackageWebsite(websiteURL, depth) + if err != nil { + fmt.Printf("Error downloading and packaging website: %v\n", err) + return + } + + websiteData, err := dn.ToTar() + if err != nil { + fmt.Printf("Error converting website to bytes: %v\n", err) + return + } + + err = os.WriteFile(outputFile, websiteData, 0644) + if err != nil { + fmt.Printf("Error writing website to file: %v\n", err) + return + } + + fmt.Printf("Website saved to %s\n", outputFile) + }, +} + +func init() { + collectCmd.AddCommand(collectWebsiteCmd) + collectWebsiteCmd.PersistentFlags().String("output", "website.dat", "Output file for the DataNode") + collectWebsiteCmd.PersistentFlags().Int("depth", 2, "Recursion depth for downloading") +} diff --git a/cmd/helpers.go b/cmd/helpers.go deleted file mode 100644 index f318023..0000000 --- a/cmd/helpers.go +++ /dev/null @@ -1,41 +0,0 @@ -package cmd - -import ( - "os" - "path/filepath" - - "borg-data-collector/pkg/trix" - - "github.com/go-git/go-git/v5" -) - -func addRepoToCube(repoURL string, cube *trix.Cube, clonePath string) error { - _, err := git.PlainClone(clonePath, false, &git.CloneOptions{ - URL: repoURL, - Progress: os.Stdout, - }) - - if err != nil { - return err - } - - err = filepath.Walk(clonePath, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() { - content, err := os.ReadFile(path) - if err != nil { - return err - } - relPath, err := filepath.Rel(clonePath, path) - if err != nil { - return err - } - cube.AddFile(relPath, content) - } - return nil - }) - - return err -} diff --git a/cmd/ingest.go b/cmd/ingest.go deleted file mode 100644 index 23a8f32..0000000 --- a/cmd/ingest.go +++ /dev/null @@ -1,56 +0,0 @@ -package cmd - -import ( - "fmt" - "os" - - "borg-data-collector/pkg/borg" - "borg-data-collector/pkg/trix" - - "github.com/spf13/cobra" -) - -// ingestCmd represents the ingest command -var ingestCmd = &cobra.Command{ - Use: "ingest [cube-file] [file-to-add]", - Short: "Add a file to a Trix cube", - Long: `Add a file to a Trix cube. If the cube file does not exist, it will be created.`, - Args: cobra.ExactArgs(2), - Run: func(cmd *cobra.Command, args []string) { - cubeFile := args[0] - fileToAdd := args[1] - - var cube *trix.Cube - var err error - - if _, err := os.Stat(cubeFile); os.IsNotExist(err) { - cube, err = trix.NewCube(cubeFile) - } else { - cube, err = trix.AppendToCube(cubeFile) - } - - if err != nil { - fmt.Println(err) - return - } - defer cube.Close() - - content, err := os.ReadFile(fileToAdd) - if err != nil { - fmt.Println(err) - return - } - - err = cube.AddFile(fileToAdd, content) - if err != nil { - fmt.Println(err) - return - } - - fmt.Println(borg.GetRandomCodeShortMessage()) - }, -} - -func init() { - rootCmd.AddCommand(ingestCmd) -} diff --git a/cmd/serve.go b/cmd/serve.go new file mode 100644 index 0000000..57beb87 --- /dev/null +++ b/cmd/serve.go @@ -0,0 +1,49 @@ +package cmd + +import ( + "fmt" + "net/http" + "os" + + "borg-data-collector/pkg/datanode" + + "github.com/spf13/cobra" +) + +// serveCmd represents the serve command +var serveCmd = &cobra.Command{ + Use: "serve [file]", + Short: "Serve a packaged PWA file", + Long: `Serves the contents of a packaged PWA file using a static file server.`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + pwaFile := args[0] + port, _ := cmd.Flags().GetString("port") + + pwaData, err := os.ReadFile(pwaFile) + if err != nil { + fmt.Printf("Error reading PWA file: %v\n", err) + return + } + + dn, err := datanode.FromTar(pwaData) + if err != nil { + fmt.Printf("Error creating DataNode from tarball: %v\n", err) + return + } + + http.Handle("/", http.FileServer(http.FS(dn))) + + fmt.Printf("Serving PWA on http://localhost:%s\n", port) + err = http.ListenAndServe(":"+port, nil) + if err != nil { + fmt.Printf("Error starting server: %v\n", err) + return + } + }, +} + +func init() { + rootCmd.AddCommand(serveCmd) + serveCmd.PersistentFlags().String("port", "8080", "Port to serve the PWA on") +} diff --git a/go.mod b/go.mod index d98196b..5b4dcca 100644 --- a/go.mod +++ b/go.mod @@ -18,13 +18,18 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect + github.com/leaanthony/debme v1.2.1 // indirect + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/schollz/progressbar/v3 v3.18.0 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/skeema/knownhosts v1.3.1 // indirect github.com/spf13/pflag v1.0.10 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect - golang.org/x/crypto v0.37.0 // indirect - golang.org/x/net v0.39.0 // indirect - golang.org/x/sys v0.32.0 // indirect + golang.org/x/crypto v0.43.0 // indirect + golang.org/x/net v0.46.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/term v0.36.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect ) diff --git a/go.sum b/go.sum index ae7851b..8957994 100644 --- a/go.sum +++ b/go.sum @@ -31,11 +31,21 @@ github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/leaanthony/debme v1.2.1 h1:9Tgwf+kjcrbMQ4WnPcEIUcQuIZYqdWftzZkBr+i/oOc= +github.com/leaanthony/debme v1.2.1/go.mod h1:3V+sCm5tYAgQymvSOfYQ5Xx2JCr+OXiD9Jkw3otUjiA= +github.com/leaanthony/slicer v1.5.0/go.mod h1:FwrApmf8gOrpzEWM2J/9Lh79tyq8KTX5AzRtwV7m4AY= +github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/schollz/progressbar/v3 v3.18.0 h1:uXdoHABRFmNIjUfte/Ex7WtuyVslrw2wVPQmCN62HpA= +github.com/schollz/progressbar/v3 v3.18.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -54,9 +64,13 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -65,7 +79,11 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/pkg/datanode/datanode.go b/pkg/datanode/datanode.go new file mode 100644 index 0000000..fe2f43b --- /dev/null +++ b/pkg/datanode/datanode.go @@ -0,0 +1,317 @@ +package datanode + +import ( + "archive/tar" + "bytes" + "io" + "io/fs" + "os" + "path" + "sort" + "strings" + "time" +) + +// DataNode is an in-memory filesystem that is compatible with fs.FS. +type DataNode struct { + files map[string]*dataFile +} + +// New creates a new, empty DataNode. +func New() *DataNode { + return &DataNode{files: make(map[string]*dataFile)} +} + +// FromTar creates a new DataNode from a tarball. +func FromTar(tarball []byte) (*DataNode, error) { + dn := New() + tarReader := tar.NewReader(bytes.NewReader(tarball)) + + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + + if header.Typeflag == tar.TypeReg { + data, err := io.ReadAll(tarReader) + if err != nil { + return nil, err + } + dn.AddData(header.Name, data) + } + } + + return dn, nil +} + +// ToTar serializes the DataNode to a tarball. +func (d *DataNode) ToTar() ([]byte, error) { + buf := new(bytes.Buffer) + tw := tar.NewWriter(buf) + + for _, file := range d.files { + hdr := &tar.Header{ + Name: file.name, + Mode: 0600, + Size: int64(len(file.content)), + ModTime: file.modTime, + } + if err := tw.WriteHeader(hdr); err != nil { + return nil, err + } + if _, err := tw.Write(file.content); err != nil { + return nil, err + } + } + + if err := tw.Close(); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// AddData adds a file to the DataNode. +func (d *DataNode) AddData(name string, content []byte) { + name = strings.TrimPrefix(name, "/") + d.files[name] = &dataFile{ + name: name, + content: content, + modTime: time.Now(), + } +} + +// Open opens a file from the DataNode. +func (d *DataNode) Open(name string) (fs.File, error) { + name = strings.TrimPrefix(name, "/") + if file, ok := d.files[name]; ok { + return &dataFileReader{file: file}, nil + } + // Check if it's a directory + prefix := name + "/" + if name == "." || name == "" { + prefix = "" + } + for p := range d.files { + if strings.HasPrefix(p, prefix) { + return &dirFile{path: name, modTime: time.Now()}, nil + } + } + return nil, fs.ErrNotExist +} + +// ReadDir reads and returns all directory entries for the named directory. +func (d *DataNode) ReadDir(name string) ([]fs.DirEntry, error) { + name = strings.TrimPrefix(name, "/") + if name == "." { + name = "" + } + + entries := []fs.DirEntry{} + seen := make(map[string]bool) + + prefix := "" + if name != "" { + prefix = name + "/" + } + + for p := range d.files { + if !strings.HasPrefix(p, prefix) { + continue + } + + relPath := strings.TrimPrefix(p, prefix) + firstComponent := strings.Split(relPath, "/")[0] + + if seen[firstComponent] { + continue + } + seen[firstComponent] = true + + if strings.Contains(relPath, "/") { + // It's a directory + dir := &dirInfo{name: firstComponent, modTime: time.Now()} + entries = append(entries, fs.FileInfoToDirEntry(dir)) + } else { + // It's a file + file := d.files[p] + info, _ := file.Stat() + entries = append(entries, fs.FileInfoToDirEntry(info)) + } + } + + // Sort for stable order in tests + sort.Slice(entries, func(i, j int) bool { + return entries[i].Name() < entries[j].Name() + }) + + return entries, nil +} + +// Stat returns the FileInfo structure describing file. +func (d *DataNode) Stat(name string) (fs.FileInfo, error) { + name = strings.TrimPrefix(name, "/") + if file, ok := d.files[name]; ok { + return file.Stat() + } + // Check if it's a directory + prefix := name + "/" + if name == "." || name == "" { + prefix = "" + } + for p := range d.files { + if strings.HasPrefix(p, prefix) { + return &dirInfo{name: path.Base(name), modTime: time.Now()}, nil + } + } + + return nil, fs.ErrNotExist +} + +// ExistsOptions allows customizing the Exists check. +type ExistsOptions struct { + WantType fs.FileMode +} + +// Exists returns true if the file or directory exists. +func (d *DataNode) Exists(name string, opts ...ExistsOptions) (bool, error) { + info, err := d.Stat(name) + if err != nil { + if err == fs.ErrNotExist || os.IsNotExist(err) { + return false, nil + } + return false, err + } + if len(opts) > 0 { + if opts[0].WantType == fs.ModeDir && !info.IsDir() { + return false, nil + } + if opts[0].WantType != fs.ModeDir && info.IsDir() { + return false, nil + } + } + return true, nil +} + +// WalkOptions allows customizing the Walk behavior. +type WalkOptions struct { + MaxDepth int + Filter func(path string, d fs.DirEntry) bool + SkipErrors bool +} + +// Walk recursively descends the file tree rooted at root, calling fn for each file or directory. +func (d *DataNode) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error { + var maxDepth int + var filter func(string, fs.DirEntry) bool + var skipErrors bool + if len(opts) > 0 { + maxDepth = opts[0].MaxDepth + filter = opts[0].Filter + skipErrors = opts[0].SkipErrors + } + + return fs.WalkDir(d, root, func(path string, de fs.DirEntry, err error) error { + if err != nil { + if skipErrors { + return nil + } + return fn(path, de, err) + } + if filter != nil && !filter(path, de) { + return nil + } + if maxDepth > 0 { + currentDepth := strings.Count(strings.TrimPrefix(path, root), "/") + if de.IsDir() && currentDepth >= maxDepth { + return fs.SkipDir + } + } + return fn(path, de, nil) + }) +} + +// CopyFile copies a file from the DataNode to the local filesystem. +func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode) error { + sourceFile, err := d.Open(sourcePath) + if err != nil { + return err + } + defer sourceFile.Close() + + targetFile, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, perm) + if err != nil { + return err + } + defer targetFile.Close() + + _, err = io.Copy(targetFile, sourceFile) + return err +} + +// dataFile represents a file in the DataNode. +type dataFile struct { + name string + content []byte + modTime time.Time +} + +func (d *dataFile) Stat() (fs.FileInfo, error) { return &dataFileInfo{file: d}, nil } +func (d *dataFile) Read(p []byte) (int, error) { return 0, io.EOF } +func (d *dataFile) Close() error { return nil } + +// dataFileInfo implements fs.FileInfo for a dataFile. +type dataFileInfo struct{ file *dataFile } + +func (d *dataFileInfo) Name() string { return path.Base(d.file.name) } +func (d *dataFileInfo) Size() int64 { return int64(len(d.file.content)) } +func (d *dataFileInfo) Mode() fs.FileMode { return 0444 } +func (d *dataFileInfo) ModTime() time.Time { return d.file.modTime } +func (d *dataFileInfo) IsDir() bool { return false } +func (d *dataFileInfo) Sys() interface{} { return nil } + +// dataFileReader implements fs.File for a dataFile. +type dataFileReader struct { + file *dataFile + reader *bytes.Reader +} + +func (d *dataFileReader) Stat() (fs.FileInfo, error) { return d.file.Stat() } +func (d *dataFileReader) Read(p []byte) (int, error) { + if d.reader == nil { + d.reader = bytes.NewReader(d.file.content) + } + return d.reader.Read(p) +} +func (d *dataFileReader) Close() error { return nil } + +// dirInfo implements fs.FileInfo for an implicit directory. +type dirInfo struct { + name string + modTime time.Time +} + +func (d *dirInfo) Name() string { return d.name } +func (d *dirInfo) Size() int64 { return 0 } +func (d *dirInfo) Mode() fs.FileMode { return fs.ModeDir | 0555 } +func (d *dirInfo) ModTime() time.Time { return d.modTime } +func (d *dirInfo) IsDir() bool { return true } +func (d *dirInfo) Sys() interface{} { return nil } + +// dirFile implements fs.File for a directory. +type dirFile struct { + path string + modTime time.Time +} + +func (d *dirFile) Stat() (fs.FileInfo, error) { + return &dirInfo{name: path.Base(d.path), modTime: d.modTime}, nil +} +func (d *dirFile) Read([]byte) (int, error) { + return 0, &fs.PathError{Op: "read", Path: d.path, Err: fs.ErrInvalid} +} +func (d *dirFile) Close() error { return nil } diff --git a/pkg/datanode/datanode_test.go b/pkg/datanode/datanode_test.go new file mode 100644 index 0000000..847d20b --- /dev/null +++ b/pkg/datanode/datanode_test.go @@ -0,0 +1,124 @@ +package datanode + +import ( + "io/fs" + "os" + "reflect" + "sort" + "testing" +) + +func TestDataNode(t *testing.T) { + dn := New() + dn.AddData("foo.txt", []byte("foo")) + dn.AddData("bar/baz.txt", []byte("baz")) + dn.AddData("bar/qux.txt", []byte("qux")) + + // Test Open + file, err := dn.Open("foo.txt") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + file.Close() + + _, err = dn.Open("nonexistent.txt") + if err == nil { + t.Fatalf("Expected error opening nonexistent file, got nil") + } + + // Test Stat + info, err := dn.Stat("bar/baz.txt") + if err != nil { + t.Fatalf("Stat failed: %v", err) + } + if info.Name() != "baz.txt" { + t.Errorf("Expected name baz.txt, got %s", info.Name()) + } + if info.Size() != 3 { + t.Errorf("Expected size 3, got %d", info.Size()) + } + if info.IsDir() { + t.Errorf("Expected baz.txt to not be a directory") + } + + dirInfo, err := dn.Stat("bar") + if err != nil { + t.Fatalf("Stat directory failed: %v", err) + } + if !dirInfo.IsDir() { + t.Errorf("Expected 'bar' to be a directory") + } + + // Test Exists + exists, err := dn.Exists("foo.txt") + if err != nil || !exists { + t.Errorf("Expected foo.txt to exist, err: %v", err) + } + exists, err = dn.Exists("bar") + if err != nil || !exists { + t.Errorf("Expected 'bar' directory to exist, err: %v", err) + } + exists, err = dn.Exists("nonexistent") + if err != nil || exists { + t.Errorf("Expected 'nonexistent' to not exist, err: %v", err) + } + + // Test ReadDir + entries, err := dn.ReadDir(".") + if err != nil { + t.Fatalf("ReadDir failed: %v", err) + } + expectedRootEntries := []string{"bar", "foo.txt"} + if len(entries) != len(expectedRootEntries) { + t.Errorf("Expected %d entries in root, got %d", len(expectedRootEntries), len(entries)) + } + var rootEntryNames []string + for _, e := range entries { + rootEntryNames = append(rootEntryNames, e.Name()) + } + sort.Strings(rootEntryNames) + if !reflect.DeepEqual(rootEntryNames, expectedRootEntries) { + t.Errorf("Expected entries %v, got %v", expectedRootEntries, rootEntryNames) + } + + barEntries, err := dn.ReadDir("bar") + if err != nil { + t.Fatalf("ReadDir('bar') failed: %v", err) + } + expectedBarEntries := []string{"baz.txt", "qux.txt"} + if len(barEntries) != len(expectedBarEntries) { + t.Errorf("Expected %d entries in 'bar', got %d", len(expectedBarEntries), len(barEntries)) + } + + // Test Walk + var paths []string + dn.Walk(".", func(path string, d fs.DirEntry, err error) error { + paths = append(paths, path) + return nil + }) + expectedPaths := []string{".", "bar", "bar/baz.txt", "bar/qux.txt", "foo.txt"} + sort.Strings(paths) + if !reflect.DeepEqual(paths, expectedPaths) { + t.Errorf("Walk expected paths %v, got %v", expectedPaths, paths) + } + + // Test CopyFile + tmpfile, err := os.CreateTemp("", "datanode-test-") + if err != nil { + t.Fatalf("CreateTemp failed: %v", err) + } + defer os.Remove(tmpfile.Name()) + + err = dn.CopyFile("foo.txt", tmpfile.Name(), 0644) + if err != nil { + t.Fatalf("CopyFile failed: %v", err) + } + + content, err := os.ReadFile(tmpfile.Name()) + if err != nil { + t.Fatalf("ReadFile failed: %v", err) + } + if string(content) != "foo" { + t.Errorf("Expected foo, got %s", string(content)) + } +} diff --git a/pkg/pwa/pwa.go b/pkg/pwa/pwa.go new file mode 100644 index 0000000..1679fa9 --- /dev/null +++ b/pkg/pwa/pwa.go @@ -0,0 +1,169 @@ +package pwa + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "path" + + "borg-data-collector/pkg/datanode" + + "golang.org/x/net/html" +) + +// Manifest represents a simple PWA manifest structure. +type Manifest struct { + Name string `json:"name"` + ShortName string `json:"short_name"` + StartURL string `json:"start_url"` + Icons []Icon `json:"icons"` +} + +// Icon represents an icon in the PWA manifest. +type Icon struct { + Src string `json:"src"` + Sizes string `json:"sizes"` + Type string `json:"type"` +} + +// FindManifest finds the manifest URL from a given HTML page. +func FindManifest(pageURL string) (string, error) { + resp, err := http.Get(pageURL) + if err != nil { + return "", err + } + defer resp.Body.Close() + + doc, err := html.Parse(resp.Body) + if err != nil { + return "", err + } + + var manifestPath string + var f func(*html.Node) + f = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "link" { + isManifest := false + for _, a := range n.Attr { + if a.Key == "rel" && a.Val == "manifest" { + isManifest = true + break + } + } + if isManifest { + for _, a := range n.Attr { + if a.Key == "href" { + manifestPath = a.Val + return // exit once found + } + } + } + } + for c := n.FirstChild; c != nil && manifestPath == ""; c = c.NextSibling { + f(c) + } + } + f(doc) + + if manifestPath == "" { + return "", fmt.Errorf("manifest not found") + } + + resolvedURL, err := resolveURL(pageURL, manifestPath) + if err != nil { + return "", fmt.Errorf("could not resolve manifest URL: %w", err) + } + + return resolvedURL.String(), nil +} + +// DownloadAndPackagePWA downloads all assets of a PWA and packages them into a DataNode. +func DownloadAndPackagePWA(baseURL string, manifestURL string) (*datanode.DataNode, error) { + manifestAbsURL, err := resolveURL(baseURL, manifestURL) + if err != nil { + return nil, fmt.Errorf("could not resolve manifest URL: %w", err) + } + + resp, err := http.Get(manifestAbsURL.String()) + if err != nil { + return nil, fmt.Errorf("could not download manifest: %w", err) + } + defer resp.Body.Close() + + manifestBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("could not read manifest body: %w", err) + } + + var manifest Manifest + if err := json.Unmarshal(manifestBody, &manifest); err != nil { + return nil, fmt.Errorf("could not parse manifest JSON: %w", err) + } + + dn := datanode.New() + dn.AddData("manifest.json", manifestBody) + + if manifest.StartURL != "" { + startURLAbs, err := resolveURL(manifestAbsURL.String(), manifest.StartURL) + if err != nil { + return nil, fmt.Errorf("could not resolve start_url: %w", err) + } + err = downloadAndAddFile(dn, startURLAbs, manifest.StartURL) + if err != nil { + return nil, fmt.Errorf("failed to download start_url asset: %w", err) + } + } + + for _, icon := range manifest.Icons { + iconURLAbs, err := resolveURL(manifestAbsURL.String(), icon.Src) + if err != nil { + fmt.Printf("Warning: could not resolve icon URL %s: %v\n", icon.Src, err) + continue + } + err = downloadAndAddFile(dn, iconURLAbs, icon.Src) + if err != nil { + fmt.Printf("Warning: failed to download icon %s: %v\n", icon.Src, err) + } + } + + baseURLAbs, _ := url.Parse(baseURL) + err = downloadAndAddFile(dn, baseURLAbs, "index.html") + if err != nil { + return nil, fmt.Errorf("failed to download base HTML: %w", err) + } + + return dn, nil +} + +func resolveURL(base, ref string) (*url.URL, error) { + baseURL, err := url.Parse(base) + if err != nil { + return nil, err + } + refURL, err := url.Parse(ref) + if err != nil { + return nil, err + } + return baseURL.ResolveReference(refURL), nil +} + +func downloadAndAddFile(dn *datanode.DataNode, fileURL *url.URL, internalPath string) error { + resp, err := http.Get(fileURL.String()) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("bad status: %s", resp.Status) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + dn.AddData(path.Clean(internalPath), data) + return nil +} diff --git a/pkg/pwa/pwa_test.go b/pkg/pwa/pwa_test.go new file mode 100644 index 0000000..f90fdb3 --- /dev/null +++ b/pkg/pwa/pwa_test.go @@ -0,0 +1,122 @@ +package pwa + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestFindManifest(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` + + +
+
+
+
+ `))
+ case "/style.css":
+ w.Header().Set("Content-Type", "text/css")
+ w.Write([]byte(`body { color: red; }`))
+ case "/image.png":
+ w.Header().Set("Content-Type", "image/png")
+ w.Write([]byte("fake image data"))
+ case "/page2.html":
+ w.Header().Set("Content-Type", "text/html")
+ w.Write([]byte(`
+
+
+
+