diff --git a/cmd/pwa.go b/cmd/pwa.go new file mode 100644 index 0000000..d3deb09 --- /dev/null +++ b/cmd/pwa.go @@ -0,0 +1,50 @@ +package cmd + +import ( + "fmt" + "os" + + "borg-data-collector/pkg/pwa" + + "github.com/spf13/cobra" +) + +// pwaCmd represents the pwa command +var pwaCmd = &cobra.Command{ + Use: "pwa [url]", + Short: "Download a PWA from a URL", + Long: `Downloads a Progressive Web Application (PWA) from a given URL by finding its manifest.`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + pwaURL := args[0] + outputFile, _ := cmd.Flags().GetString("output") + + fmt.Println("Finding PWA manifest...") + manifestURL, err := pwa.FindManifestURL(pwaURL) + if err != nil { + fmt.Printf("Error finding manifest: %v\n", err) + return + } + fmt.Printf("Found manifest: %s\n", manifestURL) + + fmt.Println("Downloading and packaging PWA...") + pwaData, err := pwa.DownloadAndPackagePWA(pwaURL, manifestURL) + if err != nil { + fmt.Printf("Error downloading and packaging PWA: %v\n", err) + return + } + + err = os.WriteFile(outputFile, pwaData, 0644) + if err != nil { + fmt.Printf("Error writing PWA to file: %v\n", err) + return + } + + fmt.Printf("PWA saved to %s\n", outputFile) + }, +} + +func init() { + rootCmd.AddCommand(pwaCmd) + pwaCmd.PersistentFlags().String("output", "pwa.tar", "Output file for the PWA tarball") +} diff --git a/cmd/serve.go b/cmd/serve.go new file mode 100644 index 0000000..b780df7 --- /dev/null +++ b/cmd/serve.go @@ -0,0 +1,169 @@ +package cmd + +import ( + "archive/tar" + "bytes" + "fmt" + "io" + "io/fs" + "net/http" + "os" + "path" + "strings" + "time" + + "github.com/spf13/cobra" +) + +// serveCmd represents the serve command +var serveCmd = &cobra.Command{ + Use: "serve [file]", + Short: "Serve a packaged PWA file", + Long: `Serves the contents of a packaged PWA file using a static file server.`, + Args: cobra.ExactArgs(1), + Run: func(cmd *cobra.Command, args []string) { + pwaFile := args[0] + port, _ := 
// memoryFS is a read-only, in-memory fs.FS built from the regular files of a
// PWA tarball, so the archive can be served without unpacking it to disk.
type memoryFS struct {
	// files maps a slash-separated path relative to the archive root (no
	// leading "/", for example "icons/icon.png") to the stored file.
	files map[string]*memoryFile
	// names holds the keys of files in the order they first appeared in the
	// archive; it gives directory listings a stable order.
	names []string
}

// Compile-time checks that the types satisfy the interfaces net/http probes for.
var (
	_ fs.FS          = (*memoryFS)(nil)
	_ fs.ReadDirFile = (*memoryDir)(nil)
	_ io.ReadSeeker  = (*memoryFileReader)(nil)
)

// newMemoryFS reads every regular file out of tarball into a memoryFS.
//
// Entry names are normalised with archiveKey, so "index.html", "./index.html"
// and "/index.html" all end up under the key "index.html". When a name occurs
// more than once the last entry wins. Non-regular entries are ignored. An
// error from the tar reader is returned as is.
func newMemoryFS(tarball []byte) (*memoryFS, error) {
	memFS := &memoryFS{files: make(map[string]*memoryFile)}
	tarReader := tar.NewReader(bytes.NewReader(tarball))

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			return memFS, nil
		}
		if err != nil {
			return nil, err
		}
		if header.Typeflag != tar.TypeReg {
			continue
		}

		name := archiveKey(header.Name)
		if name == "" {
			continue // the entry names the archive root itself; nothing to store
		}
		data, err := io.ReadAll(tarReader)
		if err != nil {
			return nil, err
		}
		if _, seen := memFS.files[name]; !seen {
			memFS.names = append(memFS.names, name)
		}
		memFS.files[name] = &memoryFile{
			name:    name,
			content: data,
			modTime: header.ModTime,
		}
	}
}

// archiveKey converts a tar entry name into the key form used by memoryFS.
// Cleaning the name as a rooted path drops "./" and "//" and keeps ".." from
// climbing above the root; the result has no leading slash and is "" for the
// root itself.
func archiveKey(name string) string {
	return path.Clean("/" + name)[1:]
}

// Open implements fs.FS.
//
// A stored file is returned as a readable, seekable handle. "." and every
// directory implied by a stored file's path are returned as directories:
// http.FS turns the request path "/" into ".", and http.FileServer only looks
// for index.html after it has opened a directory. The empty name (and "/")
// still opens index.html directly, as it always has for direct callers.
// Anything else yields a *fs.PathError wrapping fs.ErrNotExist.
func (m *memoryFS) Open(name string) (fs.File, error) {
	key := strings.TrimPrefix(name, "/")
	if key == "" {
		key = "index.html"
	}
	if file, ok := m.files[key]; ok {
		return &memoryFileReader{file: file}, nil
	}
	if m.isDir(key) {
		return &memoryDir{name: key, entries: m.children(key)}, nil
	}
	return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
}

// isDir reports whether dir is the root (".") or a directory implied by the
// path of at least one stored file.
func (m *memoryFS) isDir(dir string) bool {
	if dir == "." {
		return true
	}
	prefix := dir + "/"
	for _, name := range m.names {
		if strings.HasPrefix(name, prefix) {
			return true
		}
	}
	return false
}

// children returns the immediate entries of dir, files and implied
// sub-directories alike, in archive order and without duplicates.
func (m *memoryFS) children(dir string) []fs.DirEntry {
	prefix := ""
	if dir != "." {
		prefix = dir + "/"
	}

	var entries []fs.DirEntry
	seen := make(map[string]bool)
	for _, name := range m.names {
		if !strings.HasPrefix(name, prefix) {
			continue
		}
		child := name[len(prefix):]
		var info fs.FileInfo = &memoryFileInfo{file: m.files[name]}
		if i := strings.IndexByte(child, '/'); i >= 0 {
			// The file lives deeper down; what dir contains is a sub-directory.
			child = child[:i]
			info = &memoryDirInfo{name: child}
		}
		if seen[child] {
			continue
		}
		seen[child] = true
		entries = append(entries, fs.FileInfoToDirEntry(info))
	}
	return entries
}

// memoryFile is one stored file: its key in memoryFS, its bytes and the
// modification time recorded in the tar header.
type memoryFile struct {
	name    string
	content []byte
	modTime time.Time
}

// Stat returns file information backed by m.
func (m *memoryFile) Stat() (fs.FileInfo, error) {
	return &memoryFileInfo{file: m}, nil
}

// Read always fails. A memoryFile has no read position, so reading goes
// through the memoryFileReader that memoryFS.Open hands out. Returning an
// error instead of (0, nil) keeps callers such as io.ReadAll from spinning.
func (m *memoryFile) Read(p []byte) (int, error) {
	return 0, &fs.PathError{Op: "read", Path: m.name, Err: fs.ErrInvalid}
}

// Close is a no-op; there is nothing to release.
func (m *memoryFile) Close() error {
	return nil
}

// memoryFileInfo implements fs.FileInfo for a memoryFile.
type memoryFileInfo struct {
	file *memoryFile
}

// Name returns the last path element of the file's key.
func (m *memoryFileInfo) Name() string {
	return path.Base(m.file.name)
}

// Size returns the length of the stored content in bytes.
func (m *memoryFileInfo) Size() int64 {
	return int64(len(m.file.content))
}

// Mode reports a read-only regular file.
func (m *memoryFileInfo) Mode() fs.FileMode {
	return 0444
}

// ModTime returns the modification time taken from the tar header.
func (m *memoryFileInfo) ModTime() time.Time {
	return m.file.modTime
}

// IsDir is always false; directories are described by memoryDirInfo.
func (m *memoryFileInfo) IsDir() bool {
	return false
}

// Sys returns nil; there is no underlying data source.
func (m *memoryFileInfo) Sys() interface{} {
	return nil
}

// memoryFileReader is an open handle on a memoryFile. Each handle has its own
// read position, and it implements io.Seeker so net/http can serve Range
// requests and rewind after sniffing the content type.
type memoryFileReader struct {
	file   *memoryFile
	reader *bytes.Reader
}

// cursor returns the handle's reader, creating it on first use so that a
// handle built with only the file field set works.
func (m *memoryFileReader) cursor() *bytes.Reader {
	if m.reader == nil {
		m.reader = bytes.NewReader(m.file.content)
	}
	return m.reader
}

// Stat returns file information for the underlying memoryFile.
func (m *memoryFileReader) Stat() (fs.FileInfo, error) {
	return m.file.Stat()
}

// Read reads from the current position; it returns io.EOF at the end.
func (m *memoryFileReader) Read(p []byte) (int, error) {
	return m.cursor().Read(p)
}

// Seek moves the read position, with the semantics of io.Seeker.
func (m *memoryFileReader) Seek(offset int64, whence int) (int64, error) {
	return m.cursor().Seek(offset, whence)
}

// Close is a no-op; there is nothing to release.
func (m *memoryFileReader) Close() error {
	return nil
}

// memoryDir is an open handle on a directory of a memoryFS. Directories are
// not stored; they are derived from the paths of the stored files.
type memoryDir struct {
	name    string        // "." for the root, otherwise the directory's key
	entries []fs.DirEntry // immediate children, in archive order
	next    int           // index of the first entry ReadDir has not returned yet
}

// Stat describes the directory.
func (d *memoryDir) Stat() (fs.FileInfo, error) {
	return &memoryDirInfo{name: path.Base(d.name)}, nil
}

// Read always fails: a directory has no byte content.
func (d *memoryDir) Read(p []byte) (int, error) {
	return 0, &fs.PathError{Op: "read", Path: d.name, Err: fs.ErrInvalid}
}

// Close is a no-op; there is nothing to release.
func (d *memoryDir) Close() error {
	return nil
}

// ReadDir implements fs.ReadDirFile. With n <= 0 it returns all remaining
// entries and a nil error. With n > 0 it returns at most n entries and io.EOF
// once none are left.
func (d *memoryDir) ReadDir(n int) ([]fs.DirEntry, error) {
	remaining := d.entries[d.next:]
	if n <= 0 {
		d.next = len(d.entries)
		return remaining, nil
	}
	if len(remaining) == 0 {
		return nil, io.EOF
	}
	if n > len(remaining) {
		n = len(remaining)
	}
	d.next += n
	return remaining[:n], nil
}

// memoryDirInfo implements fs.FileInfo for a directory of a memoryFS.
type memoryDirInfo struct {
	name string
}

// Name returns the directory's base name ("." for the root).
func (d *memoryDirInfo) Name() string {
	return d.name
}

// Size is 0 for directories.
func (d *memoryDirInfo) Size() int64 {
	return 0
}

// Mode reports a read-only directory.
func (d *memoryDirInfo) Mode() fs.FileMode {
	return fs.ModeDir | 0555
}

// ModTime returns the zero time; the archive records none for implied directories.
func (d *memoryDirInfo) ModTime() time.Time {
	return time.Time{}
}

// IsDir is always true.
func (d *memoryDirInfo) IsDir() bool {
	return true
}

// Sys returns nil; there is no underlying data source.
func (d *memoryDirInfo) Sys() interface{} {
	return nil
}
-31,6 +31,10 @@ github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/leaanthony/debme v1.2.1 h1:9Tgwf+kjcrbMQ4WnPcEIUcQuIZYqdWftzZkBr+i/oOc= +github.com/leaanthony/debme v1.2.1/go.mod h1:3V+sCm5tYAgQymvSOfYQ5Xx2JCr+OXiD9Jkw3otUjiA= +github.com/leaanthony/slicer v1.5.0/go.mod h1:FwrApmf8gOrpzEWM2J/9Lh79tyq8KTX5AzRtwV7m4AY= +github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -54,9 +58,13 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -65,6 +73,8 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/pkg/pwa/pwa.go b/pkg/pwa/pwa.go new file mode 100644 index 0000000..6ee3465 --- /dev/null +++ b/pkg/pwa/pwa.go @@ -0,0 +1,202 @@ +package pwa + +import ( + "archive/tar" + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "path" + + "golang.org/x/net/html" +) + +// Manifest represents a simple PWA manifest structure. +type Manifest struct { + Name string `json:"name"` + ShortName string `json:"short_name"` + StartURL string `json:"start_url"` + Icons []Icon `json:"icons"` +} + +// Icon represents an icon in the PWA manifest. +type Icon struct { + Src string `json:"src"` + Sizes string `json:"sizes"` + Type string `json:"type"` +} + +// FindManifestURL finds the manifest URL from a given HTML page. 
+func FindManifestURL(pageURL string) (string, error) { + resp, err := http.Get(pageURL) + if err != nil { + return "", err + } + defer resp.Body.Close() + + doc, err := html.Parse(resp.Body) + if err != nil { + return "", err + } + + var manifestPath string + var f func(*html.Node) + f = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "link" { + isManifest := false + for _, a := range n.Attr { + if a.Key == "rel" && a.Val == "manifest" { + isManifest = true + break + } + } + if isManifest { + for _, a := range n.Attr { + if a.Key == "href" { + manifestPath = a.Val + return // exit once found + } + } + } + } + for c := n.FirstChild; c != nil && manifestPath == ""; c = c.NextSibling { + f(c) + } + } + f(doc) + + if manifestPath == "" { + return "", fmt.Errorf("manifest not found") + } + + resolvedURL, err := resolveURL(pageURL, manifestPath) + if err != nil { + return "", fmt.Errorf("could not resolve manifest URL: %w", err) + } + + return resolvedURL.String(), nil +} + +// DownloadAndPackagePWA downloads all assets of a PWA and packages them into a tarball. +func DownloadAndPackagePWA(baseURL string, manifestURL string) ([]byte, error) { + manifestAbsURL, err := resolveURL(baseURL, manifestURL) + if err != nil { + return nil, fmt.Errorf("could not resolve manifest URL: %w", err) + } + + resp, err := http.Get(manifestAbsURL.String()) + if err != nil { + return nil, fmt.Errorf("could not download manifest: %w", err) + } + defer resp.Body.Close() + + manifestBody, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("could not read manifest body: %w", err) + } + + var manifest Manifest + if err := json.Unmarshal(manifestBody, &manifest); err != nil { + return nil, fmt.Errorf("could not parse manifest JSON: %w", err) + } + + // Create a buffer to write our archive to. 
+ buf := new(bytes.Buffer) + tw := tar.NewWriter(buf) + + // Add the manifest to the archive + hdr := &tar.Header{ + Name: "manifest.json", + Mode: 0600, + Size: int64(len(manifestBody)), + } + if err := tw.WriteHeader(hdr); err != nil { + return nil, err + } + if _, err := tw.Write(manifestBody); err != nil { + return nil, err + } + + // Add the start_url to the archive + if manifest.StartURL != "" { + startURLAbs, err := resolveURL(manifestAbsURL.String(), manifest.StartURL) + if err != nil { + return nil, fmt.Errorf("could not resolve start_url: %w", err) + } + err = downloadAndAddFileToTar(tw, startURLAbs, manifest.StartURL) + if err != nil { + return nil, fmt.Errorf("failed to download start_url asset: %w", err) + } + } + + // Add the icons to the archive + for _, icon := range manifest.Icons { + iconURLAbs, err := resolveURL(manifestAbsURL.String(), icon.Src) + if err != nil { + fmt.Printf("Warning: could not resolve icon URL %s: %v\n", icon.Src, err) + continue + } + err = downloadAndAddFileToTar(tw, iconURLAbs, icon.Src) + if err != nil { + fmt.Printf("Warning: failed to download icon %s: %v\n", icon.Src, err) + } + } + + // Add the base HTML to the archive + baseURLAbs, _ := url.Parse(baseURL) + err = downloadAndAddFileToTar(tw, baseURLAbs, "index.html") + if err != nil { + return nil, fmt.Errorf("failed to download base HTML: %w", err) + } + + if err := tw.Close(); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func resolveURL(base, ref string) (*url.URL, error) { + baseURL, err := url.Parse(base) + if err != nil { + return nil, err + } + refURL, err := url.Parse(ref) + if err != nil { + return nil, err + } + return baseURL.ResolveReference(refURL), nil +} + +func downloadAndAddFileToTar(tw *tar.Writer, fileURL *url.URL, internalPath string) error { + resp, err := http.Get(fileURL.String()) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("bad status: %s", 
resp.Status) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + hdr := &tar.Header{ + Name: path.Clean(internalPath), + Mode: 0600, + Size: int64(len(data)), + } + if err := tw.WriteHeader(hdr); err != nil { + return err + } + if _, err := tw.Write(data); err != nil { + return err + } + + return nil +} diff --git a/pkg/pwa/pwa_test.go b/pkg/pwa/pwa_test.go new file mode 100644 index 0000000..ec615a2 --- /dev/null +++ b/pkg/pwa/pwa_test.go @@ -0,0 +1,131 @@ +package pwa + +import ( + "archive/tar" + "bytes" + "net/http" + "net/http/httptest" + "testing" +) + +func TestFindManifestURL(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` + + + + Test PWA + + + +

Hello, PWA!

+ + + `)) + })) + defer server.Close() + + expectedURL := server.URL + "/manifest.json" + actualURL, err := FindManifestURL(server.URL) + if err != nil { + t.Fatalf("FindManifestURL failed: %v", err) + } + + if actualURL != expectedURL { + t.Errorf("Expected manifest URL %s, but got %s", expectedURL, actualURL) + } +} + +func TestDownloadAndPackagePWA(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/": + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` + + + + Test PWA + + + +

Hello, PWA!

+ + + `)) + case "/manifest.json": + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(`{ + "name": "Test PWA", + "short_name": "TestPWA", + "start_url": "index.html", + "icons": [ + { + "src": "icon.png", + "sizes": "192x192", + "type": "image/png" + } + ] + }`)) + case "/index.html": + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(`

Hello, PWA!

`)) + case "/icon.png": + w.Header().Set("Content-Type", "image/png") + w.Write([]byte("fake image data")) + default: + http.NotFound(w, r) + } + })) + defer server.Close() + + tarball, err := DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json") + if err != nil { + t.Fatalf("DownloadAndPackagePWA failed: %v", err) + } + + tarReader := tar.NewReader(bytes.NewReader(tarball)) + expectedFiles := []string{"manifest.json", "index.html", "icon.png"} + foundFiles := make(map[string]bool) + + for { + header, err := tarReader.Next() + if err != nil { + break + } + foundFiles[header.Name] = true + } + + for _, file := range expectedFiles { + if !foundFiles[file] { + t.Errorf("Expected to find file %s in tarball, but it was not found", file) + } + } +} + +func TestResolveURL(t *testing.T) { + tests := []struct { + base string + ref string + want string + }{ + {"http://example.com/", "foo.html", "http://example.com/foo.html"}, + {"http://example.com/foo/", "bar.html", "http://example.com/foo/bar.html"}, + {"http://example.com/foo", "bar.html", "http://example.com/bar.html"}, + {"http://example.com/foo/", "/bar.html", "http://example.com/bar.html"}, + {"http://example.com/foo", "/bar.html", "http://example.com/bar.html"}, + {"http://example.com/", "http://example.com/foo/bar.html", "http://example.com/foo/bar.html"}, + } + + for _, tt := range tests { + got, err := resolveURL(tt.base, tt.ref) + if err != nil { + t.Errorf("resolveURL(%q, %q) returned error: %v", tt.base, tt.ref, err) + continue + } + if got.String() != tt.want { + t.Errorf("resolveURL(%q, %q) = %q, want %q", tt.base, tt.ref, got.String(), tt.want) + } + } +}