From 5149b6440334c4c30e8c98fbbaf7109df08db2f2 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 20:47:11 +0000 Subject: [PATCH] feat: Implement DataNode and update PWA commands This commit introduces a new `DataNode` package, which provides an in-memory, `fs.FS`-compatible filesystem with a `debme`-like interface. The `DataNode` can be serialized to and from a TAR archive, making it suitable for storing downloaded assets. The `pwa` and `serve` commands have been refactored to use the `DataNode`. The `pwa` command now packages downloaded PWA assets into a `DataNode` and saves it as a `.dat` file. The `serve` command loads a `.dat` file into a `DataNode` and serves its contents. --- cmd/pwa.go | 10 +- cmd/serve.go | 130 +------------- pkg/datanode/datanode.go | 317 ++++++++++++++++++++++++++++++++++ pkg/datanode/datanode_test.go | 124 +++++++++++++ pkg/pwa/pwa.go | 57 ++---- pkg/pwa/pwa_test.go | 23 +-- 6 files changed, 473 insertions(+), 188 deletions(-) create mode 100644 pkg/datanode/datanode.go create mode 100644 pkg/datanode/datanode_test.go diff --git a/cmd/pwa.go b/cmd/pwa.go index d3deb09..e288fcb 100644 --- a/cmd/pwa.go +++ b/cmd/pwa.go @@ -28,12 +28,18 @@ var pwaCmd = &cobra.Command{ fmt.Printf("Found manifest: %s\n", manifestURL) fmt.Println("Downloading and packaging PWA...") - pwaData, err := pwa.DownloadAndPackagePWA(pwaURL, manifestURL) + dn, err := pwa.DownloadAndPackagePWA(pwaURL, manifestURL) if err != nil { fmt.Printf("Error downloading and packaging PWA: %v\n", err) return } + pwaData, err := dn.ToTar() + if err != nil { + fmt.Printf("Error serializing PWA data: %v\n", err) + return + } + err = os.WriteFile(outputFile, pwaData, 0644) if err != nil { fmt.Printf("Error writing PWA to file: %v\n", err) @@ -46,5 +52,5 @@ var pwaCmd = &cobra.Command{ func init() { rootCmd.AddCommand(pwaCmd) - pwaCmd.PersistentFlags().String("output", "pwa.tar", "Output file for the PWA tarball") + pwaCmd.PersistentFlags().String("output", "pwa.dat", "Output file for the PWA DataNode") } diff --git a/cmd/serve.go b/cmd/serve.go index b780df7..57beb87 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -1,16 +1,11 @@ package cmd import ( - "archive/tar" - "bytes" "fmt" - "io" - "io/fs" "net/http" "os" - "path" - "strings" - "time" + + "borg-data-collector/pkg/datanode" "github.com/spf13/cobra" ) @@ -31,13 +26,13 @@ var serveCmd = &cobra.Command{ return } - memFS, err := newMemoryFS(pwaData) + dn, err := datanode.FromTar(pwaData) if err != nil { - fmt.Printf("Error creating in-memory filesystem: %v\n", err) + fmt.Printf("Error creating DataNode from tarball: %v\n", err) return } - http.Handle("/", http.FileServer(http.FS(memFS))) + http.Handle("/", http.FileServer(http.FS(dn))) fmt.Printf("Serving PWA on http://localhost:%s\n", port) err = http.ListenAndServe(":"+port, nil) @@ -48,121 +43,6 @@ var serveCmd = &cobra.Command{ }, } -// memoryFS is an in-memory filesystem that implements fs.FS -type memoryFS struct { - files map[string]*memoryFile -} - -func newMemoryFS(tarball []byte) (*memoryFS, error) { - memFS := &memoryFS{files: make(map[string]*memoryFile)} - tarReader := tar.NewReader(bytes.NewReader(tarball)) - - for { - header, err := tarReader.Next() - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - - if header.Typeflag == tar.TypeReg { - data, err := io.ReadAll(tarReader) - if err != nil { - return nil, err - } - name := strings.TrimPrefix(header.Name, "/") - memFS.files[name] = 
&memoryFile{ - name: name, - content: data, - modTime: header.ModTime, - } - } - } - - return memFS, nil -} - -func (m *memoryFS) Open(name string) (fs.File, error) { - name = strings.TrimPrefix(name, "/") - if name == "" { - name = "index.html" - } - if file, ok := m.files[name]; ok { - return &memoryFileReader{file: file}, nil - } - return nil, fs.ErrNotExist -} - -// memoryFile represents a file in the in-memory filesystem -type memoryFile struct { - name string - content []byte - modTime time.Time -} - -func (m *memoryFile) Stat() (fs.FileInfo, error) { - return &memoryFileInfo{file: m}, nil -} - -func (m *memoryFile) Read(p []byte) (int, error) { - return 0, nil // This is implemented by memoryFileReader -} - -func (m *memoryFile) Close() error { - return nil -} - -// memoryFileInfo implements fs.FileInfo for a memoryFile -type memoryFileInfo struct { - file *memoryFile -} - -func (m *memoryFileInfo) Name() string { - return path.Base(m.file.name) -} - -func (m *memoryFileInfo) Size() int64 { - return int64(len(m.file.content)) -} - -func (m *memoryFileInfo) Mode() fs.FileMode { - return 0444 -} - -func (m *memoryFileInfo) ModTime() time.Time { - return m.file.modTime -} - -func (m *memoryFileInfo) IsDir() bool { - return false -} - -func (m *memoryFileInfo) Sys() interface{} { - return nil -} - -// memoryFileReader implements fs.File for a memoryFile -type memoryFileReader struct { - file *memoryFile - reader *bytes.Reader -} - -func (m *memoryFileReader) Stat() (fs.FileInfo, error) { - return m.file.Stat() -} - -func (m *memoryFileReader) Read(p []byte) (int, error) { - if m.reader == nil { - m.reader = bytes.NewReader(m.file.content) - } - return m.reader.Read(p) -} - -func (m *memoryFileReader) Close() error { - return nil -} - func init() { rootCmd.AddCommand(serveCmd) serveCmd.PersistentFlags().String("port", "8080", "Port to serve the PWA on") diff --git a/pkg/datanode/datanode.go b/pkg/datanode/datanode.go new file mode 100644 index 0000000..fe2f43b --- /dev/null +++ b/pkg/datanode/datanode.go @@ -0,0 +1,317 @@ +package datanode + +import ( + "archive/tar" + "bytes" + "io" + "io/fs" + "os" + "path" + "sort" + "strings" + "time" +) + +// DataNode is an in-memory filesystem that is compatible with fs.FS. +type DataNode struct { + files map[string]*dataFile +} + +// New creates a new, empty DataNode. +func New() *DataNode { + return &DataNode{files: make(map[string]*dataFile)} +} + +// FromTar creates a new DataNode from a tarball. +func FromTar(tarball []byte) (*DataNode, error) { + dn := New() + tarReader := tar.NewReader(bytes.NewReader(tarball)) + + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + + if header.Typeflag == tar.TypeReg { + data, err := io.ReadAll(tarReader) + if err != nil { + return nil, err + } + dn.AddData(header.Name, data) + } + } + + return dn, nil +} + +// ToTar serializes the DataNode to a tarball. +func (d *DataNode) ToTar() ([]byte, error) { + buf := new(bytes.Buffer) + tw := tar.NewWriter(buf) + + for _, file := range d.files { + hdr := &tar.Header{ + Name: file.name, + Mode: 0600, + Size: int64(len(file.content)), + ModTime: file.modTime, + } + if err := tw.WriteHeader(hdr); err != nil { + return nil, err + } + if _, err := tw.Write(file.content); err != nil { + return nil, err + } + } + + if err := tw.Close(); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// AddData adds a file to the DataNode. 
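+// A leading "/" is trimmed so entries are stored relative to the node root,
+// the modification time is set to time.Now(), and adding a name that already
+// exists overwrites its previous content.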
+func (d *DataNode) AddData(name string, content []byte) { + name = strings.TrimPrefix(name, "/") + d.files[name] = &dataFile{ + name: name, + content: content, + modTime: time.Now(), + } +} + +// Open opens a file from the DataNode. +func (d *DataNode) Open(name string) (fs.File, error) { + name = strings.TrimPrefix(name, "/") + if file, ok := d.files[name]; ok { + return &dataFileReader{file: file}, nil + } + // Check if it's a directory + prefix := name + "/" + if name == "." || name == "" { + prefix = "" + } + for p := range d.files { + if strings.HasPrefix(p, prefix) { + return &dirFile{path: name, modTime: time.Now()}, nil + } + } + return nil, fs.ErrNotExist +} + +// ReadDir reads and returns all directory entries for the named directory. +func (d *DataNode) ReadDir(name string) ([]fs.DirEntry, error) { + name = strings.TrimPrefix(name, "/") + if name == "." { + name = "" + } + + entries := []fs.DirEntry{} + seen := make(map[string]bool) + + prefix := "" + if name != "" { + prefix = name + "/" + } + + for p := range d.files { + if !strings.HasPrefix(p, prefix) { + continue + } + + relPath := strings.TrimPrefix(p, prefix) + firstComponent := strings.Split(relPath, "/")[0] + + if seen[firstComponent] { + continue + } + seen[firstComponent] = true + + if strings.Contains(relPath, "/") { + // It's a directory + dir := &dirInfo{name: firstComponent, modTime: time.Now()} + entries = append(entries, fs.FileInfoToDirEntry(dir)) + } else { + // It's a file + file := d.files[p] + info, _ := file.Stat() + entries = append(entries, fs.FileInfoToDirEntry(info)) + } + } + + // Sort for stable order in tests + sort.Slice(entries, func(i, j int) bool { + return entries[i].Name() < entries[j].Name() + }) + + return entries, nil +} + +// Stat returns the FileInfo structure describing file. +func (d *DataNode) Stat(name string) (fs.FileInfo, error) { + name = strings.TrimPrefix(name, "/") + if file, ok := d.files[name]; ok { + return file.Stat() + } + // Check if it's a directory + prefix := name + "/" + if name == "." || name == "" { + prefix = "" + } + for p := range d.files { + if strings.HasPrefix(p, prefix) { + return &dirInfo{name: path.Base(name), modTime: time.Now()}, nil + } + } + + return nil, fs.ErrNotExist +} + +// ExistsOptions allows customizing the Exists check. +type ExistsOptions struct { + WantType fs.FileMode +} + +// Exists returns true if the file or directory exists. +func (d *DataNode) Exists(name string, opts ...ExistsOptions) (bool, error) { + info, err := d.Stat(name) + if err != nil { + if err == fs.ErrNotExist || os.IsNotExist(err) { + return false, nil + } + return false, err + } + if len(opts) > 0 { + if opts[0].WantType == fs.ModeDir && !info.IsDir() { + return false, nil + } + if opts[0].WantType != fs.ModeDir && info.IsDir() { + return false, nil + } + } + return true, nil +} + +// WalkOptions allows customizing the Walk behavior. +type WalkOptions struct { + MaxDepth int + Filter func(path string, d fs.DirEntry) bool + SkipErrors bool +} + +// Walk recursively descends the file tree rooted at root, calling fn for each file or directory. 
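+// If WalkOptions are supplied, MaxDepth bounds how deep directories are
+// descended, entries for which Filter returns false are not passed to fn,
+// and SkipErrors drops walk errors instead of forwarding them to fn.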
+func (d *DataNode) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error {
+	var maxDepth int
+	var filter func(string, fs.DirEntry) bool
+	var skipErrors bool
+	if len(opts) > 0 {
+		maxDepth = opts[0].MaxDepth
+		filter = opts[0].Filter
+		skipErrors = opts[0].SkipErrors
+	}
+
+	return fs.WalkDir(d, root, func(path string, de fs.DirEntry, err error) error {
+		if err != nil {
+			if skipErrors {
+				return nil
+			}
+			return fn(path, de, err)
+		}
+		if filter != nil && !filter(path, de) {
+			return nil
+		}
+		if maxDepth > 0 {
+			currentDepth := strings.Count(strings.TrimPrefix(path, root), "/")
+			if de.IsDir() && currentDepth >= maxDepth {
+				return fs.SkipDir
+			}
+		}
+		return fn(path, de, nil)
+	})
+}
+
+// CopyFile copies a file from the DataNode to the local filesystem.
+func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode) error {
+	sourceFile, err := d.Open(sourcePath)
+	if err != nil {
+		return err
+	}
+	defer sourceFile.Close()
+
+	targetFile, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR|os.O_TRUNC, perm)
+	if err != nil {
+		return err
+	}
+	defer targetFile.Close()
+
+	_, err = io.Copy(targetFile, sourceFile)
+	return err
+}
+
+// dataFile represents a file in the DataNode.
+type dataFile struct {
+	name    string
+	content []byte
+	modTime time.Time
+}
+
+func (d *dataFile) Stat() (fs.FileInfo, error) { return &dataFileInfo{file: d}, nil }
+func (d *dataFile) Read(p []byte) (int, error) { return 0, io.EOF }
+func (d *dataFile) Close() error { return nil }
+
+// dataFileInfo implements fs.FileInfo for a dataFile.
+type dataFileInfo struct{ file *dataFile }
+
+func (d *dataFileInfo) Name() string { return path.Base(d.file.name) }
+func (d *dataFileInfo) Size() int64 { return int64(len(d.file.content)) }
+func (d *dataFileInfo) Mode() fs.FileMode { return 0444 }
+func (d *dataFileInfo) ModTime() time.Time { return d.file.modTime }
+func (d *dataFileInfo) IsDir() bool { return false }
+func (d *dataFileInfo) Sys() interface{} { return nil }
+
+// dataFileReader implements fs.File for a dataFile.
+type dataFileReader struct {
+	file   *dataFile
+	reader *bytes.Reader
+}
+
+func (d *dataFileReader) Stat() (fs.FileInfo, error) { return d.file.Stat() }
+func (d *dataFileReader) Read(p []byte) (int, error) {
+	if d.reader == nil {
+		d.reader = bytes.NewReader(d.file.content)
+	}
+	return d.reader.Read(p)
+}
+func (d *dataFileReader) Close() error { return nil }
+
+// dirInfo implements fs.FileInfo for an implicit directory.
+type dirInfo struct {
+	name    string
+	modTime time.Time
+}
+
+func (d *dirInfo) Name() string { return d.name }
+func (d *dirInfo) Size() int64 { return 0 }
+func (d *dirInfo) Mode() fs.FileMode { return fs.ModeDir | 0555 }
+func (d *dirInfo) ModTime() time.Time { return d.modTime }
+func (d *dirInfo) IsDir() bool { return true }
+func (d *dirInfo) Sys() interface{} { return nil }
+
+// dirFile implements fs.File for a directory.
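+// It is returned by Open for implicitly created directories; Read fails with
+// fs.ErrInvalid, and directory contents are listed via DataNode.ReadDir.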
+type dirFile struct { + path string + modTime time.Time +} + +func (d *dirFile) Stat() (fs.FileInfo, error) { + return &dirInfo{name: path.Base(d.path), modTime: d.modTime}, nil +} +func (d *dirFile) Read([]byte) (int, error) { + return 0, &fs.PathError{Op: "read", Path: d.path, Err: fs.ErrInvalid} +} +func (d *dirFile) Close() error { return nil } diff --git a/pkg/datanode/datanode_test.go b/pkg/datanode/datanode_test.go new file mode 100644 index 0000000..847d20b --- /dev/null +++ b/pkg/datanode/datanode_test.go @@ -0,0 +1,124 @@ +package datanode + +import ( + "io/fs" + "os" + "reflect" + "sort" + "testing" +) + +func TestDataNode(t *testing.T) { + dn := New() + dn.AddData("foo.txt", []byte("foo")) + dn.AddData("bar/baz.txt", []byte("baz")) + dn.AddData("bar/qux.txt", []byte("qux")) + + // Test Open + file, err := dn.Open("foo.txt") + if err != nil { + t.Fatalf("Open failed: %v", err) + } + file.Close() + + _, err = dn.Open("nonexistent.txt") + if err == nil { + t.Fatalf("Expected error opening nonexistent file, got nil") + } + + // Test Stat + info, err := dn.Stat("bar/baz.txt") + if err != nil { + t.Fatalf("Stat failed: %v", err) + } + if info.Name() != "baz.txt" { + t.Errorf("Expected name baz.txt, got %s", info.Name()) + } + if info.Size() != 3 { + t.Errorf("Expected size 3, got %d", info.Size()) + } + if info.IsDir() { + t.Errorf("Expected baz.txt to not be a directory") + } + + dirInfo, err := dn.Stat("bar") + if err != nil { + t.Fatalf("Stat directory failed: %v", err) + } + if !dirInfo.IsDir() { + t.Errorf("Expected 'bar' to be a directory") + } + + // Test Exists + exists, err := dn.Exists("foo.txt") + if err != nil || !exists { + t.Errorf("Expected foo.txt to exist, err: %v", err) + } + exists, err = dn.Exists("bar") + if err != nil || !exists { + t.Errorf("Expected 'bar' directory to exist, err: %v", err) + } + exists, err = dn.Exists("nonexistent") + if err != nil || exists { + t.Errorf("Expected 'nonexistent' to not exist, err: %v", err) + } + + // Test ReadDir + entries, err := dn.ReadDir(".") + if err != nil { + t.Fatalf("ReadDir failed: %v", err) + } + expectedRootEntries := []string{"bar", "foo.txt"} + if len(entries) != len(expectedRootEntries) { + t.Errorf("Expected %d entries in root, got %d", len(expectedRootEntries), len(entries)) + } + var rootEntryNames []string + for _, e := range entries { + rootEntryNames = append(rootEntryNames, e.Name()) + } + sort.Strings(rootEntryNames) + if !reflect.DeepEqual(rootEntryNames, expectedRootEntries) { + t.Errorf("Expected entries %v, got %v", expectedRootEntries, rootEntryNames) + } + + barEntries, err := dn.ReadDir("bar") + if err != nil { + t.Fatalf("ReadDir('bar') failed: %v", err) + } + expectedBarEntries := []string{"baz.txt", "qux.txt"} + if len(barEntries) != len(expectedBarEntries) { + t.Errorf("Expected %d entries in 'bar', got %d", len(expectedBarEntries), len(barEntries)) + } + + // Test Walk + var paths []string + dn.Walk(".", func(path string, d fs.DirEntry, err error) error { + paths = append(paths, path) + return nil + }) + expectedPaths := []string{".", "bar", "bar/baz.txt", "bar/qux.txt", "foo.txt"} + sort.Strings(paths) + if !reflect.DeepEqual(paths, expectedPaths) { + t.Errorf("Walk expected paths %v, got %v", expectedPaths, paths) + } + + // Test CopyFile + tmpfile, err := os.CreateTemp("", "datanode-test-") + if err != nil { + t.Fatalf("CreateTemp failed: %v", err) + } + defer os.Remove(tmpfile.Name()) + + err = dn.CopyFile("foo.txt", tmpfile.Name(), 0644) + if err != nil { + t.Fatalf("CopyFile failed: 
%v", err) + } + + content, err := os.ReadFile(tmpfile.Name()) + if err != nil { + t.Fatalf("ReadFile failed: %v", err) + } + if string(content) != "foo" { + t.Errorf("Expected foo, got %s", string(content)) + } +} diff --git a/pkg/pwa/pwa.go b/pkg/pwa/pwa.go index 6ee3465..6cef4c5 100644 --- a/pkg/pwa/pwa.go +++ b/pkg/pwa/pwa.go @@ -1,8 +1,6 @@ package pwa import ( - "archive/tar" - "bytes" "encoding/json" "fmt" "io" @@ -10,6 +8,8 @@ import ( "net/url" "path" + "borg-data-collector/pkg/datanode" + "golang.org/x/net/html" ) @@ -79,8 +79,8 @@ func FindManifestURL(pageURL string) (string, error) { return resolvedURL.String(), nil } -// DownloadAndPackagePWA downloads all assets of a PWA and packages them into a tarball. -func DownloadAndPackagePWA(baseURL string, manifestURL string) ([]byte, error) { +// DownloadAndPackagePWA downloads all assets of a PWA and packages them into a DataNode. +func DownloadAndPackagePWA(baseURL string, manifestURL string) (*datanode.DataNode, error) { manifestAbsURL, err := resolveURL(baseURL, manifestURL) if err != nil { return nil, fmt.Errorf("could not resolve manifest URL: %w", err) @@ -102,60 +102,39 @@ func DownloadAndPackagePWA(baseURL string, manifestURL string) ([]byte, error) { return nil, fmt.Errorf("could not parse manifest JSON: %w", err) } - // Create a buffer to write our archive to. - buf := new(bytes.Buffer) - tw := tar.NewWriter(buf) + dn := datanode.New() + dn.AddData("manifest.json", manifestBody) - // Add the manifest to the archive - hdr := &tar.Header{ - Name: "manifest.json", - Mode: 0600, - Size: int64(len(manifestBody)), - } - if err := tw.WriteHeader(hdr); err != nil { - return nil, err - } - if _, err := tw.Write(manifestBody); err != nil { - return nil, err - } - - // Add the start_url to the archive if manifest.StartURL != "" { startURLAbs, err := resolveURL(manifestAbsURL.String(), manifest.StartURL) if err != nil { return nil, fmt.Errorf("could not resolve start_url: %w", err) } - err = downloadAndAddFileToTar(tw, startURLAbs, manifest.StartURL) + err = downloadAndAddFile(dn, startURLAbs, manifest.StartURL) if err != nil { return nil, fmt.Errorf("failed to download start_url asset: %w", err) } } - // Add the icons to the archive for _, icon := range manifest.Icons { iconURLAbs, err := resolveURL(manifestAbsURL.String(), icon.Src) if err != nil { fmt.Printf("Warning: could not resolve icon URL %s: %v\n", icon.Src, err) continue } - err = downloadAndAddFileToTar(tw, iconURLAbs, icon.Src) + err = downloadAndAddFile(dn, iconURLAbs, icon.Src) if err != nil { fmt.Printf("Warning: failed to download icon %s: %v\n", icon.Src, err) } } - // Add the base HTML to the archive baseURLAbs, _ := url.Parse(baseURL) - err = downloadAndAddFileToTar(tw, baseURLAbs, "index.html") + err = downloadAndAddFile(dn, baseURLAbs, "index.html") if err != nil { return nil, fmt.Errorf("failed to download base HTML: %w", err) } - if err := tw.Close(); err != nil { - return nil, err - } - - return buf.Bytes(), nil + return dn, nil } func resolveURL(base, ref string) (*url.URL, error) { @@ -170,7 +149,7 @@ func resolveURL(base, ref string) (*url.URL, error) { return baseURL.ResolveReference(refURL), nil } -func downloadAndAddFileToTar(tw *tar.Writer, fileURL *url.URL, internalPath string) error { +func downloadAndAddFile(dn *datanode.DataNode, fileURL *url.URL, internalPath string) error { resp, err := http.Get(fileURL.String()) if err != nil { return err @@ -185,18 +164,6 @@ func downloadAndAddFileToTar(tw *tar.Writer, fileURL *url.URL, internalPath stri if err 
!= nil { return err } - - hdr := &tar.Header{ - Name: path.Clean(internalPath), - Mode: 0600, - Size: int64(len(data)), - } - if err := tw.WriteHeader(hdr); err != nil { - return err - } - if _, err := tw.Write(data); err != nil { - return err - } - + dn.AddData(path.Clean(internalPath), data) return nil } diff --git a/pkg/pwa/pwa_test.go b/pkg/pwa/pwa_test.go index ec615a2..186412d 100644 --- a/pkg/pwa/pwa_test.go +++ b/pkg/pwa/pwa_test.go @@ -1,8 +1,6 @@ package pwa import ( - "archive/tar" - "bytes" "net/http" "net/http/httptest" "testing" @@ -80,26 +78,19 @@ func TestDownloadAndPackagePWA(t *testing.T) { })) defer server.Close() - tarball, err := DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json") + dn, err := DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json") if err != nil { t.Fatalf("DownloadAndPackagePWA failed: %v", err) } - tarReader := tar.NewReader(bytes.NewReader(tarball)) expectedFiles := []string{"manifest.json", "index.html", "icon.png"} - foundFiles := make(map[string]bool) - - for { - header, err := tarReader.Next() - if err != nil { - break - } - foundFiles[header.Name] = true - } - for _, file := range expectedFiles { - if !foundFiles[file] { - t.Errorf("Expected to find file %s in tarball, but it was not found", file) + exists, err := dn.Exists(file) + if err != nil { + t.Fatalf("Exists failed for %s: %v", file, err) + } + if !exists { + t.Errorf("Expected to find file %s in DataNode, but it was not found", file) } } }
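
Usage sketch (illustrative, not part of the patch): the snippet below shows how the DataNode API introduced above — New, AddData, ToTar, FromTar and the fs.FS implementation — could be exercised end to end, mirroring what the pwa and serve commands now do. The import path is the one used in cmd/serve.go; the file names and contents are made up.

package main

import (
	"log"
	"net/http"
	"os"

	"borg-data-collector/pkg/datanode"
)

func main() {
	// Build an in-memory filesystem and add a couple of files.
	dn := datanode.New()
	dn.AddData("index.html", []byte("<h1>hello</h1>"))
	dn.AddData("icons/icon.png", []byte{0x89, 0x50, 0x4e, 0x47})

	// Serialize the node to a tarball and persist it, as the pwa command does.
	blob, err := dn.ToTar()
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile("pwa.dat", blob, 0644); err != nil {
		log.Fatal(err)
	}

	// Load the .dat file back into a DataNode and serve it, as the serve command does.
	raw, err := os.ReadFile("pwa.dat")
	if err != nil {
		log.Fatal(err)
	}
	restored, err := datanode.FromTar(raw)
	if err != nil {
		log.Fatal(err)
	}

	log.Println("serving on http://localhost:8080")
	log.Fatal(http.ListenAndServe(":8080", http.FileServer(http.FS(restored))))
}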