feat: Implement DataNode and update PWA commands

This commit introduces a new `datanode` package whose `DataNode` type is an in-memory, `fs.FS`-compatible filesystem with a `debme`-like interface. A `DataNode` can be serialized to, and restored from, a TAR archive, making it suitable for storing downloaded assets.

The `pwa` and `serve` commands have been refactored to use the `DataNode`. The `pwa` command now packages downloaded PWA assets into a `DataNode` and saves it as a `.dat` file (a TAR archive produced by `DataNode.ToTar`). The `serve` command loads a `.dat` file into a `DataNode` with `datanode.FromTar` and serves its contents over HTTP.
This commit is contained in:
google-labs-jules[bot] 2025-10-31 20:47:11 +00:00
parent efee04bfdb
commit 5149b64403
6 changed files with 473 additions and 188 deletions

View file

@ -28,12 +28,18 @@ var pwaCmd = &cobra.Command{
fmt.Printf("Found manifest: %s\n", manifestURL)
fmt.Println("Downloading and packaging PWA...")
pwaData, err := pwa.DownloadAndPackagePWA(pwaURL, manifestURL)
dn, err := pwa.DownloadAndPackagePWA(pwaURL, manifestURL)
if err != nil {
fmt.Printf("Error downloading and packaging PWA: %v\n", err)
return
}
pwaData, err := dn.ToTar()
if err != nil {
fmt.Printf("Error serializing PWA data: %v\n", err)
return
}
err = os.WriteFile(outputFile, pwaData, 0644)
if err != nil {
fmt.Printf("Error writing PWA to file: %v\n", err)
@ -46,5 +52,5 @@ var pwaCmd = &cobra.Command{
// init attaches pwaCmd to rootCmd and declares the command's persistent
// "output" string flag.
//
// NOTE(review): the two String calls below are the removed and the added side
// of the same diff line (default "pwa.tar" vs "pwa.dat"); presumably only the
// second exists in the committed file — confirm against the repository.
func init() {
rootCmd.AddCommand(pwaCmd)
pwaCmd.PersistentFlags().String("output", "pwa.tar", "Output file for the PWA tarball")
pwaCmd.PersistentFlags().String("output", "pwa.dat", "Output file for the PWA DataNode")
}

View file

@ -1,16 +1,11 @@
package cmd
import (
"archive/tar"
"bytes"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"path"
"strings"
"time"
"borg-data-collector/pkg/datanode"
"github.com/spf13/cobra"
)
@ -31,13 +26,13 @@ var serveCmd = &cobra.Command{
return
}
memFS, err := newMemoryFS(pwaData)
dn, err := datanode.FromTar(pwaData)
if err != nil {
fmt.Printf("Error creating in-memory filesystem: %v\n", err)
fmt.Printf("Error creating DataNode from tarball: %v\n", err)
return
}
http.Handle("/", http.FileServer(http.FS(memFS)))
http.Handle("/", http.FileServer(http.FS(dn)))
fmt.Printf("Serving PWA on http://localhost:%s\n", port)
err = http.ListenAndServe(":"+port, nil)
@ -48,121 +43,6 @@ var serveCmd = &cobra.Command{
},
}
// memoryFS serves files held entirely in memory through the fs.FS interface.
type memoryFS struct {
	// files maps an archive path (leading "/" removed) to its stored file.
	files map[string]*memoryFile
}

// newMemoryFS unpacks tarball into a fresh memoryFS.
//
// Only regular-file entries are kept; every other entry type is ignored.
// Each stored file keeps the modification time from its tar header. Any
// error reported by the tar reader aborts the load and is returned as is.
func newMemoryFS(tarball []byte) (*memoryFS, error) {
	result := &memoryFS{files: map[string]*memoryFile{}}
	archive := tar.NewReader(bytes.NewReader(tarball))
	for {
		entry, err := archive.Next()
		switch {
		case err == io.EOF:
			// End of archive: everything has been loaded.
			return result, nil
		case err != nil:
			return nil, err
		case entry.Typeflag != tar.TypeReg:
			continue
		}

		body, err := io.ReadAll(archive)
		if err != nil {
			return nil, err
		}
		key := strings.TrimPrefix(entry.Name, "/")
		result.files[key] = &memoryFile{
			name:    key,
			content: body,
			modTime: entry.ModTime,
		}
	}
}
// Open returns a reader over the stored file called name. A leading "/" is
// ignored and the empty name resolves to "index.html". Unknown names yield
// fs.ErrNotExist.
func (m *memoryFS) Open(name string) (fs.File, error) {
	key := strings.TrimPrefix(name, "/")
	if key == "" {
		key = "index.html"
	}
	entry, found := m.files[key]
	if !found {
		return nil, fs.ErrNotExist
	}
	return &memoryFileReader{file: entry}, nil
}
// memoryFile is one stored file: its archive path, its bytes and its
// modification time.
type memoryFile struct {
	name    string
	content []byte
	modTime time.Time
}

// Stat describes the file through a memoryFileInfo view.
func (mf *memoryFile) Stat() (fs.FileInfo, error) {
	info := &memoryFileInfo{file: mf}
	return info, nil
}

// Read never produces data on the stored file itself.
func (mf *memoryFile) Read(p []byte) (int, error) {
	return 0, nil // This is implemented by memoryFileReader
}

// Close is a no-op.
func (mf *memoryFile) Close() error { return nil }

// memoryFileInfo adapts a memoryFile to fs.FileInfo.
type memoryFileInfo struct {
	file *memoryFile
}

// Name is the final path element of the stored name.
func (fi *memoryFileInfo) Name() string { return path.Base(fi.file.name) }

// Size is the length of the stored content in bytes.
func (fi *memoryFileInfo) Size() int64 { return int64(len(fi.file.content)) }

// Mode reports a read-only regular file.
func (fi *memoryFileInfo) Mode() fs.FileMode { return 0444 }

// ModTime is the timestamp recorded with the stored file.
func (fi *memoryFileInfo) ModTime() time.Time { return fi.file.modTime }

// IsDir is always false: only files are stored.
func (fi *memoryFileInfo) IsDir() bool { return false }

// Sys exposes no underlying data source.
func (fi *memoryFileInfo) Sys() interface{} { return nil }

// memoryFileReader is the fs.File handed out for a memoryFile. It carries
// its own read position, created on the first Read.
type memoryFileReader struct {
	file   *memoryFile
	reader *bytes.Reader
}

// Stat forwards to the underlying stored file.
func (r *memoryFileReader) Stat() (fs.FileInfo, error) {
	return r.file.Stat()
}

// Read copies the next bytes of the content into p.
func (r *memoryFileReader) Read(p []byte) (int, error) {
	if r.reader != nil {
		return r.reader.Read(p)
	}
	r.reader = bytes.NewReader(r.file.content)
	return r.reader.Read(p)
}

// Close is a no-op.
func (r *memoryFileReader) Close() error { return nil }
func init() {
rootCmd.AddCommand(serveCmd)
serveCmd.PersistentFlags().String("port", "8080", "Port to serve the PWA on")

317
pkg/datanode/datanode.go Normal file
View file

@ -0,0 +1,317 @@
package datanode
import (
"archive/tar"
"bytes"
"io"
"io/fs"
"os"
"path"
"sort"
"strings"
"time"
)
// DataNode is an in-memory filesystem that is compatible with fs.FS.
type DataNode struct {
	// files is keyed by slash-separated path with any leading "/" removed.
	// Directories are not stored; they are implied by the file paths.
	files map[string]*dataFile
}

// New returns a DataNode that holds no files yet.
func New() *DataNode {
	dn := new(DataNode)
	dn.files = map[string]*dataFile{}
	return dn
}
// FromTar creates a new DataNode from a tarball.
//
// Only regular-file entries are loaded; entries of any other type are
// skipped, since directories are implied by the file paths. A leading "/" is
// stripped from each entry name. Every file keeps the modification time
// recorded in its tar header rather than the time of loading.
//
// Any error reported by the tar reader is returned unchanged and no DataNode
// is returned.
func FromTar(tarball []byte) (*DataNode, error) {
	dn := New()
	tarReader := tar.NewReader(bytes.NewReader(tarball))
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			return dn, nil
		}
		if err != nil {
			return nil, err
		}
		if header.Typeflag != tar.TypeReg {
			continue
		}
		data, err := io.ReadAll(tarReader)
		if err != nil {
			return nil, err
		}
		// Store the entry directly: AddData would stamp time.Now() and
		// discard the timestamp carried by the archive.
		name := strings.TrimPrefix(header.Name, "/")
		dn.files[name] = &dataFile{
			name:    name,
			content: data,
			modTime: header.ModTime,
		}
	}
}
// ToTar serializes the DataNode to a tarball.
//
// Files are written in ascending name order so that the same DataNode always
// produces the same archive bytes (map iteration order is random). Each entry
// is written with mode 0600 and the file's stored modification time.
//
// The first error from the tar writer is returned and no bytes are returned.
func (d *DataNode) ToTar() ([]byte, error) {
	names := make([]string, 0, len(d.files))
	for name := range d.files {
		names = append(names, name)
	}
	sort.Strings(names)

	buf := new(bytes.Buffer)
	tw := tar.NewWriter(buf)
	for _, name := range names {
		file := d.files[name]
		hdr := &tar.Header{
			Name:    file.name,
			Mode:    0600,
			Size:    int64(len(file.content)),
			ModTime: file.modTime,
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return nil, err
		}
		if _, err := tw.Write(file.content); err != nil {
			return nil, err
		}
	}
	// Close flushes the archive trailer; without it the tarball is truncated.
	if err := tw.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// AddData stores content under name, replacing any file already stored under
// that name. A leading "/" on name is dropped, and the file's modification
// time is set to the moment of the call. The content slice is kept as
// passed, not copied.
func (d *DataNode) AddData(name string, content []byte) {
	key := strings.TrimPrefix(name, "/")
	entry := &dataFile{
		name:    key,
		content: content,
		modTime: time.Now(),
	}
	d.files[key] = entry
}
// Open opens a file or directory from the DataNode.
//
// A leading "/" on name is ignored. A stored file yields a reader over its
// content. A name that is a path prefix of at least one stored file is an
// implicit directory and yields a directory handle. The root ("." or "")
// always exists, even when the DataNode is empty.
//
// When nothing matches, the error is a *fs.PathError with Op "open" wrapping
// fs.ErrNotExist, as the fs.FS contract asks; errors.Is(err, fs.ErrNotExist)
// and os.IsNotExist(err) both report true for it.
func (d *DataNode) Open(name string) (fs.File, error) {
	key := strings.TrimPrefix(name, "/")
	if file, ok := d.files[key]; ok {
		return &dataFileReader{file: file}, nil
	}

	// The root directory exists regardless of content.
	if key == "." || key == "" {
		return &dirFile{path: key, modTime: time.Now()}, nil
	}

	// Any stored path underneath key makes key an implicit directory.
	prefix := key + "/"
	for p := range d.files {
		if strings.HasPrefix(p, prefix) {
			return &dirFile{path: key, modTime: time.Now()}, nil
		}
	}
	return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
}
// ReadDir reads and returns all directory entries for the named directory,
// sorted by name.
//
// A leading "/" on name is ignored and "." names the root. Each stored path
// below the directory contributes its first path component: a component
// followed by more path is reported as a directory, otherwise as the file
// itself. Every component is reported once.
//
// The root of an empty DataNode yields an empty, non-nil slice. For any other
// name that has no entries a *fs.PathError with Op "readdir" is returned,
// wrapping fs.ErrInvalid when name is a stored file and fs.ErrNotExist
// otherwise.
func (d *DataNode) ReadDir(name string) ([]fs.DirEntry, error) {
	dir := strings.TrimPrefix(name, "/")
	if dir == "." {
		dir = ""
	}
	prefix := ""
	if dir != "" {
		prefix = dir + "/"
	}

	entries := []fs.DirEntry{}
	seen := make(map[string]bool)
	for p, file := range d.files {
		if !strings.HasPrefix(p, prefix) {
			continue
		}
		rel := strings.TrimPrefix(p, prefix)
		first, nested := rel, false
		if i := strings.IndexByte(rel, '/'); i >= 0 {
			first, nested = rel[:i], true
		}
		if seen[first] {
			continue
		}
		seen[first] = true

		if nested {
			// More path follows, so the first component is a directory.
			dir := &dirInfo{name: first, modTime: time.Now()}
			entries = append(entries, fs.FileInfoToDirEntry(dir))
			continue
		}
		// dataFile.Stat cannot fail, so its error is safe to drop.
		info, _ := file.Stat()
		entries = append(entries, fs.FileInfoToDirEntry(info))
	}

	if len(entries) == 0 && dir != "" {
		cause := fs.ErrNotExist
		if _, isFile := d.files[dir]; isFile {
			cause = fs.ErrInvalid // the name exists but is a regular file
		}
		return nil, &fs.PathError{Op: "readdir", Path: name, Err: cause}
	}

	// fs.ReadDirFS expects entries sorted by filename.
	sort.Slice(entries, func(i, j int) bool {
		return entries[i].Name() < entries[j].Name()
	})
	return entries, nil
}
// Stat returns the FileInfo structure describing the named file or directory.
//
// A leading "/" on name is ignored. A stored file is described by its own
// info. A name that is a path prefix of at least one stored file is described
// as a directory, and the root ("." or "") is always a directory, even when
// the DataNode is empty. Directory modification times are the time of the
// call, since directories are not stored.
//
// When nothing matches, the error is a *fs.PathError with Op "stat" wrapping
// fs.ErrNotExist; os.IsNotExist(err) reports true for it.
func (d *DataNode) Stat(name string) (fs.FileInfo, error) {
	key := strings.TrimPrefix(name, "/")
	if file, ok := d.files[key]; ok {
		return file.Stat()
	}

	exists := key == "." || key == ""
	if !exists {
		// Any stored path underneath key makes key an implicit directory.
		prefix := key + "/"
		for p := range d.files {
			if strings.HasPrefix(p, prefix) {
				exists = true
				break
			}
		}
	}
	if !exists {
		return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist}
	}
	return &dirInfo{name: path.Base(key), modTime: time.Now()}, nil
}
// ExistsOptions allows customizing the Exists check.
type ExistsOptions struct {
// WantType selects the kind of entry that counts as existing. Exists compares
// it against fs.ModeDir only: fs.ModeDir accepts directories, any other
// value (including the zero value) accepts non-directories.
WantType fs.FileMode
}
// Exists reports whether name refers to a file or directory in the DataNode.
//
// A "not exist" failure from Stat is reported as (false, nil); any other Stat
// error is returned. When options are supplied only the first is consulted:
// with WantType == fs.ModeDir the entry must be a directory, with any other
// WantType it must not be one.
func (d *DataNode) Exists(name string, opts ...ExistsOptions) (bool, error) {
	info, err := d.Stat(name)
	switch {
	case err == nil:
		// Found; fall through to the optional type check.
	case err == fs.ErrNotExist, os.IsNotExist(err):
		return false, nil
	default:
		return false, err
	}

	if len(opts) == 0 {
		return true, nil
	}
	wantDir := opts[0].WantType == fs.ModeDir
	return wantDir == info.IsDir(), nil
}
// WalkOptions allows customizing the Walk behavior.
type WalkOptions struct {
// MaxDepth, when greater than zero, limits how deep Walk descends into
// directories; zero or less means no limit.
MaxDepth int
// Filter, when non-nil, is consulted for every entry; Walk does not invoke
// its callback for entries where Filter returns false.
Filter func(path string, d fs.DirEntry) bool
// SkipErrors makes Walk ignore errors met during traversal instead of
// passing them to the callback.
SkipErrors bool
}
// Walk recursively descends the file tree rooted at root, calling fn for each
// file or directory. It is built on fs.WalkDir, so entries are visited in
// lexical order and fn may return fs.SkipDir.
//
// Only the first element of opts is used:
//   - SkipErrors: traversal errors are swallowed; otherwise they are handed
//     to fn, which decides how to proceed.
//   - Filter: entries for which it returns false are not passed to fn. A
//     filtered-out directory is still descended into.
//   - MaxDepth (> 0): a directory whose depth is MaxDepth or more is skipped
//     entirely, without calling fn for it. Depth is counted in path
//     components below root: root is 0, its children are 1, and so on.
//
// Depth is measured the same way for every root. Trimming root textually off
// the path would leave "bar" unchanged under root "." but turn "bar/sub" into
// "/sub" under root "bar", making "." walks descend one level further.
func (d *DataNode) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error {
	var o WalkOptions
	if len(opts) > 0 {
		o = opts[0]
	}

	// depthOf counts the path components that separate p from root.
	depthOf := func(p string) int {
		if p == root {
			return 0
		}
		rel := p
		if root != "." {
			rel = strings.TrimPrefix(p, root+"/")
		}
		return strings.Count(rel, "/") + 1
	}

	return fs.WalkDir(d, root, func(p string, de fs.DirEntry, err error) error {
		if err != nil {
			if o.SkipErrors {
				return nil
			}
			return fn(p, de, err)
		}
		if o.Filter != nil && !o.Filter(p, de) {
			return nil
		}
		if o.MaxDepth > 0 && de.IsDir() && depthOf(p) >= o.MaxDepth {
			return fs.SkipDir
		}
		return fn(p, de, nil)
	})
}
// CopyFile copies a file from the DataNode to the local filesystem.
//
// sourcePath names the file inside the DataNode; target is the local path to
// write, created with permissions perm if it does not exist. An existing
// target is truncated first, so no bytes of a previous, longer file survive
// the copy.
//
// The source is opened and checked before the target is touched: a missing
// source returns the error from Open, and a directory source returns a
// *fs.PathError wrapping fs.ErrInvalid, in both cases without creating the
// target. Errors from writing or closing the target are returned.
func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode) error {
	sourceFile, err := d.Open(sourcePath)
	if err != nil {
		return err
	}
	defer sourceFile.Close()

	info, err := sourceFile.Stat()
	if err != nil {
		return err
	}
	if info.IsDir() {
		return &fs.PathError{Op: "copy", Path: sourcePath, Err: fs.ErrInvalid}
	}

	targetFile, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
	if err != nil {
		return err
	}
	if _, err := io.Copy(targetFile, sourceFile); err != nil {
		targetFile.Close() // the copy error is the one worth reporting
		return err
	}
	// A failed Close can mean the data never reached the disk.
	return targetFile.Close()
}
// dataFile represents a file in the DataNode: its path, content and
// modification time.
type dataFile struct {
	name    string
	content []byte
	modTime time.Time
}

// Stat describes the file through a dataFileInfo view; it never fails.
func (d *dataFile) Stat() (fs.FileInfo, error) { return &dataFileInfo{file: d}, nil }

// Read always reports io.EOF; content is read through a dataFileReader.
func (d *dataFile) Read(p []byte) (int, error) { return 0, io.EOF }

// Close is a no-op.
func (d *dataFile) Close() error { return nil }

// dataFileInfo implements fs.FileInfo for a dataFile.
type dataFileInfo struct{ file *dataFile }

// Name is the final path element of the file's stored name.
func (d *dataFileInfo) Name() string { return path.Base(d.file.name) }

// Size is the content length in bytes.
func (d *dataFileInfo) Size() int64 { return int64(len(d.file.content)) }

// Mode reports a read-only regular file.
func (d *dataFileInfo) Mode() fs.FileMode { return 0444 }

// ModTime is the timestamp stored with the file.
func (d *dataFileInfo) ModTime() time.Time { return d.file.modTime }

// IsDir is always false.
func (d *dataFileInfo) IsDir() bool { return false }

// Sys exposes no underlying data source.
func (d *dataFileInfo) Sys() interface{} { return nil }

// dataFileReader implements fs.File for a dataFile.
//
// It also implements io.Seeker. net/http's FS adapter requires the files of
// the wrapped filesystem to be seekable; without Seek, range requests and
// content-type sniffing fail when a DataNode is served over HTTP.
type dataFileReader struct {
	file   *dataFile
	reader *bytes.Reader // created lazily by bytesReader
}

// Stat describes the underlying file.
func (d *dataFileReader) Stat() (fs.FileInfo, error) { return d.file.Stat() }

// Read copies the next bytes of the content into p and returns io.EOF once
// the content is exhausted.
func (d *dataFileReader) Read(p []byte) (int, error) {
	return d.bytesReader().Read(p)
}

// Seek moves the read position as described by io.Seeker and returns the new
// offset from the start of the content.
func (d *dataFileReader) Seek(offset int64, whence int) (int64, error) {
	return d.bytesReader().Seek(offset, whence)
}

// Close is a no-op; the reader remains usable afterwards.
func (d *dataFileReader) Close() error { return nil }

// bytesReader returns the positioned reader over the content, creating it on
// first use so that a zero-value reader field is valid.
func (d *dataFileReader) bytesReader() *bytes.Reader {
	if d.reader == nil {
		d.reader = bytes.NewReader(d.file.content)
	}
	return d.reader
}
// dirInfo is the fs.FileInfo reported for a directory that is implied by
// stored file paths rather than stored itself.
type dirInfo struct {
	name    string
	modTime time.Time
}

// Name returns the directory's own name.
func (di *dirInfo) Name() string {
	return di.name
}

// Size is always zero for a directory.
func (di *dirInfo) Size() int64 {
	return 0
}

// Mode marks the entry as a read-only, traversable directory.
func (di *dirInfo) Mode() fs.FileMode {
	return 0555 | fs.ModeDir
}

// ModTime returns the timestamp the info was created with.
func (di *dirInfo) ModTime() time.Time {
	return di.modTime
}

// IsDir is always true.
func (di *dirInfo) IsDir() bool {
	return true
}

// Sys exposes no underlying data source.
func (di *dirInfo) Sys() interface{} {
	return nil
}

// dirFile is the fs.File handle returned when a directory is opened.
type dirFile struct {
	path    string
	modTime time.Time
}

// Stat describes the directory, named after the last element of its path.
func (df *dirFile) Stat() (fs.FileInfo, error) {
	info := &dirInfo{
		name:    path.Base(df.path),
		modTime: df.modTime,
	}
	return info, nil
}

// Read always fails: a directory has no byte content.
func (df *dirFile) Read([]byte) (int, error) {
	readErr := &fs.PathError{Op: "read", Path: df.path, Err: fs.ErrInvalid}
	return 0, readErr
}

// Close is a no-op.
func (df *dirFile) Close() error {
	return nil
}

View file

@ -0,0 +1,124 @@
package datanode
import (
"io/fs"
"os"
"reflect"
"sort"
"testing"
)
// TestDataNode exercises the public DataNode API on a small tree
// (foo.txt, bar/baz.txt, bar/qux.txt): Open, Stat, Exists, ReadDir, Walk,
// CopyFile and a ToTar/FromTar round trip.
func TestDataNode(t *testing.T) {
	dn := New()
	dn.AddData("foo.txt", []byte("foo"))
	dn.AddData("bar/baz.txt", []byte("baz"))
	dn.AddData("bar/qux.txt", []byte("qux"))

	// entryNames returns the sorted names of a directory listing.
	entryNames := func(entries []fs.DirEntry) []string {
		var names []string
		for _, e := range entries {
			names = append(names, e.Name())
		}
		sort.Strings(names)
		return names
	}

	// Test Open
	file, err := dn.Open("foo.txt")
	if err != nil {
		t.Fatalf("Open failed: %v", err)
	}
	if err := file.Close(); err != nil {
		t.Errorf("Close failed: %v", err)
	}

	_, err = dn.Open("nonexistent.txt")
	if err == nil {
		t.Fatalf("Expected error opening nonexistent file, got nil")
	}

	// Test Stat
	info, err := dn.Stat("bar/baz.txt")
	if err != nil {
		t.Fatalf("Stat failed: %v", err)
	}
	if info.Name() != "baz.txt" {
		t.Errorf("Expected name baz.txt, got %s", info.Name())
	}
	if info.Size() != 3 {
		t.Errorf("Expected size 3, got %d", info.Size())
	}
	if info.IsDir() {
		t.Errorf("Expected baz.txt to not be a directory")
	}

	// Named barInfo so the package-level dirInfo type is not shadowed.
	barInfo, err := dn.Stat("bar")
	if err != nil {
		t.Fatalf("Stat directory failed: %v", err)
	}
	if !barInfo.IsDir() {
		t.Errorf("Expected 'bar' to be a directory")
	}

	// Test Exists
	exists, err := dn.Exists("foo.txt")
	if err != nil || !exists {
		t.Errorf("Expected foo.txt to exist, err: %v", err)
	}
	exists, err = dn.Exists("bar")
	if err != nil || !exists {
		t.Errorf("Expected 'bar' directory to exist, err: %v", err)
	}
	exists, err = dn.Exists("nonexistent")
	if err != nil || exists {
		t.Errorf("Expected 'nonexistent' to not exist, err: %v", err)
	}

	// Test ReadDir
	entries, err := dn.ReadDir(".")
	if err != nil {
		t.Fatalf("ReadDir failed: %v", err)
	}
	expectedRootEntries := []string{"bar", "foo.txt"}
	if got := entryNames(entries); !reflect.DeepEqual(got, expectedRootEntries) {
		t.Errorf("Expected entries %v, got %v", expectedRootEntries, got)
	}

	barEntries, err := dn.ReadDir("bar")
	if err != nil {
		t.Fatalf("ReadDir('bar') failed: %v", err)
	}
	expectedBarEntries := []string{"baz.txt", "qux.txt"}
	if got := entryNames(barEntries); !reflect.DeepEqual(got, expectedBarEntries) {
		t.Errorf("Expected entries %v in 'bar', got %v", expectedBarEntries, got)
	}

	// Test Walk
	var paths []string
	err = dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		paths = append(paths, path)
		return nil
	})
	if err != nil {
		t.Fatalf("Walk failed: %v", err)
	}
	expectedPaths := []string{".", "bar", "bar/baz.txt", "bar/qux.txt", "foo.txt"}
	sort.Strings(paths)
	if !reflect.DeepEqual(paths, expectedPaths) {
		t.Errorf("Walk expected paths %v, got %v", expectedPaths, paths)
	}

	// Test CopyFile
	tmpfile, err := os.CreateTemp("", "datanode-test-")
	if err != nil {
		t.Fatalf("CreateTemp failed: %v", err)
	}
	defer os.Remove(tmpfile.Name())
	// Only the path is needed; release the handle so it is not leaked and
	// the file can be rewritten and removed on every platform.
	if err := tmpfile.Close(); err != nil {
		t.Fatalf("closing temp file failed: %v", err)
	}

	err = dn.CopyFile("foo.txt", tmpfile.Name(), 0644)
	if err != nil {
		t.Fatalf("CopyFile failed: %v", err)
	}

	content, err := os.ReadFile(tmpfile.Name())
	if err != nil {
		t.Fatalf("ReadFile failed: %v", err)
	}
	if string(content) != "foo" {
		t.Errorf("Expected foo, got %s", string(content))
	}

	// Test ToTar / FromTar round trip
	tarball, err := dn.ToTar()
	if err != nil {
		t.Fatalf("ToTar failed: %v", err)
	}
	restored, err := FromTar(tarball)
	if err != nil {
		t.Fatalf("FromTar failed: %v", err)
	}
	for _, name := range []string{"foo.txt", "bar/baz.txt", "bar/qux.txt"} {
		ok, err := restored.Exists(name)
		if err != nil || !ok {
			t.Errorf("Expected %s to survive the tar round trip, err: %v", name, err)
		}
	}
}

View file

@ -1,8 +1,6 @@
package pwa
import (
"archive/tar"
"bytes"
"encoding/json"
"fmt"
"io"
@ -10,6 +8,8 @@ import (
"net/url"
"path"
"borg-data-collector/pkg/datanode"
"golang.org/x/net/html"
)
@ -79,8 +79,8 @@ func FindManifestURL(pageURL string) (string, error) {
return resolvedURL.String(), nil
}
// DownloadAndPackagePWA downloads all assets of a PWA and packages them into a tarball.
func DownloadAndPackagePWA(baseURL string, manifestURL string) ([]byte, error) {
// DownloadAndPackagePWA downloads all assets of a PWA and packages them into a DataNode.
func DownloadAndPackagePWA(baseURL string, manifestURL string) (*datanode.DataNode, error) {
manifestAbsURL, err := resolveURL(baseURL, manifestURL)
if err != nil {
return nil, fmt.Errorf("could not resolve manifest URL: %w", err)
@ -102,60 +102,39 @@ func DownloadAndPackagePWA(baseURL string, manifestURL string) ([]byte, error) {
return nil, fmt.Errorf("could not parse manifest JSON: %w", err)
}
// Create a buffer to write our archive to.
buf := new(bytes.Buffer)
tw := tar.NewWriter(buf)
dn := datanode.New()
dn.AddData("manifest.json", manifestBody)
// Add the manifest to the archive
hdr := &tar.Header{
Name: "manifest.json",
Mode: 0600,
Size: int64(len(manifestBody)),
}
if err := tw.WriteHeader(hdr); err != nil {
return nil, err
}
if _, err := tw.Write(manifestBody); err != nil {
return nil, err
}
// Add the start_url to the archive
if manifest.StartURL != "" {
startURLAbs, err := resolveURL(manifestAbsURL.String(), manifest.StartURL)
if err != nil {
return nil, fmt.Errorf("could not resolve start_url: %w", err)
}
err = downloadAndAddFileToTar(tw, startURLAbs, manifest.StartURL)
err = downloadAndAddFile(dn, startURLAbs, manifest.StartURL)
if err != nil {
return nil, fmt.Errorf("failed to download start_url asset: %w", err)
}
}
// Add the icons to the archive
for _, icon := range manifest.Icons {
iconURLAbs, err := resolveURL(manifestAbsURL.String(), icon.Src)
if err != nil {
fmt.Printf("Warning: could not resolve icon URL %s: %v\n", icon.Src, err)
continue
}
err = downloadAndAddFileToTar(tw, iconURLAbs, icon.Src)
err = downloadAndAddFile(dn, iconURLAbs, icon.Src)
if err != nil {
fmt.Printf("Warning: failed to download icon %s: %v\n", icon.Src, err)
}
}
// Add the base HTML to the archive
baseURLAbs, _ := url.Parse(baseURL)
err = downloadAndAddFileToTar(tw, baseURLAbs, "index.html")
err = downloadAndAddFile(dn, baseURLAbs, "index.html")
if err != nil {
return nil, fmt.Errorf("failed to download base HTML: %w", err)
}
if err := tw.Close(); err != nil {
return nil, err
}
return buf.Bytes(), nil
return dn, nil
}
func resolveURL(base, ref string) (*url.URL, error) {
@ -170,7 +149,7 @@ func resolveURL(base, ref string) (*url.URL, error) {
return baseURL.ResolveReference(refURL), nil
}
func downloadAndAddFileToTar(tw *tar.Writer, fileURL *url.URL, internalPath string) error {
func downloadAndAddFile(dn *datanode.DataNode, fileURL *url.URL, internalPath string) error {
resp, err := http.Get(fileURL.String())
if err != nil {
return err
@ -185,18 +164,6 @@ func downloadAndAddFileToTar(tw *tar.Writer, fileURL *url.URL, internalPath stri
if err != nil {
return err
}
hdr := &tar.Header{
Name: path.Clean(internalPath),
Mode: 0600,
Size: int64(len(data)),
}
if err := tw.WriteHeader(hdr); err != nil {
return err
}
if _, err := tw.Write(data); err != nil {
return err
}
dn.AddData(path.Clean(internalPath), data)
return nil
}

View file

@ -1,8 +1,6 @@
package pwa
import (
"archive/tar"
"bytes"
"net/http"
"net/http/httptest"
"testing"
@ -80,26 +78,19 @@ func TestDownloadAndPackagePWA(t *testing.T) {
}))
defer server.Close()
tarball, err := DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json")
dn, err := DownloadAndPackagePWA(server.URL, server.URL+"/manifest.json")
if err != nil {
t.Fatalf("DownloadAndPackagePWA failed: %v", err)
}
tarReader := tar.NewReader(bytes.NewReader(tarball))
expectedFiles := []string{"manifest.json", "index.html", "icon.png"}
foundFiles := make(map[string]bool)
for {
header, err := tarReader.Next()
if err != nil {
break
}
foundFiles[header.Name] = true
}
for _, file := range expectedFiles {
if !foundFiles[file] {
t.Errorf("Expected to find file %s in tarball, but it was not found", file)
exists, err := dn.Exists(file)
if err != nil {
t.Fatalf("Exists failed for %s: %v", file, err)
}
if !exists {
t.Errorf("Expected to find file %s in DataNode, but it was not found", file)
}
}
}