Borg/pkg/datanode/datanode.go
google-labs-jules[bot] 38fafbf639 feat: Add comprehensive docstrings and refactor matrix to tim
Add comprehensive Go docstrings with examples to all packages to achieve 100% coverage.

Refactor the `matrix` package to `tim` (Terminal Isolation Matrix). Update all references to the old package and terminology across the codebase, including commands, tests, and examples.

Fix inconsistencies in command-line flags and help text related to the refactoring.
2025-11-14 21:23:11 +00:00

469 lines
12 KiB
Go

package datanode
import (
"archive/tar"
"bytes"
"io"
"io/fs"
"os"
"path"
"sort"
"strings"
"time"
)
// DataNode is an in-memory file tree that can be used wherever the standard
// library expects an io/fs.FS. File contents live entirely in memory, which
// makes it convenient for working with sets of files (for example the
// contents of a tar archive or a Git repository) without touching the disk.
type DataNode struct {
	// files maps a file's path, as stored by AddData, to its in-memory
	// record. Directories are never stored; they are implied by the paths
	// of the files beneath them.
	files map[string]*dataFile
}
// New returns an empty DataNode that is ready to receive files. It is the
// usual entry point for building an in-memory filesystem from scratch.
//
// Example:
//
//	dn := datanode.New()
func New() *DataNode {
	node := &DataNode{}
	node.files = make(map[string]*dataFile)
	return node
}
// FromTar creates a new DataNode by reading a tar archive. Every regular file
// in the tarball is unpacked into the in-memory filesystem together with the
// modification time recorded in its header. Non-regular entries (directories,
// links, devices, ...) are skipped; directories are implicit in a DataNode.
//
// It returns a nil DataNode and the underlying error if the archive cannot be
// read.
//
// Example:
//
//	tarData, err := os.ReadFile("my-archive.tar")
//	if err != nil {
//		// handle error
//	}
//	dn, err := datanode.FromTar(tarData)
//	if err != nil {
//		// handle error
//	}
func FromTar(tarball []byte) (*DataNode, error) {
	dn := New()
	tarReader := tar.NewReader(bytes.NewReader(tarball))
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if header.Typeflag != tar.TypeReg {
			continue
		}
		data, err := io.ReadAll(tarReader)
		if err != nil {
			return nil, err
		}
		dn.AddData(header.Name, data)
		// AddData stamps the entry with time.Now(). Restore the archived
		// modification time so a ToTar/FromTar round trip is lossless. The
		// lookup mirrors AddData's name normalization; it simply misses when
		// AddData ignored the entry (empty or directory-like name).
		if stored, ok := dn.files[strings.TrimPrefix(header.Name, "/")]; ok {
			stored.modTime = header.ModTime
		}
	}
	return dn, nil
}
// ToTar serializes the DataNode into a tar archive and returns the archive
// bytes. Entries are written in lexical order of their names, so the same set
// of files always produces the same archive. Each entry is written with mode
// 0600 and the file's stored modification time.
//
// This is useful for saving the in-memory filesystem to disk or for
// transmitting it over a network.
//
// Example:
//
//	tarData, err := dn.ToTar()
//	if err != nil {
//		// handle error
//	}
//	err = os.WriteFile("my-archive.tar", tarData, 0644)
//	if err != nil {
//		// handle error
//	}
func (d *DataNode) ToTar() ([]byte, error) {
	// Map iteration order is randomized in Go; sort the names first so the
	// output is deterministic across calls.
	names := make([]string, 0, len(d.files))
	for name := range d.files {
		names = append(names, name)
	}
	sort.Strings(names)

	buf := new(bytes.Buffer)
	tw := tar.NewWriter(buf)
	for _, name := range names {
		file := d.files[name]
		hdr := &tar.Header{
			Name:    file.name,
			Mode:    0600,
			Size:    int64(len(file.content)),
			ModTime: file.modTime,
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return nil, err
		}
		if _, err := tw.Write(file.content); err != nil {
			return nil, err
		}
	}
	// Close flushes the trailing padding and end-of-archive marker.
	if err := tw.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// AddData stores content under name, replacing any file already stored under
// that name. One leading "/" is stripped from name. Parent directories need
// not be created beforehand; they exist implicitly once a file lives beneath
// them.
//
// Calls with an empty name, or with a name ending in "/" (which denotes a
// directory), are silently ignored.
//
// Example:
//
//	dn.AddData("my-file.txt", []byte("hello world"))
//	dn.AddData("my-dir/my-other-file.txt", []byte("hello again"))
func (d *DataNode) AddData(name string, content []byte) {
	key := strings.TrimPrefix(name, "/")

	// Nothing to store for an empty name, and directories are never stored
	// explicitly.
	if key == "" || strings.HasSuffix(key, "/") {
		return
	}

	entry := &dataFile{
		name:    key,
		content: content,
		modTime: time.Now(),
	}
	d.files[key] = entry
}
// Open returns a readable handle for the named entry, satisfying fs.FS. A
// stored file yields a handle that reads its content. A name that is a path
// prefix of at least one stored file yields a directory handle. "." and ""
// refer to the root, which exists only while the DataNode holds at least one
// file. Any other name results in fs.ErrNotExist.
//
// Example:
//
//	file, err := dn.Open("my-file.txt")
//	if err != nil {
//		// handle error
//	}
//	defer file.Close()
//	content, err := io.ReadAll(file)
//	if err != nil {
//		// handle error
//	}
func (d *DataNode) Open(name string) (fs.File, error) {
	key := strings.TrimPrefix(name, "/")

	if stored, found := d.files[key]; found {
		return &dataFileReader{file: stored}, nil
	}

	// Not a stored file: treat it as a directory if any stored path lives
	// underneath it. The root matches every stored path.
	var dirPrefix string
	if key != "." && key != "" {
		dirPrefix = key + "/"
	}
	for storedPath := range d.files {
		if strings.HasPrefix(storedPath, dirPrefix) {
			return &dirFile{path: key, modTime: time.Now()}, nil
		}
	}

	return nil, fs.ErrNotExist
}
// ReadDir lists the immediate children of the named directory, sorted by
// name, satisfying fs.ReadDirFS. Sub-directories are synthesized from the
// paths of the files stored beneath them. "." and "" both refer to the root.
//
// Naming a stored file instead of a directory yields a *fs.PathError wrapping
// fs.ErrInvalid. A name that matches nothing yields an empty, non-nil slice
// and a nil error.
//
// Example:
//
//	entries, err := dn.ReadDir("my-dir")
//	if err != nil {
//		// handle error
//	}
//	for _, entry := range entries {
//		fmt.Println(entry.Name())
//	}
func (d *DataNode) ReadDir(name string) ([]fs.DirEntry, error) {
	dir := strings.TrimPrefix(name, "/")
	if dir == "." {
		dir = ""
	}

	// A regular file cannot be listed as a directory.
	if info, err := d.Stat(dir); err == nil && !info.IsDir() {
		return nil, &fs.PathError{Op: "readdir", Path: dir, Err: fs.ErrInvalid}
	}

	var prefix string
	if dir != "" {
		prefix = dir + "/"
	}

	entries := make([]fs.DirEntry, 0)
	seen := make(map[string]bool)
	for storedPath, stored := range d.files {
		if !strings.HasPrefix(storedPath, prefix) {
			continue
		}

		// The child is the first path element below the directory. Anything
		// after a further "/" means that child is itself a directory.
		rel := storedPath[len(prefix):]
		child, isDir := rel, false
		if slash := strings.IndexByte(rel, '/'); slash >= 0 {
			child, isDir = rel[:slash], true
		}

		// Many files may share the same sub-directory; report it only once.
		if seen[child] {
			continue
		}
		seen[child] = true

		var info fs.FileInfo
		if isDir {
			info = &dirInfo{name: child, modTime: time.Now()}
		} else {
			info, _ = stored.Stat()
		}
		entries = append(entries, fs.FileInfoToDirEntry(info))
	}

	// Map iteration is unordered; sort so callers see a stable listing.
	sort.Slice(entries, func(a, b int) bool {
		return entries[a].Name() < entries[b].Name()
	})
	return entries, nil
}
// Stat describes the named file or directory, satisfying fs.StatFS. A stored
// file reports its own metadata. A name that is a path prefix of at least one
// stored file is reported as a directory whose modification time is the
// current time. Any other name results in fs.ErrNotExist.
//
// Example:
//
//	info, err := dn.Stat("my-file.txt")
//	if err != nil {
//		// handle error
//	}
//	fmt.Println(info.Size())
func (d *DataNode) Stat(name string) (fs.FileInfo, error) {
	key := strings.TrimPrefix(name, "/")

	if stored, found := d.files[key]; found {
		return stored.Stat()
	}

	// Fall back to an implicit directory: it exists when some stored path
	// lies beneath it. The root matches every stored path.
	var dirPrefix string
	if key != "." && key != "" {
		dirPrefix = key + "/"
	}
	for storedPath := range d.files {
		if strings.HasPrefix(storedPath, dirPrefix) {
			return &dirInfo{name: path.Base(key), modTime: time.Now()}, nil
		}
	}

	return nil, fs.ErrNotExist
}
// ExistsOptions tunes what the Exists method counts as a match.
type ExistsOptions struct {
	// WantType is the kind of entry the caller is looking for, e.g.
	// fs.ModeDir for a directory. When the entry exists but is of a
	// different kind, Exists reports false.
	WantType fs.FileMode
}
// Exists reports whether name refers to a file or directory in the DataNode.
// A missing entry is reported as (false, nil); any other lookup failure is
// returned as an error.
//
// When options are supplied, only the first is consulted: with WantType set
// to fs.ModeDir the entry must be a directory, and with any other WantType
// the entry must not be a directory.
//
// Example:
//
//	// Check if a file exists
//	exists, err := dn.Exists("my-file.txt")
//	if err != nil {
//		// handle error
//	}
//
//	// Check if a directory exists
//	exists, err = dn.Exists("my-dir", datanode.ExistsOptions{WantType: fs.ModeDir})
//	if err != nil {
//		// handle error
//	}
func (d *DataNode) Exists(name string, opts ...ExistsOptions) (bool, error) {
	info, err := d.Stat(name)
	switch {
	case err == nil:
		// Found; fall through to the optional type check below.
	case err == fs.ErrNotExist, os.IsNotExist(err):
		return false, nil
	default:
		return false, err
	}

	if len(opts) == 0 {
		return true, nil
	}

	// The entry matches when "caller wants a directory" agrees with
	// "entry is a directory".
	wantDir := opts[0].WantType == fs.ModeDir
	return wantDir == info.IsDir(), nil
}
// WalkOptions tunes how the Walk method traverses the tree.
type WalkOptions struct {
	// MaxDepth bounds how far below the root the walk descends. Zero
	// disables the limit.
	MaxDepth int
	// Filter, when non-nil, is consulted for every entry. Returning false
	// skips the entry; for a directory this skips everything beneath it as
	// well.
	Filter func(path string, d fs.DirEntry) bool
	// SkipErrors makes the walk carry on after an error instead of
	// reporting it to the callback.
	SkipErrors bool
}
// Walk traverses the in-memory tree rooted at root using fs.WalkDir, invoking
// fn for each file and directory, root included.
//
// Only the first WalkOptions value, if any, is honored:
//   - SkipErrors suppresses traversal errors instead of passing them to fn.
//   - Filter excludes entries (and whole sub-trees for directories) before fn
//     sees them.
//   - MaxDepth, when positive, stops the walk from descending into
//     directories at or beyond that depth; fn is still called for the
//     directory itself.
//
// Example:
//
//	err := dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
//		if err != nil {
//			return err
//		}
//		fmt.Println(path)
//		return nil
//	})
func (d *DataNode) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error {
	var cfg WalkOptions
	if len(opts) > 0 {
		cfg = opts[0]
	}

	visit := func(p string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			if cfg.SkipErrors {
				return nil
			}
			return fn(p, entry, walkErr)
		}

		if cfg.Filter != nil && !cfg.Filter(p, entry) {
			if entry.IsDir() {
				return fs.SkipDir
			}
			return nil
		}

		// The callback runs before the depth check so that a directory at
		// the depth limit is still reported.
		if err := fn(p, entry, nil); err != nil {
			return err
		}

		// The depth limit only ever prunes directories.
		if cfg.MaxDepth <= 0 || !entry.IsDir() {
			return nil
		}

		// Depth is the number of path elements below root; root itself is 0.
		depth := 0
		if p != root {
			rel := strings.TrimPrefix(strings.TrimPrefix(p, root), "/")
			if rel != "" {
				depth = strings.Count(rel, "/") + 1
			}
		}
		if depth >= cfg.MaxDepth {
			return fs.SkipDir
		}
		return nil
	}

	return fs.WalkDir(d, root, visit)
}
// CopyFile copies a file from the DataNode to the path target on the local
// filesystem. The target is created with permissions perm if it does not
// exist and is truncated if it does, so it ends up containing exactly the
// source's content.
//
// It returns an error if sourcePath does not exist in the DataNode, if it
// names a directory (a *fs.PathError wrapping fs.ErrInvalid; the target is
// left untouched), or if the target cannot be opened, written, or closed.
//
// Example:
//
//	err := dn.CopyFile("my-file.txt", "/tmp/my-file.txt", 0644)
//	if err != nil {
//		// handle error
//	}
func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode) error {
	sourceFile, err := d.Open(sourcePath)
	if err != nil {
		return err
	}
	defer sourceFile.Close()

	// Reject directories before touching the disk so a failed copy neither
	// creates nor truncates the target.
	info, err := sourceFile.Stat()
	if err != nil {
		return err
	}
	if info.IsDir() {
		return &fs.PathError{Op: "read", Path: sourcePath, Err: fs.ErrInvalid}
	}

	// O_TRUNC discards previous content; without it, overwriting a longer
	// existing file would leave stale bytes after the copied data.
	targetFile, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR|os.O_TRUNC, perm)
	if err != nil {
		return err
	}
	if _, err := io.Copy(targetFile, sourceFile); err != nil {
		targetFile.Close()
		return err
	}
	// Close can report a delayed write failure, so its error is returned
	// rather than discarded.
	return targetFile.Close()
}
// dataFile is the stored record for a single file in a DataNode.
type dataFile struct {
	name    string    // full path of the file within the DataNode
	content []byte    // raw file bytes
	modTime time.Time // modification time reported by Stat
}

// Stat returns file metadata backed by this record. It never fails.
func (f *dataFile) Stat() (fs.FileInfo, error) {
	return &dataFileInfo{file: f}, nil
}

// Read always reports io.EOF without producing data; content is served
// through dataFileReader instead.
func (f *dataFile) Read(p []byte) (int, error) {
	return 0, io.EOF
}

// Close is a no-op.
func (f *dataFile) Close() error {
	return nil
}

// dataFileInfo adapts a dataFile to the fs.FileInfo interface.
type dataFileInfo struct {
	file *dataFile
}

// Name returns the final path element of the file's name.
func (i *dataFileInfo) Name() string {
	return path.Base(i.file.name)
}

// Size returns the length of the file's content in bytes.
func (i *dataFileInfo) Size() int64 {
	return int64(len(i.file.content))
}

// Mode reports a read-only regular file.
func (i *dataFileInfo) Mode() fs.FileMode {
	return 0444
}

// ModTime returns the modification time stored with the file.
func (i *dataFileInfo) ModTime() time.Time {
	return i.file.modTime
}

// IsDir always reports false.
func (i *dataFileInfo) IsDir() bool {
	return false
}

// Sys returns nil; there is no underlying data source.
func (i *dataFileInfo) Sys() interface{} {
	return nil
}
// dataFileReader is the fs.File handle returned for a stored file. Each
// handle keeps its own read position over the file's content.
type dataFileReader struct {
	file   *dataFile
	reader *bytes.Reader // created on the first Read
}

// Stat returns the metadata of the underlying file.
func (r *dataFileReader) Stat() (fs.FileInfo, error) {
	return r.file.Stat()
}

// Read copies the next chunk of the file's content into p, creating the
// underlying reader on first use.
func (r *dataFileReader) Read(p []byte) (int, error) {
	src := r.reader
	if src == nil {
		src = bytes.NewReader(r.file.content)
		r.reader = src
	}
	return src.Read(p)
}

// Close is a no-op.
func (r *dataFileReader) Close() error {
	return nil
}
// dirInfo is the fs.FileInfo reported for a directory that exists only
// implicitly, i.e. because files are stored beneath it.
type dirInfo struct {
	name    string
	modTime time.Time
}

// Name returns the directory name as given at construction.
func (di *dirInfo) Name() string {
	return di.name
}

// Size is always zero for a directory.
func (di *dirInfo) Size() int64 {
	return 0
}

// Mode reports a read-only, traversable directory.
func (di *dirInfo) Mode() fs.FileMode {
	return fs.ModeDir | 0555
}

// ModTime returns the time given at construction.
func (di *dirInfo) ModTime() time.Time {
	return di.modTime
}

// IsDir always reports true.
func (di *dirInfo) IsDir() bool {
	return true
}

// Sys returns nil; there is no underlying data source.
func (di *dirInfo) Sys() interface{} {
	return nil
}
// dirFile is the fs.File handle returned when a directory is opened.
type dirFile struct {
	path    string
	modTime time.Time
}

// Stat describes the directory, named after the last element of its path.
func (df *dirFile) Stat() (fs.FileInfo, error) {
	info := &dirInfo{
		name:    path.Base(df.path),
		modTime: df.modTime,
	}
	return info, nil
}

// Read always fails with a *fs.PathError wrapping fs.ErrInvalid, because a
// directory has no byte content.
func (df *dirFile) Read([]byte) (int, error) {
	readErr := &fs.PathError{Op: "read", Path: df.path, Err: fs.ErrInvalid}
	return 0, readErr
}

// Close is a no-op.
func (df *dirFile) Close() error {
	return nil
}