2025-10-31 20:47:11 +00:00
|
|
|
package datanode
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"archive/tar"
|
|
|
|
|
"bytes"
|
2025-11-14 13:47:27 +00:00
|
|
|
"errors"
|
2025-10-31 20:47:11 +00:00
|
|
|
"io"
|
|
|
|
|
"io/fs"
|
|
|
|
|
"os"
|
|
|
|
|
"path"
|
|
|
|
|
"sort"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
)
|
|
|
|
|
|
2025-11-14 13:47:27 +00:00
|
|
|
// ErrInvalidPassword is the sentinel error whose message is "invalid password".
var ErrInvalidPassword = errors.New("invalid password")

// ErrPasswordRequired is the sentinel error whose message is "password required".
var ErrPasswordRequired = errors.New("password required")
|
|
|
|
|
|
2026-02-02 00:53:30 +00:00
|
|
|
// DataNode is a filesystem that reads from a tar archive on demand.
|
2025-10-31 20:47:11 +00:00
|
|
|
type DataNode struct {
|
2026-02-02 00:53:30 +00:00
|
|
|
archive io.ReaderAt
|
|
|
|
|
files map[string]*fileIndex
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// fileIndex records where one archived file lives and what it looks like,
// so its contents can be read later without rescanning the archive.
type fileIndex struct {
	// name is the entry name as stored in the archive.
	name string
	// offset is the position of the first content byte within the archive;
	// size is the content length in bytes.
	offset, size int64
	// modTime is the modification time taken from the archive header.
	modTime time.Time
}
|
|
|
|
|
|
|
|
|
|
// New creates a new, empty DataNode.
|
2026-02-02 00:53:30 +00:00
|
|
|
func New(archive io.ReaderAt) *DataNode {
|
|
|
|
|
return &DataNode{
|
|
|
|
|
archive: archive,
|
|
|
|
|
files: make(map[string]*fileIndex),
|
|
|
|
|
}
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FromTar creates a new DataNode from a tarball.
|
2026-02-02 00:53:30 +00:00
|
|
|
func FromTar(archive io.ReaderAt) (*DataNode, error) {
|
|
|
|
|
dn := New(archive)
|
|
|
|
|
if seeker, ok := archive.(io.Seeker); ok {
|
|
|
|
|
seeker.Seek(0, io.SeekStart)
|
|
|
|
|
}
|
2025-10-31 20:47:11 +00:00
|
|
|
|
2026-02-02 00:53:30 +00:00
|
|
|
offset := int64(0)
|
2025-10-31 20:47:11 +00:00
|
|
|
for {
|
2026-02-02 00:53:30 +00:00
|
|
|
headerData := make([]byte, 512)
|
|
|
|
|
_, err := archive.ReadAt(headerData, offset)
|
2025-10-31 20:47:11 +00:00
|
|
|
if err == io.EOF {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
2026-02-02 00:53:30 +00:00
|
|
|
header, err := tar.NewReader(bytes.NewReader(headerData)).Next()
|
|
|
|
|
if err == io.EOF {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
offset += 512
|
2025-10-31 20:47:11 +00:00
|
|
|
if header.Typeflag == tar.TypeReg {
|
2026-02-02 00:53:30 +00:00
|
|
|
dn.files[header.Name] = &fileIndex{
|
|
|
|
|
name: header.Name,
|
|
|
|
|
offset: offset,
|
|
|
|
|
size: header.Size,
|
|
|
|
|
modTime: header.ModTime,
|
|
|
|
|
}
|
|
|
|
|
offset += header.Size
|
|
|
|
|
if remainder := header.Size % 512; remainder != 0 {
|
|
|
|
|
offset += 512 - remainder
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
}
|
2026-02-02 00:53:30 +00:00
|
|
|
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return dn, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ToTar serializes the DataNode to a tarball.
|
2026-02-02 00:53:30 +00:00
|
|
|
// This function will need to be re-implemented to read from the archive.
|
|
|
|
|
// For now, it will return an error.
|
2025-10-31 20:47:11 +00:00
|
|
|
func (d *DataNode) ToTar() ([]byte, error) {
|
2026-02-02 00:53:30 +00:00
|
|
|
return nil, errors.New("ToTar is not implemented for streaming DataNodes")
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
|
2026-02-02 00:53:30 +00:00
|
|
|
// AddData is not supported for streaming DataNodes.
|
2025-10-31 20:47:11 +00:00
|
|
|
func (d *DataNode) AddData(name string, content []byte) {
|
2026-02-02 00:53:30 +00:00
|
|
|
// This is a no-op for now.
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Open opens a file from the DataNode.
|
|
|
|
|
func (d *DataNode) Open(name string) (fs.File, error) {
|
|
|
|
|
name = strings.TrimPrefix(name, "/")
|
|
|
|
|
if file, ok := d.files[name]; ok {
|
2026-02-02 00:53:30 +00:00
|
|
|
sectionReader := io.NewSectionReader(d.archive, file.offset, file.size)
|
|
|
|
|
return &dataFileReader{
|
|
|
|
|
file: file,
|
|
|
|
|
reader: sectionReader,
|
|
|
|
|
}, nil
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
// Check if it's a directory
|
|
|
|
|
prefix := name + "/"
|
|
|
|
|
if name == "." || name == "" {
|
|
|
|
|
prefix = ""
|
|
|
|
|
}
|
|
|
|
|
for p := range d.files {
|
|
|
|
|
if strings.HasPrefix(p, prefix) {
|
|
|
|
|
return &dirFile{path: name, modTime: time.Now()}, nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return nil, fs.ErrNotExist
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ReadDir reads and returns all directory entries for the named directory.
|
|
|
|
|
func (d *DataNode) ReadDir(name string) ([]fs.DirEntry, error) {
|
|
|
|
|
name = strings.TrimPrefix(name, "/")
|
|
|
|
|
if name == "." {
|
|
|
|
|
name = ""
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-14 10:36:35 +00:00
|
|
|
// Disallow reading a file as a directory.
|
|
|
|
|
if info, err := d.Stat(name); err == nil && !info.IsDir() {
|
|
|
|
|
return nil, &fs.PathError{Op: "readdir", Path: name, Err: fs.ErrInvalid}
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-31 20:47:11 +00:00
|
|
|
entries := []fs.DirEntry{}
|
|
|
|
|
seen := make(map[string]bool)
|
|
|
|
|
|
|
|
|
|
prefix := ""
|
|
|
|
|
if name != "" {
|
|
|
|
|
prefix = name + "/"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for p := range d.files {
|
|
|
|
|
if !strings.HasPrefix(p, prefix) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
relPath := strings.TrimPrefix(p, prefix)
|
|
|
|
|
firstComponent := strings.Split(relPath, "/")[0]
|
|
|
|
|
|
|
|
|
|
if seen[firstComponent] {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
seen[firstComponent] = true
|
|
|
|
|
|
|
|
|
|
if strings.Contains(relPath, "/") {
|
|
|
|
|
// It's a directory
|
|
|
|
|
dir := &dirInfo{name: firstComponent, modTime: time.Now()}
|
|
|
|
|
entries = append(entries, fs.FileInfoToDirEntry(dir))
|
|
|
|
|
} else {
|
|
|
|
|
// It's a file
|
|
|
|
|
file := d.files[p]
|
|
|
|
|
info, _ := file.Stat()
|
|
|
|
|
entries = append(entries, fs.FileInfoToDirEntry(info))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Sort for stable order in tests
|
|
|
|
|
sort.Slice(entries, func(i, j int) bool {
|
|
|
|
|
return entries[i].Name() < entries[j].Name()
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
return entries, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Stat returns the FileInfo structure describing file.
|
|
|
|
|
func (d *DataNode) Stat(name string) (fs.FileInfo, error) {
|
|
|
|
|
name = strings.TrimPrefix(name, "/")
|
|
|
|
|
if file, ok := d.files[name]; ok {
|
|
|
|
|
return file.Stat()
|
|
|
|
|
}
|
|
|
|
|
// Check if it's a directory
|
|
|
|
|
prefix := name + "/"
|
|
|
|
|
if name == "." || name == "" {
|
|
|
|
|
prefix = ""
|
|
|
|
|
}
|
|
|
|
|
for p := range d.files {
|
|
|
|
|
if strings.HasPrefix(p, prefix) {
|
|
|
|
|
return &dirInfo{name: path.Base(name), modTime: time.Now()}, nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil, fs.ErrNotExist
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ExistsOptions tunes the check performed by Exists.
type ExistsOptions struct {
	// WantType selects the kind of entry that counts as existing:
	// fs.ModeDir accepts only directories, any other value only non-directories.
	WantType fs.FileMode
}
|
|
|
|
|
|
|
|
|
|
// Exists returns true if the file or directory exists.
|
|
|
|
|
func (d *DataNode) Exists(name string, opts ...ExistsOptions) (bool, error) {
|
|
|
|
|
info, err := d.Stat(name)
|
|
|
|
|
if err != nil {
|
|
|
|
|
if err == fs.ErrNotExist || os.IsNotExist(err) {
|
|
|
|
|
return false, nil
|
|
|
|
|
}
|
|
|
|
|
return false, err
|
|
|
|
|
}
|
|
|
|
|
if len(opts) > 0 {
|
|
|
|
|
if opts[0].WantType == fs.ModeDir && !info.IsDir() {
|
|
|
|
|
return false, nil
|
|
|
|
|
}
|
|
|
|
|
if opts[0].WantType != fs.ModeDir && info.IsDir() {
|
|
|
|
|
return false, nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// WalkOptions tunes the traversal performed by Walk.
type WalkOptions struct {
	// MaxDepth limits how deep below the root Walk descends; values <= 0 mean no limit.
	MaxDepth int
	// Filter, when non-nil, is consulted for every entry; returning false
	// skips the entry (and, for a directory, everything beneath it).
	Filter func(path string, d fs.DirEntry) bool
	// SkipErrors makes Walk ignore traversal errors instead of reporting them.
	SkipErrors bool
}
|
|
|
|
|
|
|
|
|
|
// Walk recursively descends the file tree rooted at root, calling fn for each file or directory.
|
|
|
|
|
func (d *DataNode) Walk(root string, fn fs.WalkDirFunc, opts ...WalkOptions) error {
|
|
|
|
|
var maxDepth int
|
|
|
|
|
var filter func(string, fs.DirEntry) bool
|
|
|
|
|
var skipErrors bool
|
|
|
|
|
if len(opts) > 0 {
|
|
|
|
|
maxDepth = opts[0].MaxDepth
|
|
|
|
|
filter = opts[0].Filter
|
2026-02-02 00:53:30 +00:00
|
|
|
_ skipErrors = opts[0].SkipErrors
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return fs.WalkDir(d, root, func(path string, de fs.DirEntry, err error) error {
|
|
|
|
|
if err != nil {
|
|
|
|
|
if skipErrors {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
return fn(path, de, err)
|
|
|
|
|
}
|
|
|
|
|
if filter != nil && !filter(path, de) {
|
2025-11-13 21:08:15 +00:00
|
|
|
if de.IsDir() {
|
|
|
|
|
return fs.SkipDir
|
|
|
|
|
}
|
2025-10-31 20:47:11 +00:00
|
|
|
return nil
|
|
|
|
|
}
|
2025-11-13 21:08:15 +00:00
|
|
|
|
|
|
|
|
// Process the entry first.
|
|
|
|
|
if err := fn(path, de, nil); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-31 20:47:11 +00:00
|
|
|
if maxDepth > 0 {
|
2025-11-13 21:08:15 +00:00
|
|
|
// Calculate depth relative to root
|
|
|
|
|
cleanedPath := strings.TrimPrefix(path, root)
|
|
|
|
|
cleanedPath = strings.TrimPrefix(cleanedPath, "/")
|
|
|
|
|
|
|
|
|
|
currentDepth := 0
|
|
|
|
|
if path != root {
|
|
|
|
|
if cleanedPath == "" {
|
|
|
|
|
// This can happen if root is "bar" and path is "bar"
|
|
|
|
|
currentDepth = 0
|
|
|
|
|
} else {
|
|
|
|
|
currentDepth = strings.Count(cleanedPath, "/") + 1
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-31 20:47:11 +00:00
|
|
|
if de.IsDir() && currentDepth >= maxDepth {
|
|
|
|
|
return fs.SkipDir
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-11-13 21:08:15 +00:00
|
|
|
return nil
|
2025-10-31 20:47:11 +00:00
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// CopyFile copies a file from the DataNode to the local filesystem.
|
|
|
|
|
func (d *DataNode) CopyFile(sourcePath string, target string, perm os.FileMode) error {
|
|
|
|
|
sourceFile, err := d.Open(sourcePath)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
defer sourceFile.Close()
|
|
|
|
|
|
|
|
|
|
targetFile, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, perm)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
defer targetFile.Close()
|
|
|
|
|
|
|
|
|
|
_, err = io.Copy(targetFile, sourceFile)
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
2026-02-02 00:53:30 +00:00
|
|
|
// Stat exposes the index record as an fs.FileInfo. It never fails.
func (d *fileIndex) Stat() (fs.FileInfo, error) {
	info := &dataFileInfo{file: d}
	return info, nil
}
|
2025-10-31 20:47:11 +00:00
|
|
|
|
|
|
|
|
// dataFileInfo implements fs.FileInfo for a dataFile.
|
2026-02-02 00:53:30 +00:00
|
|
|
type dataFileInfo struct{ file *fileIndex }
|
2025-10-31 20:47:11 +00:00
|
|
|
|
2025-11-03 18:25:04 +00:00
|
|
|
func (d *dataFileInfo) Name() string { return path.Base(d.file.name) }
|
2026-02-02 00:53:30 +00:00
|
|
|
func (d *dataFileInfo) Size() int64 { return d.file.size }
|
2025-11-03 18:25:04 +00:00
|
|
|
func (d *dataFileInfo) Mode() fs.FileMode { return 0444 }
|
2025-10-31 20:47:11 +00:00
|
|
|
func (d *dataFileInfo) ModTime() time.Time { return d.file.modTime }
|
2025-11-03 18:25:04 +00:00
|
|
|
func (d *dataFileInfo) IsDir() bool { return false }
|
|
|
|
|
func (d *dataFileInfo) Sys() interface{} { return nil }
|
2025-10-31 20:47:11 +00:00
|
|
|
|
|
|
|
|
// dataFileReader implements fs.File for a dataFile.
|
|
|
|
|
type dataFileReader struct {
|
2026-02-02 00:53:30 +00:00
|
|
|
file *fileIndex
|
|
|
|
|
reader io.ReaderAt
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (d *dataFileReader) Stat() (fs.FileInfo, error) { return d.file.Stat() }
|
|
|
|
|
func (d *dataFileReader) Read(p []byte) (int, error) {
|
2026-02-02 00:53:30 +00:00
|
|
|
return 0, &fs.PathError{Op: "read", Path: d.file.name, Err: fs.ErrInvalid}
|
|
|
|
|
}
|
|
|
|
|
func (d *dataFileReader) ReadAt(p []byte, off int64) (n int, err error) {
|
|
|
|
|
return d.reader.ReadAt(p, off)
|
2025-10-31 20:47:11 +00:00
|
|
|
}
|
|
|
|
|
func (d *dataFileReader) Close() error { return nil }
|
|
|
|
|
|
|
|
|
|
// dirInfo is the fs.FileInfo reported for directories, which are implicit:
// they exist only because some file path passes through them.
type dirInfo struct {
	name    string
	modTime time.Time
}

// Name returns the directory's name as stored.
func (di *dirInfo) Name() string {
	return di.name
}

// Size is always zero for directories.
func (di *dirInfo) Size() int64 {
	return 0
}

// Mode reports a read-only, traversable directory.
func (di *dirInfo) Mode() fs.FileMode {
	return fs.ModeDir | 0555
}

// ModTime returns the time stored when the dirInfo was created.
func (di *dirInfo) ModTime() time.Time {
	return di.modTime
}

// IsDir is always true.
func (di *dirInfo) IsDir() bool {
	return true
}

// Sys returns nil; there is no underlying data source to expose.
func (di *dirInfo) Sys() interface{} {
	return nil
}
|
2025-10-31 20:47:11 +00:00
|
|
|
|
|
|
|
|
// dirFile implements fs.File for a directory.
|
|
|
|
|
type dirFile struct {
|
|
|
|
|
path string
|
|
|
|
|
modTime time.Time
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (d *dirFile) Stat() (fs.FileInfo, error) {
|
|
|
|
|
return &dirInfo{name: path.Base(d.path), modTime: d.modTime}, nil
|
|
|
|
|
}
|
|
|
|
|
func (d *dirFile) Read([]byte) (int, error) {
|
|
|
|
|
return 0, &fs.PathError{Op: "read", Path: d.path, Err: fs.ErrInvalid}
|
|
|
|
|
}
|
|
|
|
|
func (d *dirFile) Close() error { return nil }
|