refactor: Use DataNode for repository collection
This commit refactors the repository collection functionality to use the new `DataNode` package instead of the old `trix` package. The `collect` and `all` commands have been updated to use the new `vcs` package, which clones Git repositories and packages them into a `DataNode`. The `trix` package and its related commands (`cat`, `ingest`) have been removed.
This commit is contained in:
parent
5149b64403
commit
bd65eefcd3
8 changed files with 160 additions and 246 deletions
40
cmd/all.go
40
cmd/all.go
|
|
@ -3,10 +3,11 @@ package cmd
|
|||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"borg-data-collector/pkg/borg"
|
||||
"borg-data-collector/pkg/github"
|
||||
"borg-data-collector/pkg/trix"
|
||||
"borg-data-collector/pkg/vcs"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
|
@ -15,7 +16,7 @@ import (
|
|||
var allCmd = &cobra.Command{
|
||||
Use: "all [user/org]",
|
||||
Short: "Collect all public repositories from a user or organization",
|
||||
Long: `Collect all public repositories from a user or organization and store them in a Trix cube.`,
|
||||
Long: `Collect all public repositories from a user or organization and store them in a DataNode.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
fmt.Println(borg.GetRandomAssimilationMessage())
|
||||
|
|
@ -26,30 +27,30 @@ var allCmd = &cobra.Command{
|
|||
return
|
||||
}
|
||||
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
|
||||
cube, err := trix.NewCube(outputFile)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
defer cube.Close()
|
||||
outputDir, _ := cmd.Flags().GetString("output")
|
||||
|
||||
for _, repoURL := range repos {
|
||||
fmt.Printf("Cloning %s...\n", repoURL)
|
||||
|
||||
tempPath, err := os.MkdirTemp("", "borg-clone-*")
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
defer os.RemoveAll(tempPath)
|
||||
|
||||
err = addRepoToCube(repoURL, cube, tempPath)
|
||||
dn, err := vcs.CloneGitRepository(repoURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Error cloning %s: %s\n", repoURL, err)
|
||||
continue
|
||||
}
|
||||
|
||||
data, err := dn.ToTar()
|
||||
if err != nil {
|
||||
fmt.Printf("Error serializing DataNode for %s: %v\n", repoURL, err)
|
||||
continue
|
||||
}
|
||||
|
||||
repoName := strings.Split(repoURL, "/")[len(strings.Split(repoURL, "/"))-1]
|
||||
outputFile := fmt.Sprintf("%s/%s.dat", outputDir, repoName)
|
||||
err = os.WriteFile(outputFile, data, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing DataNode for %s to file: %v\n", repoURL, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Println(borg.GetRandomCodeLongMessage())
|
||||
|
|
@ -57,5 +58,6 @@ var allCmd = &cobra.Command{
|
|||
}
|
||||
|
||||
func init() {
|
||||
collectCmd.AddCommand(allCmd)
|
||||
rootCmd.AddCommand(allCmd)
|
||||
allCmd.PersistentFlags().String("output", ".", "Output directory for the DataNodes")
|
||||
}
|
||||
|
|
|
|||
53
cmd/cat.go
53
cmd/cat.go
|
|
@ -1,53 +0,0 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"borg-data-collector/pkg/trix"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// catCmd represents the cat command
|
||||
var catCmd = &cobra.Command{
|
||||
Use: "cat [cube-file] [file-to-extract]",
|
||||
Short: "Extract a file from a Trix cube",
|
||||
Long: `Extract a file from a Trix cube and print its content to standard output.`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
cubeFile := args[0]
|
||||
fileToExtract := args[1]
|
||||
|
||||
reader, file, err := trix.Extract(cubeFile)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for {
|
||||
hdr, err := reader.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
if hdr.Name == fileToExtract {
|
||||
if _, err := io.Copy(os.Stdout, reader); err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(catCmd)
|
||||
}
|
||||
|
|
@ -2,8 +2,9 @@ package cmd
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"borg-data-collector/pkg/trix"
|
||||
"borg-data-collector/pkg/vcs"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
|
@ -12,34 +13,35 @@ import (
|
|||
var collectCmd = &cobra.Command{
|
||||
Use: "collect [repository-url]",
|
||||
Short: "Collect a single repository",
|
||||
Long: `Collect a single repository and store it in a Trix cube.`,
|
||||
Long: `Collect a single repository and store it in a DataNode.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
if len(args) < 1 {
|
||||
fmt.Println("Please provide a repository URL")
|
||||
return
|
||||
}
|
||||
repoURL := args[0]
|
||||
clonePath, _ := cmd.Flags().GetString("path")
|
||||
outputFile, _ := cmd.Flags().GetString("output")
|
||||
|
||||
cube, err := trix.NewCube(outputFile)
|
||||
dn, err := vcs.CloneGitRepository(repoURL)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
fmt.Printf("Error cloning repository: %v\n", err)
|
||||
return
|
||||
}
|
||||
defer cube.Close()
|
||||
|
||||
err = addRepoToCube(repoURL, cube, clonePath)
|
||||
data, err := dn.ToTar()
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
fmt.Printf("Error serializing DataNode: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = os.WriteFile(outputFile, data, 0644)
|
||||
if err != nil {
|
||||
fmt.Printf("Error writing DataNode to file: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Repository saved to %s\n", outputFile)
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(collectCmd)
|
||||
collectCmd.PersistentFlags().String("path", "/tmp/borg-clone", "Path to clone the repository")
|
||||
collectCmd.PersistentFlags().String("output", "borg.cube", "Output file for the Trix cube")
|
||||
collectCmd.PersistentFlags().String("output", "repo.dat", "Output file for the DataNode")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,41 +0,0 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"borg-data-collector/pkg/trix"
|
||||
|
||||
"github.com/go-git/go-git/v5"
|
||||
)
|
||||
|
||||
func addRepoToCube(repoURL string, cube *trix.Cube, clonePath string) error {
|
||||
_, err := git.PlainClone(clonePath, false, &git.CloneOptions{
|
||||
URL: repoURL,
|
||||
Progress: os.Stdout,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = filepath.Walk(clonePath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
content, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
relPath, err := filepath.Rel(clonePath, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cube.AddFile(relPath, content)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
return err
|
||||
}
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"borg-data-collector/pkg/borg"
|
||||
"borg-data-collector/pkg/trix"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// ingestCmd represents the ingest command
|
||||
var ingestCmd = &cobra.Command{
|
||||
Use: "ingest [cube-file] [file-to-add]",
|
||||
Short: "Add a file to a Trix cube",
|
||||
Long: `Add a file to a Trix cube. If the cube file does not exist, it will be created.`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
cubeFile := args[0]
|
||||
fileToAdd := args[1]
|
||||
|
||||
var cube *trix.Cube
|
||||
var err error
|
||||
|
||||
if _, err := os.Stat(cubeFile); os.IsNotExist(err) {
|
||||
cube, err = trix.NewCube(cubeFile)
|
||||
} else {
|
||||
cube, err = trix.AppendToCube(cubeFile)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
defer cube.Close()
|
||||
|
||||
content, err := os.ReadFile(fileToAdd)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
err = cube.AddFile(fileToAdd, content)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Println(borg.GetRandomCodeShortMessage())
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
rootCmd.AddCommand(ingestCmd)
|
||||
}
|
||||
|
|
@ -1,63 +0,0 @@
|
|||
package trix
|
||||
|
||||
import (
	"archive/tar"
	"io"
	"os"
)
|
||||
|
||||
// tarTrailerSize is the length of the end-of-archive marker that
// tar.Writer.Close emits: two 512-byte blocks of zeros.
const tarTrailerSize = 1024

// Cube is a tar archive that is open for writing. It owns both the tar
// stream and the file underneath it; Close releases both.
type Cube struct {
	writer *tar.Writer
	file   *os.File
}

// NewCube creates the file at path (truncating any existing file) and returns
// a Cube that writes a new tar archive into it.
func NewCube(path string) (*Cube, error) {
	file, err := os.Create(path)
	if err != nil {
		return nil, err
	}
	return &Cube{writer: tar.NewWriter(file), file: file}, nil
}

// AddFile appends one entry named path with the given content. Entries are
// written with mode 0600 and no other metadata.
func (c *Cube) AddFile(path string, content []byte) error {
	hdr := &tar.Header{
		Name: path,
		Mode: 0600,
		Size: int64(len(content)),
	}
	if err := c.writer.WriteHeader(hdr); err != nil {
		return err
	}
	_, err := c.writer.Write(content)
	return err
}

// Close finalizes the tar stream and closes the underlying file. The file is
// closed even when finalizing the stream fails, so the descriptor never
// leaks; the stream error takes precedence in the return value.
func (c *Cube) Close() error {
	writerErr := c.writer.Close()
	fileErr := c.file.Close()
	if writerErr != nil {
		return writerErr
	}
	return fileErr
}

// Extract opens the cube at path for reading. The caller must close the
// returned file once it is done with the reader.
func Extract(path string) (*tar.Reader, *os.File, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, nil, err
	}
	return tar.NewReader(file), file, nil
}

// AppendToCube opens an existing cube so that further entries can be added.
// It fails if the file does not exist.
//
// New entries must overwrite the old end-of-archive marker: tar readers stop
// at that marker, so anything written after it (which is what O_APPEND would
// do) can never be read back. The file is therefore opened read-write and
// positioned at the start of the marker when one is present.
func AppendToCube(path string) (*Cube, error) {
	file, err := os.OpenFile(path, os.O_RDWR, 0644)
	if err != nil {
		return nil, err
	}

	offset, err := appendOffset(file)
	if err == nil {
		_, err = file.Seek(offset, io.SeekStart)
	}
	if err != nil {
		file.Close()
		return nil, err
	}

	return &Cube{writer: tar.NewWriter(file), file: file}, nil
}

// appendOffset reports where new tar entries should start in file: at the
// beginning of a trailing end-of-archive marker if the file ends with one,
// otherwise at the end of the file. It assumes the archive was finalized by
// tar.Writer.Close, which writes exactly tarTrailerSize zero bytes.
func appendOffset(file *os.File) (int64, error) {
	info, err := file.Stat()
	if err != nil {
		return 0, err
	}
	size := info.Size()
	if size < tarTrailerSize {
		return size, nil
	}

	trailer := make([]byte, tarTrailerSize)
	if _, err := file.ReadAt(trailer, size-tarTrailerSize); err != nil {
		return 0, err
	}
	for _, b := range trailer {
		if b != 0 {
			// No marker at the end; leave the existing bytes alone.
			return size, nil
		}
	}
	return size - tarTrailerSize, nil
}
|
||||
51
pkg/vcs/git.go
Normal file
51
pkg/vcs/git.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
package vcs
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"borg-data-collector/pkg/datanode"
|
||||
|
||||
"github.com/go-git/go-git/v5"
|
||||
)
|
||||
|
||||
// CloneGitRepository clones a Git repository from a URL and packages it into a DataNode.
|
||||
func CloneGitRepository(repoURL string) (*datanode.DataNode, error) {
|
||||
tempPath, err := os.MkdirTemp("", "borg-clone-*")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer os.RemoveAll(tempPath)
|
||||
|
||||
_, err = git.PlainClone(tempPath, false, &git.CloneOptions{
|
||||
URL: repoURL,
|
||||
Progress: os.Stdout,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dn := datanode.New()
|
||||
err = filepath.Walk(tempPath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
content, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
relPath, err := filepath.Rel(tempPath, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dn.AddData(relPath, content)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return dn, nil
|
||||
}
|
||||
72
pkg/vcs/git_test.go
Normal file
72
pkg/vcs/git_test.go
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
package vcs
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCloneGitRepository(t *testing.T) {
|
||||
// Create a temporary directory for the bare repository
|
||||
bareRepoPath, err := os.MkdirTemp("", "bare-repo-")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp dir for bare repo: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(bareRepoPath)
|
||||
|
||||
// Initialize a bare git repository
|
||||
cmd := exec.Command("git", "init", "--bare")
|
||||
cmd.Dir = bareRepoPath
|
||||
if err := cmd.Run(); err != nil {
|
||||
t.Fatalf("Failed to init bare repo: %v", err)
|
||||
}
|
||||
|
||||
// Clone the bare repository to a temporary directory to add a commit
|
||||
clonePath, err := os.MkdirTemp("", "clone-")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp dir for clone: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(clonePath)
|
||||
|
||||
cmd = exec.Command("git", "clone", bareRepoPath, clonePath)
|
||||
if err := cmd.Run(); err != nil {
|
||||
t.Fatalf("Failed to clone bare repo: %v", err)
|
||||
}
|
||||
|
||||
// Create a file and commit it
|
||||
filePath := filepath.Join(clonePath, "foo.txt")
|
||||
if err := os.WriteFile(filePath, []byte("foo"), 0644); err != nil {
|
||||
t.Fatalf("Failed to write file: %v", err)
|
||||
}
|
||||
cmd = exec.Command("git", "add", "foo.txt")
|
||||
cmd.Dir = clonePath
|
||||
if err := cmd.Run(); err != nil {
|
||||
t.Fatalf("Failed to git add: %v", err)
|
||||
}
|
||||
cmd = exec.Command("git", "commit", "-m", "Initial commit")
|
||||
cmd.Dir = clonePath
|
||||
if err := cmd.Run(); err != nil {
|
||||
t.Fatalf("Failed to git commit: %v", err)
|
||||
}
|
||||
cmd = exec.Command("git", "push", "origin", "master")
|
||||
cmd.Dir = clonePath
|
||||
if err := cmd.Run(); err != nil {
|
||||
t.Fatalf("Failed to git push: %v", err)
|
||||
}
|
||||
|
||||
// Clone the repository using the function we're testing
|
||||
dn, err := CloneGitRepository("file://" + bareRepoPath)
|
||||
if err != nil {
|
||||
t.Fatalf("CloneGitRepository failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify the DataNode contains the correct file
|
||||
exists, err := dn.Exists("foo.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("Exists failed: %v", err)
|
||||
}
|
||||
if !exists {
|
||||
t.Errorf("Expected to find file foo.txt in DataNode, but it was not found")
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue