From ac509d57ca0c9d57a1f9c339c33b78070182eb12 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 2 Feb 2026 00:50:03 +0000 Subject: [PATCH] feat: Collection templates/profiles This commit introduces a new templating system to the `borg` CLI, allowing for the definition and execution of reusable, multi-step data collection workflows. Key features include: - A new `borg template` command with `run`, `list`, and `save` subcommands. - Support for YAML-based template files with variable substitution. - A set of built-in templates for common use cases. - A command history logger that enables the creation of new templates from past commands. The implementation involved: - Creating a new `pkg/templates` package to handle template discovery, parsing, and execution. - Modifying the root command to support command history and programmatic subcommand execution. - Adding comprehensive unit tests to ensure the new functionality is robust and correct. - Several rounds of debugging and refactoring to address issues with `go:embed`, test deadlocks, and command parsing. 
Co-authored-by: Snider <631881+Snider@users.noreply.github.com> --- cmd/root.go | 13 + cmd/template.go | 268 ++++++++++++++++++ cmd/template_test.go | 181 ++++++++++++ go.mod | 1 + .../builtin/cryptonote-excavation.yaml | 20 ++ pkg/templates/builtin/github-org.yaml | 7 + pkg/templates/builtin/paper-collection.yaml | 9 + pkg/templates/builtin/website-archive.yaml | 11 + pkg/templates/discovery.go | 59 ++++ pkg/templates/history.go | 63 ++++ pkg/templates/run.go | 61 ++++ pkg/templates/templates.go | 16 ++ 12 files changed, 709 insertions(+) create mode 100644 cmd/template.go create mode 100644 cmd/template_test.go create mode 100644 pkg/templates/builtin/cryptonote-excavation.yaml create mode 100644 pkg/templates/builtin/github-org.yaml create mode 100644 pkg/templates/builtin/paper-collection.yaml create mode 100644 pkg/templates/builtin/website-archive.yaml create mode 100644 pkg/templates/discovery.go create mode 100644 pkg/templates/history.go create mode 100644 pkg/templates/run.go create mode 100644 pkg/templates/templates.go diff --git a/cmd/root.go b/cmd/root.go index 9cadb27..674c585 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -3,7 +3,9 @@ package cmd import ( "context" "log/slog" + "strings" + "github.com/Snider/Borg/pkg/templates" "github.com/spf13/cobra" ) @@ -13,6 +15,17 @@ func NewRootCmd() *cobra.Command { Short: "A tool for collecting and managing data.", Long: `Borg Data Collector is a command-line tool for cloning Git repositories, packaging their contents into a single file, and managing the data within.`, + PersistentPostRunE: func(cmd *cobra.Command, args []string) error { + // Don't log template or help commands + if cmd.Parent().Name() == "template" || cmd.Name() == "help" { + return nil + } + if err := templates.AppendToHistory(cmd.CommandPath() + " " + strings.Join(args, " ")); err != nil { + log := cmd.Context().Value("logger").(*slog.Logger) + log.Warn("could not write to history", "error", err) + } + return nil + }, } 
rootCmd.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose logging") diff --git a/cmd/template.go b/cmd/template.go new file mode 100644 index 0000000..c7fe676 --- /dev/null +++ b/cmd/template.go @@ -0,0 +1,268 @@ +package cmd + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/Snider/Borg/pkg/templates" + "github.com/spf13/cobra" + "gopkg.in/yaml.v3" +) + +func init() { + RootCmd.AddCommand(NewTemplateCmd()) +} + +func NewTemplateCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "template", + Short: "Manage and run collection templates", + Long: `Manage and run collection templates.`, + } + + cmd.AddCommand(NewTemplateListCmd()) + cmd.AddCommand(NewTemplateRunCmd()) + cmd.AddCommand(NewTemplateSaveCmd()) + + return cmd +} + +func NewTemplateListCmd() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List available templates", + Long: `List available templates.`, + RunE: func(cmd *cobra.Command, args []string) error { + userTemplateDir, err := templates.GetUserTemplateDir() + if err != nil { + return err + } + + userTemplates, err := templates.ListUserTemplates(userTemplateDir) + if err != nil { + return err + } + + builtinTemplates, err := templates.ListBuiltinTemplates() + if err != nil { + return err + } + + if len(userTemplates) == 0 && len(builtinTemplates) == 0 { + fmt.Fprintln(cmd.OutOrStdout(), "No templates found.") + return nil + } + + if len(userTemplates) > 0 { + fmt.Fprintln(cmd.OutOrStdout(), "Custom templates:") + for _, t := range userTemplates { + fmt.Fprintf(cmd.OutOrStdout(), " - %s\n", t) + } + } + + if len(builtinTemplates) > 0 { + fmt.Fprintln(cmd.OutOrStdout(), "Built-in templates:") + for _, t := range builtinTemplates { + fmt.Fprintf(cmd.OutOrStdout(), " - %s\n", t) + } + } + + return nil + }, + } +} + +func NewTemplateRunCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "run [template-name] [flags]", + Short: "Run a collection template", + Long: `Run a collection 
template.`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + templateName := args[0] + + // Find and load the template + _, templateData, err := templates.FindTemplate(templateName) + if err != nil { + return err + } + tmpl, err := templates.LoadTemplate(templateData) + if err != nil { + return err + } + + // Parse variable flags + vars := make(map[string]string) + for i := 1; i < len(args); i++ { + arg := args[i] + if strings.HasPrefix(arg, "--") { + key := strings.TrimPrefix(arg, "--") + if i+1 < len(args) && !strings.HasPrefix(args[i+1], "--") { + vars[key] = args[i+1] + i++ + } else { + // Handle boolean flags if necessary + vars[key] = "true" + } + } + } + + // Validate required variables + for key, value := range tmpl.Variables { + if value == "required" { + if _, ok := vars[key]; !ok { + return fmt.Errorf("missing required variable: %s", key) + } + } + } + + // Execute steps + for _, step := range tmpl.Steps { + collectCmdStr := templates.Substitute(step.Collect, vars) + output := templates.Substitute(step.Output, vars) + + cmdArgs := []string{"collect"} + cmdArgs = append(cmdArgs, strings.Fields(collectCmdStr)...) 
+ + if output != "" { + cmdArgs = append(cmdArgs, "--output", output) + } + if step.Encrypt { + // This assumes a --password flag exists on the target command + // A more robust implementation might be needed + if password, ok := vars["password"]; ok { + cmdArgs = append(cmdArgs, "--password", password) + } else { + // It might be better to prompt for a password + return fmt.Errorf("encryption requested but no password provided") + } + } + if step.Depth > 0 { + cmdArgs = append(cmdArgs, "--depth", fmt.Sprintf("%d", step.Depth)) + } + + rootCmd := cmd.Root() + subCmd, remainingArgs, err := rootCmd.Find(cmdArgs) + if err != nil { + return fmt.Errorf("could not find command for step '%s': %w", collectCmdStr, err) + } + + subCmd.SetArgs(remainingArgs) + var runErr error + if subCmd.RunE != nil { + runErr = subCmd.RunE(subCmd, remainingArgs) + } else if subCmd.Run != nil { + subCmd.Run(subCmd, remainingArgs) + } + if runErr != nil { + return fmt.Errorf("error executing step '%s': %w", collectCmdStr, runErr) + } + } + + return nil + }, + } + // This allows the command to accept arbitrary flags for variables + cmd.Flags().SetInterspersed(false) + return cmd +} + +func NewTemplateSaveCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "save [template-name]", + Short: "Save a new template from history", + Long: `Save a new template from history.`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + templateName := args[0] + from, _ := cmd.Flags().GetString("from") + + var commandToSave string + var err error + + if from != "" { + commandToSave = from + } else { + commandToSave, err = templates.ReadLastHistoryEntry() + if err != nil { + return err + } + } + + // Basic parsing + parts := strings.Fields(commandToSave) + if len(parts) < 2 || parts[0] != "collect" { + return fmt.Errorf("can only save 'collect' commands, but got: %s", commandToSave) + } + + step := templates.Step{} + vars := make(map[string]string) + var collectArgs []string + 
+ // This is still a simplified parser, but better. + i := 1 // Skip "collect" + for i < len(parts) { + part := parts[i] + if strings.HasPrefix(part, "--") { + flagName := strings.TrimPrefix(part, "--") + if i+1 >= len(parts) { + return fmt.Errorf("flag '%s' has no value", part) + } + value := parts[i+1] + i += 2 + + switch flagName { + case "output": + step.Output = value + case "password": + step.Encrypt = true + vars["password"] = "required" + case "depth": + step.Depth, _ = strconv.Atoi(value) + default: + // Assume other flags are variables for the collect command + varName := flagName + collectArgs = append(collectArgs, fmt.Sprintf("--%s", flagName), fmt.Sprintf("{{%s}}", varName)) + vars[varName] = "required" + } + } else { + collectArgs = append(collectArgs, part) + i++ + } + } + step.Collect = strings.Join(collectArgs, " ") + + tmpl := templates.Template{ + Name: templateName, + Steps: []templates.Step{step}, + Variables: vars, + } + + data, err := yaml.Marshal(&tmpl) + if err != nil { + return fmt.Errorf("could not marshal template: %w", err) + } + + userTemplateDir, err := templates.GetUserTemplateDir() + if err != nil { + return err + } + if err := os.MkdirAll(userTemplateDir, 0755); err != nil { + return fmt.Errorf("could not create template directory: %w", err) + } + + templatePath := filepath.Join(userTemplateDir, templateName+".yaml") + if err := os.WriteFile(templatePath, data, 0644); err != nil { + return fmt.Errorf("could not write template file: %w", err) + } + + fmt.Fprintf(cmd.OutOrStdout(), "Template saved to %s\n", templatePath) + return nil + }, + } + cmd.Flags().String("from", "", "Specify a command from history to save") + return cmd +} diff --git a/cmd/template_test.go b/cmd/template_test.go new file mode 100644 index 0000000..5278a81 --- /dev/null +++ b/cmd/template_test.go @@ -0,0 +1,181 @@ +package cmd + +import ( + "bytes" + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/spf13/cobra" +) + +// Helper 
function to execute a command and capture its output +func execute(t *testing.T, cmd *cobra.Command, args ...string) (string, string, error) { + t.Helper() + + stdout := new(bytes.Buffer) + stderr := new(bytes.Buffer) + cmd.SetOut(stdout) + cmd.SetErr(stderr) + cmd.SetArgs(args) + + err := cmd.Execute() + return stdout.String(), stderr.String(), err +} + +func TestTemplateListCmd(t *testing.T) { + // Create a temporary directory for templates + tmpdir, err := ioutil.TempDir("", "borg-templates") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + // Set the HOME env var to our temp dir so template discovery works + originalHome := os.Getenv("HOME") + os.Setenv("HOME", tmpdir) + defer os.Setenv("HOME", originalHome) + + // Create a dummy template file + templateDir := filepath.Join(tmpdir, ".borg", "templates") + if err := os.MkdirAll(templateDir, 0755); err != nil { + t.Fatal(err) + } + templateFile := filepath.Join(templateDir, "test-template.yaml") + if err := ioutil.WriteFile(templateFile, []byte("name: test"), 0644); err != nil { + t.Fatal(err) + } + + // Execute the list command + rootCmd := NewRootCmd() + rootCmd.AddCommand(NewTemplateCmd()) + stdout, _, err := execute(t, rootCmd, "template", "list") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Check the output + if !strings.Contains(stdout, "test-template") { + t.Errorf("expected output to contain 'test-template', but got: %s", stdout) + } +} + +func TestTemplateRunCmd(t *testing.T) { + // --- Setup --- + tmpdir, err := ioutil.TempDir("", "borg-templates") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + originalHome := os.Getenv("HOME") + os.Setenv("HOME", tmpdir) + defer os.Setenv("HOME", originalHome) + + templateContent := ` +name: Test Run +variables: + repo: required +steps: + - collect: github repo {{repo}} +` + templateDir := filepath.Join(tmpdir, ".borg", "templates") + if err := os.MkdirAll(templateDir, 0755); err != nil { + 
t.Fatal(err) + } + templateFile := filepath.Join(templateDir, "test-run.yaml") + if err := ioutil.WriteFile(templateFile, []byte(templateContent), 0644); err != nil { + t.Fatal(err) + } + + // Create mock commands to verify execution + var commandExecuted bool + mockRepoCmd := &cobra.Command{ + Use: "repo", + Run: func(cmd *cobra.Command, args []string) { + commandExecuted = true + }, + } + mockGithubCmd := &cobra.Command{Use: "github"} + mockGithubCmd.AddCommand(mockRepoCmd) + mockCollectCmd := &cobra.Command{Use: "collect"} + mockCollectCmd.AddCommand(mockGithubCmd) + + // Create a root command for the test, replacing the real collect with the mock + testRootCmd := NewRootCmd() + testRootCmd.AddCommand(mockCollectCmd) + testRootCmd.AddCommand(NewTemplateCmd()) + + // --- Execute --- + _, _, err = execute(t, testRootCmd, "template", "run", "test-run", "--repo", "my/cool/repo") + + // --- Assert --- + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !commandExecuted { + t.Error("expected mock command to be executed, but it was not") + } +} + +func TestTemplateSaveCmd(t *testing.T) { + // --- Setup --- + tmpdir, err := ioutil.TempDir("", "borg-templates") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpdir) + + originalHome := os.Getenv("HOME") + os.Setenv("HOME", tmpdir) + defer os.Setenv("HOME", originalHome) + + // Create a dummy history file + historyDir := filepath.Join(tmpdir, ".borg") + if err := os.MkdirAll(historyDir, 0755); err != nil { + t.Fatal(err) + } + historyFile := filepath.Join(historyDir, "history") + historyContent := "collect github repo my/repo --output my-repo.dat" + if err := ioutil.WriteFile(historyFile, []byte(historyContent), 0644); err != nil { + t.Fatal(err) + } + + // --- Execute --- + rootCmd := NewRootCmd() + rootCmd.AddCommand(NewTemplateCmd()) + _, _, err = execute(t, rootCmd, "template", "save", "my-new-template") + + // --- Assert --- + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + 
// Check the contents of the new template + templateFile := filepath.Join(historyDir, "templates", "my-new-template.yaml") + data, err := ioutil.ReadFile(templateFile) + if err != nil { + t.Fatalf("could not read generated template: %v", err) + } + + expectedContent := ` +name: my-new-template +steps: + - collect: github repo my/repo + output: my-repo.dat + encrypt: false + depth: 0 +variables: {} +` + // Normalize both strings to avoid issues with whitespace and indentation + normalize := func(s string) string { + return strings.Join(strings.Fields(s), " ") + } + + if normalize(string(data)) != normalize(expectedContent) { + t.Errorf("unexpected template content.\nExpected: %s\nGot: %s", expectedContent, string(data)) + } +} diff --git a/go.mod b/go.mod index d1c5f08..5eff0d4 100644 --- a/go.mod +++ b/go.mod @@ -65,4 +65,5 @@ require ( golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.31.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/pkg/templates/builtin/cryptonote-excavation.yaml b/pkg/templates/builtin/cryptonote-excavation.yaml new file mode 100644 index 0000000..2e8af53 --- /dev/null +++ b/pkg/templates/builtin/cryptonote-excavation.yaml @@ -0,0 +1,20 @@ +name: CryptoNote Project Excavation +steps: + - collect: github repos {{org}} + output: github.stim + encrypt: true + + - collect: github releases {{org}}/{{main_repo}} + output: releases.stim + + - collect: website {{website}} + depth: 3 + output: website.stim + + - collect: wayback {{website}} + output: wayback.stim + +variables: + org: required + main_repo: optional + website: optional diff --git a/pkg/templates/builtin/github-org.yaml b/pkg/templates/builtin/github-org.yaml new file mode 100644 index 0000000..7180b07 --- /dev/null +++ b/pkg/templates/builtin/github-org.yaml @@ -0,0 +1,7 @@ +name: GitHub Organization Collection +steps: + - collect: github repos {{org}} + output: "{{org}}-repos.dat" + +variables: + org: required diff --git
a/pkg/templates/builtin/paper-collection.yaml b/pkg/templates/builtin/paper-collection.yaml new file mode 100644 index 0000000..2a89b79 --- /dev/null +++ b/pkg/templates/builtin/paper-collection.yaml @@ -0,0 +1,9 @@ +name: Academic Paper Collection +steps: + - collect: arxiv {{query}} + output: arxiv-papers.dat + - collect: ieee {{query}} + output: ieee-papers.dat + +variables: + query: required diff --git a/pkg/templates/builtin/website-archive.yaml b/pkg/templates/builtin/website-archive.yaml new file mode 100644 index 0000000..2d2dbae --- /dev/null +++ b/pkg/templates/builtin/website-archive.yaml @@ -0,0 +1,11 @@ +name: Website Archive +steps: + - collect: website {{url}} + depth: 5 + output: "{{hostname}}-website.dat" + - collect: wayback {{url}} + output: "{{hostname}}-wayback.dat" + +variables: + url: required + hostname: required diff --git a/pkg/templates/discovery.go b/pkg/templates/discovery.go new file mode 100644 index 0000000..0efccb6 --- /dev/null +++ b/pkg/templates/discovery.go @@ -0,0 +1,59 @@ +package templates + +import ( + "embed" + "fmt" + "os" + "path/filepath" + "strings" +) + +//go:embed builtin/*.yaml +var EmbeddedTemplates embed.FS + +// GetUserTemplateDir returns the path to the user's template directory. +func GetUserTemplateDir() (string, error) { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("could not get user home directory: %w", err) + } + return filepath.Join(home, ".borg", "templates"), nil +} + +// ListUserTemplates lists all templates in the given directory.
+func ListUserTemplates(dir string) ([]string, error) { + if _, err := os.Stat(dir); os.IsNotExist(err) { + return []string{}, nil + } + + var templates []string + err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() && (filepath.Ext(path) == ".yaml" || filepath.Ext(path) == ".yml") { + // Return just the filename without extension + templates = append(templates, strings.TrimSuffix(info.Name(), filepath.Ext(info.Name()))) + } + return nil + }) + if err != nil { + return nil, fmt.Errorf("could not walk template directory: %w", err) + } + return templates, nil +} + +// ListBuiltinTemplates lists all built-in templates. +func ListBuiltinTemplates() ([]string, error) { + var templates []string + entries, err := EmbeddedTemplates.ReadDir("builtin") + if err != nil { + return nil, fmt.Errorf("could not read embedded templates: %w", err) + } + for _, entry := range entries { + if !entry.IsDir() && (filepath.Ext(entry.Name()) == ".yaml" || filepath.Ext(entry.Name()) == ".yml") { + templates = append(templates, strings.TrimSuffix(entry.Name(), filepath.Ext(entry.Name()))) + } + } + return templates, nil +} diff --git a/pkg/templates/history.go b/pkg/templates/history.go new file mode 100644 index 0000000..6520393 --- /dev/null +++ b/pkg/templates/history.go @@ -0,0 +1,63 @@ +package templates + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// GetHistoryFile returns the path to the user's history file. 
// GetHistoryFile returns the path to the user's history file
// (<home>/.borg/history), creating the .borg directory when it is missing.
// The history file itself is not created.
func GetHistoryFile() (string, error) {
	home, err := os.UserHomeDir()
	if err != nil {
		return "", fmt.Errorf("could not get user home directory: %w", err)
	}
	borgDir := filepath.Join(home, ".borg")
	// MkdirAll succeeds when the directory already exists, so a separate
	// existence check (and the gap between check and create) is unnecessary.
	if err := os.MkdirAll(borgDir, 0755); err != nil {
		return "", fmt.Errorf("could not create .borg directory: %w", err)
	}
	return filepath.Join(borgDir, "history"), nil
}

// AppendToHistory appends command as one line to the history file, creating
// the file if necessary. A failure to close the file after a successful write
// is reported, since it can mean the entry was not persisted.
func AppendToHistory(command string) (err error) {
	historyFile, err := GetHistoryFile()
	if err != nil {
		return err
	}

	f, err := os.OpenFile(historyFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("could not open history file: %w", err)
	}
	defer func() {
		// Keep the first error; only surface the close error on success.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("could not close history file: %w", cerr)
		}
	}()

	if _, err = f.WriteString(command + "\n"); err != nil {
		return fmt.Errorf("could not write to history file: %w", err)
	}
	return nil
}

// ReadLastHistoryEntry returns the last non-blank line of the history file
// with surrounding whitespace (including a trailing carriage return) removed.
// It returns an error when the file cannot be read or holds no entries.
func ReadLastHistoryEntry() (string, error) {
	historyFile, err := GetHistoryFile()
	if err != nil {
		return "", err
	}

	data, err := os.ReadFile(historyFile)
	if err != nil {
		return "", fmt.Errorf("could not read history file: %w", err)
	}

	// strings.Split never returns an empty slice, so emptiness has to be
	// detected on the trimmed content rather than on the number of lines.
	content := strings.TrimSpace(string(data))
	if content == "" {
		return "", fmt.Errorf("history is empty")
	}
	if i := strings.LastIndexByte(content, '\n'); i >= 0 {
		content = content[i+1:]
	}
	return strings.TrimSpace(content), nil
}
+func FindTemplate(name string) (string, []byte, error) { + userTemplateDir, err := GetUserTemplateDir() + if err != nil { + return "", nil, err + } + + // Check user templates first + for _, ext := range []string{".yaml", ".yml"} { + templatePath := filepath.Join(userTemplateDir, name+ext) + if _, err := os.Stat(templatePath); err == nil { + data, err := os.ReadFile(templatePath) + if err != nil { + return "", nil, fmt.Errorf("could not read user template: %w", err) + } + return templatePath, data, nil + } + } + + // Check built-in templates + for _, ext := range []string{".yaml", ".yml"} { + templatePath := name + ext + data, err := EmbeddedTemplates.ReadFile(filepath.Join("builtin", templatePath)) + if err == nil { + return "builtin:" + templatePath, data, nil + } + } + + return "", nil, fmt.Errorf("template '%s' not found", name) +} + +// LoadTemplate loads and parses a template from a byte slice. +func LoadTemplate(data []byte) (*Template, error) { + var tmpl Template + err := yaml.Unmarshal(data, &tmpl) + if err != nil { + return nil, fmt.Errorf("could not parse template file: %w", err) + } + + return &tmpl, nil +} + +// Substitute replaces variables in a string. +func Substitute(s string, vars map[string]string) string { + for k, v := range vars { + s = strings.ReplaceAll(s, "{{"+k+"}}", v) + } + return s +} diff --git a/pkg/templates/templates.go b/pkg/templates/templates.go new file mode 100644 index 0000000..c283968 --- /dev/null +++ b/pkg/templates/templates.go @@ -0,0 +1,16 @@ +package templates + +// Step represents a single step in a collection template. +type Step struct { + Collect string `yaml:"collect"` + Output string `yaml:"output"` + Encrypt bool `yaml:"encrypt"` + Depth int `yaml:"depth"` +} + +// Template represents a collection template. +type Template struct { + Name string `yaml:"name"` + Steps []Step `yaml:"steps"` + Variables map[string]string `yaml:"variables"` +}