feat(search): Add full-text search and indexing for archives
This commit introduces two new commands: `borg search` and `borg index`. The `borg index` command builds a trigram index for an archive, which can be used to significantly speed up searches. The `borg search` command allows users to search for patterns within archives. It supports regular expressions, context control, file type filtering, and result limits. The command will automatically use a pre-built index if one is available, falling back to a full scan if not.

This commit also includes:
- Unit tests for the new commands.
- Documentation for the new commands in `docs/cli.md`.
- Updates to `.gitignore` to exclude index files.
- Improvements to the test infrastructure to prevent state pollution.

Co-authored-by: Snider <631881+Snider@users.noreply.github.com>
This commit is contained in:
parent
cf2af53ed3
commit
c3865faf56
7 changed files with 654 additions and 21 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -10,3 +10,4 @@ demo-track.smsg
|
|||
|
||||
# Dev artifacts
|
||||
.playwright-mcp/
|
||||
.borg-index/
|
||||
|
|
|
|||
134
cmd/index.go
Normal file
134
cmd/index.go
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/gob"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/Snider/Borg/pkg/compress"
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// indexCmd represents the index command
|
||||
var indexCmd = NewIndexCmd()
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(GetIndexCmd())
|
||||
}
|
||||
|
||||
func NewIndexCmd() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "index <archive>",
|
||||
Short: "Build search index for an archive.",
|
||||
Long: `Build a search index for a .dat, .tim, or .trix archive.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
archivePath, err := filepath.Abs(args[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get absolute path for archive: %w", err)
|
||||
}
|
||||
|
||||
// Read and decompress the archive
|
||||
compressedData, err := os.ReadFile(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read archive: %w", err)
|
||||
}
|
||||
tarData, err := compress.Decompress(compressedData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decompress archive: %w", err)
|
||||
}
|
||||
|
||||
// Load the DataNode
|
||||
dn, err := datanode.FromTar(tarData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load datanode: %w", err)
|
||||
}
|
||||
|
||||
// Build the index
|
||||
trigramIndex := make(map[[3]byte][]uint32)
|
||||
var fileList []string
|
||||
|
||||
err = dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add file to list and map
|
||||
fileID := uint32(len(fileList))
|
||||
fileList = append(fileList, path)
|
||||
|
||||
// Read file content
|
||||
file, err := dn.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
content, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Generate and add trigrams
|
||||
if len(content) < 3 {
|
||||
return nil
|
||||
}
|
||||
for i := 0; i <= len(content)-3; i++ {
|
||||
var trigram [3]byte
|
||||
copy(trigram[:], content[i:i+3])
|
||||
|
||||
postings := trigramIndex[trigram]
|
||||
if len(postings) == 0 || postings[len(postings)-1] != fileID {
|
||||
trigramIndex[trigram] = append(postings, fileID)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to walk datanode: %w", err)
|
||||
}
|
||||
|
||||
// Save the index
|
||||
indexDir := filepath.Join(filepath.Dir(archivePath), ".borg-index")
|
||||
if err := os.MkdirAll(indexDir, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create index directory: %w", err)
|
||||
}
|
||||
|
||||
// Save file list
|
||||
fileListPath := filepath.Join(indexDir, "files.json")
|
||||
fileListData, err := json.MarshalIndent(fileList, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal file list: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(fileListPath, fileListData, 0644); err != nil {
|
||||
return fmt.Errorf("failed to write file list: %w", err)
|
||||
}
|
||||
|
||||
// Save trigram index
|
||||
trigramIndexPath := filepath.Join(indexDir, "trigram.idx")
|
||||
var buf bytes.Buffer
|
||||
encoder := gob.NewEncoder(&buf)
|
||||
if err := encoder.Encode(trigramIndex); err != nil {
|
||||
return fmt.Errorf("failed to encode trigram index: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(trigramIndexPath, buf.Bytes(), 0644); err != nil {
|
||||
return fmt.Errorf("failed to write trigram index: %w", err)
|
||||
}
|
||||
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "Successfully built index for %s\n", args[0])
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GetIndexCmd() *cobra.Command {
|
||||
return indexCmd
|
||||
}
|
||||
54
cmd/index_test.go
Normal file
54
cmd/index_test.go
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
)
|
||||
|
||||
func TestIndexCommand_Good(t *testing.T) {
|
||||
// Create a temporary directory
|
||||
tmpDir := t.TempDir()
|
||||
archivePath := filepath.Join(tmpDir, "test.dat")
|
||||
|
||||
// Create a sample DataNode
|
||||
dn := datanode.New()
|
||||
dn.AddData("file1.txt", []byte("hello world"))
|
||||
dn.AddData("file2.go", []byte("package main"))
|
||||
tarData, err := dn.ToTar()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create tar: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(archivePath, tarData, 0644); err != nil {
|
||||
t.Fatalf("failed to write archive: %v", err)
|
||||
}
|
||||
|
||||
// Run the index command
|
||||
output, err := executeCommand(RootCmd, "index", archivePath)
|
||||
if err != nil {
|
||||
t.Fatalf("index command failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(output, "Successfully built index") {
|
||||
t.Errorf("expected success message, got: %s", output)
|
||||
}
|
||||
|
||||
// Verify that the index directory and files were created
|
||||
indexDir := filepath.Join(tmpDir, ".borg-index")
|
||||
if _, err := os.Stat(indexDir); os.IsNotExist(err) {
|
||||
t.Fatalf(".borg-index directory was not created")
|
||||
}
|
||||
|
||||
filesJSONPath := filepath.Join(indexDir, "files.json")
|
||||
if _, err := os.Stat(filesJSONPath); os.IsNotExist(err) {
|
||||
t.Fatalf("files.json was not created")
|
||||
}
|
||||
|
||||
trigramIdxPath := filepath.Join(indexDir, "trigram.idx")
|
||||
if _, err := os.Stat(trigramIdxPath); os.IsNotExist(err) {
|
||||
t.Fatalf("trigram.idx was not created")
|
||||
}
|
||||
}
|
||||
|
|
@ -18,12 +18,16 @@ func executeCommand(root *cobra.Command, args ...string) (string, error) {
|
|||
|
||||
// executeCommandC is a helper function to execute a cobra command and return the output.
|
||||
func executeCommandC(root *cobra.Command, args ...string) (*cobra.Command, string, error) {
|
||||
buf := new(bytes.Buffer)
|
||||
root.SetOut(buf)
|
||||
root.SetErr(buf)
|
||||
root.SetArgs(args)
|
||||
// We need to create a new instance of the root command for each test to avoid state pollution.
|
||||
testRootCmd := NewRootCmd()
|
||||
initAllCommands(testRootCmd) // Pass the new instance to the init function.
|
||||
|
||||
c, err := root.ExecuteC()
|
||||
buf := new(bytes.Buffer)
|
||||
testRootCmd.SetOut(buf)
|
||||
testRootCmd.SetErr(buf)
|
||||
testRootCmd.SetArgs(args)
|
||||
|
||||
c, err := testRootCmd.ExecuteC()
|
||||
|
||||
return c, buf.String(), err
|
||||
}
|
||||
|
|
@ -45,11 +49,6 @@ func TestRootCmd_Good(t *testing.T) {
|
|||
})
|
||||
|
||||
t.Run("Help flag", func(t *testing.T) {
|
||||
// We need to reset the command's state before each run.
|
||||
RootCmd.ResetFlags()
|
||||
RootCmd.ResetCommands()
|
||||
initAllCommands()
|
||||
|
||||
output, err := executeCommand(RootCmd, "--help")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
|
|
@ -62,11 +61,6 @@ func TestRootCmd_Good(t *testing.T) {
|
|||
|
||||
func TestRootCmd_Bad(t *testing.T) {
|
||||
t.Run("Unknown command", func(t *testing.T) {
|
||||
// We need to reset the command's state before each run.
|
||||
RootCmd.ResetFlags()
|
||||
RootCmd.ResetCommands()
|
||||
initAllCommands()
|
||||
|
||||
_, err := executeCommand(RootCmd, "unknown-command")
|
||||
if err == nil {
|
||||
t.Fatal("expected an error for an unknown command, but got none")
|
||||
|
|
@ -75,10 +69,12 @@ func TestRootCmd_Bad(t *testing.T) {
|
|||
}
|
||||
|
||||
// initAllCommands re-initializes all commands for testing.
|
||||
func initAllCommands() {
|
||||
RootCmd.AddCommand(GetAllCmd())
|
||||
RootCmd.AddCommand(GetCollectCmd())
|
||||
RootCmd.AddCommand(GetCompileCmd())
|
||||
RootCmd.AddCommand(GetRunCmd())
|
||||
RootCmd.AddCommand(GetServeCmd())
|
||||
func initAllCommands(cmd *cobra.Command) {
|
||||
cmd.AddCommand(GetAllCmd())
|
||||
cmd.AddCommand(GetCollectCmd())
|
||||
cmd.AddCommand(GetCompileCmd())
|
||||
cmd.AddCommand(GetRunCmd())
|
||||
cmd.AddCommand(GetServeCmd())
|
||||
cmd.AddCommand(GetIndexCmd())
|
||||
cmd.AddCommand(GetSearchCmd())
|
||||
}
|
||||
|
|
|
|||
342
cmd/search.go
Normal file
342
cmd/search.go
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
package cmd
|
||||
|
||||
import (
	"bufio"
	"bytes"
	"encoding/gob"
	"encoding/json"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/Snider/Borg/pkg/compress"
	"github.com/Snider/Borg/pkg/datanode"
	"github.com/spf13/cobra"
)
|
||||
|
||||
// searchCmd represents the search command
|
||||
var searchCmd = NewSearchCmd()
|
||||
|
||||
func init() {
|
||||
RootCmd.AddCommand(GetSearchCmd())
|
||||
}
|
||||
|
||||
type searchResult struct {
|
||||
FilePath string
|
||||
LineNum int
|
||||
Line string
|
||||
}
|
||||
|
||||
func NewSearchCmd() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "search <archive> <pattern>",
|
||||
Short: "Search for a pattern in an archive.",
|
||||
Long: `Search for a pattern in a .dat, .tim, or .trix archive.`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
archivePath, err := filepath.Abs(args[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not get absolute path for archive: %w", err)
|
||||
}
|
||||
pattern := args[1]
|
||||
|
||||
// Read and decompress the archive
|
||||
compressedData, err := os.ReadFile(archivePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read archive: %w", err)
|
||||
}
|
||||
tarData, err := compress.Decompress(compressedData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decompress archive: %w", err)
|
||||
}
|
||||
dn, err := datanode.FromTar(tarData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load datanode: %w", err)
|
||||
}
|
||||
|
||||
indexDir := filepath.Join(filepath.Dir(archivePath), ".borg-index")
|
||||
indexPath := filepath.Join(indexDir, "trigram.idx")
|
||||
|
||||
var results []searchResult
|
||||
|
||||
if _, err := os.Stat(indexPath); err == nil {
|
||||
results, err = searchWithIndex(dn, archivePath, pattern, cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error searching with index: %w", err)
|
||||
}
|
||||
} else {
|
||||
results, err = searchWithoutIndex(dn, pattern, cmd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error searching without index: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return printResults(cmd, dn, results)
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().Bool("regex", false, "Use regex pattern")
|
||||
cmd.Flags().IntP("context", "C", 0, "Show N lines around match")
|
||||
cmd.Flags().String("type", "", "Filter by file extension")
|
||||
cmd.Flags().Int("max-results", 0, "Limit output to N results")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func printResults(cmd *cobra.Command, dn *datanode.DataNode, results []searchResult) error {
|
||||
contextLines, _ := cmd.Flags().GetInt("context")
|
||||
maxResults, _ := cmd.Flags().GetInt("max-results")
|
||||
|
||||
if maxResults > 0 && len(results) > maxResults {
|
||||
results = results[:maxResults]
|
||||
}
|
||||
|
||||
// Group results by file
|
||||
resultsByFile := make(map[string][]searchResult)
|
||||
for _, res := range results {
|
||||
resultsByFile[res.FilePath] = append(resultsByFile[res.FilePath], res)
|
||||
}
|
||||
|
||||
// Process each file
|
||||
for filePath, fileResults := range resultsByFile {
|
||||
if contextLines > 0 {
|
||||
file, err := dn.Open(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open file %s from archive: %w", filePath, err)
|
||||
}
|
||||
|
||||
var lines []string
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
lines = append(lines, scanner.Text())
|
||||
}
|
||||
file.Close()
|
||||
|
||||
for _, res := range fileResults {
|
||||
start := res.LineNum - 1 - contextLines
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
|
||||
end := res.LineNum + contextLines
|
||||
if end > len(lines) {
|
||||
end = len(lines)
|
||||
}
|
||||
|
||||
for j := start; j < end; j++ {
|
||||
lineNum := j + 1
|
||||
line := lines[j]
|
||||
prefix := " "
|
||||
if lineNum == res.LineNum {
|
||||
prefix = ">"
|
||||
}
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "%s %s:%d: %s\n", prefix, filePath, lineNum, line)
|
||||
}
|
||||
fmt.Fprintln(cmd.OutOrStdout(), "--")
|
||||
}
|
||||
} else {
|
||||
for _, res := range fileResults {
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "%s:%d: %s\n", res.FilePath, res.LineNum, res.Line)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func searchWithoutIndex(dn *datanode.DataNode, pattern string, cmd *cobra.Command) ([]searchResult, error) {
|
||||
var results []searchResult
|
||||
|
||||
useRegex, _ := cmd.Flags().GetBool("regex")
|
||||
fileType, _ := cmd.Flags().GetString("type")
|
||||
|
||||
var re *regexp.Regexp
|
||||
var err error
|
||||
if useRegex {
|
||||
re, err = regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid regex pattern: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = dn.Walk(".", func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil || d.IsDir() {
|
||||
return err
|
||||
}
|
||||
if fileType != "" && !strings.HasSuffix(path, "."+fileType) {
|
||||
return nil
|
||||
}
|
||||
|
||||
file, err := dn.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for lineNum := 1; scanner.Scan(); lineNum++ {
|
||||
line := scanner.Text()
|
||||
match := false
|
||||
if useRegex {
|
||||
if re.MatchString(line) {
|
||||
match = true
|
||||
}
|
||||
} else {
|
||||
if strings.Contains(line, pattern) {
|
||||
match = true
|
||||
}
|
||||
}
|
||||
|
||||
if match {
|
||||
results = append(results, searchResult{
|
||||
FilePath: path,
|
||||
LineNum: lineNum,
|
||||
Line: strings.TrimSpace(line),
|
||||
})
|
||||
}
|
||||
}
|
||||
return scanner.Err()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error walking datanode: %w", err)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func searchWithIndex(dn *datanode.DataNode, archivePath, pattern string, cmd *cobra.Command) ([]searchResult, error) {
|
||||
indexDir := filepath.Join(filepath.Dir(archivePath), ".borg-index")
|
||||
|
||||
// Load file list
|
||||
fileListPath := filepath.Join(indexDir, "files.json")
|
||||
fileListData, err := os.ReadFile(fileListPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not read file list: %w", err)
|
||||
}
|
||||
var fileList []string
|
||||
if err := json.Unmarshal(fileListData, &fileList); err != nil {
|
||||
return nil, fmt.Errorf("could not unmarshal file list: %w", err)
|
||||
}
|
||||
|
||||
// Load trigram index
|
||||
trigramIndexPath := filepath.Join(indexDir, "trigram.idx")
|
||||
trigramIndexData, err := os.ReadFile(trigramIndexPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not read trigram index: %w", err)
|
||||
}
|
||||
var trigramIndex map[[3]byte][]uint32
|
||||
decoder := gob.NewDecoder(bytes.NewReader(trigramIndexData))
|
||||
if err := decoder.Decode(&trigramIndex); err != nil {
|
||||
return nil, fmt.Errorf("could not decode trigram index: %w", err)
|
||||
}
|
||||
|
||||
// Find candidate files
|
||||
candidateFiles := findCandidateFiles(pattern, trigramIndex, fileList)
|
||||
|
||||
// Search within candidate files
|
||||
var results []searchResult
|
||||
useRegex, _ := cmd.Flags().GetBool("regex")
|
||||
fileType, _ := cmd.Flags().GetString("type")
|
||||
|
||||
var re *regexp.Regexp
|
||||
if useRegex {
|
||||
re, err = regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid regex pattern: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
for path := range candidateFiles {
|
||||
if fileType != "" && !strings.HasSuffix(path, "."+fileType) {
|
||||
continue
|
||||
}
|
||||
|
||||
file, err := dn.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not open file from archive: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for lineNum := 1; scanner.Scan(); lineNum++ {
|
||||
line := scanner.Text()
|
||||
match := false
|
||||
if useRegex {
|
||||
if re.MatchString(line) {
|
||||
match = true
|
||||
}
|
||||
} else {
|
||||
if strings.Contains(line, pattern) {
|
||||
match = true
|
||||
}
|
||||
}
|
||||
|
||||
if match {
|
||||
results = append(results, searchResult{
|
||||
FilePath: path,
|
||||
LineNum: lineNum,
|
||||
Line: strings.TrimSpace(line),
|
||||
})
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error scanning file %s: %w", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// findCandidateFiles returns the set of file paths that may contain pattern,
// computed as the intersection of the posting lists of every trigram in the
// pattern. A pattern shorter than one trigram cannot be narrowed, so every
// file in fileList is returned as a candidate.
func findCandidateFiles(pattern string, trigramIndex map[[3]byte][]uint32, fileList []string) map[string]struct{} {
	candidates := make(map[string]struct{})

	if len(pattern) < 3 {
		// Too short to form a trigram: fall back to all files.
		for _, path := range fileList {
			candidates[path] = struct{}{}
		}
		return candidates
	}

	// Intersect the posting lists of each pattern trigram, seeding the
	// running set from the first trigram.
	var surviving map[uint32]struct{}
	for i := 0; i+3 <= len(pattern); i++ {
		var tri [3]byte
		copy(tri[:], pattern[i:i+3])
		postings := trigramIndex[tri]

		if surviving == nil {
			surviving = make(map[uint32]struct{}, len(postings))
			for _, id := range postings {
				surviving[id] = struct{}{}
			}
			continue
		}

		next := make(map[uint32]struct{})
		for _, id := range postings {
			if _, ok := surviving[id]; ok {
				next[id] = struct{}{}
			}
		}
		surviving = next
	}

	// Translate surviving file IDs back into paths.
	for id := range surviving {
		candidates[fileList[id]] = struct{}{}
	}
	return candidates
}
|
||||
|
||||
func GetSearchCmd() *cobra.Command {
|
||||
return searchCmd
|
||||
}
|
||||
78
cmd/search_test.go
Normal file
78
cmd/search_test.go
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/Snider/Borg/pkg/datanode"
|
||||
)
|
||||
|
||||
func TestSearchCommand_WithoutIndex(t *testing.T) {
|
||||
// Create a temporary directory
|
||||
tmpDir := t.TempDir()
|
||||
archivePath := filepath.Join(tmpDir, "test.dat")
|
||||
|
||||
// Create a sample DataNode
|
||||
dn := datanode.New()
|
||||
dn.AddData("file1.txt", []byte("hello world"))
|
||||
dn.AddData("file2.go", []byte("package main\n\nfunc main() {\n\tprintln(\"hello\")\n}"))
|
||||
tarData, err := dn.ToTar()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create tar: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(archivePath, tarData, 0644); err != nil {
|
||||
t.Fatalf("failed to write archive: %v", err)
|
||||
}
|
||||
|
||||
// Run the search command
|
||||
output, err := executeCommand(RootCmd, "search", archivePath, "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("search command failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(output, "file1.txt:1: hello world") {
|
||||
t.Errorf("expected to find 'hello' in file1.txt, got: %s", output)
|
||||
}
|
||||
if !strings.Contains(output, "file2.go:4: println(\"hello\")") {
|
||||
t.Errorf("expected to find 'hello' in file2.go, got: %s", output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearchCommand_WithIndex(t *testing.T) {
|
||||
// Create a temporary directory
|
||||
tmpDir := t.TempDir()
|
||||
archivePath := filepath.Join(tmpDir, "test.dat")
|
||||
|
||||
// Create a sample DataNode
|
||||
dn := datanode.New()
|
||||
dn.AddData("file1.txt", []byte("hello world"))
|
||||
dn.AddData("file2.go", []byte("package main\n\nfunc main() {\n\tprintln(\"hello\")\n}"))
|
||||
tarData, err := dn.ToTar()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create tar: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(archivePath, tarData, 0644); err != nil {
|
||||
t.Fatalf("failed to write archive: %v", err)
|
||||
}
|
||||
|
||||
// Run the index command
|
||||
_, err = executeCommand(RootCmd, "index", archivePath)
|
||||
if err != nil {
|
||||
t.Fatalf("index command failed: %v", err)
|
||||
}
|
||||
|
||||
// Run the search command
|
||||
output, err := executeCommand(RootCmd, "search", archivePath, "hello")
|
||||
if err != nil {
|
||||
t.Fatalf("search command failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(output, "file1.txt:1: hello world") {
|
||||
t.Errorf("expected to find 'hello' in file1.txt, got: %s", output)
|
||||
}
|
||||
if !strings.Contains(output, "file2.go:4: println(\"hello\")") {
|
||||
t.Errorf("expected to find 'hello' in file2.go, got: %s", output)
|
||||
}
|
||||
}
|
||||
28
docs/cli.md
28
docs/cli.md
|
|
@ -74,6 +74,34 @@ Examples:
|
|||
- `borg decode borg.trix --output borg.dat --password "secret"`
|
||||
- `borg decode borg.tim --output borg.dat --i-am-in-isolation`
|
||||
|
||||
### index
|
||||
|
||||
Build a search index for an archive to speed up searches.
|
||||
|
||||
- `borg index <archive-file>`
|
||||
|
||||
Example:
|
||||
- `borg index my-project.dat`
|
||||
|
||||
This will create a `.borg-index` directory next to the archive.
|
||||
|
||||
### search
|
||||
|
||||
Search for a pattern within an archive. Uses a pre-built index if available.
|
||||
|
||||
- `borg search <archive-file> <pattern>`
|
||||
|
||||
Flags:
|
||||
- `--regex`: Treat the pattern as a regular expression.
|
||||
- `-C, --context N`: Show N lines of context before and after each match.
|
||||
- `--type <ext>`: Filter search by file extension (e.g., `go`, `md`).
|
||||
- `--max-results N`: Limit the number of results returned.
|
||||
|
||||
Examples:
|
||||
- `borg search my-project.dat "TODO:"`
|
||||
- `borg search my-project.dat "func.*main" --regex --type go`
|
||||
- `borg search my-project.dat "important" -C 3`
|
||||
|
||||
## Compression
|
||||
|
||||
All collect commands accept `--compression` with values:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue