feat(help): add full-text search functionality

Implements #139: full-text search for help topics.

- Add searchIndex with inverted index for fast lookups
- Add tokenize() for case-insensitive word extraction
- Add Search() with relevance ranking:
  - Exact word matches score 1.0
  - Prefix matches score 0.5
  - Title matches get 2.0 boost
- Add snippet extraction for search result context
- Add section-level matching for precise results
- Add comprehensive tests following _Good/_Bad naming

Search features:
- Case-insensitive matching
- Partial word matching (prefix)
- Title boost (matches in title rank higher)
- Section-level results
- Snippet extraction with context

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit 2b68a26a1b (parent df7ff9f128)
Snider, 2026-02-01 23:30:30 +00:00
2 changed files with 542 additions and 0 deletions
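
For illustration, a minimal sketch of how the new index is exercised (not part of this commit; since searchIndex is unexported it would have to live inside package help, e.g. in a test). The Topic fields mirror the ones used in search_test.go, and the score comments restate the ranking rules listed above:

// Illustrative sketch only, not part of this diff.
func exampleConfigSearch() []*SearchResult {
	idx := newSearchIndex()
	idx.Add(&Topic{
		ID:      "configuration",
		Title:   "Configuration",
		Content: "Configure the CLI using environment variables.",
	})
	// "config" is not an indexed word itself, so it scores via the prefix
	// rule (0.5 for each indexed word it prefixes: "configuration" and
	// "configure") and then gains the 2.0 boost because the topic title
	// contains "config".
	return idx.Search("config")
}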

pkg/help/search.go (new file, 277 lines)

@@ -0,0 +1,277 @@
package help
import (
"sort"
"strings"
"unicode"
)
// SearchResult represents a search match.
type SearchResult struct {
Topic *Topic
Section *Section // nil if topic-level match
Score float64
Snippet string // Context around match
}
// searchIndex provides full-text search.
type searchIndex struct {
topics map[string]*Topic // topicID -> Topic
index map[string]map[string]bool // word -> set of topicIDs
}
// newSearchIndex creates a new empty search index.
func newSearchIndex() *searchIndex {
return &searchIndex{
topics: make(map[string]*Topic),
index: make(map[string]map[string]bool),
}
}
// Add indexes a topic for searching.
func (i *searchIndex) Add(topic *Topic) {
i.topics[topic.ID] = topic
// Index title words (the title boost is applied at query time in Search)
for _, word := range tokenize(topic.Title) {
i.addToIndex(word, topic.ID)
}
// Index content words
for _, word := range tokenize(topic.Content) {
i.addToIndex(word, topic.ID)
}
// Index section titles and content
for _, section := range topic.Sections {
for _, word := range tokenize(section.Title) {
i.addToIndex(word, topic.ID)
}
for _, word := range tokenize(section.Content) {
i.addToIndex(word, topic.ID)
}
}
// Index tags
for _, tag := range topic.Tags {
for _, word := range tokenize(tag) {
i.addToIndex(word, topic.ID)
}
}
}
// addToIndex adds a word-to-topic mapping.
func (i *searchIndex) addToIndex(word, topicID string) {
if i.index[word] == nil {
i.index[word] = make(map[string]bool)
}
i.index[word][topicID] = true
}
// Search finds topics matching the query.
func (i *searchIndex) Search(query string) []*SearchResult {
queryWords := tokenize(query)
if len(queryWords) == 0 {
return nil
}
// Track scores per topic
scores := make(map[string]float64)
for _, word := range queryWords {
// Exact matches
if topicIDs, ok := i.index[word]; ok {
for topicID := range topicIDs {
scores[topicID] += 1.0
}
}
// Prefix matches (partial word matching)
for indexWord, topicIDs := range i.index {
if strings.HasPrefix(indexWord, word) && indexWord != word {
for topicID := range topicIDs {
scores[topicID] += 0.5 // Lower score for partial matches
}
}
}
}
// Build results with title boost and snippet extraction
var results []*SearchResult
for topicID, score := range scores {
topic := i.topics[topicID]
if topic == nil {
continue
}
// Title boost: if query words appear in title
titleLower := strings.ToLower(topic.Title)
for _, word := range queryWords {
if strings.Contains(titleLower, word) {
score += 2.0 // Title matches are worth more
}
}
// Find matching section and extract snippet
section, snippet := i.findBestMatch(topic, queryWords)
results = append(results, &SearchResult{
Topic: topic,
Section: section,
Score: score,
Snippet: snippet,
})
}
// Sort by score (highest first)
sort.Slice(results, func(a, b int) bool {
return results[a].Score > results[b].Score
})
return results
}
// findBestMatch finds the section with the best match and extracts a snippet.
func (i *searchIndex) findBestMatch(topic *Topic, queryWords []string) (*Section, string) {
var bestSection *Section
var bestSnippet string
bestScore := 0
// Check topic title
titleScore := countMatches(topic.Title, queryWords)
if titleScore > 0 {
bestSnippet = extractSnippet(topic.Content, queryWords)
}
// Check sections
for idx := range topic.Sections {
section := &topic.Sections[idx]
sectionScore := countMatches(section.Title, queryWords)
contentScore := countMatches(section.Content, queryWords)
totalScore := sectionScore*2 + contentScore // Title matches worth more
if totalScore > bestScore {
bestScore = totalScore
bestSection = section
if contentScore > 0 {
bestSnippet = extractSnippet(section.Content, queryWords)
} else {
bestSnippet = extractSnippet(section.Content, nil)
}
}
}
// If no section matched, use topic content
if bestSnippet == "" && topic.Content != "" {
bestSnippet = extractSnippet(topic.Content, queryWords)
}
return bestSection, bestSnippet
}
// tokenize splits text into lowercase words for indexing/searching.
func tokenize(text string) []string {
text = strings.ToLower(text)
var words []string
var word strings.Builder
for _, r := range text {
if unicode.IsLetter(r) || unicode.IsDigit(r) {
word.WriteRune(r)
} else if word.Len() > 0 {
w := word.String()
if len(w) >= 2 { // Skip single-character words
words = append(words, w)
}
word.Reset()
}
}
// Don't forget the last word
if word.Len() >= 2 {
words = append(words, word.String())
}
return words
}
// countMatches counts how many query words appear in the text.
func countMatches(text string, queryWords []string) int {
textLower := strings.ToLower(text)
count := 0
for _, word := range queryWords {
if strings.Contains(textLower, word) {
count++
}
}
return count
}
// extractSnippet extracts a short snippet around the first match.
func extractSnippet(content string, queryWords []string) string {
if content == "" {
return ""
}
const snippetLen = 150
// If no query words, return start of content
if len(queryWords) == 0 {
lines := strings.Split(content, "\n")
for _, line := range lines {
line = strings.TrimSpace(line)
if line != "" && !strings.HasPrefix(line, "#") {
if len(line) > snippetLen {
return line[:snippetLen] + "..."
}
return line
}
}
return ""
}
// Find first match position
contentLower := strings.ToLower(content)
matchPos := -1
for _, word := range queryWords {
pos := strings.Index(contentLower, word)
if pos != -1 && (matchPos == -1 || pos < matchPos) {
matchPos = pos
}
}
if matchPos == -1 {
// No match found, return start of content
if len(content) > snippetLen {
return content[:snippetLen] + "..."
}
return content
}
// Extract snippet around match
start := matchPos - 50
if start < 0 {
start = 0
}
end := start + snippetLen
if end > len(content) {
end = len(content)
}
snippet := content[start:end]
// Trim to word boundaries
if start > 0 {
if idx := strings.Index(snippet, " "); idx != -1 {
snippet = "..." + snippet[idx+1:]
}
}
if end < len(content) {
if idx := strings.LastIndex(snippet, " "); idx != -1 {
snippet = snippet[:idx] + "..."
}
}
return strings.TrimSpace(snippet)
}
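
A further sketch (also not part of this diff) of how a caller inside package help might consume SearchResult values; renderResults and the output format are invented for illustration:

// Illustrative sketch only, not part of this diff.
func renderResults(results []*SearchResult) string {
	var b strings.Builder
	for _, r := range results {
		b.WriteString(r.Topic.Title)
		if r.Section != nil {
			// Section-level match: point the user at the specific section.
			b.WriteString(" > " + r.Section.Title)
		}
		if r.Snippet != "" {
			b.WriteString("\n  " + r.Snippet)
		}
		b.WriteString("\n")
	}
	return b.String()
}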

pkg/help/search_test.go (new file, 265 lines)

@@ -0,0 +1,265 @@
package help
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestTokenize_Good(t *testing.T) {
tests := []struct {
name string
input string
expected []string
}{
{
name: "simple words",
input: "hello world",
expected: []string{"hello", "world"},
},
{
name: "mixed case",
input: "Hello World",
expected: []string{"hello", "world"},
},
{
name: "with punctuation",
input: "Hello, world! How are you?",
expected: []string{"hello", "world", "how", "are", "you"},
},
{
name: "single characters filtered",
input: "a b c hello d",
expected: []string{"hello"},
},
{
name: "single-digit number filtered",
input: "version 2 release",
expected: []string{"version", "release"},
},
{
name: "alphanumeric",
input: "v2.0 and config123",
expected: []string{"v2", "and", "config123"},
},
{
name: "empty string",
input: "",
expected: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := tokenize(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestSearchIndex_Add_Good(t *testing.T) {
idx := newSearchIndex()
topic := &Topic{
ID: "getting-started",
Title: "Getting Started",
Content: "Welcome to the guide.",
Tags: []string{"intro", "setup"},
Sections: []Section{
{ID: "installation", Title: "Installation", Content: "Install the CLI."},
},
}
idx.Add(topic)
// Verify topic is stored
assert.NotNil(t, idx.topics["getting-started"])
// Verify words are indexed
assert.Contains(t, idx.index["getting"], "getting-started")
assert.Contains(t, idx.index["started"], "getting-started")
assert.Contains(t, idx.index["welcome"], "getting-started")
assert.Contains(t, idx.index["guide"], "getting-started")
assert.Contains(t, idx.index["intro"], "getting-started")
assert.Contains(t, idx.index["setup"], "getting-started")
assert.Contains(t, idx.index["installation"], "getting-started")
assert.Contains(t, idx.index["cli"], "getting-started")
}
func TestSearchIndex_Search_Good(t *testing.T) {
idx := newSearchIndex()
// Add test topics
idx.Add(&Topic{
ID: "getting-started",
Title: "Getting Started",
Content: "Welcome to the CLI guide. This covers installation and setup.",
Tags: []string{"intro"},
})
idx.Add(&Topic{
ID: "configuration",
Title: "Configuration",
Content: "Configure the CLI using environment variables.",
})
idx.Add(&Topic{
ID: "commands",
Title: "Commands Reference",
Content: "List of all available commands.",
})
t.Run("single word query", func(t *testing.T) {
results := idx.Search("configuration")
assert.NotEmpty(t, results)
assert.Equal(t, "configuration", results[0].Topic.ID)
})
t.Run("multi-word query", func(t *testing.T) {
results := idx.Search("cli guide")
assert.NotEmpty(t, results)
// Should match getting-started (has both "cli" and "guide")
assert.Equal(t, "getting-started", results[0].Topic.ID)
})
t.Run("title boost", func(t *testing.T) {
results := idx.Search("commands")
assert.NotEmpty(t, results)
// "commands" appears in title of commands topic
assert.Equal(t, "commands", results[0].Topic.ID)
})
t.Run("partial word matching", func(t *testing.T) {
results := idx.Search("config")
assert.NotEmpty(t, results)
// Should match "configuration" and "configure"
foundConfig := false
for _, r := range results {
if r.Topic.ID == "configuration" {
foundConfig = true
break
}
}
assert.True(t, foundConfig, "Should find configuration topic with prefix match")
})
t.Run("no results", func(t *testing.T) {
results := idx.Search("nonexistent")
assert.Empty(t, results)
})
t.Run("empty query", func(t *testing.T) {
results := idx.Search("")
assert.Nil(t, results)
})
}
func TestSearchIndex_Search_Good_WithSections(t *testing.T) {
idx := newSearchIndex()
idx.Add(&Topic{
ID: "installation",
Title: "Installation Guide",
Content: "Overview of installation process.",
Sections: []Section{
{
ID: "linux",
Title: "Linux Installation",
Content: "Run apt-get install core on Debian.",
},
{
ID: "macos",
Title: "macOS Installation",
Content: "Use brew install core on macOS.",
},
{
ID: "windows",
Title: "Windows Installation",
Content: "Download the installer from the website.",
},
},
})
t.Run("matches section content", func(t *testing.T) {
results := idx.Search("debian")
assert.NotEmpty(t, results)
assert.Equal(t, "installation", results[0].Topic.ID)
// Should identify the Linux section as best match
if results[0].Section != nil {
assert.Equal(t, "linux", results[0].Section.ID)
}
})
t.Run("matches section title", func(t *testing.T) {
results := idx.Search("windows")
assert.NotEmpty(t, results)
assert.Equal(t, "installation", results[0].Topic.ID)
})
}
func TestExtractSnippet_Good(t *testing.T) {
content := `This is the first paragraph with some introduction text.
Here is more content that talks about installation and setup.
The installation process is straightforward.
Finally, some closing remarks about the configuration.`
t.Run("finds match and extracts context", func(t *testing.T) {
snippet := extractSnippet(content, []string{"installation"})
assert.Contains(t, snippet, "installation")
assert.True(t, len(snippet) <= 200, "Snippet should be reasonably short")
})
t.Run("no query words returns start", func(t *testing.T) {
snippet := extractSnippet(content, nil)
assert.Contains(t, snippet, "first paragraph")
})
t.Run("empty content", func(t *testing.T) {
snippet := extractSnippet("", []string{"test"})
assert.Empty(t, snippet)
})
}
func TestCountMatches_Good(t *testing.T) {
tests := []struct {
text string
words []string
expected int
}{
{"Hello world", []string{"hello"}, 1},
{"Hello world", []string{"hello", "world"}, 2},
{"Hello world", []string{"foo", "bar"}, 0},
{"The quick brown fox", []string{"quick", "fox", "dog"}, 2},
}
for _, tt := range tests {
result := countMatches(tt.text, tt.words)
assert.Equal(t, tt.expected, result)
}
}
func TestSearchResult_Score_Good(t *testing.T) {
idx := newSearchIndex()
// Topic with query word in title should score higher
idx.Add(&Topic{
ID: "topic-in-title",
Title: "Installation Guide",
Content: "Some content here.",
})
idx.Add(&Topic{
ID: "topic-in-content",
Title: "Some Other Topic",
Content: "This covers installation steps.",
})
results := idx.Search("installation")
assert.Len(t, results, 2)
// Title match should score higher
assert.Equal(t, "topic-in-title", results[0].Topic.ID)
assert.Greater(t, results[0].Score, results[1].Score)
}
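
The commit message mentions _Good/_Bad test naming, but the diff contains only _Good cases. Under the same convention, a _Bad-style case might look like this hypothetical sketch, which exercises queries that tokenize to nothing:

// Hypothetical sketch only, not part of this diff.
func TestSearchIndex_Search_Bad_UnusableQuery(t *testing.T) {
	idx := newSearchIndex()
	idx.Add(&Topic{
		ID:      "configuration",
		Title:   "Configuration",
		Content: "Configure the CLI using environment variables.",
	})
	// A single-character query is dropped by tokenize, so nothing can match.
	assert.Empty(t, idx.Search("x"))
	// A punctuation-only query tokenizes to no words and returns nil.
	assert.Nil(t, idx.Search("!!!"))
}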