From cbd41e6837b705ac8ee4cecc42dd791b8ebf18af Mon Sep 17 00:00:00 2001 From: Snider Date: Sun, 1 Feb 2026 23:14:49 +0000 Subject: [PATCH 1/4] feat(help): add markdown parsing and section extraction Implements #137: markdown parsing and section extraction for help system. - Add Topic and Section types for help content structure - Add Frontmatter type for YAML metadata parsing - Add ParseTopic() to parse markdown files into Topic structs - Add ExtractFrontmatter() to extract YAML frontmatter - Add ExtractSections() to extract headings and content - Add GenerateID() to create URL-safe anchor IDs - Add comprehensive tests following _Good/_Bad naming convention This is the foundation for the display-agnostic help system (#133). Co-Authored-By: Claude Opus 4.5 --- pkg/help/parser.go | 173 ++++++++++++++++++++++ pkg/help/parser_test.go | 316 ++++++++++++++++++++++++++++++++++++++++ pkg/help/topic.go | 31 ++++ 3 files changed, 520 insertions(+) create mode 100644 pkg/help/parser.go create mode 100644 pkg/help/parser_test.go create mode 100644 pkg/help/topic.go diff --git a/pkg/help/parser.go b/pkg/help/parser.go new file mode 100644 index 00000000..516afee2 --- /dev/null +++ b/pkg/help/parser.go @@ -0,0 +1,173 @@ +package help + +import ( + "regexp" + "strings" + "unicode" + + "gopkg.in/yaml.v3" +) + +var ( + // frontmatterRegex matches YAML frontmatter delimited by --- + frontmatterRegex = regexp.MustCompile(`(?s)^---\n(.+?)\n---\n?`) + + // headingRegex matches markdown headings (# to ######) + headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`) +) + +// ParseTopic parses a markdown file into a Topic. +func ParseTopic(path string, content []byte) (*Topic, error) { + contentStr := string(content) + + topic := &Topic{ + Path: path, + ID: GenerateID(pathToTitle(path)), + Sections: []Section{}, + Tags: []string{}, + Related: []string{}, + } + + // Extract YAML frontmatter if present + fm, body := ExtractFrontmatter(contentStr) + if fm != nil { + topic.Title = fm.Title + topic.Tags = fm.Tags + topic.Related = fm.Related + topic.Order = fm.Order + if topic.Title != "" { + topic.ID = GenerateID(topic.Title) + } + } + + topic.Content = body + + // Extract sections from headings + topic.Sections = ExtractSections(body) + + // If no title from frontmatter, try first H1 + if topic.Title == "" && len(topic.Sections) > 0 { + for _, s := range topic.Sections { + if s.Level == 1 { + topic.Title = s.Title + topic.ID = GenerateID(s.Title) + break + } + } + } + + return topic, nil +} + +// ExtractFrontmatter extracts YAML frontmatter from markdown content. +// Returns the parsed frontmatter and the remaining content. +func ExtractFrontmatter(content string) (*Frontmatter, string) { + match := frontmatterRegex.FindStringSubmatch(content) + if match == nil { + return nil, content + } + + var fm Frontmatter + if err := yaml.Unmarshal([]byte(match[1]), &fm); err != nil { + // Invalid YAML, return content as-is + return nil, content + } + + // Return content without frontmatter + body := content[len(match[0]):] + return &fm, body +} + +// ExtractSections parses markdown and returns sections. 
+func ExtractSections(content string) []Section {
+	lines := strings.Split(content, "\n")
+	sections := []Section{}
+
+	var currentSection *Section
+	var contentLines []string
+
+	for i, line := range lines {
+		lineNum := i + 1 // 1-indexed
+
+		match := headingRegex.FindStringSubmatch(line)
+		if match != nil {
+			// Save previous section's content
+			if currentSection != nil {
+				currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
+			}
+
+			// Start new section
+			level := len(match[1])
+			title := strings.TrimSpace(match[2])
+
+			section := Section{
+				ID:    GenerateID(title),
+				Title: title,
+				Level: level,
+				Line:  lineNum,
+			}
+			sections = append(sections, section)
+			currentSection = &sections[len(sections)-1]
+			contentLines = []string{}
+		} else if currentSection != nil {
+			contentLines = append(contentLines, line)
+		}
+	}
+
+	// Save last section's content
+	if currentSection != nil {
+		currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
+	}
+
+	return sections
+}
+
+// GenerateID creates a URL-safe ID from a title.
+// "Getting Started" -> "getting-started"
+func GenerateID(title string) string {
+	var result strings.Builder
+
+	for _, r := range strings.ToLower(title) {
+		if unicode.IsLetter(r) || unicode.IsDigit(r) {
+			result.WriteRune(r)
+		} else if unicode.IsSpace(r) || r == '-' || r == '_' {
+			// Only add hyphen if last char isn't already a hyphen
+			str := result.String()
+			if len(str) > 0 && str[len(str)-1] != '-' {
+				result.WriteRune('-')
+			}
+		}
+		// Skip other characters
+	}
+
+	// Trim trailing hyphens
+	str := result.String()
+	return strings.Trim(str, "-")
+}
+
+// pathToTitle converts a file path to a title.
+// "getting-started.md" -> "Getting Started"
+func pathToTitle(path string) string {
+	// Get filename without directory
+	parts := strings.Split(path, "/")
+	filename := parts[len(parts)-1]
+
+	// Remove extension
+	if idx := strings.LastIndex(filename, "."); idx != -1 {
+		filename = filename[:idx]
+	}
+
+	// Replace hyphens/underscores with spaces
+	filename = strings.ReplaceAll(filename, "-", " ")
+	filename = strings.ReplaceAll(filename, "_", " ")
+
+	// Title case
+	words := strings.Fields(filename)
+	for i, word := range words {
+		if len(word) > 0 {
+			words[i] = strings.ToUpper(string(word[0])) + strings.ToLower(word[1:])
+		}
+	}
+
+	return strings.Join(words, " ")
+}
diff --git a/pkg/help/parser_test.go b/pkg/help/parser_test.go
new file mode 100644
index 00000000..a9ea1c2c
--- /dev/null
+++ b/pkg/help/parser_test.go
@@ -0,0 +1,316 @@
+package help
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGenerateID_Good(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "simple title",
+			input:    "Getting Started",
+			expected: "getting-started",
+		},
+		{
+			name:     "already lowercase",
+			input:    "installation",
+			expected: "installation",
+		},
+		{
+			name:     "multiple spaces",
+			input:    "Quick Start Guide",
+			expected: "quick-start-guide",
+		},
+		{
+			name:     "with numbers",
+			input:    "Chapter 1 Introduction",
+			expected: "chapter-1-introduction",
+		},
+		{
+			name:     "special characters",
+			input:    "What's New? 
(v2.0)", + expected: "whats-new-v20", + }, + { + name: "underscores", + input: "config_file_reference", + expected: "config-file-reference", + }, + { + name: "hyphens preserved", + input: "pre-commit hooks", + expected: "pre-commit-hooks", + }, + { + name: "leading trailing spaces", + input: " Trimmed Title ", + expected: "trimmed-title", + }, + { + name: "unicode letters", + input: "Configuración Básica", + expected: "configuración-básica", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := GenerateID(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestExtractFrontmatter_Good(t *testing.T) { + content := `--- +title: Getting Started +tags: [intro, setup] +order: 1 +related: + - installation + - configuration +--- + +# Welcome + +This is the content. +` + + fm, body := ExtractFrontmatter(content) + + assert.NotNil(t, fm) + assert.Equal(t, "Getting Started", fm.Title) + assert.Equal(t, []string{"intro", "setup"}, fm.Tags) + assert.Equal(t, 1, fm.Order) + assert.Equal(t, []string{"installation", "configuration"}, fm.Related) + assert.Contains(t, body, "# Welcome") + assert.Contains(t, body, "This is the content.") +} + +func TestExtractFrontmatter_Good_NoFrontmatter(t *testing.T) { + content := `# Just a Heading + +Some content here. +` + + fm, body := ExtractFrontmatter(content) + + assert.Nil(t, fm) + assert.Equal(t, content, body) +} + +func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) { + content := `--- +title: [invalid yaml +--- + +# Content +` + + fm, body := ExtractFrontmatter(content) + + // Invalid YAML should return nil frontmatter and original content + assert.Nil(t, fm) + assert.Equal(t, content, body) +} + +func TestExtractSections_Good(t *testing.T) { + content := `# Main Title + +Introduction paragraph. + +## Installation + +Install instructions here. +More details. + +### Prerequisites + +You need these things. + +## Configuration + +Config info here. +` + + sections := ExtractSections(content) + + assert.Len(t, sections, 4) + + // Main Title (H1) + assert.Equal(t, "main-title", sections[0].ID) + assert.Equal(t, "Main Title", sections[0].Title) + assert.Equal(t, 1, sections[0].Level) + assert.Equal(t, 1, sections[0].Line) + assert.Contains(t, sections[0].Content, "Introduction paragraph.") + + // Installation (H2) + assert.Equal(t, "installation", sections[1].ID) + assert.Equal(t, "Installation", sections[1].Title) + assert.Equal(t, 2, sections[1].Level) + assert.Contains(t, sections[1].Content, "Install instructions here.") + assert.Contains(t, sections[1].Content, "More details.") + + // Prerequisites (H3) + assert.Equal(t, "prerequisites", sections[2].ID) + assert.Equal(t, "Prerequisites", sections[2].Title) + assert.Equal(t, 3, sections[2].Level) + assert.Contains(t, sections[2].Content, "You need these things.") + + // Configuration (H2) + assert.Equal(t, "configuration", sections[3].ID) + assert.Equal(t, "Configuration", sections[3].Title) + assert.Equal(t, 2, sections[3].Level) +} + +func TestExtractSections_Good_AllHeadingLevels(t *testing.T) { + content := `# H1 +## H2 +### H3 +#### H4 +##### H5 +###### H6 +` + + sections := ExtractSections(content) + + assert.Len(t, sections, 6) + for i, level := range []int{1, 2, 3, 4, 5, 6} { + assert.Equal(t, level, sections[i].Level) + } +} + +func TestExtractSections_Good_Empty(t *testing.T) { + content := `Just plain text. +No headings here. 
+` + + sections := ExtractSections(content) + + assert.Empty(t, sections) +} + +func TestParseTopic_Good(t *testing.T) { + content := []byte(`--- +title: Quick Start Guide +tags: [intro, quickstart] +order: 5 +related: + - installation +--- + +# Quick Start Guide + +Welcome to the guide. + +## First Steps + +Do this first. + +## Next Steps + +Then do this. +`) + + topic, err := ParseTopic("docs/quick-start.md", content) + + assert.NoError(t, err) + assert.NotNil(t, topic) + + // Check metadata from frontmatter + assert.Equal(t, "quick-start-guide", topic.ID) + assert.Equal(t, "Quick Start Guide", topic.Title) + assert.Equal(t, "docs/quick-start.md", topic.Path) + assert.Equal(t, []string{"intro", "quickstart"}, topic.Tags) + assert.Equal(t, []string{"installation"}, topic.Related) + assert.Equal(t, 5, topic.Order) + + // Check sections + assert.Len(t, topic.Sections, 3) + assert.Equal(t, "quick-start-guide", topic.Sections[0].ID) + assert.Equal(t, "first-steps", topic.Sections[1].ID) + assert.Equal(t, "next-steps", topic.Sections[2].ID) + + // Content should not include frontmatter + assert.NotContains(t, topic.Content, "---") + assert.Contains(t, topic.Content, "# Quick Start Guide") +} + +func TestParseTopic_Good_NoFrontmatter(t *testing.T) { + content := []byte(`# Getting Started + +This is a simple doc. + +## Installation + +Install it here. +`) + + topic, err := ParseTopic("getting-started.md", content) + + assert.NoError(t, err) + assert.NotNil(t, topic) + + // Title should come from first H1 + assert.Equal(t, "Getting Started", topic.Title) + assert.Equal(t, "getting-started", topic.ID) + + // Sections extracted + assert.Len(t, topic.Sections, 2) +} + +func TestParseTopic_Good_NoHeadings(t *testing.T) { + content := []byte(`--- +title: Plain Content +--- + +Just some text without any headings. +`) + + topic, err := ParseTopic("plain.md", content) + + assert.NoError(t, err) + assert.NotNil(t, topic) + assert.Equal(t, "Plain Content", topic.Title) + assert.Equal(t, "plain-content", topic.ID) + assert.Empty(t, topic.Sections) +} + +func TestParseTopic_Good_IDFromPath(t *testing.T) { + content := []byte(`Just content, no frontmatter or headings.`) + + topic, err := ParseTopic("commands/dev-workflow.md", content) + + assert.NoError(t, err) + assert.NotNil(t, topic) + + // ID and title should be derived from path + assert.Equal(t, "dev-workflow", topic.ID) + assert.Equal(t, "", topic.Title) // No title available +} + +func TestPathToTitle_Good(t *testing.T) { + tests := []struct { + path string + expected string + }{ + {"getting-started.md", "Getting Started"}, + {"commands/dev.md", "Dev"}, + {"path/to/file_name.md", "File Name"}, + {"UPPERCASE.md", "Uppercase"}, + {"no-extension", "No Extension"}, + } + + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + result := pathToTitle(tt.path) + assert.Equal(t, tt.expected, result) + }) + } +} diff --git a/pkg/help/topic.go b/pkg/help/topic.go new file mode 100644 index 00000000..b934e988 --- /dev/null +++ b/pkg/help/topic.go @@ -0,0 +1,31 @@ +// Package help provides display-agnostic help content management. +package help + +// Topic represents a help topic/page. +type Topic struct { + ID string `json:"id"` + Title string `json:"title"` + Path string `json:"path"` + Content string `json:"content"` + Sections []Section `json:"sections"` + Tags []string `json:"tags"` + Related []string `json:"related"` + Order int `json:"order"` // For sorting +} + +// Section represents a heading within a topic. 
+type Section struct { + ID string `json:"id"` + Title string `json:"title"` + Level int `json:"level"` + Line int `json:"line"` // Start line in content (1-indexed) + Content string `json:"content"` // Content under heading +} + +// Frontmatter represents YAML frontmatter metadata. +type Frontmatter struct { + Title string `yaml:"title"` + Tags []string `yaml:"tags"` + Related []string `yaml:"related"` + Order int `yaml:"order"` +} From df7ff9f12830ce85f111acd86b917b16c831085c Mon Sep 17 00:00:00 2001 From: Snider Date: Sun, 1 Feb 2026 23:28:54 +0000 Subject: [PATCH 2/4] fix(test): use manual cleanup for TestDevOps_Boot_Good_FreshWithNoExisting Fixes flaky test that fails with "TempDir RemoveAll cleanup: directory not empty" by using os.MkdirTemp with t.Cleanup instead of t.TempDir(). This is the same fix applied to TestDevOps_Boot_Good_Success in 3423e48. Co-Authored-By: Claude Opus 4.5 --- pkg/devops/devops_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/devops/devops_test.go b/pkg/devops/devops_test.go index edb57427..65f45c9e 100644 --- a/pkg/devops/devops_test.go +++ b/pkg/devops/devops_test.go @@ -699,12 +699,14 @@ func TestDevOps_Stop_Bad_ContainerNotRunning(t *testing.T) { } func TestDevOps_Boot_Good_FreshWithNoExisting(t *testing.T) { - tempDir := t.TempDir() + tempDir, err := os.MkdirTemp("", "devops-boot-fresh-*") + require.NoError(t, err) + t.Cleanup(func() { os.RemoveAll(tempDir) }) t.Setenv("CORE_IMAGES_DIR", tempDir) // Create fake image imagePath := filepath.Join(tempDir, ImageName()) - err := os.WriteFile(imagePath, []byte("fake"), 0644) + err = os.WriteFile(imagePath, []byte("fake"), 0644) require.NoError(t, err) cfg := DefaultConfig() From 2b68a26a1b91756dcf97d443337e906f621a0654 Mon Sep 17 00:00:00 2001 From: Snider Date: Sun, 1 Feb 2026 23:30:30 +0000 Subject: [PATCH 3/4] feat(help): add full-text search functionality Implements #139: full-text search for help topics. - Add searchIndex with inverted index for fast lookups - Add tokenize() for case-insensitive word extraction - Add Search() with relevance ranking: - Exact word matches score 1.0 - Prefix matches score 0.5 - Title matches get 2.0 boost - Add snippet extraction for search result context - Add section-level matching for precise results - Add comprehensive tests following _Good/_Bad naming Search features: - Case-insensitive matching - Partial word matching (prefix) - Title boost (matches in title rank higher) - Section-level results - Snippet extraction with context Co-Authored-By: Claude Opus 4.5 --- pkg/help/search.go | 277 ++++++++++++++++++++++++++++++++++++++++ pkg/help/search_test.go | 265 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 542 insertions(+) create mode 100644 pkg/help/search.go create mode 100644 pkg/help/search_test.go diff --git a/pkg/help/search.go b/pkg/help/search.go new file mode 100644 index 00000000..73f226a9 --- /dev/null +++ b/pkg/help/search.go @@ -0,0 +1,277 @@ +package help + +import ( + "sort" + "strings" + "unicode" +) + +// SearchResult represents a search match. +type SearchResult struct { + Topic *Topic + Section *Section // nil if topic-level match + Score float64 + Snippet string // Context around match +} + +// searchIndex provides full-text search. +type searchIndex struct { + topics map[string]*Topic // topicID -> Topic + index map[string]map[string]bool // word -> set of topicIDs +} + +// newSearchIndex creates a new empty search index. 
+func newSearchIndex() *searchIndex { + return &searchIndex{ + topics: make(map[string]*Topic), + index: make(map[string]map[string]bool), + } +} + +// Add indexes a topic for searching. +func (i *searchIndex) Add(topic *Topic) { + i.topics[topic.ID] = topic + + // Index title words with boost + for _, word := range tokenize(topic.Title) { + i.addToIndex(word, topic.ID) + } + + // Index content words + for _, word := range tokenize(topic.Content) { + i.addToIndex(word, topic.ID) + } + + // Index section titles and content + for _, section := range topic.Sections { + for _, word := range tokenize(section.Title) { + i.addToIndex(word, topic.ID) + } + for _, word := range tokenize(section.Content) { + i.addToIndex(word, topic.ID) + } + } + + // Index tags + for _, tag := range topic.Tags { + for _, word := range tokenize(tag) { + i.addToIndex(word, topic.ID) + } + } +} + +// addToIndex adds a word-to-topic mapping. +func (i *searchIndex) addToIndex(word, topicID string) { + if i.index[word] == nil { + i.index[word] = make(map[string]bool) + } + i.index[word][topicID] = true +} + +// Search finds topics matching the query. +func (i *searchIndex) Search(query string) []*SearchResult { + queryWords := tokenize(query) + if len(queryWords) == 0 { + return nil + } + + // Track scores per topic + scores := make(map[string]float64) + + for _, word := range queryWords { + // Exact matches + if topicIDs, ok := i.index[word]; ok { + for topicID := range topicIDs { + scores[topicID] += 1.0 + } + } + + // Prefix matches (partial word matching) + for indexWord, topicIDs := range i.index { + if strings.HasPrefix(indexWord, word) && indexWord != word { + for topicID := range topicIDs { + scores[topicID] += 0.5 // Lower score for partial matches + } + } + } + } + + // Build results with title boost and snippet extraction + var results []*SearchResult + for topicID, score := range scores { + topic := i.topics[topicID] + if topic == nil { + continue + } + + // Title boost: if query words appear in title + titleLower := strings.ToLower(topic.Title) + for _, word := range queryWords { + if strings.Contains(titleLower, word) { + score += 2.0 // Title matches are worth more + } + } + + // Find matching section and extract snippet + section, snippet := i.findBestMatch(topic, queryWords) + + results = append(results, &SearchResult{ + Topic: topic, + Section: section, + Score: score, + Snippet: snippet, + }) + } + + // Sort by score (highest first) + sort.Slice(results, func(a, b int) bool { + return results[a].Score > results[b].Score + }) + + return results +} + +// findBestMatch finds the section with the best match and extracts a snippet. 
+func (i *searchIndex) findBestMatch(topic *Topic, queryWords []string) (*Section, string) { + var bestSection *Section + var bestSnippet string + bestScore := 0 + + // Check topic title + titleScore := countMatches(topic.Title, queryWords) + if titleScore > 0 { + bestSnippet = extractSnippet(topic.Content, queryWords) + } + + // Check sections + for idx := range topic.Sections { + section := &topic.Sections[idx] + sectionScore := countMatches(section.Title, queryWords) + contentScore := countMatches(section.Content, queryWords) + totalScore := sectionScore*2 + contentScore // Title matches worth more + + if totalScore > bestScore { + bestScore = totalScore + bestSection = section + if contentScore > 0 { + bestSnippet = extractSnippet(section.Content, queryWords) + } else { + bestSnippet = extractSnippet(section.Content, nil) + } + } + } + + // If no section matched, use topic content + if bestSnippet == "" && topic.Content != "" { + bestSnippet = extractSnippet(topic.Content, queryWords) + } + + return bestSection, bestSnippet +} + +// tokenize splits text into lowercase words for indexing/searching. +func tokenize(text string) []string { + text = strings.ToLower(text) + var words []string + var word strings.Builder + + for _, r := range text { + if unicode.IsLetter(r) || unicode.IsDigit(r) { + word.WriteRune(r) + } else if word.Len() > 0 { + w := word.String() + if len(w) >= 2 { // Skip single-character words + words = append(words, w) + } + word.Reset() + } + } + + // Don't forget the last word + if word.Len() >= 2 { + words = append(words, word.String()) + } + + return words +} + +// countMatches counts how many query words appear in the text. +func countMatches(text string, queryWords []string) int { + textLower := strings.ToLower(text) + count := 0 + for _, word := range queryWords { + if strings.Contains(textLower, word) { + count++ + } + } + return count +} + +// extractSnippet extracts a short snippet around the first match. +func extractSnippet(content string, queryWords []string) string { + if content == "" { + return "" + } + + const snippetLen = 150 + + // If no query words, return start of content + if len(queryWords) == 0 { + lines := strings.Split(content, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line != "" && !strings.HasPrefix(line, "#") { + if len(line) > snippetLen { + return line[:snippetLen] + "..." + } + return line + } + } + return "" + } + + // Find first match position + contentLower := strings.ToLower(content) + matchPos := -1 + for _, word := range queryWords { + pos := strings.Index(contentLower, word) + if pos != -1 && (matchPos == -1 || pos < matchPos) { + matchPos = pos + } + } + + if matchPos == -1 { + // No match found, return start of content + if len(content) > snippetLen { + return content[:snippetLen] + "..." + } + return content + } + + // Extract snippet around match + start := matchPos - 50 + if start < 0 { + start = 0 + } + + end := start + snippetLen + if end > len(content) { + end = len(content) + } + + snippet := content[start:end] + + // Trim to word boundaries + if start > 0 { + if idx := strings.Index(snippet, " "); idx != -1 { + snippet = "..." + snippet[idx+1:] + } + } + if end < len(content) { + if idx := strings.LastIndex(snippet, " "); idx != -1 { + snippet = snippet[:idx] + "..." 
+ } + } + + return strings.TrimSpace(snippet) +} diff --git a/pkg/help/search_test.go b/pkg/help/search_test.go new file mode 100644 index 00000000..bbe35cd6 --- /dev/null +++ b/pkg/help/search_test.go @@ -0,0 +1,265 @@ +package help + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTokenize_Good(t *testing.T) { + tests := []struct { + name string + input string + expected []string + }{ + { + name: "simple words", + input: "hello world", + expected: []string{"hello", "world"}, + }, + { + name: "mixed case", + input: "Hello World", + expected: []string{"hello", "world"}, + }, + { + name: "with punctuation", + input: "Hello, world! How are you?", + expected: []string{"hello", "world", "how", "are", "you"}, + }, + { + name: "single characters filtered", + input: "a b c hello d", + expected: []string{"hello"}, + }, + { + name: "numbers included", + input: "version 2 release", + expected: []string{"version", "release"}, + }, + { + name: "alphanumeric", + input: "v2.0 and config123", + expected: []string{"v2", "and", "config123"}, + }, + { + name: "empty string", + input: "", + expected: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tokenize(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestSearchIndex_Add_Good(t *testing.T) { + idx := newSearchIndex() + + topic := &Topic{ + ID: "getting-started", + Title: "Getting Started", + Content: "Welcome to the guide.", + Tags: []string{"intro", "setup"}, + Sections: []Section{ + {ID: "installation", Title: "Installation", Content: "Install the CLI."}, + }, + } + + idx.Add(topic) + + // Verify topic is stored + assert.NotNil(t, idx.topics["getting-started"]) + + // Verify words are indexed + assert.Contains(t, idx.index["getting"], "getting-started") + assert.Contains(t, idx.index["started"], "getting-started") + assert.Contains(t, idx.index["welcome"], "getting-started") + assert.Contains(t, idx.index["guide"], "getting-started") + assert.Contains(t, idx.index["intro"], "getting-started") + assert.Contains(t, idx.index["setup"], "getting-started") + assert.Contains(t, idx.index["installation"], "getting-started") + assert.Contains(t, idx.index["cli"], "getting-started") +} + +func TestSearchIndex_Search_Good(t *testing.T) { + idx := newSearchIndex() + + // Add test topics + idx.Add(&Topic{ + ID: "getting-started", + Title: "Getting Started", + Content: "Welcome to the CLI guide. 
This covers installation and setup.", + Tags: []string{"intro"}, + }) + + idx.Add(&Topic{ + ID: "configuration", + Title: "Configuration", + Content: "Configure the CLI using environment variables.", + }) + + idx.Add(&Topic{ + ID: "commands", + Title: "Commands Reference", + Content: "List of all available commands.", + }) + + t.Run("single word query", func(t *testing.T) { + results := idx.Search("configuration") + assert.NotEmpty(t, results) + assert.Equal(t, "configuration", results[0].Topic.ID) + }) + + t.Run("multi-word query", func(t *testing.T) { + results := idx.Search("cli guide") + assert.NotEmpty(t, results) + // Should match getting-started (has both "cli" and "guide") + assert.Equal(t, "getting-started", results[0].Topic.ID) + }) + + t.Run("title boost", func(t *testing.T) { + results := idx.Search("commands") + assert.NotEmpty(t, results) + // "commands" appears in title of commands topic + assert.Equal(t, "commands", results[0].Topic.ID) + }) + + t.Run("partial word matching", func(t *testing.T) { + results := idx.Search("config") + assert.NotEmpty(t, results) + // Should match "configuration" and "configure" + foundConfig := false + for _, r := range results { + if r.Topic.ID == "configuration" { + foundConfig = true + break + } + } + assert.True(t, foundConfig, "Should find configuration topic with prefix match") + }) + + t.Run("no results", func(t *testing.T) { + results := idx.Search("nonexistent") + assert.Empty(t, results) + }) + + t.Run("empty query", func(t *testing.T) { + results := idx.Search("") + assert.Nil(t, results) + }) +} + +func TestSearchIndex_Search_Good_WithSections(t *testing.T) { + idx := newSearchIndex() + + idx.Add(&Topic{ + ID: "installation", + Title: "Installation Guide", + Content: "Overview of installation process.", + Sections: []Section{ + { + ID: "linux", + Title: "Linux Installation", + Content: "Run apt-get install core on Debian.", + }, + { + ID: "macos", + Title: "macOS Installation", + Content: "Use brew install core on macOS.", + }, + { + ID: "windows", + Title: "Windows Installation", + Content: "Download the installer from the website.", + }, + }, + }) + + t.Run("matches section content", func(t *testing.T) { + results := idx.Search("debian") + assert.NotEmpty(t, results) + assert.Equal(t, "installation", results[0].Topic.ID) + // Should identify the Linux section as best match + if results[0].Section != nil { + assert.Equal(t, "linux", results[0].Section.ID) + } + }) + + t.Run("matches section title", func(t *testing.T) { + results := idx.Search("windows") + assert.NotEmpty(t, results) + assert.Equal(t, "installation", results[0].Topic.ID) + }) +} + +func TestExtractSnippet_Good(t *testing.T) { + content := `This is the first paragraph with some introduction text. + +Here is more content that talks about installation and setup. +The installation process is straightforward. 
+ +Finally, some closing remarks about the configuration.` + + t.Run("finds match and extracts context", func(t *testing.T) { + snippet := extractSnippet(content, []string{"installation"}) + assert.Contains(t, snippet, "installation") + assert.True(t, len(snippet) <= 200, "Snippet should be reasonably short") + }) + + t.Run("no query words returns start", func(t *testing.T) { + snippet := extractSnippet(content, nil) + assert.Contains(t, snippet, "first paragraph") + }) + + t.Run("empty content", func(t *testing.T) { + snippet := extractSnippet("", []string{"test"}) + assert.Empty(t, snippet) + }) +} + +func TestCountMatches_Good(t *testing.T) { + tests := []struct { + text string + words []string + expected int + }{ + {"Hello world", []string{"hello"}, 1}, + {"Hello world", []string{"hello", "world"}, 2}, + {"Hello world", []string{"foo", "bar"}, 0}, + {"The quick brown fox", []string{"quick", "fox", "dog"}, 2}, + } + + for _, tt := range tests { + result := countMatches(tt.text, tt.words) + assert.Equal(t, tt.expected, result) + } +} + +func TestSearchResult_Score_Good(t *testing.T) { + idx := newSearchIndex() + + // Topic with query word in title should score higher + idx.Add(&Topic{ + ID: "topic-in-title", + Title: "Installation Guide", + Content: "Some content here.", + }) + + idx.Add(&Topic{ + ID: "topic-in-content", + Title: "Some Other Topic", + Content: "This covers installation steps.", + }) + + results := idx.Search("installation") + assert.Len(t, results, 2) + + // Title match should score higher + assert.Equal(t, "topic-in-title", results[0].Topic.ID) + assert.Greater(t, results[0].Score, results[1].Score) +} From 8c550d2360ae94db9a6663bd00dc5d37229dede1 Mon Sep 17 00:00:00 2001 From: Snider Date: Sun, 1 Feb 2026 23:33:22 +0000 Subject: [PATCH 4/4] fix(help): address CodeRabbit review feedback - Add CRLF line ending support to frontmatter regex - Add empty frontmatter block support - Use filepath.Base/Ext for cross-platform path handling - Add tests for CRLF and empty frontmatter cases Co-Authored-By: Claude Opus 4.5 --- pkg/help/parser.go | 13 +++++++------ pkg/help/parser_test.go | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/pkg/help/parser.go b/pkg/help/parser.go index 516afee2..a92b490c 100644 --- a/pkg/help/parser.go +++ b/pkg/help/parser.go @@ -1,6 +1,7 @@ package help import ( + "path/filepath" "regexp" "strings" "unicode" @@ -10,7 +11,8 @@ import ( var ( // frontmatterRegex matches YAML frontmatter delimited by --- - frontmatterRegex = regexp.MustCompile(`(?s)^---\n(.+?)\n---\n?`) + // Supports both LF and CRLF line endings, and empty frontmatter blocks + frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(.*?)(?:\r?\n)?---\r?\n?`) // headingRegex matches markdown headings (# to ######) headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`) @@ -148,13 +150,12 @@ func GenerateID(title string) string { // pathToTitle converts a file path to a title. 
// "getting-started.md" -> "Getting Started" func pathToTitle(path string) string { - // Get filename without directory - parts := strings.Split(path, "/") - filename := parts[len(parts)-1] + // Get filename without directory (cross-platform) + filename := filepath.Base(path) // Remove extension - if idx := strings.LastIndex(filename, "."); idx != -1 { - filename = filename[:idx] + if ext := filepath.Ext(filename); ext != "" { + filename = strings.TrimSuffix(filename, ext) } // Replace hyphens/underscores with spaces diff --git a/pkg/help/parser_test.go b/pkg/help/parser_test.go index a9ea1c2c..b95cadc8 100644 --- a/pkg/help/parser_test.go +++ b/pkg/help/parser_test.go @@ -105,6 +105,29 @@ Some content here. assert.Equal(t, content, body) } +func TestExtractFrontmatter_Good_CRLF(t *testing.T) { + // Content with CRLF line endings (Windows-style) + content := "---\r\ntitle: CRLF Test\r\n---\r\n\r\n# Content" + + fm, body := ExtractFrontmatter(content) + + assert.NotNil(t, fm) + assert.Equal(t, "CRLF Test", fm.Title) + assert.Contains(t, body, "# Content") +} + +func TestExtractFrontmatter_Good_Empty(t *testing.T) { + // Empty frontmatter block + content := "---\n---\n# Content" + + fm, body := ExtractFrontmatter(content) + + // Empty frontmatter should parse successfully + assert.NotNil(t, fm) + assert.Equal(t, "", fm.Title) + assert.Contains(t, body, "# Content") +} + func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) { content := `--- title: [invalid yaml