feat: extract go-help from core/go pkg/help

YAML-based help catalog with topic search.
Single external dependency: gopkg.in/yaml.v3
Module: forge.lthn.ai/core/go-help

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-02-19 16:09:34 +00:00
commit ad5e70937b
9 changed files with 1412 additions and 0 deletions

25
CLAUDE.md Normal file
View file

@ -0,0 +1,25 @@
# CLAUDE.md
## What This Is
YAML-based help catalog with topic search and rendering. Module: `forge.lthn.ai/core/go-help`
## Commands
```bash
go test ./... # Run all tests
go test -v -run Name # Run single test
```
## Architecture
- `Topic` defines help entries with title, body, tags, related topics
- `Catalog` stores topics keyed by ID; `ParseTopic` parses markdown files with YAML frontmatter
- `Search` provides ranked keyword search (exact and prefix word matches) across titles, tags, body and sections
## Coding Standards
- UK English
- `go test ./...` must pass before commit
- Conventional commits: `type(scope): description`
- Co-Author: `Co-Authored-By: Virgil <virgil@lethean.io>`

87
catalog.go Normal file
View file

@ -0,0 +1,87 @@
package help
import (
"fmt"
)
// Catalog manages help topics.
//
// Topics are stored keyed by ID, and a full-text search index is kept in
// sync as topics are added via Add.
type Catalog struct {
	topics map[string]*Topic // topic ID -> topic
	index  *searchIndex      // inverted word index over titles, content, sections, tags
}
// DefaultCatalog returns a catalog pre-populated with the built-in topics
// ("getting-started" and "config").
func DefaultCatalog() *Catalog {
	builtins := []*Topic{
		{
			ID:    "getting-started",
			Title: "Getting Started",
			Content: `# Getting Started
Welcome to Core! This CLI tool helps you manage development workflows.
## Common Commands
- core dev: Development workflows
- core setup: Setup repository
- core doctor: Check environment health
- core test: Run tests
## Next Steps
Run 'core help <topic>' to learn more about a specific topic.
`,
		},
		{
			ID:    "config",
			Title: "Configuration",
			Content: `# Configuration
Core is configured via environment variables and config files.
## Environment Variables
- CORE_DEBUG: Enable debug logging
- GITHUB_TOKEN: GitHub API token
## Config Files
Config is stored in ~/.core/config.yaml
`,
		},
	}

	c := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}
	for _, t := range builtins {
		c.Add(t)
	}
	return c
}
// Add registers a topic under its ID and feeds it to the search index.
// Adding a topic with an existing ID replaces the stored entry.
func (c *Catalog) Add(t *Topic) {
	c.index.Add(t)
	c.topics[t.ID] = t
}
// List returns all topics in the catalog.
//
// The slice is freshly allocated (nil when the catalog is empty, matching
// the previous behaviour); ordering is unspecified because it follows map
// iteration order.
func (c *Catalog) List() []*Topic {
	if len(c.topics) == 0 {
		return nil
	}
	// Pre-size to avoid repeated append growth.
	list := make([]*Topic, 0, len(c.topics))
	for _, t := range c.topics {
		list = append(list, t)
	}
	return list
}
// Search returns ranked matches for query across all topics.
// It delegates to the underlying search index (see searchIndex.Search
// for the scoring rules).
func (c *Catalog) Search(query string) []*SearchResult {
	return c.index.Search(query)
}
// Get looks up a topic by its ID, returning an error when no topic with
// that ID exists.
func (c *Catalog) Get(id string) (*Topic, error) {
	if t, ok := c.topics[id]; ok {
		return t, nil
	}
	return nil, fmt.Errorf("topic not found: %s", id)
}

13
go.mod Normal file
View file

@ -0,0 +1,13 @@
module forge.lthn.ai/core/go-help
go 1.25.5
require (
github.com/stretchr/testify v1.11.1
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
)

10
go.sum Normal file
View file

@ -0,0 +1,10 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

174
parser.go Normal file
View file

@ -0,0 +1,174 @@
package help
import (
"path/filepath"
"regexp"
"strings"
"unicode"
"gopkg.in/yaml.v3"
)
var (
	// frontmatterRegex matches YAML frontmatter delimited by ---.
	// Supports both LF and CRLF line endings, and empty frontmatter blocks.
	// Capture group 1 is the raw YAML between the delimiters.
	// Compiled once at package scope; MustCompile panics only on a
	// programmer error in the pattern.
	frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(.*?)(?:\r?\n)?---\r?\n?`)
	// headingRegex matches markdown headings (# to ######).
	// Group 1 is the run of '#' (heading level), group 2 the heading text.
	headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`)
)
// ParseTopic parses a markdown document into a Topic.
//
// path supplies a fallback ID/title derived from the file name; content is
// the raw file bytes. Title/ID precedence: frontmatter title, then the
// first H1 heading, then the file name. The returned error is currently
// always nil.
func ParseTopic(path string, content []byte) (*Topic, error) {
	fm, body := ExtractFrontmatter(string(content))

	topic := &Topic{
		Path:     path,
		ID:       GenerateID(pathToTitle(path)),
		Content:  body,
		Sections: ExtractSections(body),
		Tags:     []string{},
		Related:  []string{},
	}

	// Frontmatter metadata, when present, overrides the path-derived defaults.
	if fm != nil {
		topic.Title = fm.Title
		topic.Tags = fm.Tags
		topic.Related = fm.Related
		topic.Order = fm.Order
		if topic.Title != "" {
			topic.ID = GenerateID(topic.Title)
		}
	}

	// No title from frontmatter: fall back to the first H1 heading.
	if topic.Title == "" {
		for i := range topic.Sections {
			if s := &topic.Sections[i]; s.Level == 1 {
				topic.Title = s.Title
				topic.ID = GenerateID(s.Title)
				break
			}
		}
	}
	return topic, nil
}
// ExtractFrontmatter splits YAML frontmatter off the top of markdown
// content.
//
// It returns the parsed frontmatter and the body that follows it. When
// the content has no frontmatter block, or the block is not valid YAML,
// it returns (nil, content) with the input unchanged.
func ExtractFrontmatter(content string) (*Frontmatter, string) {
	m := frontmatterRegex.FindStringSubmatch(content)
	if m == nil {
		return nil, content
	}
	fm := new(Frontmatter)
	if err := yaml.Unmarshal([]byte(m[1]), fm); err != nil {
		// Malformed YAML: treat the whole input as plain content.
		return nil, content
	}
	// Strip the matched frontmatter block from the returned body.
	return fm, content[len(m[0]):]
}
// ExtractSections parses markdown content into heading-delimited sections.
//
// Each #..###### heading starts a new Section; the lines up to the next
// heading become that section's content (whitespace-trimmed). Text before
// the first heading is discarded. Line numbers are 1-indexed.
func ExtractSections(content string) []Section {
	sections := []Section{}
	var body []string

	// flush assigns the accumulated body lines to the most recent section.
	flush := func() {
		if n := len(sections); n > 0 {
			sections[n-1].Content = strings.TrimSpace(strings.Join(body, "\n"))
		}
	}

	for num, line := range strings.Split(content, "\n") {
		m := headingRegex.FindStringSubmatch(line)
		if m == nil {
			// Non-heading lines only count once a section has started.
			if len(sections) > 0 {
				body = append(body, line)
			}
			continue
		}
		flush()
		title := strings.TrimSpace(m[2])
		sections = append(sections, Section{
			ID:    GenerateID(title),
			Title: title,
			Level: len(m[1]),
			Line:  num + 1, // 1-indexed
		})
		body = body[:0]
	}
	flush()
	return sections
}
// GenerateID creates a URL-safe ID from a title.
//
// Letters and digits are kept (lowercased); runs of spaces, hyphens and
// underscores collapse to a single interior hyphen; all other characters
// are dropped. The result never starts or ends with a hyphen.
// "Getting Started" -> "getting-started".
func GenerateID(title string) string {
	var b strings.Builder
	b.Grow(len(title))
	pendingSep := false // a separator was seen since the last kept rune
	for _, r := range strings.ToLower(title) {
		switch {
		case unicode.IsLetter(r) || unicode.IsDigit(r):
			// Deferring the hyphen until the next kept rune avoids
			// leading/trailing hyphens (no trim pass needed) and avoids
			// the previous implementation's per-separator
			// Builder.String() call, which copied the buffer each time.
			if pendingSep && b.Len() > 0 {
				b.WriteByte('-')
			}
			pendingSep = false
			b.WriteRune(r)
		case unicode.IsSpace(r) || r == '-' || r == '_':
			pendingSep = true
		}
		// Any other character is skipped entirely.
	}
	return b.String()
}
// pathToTitle converts a file path to a human-readable title.
//
// The directory and extension are stripped, hyphens and underscores become
// spaces, and each word is title-cased.
// "getting-started.md" -> "Getting Started".
func pathToTitle(path string) string {
	// Get filename without directory (cross-platform).
	filename := filepath.Base(path)
	// Remove extension.
	if ext := filepath.Ext(filename); ext != "" {
		filename = strings.TrimSuffix(filename, ext)
	}
	// Replace hyphens/underscores with spaces.
	filename = strings.ReplaceAll(filename, "-", " ")
	filename = strings.ReplaceAll(filename, "_", " ")
	// Title-case each word. Work in runes: indexing word[0] would slice
	// the first *byte*, corrupting a multi-byte UTF-8 first character.
	words := strings.Fields(filename)
	for i, word := range words {
		runes := []rune(strings.ToLower(word))
		runes[0] = unicode.ToUpper(runes[0]) // Fields guarantees non-empty words
		words[i] = string(runes)
	}
	return strings.Join(words, " ")
}

339
parser_test.go Normal file
View file

@ -0,0 +1,339 @@
package help
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestGenerateID_Good covers the title -> URL-safe ID transformation:
// lowercasing, separator collapsing, punctuation stripping and Unicode
// letter preservation.
func TestGenerateID_Good(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "simple title",
			input:    "Getting Started",
			expected: "getting-started",
		},
		{
			name:     "already lowercase",
			input:    "installation",
			expected: "installation",
		},
		{
			name:     "multiple spaces",
			input:    "Quick Start Guide",
			expected: "quick-start-guide",
		},
		{
			name:     "with numbers",
			input:    "Chapter 1 Introduction",
			expected: "chapter-1-introduction",
		},
		{
			name:     "special characters",
			input:    "What's New? (v2.0)",
			expected: "whats-new-v20",
		},
		{
			name:     "underscores",
			input:    "config_file_reference",
			expected: "config-file-reference",
		},
		{
			name:     "hyphens preserved",
			input:    "pre-commit hooks",
			expected: "pre-commit-hooks",
		},
		{
			name:     "leading trailing spaces",
			input:    " Trimmed Title ",
			expected: "trimmed-title",
		},
		{
			name:     "unicode letters",
			input:    "Configuración Básica",
			expected: "configuración-básica",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := GenerateID(tt.input)
			assert.Equal(t, tt.expected, result)
		})
	}
}
// TestExtractFrontmatter_Good verifies parsing of a complete frontmatter
// block: title, tags, order and related IDs, with the body returned
// without the delimiters.
func TestExtractFrontmatter_Good(t *testing.T) {
	content := `---
title: Getting Started
tags: [intro, setup]
order: 1
related:
- installation
- configuration
---
# Welcome
This is the content.
`
	fm, body := ExtractFrontmatter(content)
	assert.NotNil(t, fm)
	assert.Equal(t, "Getting Started", fm.Title)
	assert.Equal(t, []string{"intro", "setup"}, fm.Tags)
	assert.Equal(t, 1, fm.Order)
	assert.Equal(t, []string{"installation", "configuration"}, fm.Related)
	assert.Contains(t, body, "# Welcome")
	assert.Contains(t, body, "This is the content.")
}

// TestExtractFrontmatter_Good_NoFrontmatter: content without a frontmatter
// block is returned unchanged with a nil Frontmatter.
func TestExtractFrontmatter_Good_NoFrontmatter(t *testing.T) {
	content := `# Just a Heading
Some content here.
`
	fm, body := ExtractFrontmatter(content)
	assert.Nil(t, fm)
	assert.Equal(t, content, body)
}

// TestExtractFrontmatter_Good_CRLF: the delimiter regex accepts Windows
// line endings.
func TestExtractFrontmatter_Good_CRLF(t *testing.T) {
	// Content with CRLF line endings (Windows-style)
	content := "---\r\ntitle: CRLF Test\r\n---\r\n\r\n# Content"
	fm, body := ExtractFrontmatter(content)
	assert.NotNil(t, fm)
	assert.Equal(t, "CRLF Test", fm.Title)
	assert.Contains(t, body, "# Content")
}

// TestExtractFrontmatter_Good_Empty: an empty block between delimiters
// yields a zero-valued (but non-nil) Frontmatter.
func TestExtractFrontmatter_Good_Empty(t *testing.T) {
	// Empty frontmatter block
	content := "---\n---\n# Content"
	fm, body := ExtractFrontmatter(content)
	// Empty frontmatter should parse successfully
	assert.NotNil(t, fm)
	assert.Equal(t, "", fm.Title)
	assert.Contains(t, body, "# Content")
}

// TestExtractFrontmatter_Bad_InvalidYAML: malformed YAML falls back to
// treating the whole input as plain content.
func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) {
	content := `---
title: [invalid yaml
---
# Content
`
	fm, body := ExtractFrontmatter(content)
	// Invalid YAML should return nil frontmatter and original content
	assert.Nil(t, fm)
	assert.Equal(t, content, body)
}
// TestExtractSections_Good checks section splitting: IDs, titles, heading
// levels, 1-indexed line numbers, and content attribution up to the next
// heading.
func TestExtractSections_Good(t *testing.T) {
	content := `# Main Title
Introduction paragraph.
## Installation
Install instructions here.
More details.
### Prerequisites
You need these things.
## Configuration
Config info here.
`
	sections := ExtractSections(content)
	assert.Len(t, sections, 4)
	// Main Title (H1)
	assert.Equal(t, "main-title", sections[0].ID)
	assert.Equal(t, "Main Title", sections[0].Title)
	assert.Equal(t, 1, sections[0].Level)
	assert.Equal(t, 1, sections[0].Line)
	assert.Contains(t, sections[0].Content, "Introduction paragraph.")
	// Installation (H2)
	assert.Equal(t, "installation", sections[1].ID)
	assert.Equal(t, "Installation", sections[1].Title)
	assert.Equal(t, 2, sections[1].Level)
	assert.Contains(t, sections[1].Content, "Install instructions here.")
	assert.Contains(t, sections[1].Content, "More details.")
	// Prerequisites (H3)
	assert.Equal(t, "prerequisites", sections[2].ID)
	assert.Equal(t, "Prerequisites", sections[2].Title)
	assert.Equal(t, 3, sections[2].Level)
	assert.Contains(t, sections[2].Content, "You need these things.")
	// Configuration (H2)
	assert.Equal(t, "configuration", sections[3].ID)
	assert.Equal(t, "Configuration", sections[3].Title)
	assert.Equal(t, 2, sections[3].Level)
}

// TestExtractSections_Good_AllHeadingLevels: levels 1..6 are all
// recognised by the heading regex.
func TestExtractSections_Good_AllHeadingLevels(t *testing.T) {
	content := `# H1
## H2
### H3
#### H4
##### H5
###### H6
`
	sections := ExtractSections(content)
	assert.Len(t, sections, 6)
	for i, level := range []int{1, 2, 3, 4, 5, 6} {
		assert.Equal(t, level, sections[i].Level)
	}
}

// TestExtractSections_Good_Empty: content with no headings produces no
// sections.
func TestExtractSections_Good_Empty(t *testing.T) {
	content := `Just plain text.
No headings here.
`
	sections := ExtractSections(content)
	assert.Empty(t, sections)
}
// TestParseTopic_Good: frontmatter metadata wins over path-derived
// defaults; sections are extracted; the body excludes the frontmatter.
func TestParseTopic_Good(t *testing.T) {
	content := []byte(`---
title: Quick Start Guide
tags: [intro, quickstart]
order: 5
related:
- installation
---
# Quick Start Guide
Welcome to the guide.
## First Steps
Do this first.
## Next Steps
Then do this.
`)
	topic, err := ParseTopic("docs/quick-start.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	// Check metadata from frontmatter
	assert.Equal(t, "quick-start-guide", topic.ID)
	assert.Equal(t, "Quick Start Guide", topic.Title)
	assert.Equal(t, "docs/quick-start.md", topic.Path)
	assert.Equal(t, []string{"intro", "quickstart"}, topic.Tags)
	assert.Equal(t, []string{"installation"}, topic.Related)
	assert.Equal(t, 5, topic.Order)
	// Check sections
	assert.Len(t, topic.Sections, 3)
	assert.Equal(t, "quick-start-guide", topic.Sections[0].ID)
	assert.Equal(t, "first-steps", topic.Sections[1].ID)
	assert.Equal(t, "next-steps", topic.Sections[2].ID)
	// Content should not include frontmatter
	assert.NotContains(t, topic.Content, "---")
	assert.Contains(t, topic.Content, "# Quick Start Guide")
}

// TestParseTopic_Good_NoFrontmatter: title and ID fall back to the first
// H1 heading.
func TestParseTopic_Good_NoFrontmatter(t *testing.T) {
	content := []byte(`# Getting Started
This is a simple doc.
## Installation
Install it here.
`)
	topic, err := ParseTopic("getting-started.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	// Title should come from first H1
	assert.Equal(t, "Getting Started", topic.Title)
	assert.Equal(t, "getting-started", topic.ID)
	// Sections extracted
	assert.Len(t, topic.Sections, 2)
}

// TestParseTopic_Good_NoHeadings: frontmatter-only title, no sections.
func TestParseTopic_Good_NoHeadings(t *testing.T) {
	content := []byte(`---
title: Plain Content
---
Just some text without any headings.
`)
	topic, err := ParseTopic("plain.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	assert.Equal(t, "Plain Content", topic.Title)
	assert.Equal(t, "plain-content", topic.ID)
	assert.Empty(t, topic.Sections)
}

// TestParseTopic_Good_IDFromPath: with no frontmatter and no headings the
// ID is derived from the file name, while Title stays empty.
func TestParseTopic_Good_IDFromPath(t *testing.T) {
	content := []byte(`Just content, no frontmatter or headings.`)
	topic, err := ParseTopic("commands/dev-workflow.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	// ID and title should be derived from path
	assert.Equal(t, "dev-workflow", topic.ID)
	assert.Equal(t, "", topic.Title) // No title available
}

// TestPathToTitle_Good covers file-name -> title conversion, including
// directory stripping and extension removal.
func TestPathToTitle_Good(t *testing.T) {
	tests := []struct {
		path     string
		expected string
	}{
		{"getting-started.md", "Getting Started"},
		{"commands/dev.md", "Dev"},
		{"path/to/file_name.md", "File Name"},
		{"UPPERCASE.md", "Uppercase"},
		{"no-extension", "No Extension"},
	}
	for _, tt := range tests {
		t.Run(tt.path, func(t *testing.T) {
			result := pathToTitle(tt.path)
			assert.Equal(t, tt.expected, result)
		})
	}
}

393
search.go Normal file
View file

@ -0,0 +1,393 @@
package help
import (
	"regexp"
	"slices"
	"sort"
	"strings"
	"unicode"
)
// SearchResult represents a search match.
type SearchResult struct {
	Topic   *Topic
	Section *Section // nil if topic-level match
	Score   float64  // relevance; higher is better (see searchIndex.Search)
	Snippet string   // Context around match, with **bold** highlighting
}

// searchIndex provides full-text search over topics using an inverted
// word index.
type searchIndex struct {
	topics map[string]*Topic   // topicID -> Topic
	index  map[string][]string // word -> []topicID (deduplicated per word)
}

// newSearchIndex creates a new empty search index.
func newSearchIndex() *searchIndex {
	return &searchIndex{
		topics: make(map[string]*Topic),
		index:  make(map[string][]string),
	}
}
// Add indexes a topic for searching.
//
// Words from the title, content, every section's title and content, and
// the tags all map back to the topic's ID in the inverted index.
func (i *searchIndex) Add(topic *Topic) {
	i.topics[topic.ID] = topic

	// Gather every searchable text fragment, then index them uniformly.
	texts := []string{topic.Title, topic.Content}
	for _, s := range topic.Sections {
		texts = append(texts, s.Title, s.Content)
	}
	texts = append(texts, topic.Tags...)

	for _, text := range texts {
		for _, word := range tokenize(text) {
			i.addToIndex(word, topic.ID)
		}
	}
}
// addToIndex records that word occurs in the topic with ID topicID.
// The mapping is skipped when already present, keeping each word's
// topic-ID list free of duplicates.
func (i *searchIndex) addToIndex(word, topicID string) {
	ids := i.index[word]
	// slices.Contains replaces the previous hand-rolled linear scan.
	if slices.Contains(ids, topicID) {
		return
	}
	i.index[word] = append(ids, topicID)
}
// Search finds topics matching the query and returns them ranked by score.
//
// Scoring: each query word found exactly in the index adds 1.0 to every
// topic listed for it; a strict-prefix match against an indexed word adds
// 0.5. A query word appearing anywhere in the topic title adds a further
// 10.0, and one appearing in the best-matching section's title adds 5.0.
// Results are sorted by score (descending), then by topic title so the
// order is deterministic despite map iteration. An empty or untokenizable
// query returns nil.
func (i *searchIndex) Search(query string) []*SearchResult {
	queryWords := tokenize(query)
	if len(queryWords) == 0 {
		return nil
	}
	// Track scores per topic
	scores := make(map[string]float64)
	for _, word := range queryWords {
		// Exact matches
		if topicIDs, ok := i.index[word]; ok {
			for _, topicID := range topicIDs {
				scores[topicID] += 1.0
			}
		}
		// Prefix matches (partial word matching) — full scan of the index
		for indexWord, topicIDs := range i.index {
			if strings.HasPrefix(indexWord, word) && indexWord != word {
				for _, topicID := range topicIDs {
					scores[topicID] += 0.5 // Lower score for partial matches
				}
			}
		}
	}
	// Pre-compile case-insensitive literal regexes once for snippet
	// extraction/highlighting; single-character words are skipped.
	var res []*regexp.Regexp
	for _, word := range queryWords {
		if len(word) >= 2 {
			if re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(word)); err == nil {
				res = append(res, re)
			}
		}
	}
	// Build results with title boost and snippet extraction
	var results []*SearchResult
	for topicID, score := range scores {
		topic := i.topics[topicID]
		if topic == nil {
			continue
		}
		// Title boost: if query words appear in title
		titleLower := strings.ToLower(topic.Title)
		hasTitleMatch := false
		for _, word := range queryWords {
			if strings.Contains(titleLower, word) {
				hasTitleMatch = true
				break
			}
		}
		if hasTitleMatch {
			score += 10.0
		}
		// Find matching section and extract snippet
		section, snippet := i.findBestMatch(topic, queryWords, res)
		// Section title boost
		if section != nil {
			sectionTitleLower := strings.ToLower(section.Title)
			hasSectionTitleMatch := false
			for _, word := range queryWords {
				if strings.Contains(sectionTitleLower, word) {
					hasSectionTitleMatch = true
					break
				}
			}
			if hasSectionTitleMatch {
				score += 5.0
			}
		}
		results = append(results, &SearchResult{
			Topic:   topic,
			Section: section,
			Score:   score,
			Snippet: snippet,
		})
	}
	// Sort by score (highest first); ties break alphabetically by title
	sort.Slice(results, func(a, b int) bool {
		if results[a].Score != results[b].Score {
			return results[a].Score > results[b].Score
		}
		return results[a].Topic.Title < results[b].Topic.Title
	})
	return results
}
// findBestMatch picks the section that best matches the query words and
// extracts a display snippet.
//
// Section-title hits count double relative to section-content hits. When
// a winning section matched only by title (no content hit), the snippet is
// that section's opening text without highlighting (nil regexes). When the
// topic title matches, or nothing else produced a snippet, the snippet is
// taken from the topic content instead. Returns (nil, snippet) when no
// section out-scored zero.
func (i *searchIndex) findBestMatch(topic *Topic, queryWords []string, res []*regexp.Regexp) (*Section, string) {
	var bestSection *Section
	var bestSnippet string
	bestScore := 0
	// Check topic title
	titleScore := countMatches(topic.Title, queryWords)
	if titleScore > 0 {
		bestSnippet = extractSnippet(topic.Content, res)
	}
	// Check sections; iterate by index so bestSection points into the
	// topic's Sections slice rather than at a loop-variable copy.
	for idx := range topic.Sections {
		section := &topic.Sections[idx]
		sectionScore := countMatches(section.Title, queryWords)
		contentScore := countMatches(section.Content, queryWords)
		totalScore := sectionScore*2 + contentScore // Title matches worth more
		if totalScore > bestScore {
			bestScore = totalScore
			bestSection = section
			if contentScore > 0 {
				bestSnippet = extractSnippet(section.Content, res)
			} else {
				bestSnippet = extractSnippet(section.Content, nil)
			}
		}
	}
	// If no section matched, use topic content
	if bestSnippet == "" && topic.Content != "" {
		bestSnippet = extractSnippet(topic.Content, res)
	}
	return bestSection, bestSnippet
}
// tokenize splits text into lowercase words for indexing and searching.
//
// A word is a maximal run of letters/digits; words shorter than two bytes
// are dropped. Returns nil when no words qualify.
func tokenize(text string) []string {
	separator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	var words []string
	for _, w := range strings.FieldsFunc(strings.ToLower(text), separator) {
		if len(w) >= 2 { // skip single-character words
			words = append(words, w)
		}
	}
	return words
}
// countMatches reports how many of queryWords occur, case-insensitively
// and as substrings, in text.
func countMatches(text string, queryWords []string) int {
	haystack := strings.ToLower(text)
	n := 0
	for _, w := range queryWords {
		if strings.Contains(haystack, w) {
			n++
		}
	}
	return n
}
// extractSnippet extracts a short snippet (up to ~150 runes) around the
// first regex match in content and highlights matches in **bold**.
//
// With no regexes it returns the first non-empty, non-heading line,
// truncated to the snippet length. Slicing is done on runes so multi-byte
// UTF-8 content is never split mid-character; "..." markers indicate
// truncation at either end.
func extractSnippet(content string, res []*regexp.Regexp) string {
	if content == "" {
		return ""
	}
	const snippetLen = 150
	// If no regexes, return start of content without highlighting
	if len(res) == 0 {
		lines := strings.Split(content, "\n")
		for _, line := range lines {
			line = strings.TrimSpace(line)
			if line != "" && !strings.HasPrefix(line, "#") {
				runes := []rune(line)
				if len(runes) > snippetLen {
					return string(runes[:snippetLen]) + "..."
				}
				return line
			}
		}
		return ""
	}
	// Find first match position (byte-based)
	matchPos := -1
	for _, re := range res {
		loc := re.FindStringIndex(content)
		if loc != nil && (matchPos == -1 || loc[0] < matchPos) {
			matchPos = loc[0]
		}
	}
	// Convert to runes for safe slicing
	runes := []rune(content)
	runeLen := len(runes)
	var start, end int
	if matchPos == -1 {
		// No match found, use start of content
		start = 0
		end = snippetLen
		if end > runeLen {
			end = runeLen
		}
	} else {
		// Convert byte position to rune position
		matchRunePos := len([]rune(content[:matchPos]))
		// Extract snippet around match (rune-based): ~50 runes of
		// leading context, the rest trailing
		start = matchRunePos - 50
		if start < 0 {
			start = 0
		}
		end = start + snippetLen
		if end > runeLen {
			end = runeLen
		}
	}
	snippet := string(runes[start:end])
	// Trim to word boundaries so the snippet doesn't begin/end mid-word
	prefix := ""
	suffix := ""
	if start > 0 {
		if idx := strings.Index(snippet, " "); idx != -1 {
			snippet = snippet[idx+1:]
			prefix = "..."
		}
	}
	if end < runeLen {
		if idx := strings.LastIndex(snippet, " "); idx != -1 {
			snippet = snippet[:idx]
			suffix = "..."
		}
	}
	snippet = strings.TrimSpace(snippet)
	if snippet == "" {
		return ""
	}
	// Apply highlighting
	highlighted := highlight(snippet, res)
	return prefix + highlighted + suffix
}
// highlight wraps every regex match in text with **bold** markers.
//
// Overlapping or nested match ranges are merged before insertion, and the
// markers are inserted from the end of the string backwards so earlier
// insertions do not shift later byte offsets. Offsets come from the
// regexes themselves, so insertion points always fall on character
// boundaries.
func highlight(text string, res []*regexp.Regexp) string {
	if len(res) == 0 {
		return text
	}
	type match struct {
		start, end int
	}
	var matches []match
	for _, re := range res {
		indices := re.FindAllStringIndex(text, -1)
		for _, idx := range indices {
			matches = append(matches, match{idx[0], idx[1]})
		}
	}
	if len(matches) == 0 {
		return text
	}
	// Sort matches by start position; longer match first on ties
	sort.Slice(matches, func(i, j int) bool {
		if matches[i].start != matches[j].start {
			return matches[i].start < matches[j].start
		}
		return matches[i].end > matches[j].end
	})
	// Merge overlapping or adjacent matches
	var merged []match
	if len(matches) > 0 {
		curr := matches[0]
		for i := 1; i < len(matches); i++ {
			if matches[i].start <= curr.end {
				if matches[i].end > curr.end {
					curr.end = matches[i].end
				}
			} else {
				merged = append(merged, curr)
				curr = matches[i]
			}
		}
		merged = append(merged, curr)
	}
	// Build highlighted string from back to front to avoid position shifts
	result := text
	for i := len(merged) - 1; i >= 0; i-- {
		m := merged[i]
		result = result[:m.end] + "**" + result[m.end:]
		result = result[:m.start] + "**" + result[m.start:]
	}
	return result
}

340
search_test.go Normal file
View file

@ -0,0 +1,340 @@
package help
import (
"regexp"
"strings"
"testing"
"unicode/utf8"
"github.com/stretchr/testify/assert"
)
// TestTokenize_Good covers word splitting: lowercasing, punctuation as
// separators, single-character filtering, and the nil result for empty
// input.
func TestTokenize_Good(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected []string
	}{
		{
			name:     "simple words",
			input:    "hello world",
			expected: []string{"hello", "world"},
		},
		{
			name:     "mixed case",
			input:    "Hello World",
			expected: []string{"hello", "world"},
		},
		{
			name:     "with punctuation",
			input:    "Hello, world! How are you?",
			expected: []string{"hello", "world", "how", "are", "you"},
		},
		{
			name:     "single characters filtered",
			input:    "a b c hello d",
			expected: []string{"hello"},
		},
		{
			name:     "numbers included",
			input:    "version 2 release",
			expected: []string{"version", "release"},
		},
		{
			name:     "alphanumeric",
			input:    "v2.0 and config123",
			expected: []string{"v2", "and", "config123"},
		},
		{
			name:     "empty string",
			input:    "",
			expected: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := tokenize(tt.input)
			assert.Equal(t, tt.expected, result)
		})
	}
}

// TestSearchIndex_Add_Good verifies that title, content, tag and section
// words all end up in the inverted index pointing at the topic ID.
func TestSearchIndex_Add_Good(t *testing.T) {
	idx := newSearchIndex()
	topic := &Topic{
		ID:      "getting-started",
		Title:   "Getting Started",
		Content: "Welcome to the guide.",
		Tags:    []string{"intro", "setup"},
		Sections: []Section{
			{ID: "installation", Title: "Installation", Content: "Install the CLI."},
		},
	}
	idx.Add(topic)
	// Verify topic is stored
	assert.NotNil(t, idx.topics["getting-started"])
	// Verify words are indexed
	assert.Contains(t, idx.index["getting"], "getting-started")
	assert.Contains(t, idx.index["started"], "getting-started")
	assert.Contains(t, idx.index["welcome"], "getting-started")
	assert.Contains(t, idx.index["guide"], "getting-started")
	assert.Contains(t, idx.index["intro"], "getting-started")
	assert.Contains(t, idx.index["setup"], "getting-started")
	assert.Contains(t, idx.index["installation"], "getting-started")
	assert.Contains(t, idx.index["cli"], "getting-started")
}
// TestSearchIndex_Search_Good covers ranking behaviour: exact and prefix
// matches, the title boost, and empty/no-result queries.
func TestSearchIndex_Search_Good(t *testing.T) {
	idx := newSearchIndex()
	// Add test topics
	idx.Add(&Topic{
		ID:      "getting-started",
		Title:   "Getting Started",
		Content: "Welcome to the CLI guide. This covers installation and setup.",
		Tags:    []string{"intro"},
	})
	idx.Add(&Topic{
		ID:      "configuration",
		Title:   "Configuration",
		Content: "Configure the CLI using environment variables.",
	})
	idx.Add(&Topic{
		ID:      "commands",
		Title:   "Commands Reference",
		Content: "List of all available commands.",
	})
	t.Run("single word query", func(t *testing.T) {
		results := idx.Search("configuration")
		assert.NotEmpty(t, results)
		assert.Equal(t, "configuration", results[0].Topic.ID)
	})
	t.Run("multi-word query", func(t *testing.T) {
		results := idx.Search("cli guide")
		assert.NotEmpty(t, results)
		// Should match getting-started (has both "cli" and "guide")
		assert.Equal(t, "getting-started", results[0].Topic.ID)
	})
	t.Run("title boost", func(t *testing.T) {
		results := idx.Search("commands")
		assert.NotEmpty(t, results)
		// "commands" appears in title of commands topic
		assert.Equal(t, "commands", results[0].Topic.ID)
	})
	t.Run("partial word matching", func(t *testing.T) {
		results := idx.Search("config")
		assert.NotEmpty(t, results)
		// Should match "configuration" and "configure"
		foundConfig := false
		for _, r := range results {
			if r.Topic.ID == "configuration" {
				foundConfig = true
				break
			}
		}
		assert.True(t, foundConfig, "Should find configuration topic with prefix match")
	})
	t.Run("no results", func(t *testing.T) {
		results := idx.Search("nonexistent")
		assert.Empty(t, results)
	})
	t.Run("empty query", func(t *testing.T) {
		results := idx.Search("")
		assert.Nil(t, results)
	})
}

// TestSearchIndex_Search_Good_WithSections verifies that section content
// and section titles both produce matches, and that the best-matching
// section is surfaced on the result.
func TestSearchIndex_Search_Good_WithSections(t *testing.T) {
	idx := newSearchIndex()
	idx.Add(&Topic{
		ID:      "installation",
		Title:   "Installation Guide",
		Content: "Overview of installation process.",
		Sections: []Section{
			{
				ID:      "linux",
				Title:   "Linux Installation",
				Content: "Run apt-get install core on Debian.",
			},
			{
				ID:      "macos",
				Title:   "macOS Installation",
				Content: "Use brew install core on macOS.",
			},
			{
				ID:      "windows",
				Title:   "Windows Installation",
				Content: "Download the installer from the website.",
			},
		},
	})
	t.Run("matches section content", func(t *testing.T) {
		results := idx.Search("debian")
		assert.NotEmpty(t, results)
		assert.Equal(t, "installation", results[0].Topic.ID)
		// Should identify the Linux section as best match
		if results[0].Section != nil {
			assert.Equal(t, "linux", results[0].Section.ID)
		}
	})
	t.Run("matches section title", func(t *testing.T) {
		results := idx.Search("windows")
		assert.NotEmpty(t, results)
		assert.Equal(t, "installation", results[0].Topic.ID)
	})
}
// TestExtractSnippet_Good covers snippet extraction: context around a
// match, the no-regex fallback, and empty content.
func TestExtractSnippet_Good(t *testing.T) {
	content := `This is the first paragraph with some introduction text.
Here is more content that talks about installation and setup.
The installation process is straightforward.
Finally, some closing remarks about the configuration.`
	t.Run("finds match and extracts context", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"installation"}))
		assert.Contains(t, snippet, "**installation**")
		assert.True(t, len(snippet) <= 250, "Snippet should be reasonably short")
	})
	t.Run("no query words returns start", func(t *testing.T) {
		snippet := extractSnippet(content, nil)
		assert.Contains(t, snippet, "first paragraph")
	})
	t.Run("empty content", func(t *testing.T) {
		snippet := extractSnippet("", compileRegexes([]string{"test"}))
		assert.Empty(t, snippet)
	})
}

// TestExtractSnippet_Highlighting covers **bold** marker insertion:
// multiple words, case-insensitivity, partial-word and overlapping
// matches.
func TestExtractSnippet_Highlighting(t *testing.T) {
	content := "The quick brown fox jumps over the lazy dog."
	t.Run("simple highlighting", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"quick", "fox"}))
		assert.Contains(t, snippet, "**quick**")
		assert.Contains(t, snippet, "**fox**")
	})
	t.Run("case insensitive highlighting", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"QUICK", "Fox"}))
		assert.Contains(t, snippet, "**quick**")
		assert.Contains(t, snippet, "**fox**")
	})
	t.Run("partial word matching", func(t *testing.T) {
		content := "The configuration is complete."
		snippet := extractSnippet(content, compileRegexes([]string{"config"}))
		assert.Contains(t, snippet, "**config**uration")
	})
	t.Run("overlapping matches", func(t *testing.T) {
		content := "Searching for something."
		// Both "search" and "searching" match
		snippet := extractSnippet(content, compileRegexes([]string{"search", "searching"}))
		assert.Equal(t, "**Searching** for something.", snippet)
	})
}

// TestExtractSnippet_Good_UTF8 checks that rune-based slicing never
// produces invalid UTF-8, even when truncating multi-byte content.
func TestExtractSnippet_Good_UTF8(t *testing.T) {
	// Content with multi-byte UTF-8 characters
	content := "日本語のテキストです。This contains Japanese text. 検索機能をテストします。"
	t.Run("handles multi-byte characters without corruption", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"japanese"}))
		// Should not panic or produce invalid UTF-8
		assert.True(t, len(snippet) > 0)
		// Verify the result is valid UTF-8
		assert.True(t, isValidUTF8(snippet), "Snippet should be valid UTF-8")
	})
	t.Run("truncates multi-byte content safely", func(t *testing.T) {
		// Long content that will be truncated
		longContent := strings.Repeat("日本語", 100) // 300 characters
		snippet := extractSnippet(longContent, nil)
		assert.True(t, isValidUTF8(snippet), "Truncated snippet should be valid UTF-8")
	})
}
// compileRegexes is a test helper that builds case-insensitive literal
// matchers for the given words, silently skipping any that fail to
// compile.
func compileRegexes(words []string) []*regexp.Regexp {
	var res []*regexp.Regexp
	for _, word := range words {
		re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(word))
		if err != nil {
			continue
		}
		res = append(res, re)
	}
	return res
}
// isValidUTF8 reports whether s is entirely valid UTF-8.
// The standard library already provides exactly this check; the previous
// hand-rolled DecodeRuneInString loop was equivalent but redundant.
func isValidUTF8(s string) bool {
	return utf8.ValidString(s)
}
// TestCountMatches_Good covers case-insensitive substring counting of
// query words.
func TestCountMatches_Good(t *testing.T) {
	tests := []struct {
		text     string
		words    []string
		expected int
	}{
		{"Hello world", []string{"hello"}, 1},
		{"Hello world", []string{"hello", "world"}, 2},
		{"Hello world", []string{"foo", "bar"}, 0},
		{"The quick brown fox", []string{"quick", "fox", "dog"}, 2},
	}
	for _, tt := range tests {
		result := countMatches(tt.text, tt.words)
		assert.Equal(t, tt.expected, result)
	}
}

// TestSearchResult_Score_Good verifies the +10 title boost: a title hit
// must outrank a content-only hit for the same query.
func TestSearchResult_Score_Good(t *testing.T) {
	idx := newSearchIndex()
	// Topic with query word in title should score higher
	idx.Add(&Topic{
		ID:      "topic-in-title",
		Title:   "Installation Guide",
		Content: "Some content here.",
	})
	idx.Add(&Topic{
		ID:      "topic-in-content",
		Title:   "Some Other Topic",
		Content: "This covers installation steps.",
	})
	results := idx.Search("installation")
	assert.Len(t, results, 2)
	// Title match should score higher
	assert.Equal(t, "topic-in-title", results[0].Topic.ID)
	assert.Greater(t, results[0].Score, results[1].Score)
}

31
topic.go Normal file
View file

@ -0,0 +1,31 @@
// Package help provides display-agnostic help content management.
package help

// Topic represents a help topic/page.
type Topic struct {
	ID       string    `json:"id"`       // URL-safe identifier (see GenerateID)
	Title    string    `json:"title"`    // human-readable title
	Path     string    `json:"path"`     // source file path, when parsed from a file
	Content  string    `json:"content"`  // markdown body, frontmatter stripped
	Sections []Section `json:"sections"` // heading-delimited sections of Content
	Tags     []string  `json:"tags"`     // search keywords from frontmatter
	Related  []string  `json:"related"`  // IDs of related topics
	Order    int       `json:"order"`    // For sorting
}

// Section represents a heading within a topic.
type Section struct {
	ID      string `json:"id"`      // URL-safe identifier derived from Title
	Title   string `json:"title"`   // heading text, whitespace-trimmed
	Level   int    `json:"level"`   // heading depth: 1 (#) to 6 (######)
	Line    int    `json:"line"`    // Start line in content (1-indexed)
	Content string `json:"content"` // Content under heading
}

// Frontmatter represents YAML frontmatter metadata.
type Frontmatter struct {
	Title   string   `yaml:"title"`
	Tags    []string `yaml:"tags"`
	Related []string `yaml:"related"`
	Order   int      `yaml:"order"`
}