feat: extract go-help from core/go pkg/help

YAML-based help catalog with topic search.
Single external dependency: gopkg.in/yaml.v3
Module: forge.lthn.ai/core/go-help

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-02-19 16:09:34 +00:00
commit ad5e70937b
9 changed files with 1412 additions and 0 deletions

25
CLAUDE.md Normal file
View file

@ -0,0 +1,25 @@
# CLAUDE.md
## What This Is
YAML-based help catalog with topic search and rendering. Module: `forge.lthn.ai/core/go-help`
## Commands
```bash
go test ./... # Run all tests
go test -v -run Name # Run single test
```
## Architecture
- `Topic` defines help entries with title, body, tags, related topics
- `Catalog` stores topics keyed by ID; `ParseTopic` parses markdown files with YAML frontmatter
- `Search` provides ranked keyword search (exact and prefix word matches) across titles, tags, body and sections
## Coding Standards
- UK English
- `go test ./...` must pass before commit
- Conventional commits: `type(scope): description`
- Co-Author: `Co-Authored-By: Virgil <virgil@lethean.io>`

87
catalog.go Normal file
View file

@ -0,0 +1,87 @@
package help
import (
"fmt"
)
// Catalog manages help topics.
//
// Topics are stored keyed by ID, and a full-text search index is kept in
// sync as topics are added via Add.
type Catalog struct {
	topics map[string]*Topic // topic ID -> topic
	index  *searchIndex      // inverted word index over titles, content, sections, tags
}
// DefaultCatalog returns a catalog pre-populated with the built-in topics
// ("getting-started" and "config").
func DefaultCatalog() *Catalog {
	builtins := []*Topic{
		{
			ID:    "getting-started",
			Title: "Getting Started",
			Content: `# Getting Started
Welcome to Core! This CLI tool helps you manage development workflows.
## Common Commands
- core dev: Development workflows
- core setup: Setup repository
- core doctor: Check environment health
- core test: Run tests
## Next Steps
Run 'core help <topic>' to learn more about a specific topic.
`,
		},
		{
			ID:    "config",
			Title: "Configuration",
			Content: `# Configuration
Core is configured via environment variables and config files.
## Environment Variables
- CORE_DEBUG: Enable debug logging
- GITHUB_TOKEN: GitHub API token
## Config Files
Config is stored in ~/.core/config.yaml
`,
		},
	}

	c := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}
	for _, t := range builtins {
		c.Add(t)
	}
	return c
}
// Add registers a topic under its ID and feeds it to the search index.
// Adding a topic with an existing ID replaces the stored entry.
func (c *Catalog) Add(t *Topic) {
	c.index.Add(t)
	c.topics[t.ID] = t
}
// List returns all topics in the catalog.
//
// The slice is freshly allocated (nil when the catalog is empty, matching
// the previous behaviour); ordering is unspecified because it follows map
// iteration order.
func (c *Catalog) List() []*Topic {
	if len(c.topics) == 0 {
		return nil
	}
	// Pre-size to avoid repeated append growth.
	list := make([]*Topic, 0, len(c.topics))
	for _, t := range c.topics {
		list = append(list, t)
	}
	return list
}
// Search returns ranked matches for query across all topics.
// It delegates to the underlying search index (see searchIndex.Search
// for the scoring rules).
func (c *Catalog) Search(query string) []*SearchResult {
	return c.index.Search(query)
}
// Get looks up a topic by its ID, returning an error when no topic with
// that ID exists.
func (c *Catalog) Get(id string) (*Topic, error) {
	if t, ok := c.topics[id]; ok {
		return t, nil
	}
	return nil, fmt.Errorf("topic not found: %s", id)
}

13
go.mod Normal file
View file

@ -0,0 +1,13 @@
module forge.lthn.ai/core/go-help
go 1.25.5
require (
github.com/stretchr/testify v1.11.1
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
)

10
go.sum Normal file
View file

@ -0,0 +1,10 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

174
parser.go Normal file
View file

@ -0,0 +1,174 @@
package help
import (
"path/filepath"
"regexp"
"strings"
"unicode"
"gopkg.in/yaml.v3"
)
var (
	// frontmatterRegex matches YAML frontmatter delimited by ---.
	// Supports both LF and CRLF line endings, and empty frontmatter blocks.
	// Capture group 1 is the raw YAML between the delimiters.
	// Compiled once at package scope; MustCompile panics only on a
	// programmer error in the pattern.
	frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(.*?)(?:\r?\n)?---\r?\n?`)
	// headingRegex matches markdown headings (# to ######).
	// Group 1 is the run of '#' (heading level), group 2 the heading text.
	headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`)
)
// ParseTopic parses a markdown document into a Topic.
//
// path supplies a fallback ID/title derived from the file name; content is
// the raw file bytes. Title/ID precedence: frontmatter title, then the
// first H1 heading, then the file name. The returned error is currently
// always nil.
func ParseTopic(path string, content []byte) (*Topic, error) {
	fm, body := ExtractFrontmatter(string(content))

	topic := &Topic{
		Path:     path,
		ID:       GenerateID(pathToTitle(path)),
		Content:  body,
		Sections: ExtractSections(body),
		Tags:     []string{},
		Related:  []string{},
	}

	// Frontmatter metadata, when present, overrides the path-derived defaults.
	if fm != nil {
		topic.Title = fm.Title
		topic.Tags = fm.Tags
		topic.Related = fm.Related
		topic.Order = fm.Order
		if topic.Title != "" {
			topic.ID = GenerateID(topic.Title)
		}
	}

	// No title from frontmatter: fall back to the first H1 heading.
	if topic.Title == "" {
		for i := range topic.Sections {
			if s := &topic.Sections[i]; s.Level == 1 {
				topic.Title = s.Title
				topic.ID = GenerateID(s.Title)
				break
			}
		}
	}
	return topic, nil
}
// ExtractFrontmatter splits YAML frontmatter off the top of markdown
// content.
//
// It returns the parsed frontmatter and the body that follows it. When
// the content has no frontmatter block, or the block is not valid YAML,
// it returns (nil, content) with the input unchanged.
func ExtractFrontmatter(content string) (*Frontmatter, string) {
	m := frontmatterRegex.FindStringSubmatch(content)
	if m == nil {
		return nil, content
	}
	fm := new(Frontmatter)
	if err := yaml.Unmarshal([]byte(m[1]), fm); err != nil {
		// Malformed YAML: treat the whole input as plain content.
		return nil, content
	}
	// Strip the matched frontmatter block from the returned body.
	return fm, content[len(m[0]):]
}
// ExtractSections parses markdown content into heading-delimited sections.
//
// Each #..###### heading starts a new Section; the lines up to the next
// heading become that section's content (whitespace-trimmed). Text before
// the first heading is discarded. Line numbers are 1-indexed.
func ExtractSections(content string) []Section {
	sections := []Section{}
	var body []string

	// flush assigns the accumulated body lines to the most recent section.
	flush := func() {
		if n := len(sections); n > 0 {
			sections[n-1].Content = strings.TrimSpace(strings.Join(body, "\n"))
		}
	}

	for num, line := range strings.Split(content, "\n") {
		m := headingRegex.FindStringSubmatch(line)
		if m == nil {
			// Non-heading lines only count once a section has started.
			if len(sections) > 0 {
				body = append(body, line)
			}
			continue
		}
		flush()
		title := strings.TrimSpace(m[2])
		sections = append(sections, Section{
			ID:    GenerateID(title),
			Title: title,
			Level: len(m[1]),
			Line:  num + 1, // 1-indexed
		})
		body = body[:0]
	}
	flush()
	return sections
}
// GenerateID creates a URL-safe ID from a title.
//
// Letters and digits are kept (lowercased); runs of spaces, hyphens and
// underscores collapse to a single interior hyphen; all other characters
// are dropped. The result never starts or ends with a hyphen.
// "Getting Started" -> "getting-started".
func GenerateID(title string) string {
	var b strings.Builder
	b.Grow(len(title))
	pendingSep := false // a separator was seen since the last kept rune
	for _, r := range strings.ToLower(title) {
		switch {
		case unicode.IsLetter(r) || unicode.IsDigit(r):
			// Deferring the hyphen until the next kept rune avoids
			// leading/trailing hyphens (no trim pass needed) and avoids
			// the previous implementation's per-separator
			// Builder.String() call, which copied the buffer each time.
			if pendingSep && b.Len() > 0 {
				b.WriteByte('-')
			}
			pendingSep = false
			b.WriteRune(r)
		case unicode.IsSpace(r) || r == '-' || r == '_':
			pendingSep = true
		}
		// Any other character is skipped entirely.
	}
	return b.String()
}
// pathToTitle converts a file path to a human-readable title.
//
// The directory and extension are stripped, hyphens and underscores become
// spaces, and each word is title-cased.
// "getting-started.md" -> "Getting Started".
func pathToTitle(path string) string {
	// Get filename without directory (cross-platform).
	filename := filepath.Base(path)
	// Remove extension.
	if ext := filepath.Ext(filename); ext != "" {
		filename = strings.TrimSuffix(filename, ext)
	}
	// Replace hyphens/underscores with spaces.
	filename = strings.ReplaceAll(filename, "-", " ")
	filename = strings.ReplaceAll(filename, "_", " ")
	// Title-case each word. Work in runes: indexing word[0] would slice
	// the first *byte*, corrupting a multi-byte UTF-8 first character.
	words := strings.Fields(filename)
	for i, word := range words {
		runes := []rune(strings.ToLower(word))
		runes[0] = unicode.ToUpper(runes[0]) // Fields guarantees non-empty words
		words[i] = string(runes)
	}
	return strings.Join(words, " ")
}

339
parser_test.go Normal file
View file

@ -0,0 +1,339 @@
package help
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestGenerateID_Good covers the title -> URL-safe ID transformation:
// lowercasing, separator collapsing, punctuation stripping and Unicode
// letter preservation.
func TestGenerateID_Good(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "simple title",
			input:    "Getting Started",
			expected: "getting-started",
		},
		{
			name:     "already lowercase",
			input:    "installation",
			expected: "installation",
		},
		{
			name:     "multiple spaces",
			input:    "Quick Start Guide",
			expected: "quick-start-guide",
		},
		{
			name:     "with numbers",
			input:    "Chapter 1 Introduction",
			expected: "chapter-1-introduction",
		},
		{
			name:     "special characters",
			input:    "What's New? (v2.0)",
			expected: "whats-new-v20",
		},
		{
			name:     "underscores",
			input:    "config_file_reference",
			expected: "config-file-reference",
		},
		{
			name:     "hyphens preserved",
			input:    "pre-commit hooks",
			expected: "pre-commit-hooks",
		},
		{
			name:     "leading trailing spaces",
			input:    " Trimmed Title ",
			expected: "trimmed-title",
		},
		{
			name:     "unicode letters",
			input:    "Configuración Básica",
			expected: "configuración-básica",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := GenerateID(tt.input)
			assert.Equal(t, tt.expected, result)
		})
	}
}
// TestExtractFrontmatter_Good verifies parsing of a complete frontmatter
// block: title, tags, order and related IDs, with the body returned
// without the delimiters.
func TestExtractFrontmatter_Good(t *testing.T) {
	content := `---
title: Getting Started
tags: [intro, setup]
order: 1
related:
- installation
- configuration
---
# Welcome
This is the content.
`
	fm, body := ExtractFrontmatter(content)
	assert.NotNil(t, fm)
	assert.Equal(t, "Getting Started", fm.Title)
	assert.Equal(t, []string{"intro", "setup"}, fm.Tags)
	assert.Equal(t, 1, fm.Order)
	assert.Equal(t, []string{"installation", "configuration"}, fm.Related)
	assert.Contains(t, body, "# Welcome")
	assert.Contains(t, body, "This is the content.")
}

// TestExtractFrontmatter_Good_NoFrontmatter: content without a frontmatter
// block is returned unchanged with a nil Frontmatter.
func TestExtractFrontmatter_Good_NoFrontmatter(t *testing.T) {
	content := `# Just a Heading
Some content here.
`
	fm, body := ExtractFrontmatter(content)
	assert.Nil(t, fm)
	assert.Equal(t, content, body)
}

// TestExtractFrontmatter_Good_CRLF: the delimiter regex accepts Windows
// line endings.
func TestExtractFrontmatter_Good_CRLF(t *testing.T) {
	// Content with CRLF line endings (Windows-style)
	content := "---\r\ntitle: CRLF Test\r\n---\r\n\r\n# Content"
	fm, body := ExtractFrontmatter(content)
	assert.NotNil(t, fm)
	assert.Equal(t, "CRLF Test", fm.Title)
	assert.Contains(t, body, "# Content")
}

// TestExtractFrontmatter_Good_Empty: an empty block between delimiters
// yields a zero-valued (but non-nil) Frontmatter.
func TestExtractFrontmatter_Good_Empty(t *testing.T) {
	// Empty frontmatter block
	content := "---\n---\n# Content"
	fm, body := ExtractFrontmatter(content)
	// Empty frontmatter should parse successfully
	assert.NotNil(t, fm)
	assert.Equal(t, "", fm.Title)
	assert.Contains(t, body, "# Content")
}

// TestExtractFrontmatter_Bad_InvalidYAML: malformed YAML falls back to
// treating the whole input as plain content.
func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) {
	content := `---
title: [invalid yaml
---
# Content
`
	fm, body := ExtractFrontmatter(content)
	// Invalid YAML should return nil frontmatter and original content
	assert.Nil(t, fm)
	assert.Equal(t, content, body)
}
// TestExtractSections_Good checks section splitting: IDs, titles, heading
// levels, 1-indexed line numbers, and content attribution up to the next
// heading.
func TestExtractSections_Good(t *testing.T) {
	content := `# Main Title
Introduction paragraph.
## Installation
Install instructions here.
More details.
### Prerequisites
You need these things.
## Configuration
Config info here.
`
	sections := ExtractSections(content)
	assert.Len(t, sections, 4)
	// Main Title (H1)
	assert.Equal(t, "main-title", sections[0].ID)
	assert.Equal(t, "Main Title", sections[0].Title)
	assert.Equal(t, 1, sections[0].Level)
	assert.Equal(t, 1, sections[0].Line)
	assert.Contains(t, sections[0].Content, "Introduction paragraph.")
	// Installation (H2)
	assert.Equal(t, "installation", sections[1].ID)
	assert.Equal(t, "Installation", sections[1].Title)
	assert.Equal(t, 2, sections[1].Level)
	assert.Contains(t, sections[1].Content, "Install instructions here.")
	assert.Contains(t, sections[1].Content, "More details.")
	// Prerequisites (H3)
	assert.Equal(t, "prerequisites", sections[2].ID)
	assert.Equal(t, "Prerequisites", sections[2].Title)
	assert.Equal(t, 3, sections[2].Level)
	assert.Contains(t, sections[2].Content, "You need these things.")
	// Configuration (H2)
	assert.Equal(t, "configuration", sections[3].ID)
	assert.Equal(t, "Configuration", sections[3].Title)
	assert.Equal(t, 2, sections[3].Level)
}

// TestExtractSections_Good_AllHeadingLevels: levels 1..6 are all
// recognised by the heading regex.
func TestExtractSections_Good_AllHeadingLevels(t *testing.T) {
	content := `# H1
## H2
### H3
#### H4
##### H5
###### H6
`
	sections := ExtractSections(content)
	assert.Len(t, sections, 6)
	for i, level := range []int{1, 2, 3, 4, 5, 6} {
		assert.Equal(t, level, sections[i].Level)
	}
}

// TestExtractSections_Good_Empty: content with no headings produces no
// sections.
func TestExtractSections_Good_Empty(t *testing.T) {
	content := `Just plain text.
No headings here.
`
	sections := ExtractSections(content)
	assert.Empty(t, sections)
}
// TestParseTopic_Good: frontmatter metadata wins over path-derived
// defaults; sections are extracted; the body excludes the frontmatter.
func TestParseTopic_Good(t *testing.T) {
	content := []byte(`---
title: Quick Start Guide
tags: [intro, quickstart]
order: 5
related:
- installation
---
# Quick Start Guide
Welcome to the guide.
## First Steps
Do this first.
## Next Steps
Then do this.
`)
	topic, err := ParseTopic("docs/quick-start.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	// Check metadata from frontmatter
	assert.Equal(t, "quick-start-guide", topic.ID)
	assert.Equal(t, "Quick Start Guide", topic.Title)
	assert.Equal(t, "docs/quick-start.md", topic.Path)
	assert.Equal(t, []string{"intro", "quickstart"}, topic.Tags)
	assert.Equal(t, []string{"installation"}, topic.Related)
	assert.Equal(t, 5, topic.Order)
	// Check sections
	assert.Len(t, topic.Sections, 3)
	assert.Equal(t, "quick-start-guide", topic.Sections[0].ID)
	assert.Equal(t, "first-steps", topic.Sections[1].ID)
	assert.Equal(t, "next-steps", topic.Sections[2].ID)
	// Content should not include frontmatter
	assert.NotContains(t, topic.Content, "---")
	assert.Contains(t, topic.Content, "# Quick Start Guide")
}

// TestParseTopic_Good_NoFrontmatter: title and ID fall back to the first
// H1 heading.
func TestParseTopic_Good_NoFrontmatter(t *testing.T) {
	content := []byte(`# Getting Started
This is a simple doc.
## Installation
Install it here.
`)
	topic, err := ParseTopic("getting-started.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	// Title should come from first H1
	assert.Equal(t, "Getting Started", topic.Title)
	assert.Equal(t, "getting-started", topic.ID)
	// Sections extracted
	assert.Len(t, topic.Sections, 2)
}

// TestParseTopic_Good_NoHeadings: frontmatter-only title, no sections.
func TestParseTopic_Good_NoHeadings(t *testing.T) {
	content := []byte(`---
title: Plain Content
---
Just some text without any headings.
`)
	topic, err := ParseTopic("plain.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	assert.Equal(t, "Plain Content", topic.Title)
	assert.Equal(t, "plain-content", topic.ID)
	assert.Empty(t, topic.Sections)
}

// TestParseTopic_Good_IDFromPath: with no frontmatter and no headings the
// ID is derived from the file name, while Title stays empty.
func TestParseTopic_Good_IDFromPath(t *testing.T) {
	content := []byte(`Just content, no frontmatter or headings.`)
	topic, err := ParseTopic("commands/dev-workflow.md", content)
	assert.NoError(t, err)
	assert.NotNil(t, topic)
	// ID and title should be derived from path
	assert.Equal(t, "dev-workflow", topic.ID)
	assert.Equal(t, "", topic.Title) // No title available
}

// TestPathToTitle_Good covers file-name -> title conversion, including
// directory stripping and extension removal.
func TestPathToTitle_Good(t *testing.T) {
	tests := []struct {
		path     string
		expected string
	}{
		{"getting-started.md", "Getting Started"},
		{"commands/dev.md", "Dev"},
		{"path/to/file_name.md", "File Name"},
		{"UPPERCASE.md", "Uppercase"},
		{"no-extension", "No Extension"},
	}
	for _, tt := range tests {
		t.Run(tt.path, func(t *testing.T) {
			result := pathToTitle(tt.path)
			assert.Equal(t, tt.expected, result)
		})
	}
}

393
search.go Normal file
View file

@ -0,0 +1,393 @@
package help
import (
	"regexp"
	"slices"
	"sort"
	"strings"
	"unicode"
)
// SearchResult represents a search match.
type SearchResult struct {
	Topic   *Topic
	Section *Section // nil if topic-level match
	Score   float64  // relevance; higher is better (see searchIndex.Search)
	Snippet string   // Context around match, with **bold** highlighting
}

// searchIndex provides full-text search over topics using an inverted
// word index.
type searchIndex struct {
	topics map[string]*Topic   // topicID -> Topic
	index  map[string][]string // word -> []topicID (deduplicated per word)
}

// newSearchIndex creates a new empty search index.
func newSearchIndex() *searchIndex {
	return &searchIndex{
		topics: make(map[string]*Topic),
		index:  make(map[string][]string),
	}
}
// Add indexes a topic for searching.
//
// Words from the title, content, every section's title and content, and
// the tags all map back to the topic's ID in the inverted index.
func (i *searchIndex) Add(topic *Topic) {
	i.topics[topic.ID] = topic

	// Gather every searchable text fragment, then index them uniformly.
	texts := []string{topic.Title, topic.Content}
	for _, s := range topic.Sections {
		texts = append(texts, s.Title, s.Content)
	}
	texts = append(texts, topic.Tags...)

	for _, text := range texts {
		for _, word := range tokenize(text) {
			i.addToIndex(word, topic.ID)
		}
	}
}
// addToIndex records that word occurs in the topic with ID topicID.
// The mapping is skipped when already present, keeping each word's
// topic-ID list free of duplicates.
func (i *searchIndex) addToIndex(word, topicID string) {
	ids := i.index[word]
	// slices.Contains replaces the previous hand-rolled linear scan.
	if slices.Contains(ids, topicID) {
		return
	}
	i.index[word] = append(ids, topicID)
}
// Search finds topics matching the query and returns them ranked by score.
//
// Scoring: each query word found exactly in the index adds 1.0 to every
// topic listed for it; a strict-prefix match against an indexed word adds
// 0.5. A query word appearing anywhere in the topic title adds a further
// 10.0, and one appearing in the best-matching section's title adds 5.0.
// Results are sorted by score (descending), then by topic title so the
// order is deterministic despite map iteration. An empty or untokenizable
// query returns nil.
func (i *searchIndex) Search(query string) []*SearchResult {
	queryWords := tokenize(query)
	if len(queryWords) == 0 {
		return nil
	}
	// Track scores per topic
	scores := make(map[string]float64)
	for _, word := range queryWords {
		// Exact matches
		if topicIDs, ok := i.index[word]; ok {
			for _, topicID := range topicIDs {
				scores[topicID] += 1.0
			}
		}
		// Prefix matches (partial word matching) — full scan of the index
		for indexWord, topicIDs := range i.index {
			if strings.HasPrefix(indexWord, word) && indexWord != word {
				for _, topicID := range topicIDs {
					scores[topicID] += 0.5 // Lower score for partial matches
				}
			}
		}
	}
	// Pre-compile case-insensitive literal regexes once for snippet
	// extraction/highlighting; single-character words are skipped.
	var res []*regexp.Regexp
	for _, word := range queryWords {
		if len(word) >= 2 {
			if re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(word)); err == nil {
				res = append(res, re)
			}
		}
	}
	// Build results with title boost and snippet extraction
	var results []*SearchResult
	for topicID, score := range scores {
		topic := i.topics[topicID]
		if topic == nil {
			continue
		}
		// Title boost: if query words appear in title
		titleLower := strings.ToLower(topic.Title)
		hasTitleMatch := false
		for _, word := range queryWords {
			if strings.Contains(titleLower, word) {
				hasTitleMatch = true
				break
			}
		}
		if hasTitleMatch {
			score += 10.0
		}
		// Find matching section and extract snippet
		section, snippet := i.findBestMatch(topic, queryWords, res)
		// Section title boost
		if section != nil {
			sectionTitleLower := strings.ToLower(section.Title)
			hasSectionTitleMatch := false
			for _, word := range queryWords {
				if strings.Contains(sectionTitleLower, word) {
					hasSectionTitleMatch = true
					break
				}
			}
			if hasSectionTitleMatch {
				score += 5.0
			}
		}
		results = append(results, &SearchResult{
			Topic:   topic,
			Section: section,
			Score:   score,
			Snippet: snippet,
		})
	}
	// Sort by score (highest first); ties break alphabetically by title
	sort.Slice(results, func(a, b int) bool {
		if results[a].Score != results[b].Score {
			return results[a].Score > results[b].Score
		}
		return results[a].Topic.Title < results[b].Topic.Title
	})
	return results
}
// findBestMatch picks the section that best matches the query words and
// extracts a display snippet.
//
// Section-title hits count double relative to section-content hits. When
// a winning section matched only by title (no content hit), the snippet is
// that section's opening text without highlighting (nil regexes). When the
// topic title matches, or nothing else produced a snippet, the snippet is
// taken from the topic content instead. Returns (nil, snippet) when no
// section out-scored zero.
func (i *searchIndex) findBestMatch(topic *Topic, queryWords []string, res []*regexp.Regexp) (*Section, string) {
	var bestSection *Section
	var bestSnippet string
	bestScore := 0
	// Check topic title
	titleScore := countMatches(topic.Title, queryWords)
	if titleScore > 0 {
		bestSnippet = extractSnippet(topic.Content, res)
	}
	// Check sections; iterate by index so bestSection points into the
	// topic's Sections slice rather than at a loop-variable copy.
	for idx := range topic.Sections {
		section := &topic.Sections[idx]
		sectionScore := countMatches(section.Title, queryWords)
		contentScore := countMatches(section.Content, queryWords)
		totalScore := sectionScore*2 + contentScore // Title matches worth more
		if totalScore > bestScore {
			bestScore = totalScore
			bestSection = section
			if contentScore > 0 {
				bestSnippet = extractSnippet(section.Content, res)
			} else {
				bestSnippet = extractSnippet(section.Content, nil)
			}
		}
	}
	// If no section matched, use topic content
	if bestSnippet == "" && topic.Content != "" {
		bestSnippet = extractSnippet(topic.Content, res)
	}
	return bestSection, bestSnippet
}
// tokenize splits text into lowercase words for indexing and searching.
//
// A word is a maximal run of letters/digits; words shorter than two bytes
// are dropped. Returns nil when no words qualify.
func tokenize(text string) []string {
	separator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	var words []string
	for _, w := range strings.FieldsFunc(strings.ToLower(text), separator) {
		if len(w) >= 2 { // skip single-character words
			words = append(words, w)
		}
	}
	return words
}
// countMatches reports how many of queryWords occur, case-insensitively
// and as substrings, in text.
func countMatches(text string, queryWords []string) int {
	haystack := strings.ToLower(text)
	n := 0
	for _, w := range queryWords {
		if strings.Contains(haystack, w) {
			n++
		}
	}
	return n
}
// extractSnippet extracts a short snippet (up to ~150 runes) around the
// first regex match in content and highlights matches in **bold**.
//
// With no regexes it returns the first non-empty, non-heading line,
// truncated to the snippet length. Slicing is done on runes so multi-byte
// UTF-8 content is never split mid-character; "..." markers indicate
// truncation at either end.
func extractSnippet(content string, res []*regexp.Regexp) string {
	if content == "" {
		return ""
	}
	const snippetLen = 150
	// If no regexes, return start of content without highlighting
	if len(res) == 0 {
		lines := strings.Split(content, "\n")
		for _, line := range lines {
			line = strings.TrimSpace(line)
			if line != "" && !strings.HasPrefix(line, "#") {
				runes := []rune(line)
				if len(runes) > snippetLen {
					return string(runes[:snippetLen]) + "..."
				}
				return line
			}
		}
		return ""
	}
	// Find first match position (byte-based)
	matchPos := -1
	for _, re := range res {
		loc := re.FindStringIndex(content)
		if loc != nil && (matchPos == -1 || loc[0] < matchPos) {
			matchPos = loc[0]
		}
	}
	// Convert to runes for safe slicing
	runes := []rune(content)
	runeLen := len(runes)
	var start, end int
	if matchPos == -1 {
		// No match found, use start of content
		start = 0
		end = snippetLen
		if end > runeLen {
			end = runeLen
		}
	} else {
		// Convert byte position to rune position
		matchRunePos := len([]rune(content[:matchPos]))
		// Extract snippet around match (rune-based): ~50 runes of
		// leading context, the rest trailing
		start = matchRunePos - 50
		if start < 0 {
			start = 0
		}
		end = start + snippetLen
		if end > runeLen {
			end = runeLen
		}
	}
	snippet := string(runes[start:end])
	// Trim to word boundaries so the snippet doesn't begin/end mid-word
	prefix := ""
	suffix := ""
	if start > 0 {
		if idx := strings.Index(snippet, " "); idx != -1 {
			snippet = snippet[idx+1:]
			prefix = "..."
		}
	}
	if end < runeLen {
		if idx := strings.LastIndex(snippet, " "); idx != -1 {
			snippet = snippet[:idx]
			suffix = "..."
		}
	}
	snippet = strings.TrimSpace(snippet)
	if snippet == "" {
		return ""
	}
	// Apply highlighting
	highlighted := highlight(snippet, res)
	return prefix + highlighted + suffix
}
// highlight wraps every regex match in text with **bold** markers.
//
// Overlapping or nested match ranges are merged before insertion, and the
// markers are inserted from the end of the string backwards so earlier
// insertions do not shift later byte offsets. Offsets come from the
// regexes themselves, so insertion points always fall on character
// boundaries.
func highlight(text string, res []*regexp.Regexp) string {
	if len(res) == 0 {
		return text
	}
	type match struct {
		start, end int
	}
	var matches []match
	for _, re := range res {
		indices := re.FindAllStringIndex(text, -1)
		for _, idx := range indices {
			matches = append(matches, match{idx[0], idx[1]})
		}
	}
	if len(matches) == 0 {
		return text
	}
	// Sort matches by start position; longer match first on ties
	sort.Slice(matches, func(i, j int) bool {
		if matches[i].start != matches[j].start {
			return matches[i].start < matches[j].start
		}
		return matches[i].end > matches[j].end
	})
	// Merge overlapping or adjacent matches
	var merged []match
	if len(matches) > 0 {
		curr := matches[0]
		for i := 1; i < len(matches); i++ {
			if matches[i].start <= curr.end {
				if matches[i].end > curr.end {
					curr.end = matches[i].end
				}
			} else {
				merged = append(merged, curr)
				curr = matches[i]
			}
		}
		merged = append(merged, curr)
	}
	// Build highlighted string from back to front to avoid position shifts
	result := text
	for i := len(merged) - 1; i >= 0; i-- {
		m := merged[i]
		result = result[:m.end] + "**" + result[m.end:]
		result = result[:m.start] + "**" + result[m.start:]
	}
	return result
}

340
search_test.go Normal file
View file

@ -0,0 +1,340 @@
package help
import (
"regexp"
"strings"
"testing"
"unicode/utf8"
"github.com/stretchr/testify/assert"
)
// TestTokenize_Good covers word splitting: lowercasing, punctuation as
// separators, single-character filtering, and the nil result for empty
// input.
func TestTokenize_Good(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected []string
	}{
		{
			name:     "simple words",
			input:    "hello world",
			expected: []string{"hello", "world"},
		},
		{
			name:     "mixed case",
			input:    "Hello World",
			expected: []string{"hello", "world"},
		},
		{
			name:     "with punctuation",
			input:    "Hello, world! How are you?",
			expected: []string{"hello", "world", "how", "are", "you"},
		},
		{
			name:     "single characters filtered",
			input:    "a b c hello d",
			expected: []string{"hello"},
		},
		{
			name:     "numbers included",
			input:    "version 2 release",
			expected: []string{"version", "release"},
		},
		{
			name:     "alphanumeric",
			input:    "v2.0 and config123",
			expected: []string{"v2", "and", "config123"},
		},
		{
			name:     "empty string",
			input:    "",
			expected: nil,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := tokenize(tt.input)
			assert.Equal(t, tt.expected, result)
		})
	}
}

// TestSearchIndex_Add_Good verifies that title, content, tag and section
// words all end up in the inverted index pointing at the topic ID.
func TestSearchIndex_Add_Good(t *testing.T) {
	idx := newSearchIndex()
	topic := &Topic{
		ID:      "getting-started",
		Title:   "Getting Started",
		Content: "Welcome to the guide.",
		Tags:    []string{"intro", "setup"},
		Sections: []Section{
			{ID: "installation", Title: "Installation", Content: "Install the CLI."},
		},
	}
	idx.Add(topic)
	// Verify topic is stored
	assert.NotNil(t, idx.topics["getting-started"])
	// Verify words are indexed
	assert.Contains(t, idx.index["getting"], "getting-started")
	assert.Contains(t, idx.index["started"], "getting-started")
	assert.Contains(t, idx.index["welcome"], "getting-started")
	assert.Contains(t, idx.index["guide"], "getting-started")
	assert.Contains(t, idx.index["intro"], "getting-started")
	assert.Contains(t, idx.index["setup"], "getting-started")
	assert.Contains(t, idx.index["installation"], "getting-started")
	assert.Contains(t, idx.index["cli"], "getting-started")
}
// TestSearchIndex_Search_Good covers ranking behaviour: exact and prefix
// matches, the title boost, and empty/no-result queries.
func TestSearchIndex_Search_Good(t *testing.T) {
	idx := newSearchIndex()
	// Add test topics
	idx.Add(&Topic{
		ID:      "getting-started",
		Title:   "Getting Started",
		Content: "Welcome to the CLI guide. This covers installation and setup.",
		Tags:    []string{"intro"},
	})
	idx.Add(&Topic{
		ID:      "configuration",
		Title:   "Configuration",
		Content: "Configure the CLI using environment variables.",
	})
	idx.Add(&Topic{
		ID:      "commands",
		Title:   "Commands Reference",
		Content: "List of all available commands.",
	})
	t.Run("single word query", func(t *testing.T) {
		results := idx.Search("configuration")
		assert.NotEmpty(t, results)
		assert.Equal(t, "configuration", results[0].Topic.ID)
	})
	t.Run("multi-word query", func(t *testing.T) {
		results := idx.Search("cli guide")
		assert.NotEmpty(t, results)
		// Should match getting-started (has both "cli" and "guide")
		assert.Equal(t, "getting-started", results[0].Topic.ID)
	})
	t.Run("title boost", func(t *testing.T) {
		results := idx.Search("commands")
		assert.NotEmpty(t, results)
		// "commands" appears in title of commands topic
		assert.Equal(t, "commands", results[0].Topic.ID)
	})
	t.Run("partial word matching", func(t *testing.T) {
		results := idx.Search("config")
		assert.NotEmpty(t, results)
		// Should match "configuration" and "configure"
		foundConfig := false
		for _, r := range results {
			if r.Topic.ID == "configuration" {
				foundConfig = true
				break
			}
		}
		assert.True(t, foundConfig, "Should find configuration topic with prefix match")
	})
	t.Run("no results", func(t *testing.T) {
		results := idx.Search("nonexistent")
		assert.Empty(t, results)
	})
	t.Run("empty query", func(t *testing.T) {
		results := idx.Search("")
		assert.Nil(t, results)
	})
}

// TestSearchIndex_Search_Good_WithSections verifies that section content
// and section titles both produce matches, and that the best-matching
// section is surfaced on the result.
func TestSearchIndex_Search_Good_WithSections(t *testing.T) {
	idx := newSearchIndex()
	idx.Add(&Topic{
		ID:      "installation",
		Title:   "Installation Guide",
		Content: "Overview of installation process.",
		Sections: []Section{
			{
				ID:      "linux",
				Title:   "Linux Installation",
				Content: "Run apt-get install core on Debian.",
			},
			{
				ID:      "macos",
				Title:   "macOS Installation",
				Content: "Use brew install core on macOS.",
			},
			{
				ID:      "windows",
				Title:   "Windows Installation",
				Content: "Download the installer from the website.",
			},
		},
	})
	t.Run("matches section content", func(t *testing.T) {
		results := idx.Search("debian")
		assert.NotEmpty(t, results)
		assert.Equal(t, "installation", results[0].Topic.ID)
		// Should identify the Linux section as best match
		if results[0].Section != nil {
			assert.Equal(t, "linux", results[0].Section.ID)
		}
	})
	t.Run("matches section title", func(t *testing.T) {
		results := idx.Search("windows")
		assert.NotEmpty(t, results)
		assert.Equal(t, "installation", results[0].Topic.ID)
	})
}
// TestExtractSnippet_Good covers snippet extraction: context around a
// match, the no-regex fallback, and empty content.
func TestExtractSnippet_Good(t *testing.T) {
	content := `This is the first paragraph with some introduction text.
Here is more content that talks about installation and setup.
The installation process is straightforward.
Finally, some closing remarks about the configuration.`
	t.Run("finds match and extracts context", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"installation"}))
		assert.Contains(t, snippet, "**installation**")
		assert.True(t, len(snippet) <= 250, "Snippet should be reasonably short")
	})
	t.Run("no query words returns start", func(t *testing.T) {
		snippet := extractSnippet(content, nil)
		assert.Contains(t, snippet, "first paragraph")
	})
	t.Run("empty content", func(t *testing.T) {
		snippet := extractSnippet("", compileRegexes([]string{"test"}))
		assert.Empty(t, snippet)
	})
}

// TestExtractSnippet_Highlighting covers **bold** marker insertion:
// multiple words, case-insensitivity, partial-word and overlapping
// matches.
func TestExtractSnippet_Highlighting(t *testing.T) {
	content := "The quick brown fox jumps over the lazy dog."
	t.Run("simple highlighting", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"quick", "fox"}))
		assert.Contains(t, snippet, "**quick**")
		assert.Contains(t, snippet, "**fox**")
	})
	t.Run("case insensitive highlighting", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"QUICK", "Fox"}))
		assert.Contains(t, snippet, "**quick**")
		assert.Contains(t, snippet, "**fox**")
	})
	t.Run("partial word matching", func(t *testing.T) {
		content := "The configuration is complete."
		snippet := extractSnippet(content, compileRegexes([]string{"config"}))
		assert.Contains(t, snippet, "**config**uration")
	})
	t.Run("overlapping matches", func(t *testing.T) {
		content := "Searching for something."
		// Both "search" and "searching" match
		snippet := extractSnippet(content, compileRegexes([]string{"search", "searching"}))
		assert.Equal(t, "**Searching** for something.", snippet)
	})
}

// TestExtractSnippet_Good_UTF8 checks that rune-based slicing never
// produces invalid UTF-8, even when truncating multi-byte content.
func TestExtractSnippet_Good_UTF8(t *testing.T) {
	// Content with multi-byte UTF-8 characters
	content := "日本語のテキストです。This contains Japanese text. 検索機能をテストします。"
	t.Run("handles multi-byte characters without corruption", func(t *testing.T) {
		snippet := extractSnippet(content, compileRegexes([]string{"japanese"}))
		// Should not panic or produce invalid UTF-8
		assert.True(t, len(snippet) > 0)
		// Verify the result is valid UTF-8
		assert.True(t, isValidUTF8(snippet), "Snippet should be valid UTF-8")
	})
	t.Run("truncates multi-byte content safely", func(t *testing.T) {
		// Long content that will be truncated
		longContent := strings.Repeat("日本語", 100) // 300 characters
		snippet := extractSnippet(longContent, nil)
		assert.True(t, isValidUTF8(snippet), "Truncated snippet should be valid UTF-8")
	})
}
// compileRegexes is a test helper that builds case-insensitive literal
// matchers for the given words, silently skipping any that fail to
// compile.
func compileRegexes(words []string) []*regexp.Regexp {
	var res []*regexp.Regexp
	for _, word := range words {
		re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(word))
		if err != nil {
			continue
		}
		res = append(res, re)
	}
	return res
}
// isValidUTF8 reports whether s is entirely valid UTF-8.
// The standard library already provides exactly this check; the previous
// hand-rolled DecodeRuneInString loop was equivalent but redundant.
func isValidUTF8(s string) bool {
	return utf8.ValidString(s)
}
// TestCountMatches_Good covers case-insensitive substring counting of
// query words.
func TestCountMatches_Good(t *testing.T) {
	tests := []struct {
		text     string
		words    []string
		expected int
	}{
		{"Hello world", []string{"hello"}, 1},
		{"Hello world", []string{"hello", "world"}, 2},
		{"Hello world", []string{"foo", "bar"}, 0},
		{"The quick brown fox", []string{"quick", "fox", "dog"}, 2},
	}
	for _, tt := range tests {
		result := countMatches(tt.text, tt.words)
		assert.Equal(t, tt.expected, result)
	}
}

// TestSearchResult_Score_Good verifies the +10 title boost: a title hit
// must outrank a content-only hit for the same query.
func TestSearchResult_Score_Good(t *testing.T) {
	idx := newSearchIndex()
	// Topic with query word in title should score higher
	idx.Add(&Topic{
		ID:      "topic-in-title",
		Title:   "Installation Guide",
		Content: "Some content here.",
	})
	idx.Add(&Topic{
		ID:      "topic-in-content",
		Title:   "Some Other Topic",
		Content: "This covers installation steps.",
	})
	results := idx.Search("installation")
	assert.Len(t, results, 2)
	// Title match should score higher
	assert.Equal(t, "topic-in-title", results[0].Topic.ID)
	assert.Greater(t, results[0].Score, results[1].Score)
}

31
topic.go Normal file
View file

@ -0,0 +1,31 @@
// Package help provides display-agnostic help content management.
package help

// Topic represents a help topic/page.
type Topic struct {
	ID       string    `json:"id"`       // URL-safe identifier (see GenerateID)
	Title    string    `json:"title"`    // human-readable title
	Path     string    `json:"path"`     // source file path, when parsed from a file
	Content  string    `json:"content"`  // markdown body, frontmatter stripped
	Sections []Section `json:"sections"` // heading-delimited sections of Content
	Tags     []string  `json:"tags"`     // search keywords from frontmatter
	Related  []string  `json:"related"`  // IDs of related topics
	Order    int       `json:"order"`    // For sorting
}

// Section represents a heading within a topic.
type Section struct {
	ID      string `json:"id"`      // URL-safe identifier derived from Title
	Title   string `json:"title"`   // heading text, whitespace-trimmed
	Level   int    `json:"level"`   // heading depth: 1 (#) to 6 (######)
	Line    int    `json:"line"`    // Start line in content (1-indexed)
	Content string `json:"content"` // Content under heading
}

// Frontmatter represents YAML frontmatter metadata.
type Frontmatter struct {
	Title   string   `yaml:"title"`
	Tags    []string `yaml:"tags"`
	Related []string `yaml:"related"`
	Order   int      `yaml:"order"`
}