feat: extract go-help from core/go pkg/help
YAML-based help catalog with topic search. Single external dependency: gopkg.in/yaml.v3. Module: forge.lthn.ai/core/go-help. Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
commit
ad5e70937b
9 changed files with 1412 additions and 0 deletions
25
CLAUDE.md
Normal file
25
CLAUDE.md
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# CLAUDE.md
|
||||
|
||||
## What This Is
|
||||
|
||||
YAML-based help catalog with topic search and rendering. Module: `forge.lthn.ai/core/go-help`
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
go test ./... # Run all tests
|
||||
go test -v -run Name # Run single test
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
- `Topic` defines help entries with title, body, tags, related topics
|
||||
- `Catalog` loads topics from YAML files
|
||||
- `Search` provides fuzzy search across topics by title, tags, body
|
||||
|
||||
## Coding Standards
|
||||
|
||||
- UK English
|
||||
- `go test ./...` must pass before commit
|
||||
- Conventional commits: `type(scope): description`
|
||||
- Co-Author: `Co-Authored-By: Virgil <virgil@lethean.io>`
|
||||
87
catalog.go
Normal file
87
catalog.go
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
package help
|
||||
|
||||
import (
	"fmt"
	"sort"
)
|
||||
|
||||
// Catalog manages help topics.
//
// Topics are keyed by ID, and a full-text search index is kept in step
// with the topic map as entries are added via Add.
type Catalog struct {
	topics map[string]*Topic // topic ID -> topic; populated via Add
	index  *searchIndex      // inverted word index over titles, content, sections and tags
}
|
||||
|
||||
// DefaultCatalog returns a catalog with built-in topics.
|
||||
func DefaultCatalog() *Catalog {
|
||||
c := &Catalog{
|
||||
topics: make(map[string]*Topic),
|
||||
index: newSearchIndex(),
|
||||
}
|
||||
|
||||
// Add default topics
|
||||
c.Add(&Topic{
|
||||
ID: "getting-started",
|
||||
Title: "Getting Started",
|
||||
Content: `# Getting Started
|
||||
|
||||
Welcome to Core! This CLI tool helps you manage development workflows.
|
||||
|
||||
## Common Commands
|
||||
|
||||
- core dev: Development workflows
|
||||
- core setup: Setup repository
|
||||
- core doctor: Check environment health
|
||||
- core test: Run tests
|
||||
|
||||
## Next Steps
|
||||
|
||||
Run 'core help <topic>' to learn more about a specific topic.
|
||||
`,
|
||||
})
|
||||
c.Add(&Topic{
|
||||
ID: "config",
|
||||
Title: "Configuration",
|
||||
Content: `# Configuration
|
||||
|
||||
Core is configured via environment variables and config files.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
- CORE_DEBUG: Enable debug logging
|
||||
- GITHUB_TOKEN: GitHub API token
|
||||
|
||||
## Config Files
|
||||
|
||||
Config is stored in ~/.core/config.yaml
|
||||
`,
|
||||
})
|
||||
return c
|
||||
}
|
||||
|
||||
// Add adds a topic to the catalog and indexes it for search.
// Adding a topic whose ID already exists replaces the stored entry;
// note the search index only accumulates words and is never purged,
// so words from the replaced topic remain indexed.
func (c *Catalog) Add(t *Topic) {
	c.topics[t.ID] = t
	c.index.Add(t)
}
|
||||
|
||||
// List returns all topics.
|
||||
func (c *Catalog) List() []*Topic {
|
||||
var list []*Topic
|
||||
for _, t := range c.topics {
|
||||
list = append(list, t)
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
// Search searches for topics.
|
||||
func (c *Catalog) Search(query string) []*SearchResult {
|
||||
return c.index.Search(query)
|
||||
}
|
||||
|
||||
// Get returns a topic by ID.
|
||||
func (c *Catalog) Get(id string) (*Topic, error) {
|
||||
t, ok := c.topics[id]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("topic not found: %s", id)
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
13
go.mod
Normal file
13
go.mod
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
module forge.lthn.ai/core/go-help
|
||||
|
||||
go 1.25.5
|
||||
|
||||
require (
|
||||
github.com/stretchr/testify v1.11.1
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
)
|
||||
10
go.sum
Normal file
10
go.sum
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
174
parser.go
Normal file
174
parser.go
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
package help
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var (
	// frontmatterRegex matches a YAML frontmatter block delimited by ---
	// at the very start of the document. It tolerates both LF and CRLF
	// line endings and an empty block ("---\n---\n"): the (?s) flag lets
	// .*? span newlines, and the lazy quantifier stops at the first
	// closing delimiter.
	frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(.*?)(?:\r?\n)?---\r?\n?`)

	// headingRegex matches one markdown ATX heading line: one to six
	// '#' characters, whitespace, then the heading text.
	headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`)
)
|
||||
|
||||
// ParseTopic parses a markdown file into a Topic.
//
// Title and ID resolution order:
//  1. frontmatter `title` (ID generated from it),
//  2. otherwise the first H1 heading in the body,
//  3. otherwise the ID is derived from the file name and Title is left empty.
//
// The returned Content never includes the frontmatter block. The error
// result is currently always nil; it is kept for API stability.
func ParseTopic(path string, content []byte) (*Topic, error) {
	contentStr := string(content)

	topic := &Topic{
		Path:     path,
		ID:       GenerateID(pathToTitle(path)), // fallback ID from the file name
		Sections: []Section{},
		Tags:     []string{},
		Related:  []string{},
	}

	// Extract YAML frontmatter if present.
	fm, body := ExtractFrontmatter(contentStr)
	if fm != nil {
		topic.Title = fm.Title
		topic.Tags = fm.Tags
		topic.Related = fm.Related
		topic.Order = fm.Order
		if topic.Title != "" {
			// A frontmatter title wins over the path-derived ID.
			topic.ID = GenerateID(topic.Title)
		}
	}

	topic.Content = body

	// Extract sections from headings.
	topic.Sections = ExtractSections(body)

	// If frontmatter supplied no title, fall back to the first H1.
	if topic.Title == "" && len(topic.Sections) > 0 {
		for _, s := range topic.Sections {
			if s.Level == 1 {
				topic.Title = s.Title
				topic.ID = GenerateID(s.Title)
				break
			}
		}
	}

	return topic, nil
}
|
||||
|
||||
// ExtractFrontmatter extracts YAML frontmatter from markdown content.
// It returns the parsed frontmatter and the remaining content.
//
// If there is no frontmatter block, or the block is not valid YAML, the
// frontmatter result is nil and the content is returned unchanged —
// malformed metadata deliberately degrades to "no metadata" rather than
// failing the whole parse.
func ExtractFrontmatter(content string) (*Frontmatter, string) {
	match := frontmatterRegex.FindStringSubmatch(content)
	if match == nil {
		return nil, content
	}

	var fm Frontmatter
	if err := yaml.Unmarshal([]byte(match[1]), &fm); err != nil {
		// Invalid YAML: treat as if no frontmatter were present.
		return nil, content
	}

	// match[0] spans delimiter to delimiter; everything after it is the
	// document body.
	body := content[len(match[0]):]
	return &fm, body
}
|
||||
|
||||
// ExtractSections parses markdown and returns sections.
|
||||
func ExtractSections(content string) []Section {
|
||||
lines := strings.Split(content, "\n")
|
||||
sections := []Section{}
|
||||
|
||||
var currentSection *Section
|
||||
var contentLines []string
|
||||
|
||||
for i, line := range lines {
|
||||
lineNum := i + 1 // 1-indexed
|
||||
|
||||
match := headingRegex.FindStringSubmatch(line)
|
||||
if match != nil {
|
||||
// Save previous section's content
|
||||
if currentSection != nil {
|
||||
currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
|
||||
}
|
||||
|
||||
// Start new section
|
||||
level := len(match[1])
|
||||
title := strings.TrimSpace(match[2])
|
||||
|
||||
section := Section{
|
||||
ID: GenerateID(title),
|
||||
Title: title,
|
||||
Level: level,
|
||||
Line: lineNum,
|
||||
}
|
||||
sections = append(sections, section)
|
||||
currentSection = §ions[len(sections)-1]
|
||||
contentLines = []string{}
|
||||
} else if currentSection != nil {
|
||||
contentLines = append(contentLines, line)
|
||||
}
|
||||
}
|
||||
|
||||
// Save last section's content
|
||||
if currentSection != nil {
|
||||
currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
|
||||
}
|
||||
|
||||
return sections
|
||||
}
|
||||
|
||||
// GenerateID creates a URL-safe ID from a title.
// "Getting Started" -> "getting-started"
//
// Letters and digits are kept (lowercased); runs of spaces, hyphens and
// underscores collapse into a single hyphen; all other characters are
// dropped. The result never starts or ends with a hyphen.
func GenerateID(title string) string {
	var b strings.Builder
	pendingSep := false // a separator was seen since the last kept rune

	for _, r := range strings.ToLower(title) {
		switch {
		case unicode.IsLetter(r) || unicode.IsDigit(r):
			// Flush at most one hyphen between runs of kept characters;
			// suppressing the flush while empty avoids a leading hyphen.
			if pendingSep && b.Len() > 0 {
				b.WriteRune('-')
			}
			pendingSep = false
			b.WriteRune(r)
		case unicode.IsSpace(r) || r == '-' || r == '_':
			pendingSep = true
		}
		// Punctuation and symbols are silently skipped.
	}

	// A trailing separator is never flushed, so no Trim is needed.
	return b.String()
}
|
||||
|
||||
// pathToTitle converts a file path to a human-readable title.
// "getting-started.md" -> "Getting Started"
//
// Fix: the original capitalised each word via word[0] — a single byte —
// which corrupts words whose first rune is multi-byte UTF-8 (e.g. an
// accented letter). This version decodes runes before changing case.
func pathToTitle(path string) string {
	// Strip directories (cross-platform) and the file extension.
	name := filepath.Base(path)
	if ext := filepath.Ext(name); ext != "" {
		name = strings.TrimSuffix(name, ext)
	}

	// Hyphens and underscores act as word separators.
	name = strings.ReplaceAll(name, "-", " ")
	name = strings.ReplaceAll(name, "_", " ")

	// Title-case each word: first rune upper, remainder lower.
	// strings.Fields never yields empty words, so r[0] is always safe.
	words := strings.Fields(name)
	for i, word := range words {
		r := []rune(word)
		r[0] = unicode.ToUpper(r[0])
		words[i] = string(r[:1]) + strings.ToLower(string(r[1:]))
	}
	return strings.Join(words, " ")
}
|
||||
339
parser_test.go
Normal file
339
parser_test.go
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
package help
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestGenerateID_Good(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "simple title",
|
||||
input: "Getting Started",
|
||||
expected: "getting-started",
|
||||
},
|
||||
{
|
||||
name: "already lowercase",
|
||||
input: "installation",
|
||||
expected: "installation",
|
||||
},
|
||||
{
|
||||
name: "multiple spaces",
|
||||
input: "Quick Start Guide",
|
||||
expected: "quick-start-guide",
|
||||
},
|
||||
{
|
||||
name: "with numbers",
|
||||
input: "Chapter 1 Introduction",
|
||||
expected: "chapter-1-introduction",
|
||||
},
|
||||
{
|
||||
name: "special characters",
|
||||
input: "What's New? (v2.0)",
|
||||
expected: "whats-new-v20",
|
||||
},
|
||||
{
|
||||
name: "underscores",
|
||||
input: "config_file_reference",
|
||||
expected: "config-file-reference",
|
||||
},
|
||||
{
|
||||
name: "hyphens preserved",
|
||||
input: "pre-commit hooks",
|
||||
expected: "pre-commit-hooks",
|
||||
},
|
||||
{
|
||||
name: "leading trailing spaces",
|
||||
input: " Trimmed Title ",
|
||||
expected: "trimmed-title",
|
||||
},
|
||||
{
|
||||
name: "unicode letters",
|
||||
input: "Configuración Básica",
|
||||
expected: "configuración-básica",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := GenerateID(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Good(t *testing.T) {
|
||||
content := `---
|
||||
title: Getting Started
|
||||
tags: [intro, setup]
|
||||
order: 1
|
||||
related:
|
||||
- installation
|
||||
- configuration
|
||||
---
|
||||
|
||||
# Welcome
|
||||
|
||||
This is the content.
|
||||
`
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
assert.NotNil(t, fm)
|
||||
assert.Equal(t, "Getting Started", fm.Title)
|
||||
assert.Equal(t, []string{"intro", "setup"}, fm.Tags)
|
||||
assert.Equal(t, 1, fm.Order)
|
||||
assert.Equal(t, []string{"installation", "configuration"}, fm.Related)
|
||||
assert.Contains(t, body, "# Welcome")
|
||||
assert.Contains(t, body, "This is the content.")
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Good_NoFrontmatter(t *testing.T) {
|
||||
content := `# Just a Heading
|
||||
|
||||
Some content here.
|
||||
`
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
assert.Nil(t, fm)
|
||||
assert.Equal(t, content, body)
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Good_CRLF(t *testing.T) {
|
||||
// Content with CRLF line endings (Windows-style)
|
||||
content := "---\r\ntitle: CRLF Test\r\n---\r\n\r\n# Content"
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
assert.NotNil(t, fm)
|
||||
assert.Equal(t, "CRLF Test", fm.Title)
|
||||
assert.Contains(t, body, "# Content")
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Good_Empty(t *testing.T) {
|
||||
// Empty frontmatter block
|
||||
content := "---\n---\n# Content"
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
// Empty frontmatter should parse successfully
|
||||
assert.NotNil(t, fm)
|
||||
assert.Equal(t, "", fm.Title)
|
||||
assert.Contains(t, body, "# Content")
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) {
|
||||
content := `---
|
||||
title: [invalid yaml
|
||||
---
|
||||
|
||||
# Content
|
||||
`
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
// Invalid YAML should return nil frontmatter and original content
|
||||
assert.Nil(t, fm)
|
||||
assert.Equal(t, content, body)
|
||||
}
|
||||
|
||||
func TestExtractSections_Good(t *testing.T) {
|
||||
content := `# Main Title
|
||||
|
||||
Introduction paragraph.
|
||||
|
||||
## Installation
|
||||
|
||||
Install instructions here.
|
||||
More details.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
You need these things.
|
||||
|
||||
## Configuration
|
||||
|
||||
Config info here.
|
||||
`
|
||||
|
||||
sections := ExtractSections(content)
|
||||
|
||||
assert.Len(t, sections, 4)
|
||||
|
||||
// Main Title (H1)
|
||||
assert.Equal(t, "main-title", sections[0].ID)
|
||||
assert.Equal(t, "Main Title", sections[0].Title)
|
||||
assert.Equal(t, 1, sections[0].Level)
|
||||
assert.Equal(t, 1, sections[0].Line)
|
||||
assert.Contains(t, sections[0].Content, "Introduction paragraph.")
|
||||
|
||||
// Installation (H2)
|
||||
assert.Equal(t, "installation", sections[1].ID)
|
||||
assert.Equal(t, "Installation", sections[1].Title)
|
||||
assert.Equal(t, 2, sections[1].Level)
|
||||
assert.Contains(t, sections[1].Content, "Install instructions here.")
|
||||
assert.Contains(t, sections[1].Content, "More details.")
|
||||
|
||||
// Prerequisites (H3)
|
||||
assert.Equal(t, "prerequisites", sections[2].ID)
|
||||
assert.Equal(t, "Prerequisites", sections[2].Title)
|
||||
assert.Equal(t, 3, sections[2].Level)
|
||||
assert.Contains(t, sections[2].Content, "You need these things.")
|
||||
|
||||
// Configuration (H2)
|
||||
assert.Equal(t, "configuration", sections[3].ID)
|
||||
assert.Equal(t, "Configuration", sections[3].Title)
|
||||
assert.Equal(t, 2, sections[3].Level)
|
||||
}
|
||||
|
||||
func TestExtractSections_Good_AllHeadingLevels(t *testing.T) {
|
||||
content := `# H1
|
||||
## H2
|
||||
### H3
|
||||
#### H4
|
||||
##### H5
|
||||
###### H6
|
||||
`
|
||||
|
||||
sections := ExtractSections(content)
|
||||
|
||||
assert.Len(t, sections, 6)
|
||||
for i, level := range []int{1, 2, 3, 4, 5, 6} {
|
||||
assert.Equal(t, level, sections[i].Level)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractSections_Good_Empty(t *testing.T) {
|
||||
content := `Just plain text.
|
||||
No headings here.
|
||||
`
|
||||
|
||||
sections := ExtractSections(content)
|
||||
|
||||
assert.Empty(t, sections)
|
||||
}
|
||||
|
||||
func TestParseTopic_Good(t *testing.T) {
|
||||
content := []byte(`---
|
||||
title: Quick Start Guide
|
||||
tags: [intro, quickstart]
|
||||
order: 5
|
||||
related:
|
||||
- installation
|
||||
---
|
||||
|
||||
# Quick Start Guide
|
||||
|
||||
Welcome to the guide.
|
||||
|
||||
## First Steps
|
||||
|
||||
Do this first.
|
||||
|
||||
## Next Steps
|
||||
|
||||
Then do this.
|
||||
`)
|
||||
|
||||
topic, err := ParseTopic("docs/quick-start.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
|
||||
// Check metadata from frontmatter
|
||||
assert.Equal(t, "quick-start-guide", topic.ID)
|
||||
assert.Equal(t, "Quick Start Guide", topic.Title)
|
||||
assert.Equal(t, "docs/quick-start.md", topic.Path)
|
||||
assert.Equal(t, []string{"intro", "quickstart"}, topic.Tags)
|
||||
assert.Equal(t, []string{"installation"}, topic.Related)
|
||||
assert.Equal(t, 5, topic.Order)
|
||||
|
||||
// Check sections
|
||||
assert.Len(t, topic.Sections, 3)
|
||||
assert.Equal(t, "quick-start-guide", topic.Sections[0].ID)
|
||||
assert.Equal(t, "first-steps", topic.Sections[1].ID)
|
||||
assert.Equal(t, "next-steps", topic.Sections[2].ID)
|
||||
|
||||
// Content should not include frontmatter
|
||||
assert.NotContains(t, topic.Content, "---")
|
||||
assert.Contains(t, topic.Content, "# Quick Start Guide")
|
||||
}
|
||||
|
||||
func TestParseTopic_Good_NoFrontmatter(t *testing.T) {
|
||||
content := []byte(`# Getting Started
|
||||
|
||||
This is a simple doc.
|
||||
|
||||
## Installation
|
||||
|
||||
Install it here.
|
||||
`)
|
||||
|
||||
topic, err := ParseTopic("getting-started.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
|
||||
// Title should come from first H1
|
||||
assert.Equal(t, "Getting Started", topic.Title)
|
||||
assert.Equal(t, "getting-started", topic.ID)
|
||||
|
||||
// Sections extracted
|
||||
assert.Len(t, topic.Sections, 2)
|
||||
}
|
||||
|
||||
func TestParseTopic_Good_NoHeadings(t *testing.T) {
|
||||
content := []byte(`---
|
||||
title: Plain Content
|
||||
---
|
||||
|
||||
Just some text without any headings.
|
||||
`)
|
||||
|
||||
topic, err := ParseTopic("plain.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
assert.Equal(t, "Plain Content", topic.Title)
|
||||
assert.Equal(t, "plain-content", topic.ID)
|
||||
assert.Empty(t, topic.Sections)
|
||||
}
|
||||
|
||||
func TestParseTopic_Good_IDFromPath(t *testing.T) {
|
||||
content := []byte(`Just content, no frontmatter or headings.`)
|
||||
|
||||
topic, err := ParseTopic("commands/dev-workflow.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
|
||||
// ID and title should be derived from path
|
||||
assert.Equal(t, "dev-workflow", topic.ID)
|
||||
assert.Equal(t, "", topic.Title) // No title available
|
||||
}
|
||||
|
||||
func TestPathToTitle_Good(t *testing.T) {
|
||||
tests := []struct {
|
||||
path string
|
||||
expected string
|
||||
}{
|
||||
{"getting-started.md", "Getting Started"},
|
||||
{"commands/dev.md", "Dev"},
|
||||
{"path/to/file_name.md", "File Name"},
|
||||
{"UPPERCASE.md", "Uppercase"},
|
||||
{"no-extension", "No Extension"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.path, func(t *testing.T) {
|
||||
result := pathToTitle(tt.path)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
393
search.go
Normal file
393
search.go
Normal file
|
|
@ -0,0 +1,393 @@
|
|||
package help
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// SearchResult represents a single search match, scored and snippeted.
type SearchResult struct {
	Topic   *Topic   // the matched topic
	Section *Section // best-matching section; nil for a topic-level match
	Score   float64  // word hits plus title / section-title boosts
	Snippet string   // short context around the first match, with **bold** highlights
}
|
||||
|
||||
// searchIndex provides full-text search over topics via an inverted
// index from lowercased words to the IDs of topics containing them.
type searchIndex struct {
	topics map[string]*Topic   // topicID -> Topic
	index  map[string][]string // word -> IDs of topics containing that word
}
||||
|
||||
// newSearchIndex creates a new, empty search index with both maps
// initialised (a zero-value searchIndex would panic on first write).
func newSearchIndex() *searchIndex {
	return &searchIndex{
		topics: make(map[string]*Topic),
		index:  make(map[string][]string),
	}
}
||||
|
||||
// Add indexes a topic for searching.
|
||||
func (i *searchIndex) Add(topic *Topic) {
|
||||
i.topics[topic.ID] = topic
|
||||
|
||||
// Index title words with boost
|
||||
for _, word := range tokenize(topic.Title) {
|
||||
i.addToIndex(word, topic.ID)
|
||||
}
|
||||
|
||||
// Index content words
|
||||
for _, word := range tokenize(topic.Content) {
|
||||
i.addToIndex(word, topic.ID)
|
||||
}
|
||||
|
||||
// Index section titles and content
|
||||
for _, section := range topic.Sections {
|
||||
for _, word := range tokenize(section.Title) {
|
||||
i.addToIndex(word, topic.ID)
|
||||
}
|
||||
for _, word := range tokenize(section.Content) {
|
||||
i.addToIndex(word, topic.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// Index tags
|
||||
for _, tag := range topic.Tags {
|
||||
for _, word := range tokenize(tag) {
|
||||
i.addToIndex(word, topic.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// addToIndex adds a word-to-topic mapping.
|
||||
func (i *searchIndex) addToIndex(word, topicID string) {
|
||||
// Avoid duplicates
|
||||
for _, id := range i.index[word] {
|
||||
if id == topicID {
|
||||
return
|
||||
}
|
||||
}
|
||||
i.index[word] = append(i.index[word], topicID)
|
||||
}
|
||||
|
||||
// Search finds topics matching the query and returns them ordered by
// descending score, with ties broken alphabetically by topic title.
//
// Scoring: +1.0 per query word found exactly in the index, +0.5 per
// prefix (partial-word) match, +10.0 if any query word appears in the
// topic title, and +5.0 if any appears in the best-matching section's
// title.
func (i *searchIndex) Search(query string) []*SearchResult {
	queryWords := tokenize(query)
	if len(queryWords) == 0 {
		return nil
	}

	// Track scores per topic ID.
	scores := make(map[string]float64)

	for _, word := range queryWords {
		// Exact matches.
		if topicIDs, ok := i.index[word]; ok {
			for _, topicID := range topicIDs {
				scores[topicID] += 1.0
			}
		}

		// Prefix matches (partial word matching). This scans the whole
		// index per query word — linear in vocabulary size, which is
		// acceptable for a help catalog.
		for indexWord, topicIDs := range i.index {
			if strings.HasPrefix(indexWord, word) && indexWord != word {
				for _, topicID := range topicIDs {
					scores[topicID] += 0.5 // lower score for partial matches
				}
			}
		}
	}

	// Pre-compile case-insensitive regexes used for snippet extraction
	// and highlighting; single-character words are skipped.
	var res []*regexp.Regexp
	for _, word := range queryWords {
		if len(word) >= 2 {
			if re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(word)); err == nil {
				res = append(res, re)
			}
		}
	}

	// Build results with title boosts and snippet extraction.
	var results []*SearchResult
	for topicID, score := range scores {
		topic := i.topics[topicID]
		if topic == nil {
			continue
		}

		// Topic-title boost: any query word appearing in the title.
		titleLower := strings.ToLower(topic.Title)
		hasTitleMatch := false
		for _, word := range queryWords {
			if strings.Contains(titleLower, word) {
				hasTitleMatch = true
				break
			}
		}
		if hasTitleMatch {
			score += 10.0
		}

		// Find the best-matching section and extract a snippet from it.
		section, snippet := i.findBestMatch(topic, queryWords, res)

		// Section-title boost.
		if section != nil {
			sectionTitleLower := strings.ToLower(section.Title)
			hasSectionTitleMatch := false
			for _, word := range queryWords {
				if strings.Contains(sectionTitleLower, word) {
					hasSectionTitleMatch = true
					break
				}
			}
			if hasSectionTitleMatch {
				score += 5.0
			}
		}

		results = append(results, &SearchResult{
			Topic:   topic,
			Section: section,
			Score:   score,
			Snippet: snippet,
		})
	}

	// Sort by score (highest first); title ascending makes the order
	// deterministic when scores tie.
	sort.Slice(results, func(a, b int) bool {
		if results[a].Score != results[b].Score {
			return results[a].Score > results[b].Score
		}
		return results[a].Topic.Title < results[b].Topic.Title
	})

	return results
}
|
||||
|
||||
// findBestMatch picks the section of topic that best matches the query
// words and extracts a display snippet.
//
// Section score = 2*(title word hits) + (content word hits); the
// highest-scoring section wins. If only the topic title matched, or no
// section scored above zero, the snippet falls back to the topic-level
// content. The returned section is nil when no section outscored zero.
func (i *searchIndex) findBestMatch(topic *Topic, queryWords []string, res []*regexp.Regexp) (*Section, string) {
	var bestSection *Section
	var bestSnippet string
	bestScore := 0

	// A topic-title match seeds the snippet from the topic body.
	titleScore := countMatches(topic.Title, queryWords)
	if titleScore > 0 {
		bestSnippet = extractSnippet(topic.Content, res)
	}

	// Check each section; index into the slice so bestSection points at
	// the stored element rather than a loop copy.
	for idx := range topic.Sections {
		section := &topic.Sections[idx]
		sectionScore := countMatches(section.Title, queryWords)
		contentScore := countMatches(section.Content, queryWords)
		totalScore := sectionScore*2 + contentScore // title matches worth more

		if totalScore > bestScore {
			bestScore = totalScore
			bestSection = section
			if contentScore > 0 {
				bestSnippet = extractSnippet(section.Content, res)
			} else {
				// Title-only match: show the section's opening text
				// without match highlighting.
				bestSnippet = extractSnippet(section.Content, nil)
			}
		}
	}

	// Last resort: snippet from the topic content.
	if bestSnippet == "" && topic.Content != "" {
		bestSnippet = extractSnippet(topic.Content, res)
	}

	return bestSection, bestSnippet
}
|
||||
|
||||
// tokenize splits text into lowercase words for indexing/searching.
|
||||
func tokenize(text string) []string {
|
||||
text = strings.ToLower(text)
|
||||
var words []string
|
||||
var word strings.Builder
|
||||
|
||||
for _, r := range text {
|
||||
if unicode.IsLetter(r) || unicode.IsDigit(r) {
|
||||
word.WriteRune(r)
|
||||
} else if word.Len() > 0 {
|
||||
w := word.String()
|
||||
if len(w) >= 2 { // Skip single-character words
|
||||
words = append(words, w)
|
||||
}
|
||||
word.Reset()
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget the last word
|
||||
if word.Len() >= 2 {
|
||||
words = append(words, word.String())
|
||||
}
|
||||
|
||||
return words
|
||||
}
|
||||
|
||||
// countMatches reports how many of the query words occur in text as
// case-insensitive substrings. Query words are assumed to already be
// lowercase (tokenize output).
func countMatches(text string, queryWords []string) int {
	lower := strings.ToLower(text)
	n := 0
	for _, w := range queryWords {
		if strings.Contains(lower, w) {
			n++
		}
	}
	return n
}
|
||||
|
||||
// extractSnippet extracts a short snippet (up to ~150 runes) around the
// first regex match in content and highlights all matches in **bold**.
//
// With no regexes it returns the first non-empty, non-heading line,
// truncated. Slicing is done on runes so multi-byte characters are
// never split; regexp positions are byte offsets and are converted
// before slicing.
func extractSnippet(content string, res []*regexp.Regexp) string {
	if content == "" {
		return ""
	}

	const snippetLen = 150

	// No regexes: return the start of the content without highlighting.
	if len(res) == 0 {
		lines := strings.Split(content, "\n")
		for _, line := range lines {
			line = strings.TrimSpace(line)
			if line != "" && !strings.HasPrefix(line, "#") {
				runes := []rune(line)
				if len(runes) > snippetLen {
					return string(runes[:snippetLen]) + "..."
				}
				return line
			}
		}
		return ""
	}

	// Find the earliest match position across all regexes (byte-based).
	matchPos := -1
	for _, re := range res {
		loc := re.FindStringIndex(content)
		if loc != nil && (matchPos == -1 || loc[0] < matchPos) {
			matchPos = loc[0]
		}
	}

	// Convert to runes for safe slicing.
	runes := []rune(content)
	runeLen := len(runes)

	var start, end int
	if matchPos == -1 {
		// No match found anywhere: snippet from the start of content.
		start = 0
		end = snippetLen
		if end > runeLen {
			end = runeLen
		}
	} else {
		// Convert the byte position of the match to a rune position.
		matchRunePos := len([]rune(content[:matchPos]))

		// Keep up to 50 runes of leading context before the match.
		start = matchRunePos - 50
		if start < 0 {
			start = 0
		}

		end = start + snippetLen
		if end > runeLen {
			end = runeLen
		}
	}

	snippet := string(runes[start:end])

	// Trim ragged edges to word boundaries, adding "..." on whichever
	// side the snippet does not touch the start/end of the content.
	prefix := ""
	suffix := ""
	if start > 0 {
		if idx := strings.Index(snippet, " "); idx != -1 {
			snippet = snippet[idx+1:]
			prefix = "..."
		}
	}
	if end < runeLen {
		if idx := strings.LastIndex(snippet, " "); idx != -1 {
			snippet = snippet[:idx]
			suffix = "..."
		}
	}

	snippet = strings.TrimSpace(snippet)
	if snippet == "" {
		return ""
	}

	// Apply **bold** highlighting to the trimmed snippet.
	highlighted := highlight(snippet, res)

	return prefix + highlighted + suffix
}
|
||||
|
||||
// highlight wraps matches in **bold**.
|
||||
func highlight(text string, res []*regexp.Regexp) string {
|
||||
if len(res) == 0 {
|
||||
return text
|
||||
}
|
||||
|
||||
type match struct {
|
||||
start, end int
|
||||
}
|
||||
var matches []match
|
||||
|
||||
for _, re := range res {
|
||||
indices := re.FindAllStringIndex(text, -1)
|
||||
for _, idx := range indices {
|
||||
matches = append(matches, match{idx[0], idx[1]})
|
||||
}
|
||||
}
|
||||
|
||||
if len(matches) == 0 {
|
||||
return text
|
||||
}
|
||||
|
||||
// Sort matches by start position
|
||||
sort.Slice(matches, func(i, j int) bool {
|
||||
if matches[i].start != matches[j].start {
|
||||
return matches[i].start < matches[j].start
|
||||
}
|
||||
return matches[i].end > matches[j].end
|
||||
})
|
||||
|
||||
// Merge overlapping or adjacent matches
|
||||
var merged []match
|
||||
if len(matches) > 0 {
|
||||
curr := matches[0]
|
||||
for i := 1; i < len(matches); i++ {
|
||||
if matches[i].start <= curr.end {
|
||||
if matches[i].end > curr.end {
|
||||
curr.end = matches[i].end
|
||||
}
|
||||
} else {
|
||||
merged = append(merged, curr)
|
||||
curr = matches[i]
|
||||
}
|
||||
}
|
||||
merged = append(merged, curr)
|
||||
}
|
||||
|
||||
// Build highlighted string from back to front to avoid position shifts
|
||||
result := text
|
||||
for i := len(merged) - 1; i >= 0; i-- {
|
||||
m := merged[i]
|
||||
result = result[:m.end] + "**" + result[m.end:]
|
||||
result = result[:m.start] + "**" + result[m.start:]
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
340
search_test.go
Normal file
340
search_test.go
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
package help
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestTokenize_Good(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "simple words",
|
||||
input: "hello world",
|
||||
expected: []string{"hello", "world"},
|
||||
},
|
||||
{
|
||||
name: "mixed case",
|
||||
input: "Hello World",
|
||||
expected: []string{"hello", "world"},
|
||||
},
|
||||
{
|
||||
name: "with punctuation",
|
||||
input: "Hello, world! How are you?",
|
||||
expected: []string{"hello", "world", "how", "are", "you"},
|
||||
},
|
||||
{
|
||||
name: "single characters filtered",
|
||||
input: "a b c hello d",
|
||||
expected: []string{"hello"},
|
||||
},
|
||||
{
|
||||
name: "numbers included",
|
||||
input: "version 2 release",
|
||||
expected: []string{"version", "release"},
|
||||
},
|
||||
{
|
||||
name: "alphanumeric",
|
||||
input: "v2.0 and config123",
|
||||
expected: []string{"v2", "and", "config123"},
|
||||
},
|
||||
{
|
||||
name: "empty string",
|
||||
input: "",
|
||||
expected: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := tokenize(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearchIndex_Add_Good(t *testing.T) {
|
||||
idx := newSearchIndex()
|
||||
|
||||
topic := &Topic{
|
||||
ID: "getting-started",
|
||||
Title: "Getting Started",
|
||||
Content: "Welcome to the guide.",
|
||||
Tags: []string{"intro", "setup"},
|
||||
Sections: []Section{
|
||||
{ID: "installation", Title: "Installation", Content: "Install the CLI."},
|
||||
},
|
||||
}
|
||||
|
||||
idx.Add(topic)
|
||||
|
||||
// Verify topic is stored
|
||||
assert.NotNil(t, idx.topics["getting-started"])
|
||||
|
||||
// Verify words are indexed
|
||||
assert.Contains(t, idx.index["getting"], "getting-started")
|
||||
assert.Contains(t, idx.index["started"], "getting-started")
|
||||
assert.Contains(t, idx.index["welcome"], "getting-started")
|
||||
assert.Contains(t, idx.index["guide"], "getting-started")
|
||||
assert.Contains(t, idx.index["intro"], "getting-started")
|
||||
assert.Contains(t, idx.index["setup"], "getting-started")
|
||||
assert.Contains(t, idx.index["installation"], "getting-started")
|
||||
assert.Contains(t, idx.index["cli"], "getting-started")
|
||||
}
|
||||
|
||||
func TestSearchIndex_Search_Good(t *testing.T) {
|
||||
idx := newSearchIndex()
|
||||
|
||||
// Add test topics
|
||||
idx.Add(&Topic{
|
||||
ID: "getting-started",
|
||||
Title: "Getting Started",
|
||||
Content: "Welcome to the CLI guide. This covers installation and setup.",
|
||||
Tags: []string{"intro"},
|
||||
})
|
||||
|
||||
idx.Add(&Topic{
|
||||
ID: "configuration",
|
||||
Title: "Configuration",
|
||||
Content: "Configure the CLI using environment variables.",
|
||||
})
|
||||
|
||||
idx.Add(&Topic{
|
||||
ID: "commands",
|
||||
Title: "Commands Reference",
|
||||
Content: "List of all available commands.",
|
||||
})
|
||||
|
||||
t.Run("single word query", func(t *testing.T) {
|
||||
results := idx.Search("configuration")
|
||||
assert.NotEmpty(t, results)
|
||||
assert.Equal(t, "configuration", results[0].Topic.ID)
|
||||
})
|
||||
|
||||
t.Run("multi-word query", func(t *testing.T) {
|
||||
results := idx.Search("cli guide")
|
||||
assert.NotEmpty(t, results)
|
||||
// Should match getting-started (has both "cli" and "guide")
|
||||
assert.Equal(t, "getting-started", results[0].Topic.ID)
|
||||
})
|
||||
|
||||
t.Run("title boost", func(t *testing.T) {
|
||||
results := idx.Search("commands")
|
||||
assert.NotEmpty(t, results)
|
||||
// "commands" appears in title of commands topic
|
||||
assert.Equal(t, "commands", results[0].Topic.ID)
|
||||
})
|
||||
|
||||
t.Run("partial word matching", func(t *testing.T) {
|
||||
results := idx.Search("config")
|
||||
assert.NotEmpty(t, results)
|
||||
// Should match "configuration" and "configure"
|
||||
foundConfig := false
|
||||
for _, r := range results {
|
||||
if r.Topic.ID == "configuration" {
|
||||
foundConfig = true
|
||||
break
|
||||
}
|
||||
}
|
||||
assert.True(t, foundConfig, "Should find configuration topic with prefix match")
|
||||
})
|
||||
|
||||
t.Run("no results", func(t *testing.T) {
|
||||
results := idx.Search("nonexistent")
|
||||
assert.Empty(t, results)
|
||||
})
|
||||
|
||||
t.Run("empty query", func(t *testing.T) {
|
||||
results := idx.Search("")
|
||||
assert.Nil(t, results)
|
||||
})
|
||||
}
|
||||
|
||||
func TestSearchIndex_Search_Good_WithSections(t *testing.T) {
|
||||
idx := newSearchIndex()
|
||||
|
||||
idx.Add(&Topic{
|
||||
ID: "installation",
|
||||
Title: "Installation Guide",
|
||||
Content: "Overview of installation process.",
|
||||
Sections: []Section{
|
||||
{
|
||||
ID: "linux",
|
||||
Title: "Linux Installation",
|
||||
Content: "Run apt-get install core on Debian.",
|
||||
},
|
||||
{
|
||||
ID: "macos",
|
||||
Title: "macOS Installation",
|
||||
Content: "Use brew install core on macOS.",
|
||||
},
|
||||
{
|
||||
ID: "windows",
|
||||
Title: "Windows Installation",
|
||||
Content: "Download the installer from the website.",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
t.Run("matches section content", func(t *testing.T) {
|
||||
results := idx.Search("debian")
|
||||
assert.NotEmpty(t, results)
|
||||
assert.Equal(t, "installation", results[0].Topic.ID)
|
||||
// Should identify the Linux section as best match
|
||||
if results[0].Section != nil {
|
||||
assert.Equal(t, "linux", results[0].Section.ID)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("matches section title", func(t *testing.T) {
|
||||
results := idx.Search("windows")
|
||||
assert.NotEmpty(t, results)
|
||||
assert.Equal(t, "installation", results[0].Topic.ID)
|
||||
})
|
||||
}
|
||||
|
||||
func TestExtractSnippet_Good(t *testing.T) {
|
||||
content := `This is the first paragraph with some introduction text.
|
||||
|
||||
Here is more content that talks about installation and setup.
|
||||
The installation process is straightforward.
|
||||
|
||||
Finally, some closing remarks about the configuration.`
|
||||
|
||||
t.Run("finds match and extracts context", func(t *testing.T) {
|
||||
snippet := extractSnippet(content, compileRegexes([]string{"installation"}))
|
||||
assert.Contains(t, snippet, "**installation**")
|
||||
assert.True(t, len(snippet) <= 250, "Snippet should be reasonably short")
|
||||
})
|
||||
|
||||
t.Run("no query words returns start", func(t *testing.T) {
|
||||
snippet := extractSnippet(content, nil)
|
||||
assert.Contains(t, snippet, "first paragraph")
|
||||
})
|
||||
|
||||
t.Run("empty content", func(t *testing.T) {
|
||||
snippet := extractSnippet("", compileRegexes([]string{"test"}))
|
||||
assert.Empty(t, snippet)
|
||||
})
|
||||
}
|
||||
|
||||
func TestExtractSnippet_Highlighting(t *testing.T) {
|
||||
content := "The quick brown fox jumps over the lazy dog."
|
||||
|
||||
t.Run("simple highlighting", func(t *testing.T) {
|
||||
snippet := extractSnippet(content, compileRegexes([]string{"quick", "fox"}))
|
||||
assert.Contains(t, snippet, "**quick**")
|
||||
assert.Contains(t, snippet, "**fox**")
|
||||
})
|
||||
|
||||
t.Run("case insensitive highlighting", func(t *testing.T) {
|
||||
snippet := extractSnippet(content, compileRegexes([]string{"QUICK", "Fox"}))
|
||||
assert.Contains(t, snippet, "**quick**")
|
||||
assert.Contains(t, snippet, "**fox**")
|
||||
})
|
||||
|
||||
t.Run("partial word matching", func(t *testing.T) {
|
||||
content := "The configuration is complete."
|
||||
snippet := extractSnippet(content, compileRegexes([]string{"config"}))
|
||||
assert.Contains(t, snippet, "**config**uration")
|
||||
})
|
||||
|
||||
t.Run("overlapping matches", func(t *testing.T) {
|
||||
content := "Searching for something."
|
||||
// Both "search" and "searching" match
|
||||
snippet := extractSnippet(content, compileRegexes([]string{"search", "searching"}))
|
||||
assert.Equal(t, "**Searching** for something.", snippet)
|
||||
})
|
||||
}
|
||||
|
||||
func TestExtractSnippet_Good_UTF8(t *testing.T) {
|
||||
// Content with multi-byte UTF-8 characters
|
||||
content := "日本語のテキストです。This contains Japanese text. 検索機能をテストします。"
|
||||
|
||||
t.Run("handles multi-byte characters without corruption", func(t *testing.T) {
|
||||
snippet := extractSnippet(content, compileRegexes([]string{"japanese"}))
|
||||
// Should not panic or produce invalid UTF-8
|
||||
assert.True(t, len(snippet) > 0)
|
||||
// Verify the result is valid UTF-8
|
||||
assert.True(t, isValidUTF8(snippet), "Snippet should be valid UTF-8")
|
||||
})
|
||||
|
||||
t.Run("truncates multi-byte content safely", func(t *testing.T) {
|
||||
// Long content that will be truncated
|
||||
longContent := strings.Repeat("日本語", 100) // 300 characters
|
||||
snippet := extractSnippet(longContent, nil)
|
||||
assert.True(t, isValidUTF8(snippet), "Truncated snippet should be valid UTF-8")
|
||||
})
|
||||
}
|
||||
|
||||
// compileRegexes is a helper for tests. It builds one case-insensitive,
// literal-matching regex per word. regexp.QuoteMeta escapes every
// metacharacter, so the resulting pattern is always valid and
// MustCompile cannot panic here; the previous err-check branch was
// unreachable and silently dropped nothing. A nil/empty input yields a
// nil slice, matching the original behaviour.
func compileRegexes(words []string) []*regexp.Regexp {
	var res []*regexp.Regexp
	for _, w := range words {
		res = append(res, regexp.MustCompile("(?i)"+regexp.QuoteMeta(w)))
	}
	return res
}
|
||||
|
||||
// isValidUTF8 reports whether s is well-formed UTF-8. It delegates to
// utf8.ValidString, which performs the same rune-by-rune check as the
// previous hand-rolled loop but uses the stdlib's optimised fast path.
func isValidUTF8(s string) bool {
	return utf8.ValidString(s)
}
|
||||
|
||||
func TestCountMatches_Good(t *testing.T) {
|
||||
tests := []struct {
|
||||
text string
|
||||
words []string
|
||||
expected int
|
||||
}{
|
||||
{"Hello world", []string{"hello"}, 1},
|
||||
{"Hello world", []string{"hello", "world"}, 2},
|
||||
{"Hello world", []string{"foo", "bar"}, 0},
|
||||
{"The quick brown fox", []string{"quick", "fox", "dog"}, 2},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
result := countMatches(tt.text, tt.words)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearchResult_Score_Good(t *testing.T) {
|
||||
idx := newSearchIndex()
|
||||
|
||||
// Topic with query word in title should score higher
|
||||
idx.Add(&Topic{
|
||||
ID: "topic-in-title",
|
||||
Title: "Installation Guide",
|
||||
Content: "Some content here.",
|
||||
})
|
||||
|
||||
idx.Add(&Topic{
|
||||
ID: "topic-in-content",
|
||||
Title: "Some Other Topic",
|
||||
Content: "This covers installation steps.",
|
||||
})
|
||||
|
||||
results := idx.Search("installation")
|
||||
assert.Len(t, results, 2)
|
||||
|
||||
// Title match should score higher
|
||||
assert.Equal(t, "topic-in-title", results[0].Topic.ID)
|
||||
assert.Greater(t, results[0].Score, results[1].Score)
|
||||
}
|
||||
31
topic.go
Normal file
31
topic.go
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
// Package help provides display-agnostic help content management.
|
||||
package help
|
||||
|
||||
// Topic represents a help topic/page. Topics are stored in a catalog and
// indexed for search by title, content, tags, and section text.
type Topic struct {
	ID       string    `json:"id"`       // Stable identifier used for lookup and cross-references.
	Title    string    `json:"title"`    // Human-readable heading; boosted in search scoring.
	Path     string    `json:"path"`     // NOTE(review): presumably the source file path — confirm against the loader.
	Content  string    `json:"content"`  // Full body text of the topic.
	Sections []Section `json:"sections"` // Headings within the topic; searchable individually.
	Tags     []string  `json:"tags"`     // Keywords indexed alongside the content for search.
	Related  []string  `json:"related"`  // IDs of related topics.
	Order    int       `json:"order"`    // For sorting
}
|
||||
|
||||
// Section represents a heading within a topic. Search results may point
// at the specific section that best matched a query.
type Section struct {
	ID      string `json:"id"`      // Identifier for the section within its topic.
	Title   string `json:"title"`   // Heading text; indexed for search.
	Level   int    `json:"level"`   // Heading depth; exact numbering convention not visible here — confirm against the parser.
	Line    int    `json:"line"`    // Start line in content (1-indexed)
	Content string `json:"content"` // Content under heading
}
|
||||
|
||||
// Frontmatter represents YAML frontmatter metadata. Its fields correspond
// to the like-named fields on Topic.
type Frontmatter struct {
	Title   string   `yaml:"title"`   // Topic title.
	Tags    []string `yaml:"tags"`    // Search keywords.
	Related []string `yaml:"related"` // Related topic IDs.
	Order   int      `yaml:"order"`   // Sort order.
}
|
||||
Loading…
Add table
Reference in a new issue