feat: import go-help library as pkg/help

All source, tests, and templates from forge.lthn.ai/core/go-help.
94% test coverage preserved. All tests pass.

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Snider 2026-03-06 16:30:16 +00:00
parent f4aa6b99c0
commit 6b0443c6f7
27 changed files with 5540 additions and 0 deletions

11
go.mod
View file

@ -1,3 +1,14 @@
module forge.lthn.ai/core/docs
go 1.25.0
require (
github.com/stretchr/testify v1.11.1
github.com/yuin/goldmark v1.7.16
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
)

12
go.sum Normal file
View file

@ -0,0 +1,12 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/yuin/goldmark v1.7.16 h1:n+CJdUxaFMiDUNnWC3dMWCIQJSkxH4uz3ZwQBkAlVNE=
github.com/yuin/goldmark v1.7.16/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

100
pkg/help/catalog.go Normal file
View file

@ -0,0 +1,100 @@
package help
import (
"fmt"
"iter"
"maps"
"slices"
)
// Catalog manages help topics.
// Every topic passed to Add is stored by ID and also handed to the search
// index, so it can be fetched with Get and found with Search.
type Catalog struct {
// topics maps a topic ID to the topic stored under it.
topics map[string]*Topic
// index receives each added topic and answers Search queries.
index *searchIndex
}
// DefaultCatalog builds a fresh catalog pre-populated with the built-in
// "getting-started" and "config" topics.
func DefaultCatalog() *Catalog {
	catalog := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}

	// Built-in topics, registered in declaration order.
	builtins := []*Topic{
		{
			ID:    "getting-started",
			Title: "Getting Started",
			Content: `# Getting Started
Welcome to Core! This CLI tool helps you manage development workflows.
## Common Commands
- core dev: Development workflows
- core setup: Setup repository
- core doctor: Check environment health
- core test: Run tests
## Next Steps
Run 'core help <topic>' to learn more about a specific topic.
`,
		},
		{
			ID:    "config",
			Title: "Configuration",
			Content: `# Configuration
Core is configured via environment variables and config files.
## Environment Variables
- CORE_DEBUG: Enable debug logging
- GITHUB_TOKEN: GitHub API token
## Config Files
Config is stored in ~/.core/config.yaml
`,
		},
	}
	for _, topic := range builtins {
		catalog.Add(topic)
	}
	return catalog
}
// Add registers t under its ID and passes it to the search index.
// A topic whose ID is already present replaces the stored entry.
// A nil topic is ignored instead of causing a nil-pointer panic. The topic
// map and the search index are created on first use, so Add also works on a
// zero-value Catalog.
func (c *Catalog) Add(t *Topic) {
	if t == nil {
		return
	}
	if c.topics == nil {
		c.topics = make(map[string]*Topic)
	}
	if c.index == nil {
		c.index = newSearchIndex()
	}
	c.topics[t.ID] = t
	c.index.Add(t)
}
// List returns all topics ordered by the ID they were added under.
// Sorting makes the result deterministic; plain map iteration would return
// the topics in a different order on every call. An empty catalog yields a
// nil slice.
func (c *Catalog) List() []*Topic {
	if len(c.topics) == 0 {
		return nil
	}
	list := make([]*Topic, 0, len(c.topics))
	for _, id := range slices.Sorted(maps.Keys(c.topics)) {
		list = append(list, c.topics[id])
	}
	return list
}
// All returns an iterator that yields every topic in the catalog.
// Iteration order follows map iteration and is therefore unspecified.
func (c *Catalog) All() iter.Seq[*Topic] {
	topics := c.topics
	return func(yield func(*Topic) bool) {
		for _, topic := range topics {
			if !yield(topic) {
				return
			}
		}
	}
}
// Search runs query against the catalog's search index and returns
// whatever the index reports.
func (c *Catalog) Search(query string) []*SearchResult {
	results := c.index.Search(query)
	return results
}
// SearchResults returns an iterator over the results of Search(query).
// The search itself runs once, when SearchResults is called.
func (c *Catalog) SearchResults(query string) iter.Seq[*SearchResult] {
	results := c.Search(query)
	return func(yield func(*SearchResult) bool) {
		for _, result := range results {
			if !yield(result) {
				return
			}
		}
	}
}
// Get looks up the topic stored under id.
// It returns a "topic not found" error naming the ID when no such topic
// exists.
func (c *Catalog) Get(id string) (*Topic, error) {
	if topic, found := c.topics[id]; found {
		return topic, nil
	}
	return nil, fmt.Errorf("topic not found: %s", id)
}

212
pkg/help/catalog_test.go Normal file
View file

@ -0,0 +1,212 @@
package help
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestDefaultCatalog_Good checks that DefaultCatalog is fully initialised
// and ships the built-in topics.
func TestDefaultCatalog_Good(t *testing.T) {
	catalog := DefaultCatalog()
	require.NotNil(t, catalog)
	require.NotNil(t, catalog.topics)
	require.NotNil(t, catalog.index)

	t.Run("contains built-in topics", func(t *testing.T) {
		assert.GreaterOrEqual(t, len(catalog.List()), 2, "should have at least 2 default topics")
	})

	builtins := []struct {
		name    string
		id      string
		title   string
		snippet string
	}{
		{"getting-started topic exists", "getting-started", "Getting Started", "Common Commands"},
		{"config topic exists", "config", "Configuration", "Environment Variables"},
	}
	for _, tc := range builtins {
		t.Run(tc.name, func(t *testing.T) {
			found, err := catalog.Get(tc.id)
			require.NoError(t, err)
			assert.Equal(t, tc.title, found.Title)
			assert.Contains(t, found.Content, tc.snippet)
		})
	}
}
// TestCatalog_Add_Good verifies that an added topic can be fetched and
// searched, and that re-adding the same ID replaces the stored topic.
func TestCatalog_Add_Good(t *testing.T) {
	catalog := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}
	original := &Topic{
		ID:      "test-topic",
		Title:   "Test Topic",
		Content: "This is a test topic for unit testing.",
		Tags:    []string{"test", "unit"},
	}
	catalog.Add(original)

	t.Run("topic is retrievable after add", func(t *testing.T) {
		stored, err := catalog.Get("test-topic")
		require.NoError(t, err)
		assert.Equal(t, original, stored)
	})

	t.Run("topic is searchable after add", func(t *testing.T) {
		assert.NotEmpty(t, catalog.Search("test"))
	})

	t.Run("overwrite existing topic", func(t *testing.T) {
		catalog.Add(&Topic{
			ID:      "test-topic",
			Title:   "Replaced Topic",
			Content: "Replacement content.",
		})
		stored, err := catalog.Get("test-topic")
		require.NoError(t, err)
		assert.Equal(t, "Replaced Topic", stored.Title)
	})
}
// TestCatalog_List_Good covers List on an empty catalog and after adding
// three topics.
func TestCatalog_List_Good(t *testing.T) {
	catalog := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}

	t.Run("empty catalog returns empty list", func(t *testing.T) {
		assert.Empty(t, catalog.List())
	})

	t.Run("returns all added topics", func(t *testing.T) {
		for _, topic := range []*Topic{
			{ID: "alpha", Title: "Alpha"},
			{ID: "beta", Title: "Beta"},
			{ID: "gamma", Title: "Gamma"},
		} {
			catalog.Add(topic)
		}

		listed := catalog.List()
		assert.Len(t, listed, 3)

		// Compare as a set: the test does not rely on any particular order.
		seen := make(map[string]bool)
		for _, topic := range listed {
			seen[topic.ID] = true
		}
		for _, id := range []string{"alpha", "beta", "gamma"} {
			assert.True(t, seen[id])
		}
	})
}
// TestCatalog_Search_Good exercises Search on the default catalog with a
// matching query, an empty query and a query that matches nothing.
func TestCatalog_Search_Good(t *testing.T) {
	catalog := DefaultCatalog()

	t.Run("finds default topics", func(t *testing.T) {
		assert.NotEmpty(t, catalog.Search("configuration"))
	})

	t.Run("empty query returns nil", func(t *testing.T) {
		assert.Nil(t, catalog.Search(""))
	})

	t.Run("no match returns empty", func(t *testing.T) {
		assert.Empty(t, catalog.Search("zzzyyyxxx"))
	})
}
// TestCatalog_Get_Good covers Get for an existing ID and for a missing one.
func TestCatalog_Get_Good(t *testing.T) {
	c := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}
	c.Add(&Topic{ID: "exists", Title: "Existing Topic"})

	t.Run("existing topic", func(t *testing.T) {
		topic, err := c.Get("exists")
		require.NoError(t, err)
		assert.Equal(t, "Existing Topic", topic.Title)
	})

	t.Run("missing topic returns error", func(t *testing.T) {
		topic, err := c.Get("does-not-exist")
		assert.Nil(t, topic)
		// require stops the subtest here, so err.Error() below can never be
		// reached with a nil error and panic instead of failing cleanly.
		require.Error(t, err)
		assert.Contains(t, err.Error(), "topic not found")
		assert.Contains(t, err.Error(), "does-not-exist")
	})
}
// TestCatalog_Search_Good_ScoreTiebreaking checks that results with equal
// scores come back ordered alphabetically by title.
func TestCatalog_Search_Good_ScoreTiebreaking(t *testing.T) {
	catalog := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}

	// Identical content gives both topics the same score; they are added in
	// reverse alphabetical order so insertion order cannot satisfy the test.
	for _, topic := range []*Topic{
		{ID: "zebra-topic", Title: "Zebra", Content: "Unique keyword zephyr."},
		{ID: "alpha-topic", Title: "Alpha", Content: "Unique keyword zephyr."},
	} {
		catalog.Add(topic)
	}

	hits := catalog.Search("zephyr")
	require.Len(t, hits, 2)

	first, second := hits[0], hits[1]
	assert.Equal(t, "Alpha", first.Topic.Title)
	assert.Equal(t, "Zebra", second.Topic.Title)
	assert.Equal(t, first.Score, second.Score,
		"scores should be equal for tie-breaking to apply")
}
// BenchmarkSearch measures a two-word search against a catalog of 150
// generated topics, each with tags and two sections.
func BenchmarkSearch(b *testing.B) {
	// Build a catalog with 100+ topics for benchmarking.
	c := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}
	for i := range 150 {
		c.Add(&Topic{
			ID:      fmt.Sprintf("topic-%d", i),
			Title:   fmt.Sprintf("Topic Number %d About Various Subjects", i),
			Content: fmt.Sprintf("This is the content of topic %d. It covers installation, configuration, deployment, and testing of the system.", i),
			Tags:    []string{"generated", fmt.Sprintf("tag%d", i%10)},
			Sections: []Section{
				{
					ID:      fmt.Sprintf("section-%d-a", i),
					Title:   "Overview",
					Content: "An overview of the topic and its purpose.",
				},
				{
					ID:      fmt.Sprintf("section-%d-b", i),
					Title:   "Details",
					Content: "Detailed information about the topic including examples and usage.",
				},
			},
		})
	}

	b.ReportAllocs()
	// b.Loop excludes the setup above from the timing, so no explicit
	// ResetTimer is needed, and it keeps the loop body from being optimised
	// away.
	for b.Loop() {
		c.Search("installation configuration")
	}
}

251
pkg/help/generate.go Normal file
View file

@ -0,0 +1,251 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"encoding/json"
"os"
"path/filepath"
)
// searchIndexEntry represents a single topic in the client-side search index.
// A slice of these is JSON-encoded into search-index.json.
type searchIndexEntry struct {
// ID is the topic ID.
ID string `json:"id"`
// Title is the topic title.
Title string `json:"title"`
// Tags holds the topic's tags.
Tags []string `json:"tags"`
// Content is the topic content, truncated by writeSearchIndex.
Content string `json:"content"`
}
// Generate writes a complete static help site to outputDir.
// It creates:
// - index.html -- topic listing grouped by tags
// - topics/{id}.html -- one page per topic
// - search.html -- client-side search with inline JS
// - search-index.json -- JSON index for client-side search
// - 404.html -- not found page
//
// All CSS is inlined; no external stylesheets are needed.
//
// Generate stops at the first failure and returns that error. A nil catalog,
// or a topic ID that is not a plain file name (it contains a path
// separator), is reported as an *os.PathError wrapping os.ErrInvalid, so a
// topic can never be written outside the topics directory.
func Generate(catalog *Catalog, outputDir string) error {
	if catalog == nil {
		return &os.PathError{Op: "generate", Path: outputDir, Err: os.ErrInvalid}
	}
	topics := catalog.List()

	// Ensure output directories exist; MkdirAll creates outputDir as well.
	topicsDir := filepath.Join(outputDir, "topics")
	if err := os.MkdirAll(topicsDir, 0o755); err != nil {
		return err
	}

	// 1. index.html
	index := indexData{
		Topics: topics,
		Groups: groupTopicsByTag(topics),
	}
	if err := writeStaticPage(outputDir, "index.html", "index.html", index); err != nil {
		return err
	}

	// 2. topics/{id}.html -- one per topic
	for _, t := range topics {
		name := t.ID + ".html"
		// Base differs from name exactly when name contains a separator.
		if filepath.Base(name) != name {
			return &os.PathError{Op: "create", Path: filepath.Join(topicsDir, name), Err: os.ErrInvalid}
		}
		if err := writeStaticPage(topicsDir, name, "topic.html", topicData{Topic: t}); err != nil {
			return err
		}
	}

	// 3. search.html -- client-side search page
	if err := writeSearchPage(outputDir); err != nil {
		return err
	}

	// 4. search-index.json
	if err := writeSearchIndex(outputDir, topics); err != nil {
		return err
	}

	// 5. 404.html
	return writeStaticPage(outputDir, "404.html", "404.html", nil)
}
// writeStaticPage renders the template page templatePage with data into the
// file dir/filename, creating or truncating it.
// It returns the first error from creating, rendering or closing the file.
// The close error is checked because a failed close can mean the rendered
// page never reached disk.
func writeStaticPage(dir, filename, templatePage string, data any) (err error) {
	f, err := os.Create(filepath.Join(dir, filename))
	if err != nil {
		return err
	}
	defer func() {
		// Keep the render error if there is one; otherwise surface Close's.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()
	return renderPage(f, templatePage, data)
}
// writeSearchIndex writes the JSON search index for client-side search to
// outputDir/search-index.json.
// Each topic contributes its ID, title, tags and at most the first 500
// characters (runes) of its content. It returns the first error from
// creating, encoding or closing the file.
func writeSearchIndex(outputDir string, topics []*Topic) (err error) {
	// Truncate content for the index to keep file size reasonable.
	const maxContentRunes = 500

	entries := make([]searchIndexEntry, 0, len(topics))
	for _, t := range topics {
		content := t.Content
		// Ranging over the string yields the byte offset of each rune, so
		// the cut lands on a character boundary without converting the
		// whole content to a rune slice.
		seen := 0
		for offset := range content {
			if seen == maxContentRunes {
				content = content[:offset]
				break
			}
			seen++
		}
		entries = append(entries, searchIndexEntry{
			ID:      t.ID,
			Title:   t.Title,
			Tags:    t.Tags,
			Content: content,
		})
	}

	f, err := os.Create(filepath.Join(outputDir, "search-index.json"))
	if err != nil {
		return err
	}
	defer func() {
		// Keep the encode error if there is one; otherwise surface Close's.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	enc := json.NewEncoder(f)
	enc.SetIndent("", " ")
	return enc.Encode(entries)
}
// writeSearchPage generates outputDir/search.html: the "search.html"
// template rendered with an empty query and no results, followed by the
// inline client-side search script.
// The script builds its result list with createElement/textContent rather
// than HTML strings, and its data comes from our own search-index.json, not
// external user input.
// It returns the first error from creating, rendering, writing or closing
// the file.
func writeSearchPage(outputDir string) (err error) {
	f, err := os.Create(filepath.Join(outputDir, "search.html"))
	if err != nil {
		return err
	}
	defer func() {
		// Keep an earlier error if there is one; otherwise surface Close's.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	// Render via a search template with empty results + inject client-side JS.
	data := searchData{Query: "", Results: nil}
	if err := renderPage(f, "search.html", data); err != nil {
		return err
	}

	// Append inline script for client-side search.
	_, err = f.WriteString(clientSearchScript)
	return err
}
// clientSearchScript is the inline JS for static-site client-side search.
// It fetches search-index.json, scores every entry per query word (+10 for a
// title match, +3 per matching tag, +1 for a content match) and rebuilds the
// result list with createElement/textContent, so index values are inserted
// as text, never as markup.
// NOTE(review): escapeHTML() is defined in the script but has no call site.
// The search index is generated from our own catalog data, not user input.
const clientSearchScript = `
<script>
(function() {
let index = [];
fetch('search-index.json')
.then(r => r.json())
.then(data => { index = data; })
.catch(() => {});
const form = document.querySelector('.search-form');
const input = form ? form.querySelector('input[name="q"]') : null;
const main = document.querySelector('main');
const container = main ? main.querySelector('.container') : null;
if (form && input) {
form.addEventListener('submit', function(e) {
e.preventDefault();
doSearch(input.value.trim());
});
}
function escapeHTML(s) {
const div = document.createElement('div');
div.textContent = s;
return div.innerHTML;
}
function doSearch(query) {
if (!query || !container) return;
const lower = query.toLowerCase();
const words = lower.split(/\s+/);
const results = index
.map(function(entry) {
let score = 0;
const title = (entry.title || '').toLowerCase();
const content = (entry.content || '').toLowerCase();
const tags = (entry.tags || []).map(function(t) { return t.toLowerCase(); });
words.forEach(function(w) {
if (title.indexOf(w) !== -1) score += 10;
if (content.indexOf(w) !== -1) score += 1;
tags.forEach(function(tag) { if (tag.indexOf(w) !== -1) score += 3; });
});
return { entry: entry, score: score };
})
.filter(function(r) { return r.score > 0; })
.sort(function(a, b) { return b.score - a.score; });
// Build result using safe DOM methods
while (container.firstChild) container.removeChild(container.firstChild);
var h1 = document.createElement('h1');
h1.textContent = 'Search Results';
container.appendChild(h1);
var summary = document.createElement('p');
summary.style.color = 'var(--fg-muted)';
if (results.length > 0) {
summary.textContent = 'Found ' + results.length + ' result' + (results.length !== 1 ? 's' : '') + ' for \u201c' + query + '\u201d';
} else {
summary.textContent = 'No results for \u201c' + query + '\u201d';
}
container.appendChild(summary);
results.forEach(function(r) {
var e = r.entry;
var card = document.createElement('div');
card.className = 'card';
var heading = document.createElement('h3');
var link = document.createElement('a');
link.href = 'topics/' + encodeURIComponent(e.id) + '.html';
link.textContent = e.title;
heading.appendChild(link);
card.appendChild(heading);
if (e.content) {
var p = document.createElement('p');
var snippet = e.content.substring(0, 150);
p.textContent = snippet + (e.content.length > 150 ? '...' : '');
card.appendChild(p);
}
if (e.tags && e.tags.length) {
var tagDiv = document.createElement('div');
e.tags.forEach(function(t) {
var span = document.createElement('span');
span.className = 'tag';
span.textContent = t;
tagDiv.appendChild(span);
});
card.appendChild(tagDiv);
}
container.appendChild(card);
});
if (results.length === 0) {
var noResults = document.createElement('div');
noResults.style.cssText = 'margin-top:2rem;text-align:center;color:var(--fg-muted);';
var tip = document.createElement('p');
tip.textContent = 'Try a different search term or browse ';
var browseLink = document.createElement('a');
browseLink.href = 'index.html';
browseLink.textContent = 'all topics';
tip.appendChild(browseLink);
tip.appendChild(document.createTextNode('.'));
noResults.appendChild(tip);
container.appendChild(noResults);
}
}
})();
</script>
`

195
pkg/help/generate_test.go Normal file
View file

@ -0,0 +1,195 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// testCatalog returns a two-topic catalog ("getting-started" and "config")
// used as the fixture for the generator tests.
func testCatalog() *Catalog {
	catalog := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}

	gettingStarted := &Topic{
		ID:      "getting-started",
		Title:   "Getting Started",
		Content: "# Getting Started\n\nWelcome to the **guide**.\n",
		Tags:    []string{"intro"},
		Sections: []Section{
			{ID: "getting-started", Title: "Getting Started", Level: 1},
		},
	}
	config := &Topic{
		ID:      "config",
		Title:   "Configuration",
		Content: "# Configuration\n\nSet up your environment.\n",
		Tags:    []string{"setup"},
		Related: []string{"getting-started"},
	}

	catalog.Add(gettingStarted)
	catalog.Add(config)
	return catalog
}
func TestGenerate_Good_FileStructure(t *testing.T) {
dir := t.TempDir()
catalog := testCatalog()
err := Generate(catalog, dir)
require.NoError(t, err)
// Verify expected file structure
expectedFiles := []string{
"index.html",
"search.html",
"search-index.json",
"404.html",
"topics/getting-started.html",
"topics/config.html",
}
for _, f := range expectedFiles {
path := filepath.Join(dir, f)
_, err := os.Stat(path)
assert.NoError(t, err, "expected file %s to exist", f)
}
}
func TestGenerate_Good_IndexContainsTopics(t *testing.T) {
dir := t.TempDir()
catalog := testCatalog()
err := Generate(catalog, dir)
require.NoError(t, err)
content, err := os.ReadFile(filepath.Join(dir, "index.html"))
require.NoError(t, err)
html := string(content)
assert.Contains(t, html, "Getting Started")
assert.Contains(t, html, "Configuration")
}
func TestGenerate_Good_TopicContainsRenderedMarkdown(t *testing.T) {
dir := t.TempDir()
catalog := testCatalog()
err := Generate(catalog, dir)
require.NoError(t, err)
content, err := os.ReadFile(filepath.Join(dir, "topics", "getting-started.html"))
require.NoError(t, err)
html := string(content)
assert.Contains(t, html, "Getting Started")
assert.Contains(t, html, "<strong>guide</strong>")
}
func TestGenerate_Good_SearchIndexJSON(t *testing.T) {
dir := t.TempDir()
catalog := testCatalog()
err := Generate(catalog, dir)
require.NoError(t, err)
content, err := os.ReadFile(filepath.Join(dir, "search-index.json"))
require.NoError(t, err)
var entries []searchIndexEntry
require.NoError(t, json.Unmarshal(content, &entries))
assert.Len(t, entries, 2, "search index should contain all topics")
// Verify fields are populated
ids := make(map[string]bool)
for _, e := range entries {
ids[e.ID] = true
assert.NotEmpty(t, e.Title)
assert.NotEmpty(t, e.Content)
}
assert.True(t, ids["getting-started"])
assert.True(t, ids["config"])
}
func TestGenerate_Good_404Exists(t *testing.T) {
dir := t.TempDir()
catalog := testCatalog()
err := Generate(catalog, dir)
require.NoError(t, err)
content, err := os.ReadFile(filepath.Join(dir, "404.html"))
require.NoError(t, err)
html := string(content)
assert.Contains(t, html, "404")
assert.Contains(t, html, "not found")
}
// TestGenerate_Good_EmptyDir checks that Generate succeeds when the output
// directory starts out empty.
func TestGenerate_Good_EmptyDir(t *testing.T) {
	outDir := t.TempDir()
	fixture := testCatalog()

	assert.NoError(t, Generate(fixture, outDir))
}
// TestGenerate_Good_OverwriteExisting checks that running Generate twice
// into the same directory succeeds and leaves a valid index.html behind.
func TestGenerate_Good_OverwriteExisting(t *testing.T) {
	outDir := t.TempDir()
	fixture := testCatalog()

	// First run populates the directory.
	require.NoError(t, Generate(fixture, outDir))

	// Second run must overwrite the existing files without error.
	assert.NoError(t, Generate(fixture, outDir))

	raw, err := os.ReadFile(filepath.Join(outDir, "index.html"))
	require.NoError(t, err)
	assert.Contains(t, string(raw), "Getting Started")
}
func TestGenerate_Good_SearchPageHasScript(t *testing.T) {
dir := t.TempDir()
catalog := testCatalog()
err := Generate(catalog, dir)
require.NoError(t, err)
content, err := os.ReadFile(filepath.Join(dir, "search.html"))
require.NoError(t, err)
html := string(content)
assert.Contains(t, html, "<script>")
assert.Contains(t, html, "search-index.json")
}
// TestGenerate_Good_EmptyCatalog checks that a catalog without topics still
// yields index.html and a search index that decodes to an empty list.
func TestGenerate_Good_EmptyCatalog(t *testing.T) {
	outDir := t.TempDir()
	empty := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}
	require.NoError(t, Generate(empty, outDir))

	// index.html should still exist
	_, statErr := os.Stat(filepath.Join(outDir, "index.html"))
	assert.NoError(t, statErr)

	// search-index.json should be valid empty array
	raw, err := os.ReadFile(filepath.Join(outDir, "search-index.json"))
	require.NoError(t, err)

	var index []searchIndexEntry
	require.NoError(t, json.Unmarshal(raw, &index))
	assert.Empty(t, index)
}

243
pkg/help/ingest.go Normal file
View file

@ -0,0 +1,243 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"strings"
)
// ParseHelpText turns the raw help output of one CLI command into a Topic.
//
// name is the command name (e.g. "dev commit"); it supplies the title, the
// ID and, through its first word, a tag. text is the raw help output: its
// "See also:" lines become the Related field and the remaining text is
// converted to Markdown (Usage, Flags, Options, Examples, Commands).
func ParseHelpText(name string, text string) *Topic {
	title := titleCaseWords(name)
	id := GenerateID(name)

	// Strip "See also:" lines before the Markdown conversion.
	related, body := extractSeeAlso(text)
	markdown := convertHelpToMarkdown(body)

	// Tags are "cli" plus the lower-cased first word of the command name,
	// unless that word is "cli" already.
	tags := []string{"cli"}
	if words := strings.Fields(name); len(words) > 0 {
		if lead := strings.ToLower(words[0]); lead != "cli" {
			tags = append(tags, lead)
		}
	}

	return &Topic{
		ID:       id,
		Title:    title,
		Content:  markdown,
		Tags:     tags,
		Related:  related,
		Sections: ExtractSections(markdown),
	}
}
// IngestCLIHelp builds a new Catalog from a batch of CLI help texts.
// Each key of helpTexts is a command name (e.g. "dev commit") and each value
// is that command's raw help output; every pair is parsed with ParseHelpText
// and added to the catalog.
func IngestCLIHelp(helpTexts map[string]string) *Catalog {
	catalog := &Catalog{
		topics: make(map[string]*Topic),
		index:  newSearchIndex(),
	}
	for command, rawHelp := range helpTexts {
		catalog.Add(ParseHelpText(command, rawHelp))
	}
	return catalog
}
// titleCaseWords converts "dev commit" to "Dev Commit".
// The name is split on whitespace, the first character of each word is
// upper-cased and the words are re-joined with single spaces. The first
// character is taken as a whole rune, so words starting with a multi-byte
// character (e.g. "élan") are capitalised instead of corrupted.
func titleCaseWords(name string) string {
	words := strings.Fields(name)
	for i, word := range words {
		// The byte width of the first rune is the offset at which the second
		// rune starts, or the whole word when there is only one rune.
		width := len(word)
		for offset := range word {
			if offset > 0 {
				width = offset
				break
			}
		}
		words[i] = strings.ToUpper(word[:width]) + word[width:]
	}
	return strings.Join(words, " ")
}
// extractSeeAlso extracts related topic references from "See also:" lines.
// Returns the extracted references and the text with the "See also" line removed.
func extractSeeAlso(text string) ([]string, string) {
var related []string
var cleanedLines []string
lines := strings.SplitSeq(text, "\n")
for line := range lines {
trimmed := strings.TrimSpace(line)
lower := strings.ToLower(trimmed)
if after, ok := strings.CutPrefix(lower, "see also:"); ok {
// Extract references after "See also:"
rest := after
// The original casing version
restOrig := trimmed[len("See also:"):]
if len(restOrig) == 0 {
restOrig = rest
}
refs := strings.SplitSeq(restOrig, ",")
for ref := range refs {
ref = strings.TrimSpace(ref)
if ref != "" {
related = append(related, GenerateID(ref))
}
}
continue
}
cleanedLines = append(cleanedLines, line)
}
return related, strings.Join(cleanedLines, "\n")
}
// convertHelpToMarkdown converts structured CLI help text to Markdown.
//
// Lines are processed top to bottom:
//   - "Usage:", "Flags:"/"Options:" and "Examples:" headers become
//     "## Usage", "## Flags" and "## Examples" headings, each followed by a
//     fenced code block holding the section's lines. Text on the same line
//     after "Usage:" becomes the first line of the code block.
//   - "Commands:"/"Available Commands:" becomes a "## Commands" heading
//     followed by a bullet list built from the indented lines below it.
//   - Any other text outside a code block is copied through unchanged.
//
// Headers are matched case-insensitively on the whitespace-trimmed line.
// Blank or whitespace-only input yields "". The result has leading and
// trailing whitespace trimmed.
func convertHelpToMarkdown(text string) string {
	if strings.TrimSpace(text) == "" {
		return ""
	}

	lines := strings.Split(text, "\n")
	var result strings.Builder
	inCodeBlock := false
	var descLines []string

	// flushDesc writes the buffered description lines and empties the buffer.
	flushDesc := func() {
		for _, dl := range descLines {
			result.WriteString(dl)
			result.WriteByte('\n')
		}
		descLines = nil
	}
	// closeCodeBlock ends the open fenced block, if any.
	closeCodeBlock := func() {
		if inCodeBlock {
			result.WriteString("```\n\n")
			inCodeBlock = false
		}
	}
	// startSection finishes whatever came before and writes a heading,
	// optionally opening a fenced code block for the section body.
	startSection := func(heading string, fenced bool) {
		flushDesc()
		closeCodeBlock()
		result.WriteString("## " + heading + "\n\n")
		if fenced {
			result.WriteString("```\n")
			inCodeBlock = true
		}
	}
	isIndented := func(line string) bool {
		return strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t")
	}

	for i := 0; i < len(lines); i++ {
		line := lines[i]
		trimmed := strings.TrimSpace(line)

		// Detect section headers
		switch {
		case isSectionHeader(trimmed, "Usage:"):
			startSection("Usage", true)
			// Include the rest of the line after "Usage:" if present. The
			// header matched case-insensitively, so cut by length rather
			// than with a case-sensitive TrimPrefix, which would leave
			// "usage:" inside the code block. The prefix is ASCII, so the
			// byte length is the same in any letter case.
			if rest := strings.TrimSpace(trimmed[len("Usage:"):]); rest != "" {
				result.WriteString(rest)
				result.WriteByte('\n')
			}

		case isSectionHeader(trimmed, "Flags:") || isSectionHeader(trimmed, "Options:"):
			startSection("Flags", true)

		case isSectionHeader(trimmed, "Examples:"):
			startSection("Examples", true)

		case isSectionHeader(trimmed, "Commands:") || isSectionHeader(trimmed, "Available Commands:"):
			startSection("Commands", false)
			// Consume the following lines as the subcommand list: blank
			// lines are skipped, indented lines become bullets, and the
			// first non-indented line ends the list (it is left for the
			// outer loop).
			for i+1 < len(lines) {
				next := lines[i+1]
				nextTrimmed := strings.TrimSpace(next)
				if nextTrimmed != "" {
					if !isIndented(next) {
						break
					}
					if parts := strings.Fields(nextTrimmed); len(parts) >= 2 {
						// "name description..." -> "- **name** -- description"
						result.WriteString("- **")
						result.WriteString(parts[0])
						result.WriteString("** -- ")
						result.WriteString(strings.Join(parts[1:], " "))
						result.WriteByte('\n')
					} else {
						result.WriteString("- ")
						result.WriteString(nextTrimmed)
						result.WriteByte('\n')
					}
				}
				i++
			}
			result.WriteByte('\n')

		default:
			if !inCodeBlock {
				// Descriptive paragraph text
				descLines = append(descLines, line)
				break
			}
			// A non-indented section marker that the cases above do not
			// handle (in practice "See also:") ends the code block; the
			// line is then re-processed as ordinary text.
			if trimmed != "" && !isIndented(line) && endsCodeBlockSection(trimmed) {
				closeCodeBlock()
				i--
				continue
			}
			result.WriteString(line)
			result.WriteByte('\n')
		}
	}

	flushDesc()
	closeCodeBlock()
	return strings.TrimSpace(result.String())
}

// isSectionHeader reports whether line starts with prefix, ignoring letter
// case.
func isSectionHeader(line, prefix string) bool {
	return strings.HasPrefix(strings.ToLower(line), strings.ToLower(prefix))
}

// endsCodeBlockSection reports whether trimmed starts, case-insensitively,
// with one of the section markers that should end an open code block.
func endsCodeBlockSection(trimmed string) bool {
	lower := strings.ToLower(trimmed)
	for _, marker := range []string{
		"usage:", "flags:", "options:", "examples:",
		"commands:", "available commands:", "see also:",
	} {
		if strings.HasPrefix(lower, marker) {
			return true
		}
	}
	return false
}

277
pkg/help/ingest_test.go Normal file
View file

@ -0,0 +1,277 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestParseHelpText_Good_StandardGoFlags parses help text with a description,
// a Usage section and a Flags section, and checks the resulting ID, title,
// Markdown headings, flag lines and tags.
func TestParseHelpText_Good_StandardGoFlags(t *testing.T) {
helpText := `Core development workflow tool.
Usage:
core dev [command]
Flags:
-h, --help help for dev
-v, --verbose enable verbose output
`
topic := ParseHelpText("dev", helpText)
assert.Equal(t, "dev", topic.ID)
assert.Equal(t, "Dev", topic.Title)
assert.Contains(t, topic.Content, "## Usage")
assert.Contains(t, topic.Content, "core dev [command]")
assert.Contains(t, topic.Content, "## Flags")
assert.Contains(t, topic.Content, "--help")
assert.Contains(t, topic.Content, "--verbose")
// Tags are "cli" plus the first word of the command name.
assert.Equal(t, []string{"cli", "dev"}, topic.Tags)
}
// TestParseHelpText_Good_CobraStyleSubcommands parses help text containing an
// "Available Commands:" listing and checks that a Commands heading, each
// subcommand name in bold, and the Flags heading appear in the content.
func TestParseHelpText_Good_CobraStyleSubcommands(t *testing.T) {
helpText := `Manage repository operations
Usage:
core dev [command]
Available Commands:
commit Commit changes to repositories
push Push commits to remote
pull Pull latest from remote
status Show repository status
Flags:
-h, --help help for dev
`
topic := ParseHelpText("dev", helpText)
assert.Contains(t, topic.Content, "## Commands")
assert.Contains(t, topic.Content, "**commit**")
assert.Contains(t, topic.Content, "**push**")
assert.Contains(t, topic.Content, "**pull**")
assert.Contains(t, topic.Content, "**status**")
assert.Contains(t, topic.Content, "## Flags")
}
// TestParseHelpText_Good_MinimalHelpText checks that a one-line help text
// without any sections still yields a complete topic.
func TestParseHelpText_Good_MinimalHelpText(t *testing.T) {
	const oneLiner = "Show the current version."

	parsed := ParseHelpText("version", oneLiner)

	assert.Equal(t, "version", parsed.ID)
	assert.Equal(t, "Version", parsed.Title)
	assert.Contains(t, parsed.Content, "Show the current version.")
	assert.Equal(t, []string{"cli", "version"}, parsed.Tags)
}
// TestParseHelpText_Good_WithExamples parses help text for a two-word command
// with an Examples section and checks the ID, title, Examples heading,
// example lines and tags.
func TestParseHelpText_Good_WithExamples(t *testing.T) {
helpText := `Commit changes across repositories.
Usage:
core dev commit [flags]
Examples:
core dev commit --all
core dev commit -m "fix: resolve bug"
Flags:
-a, --all commit all changes
-m, --message commit message
`
topic := ParseHelpText("dev commit", helpText)
assert.Equal(t, "dev-commit", topic.ID)
assert.Equal(t, "Dev Commit", topic.Title)
assert.Contains(t, topic.Content, "## Examples")
assert.Contains(t, topic.Content, "core dev commit --all")
assert.Contains(t, topic.Content, `core dev commit -m "fix: resolve bug"`)
// Only the first word of the command name becomes a tag.
assert.Equal(t, []string{"cli", "dev"}, topic.Tags)
}
// TestIngestCLIHelp_Good_BatchIngest ingests three help texts and checks
// that each becomes a retrievable, searchable topic.
func TestIngestCLIHelp_Good_BatchIngest(t *testing.T) {
	ingested := IngestCLIHelp(map[string]string{
		"dev":        "Development workflow tool.\n\nUsage:\n core dev [command]\n",
		"dev commit": "Commit changes.\n\nUsage:\n core dev commit [flags]\n",
		"dev push":   "Push to remote.\n\nUsage:\n core dev push [flags]\n",
	})
	assert.NotNil(t, ingested)
	assert.Len(t, ingested.List(), 3)

	// Verify specific topics
	expected := []struct {
		id    string
		title string
	}{
		{"dev", "Dev"},
		{"dev-commit", "Dev Commit"},
		{"dev-push", "Dev Push"},
	}
	for _, want := range expected {
		found, err := ingested.Get(want.id)
		require.NoError(t, err)
		assert.Equal(t, want.title, found.Title)
	}

	// All should be searchable
	assert.NotEmpty(t, ingested.Search("commit"))
}
// TestParseHelpText_Good_SeeAlso checks that a "See also:" line is turned
// into Related topic IDs and removed from the topic content.
func TestParseHelpText_Good_SeeAlso(t *testing.T) {
helpText := `Push commits to remote repositories.
Usage:
core dev push [flags]
See also: dev commit, dev pull
`
topic := ParseHelpText("dev push", helpText)
assert.Equal(t, "dev-push", topic.ID)
assert.Equal(t, []string{"dev-commit", "dev-pull"}, topic.Related)
// "See also" line should be removed from content
assert.NotContains(t, topic.Content, "See also")
}
// TestParseHelpText_Good_EmptyHelpText checks that empty help text produces
// a topic with empty content and no sections, while ID, title and tags are
// still derived from the command name.
func TestParseHelpText_Good_EmptyHelpText(t *testing.T) {
	parsed := ParseHelpText("empty", "")

	assert.Equal(t, "empty", parsed.ID)
	assert.Equal(t, "Empty", parsed.Title)
	assert.Equal(t, "", parsed.Content)
	assert.Equal(t, []string{"cli", "empty"}, parsed.Tags)
	assert.Empty(t, parsed.Sections)
}
// TestParseHelpText_Good_OptionsSection checks that an "Options:" header is
// rendered under the same "## Flags" heading as a "Flags:" header.
func TestParseHelpText_Good_OptionsSection(t *testing.T) {
// Some tools use "Options:" instead of "Flags:"
helpText := `Run tests across repositories.
Options:
--filter string filter test by name
--timeout int timeout in seconds
`
topic := ParseHelpText("test", helpText)
assert.Contains(t, topic.Content, "## Flags") // Options: mapped to ## Flags
assert.Contains(t, topic.Content, "--filter")
}
func TestTitleCaseWords_Good(t *testing.T) {
tests := []struct {
input string
expected string
}{
{"dev commit", "Dev Commit"},
{"version", "Version"},
{"dev", "Dev"},
{"hello world test", "Hello World Test"},
{"", ""},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
assert.Equal(t, tt.expected, titleCaseWords(tt.input))
})
}
}
// TestExtractSeeAlso_Good covers extractSeeAlso with and without a
// "See also:" line in the input.
func TestExtractSeeAlso_Good(t *testing.T) {
	t.Run("with see also line", func(t *testing.T) {
		input := "Some content.\nSee also: dev commit, dev pull\nMore content."

		refs, remaining := extractSeeAlso(input)

		assert.Equal(t, []string{"dev-commit", "dev-pull"}, refs)
		assert.NotContains(t, remaining, "See also")
		for _, kept := range []string{"Some content.", "More content."} {
			assert.Contains(t, remaining, kept)
		}
	})

	t.Run("no see also", func(t *testing.T) {
		input := "Just normal content."

		refs, remaining := extractSeeAlso(input)

		assert.Empty(t, refs)
		assert.Equal(t, input, remaining)
	})
}
// TestParseHelpText_Good_MultiLineDescription checks that a description
// spanning several lines is kept in the content ahead of the Usage section.
func TestParseHelpText_Good_MultiLineDescription(t *testing.T) {
helpText := `Core is a multi-repository management tool that helps you
manage development workflows across federated monorepos.
It provides commands for status checking, committing,
pushing, and pulling across all repositories.
Usage:
core [command]
`
topic := ParseHelpText("core", helpText)
assert.Contains(t, topic.Content, "multi-repository management")
assert.Contains(t, topic.Content, "## Usage")
}
// TestParseHelpText_Good_CLINameDoesNotDuplicateTag checks that a command
// whose first word is "cli" ends up with a single "cli" tag rather than two.
func TestParseHelpText_Good_CLINameDoesNotDuplicateTag(t *testing.T) {
	parsed := ParseHelpText("cli", "CLI root command.")

	wantTags := []string{"cli"}
	assert.Equal(t, wantTags, parsed.Tags)
}
// TestParseHelpText_Good_CodeBlockEndedByNewSection feeds help text where a
// Usage block is directly followed by a Flags block and checks that both
// headings and the flag text appear in the generated content.
func TestParseHelpText_Good_CodeBlockEndedByNewSection(t *testing.T) {
// Tests the endsCodeBlockSection path: a code block for Usage is
// terminated when a Flags section starts on a non-indented line.
helpText := `Usage:
core run
Flags:
-h, --help help
`
topic := ParseHelpText("run", helpText)
assert.Contains(t, topic.Content, "## Usage")
assert.Contains(t, topic.Content, "## Flags")
assert.Contains(t, topic.Content, "--help")
}
// TestParseHelpText_Good_CommandsWithSingleWordEntry checks that a
// subcommand entry without a description is still listed under a
// "## Commands" heading.
func TestParseHelpText_Good_CommandsWithSingleWordEntry(t *testing.T) {
// Subcommand listing with a single-word entry (no description)
helpText := `Available Commands:
help
`
topic := ParseHelpText("test", helpText)
assert.Contains(t, topic.Content, "## Commands")
assert.Contains(t, topic.Content, "help")
}
// TestEndsCodeBlockSection_Good lists the lines that endsCodeBlockSection
// must report as terminating a code block, and lines that must not.
func TestEndsCodeBlockSection_Good(t *testing.T) {
	terminators := []string{
		"Usage:",
		"Flags:",
		"Options:",
		"Examples:",
		"Commands:",
		"Available Commands:",
		"See also: something",
	}
	for _, line := range terminators {
		assert.True(t, endsCodeBlockSection(line), "%q should end a code block", line)
	}

	ordinary := []string{
		" -h, --help",
		"some random text",
	}
	for _, line := range ordinary {
		assert.False(t, endsCodeBlockSection(line), "%q should not end a code block", line)
	}
}
// TestIsSectionHeader_Good checks isSectionHeader against an identical
// header, a lower-cased header, and a line that merely ends with the header.
func TestIsSectionHeader_Good(t *testing.T) {
	cases := []struct {
		line, header string
		want         bool
	}{
		{line: "Usage:", header: "Usage:", want: true},
		{line: "usage:", header: "Usage:", want: true},
		{line: "NotUsage:", header: "Usage:", want: false},
	}

	for _, tc := range cases {
		assert.Equal(t, tc.want, isSectionHeader(tc.line, tc.header), "line %q", tc.line)
	}
}

186
pkg/help/parser.go Normal file
View file

@ -0,0 +1,186 @@
package help
import (
"iter"
"path/filepath"
"regexp"
"slices"
"strings"
"unicode"
"gopkg.in/yaml.v3"
)
var (
	// frontmatterRegex matches a YAML frontmatter block at the very start of
	// a document. Capture group 1 is the YAML between the delimiters (empty
	// for an empty block); the whole match is what must be stripped to get
	// the body.
	//
	// Both delimiters must sit on their own line: the opening "---" is
	// followed by a line break, and the closing "---" is preceded by one
	// (or directly follows the opening line) and is followed only by
	// optional spaces/tabs and a line break or end of input. This keeps a
	// "---" inside a YAML value (e.g. "title: a---b") from being mistaken
	// for the closing delimiter. LF and CRLF line endings are both accepted.
	//
	// The optional YAML group is lazy ("??") so that an immediately
	// following "---" is taken as the closing delimiter of an empty block.
	frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(?:(.*?)\r?\n)??---[ \t]*(?:\r?\n|$)`)
	// headingRegex matches a single markdown heading line: group 1 is the
	// run of 1-6 '#' characters, group 2 is the heading text.
	headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`)
)
// ParseTopic parses a markdown file into a Topic.
//
// path is stored on the topic and supplies the fallback ID (derived from the
// file name). content is the raw file; YAML frontmatter, when present and
// valid, supplies Title, Tags, Related and Order and is stripped from
// Content.
//
// The ID is chosen in this order of preference: frontmatter title, first H1
// heading (only when there is no frontmatter title), file name.
//
// Tags, Related and Sections are never nil on the returned topic: a
// frontmatter block that omits tags/related, or a body without headings,
// leaves the empty-slice defaults in place instead of overwriting them
// with nil.
//
// The returned error is always nil.
func ParseTopic(path string, content []byte) (*Topic, error) {
	topic := &Topic{
		Path:     path,
		ID:       GenerateID(pathToTitle(path)),
		Sections: []Section{},
		Tags:     []string{},
		Related:  []string{},
	}

	// Extract YAML frontmatter if present.
	fm, body := ExtractFrontmatter(string(content))
	if fm != nil {
		topic.Title = fm.Title
		topic.Order = fm.Order
		// Keep the non-nil defaults when the frontmatter omits a list.
		if fm.Tags != nil {
			topic.Tags = fm.Tags
		}
		if fm.Related != nil {
			topic.Related = fm.Related
		}
		if topic.Title != "" {
			topic.ID = GenerateID(topic.Title)
		}
	}

	topic.Content = body

	// ExtractSections yields nil for a body without headings; keep the
	// empty-slice default in that case.
	if sections := ExtractSections(body); sections != nil {
		topic.Sections = sections
	}

	// No title from frontmatter: fall back to the first H1, which then also
	// determines the ID.
	if topic.Title == "" {
		for _, s := range topic.Sections {
			if s.Level == 1 {
				topic.Title = s.Title
				topic.ID = GenerateID(s.Title)
				break
			}
		}
	}

	return topic, nil
}
// ExtractFrontmatter splits a leading YAML frontmatter block off content.
//
// On success it returns the decoded frontmatter and the text that follows
// the block. When content has no frontmatter block, or the block is not
// valid YAML, it returns nil together with content unchanged.
func ExtractFrontmatter(content string) (*Frontmatter, string) {
	groups := frontmatterRegex.FindStringSubmatch(content)
	if groups == nil {
		return nil, content
	}
	// groups[0] is the whole delimited block, groups[1] the YAML inside it.
	block, yamlSrc := groups[0], groups[1]

	parsed := new(Frontmatter)
	if err := yaml.Unmarshal([]byte(yamlSrc), parsed); err != nil {
		// Deliberately lenient: undecodable frontmatter is treated as
		// ordinary content rather than reported as an error.
		return nil, content
	}

	// The block is anchored at the start, so the body is everything after it.
	return parsed, content[len(block):]
}
// ExtractSections parses markdown and returns its sections as a slice, in
// document order. It collects everything AllSections yields for content.
func ExtractSections(content string) []Section {
return slices.Collect(AllSections(content))
}
// AllSections returns an iterator over the markdown sections of content, in
// document order.
//
// A section starts at a heading line ("#" to "######" followed by
// whitespace and text) and runs until the next heading or the end of the
// input. Each yielded Section carries the heading's ID, title, level, its
// 1-indexed line number, and the whitespace-trimmed text between this
// heading and the next. Text before the first heading belongs to no section.
//
// Lines inside fenced code blocks (``` or ~~~) are never treated as
// headings, so a "# comment" in a shell snippet stays part of the enclosing
// section's content. An unclosed fence extends to the end of the input.
func AllSections(content string) iter.Seq[Section] {
	return func(yield func(Section) bool) {
		var (
			current      *Section
			contentLines []string
			fenceMarker  byte // 0 while outside a fenced code block
			fenceLen     int
		)

		// flush finalises the section being built (if any) and hands it to
		// the consumer; it reports whether iteration should continue.
		flush := func() bool {
			if current == nil {
				return true
			}
			current.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
			return yield(*current)
		}

		lineNum := 0
		for line := range strings.SplitSeq(content, "\n") {
			lineNum++ // 1-indexed

			// Track fence state first; fence lines and everything between
			// them are plain content.
			marker, runLen, info := fenceDelimiter(line)
			inFence := fenceMarker != 0
			switch {
			case !inFence && runLen > 0:
				fenceMarker, fenceLen = marker, runLen
				inFence = true
			case inFence && marker == fenceMarker && runLen >= fenceLen && info == "":
				// Closing fence: still part of the block, but ends it.
				fenceMarker, fenceLen = 0, 0
			}

			if !inFence {
				if match := headingRegex.FindStringSubmatch(line); match != nil {
					if !flush() {
						return
					}
					title := strings.TrimSpace(match[2])
					current = &Section{
						ID:    GenerateID(title),
						Title: title,
						Level: len(match[1]),
						Line:  lineNum,
					}
					contentLines = []string{}
					continue
				}
			}

			if current != nil {
				contentLines = append(contentLines, line)
			}
		}

		// Emit the last section.
		flush()
	}
}

// fenceDelimiter reports whether line is a code-fence line: at most three
// leading spaces followed by a run of three or more backticks or tildes.
// It returns the fence character, the run length, and the trimmed text after
// the run. length is 0 when line is not a fence line.
func fenceDelimiter(line string) (marker byte, length int, info string) {
	trimmed := strings.TrimLeft(line, " ")
	if trimmed == "" || len(line)-len(trimmed) > 3 {
		return 0, 0, ""
	}
	marker = trimmed[0]
	if marker != '`' && marker != '~' {
		return 0, 0, ""
	}
	length = len(trimmed) - len(strings.TrimLeft(trimmed, string(marker)))
	if length < 3 {
		return 0, 0, ""
	}
	info = strings.TrimSpace(trimmed[length:])
	// A backtick run followed by more backticks on the same line is inline
	// code, not a fence.
	if marker == '`' && strings.Contains(info, "`") {
		return 0, 0, ""
	}
	return marker, length, info
}
// GenerateID turns a title into a lower-case, hyphen-separated identifier.
//
//	"Getting Started" -> "getting-started"
//
// Letters and digits (in any script) are kept, lower-cased. Runs of
// whitespace, '-' and '_' collapse into a single hyphen between kept
// characters. Every other character is dropped. The result never starts or
// ends with a hyphen; it is empty when the title contains no letter or digit.
func GenerateID(title string) string {
	var id strings.Builder
	// pendingSep records that a separator was seen after at least one kept
	// character; the hyphen is written only once another kept character
	// follows, which rules out leading, trailing and doubled hyphens.
	pendingSep := false

	for _, r := range strings.ToLower(title) {
		switch {
		case unicode.IsLetter(r), unicode.IsDigit(r):
			if pendingSep {
				id.WriteByte('-')
				pendingSep = false
			}
			id.WriteRune(r)
		case unicode.IsSpace(r), r == '-', r == '_':
			pendingSep = id.Len() > 0
		}
		// Anything else (punctuation, symbols, emoji) is skipped.
	}

	return id.String()
}
// pathToTitle converts a file path to a title.
//
//	"getting-started.md" -> "Getting Started"
//
// Only the final path element is used. Its extension is removed, hyphens
// and underscores become word breaks, and each word is rewritten with an
// upper-case first letter and lower-case remainder.
//
// Casing is applied per rune rather than per byte, so a word that starts
// with a multi-byte character (e.g. "été") is capitalised correctly instead
// of being split in the middle of its UTF-8 encoding.
func pathToTitle(path string) string {
	// Get filename without directory (cross-platform).
	filename := filepath.Base(path)

	// Remove extension; TrimSuffix is a no-op when there is none.
	filename = strings.TrimSuffix(filename, filepath.Ext(filename))

	// Replace hyphens/underscores with spaces.
	filename = strings.NewReplacer("-", " ", "_", " ").Replace(filename)

	// Title case. strings.Fields never returns empty words, so every word
	// has at least one rune.
	words := strings.Fields(filename)
	for i, word := range words {
		runes := []rune(word)
		words[i] = strings.ToUpper(string(runes[:1])) + strings.ToLower(string(runes[1:]))
	}

	return strings.Join(words, " ")
}

696
pkg/help/parser_test.go Normal file
View file

@ -0,0 +1,696 @@
// SPDX-Licence-Identifier: EUPL-1.2
package help
import (
"fmt"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestGenerateID_Good(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "simple title",
input: "Getting Started",
expected: "getting-started",
},
{
name: "already lowercase",
input: "installation",
expected: "installation",
},
{
name: "multiple spaces",
input: "Quick Start Guide",
expected: "quick-start-guide",
},
{
name: "with numbers",
input: "Chapter 1 Introduction",
expected: "chapter-1-introduction",
},
{
name: "special characters",
input: "What's New? (v2.0)",
expected: "whats-new-v20",
},
{
name: "underscores",
input: "config_file_reference",
expected: "config-file-reference",
},
{
name: "hyphens preserved",
input: "pre-commit hooks",
expected: "pre-commit-hooks",
},
{
name: "leading trailing spaces",
input: " Trimmed Title ",
expected: "trimmed-title",
},
{
name: "unicode letters",
input: "Configuración Básica",
expected: "configuración-básica",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := GenerateID(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
// TestExtractFrontmatter_Good parses a document with a complete frontmatter
// block and checks every decoded field as well as the remaining body.
func TestExtractFrontmatter_Good(t *testing.T) {
content := `---
title: Getting Started
tags: [intro, setup]
order: 1
related:
- installation
- configuration
---
# Welcome
This is the content.
`
fm, body := ExtractFrontmatter(content)
assert.NotNil(t, fm)
assert.Equal(t, "Getting Started", fm.Title)
assert.Equal(t, []string{"intro", "setup"}, fm.Tags)
assert.Equal(t, 1, fm.Order)
assert.Equal(t, []string{"installation", "configuration"}, fm.Related)
// The body is whatever follows the closing delimiter.
assert.Contains(t, body, "# Welcome")
assert.Contains(t, body, "This is the content.")
}
// TestExtractFrontmatter_Good_NoFrontmatter checks that a document without a
// frontmatter block yields nil frontmatter and the input returned unchanged.
func TestExtractFrontmatter_Good_NoFrontmatter(t *testing.T) {
content := `# Just a Heading
Some content here.
`
fm, body := ExtractFrontmatter(content)
assert.Nil(t, fm)
assert.Equal(t, content, body)
}
// TestExtractFrontmatter_Good_CRLF checks that frontmatter delimited with
// Windows-style CRLF line endings is recognised and decoded.
func TestExtractFrontmatter_Good_CRLF(t *testing.T) {
	doc := "---\r\ntitle: CRLF Test\r\n---\r\n\r\n# Content"

	meta, rest := ExtractFrontmatter(doc)

	assert.NotNil(t, meta)
	assert.Equal(t, "CRLF Test", meta.Title)
	assert.Contains(t, rest, "# Content")
}
// TestExtractFrontmatter_Good_Empty checks that a frontmatter block with
// nothing between its delimiters still parses, producing zero-valued
// metadata and leaving the body intact.
func TestExtractFrontmatter_Good_Empty(t *testing.T) {
	doc := "---\n---\n# Content"

	meta, rest := ExtractFrontmatter(doc)

	// An empty block is valid, not an error.
	assert.NotNil(t, meta)
	assert.Equal(t, "", meta.Title)
	assert.Contains(t, rest, "# Content")
}
// TestExtractFrontmatter_Bad_InvalidYAML checks the lenient failure mode:
// a delimited block that is not valid YAML is not stripped, and no
// frontmatter is returned.
func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) {
content := `---
title: [invalid yaml
---
# Content
`
fm, body := ExtractFrontmatter(content)
// Invalid YAML should return nil frontmatter and original content
assert.Nil(t, fm)
assert.Equal(t, content, body)
}
// TestExtractSections_Good extracts sections from a document with mixed
// heading levels and checks ID, title, level and content for each one, plus
// the 1-indexed line number of the first heading.
func TestExtractSections_Good(t *testing.T) {
content := `# Main Title
Introduction paragraph.
## Installation
Install instructions here.
More details.
### Prerequisites
You need these things.
## Configuration
Config info here.
`
sections := ExtractSections(content)
assert.Len(t, sections, 4)
// Main Title (H1)
assert.Equal(t, "main-title", sections[0].ID)
assert.Equal(t, "Main Title", sections[0].Title)
assert.Equal(t, 1, sections[0].Level)
assert.Equal(t, 1, sections[0].Line)
assert.Contains(t, sections[0].Content, "Introduction paragraph.")
// Installation (H2)
assert.Equal(t, "installation", sections[1].ID)
assert.Equal(t, "Installation", sections[1].Title)
assert.Equal(t, 2, sections[1].Level)
assert.Contains(t, sections[1].Content, "Install instructions here.")
assert.Contains(t, sections[1].Content, "More details.")
// Prerequisites (H3)
assert.Equal(t, "prerequisites", sections[2].ID)
assert.Equal(t, "Prerequisites", sections[2].Title)
assert.Equal(t, 3, sections[2].Level)
assert.Contains(t, sections[2].Content, "You need these things.")
// Configuration (H2)
assert.Equal(t, "configuration", sections[3].ID)
assert.Equal(t, "Configuration", sections[3].Title)
assert.Equal(t, 2, sections[3].Level)
}
// TestExtractSections_Good_AllHeadingLevels checks that each heading depth
// from H1 to H6 is recognised and reported with the matching Level.
func TestExtractSections_Good_AllHeadingLevels(t *testing.T) {
content := `# H1
## H2
### H3
#### H4
##### H5
###### H6
`
sections := ExtractSections(content)
assert.Len(t, sections, 6)
// Sections come back in document order, so index i holds level i+1.
for i, level := range []int{1, 2, 3, 4, 5, 6} {
assert.Equal(t, level, sections[i].Level)
}
}
// TestExtractSections_Good_Empty checks that text without any heading
// produces no sections.
func TestExtractSections_Good_Empty(t *testing.T) {
content := `Just plain text.
No headings here.
`
sections := ExtractSections(content)
assert.Empty(t, sections)
}
// TestParseTopic_Good parses a document that has both frontmatter and
// headings and checks the metadata, the extracted sections, and that the
// frontmatter block has been removed from the topic content.
func TestParseTopic_Good(t *testing.T) {
content := []byte(`---
title: Quick Start Guide
tags: [intro, quickstart]
order: 5
related:
- installation
---
# Quick Start Guide
Welcome to the guide.
## First Steps
Do this first.
## Next Steps
Then do this.
`)
topic, err := ParseTopic("docs/quick-start.md", content)
assert.NoError(t, err)
assert.NotNil(t, topic)
// Check metadata from frontmatter
// (the ID comes from the frontmatter title, not from the file name)
assert.Equal(t, "quick-start-guide", topic.ID)
assert.Equal(t, "Quick Start Guide", topic.Title)
assert.Equal(t, "docs/quick-start.md", topic.Path)
assert.Equal(t, []string{"intro", "quickstart"}, topic.Tags)
assert.Equal(t, []string{"installation"}, topic.Related)
assert.Equal(t, 5, topic.Order)
// Check sections
assert.Len(t, topic.Sections, 3)
assert.Equal(t, "quick-start-guide", topic.Sections[0].ID)
assert.Equal(t, "first-steps", topic.Sections[1].ID)
assert.Equal(t, "next-steps", topic.Sections[2].ID)
// Content should not include frontmatter
assert.NotContains(t, topic.Content, "---")
assert.Contains(t, topic.Content, "# Quick Start Guide")
}
// TestParseTopic_Good_NoFrontmatter checks that, without frontmatter, the
// topic title and ID are taken from the first H1 heading.
func TestParseTopic_Good_NoFrontmatter(t *testing.T) {
content := []byte(`# Getting Started
This is a simple doc.
## Installation
Install it here.
`)
topic, err := ParseTopic("getting-started.md", content)
assert.NoError(t, err)
assert.NotNil(t, topic)
// Title should come from first H1
assert.Equal(t, "Getting Started", topic.Title)
assert.Equal(t, "getting-started", topic.ID)
// Sections extracted
assert.Len(t, topic.Sections, 2)
}
// TestParseTopic_Good_NoHeadings checks that a document with frontmatter
// but no headings gets its title and ID from the frontmatter and has no
// sections.
func TestParseTopic_Good_NoHeadings(t *testing.T) {
content := []byte(`---
title: Plain Content
---
Just some text without any headings.
`)
topic, err := ParseTopic("plain.md", content)
assert.NoError(t, err)
assert.NotNil(t, topic)
assert.Equal(t, "Plain Content", topic.Title)
assert.Equal(t, "plain-content", topic.ID)
assert.Empty(t, topic.Sections)
}
// TestParseTopic_Good_IDFromPath covers the last-resort ID source: with
// neither frontmatter nor headings, the ID is derived from the file name
// while the title is left empty.
func TestParseTopic_Good_IDFromPath(t *testing.T) {
	raw := []byte(`Just content, no frontmatter or headings.`)

	got, err := ParseTopic("commands/dev-workflow.md", raw)

	assert.NoError(t, err)
	assert.NotNil(t, got)
	// Only the ID falls back to the path; no title is invented.
	assert.Equal(t, "dev-workflow", got.ID)
	assert.Equal(t, "", got.Title)
}
func TestPathToTitle_Good(t *testing.T) {
tests := []struct {
path string
expected string
}{
{"getting-started.md", "Getting Started"},
{"commands/dev.md", "Dev"},
{"path/to/file_name.md", "File Name"},
{"UPPERCASE.md", "Uppercase"},
{"no-extension", "No Extension"},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
result := pathToTitle(tt.path)
assert.Equal(t, tt.expected, result)
})
}
}
// --- Phase 0: Expanded parser tests ---

// TestParseTopic_Good_EmptyInput checks that an empty file still yields a
// usable topic: the ID comes from the file name and every other field is
// empty.
func TestParseTopic_Good_EmptyInput(t *testing.T) {
	got, err := ParseTopic("empty.md", []byte(""))

	require.NoError(t, err)
	assert.NotNil(t, got)

	assert.Equal(t, "empty", got.ID)
	assert.Equal(t, "", got.Title)
	assert.Equal(t, "", got.Content)

	assert.Empty(t, got.Sections)
	assert.Empty(t, got.Tags)
	assert.Empty(t, got.Related)
}
// TestParseTopic_Good_FrontmatterOnly checks a document that consists of
// frontmatter alone: metadata is populated, there are no sections, and the
// remaining content is blank.
func TestParseTopic_Good_FrontmatterOnly(t *testing.T) {
// Frontmatter with no body or sections
content := []byte(`---
title: Metadata Only
tags: [meta]
order: 99
---
`)
topic, err := ParseTopic("meta.md", content)
require.NoError(t, err)
assert.Equal(t, "metadata-only", topic.ID)
assert.Equal(t, "Metadata Only", topic.Title)
assert.Equal(t, []string{"meta"}, topic.Tags)
assert.Equal(t, 99, topic.Order)
assert.Empty(t, topic.Sections)
// Body after frontmatter is just a newline
// (compared after trimming so the exact whitespace does not matter)
assert.Equal(t, "", strings.TrimSpace(topic.Content))
}
// TestExtractFrontmatter_Bad_MalformedYAML runs ExtractFrontmatter over
// several malformed frontmatter blocks. The test does not require the YAML
// to be rejected; it requires that the call does not panic and that, when
// no frontmatter is returned, the content comes back unchanged.
func TestExtractFrontmatter_Bad_MalformedYAML(t *testing.T) {
tests := []struct {
name string
content string
}{
{
name: "unclosed bracket",
content: `---
title: [broken
tags: [also broken
---
# Content`,
},
{
name: "tab indentation error",
content: "---\ntitle: Good\n\t- bad indent\n---\n\n# Content",
},
{
name: "duplicate keys with conflicting types",
// YAML spec allows duplicate keys but implementations may vary;
// this tests that the parser does not panic regardless.
content: `---
title: First
title:
nested: value
---
# Content`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fm, body := ExtractFrontmatter(tt.content)
// Malformed YAML should return nil frontmatter without panic
if fm == nil {
// Body should be original content when YAML fails
assert.Equal(t, tt.content, body)
}
// No panic is the key assertion — test reaching here is success
})
}
}
// TestExtractFrontmatter_Bad_NotAtStart checks that a "---" delimited block
// appearing after other text is not treated as frontmatter.
func TestExtractFrontmatter_Bad_NotAtStart(t *testing.T) {
// Frontmatter delimiters that do not start at the beginning of the file
content := `Some preamble text.
---
title: Should Not Parse
---
# Content`
fm, body := ExtractFrontmatter(content)
assert.Nil(t, fm)
assert.Equal(t, content, body)
}
// TestExtractSections_Good_DeeplyNested walks a document that descends from
// H1 to H6 and checks level, title and a sample of the content attribution.
func TestExtractSections_Good_DeeplyNested(t *testing.T) {
content := `# Level 1
Top-level content.
## Level 2
Second level.
### Level 3
Third level.
#### Level 4
Fourth level details.
##### Level 5
Fifth level fine print.
###### Level 6
Deepest heading level.
`
sections := ExtractSections(content)
// require (not assert) so the indexing below cannot go out of range.
require.Len(t, sections, 6)
for i, expected := range []struct {
level int
title string
}{
{1, "Level 1"},
{2, "Level 2"},
{3, "Level 3"},
{4, "Level 4"},
{5, "Level 5"},
{6, "Level 6"},
} {
assert.Equal(t, expected.level, sections[i].Level, "section %d level", i)
assert.Equal(t, expected.title, sections[i].Title, "section %d title", i)
}
// Verify content is associated with correct sections
assert.Contains(t, sections[0].Content, "Top-level content.")
assert.Contains(t, sections[3].Content, "Fourth level details.")
assert.Contains(t, sections[5].Content, "Deepest heading level.")
}
// TestExtractSections_Good_DeeplyNestedWithContent checks that a document
// starting at H4 (with no shallower heading before it) is still split into
// sections, each carrying its own content.
func TestExtractSections_Good_DeeplyNestedWithContent(t *testing.T) {
// H4, H5, H6 with meaningful content under each
content := `#### Configuration Options
Set these in your config file.
##### Advanced Options
Only for power users.
###### Experimental Flags
These may change without notice.
`
sections := ExtractSections(content)
require.Len(t, sections, 3)
assert.Equal(t, 4, sections[0].Level)
assert.Equal(t, "Configuration Options", sections[0].Title)
assert.Contains(t, sections[0].Content, "Set these in your config file.")
assert.Equal(t, 5, sections[1].Level)
assert.Equal(t, "Advanced Options", sections[1].Title)
assert.Contains(t, sections[1].Content, "Only for power users.")
assert.Equal(t, 6, sections[2].Level)
assert.Equal(t, "Experimental Flags", sections[2].Title)
assert.Contains(t, sections[2].Content, "These may change without notice.")
}
// TestParseTopic_Good_Unicode parses documents written in non-ASCII scripts
// (CJK, emoji, accented Latin, mixed scripts) and checks that the title is
// preserved exactly, that a non-empty ID is produced, and that at least one
// section is extracted.
func TestParseTopic_Good_Unicode(t *testing.T) {
tests := []struct {
name string
content string
title string
}{
{
name: "CJK characters",
content: `---
title: 日本語ドキュメント
tags: [日本語, ドキュメント]
---
# 日本語ドキュメント
はじめにの内容です
## インストール
インストール手順はこちら
`,
title: "日本語ドキュメント",
},
{
name: "emoji in title and content",
content: `---
title: Rocket Launch 🚀
tags: [emoji, fun]
---
# Rocket Launch 🚀
This topic has emoji 🎉 in the content.
## Features
- Fast
- Reliable 🔒
`,
title: "Rocket Launch 🚀",
},
{
name: "diacritics and accented characters",
content: `---
title: Présentation Générale
tags: [français]
---
# Présentation Générale
Bienvenue à la documentation. Les données sont protégées.
## Résumé
Aperçu des fonctionnalités clés.
`,
title: "Présentation Générale",
},
{
name: "mixed scripts",
content: `---
title: Mixed Скрипты 混合
---
# Mixed Скрипты 混合
Content with Кириллица, 中文, العربية, and िन्द.
`,
title: "Mixed Скрипты 混合",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Every case is parsed under the same path; the path is not asserted on.
topic, err := ParseTopic("unicode.md", []byte(tt.content))
require.NoError(t, err)
assert.Equal(t, tt.title, topic.Title)
assert.NotEmpty(t, topic.ID)
assert.True(t, len(topic.Sections) > 0, "should extract sections from unicode content")
})
}
}
func TestParseTopic_Good_VeryLongDocument(t *testing.T) {
// Build a document with 10,000+ lines
var b strings.Builder
b.WriteString("---\ntitle: Massive Document\ntags: [large, stress]\n---\n\n")
// Generate 100 sections, each with ~100 lines of content
for i := range 100 {
b.WriteString(fmt.Sprintf("## Section %d\n\n", i+1))
for j := range 100 {
b.WriteString(fmt.Sprintf("Line %d of section %d: Lorem ipsum dolor sit amet.\n", j+1, i+1))
}
b.WriteString("\n")
}
content := b.String()
lineCount := strings.Count(content, "\n")
assert.Greater(t, lineCount, 10000, "document should exceed 10K lines")
topic, err := ParseTopic("massive.md", []byte(content))
require.NoError(t, err)
assert.Equal(t, "Massive Document", topic.Title)
assert.Equal(t, "massive-document", topic.ID)
assert.Len(t, topic.Sections, 100)
// Verify first and last sections have correct titles
assert.Equal(t, "Section 1", topic.Sections[0].Title)
assert.Equal(t, "Section 100", topic.Sections[99].Title)
// Verify content is captured in sections
assert.Contains(t, topic.Sections[0].Content, "Line 1 of section 1")
assert.Contains(t, topic.Sections[99].Content, "Line 100 of section 100")
}
// TestExtractSections_Bad_EmptyString checks that empty input produces no
// sections.
func TestExtractSections_Bad_EmptyString(t *testing.T) {
	got := ExtractSections("")
	assert.Empty(t, got)
}
// TestExtractSections_Bad_HeadingWithoutSpace checks that lines where '#'
// is immediately followed by text are not recognised as headings.
func TestExtractSections_Bad_HeadingWithoutSpace(t *testing.T) {
// "#NoSpace" is not a valid markdown heading (needs space after #)
content := `#NoSpace
##AlsoNoSpace
Some text.
`
sections := ExtractSections(content)
assert.Empty(t, sections, "headings without space after # should not be parsed")
}
// TestExtractSections_Good_ConsecutiveHeadings checks that headings with no
// text between them each become a section with empty content.
func TestExtractSections_Good_ConsecutiveHeadings(t *testing.T) {
// Headings with no content between them
content := `# Title
## Subtitle
### Sub-subtitle
`
sections := ExtractSections(content)
require.Len(t, sections, 3)
// All three sections should have empty content
assert.Equal(t, "", sections[0].Content)
assert.Equal(t, "", sections[1].Content)
assert.Equal(t, "", sections[2].Content)
}
func TestGenerateID_Ugly_EmptyString(t *testing.T) {
result := GenerateID("")
assert.Equal(t, "", result)
}
func TestGenerateID_Good_OnlySpecialChars(t *testing.T) {
result := GenerateID("!@#$%^&*()")
assert.Equal(t, "", result)
}
// TestGenerateID_Good_CJK checks that a CJK title produces a non-empty ID
// that contains no spaces.
func TestGenerateID_Good_CJK(t *testing.T) {
	id := GenerateID("日本語テスト")

	assert.NotEmpty(t, id)
	assert.NotContains(t, id, " ")
}
// TestGenerateID_Good_Emoji checks that an emoji between two words is
// dropped and that the spaces around it collapse into a single hyphen.
func TestGenerateID_Good_Emoji(t *testing.T) {
	// Emoji are neither letters nor digits, so nothing of them survives.
	id := GenerateID("Hello 🌍 World")
	assert.Equal(t, "hello-world", id)
}

36
pkg/help/render.go Normal file
View file

@ -0,0 +1,36 @@
// SPDX-Licence-Identifier: EUPL-1.2
package help
import (
"bytes"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/renderer/html"
)
// RenderMarkdown renders Markdown source as an HTML fragment.
//
// Rendering is done by goldmark with the GFM extension (tables,
// strikethrough, autolinks) and the Typographer extension (smart quotes and
// dashes). The renderer runs in "unsafe" mode, so raw HTML embedded in the
// source is passed through to the output unescaped; callers should only
// feed it content they trust.
//
// The result has no <html>/<body> wrapper; page structure is left to the
// caller. An error from the converter is returned with an empty string.
func RenderMarkdown(content string) (string, error) {
	extensions := goldmark.WithExtensions(
		extension.GFM,
		extension.Typographer,
	)
	rendering := goldmark.WithRendererOptions(
		html.WithUnsafe(),
	)

	var out bytes.Buffer
	err := goldmark.New(extensions, rendering).Convert([]byte(content), &out)
	if err != nil {
		return "", err
	}
	return out.String(), nil
}

132
pkg/help/render_test.go Normal file
View file

@ -0,0 +1,132 @@
// SPDX-Licence-Identifier: EUPL-1.2
package help
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestRenderMarkdown_Good renders a range of core Markdown constructs and
// checks that the expected HTML fragments appear in the output. Each case
// runs as its own subtest; empty input is handled separately because it is
// compared for exact equality rather than containment.
func TestRenderMarkdown_Good(t *testing.T) {
	cases := []struct {
		name  string
		input string
		wants []string // fragments that must all appear in the rendered HTML
	}{
		{
			name:  "heading hierarchy H1-H6",
			input: "# H1\n## H2\n### H3\n#### H4\n##### H5\n###### H6\n",
			wants: []string{
				"<h1>H1</h1>",
				"<h2>H2</h2>",
				"<h3>H3</h3>",
				"<h4>H4</h4>",
				"<h5>H5</h5>",
				"<h6>H6</h6>",
			},
		},
		{
			name:  "fenced code blocks with language",
			input: "```go\nfmt.Println(\"hello\")\n```\n",
			wants: []string{
				"<pre><code class=\"language-go\">",
				"fmt.Println",
				"</code></pre>",
			},
		},
		{
			name:  "inline code backticks",
			input: "Use `go test` to run tests.\n",
			wants: []string{"<code>go test</code>"},
		},
		{
			name:  "unordered lists",
			input: "- Alpha\n- Beta\n- Gamma\n",
			wants: []string{
				"<ul>",
				"<li>Alpha</li>",
				"<li>Beta</li>",
				"<li>Gamma</li>",
				"</ul>",
			},
		},
		{
			name:  "ordered lists",
			input: "1. First\n2. Second\n3. Third\n",
			wants: []string{
				"<ol>",
				"<li>First</li>",
				"<li>Second</li>",
				"<li>Third</li>",
				"</ol>",
			},
		},
		{
			name:  "links and images",
			input: "[Example](https://example.com)\n\n![Alt text](image.png)\n",
			wants: []string{
				`<a href="https://example.com">Example</a>`,
				`<img src="image.png" alt="Alt text"`,
			},
		},
		{
			name:  "GFM tables",
			input: "| Name | Value |\n|------|-------|\n| foo | 42 |\n| bar | 99 |\n",
			wants: []string{
				"<table>",
				"<th>Name</th>",
				"<th>Value</th>",
				"<td>foo</td>",
				"<td>42</td>",
				"</table>",
			},
		},
		{
			name:  "special characters escaped in text",
			input: "Use `<div>` tags & \"quotes\".\n",
			wants: []string{
				"&amp;",        // the & in prose is escaped
				"&lt;div&gt;", // angle brackets in code are escaped
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			rendered, err := RenderMarkdown(tc.input)
			require.NoError(t, err)
			for _, fragment := range tc.wants {
				assert.Contains(t, rendered, fragment)
			}
		})
	}

	t.Run("empty input returns empty string", func(t *testing.T) {
		rendered, err := RenderMarkdown("")
		require.NoError(t, err)
		assert.Equal(t, "", rendered)
	})
}
// TestRenderMarkdown_Good_RawHTML checks that raw HTML in the Markdown
// source reaches the output verbatim (the renderer is configured with
// html.WithUnsafe()).
func TestRenderMarkdown_Good_RawHTML(t *testing.T) {
	source := "<div class=\"custom\">raw html</div>\n"

	rendered, err := RenderMarkdown(source)

	require.NoError(t, err)
	assert.Contains(t, rendered, `<div class="custom">raw html</div>`)
}
// TestRenderMarkdown_Good_GFMExtras checks two GitHub Flavoured Markdown
// features: strikethrough and automatic linking of bare URLs.
func TestRenderMarkdown_Good_GFMExtras(t *testing.T) {
	cases := []struct {
		name, input, want string
	}{
		{"strikethrough", "~~deleted~~\n", "<del>deleted</del>"},
		{"autolinks", "Visit https://example.com for details.\n", `<a href="https://example.com">`},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			rendered, err := RenderMarkdown(tc.input)
			require.NoError(t, err)
			assert.Contains(t, rendered, tc.want)
		})
	}
}
// TestRenderMarkdown_Good_Typographer checks that the Typographer extension
// is active by asserting that ASCII "--" and "---" sequences do not survive
// rendering. It does not pin the exact replacement characters.
func TestRenderMarkdown_Good_Typographer(t *testing.T) {
	source := "She said -- \"hello\" --- and left.\n"

	rendered, err := RenderMarkdown(source)
	require.NoError(t, err)

	// Neither dash sequence may appear literally in the output.
	for _, literal := range []string{" -- ", " --- "} {
		assert.NotContains(t, rendered, literal)
	}
}

562
pkg/help/search.go Normal file
View file

@ -0,0 +1,562 @@
package help
import (
"cmp"
"iter"
"regexp"
"slices"
"strings"
"unicode"
)
// Per-word match weights: added to a topic's score once for every index
// word that a query word matches in the given way.
const (
	scoreExactWord  = 1.0 // Query word is an index word
	scoreStemWord   = 0.7 // Exact match on a stemmed query word (between exact and prefix)
	scorePrefixWord = 0.5 // Query word is a proper prefix of an index word
	scoreFuzzyWord  = 0.3 // Index word within fuzzyMaxDistance edits of the query word
)

// Per-topic boosts: added on top of the word weights when the query matches
// a prominent part of the topic.
const (
	scoreTitleBoost   = 10.0 // Query word appears in topic title
	scorePhraseBoost  = 8.0  // Quoted phrase appears in the topic text
	scoreSectionBoost = 5.0  // Query word appears in the matched section's title
	scoreTagBoost     = 3.0  // Query word appears in a topic tag
	scoreAllWords     = 2.0  // All query words present (multi-word bonus)
)

// fuzzyMaxDistance is the maximum edit distance for fuzzy matching.
const fuzzyMaxDistance = 2
// SearchResult represents a search match: one topic that scored against a
// query, together with the best-matching section (if any) and a snippet.
type SearchResult struct {
Topic *Topic
Section *Section // nil if topic-level match
Score float64 // Accumulated relevance; results are sorted by this, highest first
Snippet string // Context around match
}
// searchIndex provides full-text search over topics. It is an inverted
// index: every token maps to the IDs of the topics it occurs in, and topics
// are kept by ID so results can be resolved back to full Topic values.
type searchIndex struct {
topics map[string]*Topic // topicID -> Topic
index map[string][]string // word -> []topicID
}
// newSearchIndex returns an empty search index whose maps are initialised
// and ready to be written to.
func newSearchIndex() *searchIndex {
	idx := new(searchIndex)
	idx.topics = map[string]*Topic{}
	idx.index = map[string][]string{}
	return idx
}
// Add registers a topic and indexes all of its text for searching.
//
// The topic is stored under its ID (replacing any topic previously stored
// under that ID), and every token from its title, content, section titles,
// section content and tags is mapped to that ID. The index does not record
// which field a token came from.
func (i *searchIndex) Add(topic *Topic) {
	i.topics[topic.ID] = topic

	// indexText tokenises one piece of text and maps each token to the topic.
	indexText := func(text string) {
		for _, token := range tokenize(text) {
			i.addToIndex(token, topic.ID)
		}
	}

	indexText(topic.Title)
	indexText(topic.Content)

	for _, sec := range topic.Sections {
		indexText(sec.Title)
		indexText(sec.Content)
	}

	for _, tag := range topic.Tags {
		indexText(tag)
	}
}
// addToIndex records that word occurs in the topic with the given ID.
// A topic ID is listed at most once per word.
func (i *searchIndex) addToIndex(word, topicID string) {
	ids := i.index[word]
	if !slices.Contains(ids, topicID) {
		i.index[word] = append(ids, topicID)
	}
}
// Search finds topics matching the query. Supports:
// - Single and multi-word keyword queries
// - Quoted phrase search (e.g. `"rate limit"`)
// - Fuzzy matching via Levenshtein distance for typo tolerance
// - Prefix matching for partial words
//
// It returns nil when the query contains neither words nor phrases.
// Otherwise it returns one result per topic that received any score,
// sorted by score (highest first) and then by topic title.
//
// Scoring happens in two passes. The first pass walks the inverted index
// and accumulates per-word weights (exact, prefix, fuzzy) and the phrase
// boost into a per-topic score. The second pass visits every scored topic
// and adds the title, tag, all-words and section-title boosts, and picks
// the best-matching section and snippet via findBestMatch.
func (i *searchIndex) Search(query string) []*SearchResult {
// Extract quoted phrases before tokenising.
phrases, stripped := extractPhrases(query)
queryWords := tokenize(stripped)
if len(queryWords) == 0 && len(phrases) == 0 {
return nil
}
// Track scores per topic
scores := make(map[string]float64)
// Build set of stemmed query variants for stem-aware scoring.
// A query word is treated as a stem below when it equals the stem of some
// (different) query word.
stemmedWords := make(map[string]bool)
for _, word := range queryWords {
if s := stem(word); s != word {
stemmedWords[s] = true
}
}
for _, word := range queryWords {
isStem := stemmedWords[word]
// Exact matches — score stems lower than raw words.
if topicIDs, ok := i.index[word]; ok {
sc := scoreExactWord
if isStem {
sc = scoreStemWord
}
for _, topicID := range topicIDs {
scores[topicID] += sc
}
}
// Prefix matches (partial word matching)
// Scans the whole index; the weight is added once per matching index
// word, so a topic with several such words is credited several times.
for indexWord, topicIDs := range i.index {
if strings.HasPrefix(indexWord, word) && indexWord != word {
for _, topicID := range topicIDs {
scores[topicID] += scorePrefixWord
}
}
}
// Fuzzy matches (Levenshtein distance)
// Skipped for query words shorter than 3 bytes.
if len(word) >= 3 {
for indexWord, topicIDs := range i.index {
if indexWord == word {
continue // Already scored as exact match
}
if strings.HasPrefix(indexWord, word) {
continue // Already scored as prefix match
}
dist := levenshtein(word, indexWord)
if dist > 0 && dist <= fuzzyMaxDistance {
for _, topicID := range topicIDs {
scores[topicID] += scoreFuzzyWord
}
}
}
}
}
// Pre-compile regexes for snippets
// (case-insensitive, literal match; query words shorter than 2 bytes are
// left out)
var res []*regexp.Regexp
for _, word := range queryWords {
if len(word) >= 2 {
if re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(word)); err == nil {
res = append(res, re)
}
}
}
// Also add phrase regexes for highlighting
for _, phrase := range phrases {
if re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(phrase)); err == nil {
res = append(res, re)
}
}
// Phrase matching: boost topics that contain the exact phrase.
// The comparison is case-insensitive and runs over title, content and all
// section titles/content of every indexed topic, so a phrase alone can
// bring a topic into the result set.
for _, phrase := range phrases {
phraseLower := strings.ToLower(phrase)
for topicID, topic := range i.topics {
var text strings.Builder
text.WriteString(strings.ToLower(topic.Title + " " + topic.Content))
for _, section := range topic.Sections {
text.WriteString(" " + strings.ToLower(section.Title+" "+section.Content))
}
if strings.Contains(text.String(), phraseLower) {
scores[topicID] += scorePhraseBoost
}
}
}
// Build results with title/section/tag boosts and snippet extraction
var results []*SearchResult
for topicID, score := range scores {
topic := i.topics[topicID]
if topic == nil {
continue
}
// Title boost: if query words appear in title
// (substring match; the boost is applied once, however many words match)
titleLower := strings.ToLower(topic.Title)
titleMatchCount := 0
for _, word := range queryWords {
if strings.Contains(titleLower, word) {
titleMatchCount++
}
}
if titleMatchCount > 0 {
score += scoreTitleBoost
}
// Tag boost: if query words match tags
// (applied once per tag that contains any query word)
for _, tag := range topic.Tags {
tagLower := strings.ToLower(tag)
for _, word := range queryWords {
if tagLower == word || strings.Contains(tagLower, word) {
score += scoreTagBoost
break
}
}
}
// Multi-word bonus: if all query words are present in the topic
// (checked as substrings of the lower-cased title and content)
if len(queryWords) > 1 {
allPresent := true
fullText := strings.ToLower(topic.Title + " " + topic.Content)
for _, word := range queryWords {
if !strings.Contains(fullText, word) {
allPresent = false
break
}
}
if allPresent {
score += scoreAllWords
}
}
// Find matching section and extract snippet
section, snippet := i.findBestMatch(topic, queryWords, res)
// Section title boost
// (only the section chosen by findBestMatch is considered)
if section != nil {
sectionTitleLower := strings.ToLower(section.Title)
hasSectionTitleMatch := false
for _, word := range queryWords {
if strings.Contains(sectionTitleLower, word) {
hasSectionTitleMatch = true
break
}
}
if hasSectionTitleMatch {
score += scoreSectionBoost
}
}
results = append(results, &SearchResult{
Topic: topic,
Section: section,
Score: score,
Snippet: snippet,
})
}
// Sort by score (highest first)
// Ties are broken by topic title, ascending.
slices.SortFunc(results, func(a, b *SearchResult) int {
if a.Score != b.Score {
return cmp.Compare(b.Score, a.Score) // Reversed for highest first
}
return cmp.Compare(a.Topic.Title, b.Topic.Title)
})
return results
}
// quotedPhraseRe matches a double-quoted run of one or more non-quote
// characters and captures the text between the quotes. It is compiled once
// at package initialisation so extractPhrases does not recompile it per query.
var quotedPhraseRe = regexp.MustCompile(`"([^"]+)"`)

// extractPhrases pulls quoted substrings from the query and returns them
// alongside the remaining query text with the quoted parts removed.
//
// Each phrase is whitespace-trimmed; phrases that are empty after trimming
// are dropped (but are still removed from the remaining text). Whitespace
// surrounding a removed phrase is left untouched, so
// `hello "rate limit" world` returns phrases=["rate limit"] and
// remaining="hello  world" (two spaces). An unterminated quote matches
// nothing and is left in remaining as-is.
func extractPhrases(query string) (phrases []string, remaining string) {
	for _, m := range quotedPhraseRe.FindAllStringSubmatch(query, -1) {
		// m[1] is the capture group: the text between the quotes.
		if phrase := strings.TrimSpace(m[1]); phrase != "" {
			phrases = append(phrases, phrase)
		}
	}
	remaining = quotedPhraseRe.ReplaceAllString(query, "")
	return phrases, remaining
}
// levenshtein returns the edit distance between a and b, counted in runes:
// the minimum number of single-rune insertions, deletions and substitutions
// needed to turn one string into the other. Search uses it for fuzzy
// matching so that small typos in a query still find results.
func levenshtein(a, b string) int {
	src, dst := []rune(a), []rune(b)
	switch {
	case len(src) == 0:
		return len(dst)
	case len(dst) == 0:
		return len(src)
	}

	// Only the previous and the current row of the DP table are kept.
	width := len(dst) + 1
	above := make([]int, width)
	row := make([]int, width)
	for col := range above {
		above[col] = col
	}

	for r, sc := range src {
		row[0] = r + 1
		for c, dc := range dst {
			substitute := above[c]
			if sc != dc {
				substitute++
			}
			row[c+1] = min(
				above[c+1]+1, // deletion
				row[c]+1,     // insertion
				substitute,   // substitution (free when runes match)
			)
		}
		// The row just filled becomes the reference for the next one.
		above, row = row, above
	}
	return above[len(dst)]
}
// findBestMatch picks the section of topic that best matches queryWords and
// returns it together with a display snippet.
//
// A section scores two points per query word found in its title and one per
// query word found in its content; the first section with the strictly
// highest non-zero score wins. The returned section is nil when no section
// scores above zero. The snippet is highlighted with res when it is taken
// from text that matched, and falls back to the topic content when nothing
// else produced one.
func (i *searchIndex) findBestMatch(topic *Topic, queryWords []string, res []*regexp.Regexp) (*Section, string) {
	var (
		winner  *Section
		snippet string
		top     int
	)

	// A hit in the topic title seeds the snippet from the topic body; a
	// matching section below may still replace it.
	if countMatches(topic.Title, queryWords) > 0 {
		snippet = extractSnippet(topic.Content, res)
	}

	for n := range topic.Sections {
		candidate := &topic.Sections[n]
		inTitle := countMatches(candidate.Title, queryWords)
		inBody := countMatches(candidate.Content, queryWords)

		// Title matches are worth twice as much as content matches.
		weight := 2*inTitle + inBody
		if weight <= top {
			continue
		}
		top, winner = weight, candidate

		// When only the section title matched there is nothing to highlight
		// in the content, so request a plain leading excerpt instead.
		patterns := res
		if inBody == 0 {
			patterns = nil
		}
		snippet = extractSnippet(candidate.Content, patterns)
	}

	// Last resort: nothing above yielded a snippet, use the topic content.
	if snippet == "" && topic.Content != "" {
		snippet = extractSnippet(topic.Content, res)
	}
	return winner, snippet
}
// tokenize splits text into lowercase words for indexing/searching.
// For each word, it also emits the stemmed variant (if different from the
// original) so the index contains both raw and stemmed forms.
func tokenize(text string) []string {
return slices.Collect(Tokens(text))
}
// Tokens returns an iterator over the lowercase words in text.
//
// A word is a maximal run of Unicode letters and digits; everything else is
// a separator. Words shorter than two bytes are skipped. Each remaining word
// is yielded as-is and, when stem produces a different string, immediately
// followed by that stemmed form. Iteration stops as soon as yield returns
// false.
func Tokens(text string) iter.Seq[string] {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}
	return func(yield func(string) bool) {
		for _, w := range strings.FieldsFunc(strings.ToLower(text), isSeparator) {
			if len(w) < 2 {
				continue
			}
			if !yield(w) {
				return
			}
			if s := stem(w); s != w && !yield(s) {
				return
			}
		}
	}
}
// countMatches reports how many entries of queryWords occur as a substring
// of text. The text is lowercased before comparison; the query words are
// used exactly as given, so callers are expected to pass them lowercased.
func countMatches(text string, queryWords []string) int {
	haystack := strings.ToLower(text)
	var hits int
	for idx := range queryWords {
		if !strings.Contains(haystack, queryWords[idx]) {
			continue
		}
		hits++
	}
	return hits
}
// extractSnippet extracts a short snippet around the first match and highlights matches.
func extractSnippet(content string, res []*regexp.Regexp) string {
if content == "" {
return ""
}
const snippetLen = 150
// If no regexes, return start of content without highlighting
if len(res) == 0 {
lines := strings.SplitSeq(content, "\n")
for line := range lines {
line = strings.TrimSpace(line)
if line != "" && !strings.HasPrefix(line, "#") {
runes := []rune(line)
if len(runes) > snippetLen {
return string(runes[:snippetLen]) + "..."
}
return line
}
}
return ""
}
// Find first match position (byte-based)
matchPos := -1
for _, re := range res {
loc := re.FindStringIndex(content)
if loc != nil && (matchPos == -1 || loc[0] < matchPos) {
matchPos = loc[0]
}
}
// Convert to runes for safe slicing
runes := []rune(content)
runeLen := len(runes)
var start, end int
if matchPos == -1 {
// No match found, use start of content
start = 0
end = min(snippetLen, runeLen)
} else {
// Convert byte position to rune position
matchRunePos := len([]rune(content[:matchPos]))
// Extract snippet around match (rune-based)
start = max(matchRunePos-50, 0)
end = min(start+snippetLen, runeLen)
}
snippet := string(runes[start:end])
// Trim to word boundaries
prefix := ""
suffix := ""
if start > 0 {
if idx := strings.Index(snippet, " "); idx != -1 {
snippet = snippet[idx+1:]
prefix = "..."
}
}
if end < runeLen {
if idx := strings.LastIndex(snippet, " "); idx != -1 {
snippet = snippet[:idx]
suffix = "..."
}
}
snippet = strings.TrimSpace(snippet)
if snippet == "" {
return ""
}
// Apply highlighting
highlighted := highlight(snippet, res)
return prefix + highlighted + suffix
}
// highlight returns text with every region matched by any pattern in res
// wrapped in ** markers. Overlapping and directly adjacent matches are fused
// into a single highlighted region. The text is returned unchanged when res
// is empty or nothing matches.
func highlight(text string, res []*regexp.Regexp) string {
	type span struct{ lo, hi int }

	// Collect the byte range of every match of every pattern.
	var spans []span
	for _, re := range res {
		for _, loc := range re.FindAllStringIndex(text, -1) {
			spans = append(spans, span{lo: loc[0], hi: loc[1]})
		}
	}
	if len(spans) == 0 {
		return text
	}

	// Order by start offset; for equal starts the longer span comes first.
	slices.SortFunc(spans, func(x, y span) int {
		if c := cmp.Compare(x.lo, y.lo); c != 0 {
			return c
		}
		return cmp.Compare(y.hi, x.hi)
	})

	// Fuse spans that overlap or touch the one before them.
	merged := make([]span, 0, len(spans))
	merged = append(merged, spans[0])
	for _, s := range spans[1:] {
		last := &merged[len(merged)-1]
		if s.lo > last.hi {
			merged = append(merged, s)
			continue
		}
		last.hi = max(last.hi, s.hi)
	}

	// Emit the text front to back, inserting the markers around each span.
	out := make([]byte, 0, len(text)+4*len(merged))
	cursor := 0
	for _, m := range merged {
		out = append(out, text[cursor:m.lo]...)
		out = append(out, "**"...)
		out = append(out, text[m.lo:m.hi]...)
		out = append(out, "**"...)
		cursor = m.hi
	}
	out = append(out, text[cursor:]...)
	return string(out)
}

View file

@ -0,0 +1,176 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"fmt"
"strings"
"testing"
)
// titleCase capitalises the first letter of a string and leaves the rest
// untouched. Used in benchmarks to avoid deprecated strings.Title.
//
// The split happens on the boundary of the first rune rather than after the
// first byte, so a multi-byte leading character (e.g. "é") is upper-cased
// correctly instead of being cut in half. The empty string is returned as-is.
func titleCase(s string) string {
	// Ranging over a string yields the byte offset of each rune; the first
	// offset greater than zero is where the second rune starts.
	for i := range s {
		if i > 0 {
			return strings.ToUpper(s[:i]) + s[i:]
		}
	}
	// Zero or one rune: upper-case the whole string.
	return strings.ToUpper(s)
}
// buildLargeCatalog returns a search index populated with n synthetic topics
// for benchmarking. Topic k draws its subject, verb and adjective from fixed
// word pools by k modulo the pool length, and carries a title, a paragraph
// of content, three sections and four tags, so the generated catalog is
// deterministic for a given n.
func buildLargeCatalog(n int) *searchIndex {
	// Word pools for generating varied content.
	var (
		subjects = []string{
			"configuration", "deployment", "monitoring", "testing", "debugging",
			"authentication", "authorisation", "networking", "storage", "logging",
			"caching", "scheduling", "routing", "migration", "backup",
			"encryption", "compression", "validation", "serialisation", "templating",
		}
		verbs = []string{
			"install", "configure", "deploy", "monitor", "debug",
			"authenticate", "authorise", "connect", "store", "analyse",
			"cache", "schedule", "route", "migrate", "restore",
		}
		adjectives = []string{
			"advanced", "basic", "custom", "distributed", "encrypted",
			"federated", "graceful", "hybrid", "incremental", "just-in-time",
		}
	)

	index := newSearchIndex()
	for k := 0; k < n; k++ {
		var (
			subj = subjects[k%len(subjects)]
			verb = verbs[k%len(verbs)]
			adj  = adjectives[k%len(adjectives)]
		)

		index.Add(&Topic{
			ID:    fmt.Sprintf("%s-%s-%d", adj, subj, k),
			Title: fmt.Sprintf("%s %s Guide %d", titleCase(adj), titleCase(subj), k),
			Content: fmt.Sprintf(
				"This guide covers how to %s %s %s systems. "+
					"It includes step-by-step instructions for setting up %s "+
					"in both development and production environments. "+
					"The %s process requires careful planning and %s tools. "+
					"Make sure to review the prerequisites before starting.",
				verb, adj, subj, subj, subj, adj,
			),
			Sections: []Section{
				{
					ID:      fmt.Sprintf("overview-%d", k),
					Title:   "Overview",
					Content: fmt.Sprintf("An overview of %s %s patterns and best practices.", adj, subj),
				},
				{
					ID:      fmt.Sprintf("setup-%d", k),
					Title:   fmt.Sprintf("%s Setup", titleCase(subj)),
					Content: fmt.Sprintf("Detailed setup instructions for %s. Run the %s command to begin.", subj, verb),
				},
				{
					ID:      fmt.Sprintf("troubleshooting-%d", k),
					Title:   "Troubleshooting",
					Content: fmt.Sprintf("Common issues when working with %s and how to resolve them.", subj),
				},
			},
			Tags: []string{subj, adj, verb, "guide"},
		})
	}
	return index
}
// BenchmarkSearch_SingleWord measures a one-word query against a 200-topic
// catalog.
func BenchmarkSearch_SingleWord(b *testing.B) {
	const query = "configuration"
	catalog := buildLargeCatalog(200)

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		catalog.Search(query)
	}
}
// BenchmarkSearch_MultiWord measures a three-word query against a 200-topic
// catalog.
func BenchmarkSearch_MultiWord(b *testing.B) {
	const query = "advanced deployment guide"
	catalog := buildLargeCatalog(200)

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		catalog.Search(query)
	}
}
// BenchmarkSearch_NoResults measures a query for a word that does not occur
// in any of the generator's word pools, against a 200-topic catalog.
func BenchmarkSearch_NoResults(b *testing.B) {
	const query = "xylophone"
	catalog := buildLargeCatalog(200)

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		catalog.Search(query)
	}
}
// BenchmarkSearch_PartialMatch measures a query that is only a prefix
// ("config") of words in the 200-topic catalog.
func BenchmarkSearch_PartialMatch(b *testing.B) {
	const query = "config"
	catalog := buildLargeCatalog(200)

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		catalog.Search(query)
	}
}
// BenchmarkSearch_LargeCatalog500 measures a two-word query against a
// 500-topic catalog.
func BenchmarkSearch_LargeCatalog500(b *testing.B) {
	const query = "deployment monitoring"
	catalog := buildLargeCatalog(500)

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		catalog.Search(query)
	}
}
// BenchmarkSearch_LargeCatalog1000 measures a two-word query against a
// 1000-topic catalog.
func BenchmarkSearch_LargeCatalog1000(b *testing.B) {
	const query = "testing guide"
	catalog := buildLargeCatalog(1000)

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		catalog.Search(query)
	}
}
// BenchmarkSearchIndex_Add measures the indexing path: each iteration
// creates a fresh index and adds one topic with two tags and two sections.
func BenchmarkSearchIndex_Add(b *testing.B) {
	// The same fixture is added on every iteration.
	fixture := &Topic{
		ID:      "bench-topic",
		Title:   "Benchmark Topic Title",
		Content: "This is benchmark content with several words for indexing purposes.",
		Sections: []Section{
			{ID: "s1", Title: "First Section", Content: "Section content for benchmarking."},
			{ID: "s2", Title: "Second Section", Content: "More section content here."},
		},
		Tags: []string{"bench", "performance"},
	}

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		newSearchIndex().Add(fixture)
	}
}
// BenchmarkTokenize measures tokenising a fixed two-sentence input.
func BenchmarkTokenize(b *testing.B) {
	const sample = "The quick brown fox jumps over the lazy dog. Configuration and deployment are covered in detail."

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		tokenize(sample)
	}
}

1169
pkg/help/search_test.go Normal file

File diff suppressed because it is too large Load diff

162
pkg/help/server.go Normal file
View file

@ -0,0 +1,162 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
	"encoding/json"
	"net/http"
	"time"
)
// Server serves the help catalog over HTTP.
// It is built by NewServer and can be used either as an http.Handler
// (via ServeHTTP) or started directly with ListenAndServe.
type Server struct {
	catalog *Catalog       // topics the handlers list, fetch and search
	addr    string         // listen address handed to http.Server by ListenAndServe
	mux     *http.ServeMux // router that NewServer fills with the HTML and JSON routes
}
// NewServer builds a Server for catalog that will listen on addr.
//
// All routes are registered up front on a private ServeMux: three HTML pages
// (index, single topic, search) and their JSON counterparts under /api. The
// result can be mounted as an http.Handler through ServeHTTP or started
// directly with ListenAndServe.
func NewServer(catalog *Catalog, addr string) *Server {
	router := http.NewServeMux()
	srv := &Server{catalog: catalog, addr: addr, mux: router}

	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		// HTML routes
		{"GET /", srv.handleIndex},
		{"GET /topics/{id}", srv.handleTopic},
		{"GET /search", srv.handleSearch},
		// JSON API routes
		{"GET /api/topics", srv.handleAPITopics},
		{"GET /api/topics/{id}", srv.handleAPITopic},
		{"GET /api/search", srv.handleAPISearch},
	}
	for _, rt := range routes {
		router.HandleFunc(rt.pattern, rt.handler)
	}
	return srv
}
// ServeHTTP implements http.Handler, delegating to the internal mux.
// The request and response writer are passed through unchanged, so a Server
// can be mounted in another router or wrapped by httptest.NewServer.
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	s.mux.ServeHTTP(w, r)
}
// ListenAndServe starts the HTTP server on the address given to NewServer
// and blocks until the underlying http.Server stops; the returned error is
// whatever http.Server.ListenAndServe reports.
//
// The server is configured with explicit timeouts. A zero-valued http.Server
// has none, which lets a slow or stalled client hold a connection (and its
// goroutine) open indefinitely.
func (s *Server) ListenAndServe() error {
	srv := &http.Server{
		Addr:    s.addr,
		Handler: s.mux,
		// Bound how long a client may take to send the request headers and
		// the whole request, how long a response may take to write, and how
		// long an idle keep-alive connection is retained.
		ReadHeaderTimeout: 5 * time.Second,
		ReadTimeout:       15 * time.Second,
		WriteTimeout:      30 * time.Second,
		IdleTimeout:       120 * time.Second,
	}
	return srv.ListenAndServe()
}
// setSecurityHeaders adds the security headers shared by every response.
// Currently that is X-Content-Type-Options: nosniff. It must be called
// before the response headers are written.
func setSecurityHeaders(w http.ResponseWriter) {
	headers := w.Header()
	headers.Set("X-Content-Type-Options", "nosniff")
}
// --- HTML handlers ---
// handleIndex renders the HTML index page: every topic in the catalog, both
// as a flat list and grouped by tag. A rendering failure is reported as a
// 500 response.
func (s *Server) handleIndex(w http.ResponseWriter, _ *http.Request) {
	setSecurityHeaders(w)
	w.Header().Set("Content-Type", "text/html; charset=utf-8")

	all := s.catalog.List()
	page := indexData{Topics: all, Groups: groupTopicsByTag(all)}
	err := renderPage(w, "index.html", page)
	if err == nil {
		return
	}
	http.Error(w, "Internal server error", http.StatusInternalServerError)
}
// handleTopic renders the HTML page for the topic named by the {id} path
// segment. An unknown id yields a 404 status with the 404 template; a
// rendering failure of the topic page is reported as a 500 response.
func (s *Server) handleTopic(w http.ResponseWriter, r *http.Request) {
	setSecurityHeaders(w)

	topic, lookupErr := s.catalog.Get(r.PathValue("id"))

	// Both the topic page and the not-found page are HTML.
	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	if lookupErr != nil {
		w.WriteHeader(http.StatusNotFound)
		// Best effort: the 404 status has already been sent.
		_ = renderPage(w, "404.html", nil)
		return
	}

	if renderErr := renderPage(w, "topic.html", topicData{Topic: topic}); renderErr != nil {
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}
// handleSearch renders the HTML results page for the "q" query parameter.
// A missing or empty q is rejected with 400; a rendering failure is reported
// as a 500 response.
func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
	setSecurityHeaders(w)

	q := r.URL.Query().Get("q")
	if q == "" {
		http.Error(w, "Missing search query parameter 'q'", http.StatusBadRequest)
		return
	}

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	page := searchData{Query: q, Results: s.catalog.Search(q)}
	err := renderPage(w, "search.html", page)
	if err == nil {
		return
	}
	http.Error(w, "Internal server error", http.StatusInternalServerError)
}
// --- JSON API handlers ---
// handleAPITopics writes the full topic list from the catalog as JSON.
// An encoding failure is reported as a 500 response.
func (s *Server) handleAPITopics(w http.ResponseWriter, _ *http.Request) {
	setSecurityHeaders(w)
	w.Header().Set("Content-Type", "application/json")

	encoder := json.NewEncoder(w)
	if err := encoder.Encode(s.catalog.List()); err != nil {
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}
// handleAPITopic writes the topic named by the {id} path segment as JSON.
// An unknown id yields a 404 status with a JSON error object; an encoding
// failure of the topic is reported as a 500 response.
func (s *Server) handleAPITopic(w http.ResponseWriter, r *http.Request) {
	setSecurityHeaders(w)

	topic, lookupErr := s.catalog.Get(r.PathValue("id"))

	// Success and not-found bodies are both JSON.
	w.Header().Set("Content-Type", "application/json")
	encoder := json.NewEncoder(w)
	if lookupErr != nil {
		w.WriteHeader(http.StatusNotFound)
		// Best effort: the 404 status has already been sent.
		_ = encoder.Encode(map[string]string{"error": "topic not found"})
		return
	}

	if encodeErr := encoder.Encode(topic); encodeErr != nil {
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}
// handleAPISearch writes the search results for the "q" query parameter as
// a JSON array.
//
// A missing or empty q yields a 400 status with a JSON error object. A query
// with no matches yields an empty array ("[]") rather than "null", so
// clients can always iterate over the response. An encoding failure is
// reported as a 500 response.
func (s *Server) handleAPISearch(w http.ResponseWriter, r *http.Request) {
	setSecurityHeaders(w)
	query := r.URL.Query().Get("q")
	if query == "" {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusBadRequest)
		// Best effort: the 400 status has already been sent.
		_ = json.NewEncoder(w).Encode(map[string]string{"error": "missing query parameter 'q'"})
		return
	}
	w.Header().Set("Content-Type", "application/json")
	results := s.catalog.Search(query)
	if results == nil {
		// encoding/json renders a nil slice as null; substitute an empty,
		// non-nil slice so the body is always a JSON array.
		results = []*SearchResult{}
	}
	if err := json.NewEncoder(w).Encode(results); err != nil {
		http.Error(w, "Internal server error", http.StatusInternalServerError)
	}
}

231
pkg/help/server_test.go Normal file
View file

@ -0,0 +1,231 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// testServer starts an httptest.Server backed by a two-topic catalog
// ("getting-started" and "config"). The caller is responsible for closing
// the returned server.
func testServer(t *testing.T) *httptest.Server {
	t.Helper()

	catalog := &Catalog{
		topics: map[string]*Topic{},
		index:  newSearchIndex(),
	}
	fixtures := []*Topic{
		{
			ID:      "getting-started",
			Title:   "Getting Started",
			Content: "# Getting Started\n\nWelcome to the **guide**.\n\n## Installation\n\nInstall the tool.\n",
			Tags:    []string{"intro", "setup"},
			Sections: []Section{
				{ID: "getting-started", Title: "Getting Started", Level: 1},
				{ID: "installation", Title: "Installation", Level: 2, Content: "Install the tool."},
			},
			Related: []string{"config"},
		},
		{
			ID:      "config",
			Title:   "Configuration",
			Content: "# Configuration\n\nConfigure your environment.\n",
			Tags:    []string{"setup"},
		},
	}
	for _, topic := range fixtures {
		catalog.Add(topic)
	}

	return httptest.NewServer(NewServer(catalog, ":0"))
}
// TestServer_HandleIndex_Good checks that the index page is served as HTML
// with the nosniff header and lists both fixture topics.
func TestServer_HandleIndex_Good(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusOK, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "text/html")
	assert.Equal(t, "nosniff", res.Header.Get("X-Content-Type-Options"))

	// NOTE(review): a single Read is not guaranteed to return the whole
	// body; consider io.ReadAll if this test ever becomes flaky.
	page := make([]byte, 64*1024)
	read, _ := res.Body.Read(page)
	html := string(page[:read])
	assert.Contains(t, html, "Getting Started")
	assert.Contains(t, html, "Configuration")
}
// TestServer_HandleTopic_Good checks that a known topic is served as HTML
// and that its markdown bold text is rendered as <strong>.
func TestServer_HandleTopic_Good(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/topics/getting-started")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusOK, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "text/html")

	// NOTE(review): a single Read is not guaranteed to return the whole
	// body; consider io.ReadAll if this test ever becomes flaky.
	page := make([]byte, 64*1024)
	read, _ := res.Body.Read(page)
	html := string(page[:read])
	assert.Contains(t, html, "Getting Started")
	assert.Contains(t, html, "<strong>guide</strong>")
}
// TestServer_HandleTopic_Bad_NotFound checks that an unknown topic id yields
// a 404 that is still served as HTML.
func TestServer_HandleTopic_Bad_NotFound(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/topics/nonexistent")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusNotFound, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "text/html")
}
// TestServer_HandleSearch_Good checks that the HTML search page responds
// with 200 and mentions the query term.
func TestServer_HandleSearch_Good(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/search?q=install")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusOK, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "text/html")

	// NOTE(review): a single Read is not guaranteed to return the whole
	// body; consider io.ReadAll if this test ever becomes flaky.
	page := make([]byte, 64*1024)
	read, _ := res.Body.Read(page)
	html := string(page[:read])
	assert.Contains(t, html, "install")
}
// TestServer_HandleSearch_Bad_NoQuery checks that the HTML search page
// rejects a request without the q parameter with 400.
func TestServer_HandleSearch_Bad_NoQuery(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/search")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusBadRequest, res.StatusCode)
}
// TestServer_HandleAPITopics_Good checks that the topic list endpoint
// returns JSON with the nosniff header and both fixture topics.
func TestServer_HandleAPITopics_Good(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/api/topics")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusOK, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "application/json")
	assert.Equal(t, "nosniff", res.Header.Get("X-Content-Type-Options"))

	var decoded []Topic
	require.NoError(t, json.NewDecoder(res.Body).Decode(&decoded))
	assert.Len(t, decoded, 2)
}
// TestServer_HandleAPITopic_Good checks that a single topic is returned as
// JSON with the expected id and title.
func TestServer_HandleAPITopic_Good(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/api/topics/getting-started")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusOK, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "application/json")

	var decoded Topic
	require.NoError(t, json.NewDecoder(res.Body).Decode(&decoded))
	assert.Equal(t, "Getting Started", decoded.Title)
	assert.Equal(t, "getting-started", decoded.ID)
}
// TestServer_HandleAPITopic_Bad_NotFound checks that an unknown topic id
// yields a 404 that is still served as JSON.
func TestServer_HandleAPITopic_Bad_NotFound(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/api/topics/nonexistent")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusNotFound, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "application/json")
}
// TestServer_HandleAPISearch_Good checks that the JSON search endpoint
// returns at least one result for a term present in the fixtures.
func TestServer_HandleAPISearch_Good(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/api/search?q=config")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusOK, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "application/json")

	var hits []SearchResult
	require.NoError(t, json.NewDecoder(res.Body).Decode(&hits))
	assert.NotEmpty(t, hits)
}
// TestServer_HandleAPISearch_Bad_NoQuery checks that the JSON search
// endpoint rejects a request without q with a 400 served as JSON.
func TestServer_HandleAPISearch_Bad_NoQuery(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	res, err := http.Get(srv.URL + "/api/search")
	require.NoError(t, err)
	defer res.Body.Close()

	assert.Equal(t, http.StatusBadRequest, res.StatusCode)
	assert.Contains(t, res.Header.Get("Content-Type"), "application/json")
}
// TestServer_ContentTypeHeaders_Good checks, route by route, that HTML
// pages report text/html and API endpoints report application/json.
func TestServer_ContentTypeHeaders_Good(t *testing.T) {
	srv := testServer(t)
	defer srv.Close()

	cases := []struct {
		label string
		route string
		mime  string
	}{
		{label: "index HTML", route: "/", mime: "text/html"},
		{label: "topic HTML", route: "/topics/getting-started", mime: "text/html"},
		{label: "search HTML", route: "/search?q=test", mime: "text/html"},
		{label: "API topics JSON", route: "/api/topics", mime: "application/json"},
		{label: "API topic JSON", route: "/api/topics/getting-started", mime: "application/json"},
		{label: "API search JSON", route: "/api/search?q=test", mime: "application/json"},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			res, err := http.Get(srv.URL + tc.route)
			require.NoError(t, err)
			defer res.Body.Close()

			got := res.Header.Get("Content-Type")
			assert.Contains(t, got, tc.mime,
				"Content-Type for %s should contain %s", tc.route, tc.mime)
		})
	}
}
// TestNewServer_Good checks that NewServer stores the catalog and address
// it was given and creates a mux.
func TestNewServer_Good(t *testing.T) {
	catalog := DefaultCatalog()
	server := NewServer(catalog, ":8080")

	assert.NotNil(t, server)
	assert.Equal(t, ":8080", server.addr)
	assert.NotNil(t, server.mux)
	assert.Equal(t, catalog, server.catalog)
}

93
pkg/help/stemmer.go Normal file
View file

@ -0,0 +1,93 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import "strings"
// stem performs lightweight Porter-style suffix stripping on an English word.
// Words shorter than 4 characters are returned unchanged. The result is
// guaranteed to be at least 2 characters long.
//
// This is intentionally NOT the full Porter algorithm — it covers only the
// most impactful suffix rules for a help-catalog search context.
func stem(word string) string {
if len(word) < 4 {
return word
}
s := word
// Step 1: plurals and verb inflections.
s = stemInflectional(s)
// Step 2: derivational suffixes (longest match first).
s = stemDerivational(s)
// Guard: result must be at least 2 characters.
if len(s) < 2 {
return word
}
return s
}
// stemInflectional handles plurals and -ed/-ing verb forms.
func stemInflectional(s string) string {
switch {
case strings.HasSuffix(s, "sses"):
return s[:len(s)-2] // -sses → -ss
case strings.HasSuffix(s, "ies"):
return s[:len(s)-2] // -ies → -i
case strings.HasSuffix(s, "eed"):
return s[:len(s)-1] // -eed → -ee
case strings.HasSuffix(s, "ing"):
r := s[:len(s)-3]
if len(r) >= 2 {
return r
}
case strings.HasSuffix(s, "ed"):
r := s[:len(s)-2]
if len(r) >= 2 {
return r
}
case strings.HasSuffix(s, "s") && !strings.HasSuffix(s, "ss"):
return s[:len(s)-1] // -s → "" (but not -ss)
}
return s
}
// suffixRule maps a derivational suffix to the text that replaces it.
type suffixRule struct {
	suffix      string
	replacement string
}

// derivationalRules is the rule table used by stemDerivational. It is built
// once at package initialisation instead of on every call. Entries are
// ordered longest suffix first so the most specific rule wins; in
// particular "ational" must precede "tional" and "ation".
var derivationalRules = []suffixRule{
	{"fulness", "ful"}, // -fulness → -ful
	{"ational", "ate"}, // -ational → -ate
	{"tional", "tion"}, // -tional → -tion
	{"ously", "ous"},   // -ously → -ous
	{"ively", "ive"},   // -ively → -ive
	{"ingly", "ing"},   // -ingly → -ing
	{"ation", "ate"},   // -ation → -ate
	{"ness", ""},       // -ness → ""
	{"ment", ""},       // -ment → ""
	{"ably", "able"},   // -ably → -able
	{"ally", "al"},     // -ally → -al
	{"izer", "ize"},    // -izer → -ize
}

// stemDerivational strips one common derivational suffix from s.
//
// Only the first rule in derivationalRules whose suffix matches is applied.
// If applying it would leave fewer than 2 bytes, s is returned unchanged
// and no further rule is tried. A word that matches no rule is returned
// as-is.
func stemDerivational(s string) string {
	for _, r := range derivationalRules {
		base, ok := strings.CutSuffix(s, r.suffix)
		if !ok {
			continue
		}
		if result := base + r.replacement; len(result) >= 2 {
			return result
		}
		return s // Guard: don't over-strip.
	}
	return s
}

222
pkg/help/stemmer_test.go Normal file
View file

@ -0,0 +1,222 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// ---------------------------------------------------------------------------
// stem() unit tests
// ---------------------------------------------------------------------------
func TestStem_Good(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
// Step 1: plurals and verb inflections
{name: "sses to ss", input: "addresses", expected: "address"},
{name: "ies to i", input: "eries", expected: "eri"},
{name: "ies to i (ponies)", input: "ponies", expected: "poni"},
{name: "eed to ee", input: "agreed", expected: "agree"},
{name: "ed removed", input: "configured", expected: "configur"},
{name: "ing removed", input: "running", expected: "runn"},
{name: "ing removed (testing)", input: "testing", expected: "test"},
{name: "s removed (servers)", input: "servers", expected: "server"},
{name: "s removed then derivational (configurations)", input: "configurations", expected: "configurate"},
{name: "ss unchanged", input: "boss", expected: "boss"},
// Step 2: derivational suffixes
{name: "ational to ate", input: "configurational", expected: "configurate"},
{name: "tional to tion", input: "nutritional", expected: "nutrition"},
{name: "fulness to ful", input: "cheerfulness", expected: "cheerful"},
{name: "ness removed", input: "darkness", expected: "dark"},
{name: "ment removed", input: "deployment", expected: "deploy"},
{name: "ation to ate", input: "configuration", expected: "configurate"},
{name: "ously to ous", input: "dangerously", expected: "dangerous"},
{name: "ively to ive", input: "effectively", expected: "effective"},
{name: "ably to able", input: "comfortably", expected: "comfortable"},
{name: "ally to al", input: "manually", expected: "manual"},
{name: "izer to ize", input: "organizer", expected: "organize"},
{name: "ingly to ing", input: "surprisingly", expected: "surprising"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := stem(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestStem_ShortWordsUnchanged(t *testing.T) {
tests := []struct {
name string
input string
}{
{name: "single char", input: "a"},
{name: "two chars", input: "go"},
{name: "three chars", input: "run"},
{name: "three chars (the)", input: "the"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.input, stem(tt.input), "words under 4 chars should be unchanged")
})
}
}
// TestStem_GuardMinLength checks that stem never shrinks a word below two
// characters. Stripping "ed" from the four-letter word "abed" leaves "ab",
// which is the shortest result the guard permits.
func TestStem_GuardMinLength(t *testing.T) {
	got := stem("abed")
	assert.GreaterOrEqual(t, len(got), 2, "result must be at least 2 chars")
}
// ---------------------------------------------------------------------------
// Search integration tests — stemming recall
// ---------------------------------------------------------------------------
// TestSearch_StemRunningMatchesRun checks that the inflected query
// "running" finds a topic that only contains "run".
func TestSearch_StemRunningMatchesRun(t *testing.T) {
	index := newSearchIndex()
	index.Add(&Topic{
		ID:      "topic-run",
		Title:   "How to Run Commands",
		Content: "You can run any command from the terminal.",
	})

	hits := index.Search("running")
	require.NotEmpty(t, hits, "searching 'running' should match topic containing 'run'")
	assert.Equal(t, "topic-run", hits[0].Topic.ID)
}
// TestSearch_StemConfigurationsMatchesConfigure checks that the query
// "configurations" finds a topic that only contains "configure".
func TestSearch_StemConfigurationsMatchesConfigure(t *testing.T) {
	index := newSearchIndex()
	index.Add(&Topic{
		ID:      "topic-configure",
		Title:   "Configure Your Application",
		Content: "Learn how to configure settings for your application.",
	})

	hits := index.Search("configurations")
	require.NotEmpty(t, hits, "searching 'configurations' should match topic containing 'configure'")
	assert.Equal(t, "topic-configure", hits[0].Topic.ID)
}
// TestSearch_StemPluralServersMatchesServer checks that the plural query
// "servers" finds a topic that only contains "server".
func TestSearch_StemPluralServersMatchesServer(t *testing.T) {
	index := newSearchIndex()
	index.Add(&Topic{
		ID:      "topic-server",
		Title:   "Server Management",
		Content: "Manage your server with these tools.",
	})

	hits := index.Search("servers")
	require.NotEmpty(t, hits, "searching 'servers' should match topic containing 'server'")
	assert.Equal(t, "topic-server", hits[0].Topic.ID)
}
// TestSearch_StemScoringLowerThanExact checks ranking: for the query
// "running", a topic containing the exact word must outscore a topic that
// matches only through the stem "run".
func TestSearch_StemScoringLowerThanExact(t *testing.T) {
	index := newSearchIndex()
	index.Add(&Topic{
		ID:      "exact-match",
		Title:   "Running Guide",
		Content: "Guide to running applications.",
	})
	index.Add(&Topic{
		ID:      "stem-match",
		Title:   "How to Run",
		Content: "Run your application.",
	})

	hits := index.Search("running")
	require.Len(t, hits, 2, "should match both topics")

	// Look the two scores up by topic ID; a missing ID reads as zero.
	scoreByID := make(map[string]float64, len(hits))
	for _, hit := range hits {
		scoreByID[hit.Topic.ID] = hit.Score
	}

	// All else being equal, scoreExactWord > scoreStemWord, so the topic
	// holding the literal word "running" must come out ahead.
	assert.Greater(t, scoreByID["exact-match"], scoreByID["stem-match"],
		"exact word match should score higher than stem-only match")
}
// TestSearch_ExistingExactMatchUnaffected guards against regressions:
// stemming must not stop a plain exact-word query from matching.
func TestSearch_ExistingExactMatchUnaffected(t *testing.T) {
	index := newSearchIndex()
	index.Add(&Topic{
		ID:      "topic-deploy",
		Title:   "Deploy Guide",
		Content: "How to deploy your application step by step.",
	})

	hits := index.Search("deploy")
	require.NotEmpty(t, hits)
	assert.Equal(t, "topic-deploy", hits[0].Topic.ID)
}
// TestTokenize_IncludesStemmedVariants checks that tokenize emits both the
// original words and their stems.
func TestTokenize_IncludesStemmedVariants(t *testing.T) {
	tokens := tokenize("running configurations servers")

	// The words as written.
	for _, original := range []string{"running", "configurations", "servers"} {
		assert.Contains(t, tokens, original)
	}

	// Their stemmed forms.
	assert.Contains(t, tokens, "runn")        // running: -ing removed
	assert.Contains(t, tokens, "configurate") // configurations: -s removed, then -ation → -ate
	assert.Contains(t, tokens, "server")      // servers: -s removed
}
// ---------------------------------------------------------------------------
// Benchmark
// ---------------------------------------------------------------------------
// BenchmarkStem measures stemming a fixed vocabulary of help-catalog style
// words; one benchmark iteration stems the whole list once.
func BenchmarkStem(b *testing.B) {
	vocabulary := []string{
		"running", "configurations", "servers", "deployment", "testing",
		"addresses", "agreed", "configured", "operational", "cheerfulness",
		"darkness", "dangerously", "effectively", "comfortably", "manually",
		"organizer", "surprisingly", "configuration", "authentication",
		"authorisation", "networking", "monitoring", "scheduling", "routing",
		"migration", "encryption", "compression", "validation", "serialisation",
		"templating", "distributed", "federated", "graceful", "hybrid",
		"incremental", "advanced", "basic", "custom", "encrypted", "install",
		"configure", "deploy", "monitor", "debug", "authenticate", "authorise",
		"connect", "store", "analyse", "cache", "schedule", "route", "migrate",
		"restore", "help", "guide", "overview", "setup", "troubleshooting",
		"performance", "benchmark", "analysis", "documentation", "reference",
		"tutorial", "quickstart", "installation", "requirements", "dependencies",
		"modules", "packages", "services", "workers", "processes", "threads",
		"connections", "sessions", "transactions", "queries", "responses",
		"requests", "handlers", "middleware", "controllers", "models",
		"views", "templates", "layouts", "components", "widgets", "plugins",
		"extensions", "integrations", "providers", "factories", "builders",
		"adapters", "decorators", "observers", "listeners", "subscribers",
		"publishers", "dispatchers", "resolvers", "transformers", "formatters",
		"validators", "sanitizers", "parsers", "compilers", "interpreters",
	}

	b.ReportAllocs()
	b.ResetTimer()
	for b.Loop() {
		for k := range vocabulary {
			stem(vocabulary[k])
		}
	}
}

129
pkg/help/templates.go Normal file
View file

@ -0,0 +1,129 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"cmp"
"embed"
"html/template"
"io"
"slices"
"strings"
)
// templateFS holds the HTML files matching templates/*.html, embedded into
// the binary at build time. parseTemplates reads the base layout and page
// templates from it.
//
//go:embed templates/*.html
var templateFS embed.FS
// templateFuncs returns the helper functions made available to the help
// templates:
//
//   - renderMarkdown: converts markdown to HTML via RenderMarkdown and
//     returns it unescaped; on failure a fixed error paragraph is returned.
//   - truncate: joins the non-blank, non-heading lines of s with spaces and
//     cuts the result to n runes, appending "..." when something was cut.
//   - pluralise: picks singular when count is 1, plural otherwise.
//   - multiply, sub: integer arithmetic.
func templateFuncs() template.FuncMap {
	renderMarkdown := func(content string) template.HTML {
		rendered, err := RenderMarkdown(content)
		if err != nil {
			return template.HTML("<p>Error rendering content.</p>")
		}
		return template.HTML(rendered) //nolint:gosec // trusted content from catalog
	}

	truncate := func(s string, n int) string {
		// Markdown headings and blank lines are dropped from the preview.
		var kept []string
		for raw := range strings.SplitSeq(s, "\n") {
			if line := strings.TrimSpace(raw); line != "" && !strings.HasPrefix(line, "#") {
				kept = append(kept, line)
			}
		}
		preview := strings.Join(kept, " ")
		// Cut on rune boundaries so multi-byte characters stay intact.
		if r := []rune(preview); len(r) > n {
			return string(r[:n]) + "..."
		}
		return preview
	}

	pluralise := func(count int, singular, plural string) string {
		if count != 1 {
			return plural
		}
		return singular
	}

	return template.FuncMap{
		"renderMarkdown": renderMarkdown,
		"truncate":       truncate,
		"pluralise":      pluralise,
		"multiply":       func(a, b int) int { return a * b },
		"sub":            func(a, b int) int { return a - b },
	}
}
// parseTemplates builds a template set rooted at "base.html" from the embedded
// base layout and the given page file (for example "index.html"). The helper
// functions from templateFuncs are registered before the files are parsed.
func parseTemplates(page string) (*template.Template, error) {
	root := template.New("base.html").Funcs(templateFuncs())
	files := []string{"templates/base.html", "templates/" + page}
	return root.ParseFS(templateFS, files...)
}
// topicGroup groups topics under a tag for the index page.
type topicGroup struct {
// Tag is the group heading. groupTopicsByTag fills it with a topic's first
// tag, or "other" for topics that have no tags.
Tag string
// Topics are the members of the group.
Topics []*Topic
}
// indexData holds template data for the index page.
type indexData struct {
// Topics is the full topic list; index.html uses its length for the count badge.
Topics []*Topic
// Groups is the per-tag grouping that index.html iterates to render the cards.
Groups []topicGroup
}
// topicData holds template data for a single topic page.
type topicData struct {
// Topic is the topic being displayed; topic.html reads its Title, Tags,
// Content, Sections and Related fields.
Topic *Topic
}
// searchData holds template data for the search results page.
type searchData struct {
// Query is the search text; search.html echoes it in the title, the summary
// line and the search box value.
Query string
// Results are the matches to list; when empty, search.html shows a
// "No results" message instead.
Results []*SearchResult
}
// groupTopicsByTag buckets topics by their first tag; topics that carry no
// tags land in the "other" bucket.
//
// The input slice is not modified: a copy is ordered by Order and then Title,
// so each bucket lists its topics in that order. The returned groups are
// sorted alphabetically by tag. The result is nil when there are no topics.
func groupTopicsByTag(topics []*Topic) []topicGroup {
	ordered := slices.Clone(topics)
	slices.SortFunc(ordered, func(x, y *Topic) int {
		// Order is the primary key; Title breaks ties.
		return cmp.Or(
			cmp.Compare(x.Order, y.Order),
			cmp.Compare(x.Title, y.Title),
		)
	})

	byTag := make(map[string][]*Topic)
	for _, topic := range ordered {
		key := "other"
		if len(topic.Tags) != 0 {
			key = topic.Tags[0]
		}
		byTag[key] = append(byTag[key], topic)
	}

	var grouped []topicGroup
	for key, members := range byTag {
		grouped = append(grouped, topicGroup{Tag: key, Topics: members})
	}
	// Map iteration order is random, so sort to get a stable page layout.
	slices.SortFunc(grouped, func(x, y topicGroup) int {
		return cmp.Compare(x.Tag, y.Tag)
	})
	return grouped
}
// renderPage parses the base layout together with the named page template and
// executes the result into w using data. It returns the parse error if parsing
// fails, otherwise whatever error template execution reports.
func renderPage(w io.Writer, page string, data any) error {
	layout, parseErr := parseTemplates(page)
	if parseErr != nil {
		return parseErr
	}
	return layout.Execute(w, data)
}

View file

@ -0,0 +1,10 @@
{{/*
404.html is the not-found page. It overrides the "title" and "content" blocks
of base.html and reads no fields from the template data.
*/}}
{{define "title"}}Not Found - Help{{end}}
{{define "content"}}
<div style="text-align: center; margin-top: 4rem;">
<h1 style="font-size: 3rem; border: none; color: var(--fg-muted);">404</h1>
<p style="font-size: 1.1rem; color: var(--fg-muted);">Topic not found.</p>
<p style="margin-top: 1.5rem;">
<a href="/">Browse all topics</a> or try searching above.
</p>
</div>
{{end}}

View file

@ -0,0 +1,161 @@
{{/*
base.html is the shared page layout: inline stylesheet, navigation bar with
the search form, main content area and footer. Page templates customise it by
defining the "title", "head", "search_value" and "content" blocks; each block
falls back to the default given here ("Help" for the title, empty otherwise).
*/ -}}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{block "title" .}}Help{{end}}</title>
<style>
:root {
--bg: #0d1117;
--bg-secondary: #161b22;
--bg-tertiary: #21262d;
--fg: #c9d1d9;
--fg-muted: #8b949e;
--fg-subtle: #6e7681;
--accent: #58a6ff;
--accent-hover: #79c0ff;
--border: #30363d;
--success: #3fb950;
--warning: #d29922;
--danger: #f85149;
}
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: ui-monospace, 'Cascadia Code', 'Source Code Pro', Menlo, Consolas, 'DejaVu Sans Mono', monospace;
background: var(--bg);
color: var(--fg);
line-height: 1.6;
font-size: 14px;
}
a { color: var(--accent); text-decoration: none; }
a:hover { color: var(--accent-hover); text-decoration: underline; }
.container { max-width: 960px; margin: 0 auto; padding: 0 1.5rem; }
nav {
background: var(--bg-secondary);
border-bottom: 1px solid var(--border);
padding: 0.75rem 0;
}
nav .container {
display: flex;
align-items: center;
justify-content: space-between;
gap: 1rem;
}
nav .brand {
font-weight: bold;
font-size: 1rem;
color: var(--fg);
white-space: nowrap;
}
nav .brand:hover { color: var(--accent); text-decoration: none; }
.search-form { display: flex; flex: 1; max-width: 400px; }
.search-form input {
flex: 1;
background: var(--bg-tertiary);
border: 1px solid var(--border);
border-radius: 6px;
color: var(--fg);
padding: 0.4rem 0.75rem;
font-family: inherit;
font-size: 0.85rem;
}
.search-form input::placeholder { color: var(--fg-subtle); }
.search-form input:focus { outline: none; border-color: var(--accent); }
main { padding: 2rem 0; min-height: calc(100vh - 120px); }
footer {
border-top: 1px solid var(--border);
padding: 1rem 0;
text-align: center;
color: var(--fg-muted);
font-size: 0.8rem;
}
h1, h2, h3, h4, h5, h6 {
color: var(--fg);
margin: 1.5rem 0 0.75rem;
line-height: 1.3;
}
h1 { font-size: 1.5rem; border-bottom: 1px solid var(--border); padding-bottom: 0.5rem; }
h2 { font-size: 1.25rem; }
h3 { font-size: 1.1rem; }
p { margin: 0.5rem 0; }
pre {
background: var(--bg-secondary);
border: 1px solid var(--border);
border-radius: 6px;
padding: 1rem;
overflow-x: auto;
margin: 0.75rem 0;
}
code {
background: var(--bg-tertiary);
padding: 0.15rem 0.3rem;
border-radius: 3px;
font-size: 0.9em;
}
pre code { background: none; padding: 0; }
table {
border-collapse: collapse;
width: 100%;
margin: 0.75rem 0;
}
th, td {
border: 1px solid var(--border);
padding: 0.5rem 0.75rem;
text-align: left;
}
th { background: var(--bg-secondary); font-weight: 600; }
ul, ol { padding-left: 1.5rem; margin: 0.5rem 0; }
li { margin: 0.25rem 0; }
.tag {
display: inline-block;
background: var(--bg-tertiary);
border: 1px solid var(--border);
border-radius: 12px;
padding: 0.1rem 0.5rem;
font-size: 0.75rem;
color: var(--accent);
margin: 0.15rem 0.15rem;
}
.card {
background: var(--bg-secondary);
border: 1px solid var(--border);
border-radius: 6px;
padding: 1rem 1.25rem;
margin: 0.75rem 0;
transition: border-color 0.15s;
}
.card:hover { border-color: var(--accent); }
.card h3 { margin-top: 0; font-size: 1rem; }
.card p { color: var(--fg-muted); font-size: 0.85rem; }
.badge {
display: inline-block;
background: var(--bg-tertiary);
border-radius: 10px;
padding: 0.1rem 0.5rem;
font-size: 0.7rem;
color: var(--fg-muted);
}
</style>
{{block "head" .}}{{end}}
</head>
<body>
<nav>
<div class="container">
<a href="/" class="brand">core help</a>
<form class="search-form" action="/search" method="get">
<input type="text" name="q" placeholder="Search topics..." value="{{block "search_value" .}}{{end}}" autocomplete="off">
</form>
</div>
</nav>
<main>
<div class="container">
{{block "content" .}}{{end}}
</div>
</main>
<footer>
<div class="container">
go-help &middot; forge.lthn.ai/core/go-help
</div>
</footer>
</body>
</html>

View file

@ -0,0 +1,19 @@
{{/*
index.html is the topic index. It overrides the "title" and "content" blocks
of base.html and expects data with:
  .Topics - all topics; only the length is used, for the count badge
  .Groups - groups with .Tag and .Topics, each rendered as a heading followed
            by one card per topic (link to /topics/<ID>, tags, and a preview
            of .Content cut to 120 runes by the truncate helper)
When .Groups is empty a "No topics available." message is shown instead.
*/}}
{{define "title"}}Help Topics{{end}}
{{define "content"}}
<h1>Help Topics <span class="badge">{{len .Topics}} {{pluralise (len .Topics) "topic" "topics"}}</span></h1>
{{if .Groups}}
{{range .Groups}}
<h2><span class="tag">{{.Tag}}</span></h2>
{{range .Topics}}
<div class="card">
<h3><a href="/topics/{{.ID}}">{{.Title}}</a></h3>
{{if .Tags}}<div>{{range .Tags}}<span class="tag">{{.}}</span>{{end}}</div>{{end}}
{{if .Content}}<p>{{truncate .Content 120}}</p>{{end}}
</div>
{{end}}
{{end}}
{{else}}
<p style="color: var(--fg-muted);">No topics available.</p>
{{end}}
{{end}}

View file

@ -0,0 +1,22 @@
{{/*
search.html is the search results page. It overrides the "title",
"search_value" and "content" blocks of base.html and expects data with:
  .Query   - the search text, echoed in the title, the summary line and (via
             "search_value") the search box
  .Results - matches, each with .Topic (ID, Title, Tags), .Score (printed with
             one decimal place) and an optional .Snippet
When .Results is empty a "No results" summary and a hint are shown instead.
*/}}
{{define "title"}}Search: {{.Query}} - Help{{end}}
{{define "search_value"}}{{.Query}}{{end}}
{{define "content"}}
<h1>Search Results</h1>
<p style="color: var(--fg-muted);">
{{if .Results}}Found {{len .Results}} {{pluralise (len .Results) "result" "results"}} for &ldquo;{{.Query}}&rdquo;{{else}}No results for &ldquo;{{.Query}}&rdquo;{{end}}
</p>
{{if .Results}}
{{range .Results}}
<div class="card">
<h3><a href="/topics/{{.Topic.ID}}">{{.Topic.Title}}</a> <span class="badge">{{printf "%.1f" .Score}}</span></h3>
{{if .Snippet}}<p>{{.Snippet}}</p>{{end}}
{{if .Topic.Tags}}<div>{{range .Topic.Tags}}<span class="tag">{{.}}</span>{{end}}</div>{{end}}
</div>
{{end}}
{{else}}
<div style="margin-top: 2rem; text-align: center; color: var(--fg-muted);">
<p>Try a different search term or browse <a href="/">all topics</a>.</p>
</div>
{{end}}
{{end}}

View file

@ -0,0 +1,35 @@
{{/*
topic.html is the single-topic page. It overrides the "title" and "content"
blocks of base.html and expects data with a .Topic field:
  .Topic.Title    - used in the page title
  .Topic.Tags     - rendered as tag pills above the body
  .Topic.Content  - rendered through the renderMarkdown helper
  .Topic.Sections - "On this page" sidebar; each entry links to #<ID> and is
                    indented by (Level - 1) * 12 pixels
  .Topic.Related  - sidebar links, each pointing at /topics/<entry>
*/}}
{{define "title"}}{{.Topic.Title}} - Help{{end}}
{{define "content"}}
<div style="display: flex; gap: 2rem;">
<article style="flex: 1; min-width: 0;">
{{if .Topic.Tags}}<div style="margin-bottom: 1rem;">{{range .Topic.Tags}}<span class="tag">{{.}}</span>{{end}}</div>{{end}}
<div class="topic-body">{{renderMarkdown .Topic.Content}}</div>
</article>
<aside style="width: 220px; flex-shrink: 0;">
{{if .Topic.Sections}}
<div style="position: sticky; top: 1rem;">
<h3 style="font-size: 0.85rem; color: var(--fg-muted); margin-top: 0;">On this page</h3>
<ul style="list-style: none; padding-left: 0; font-size: 0.8rem;">
{{range .Topic.Sections}}
<li style="padding-left: {{multiply (sub .Level 1) 12}}px; margin: 0.3rem 0;">
<a href="#{{.ID}}" style="color: var(--fg-muted);">{{.Title}}</a>
</li>
{{end}}
</ul>
</div>
{{end}}
{{if .Topic.Related}}
<div style="margin-top: 1.5rem;">
<h3 style="font-size: 0.85rem; color: var(--fg-muted);">Related</h3>
<ul style="list-style: none; padding-left: 0; font-size: 0.8rem;">
{{range .Topic.Related}}
<li style="margin: 0.3rem 0;"><a href="/topics/{{.}}">{{.}}</a></li>
{{end}}
</ul>
</div>
{{end}}
</aside>
</div>
{{end}}

167
pkg/help/templates_test.go Normal file
View file

@ -0,0 +1,167 @@
// SPDX-License-Identifier: EUPL-1.2
package help
import (
"bytes"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestParseTemplates_Good checks that each page template parses together with
// the base layout, one subtest per page.
func TestParseTemplates_Good(t *testing.T) {
	for _, name := range []string{"index.html", "topic.html", "search.html", "404.html"} {
		t.Run(name, func(t *testing.T) {
			parsed, err := parseTemplates(name)
			require.NoError(t, err, "template %s should parse without error", name)
			assert.NotNil(t, parsed)
		})
	}
}
// TestRenderPage_Good_Index renders the index page for two topics and checks
// that both titles, the pluralised topic count and the brand text appear.
func TestRenderPage_Good_Index(t *testing.T) {
	all := []*Topic{
		{ID: "getting-started", Title: "Getting Started", Tags: []string{"intro"}, Content: "Welcome."},
		{ID: "config", Title: "Configuration", Tags: []string{"setup"}, Content: "Config options."},
	}

	var out bytes.Buffer
	err := renderPage(&out, "index.html", indexData{
		Topics: all,
		Groups: groupTopicsByTag(all),
	})
	require.NoError(t, err)

	page := out.String()
	for _, want := range []string{"Getting Started", "Configuration", "2 topics", "core help"} {
		assert.Contains(t, page, want)
	}
}
func TestRenderPage_Good_Topic(t *testing.T) {
topic := &Topic{
ID: "getting-started",
Title: "Getting Started",
Content: "# Getting Started\n\nWelcome to the **guide**.\n",
Tags: []string{"intro"},
Sections: []Section{
{ID: "overview", Title: "Overview", Level: 2},
},
Related: []string{"config"},
}
data := topicData{Topic: topic}
var buf bytes.Buffer
err := renderPage(&buf, "topic.html", data)
require.NoError(t, err)
html := buf.String()
assert.Contains(t, html, "Getting Started")
assert.Contains(t, html, "<strong>guide</strong>")
assert.Contains(t, html, "Overview")
assert.Contains(t, html, "config")
}
// TestRenderPage_Good_Search renders the search page for a single hit and
// checks the summary line, the refilled search box, the result link, the
// formatted score and the snippet.
//
// The expectations are deliberately specific: a bare "1 result" would also
// match "1 results", and a bare "install" is already satisfied by the title
// "Installation", so neither could catch a pluralisation or query regression.
func TestRenderPage_Good_Search(t *testing.T) {
	data := searchData{
		Query: "install",
		Results: []*SearchResult{
			{
				Topic:   &Topic{ID: "install", Title: "Installation", Tags: []string{"setup"}},
				Score:   12.5,
				Snippet: "How to **install** the tool.",
			},
		},
	}

	var buf bytes.Buffer
	err := renderPage(&buf, "search.html", data)
	require.NoError(t, err)

	html := buf.String()
	// Summary line: singular form plus the echoed query.
	assert.Contains(t, html, "Found 1 result for &ldquo;install&rdquo;")
	// The "search_value" block refills the search box with the query.
	assert.Contains(t, html, `value="install"`)
	// Result card: link, title, score with one decimal place, snippet text.
	assert.Contains(t, html, `href="/topics/install"`)
	assert.Contains(t, html, "Installation")
	assert.Contains(t, html, "12.5")
	assert.Contains(t, html, "How to **install** the tool.")
}
func TestRenderPage_Good_404(t *testing.T) {
var buf bytes.Buffer
err := renderPage(&buf, "404.html", nil)
require.NoError(t, err)
html := buf.String()
assert.Contains(t, html, "not found")
assert.Contains(t, html, "404")
}
func TestGroupTopicsByTag_Good(t *testing.T) {
topics := []*Topic{
{ID: "a", Title: "Alpha", Tags: []string{"setup"}, Order: 2},
{ID: "b", Title: "Beta", Tags: []string{"setup"}, Order: 1},
{ID: "c", Title: "Gamma", Tags: []string{"advanced"}},
{ID: "d", Title: "Delta"}, // no tags -> "other"
}
groups := groupTopicsByTag(topics)
require.Len(t, groups, 3)
// Groups should be sorted alphabetically by tag
assert.Equal(t, "advanced", groups[0].Tag)
assert.Equal(t, "other", groups[1].Tag)
assert.Equal(t, "setup", groups[2].Tag)
// Within "setup", topics should be sorted by Order then Title
setupGroup := groups[2]
require.Len(t, setupGroup.Topics, 2)
assert.Equal(t, "Beta", setupGroup.Topics[0].Title) // Order 1
assert.Equal(t, "Alpha", setupGroup.Topics[1].Title) // Order 2
}
// TestTemplateFuncs_Good exercises the helpers registered by templateFuncs
// directly, one subtest per behaviour.
func TestTemplateFuncs_Good(t *testing.T) {
	funcs := templateFuncs()

	t.Run("truncate short string", func(t *testing.T) {
		truncate := funcs["truncate"].(func(string, int) string)
		assert.Equal(t, "hello", truncate("hello", 10))
	})

	t.Run("truncate long string", func(t *testing.T) {
		truncate := funcs["truncate"].(func(string, int) string)
		got := truncate("hello world this is long", 11)
		assert.Equal(t, "hello world...", got)
	})

	t.Run("truncate strips headings", func(t *testing.T) {
		truncate := funcs["truncate"].(func(string, int) string)
		got := truncate("# Title\n\nSome content here.", 100)
		assert.Equal(t, "Some content here.", got)
		assert.NotContains(t, got, "#")
	})

	t.Run("pluralise singular", func(t *testing.T) {
		pluralise := funcs["pluralise"].(func(int, string, string) string)
		assert.Equal(t, "topic", pluralise(1, "topic", "topics"))
	})

	t.Run("pluralise plural", func(t *testing.T) {
		pluralise := funcs["pluralise"].(func(int, string, string) string)
		assert.Equal(t, "topics", pluralise(0, "topic", "topics"))
		assert.Equal(t, "topics", pluralise(5, "topic", "topics"))
	})

	t.Run("multiply", func(t *testing.T) {
		multiply := funcs["multiply"].(func(int, int) int)
		assert.Equal(t, 24, multiply(4, 6))
	})

	t.Run("sub", func(t *testing.T) {
		sub := funcs["sub"].(func(int, int) int)
		assert.Equal(t, 2, sub(5, 3))
	})
}

31
pkg/help/topic.go Normal file
View file

@ -0,0 +1,31 @@
// Package help provides display-agnostic help content management.
package help
// Topic represents a help topic/page.
type Topic struct {
// ID identifies the topic; the HTML templates link to a topic as /topics/<ID>.
ID string `json:"id"`
// Title is the display title.
Title string `json:"title"`
// Path presumably records where the topic was loaded from — TODO confirm
// against the code that populates it.
Path string `json:"path"`
// Content is the topic body; the topic page renders it as Markdown.
Content string `json:"content"`
// Sections lists the headings within the topic.
Sections []Section `json:"sections"`
// Tags are labels for the topic; the index page groups topics by the first one.
Tags []string `json:"tags"`
// Related lists entries the topic page links to as /topics/<entry>.
Related []string `json:"related"`
Order int `json:"order"` // For sorting: ascending, with Title as the tie-breaker on the index page
}
// Section represents a heading within a topic.
type Section struct {
// ID is the anchor name; the topic page sidebar links to it as #<ID>.
ID string `json:"id"`
// Title is the heading text.
Title string `json:"title"`
// Level is the heading depth; the topic page sidebar indents an entry by
// (Level - 1) * 12 pixels.
Level int `json:"level"`
Line int `json:"line"` // Start line in content (1-indexed)
Content string `json:"content"` // Content under heading
}
// Frontmatter represents YAML frontmatter metadata.
// Each field is read from the YAML key named in its struct tag. The field
// names mirror Topic's Title, Tags, Related and Order — presumably they are
// copied onto the Topic by the parser; confirm there.
type Frontmatter struct {
Title string `yaml:"title"`
Tags []string `yaml:"tags"`
Related []string `yaml:"related"`
Order int `yaml:"order"`
}