feat(help): add markdown parsing and section extraction
Implements #137: markdown parsing and section extraction for help system. - Add Topic and Section types for help content structure - Add Frontmatter type for YAML metadata parsing - Add ParseTopic() to parse markdown files into Topic structs - Add ExtractFrontmatter() to extract YAML frontmatter - Add ExtractSections() to extract headings and content - Add GenerateID() to create URL-safe anchor IDs - Add comprehensive tests following _Good/_Bad naming convention This is the foundation for the display-agnostic help system (#133). Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
547c65f264
commit
cbd41e6837
3 changed files with 520 additions and 0 deletions
173
pkg/help/parser.go
Normal file
173
pkg/help/parser.go
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
package help
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var (
	// frontmatterRegex matches a YAML frontmatter block at the very start of
	// the document: an opening "---" line, the YAML payload (capture group 1)
	// and a closing "---" line. Each line break may be LF or CRLF so that
	// files saved with Windows line endings are recognised too.
	frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(.+?)\r?\n---\r?\n?`)

	// headingRegex matches an ATX markdown heading on a single line: one to
	// six '#' characters (group 1), whitespace, then the heading text
	// (group 2).
	headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`)
)
|
||||
|
||||
// ParseTopic parses a markdown file into a Topic.
|
||||
func ParseTopic(path string, content []byte) (*Topic, error) {
|
||||
contentStr := string(content)
|
||||
|
||||
topic := &Topic{
|
||||
Path: path,
|
||||
ID: GenerateID(pathToTitle(path)),
|
||||
Sections: []Section{},
|
||||
Tags: []string{},
|
||||
Related: []string{},
|
||||
}
|
||||
|
||||
// Extract YAML frontmatter if present
|
||||
fm, body := ExtractFrontmatter(contentStr)
|
||||
if fm != nil {
|
||||
topic.Title = fm.Title
|
||||
topic.Tags = fm.Tags
|
||||
topic.Related = fm.Related
|
||||
topic.Order = fm.Order
|
||||
if topic.Title != "" {
|
||||
topic.ID = GenerateID(topic.Title)
|
||||
}
|
||||
}
|
||||
|
||||
topic.Content = body
|
||||
|
||||
// Extract sections from headings
|
||||
topic.Sections = ExtractSections(body)
|
||||
|
||||
// If no title from frontmatter, try first H1
|
||||
if topic.Title == "" && len(topic.Sections) > 0 {
|
||||
for _, s := range topic.Sections {
|
||||
if s.Level == 1 {
|
||||
topic.Title = s.Title
|
||||
topic.ID = GenerateID(s.Title)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return topic, nil
|
||||
}
|
||||
|
||||
// ExtractFrontmatter extracts YAML frontmatter from markdown content.
|
||||
// Returns the parsed frontmatter and the remaining content.
|
||||
func ExtractFrontmatter(content string) (*Frontmatter, string) {
|
||||
match := frontmatterRegex.FindStringSubmatch(content)
|
||||
if match == nil {
|
||||
return nil, content
|
||||
}
|
||||
|
||||
var fm Frontmatter
|
||||
if err := yaml.Unmarshal([]byte(match[1]), &fm); err != nil {
|
||||
// Invalid YAML, return content as-is
|
||||
return nil, content
|
||||
}
|
||||
|
||||
// Return content without frontmatter
|
||||
body := content[len(match[0]):]
|
||||
return &fm, body
|
||||
}
|
||||
|
||||
// ExtractSections parses markdown and returns sections.
|
||||
func ExtractSections(content string) []Section {
|
||||
lines := strings.Split(content, "\n")
|
||||
sections := []Section{}
|
||||
|
||||
var currentSection *Section
|
||||
var contentLines []string
|
||||
|
||||
for i, line := range lines {
|
||||
lineNum := i + 1 // 1-indexed
|
||||
|
||||
match := headingRegex.FindStringSubmatch(line)
|
||||
if match != nil {
|
||||
// Save previous section's content
|
||||
if currentSection != nil {
|
||||
currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
|
||||
}
|
||||
|
||||
// Start new section
|
||||
level := len(match[1])
|
||||
title := strings.TrimSpace(match[2])
|
||||
|
||||
section := Section{
|
||||
ID: GenerateID(title),
|
||||
Title: title,
|
||||
Level: level,
|
||||
Line: lineNum,
|
||||
}
|
||||
sections = append(sections, section)
|
||||
currentSection = §ions[len(sections)-1]
|
||||
contentLines = []string{}
|
||||
} else if currentSection != nil {
|
||||
contentLines = append(contentLines, line)
|
||||
}
|
||||
}
|
||||
|
||||
// Save last section's content
|
||||
if currentSection != nil {
|
||||
currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
|
||||
}
|
||||
|
||||
return sections
|
||||
}
|
||||
|
||||
// GenerateID turns a title into a lowercase, hyphen-separated, URL-safe ID.
//
// Letters and digits (including non-ASCII ones) are kept in lowercase. Any
// run of whitespace, '-' or '_' collapses into a single hyphen. All other
// characters are dropped. The result never starts or ends with a hyphen.
//
//	"Getting Started" -> "getting-started"
func GenerateID(title string) string {
	var id strings.Builder

	// needSep records that a separator was seen after some output; the
	// hyphen is only written once another letter or digit follows, which
	// rules out duplicate and trailing hyphens.
	needSep := false

	for _, ch := range strings.ToLower(title) {
		switch {
		case unicode.IsLetter(ch), unicode.IsDigit(ch):
			if needSep {
				id.WriteByte('-')
				needSep = false
			}
			id.WriteRune(ch)
		case ch == '-', ch == '_', unicode.IsSpace(ch):
			// Separators before any output are ignored (no leading hyphen).
			needSep = id.Len() > 0
		}
		// Anything else is skipped.
	}

	return id.String()
}
|
||||
|
||||
// pathToTitle converts a slash-separated file path to a human-readable title.
//
// It takes the last path element, drops everything from the final '.',
// treats '-' and '_' as word separators, and capitalises each word (first
// character upper case, remainder lower case). Capitalisation works on
// runes, so words that start with a multi-byte character are handled
// correctly.
//
//	"getting-started.md" -> "Getting Started"
func pathToTitle(path string) string {
	// Keep only the file name (LastIndex returns -1 when there is no '/').
	name := path[strings.LastIndex(path, "/")+1:]

	// Remove the extension.
	if dot := strings.LastIndex(name, "."); dot != -1 {
		name = name[:dot]
	}

	// Hyphens and underscores separate words, just like whitespace.
	name = strings.NewReplacer("-", " ", "_", " ").Replace(name)

	words := strings.Fields(name)
	for i, word := range words {
		// Split on rune boundaries: slicing off the first byte would cut a
		// multi-byte first character in half.
		runes := []rune(word)
		words[i] = strings.ToUpper(string(runes[0])) + strings.ToLower(string(runes[1:]))
	}

	return strings.Join(words, " ")
}
|
||||
316
pkg/help/parser_test.go
Normal file
316
pkg/help/parser_test.go
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
package help
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestGenerateID_Good(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "simple title",
|
||||
input: "Getting Started",
|
||||
expected: "getting-started",
|
||||
},
|
||||
{
|
||||
name: "already lowercase",
|
||||
input: "installation",
|
||||
expected: "installation",
|
||||
},
|
||||
{
|
||||
name: "multiple spaces",
|
||||
input: "Quick Start Guide",
|
||||
expected: "quick-start-guide",
|
||||
},
|
||||
{
|
||||
name: "with numbers",
|
||||
input: "Chapter 1 Introduction",
|
||||
expected: "chapter-1-introduction",
|
||||
},
|
||||
{
|
||||
name: "special characters",
|
||||
input: "What's New? (v2.0)",
|
||||
expected: "whats-new-v20",
|
||||
},
|
||||
{
|
||||
name: "underscores",
|
||||
input: "config_file_reference",
|
||||
expected: "config-file-reference",
|
||||
},
|
||||
{
|
||||
name: "hyphens preserved",
|
||||
input: "pre-commit hooks",
|
||||
expected: "pre-commit-hooks",
|
||||
},
|
||||
{
|
||||
name: "leading trailing spaces",
|
||||
input: " Trimmed Title ",
|
||||
expected: "trimmed-title",
|
||||
},
|
||||
{
|
||||
name: "unicode letters",
|
||||
input: "Configuración Básica",
|
||||
expected: "configuración-básica",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := GenerateID(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Good(t *testing.T) {
|
||||
content := `---
|
||||
title: Getting Started
|
||||
tags: [intro, setup]
|
||||
order: 1
|
||||
related:
|
||||
- installation
|
||||
- configuration
|
||||
---
|
||||
|
||||
# Welcome
|
||||
|
||||
This is the content.
|
||||
`
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
assert.NotNil(t, fm)
|
||||
assert.Equal(t, "Getting Started", fm.Title)
|
||||
assert.Equal(t, []string{"intro", "setup"}, fm.Tags)
|
||||
assert.Equal(t, 1, fm.Order)
|
||||
assert.Equal(t, []string{"installation", "configuration"}, fm.Related)
|
||||
assert.Contains(t, body, "# Welcome")
|
||||
assert.Contains(t, body, "This is the content.")
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Good_NoFrontmatter(t *testing.T) {
|
||||
content := `# Just a Heading
|
||||
|
||||
Some content here.
|
||||
`
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
assert.Nil(t, fm)
|
||||
assert.Equal(t, content, body)
|
||||
}
|
||||
|
||||
func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) {
|
||||
content := `---
|
||||
title: [invalid yaml
|
||||
---
|
||||
|
||||
# Content
|
||||
`
|
||||
|
||||
fm, body := ExtractFrontmatter(content)
|
||||
|
||||
// Invalid YAML should return nil frontmatter and original content
|
||||
assert.Nil(t, fm)
|
||||
assert.Equal(t, content, body)
|
||||
}
|
||||
|
||||
func TestExtractSections_Good(t *testing.T) {
|
||||
content := `# Main Title
|
||||
|
||||
Introduction paragraph.
|
||||
|
||||
## Installation
|
||||
|
||||
Install instructions here.
|
||||
More details.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
You need these things.
|
||||
|
||||
## Configuration
|
||||
|
||||
Config info here.
|
||||
`
|
||||
|
||||
sections := ExtractSections(content)
|
||||
|
||||
assert.Len(t, sections, 4)
|
||||
|
||||
// Main Title (H1)
|
||||
assert.Equal(t, "main-title", sections[0].ID)
|
||||
assert.Equal(t, "Main Title", sections[0].Title)
|
||||
assert.Equal(t, 1, sections[0].Level)
|
||||
assert.Equal(t, 1, sections[0].Line)
|
||||
assert.Contains(t, sections[0].Content, "Introduction paragraph.")
|
||||
|
||||
// Installation (H2)
|
||||
assert.Equal(t, "installation", sections[1].ID)
|
||||
assert.Equal(t, "Installation", sections[1].Title)
|
||||
assert.Equal(t, 2, sections[1].Level)
|
||||
assert.Contains(t, sections[1].Content, "Install instructions here.")
|
||||
assert.Contains(t, sections[1].Content, "More details.")
|
||||
|
||||
// Prerequisites (H3)
|
||||
assert.Equal(t, "prerequisites", sections[2].ID)
|
||||
assert.Equal(t, "Prerequisites", sections[2].Title)
|
||||
assert.Equal(t, 3, sections[2].Level)
|
||||
assert.Contains(t, sections[2].Content, "You need these things.")
|
||||
|
||||
// Configuration (H2)
|
||||
assert.Equal(t, "configuration", sections[3].ID)
|
||||
assert.Equal(t, "Configuration", sections[3].Title)
|
||||
assert.Equal(t, 2, sections[3].Level)
|
||||
}
|
||||
|
||||
func TestExtractSections_Good_AllHeadingLevels(t *testing.T) {
|
||||
content := `# H1
|
||||
## H2
|
||||
### H3
|
||||
#### H4
|
||||
##### H5
|
||||
###### H6
|
||||
`
|
||||
|
||||
sections := ExtractSections(content)
|
||||
|
||||
assert.Len(t, sections, 6)
|
||||
for i, level := range []int{1, 2, 3, 4, 5, 6} {
|
||||
assert.Equal(t, level, sections[i].Level)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractSections_Good_Empty(t *testing.T) {
|
||||
content := `Just plain text.
|
||||
No headings here.
|
||||
`
|
||||
|
||||
sections := ExtractSections(content)
|
||||
|
||||
assert.Empty(t, sections)
|
||||
}
|
||||
|
||||
func TestParseTopic_Good(t *testing.T) {
|
||||
content := []byte(`---
|
||||
title: Quick Start Guide
|
||||
tags: [intro, quickstart]
|
||||
order: 5
|
||||
related:
|
||||
- installation
|
||||
---
|
||||
|
||||
# Quick Start Guide
|
||||
|
||||
Welcome to the guide.
|
||||
|
||||
## First Steps
|
||||
|
||||
Do this first.
|
||||
|
||||
## Next Steps
|
||||
|
||||
Then do this.
|
||||
`)
|
||||
|
||||
topic, err := ParseTopic("docs/quick-start.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
|
||||
// Check metadata from frontmatter
|
||||
assert.Equal(t, "quick-start-guide", topic.ID)
|
||||
assert.Equal(t, "Quick Start Guide", topic.Title)
|
||||
assert.Equal(t, "docs/quick-start.md", topic.Path)
|
||||
assert.Equal(t, []string{"intro", "quickstart"}, topic.Tags)
|
||||
assert.Equal(t, []string{"installation"}, topic.Related)
|
||||
assert.Equal(t, 5, topic.Order)
|
||||
|
||||
// Check sections
|
||||
assert.Len(t, topic.Sections, 3)
|
||||
assert.Equal(t, "quick-start-guide", topic.Sections[0].ID)
|
||||
assert.Equal(t, "first-steps", topic.Sections[1].ID)
|
||||
assert.Equal(t, "next-steps", topic.Sections[2].ID)
|
||||
|
||||
// Content should not include frontmatter
|
||||
assert.NotContains(t, topic.Content, "---")
|
||||
assert.Contains(t, topic.Content, "# Quick Start Guide")
|
||||
}
|
||||
|
||||
func TestParseTopic_Good_NoFrontmatter(t *testing.T) {
|
||||
content := []byte(`# Getting Started
|
||||
|
||||
This is a simple doc.
|
||||
|
||||
## Installation
|
||||
|
||||
Install it here.
|
||||
`)
|
||||
|
||||
topic, err := ParseTopic("getting-started.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
|
||||
// Title should come from first H1
|
||||
assert.Equal(t, "Getting Started", topic.Title)
|
||||
assert.Equal(t, "getting-started", topic.ID)
|
||||
|
||||
// Sections extracted
|
||||
assert.Len(t, topic.Sections, 2)
|
||||
}
|
||||
|
||||
func TestParseTopic_Good_NoHeadings(t *testing.T) {
|
||||
content := []byte(`---
|
||||
title: Plain Content
|
||||
---
|
||||
|
||||
Just some text without any headings.
|
||||
`)
|
||||
|
||||
topic, err := ParseTopic("plain.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
assert.Equal(t, "Plain Content", topic.Title)
|
||||
assert.Equal(t, "plain-content", topic.ID)
|
||||
assert.Empty(t, topic.Sections)
|
||||
}
|
||||
|
||||
func TestParseTopic_Good_IDFromPath(t *testing.T) {
|
||||
content := []byte(`Just content, no frontmatter or headings.`)
|
||||
|
||||
topic, err := ParseTopic("commands/dev-workflow.md", content)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, topic)
|
||||
|
||||
// ID and title should be derived from path
|
||||
assert.Equal(t, "dev-workflow", topic.ID)
|
||||
assert.Equal(t, "", topic.Title) // No title available
|
||||
}
|
||||
|
||||
func TestPathToTitle_Good(t *testing.T) {
|
||||
tests := []struct {
|
||||
path string
|
||||
expected string
|
||||
}{
|
||||
{"getting-started.md", "Getting Started"},
|
||||
{"commands/dev.md", "Dev"},
|
||||
{"path/to/file_name.md", "File Name"},
|
||||
{"UPPERCASE.md", "Uppercase"},
|
||||
{"no-extension", "No Extension"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.path, func(t *testing.T) {
|
||||
result := pathToTitle(tt.path)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
31
pkg/help/topic.go
Normal file
31
pkg/help/topic.go
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
// Package help provides display-agnostic help content management.
|
||||
package help
|
||||
|
||||
// Topic represents a help topic/page.
|
||||
type Topic struct {
|
||||
ID string `json:"id"`
|
||||
Title string `json:"title"`
|
||||
Path string `json:"path"`
|
||||
Content string `json:"content"`
|
||||
Sections []Section `json:"sections"`
|
||||
Tags []string `json:"tags"`
|
||||
Related []string `json:"related"`
|
||||
Order int `json:"order"` // For sorting
|
||||
}
|
||||
|
||||
// Section describes one heading inside a topic together with the text that
// sits under it.
type Section struct {
	// ID is the anchor identifier of the heading.
	ID string `json:"id"`
	// Title is the heading text.
	Title string `json:"title"`
	// Level is the heading level.
	Level int `json:"level"`
	// Line is the 1-indexed line in the content where the heading starts.
	Line int `json:"line"`
	// Content is the text found under the heading.
	Content string `json:"content"`
}
|
||||
|
||||
// Frontmatter holds the metadata fields read from a topic's YAML
// frontmatter block.
type Frontmatter struct {
	// Title is read from the "title" key.
	Title string `yaml:"title"`
	// Tags is read from the "tags" key.
	Tags []string `yaml:"tags"`
	// Related is read from the "related" key.
	Related []string `yaml:"related"`
	// Order is read from the "order" key.
	Order int `yaml:"order"`
}
|
||||
Loading…
Add table
Reference in a new issue