* feat(help): add markdown parsing and section extraction Implements #137: markdown parsing and section extraction for help system. - Add Topic and Section types for help content structure - Add Frontmatter type for YAML metadata parsing - Add ParseTopic() to parse markdown files into Topic structs - Add ExtractFrontmatter() to extract YAML frontmatter - Add ExtractSections() to extract headings and content - Add GenerateID() to create URL-safe anchor IDs - Add comprehensive tests following _Good/_Bad naming convention This is the foundation for the display-agnostic help system (#133). Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(test): use manual cleanup for TestDevOps_Boot_Good_FreshWithNoExisting Fixes flaky test that fails with "TempDir RemoveAll cleanup: directory not empty" by using os.MkdirTemp with t.Cleanup instead of t.TempDir(). This is the same fix applied to TestDevOps_Boot_Good_Success in 8effbda. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(help): address CodeRabbit review feedback - Add CRLF line ending support to frontmatter regex - Add empty frontmatter block support - Use filepath.Base/Ext for cross-platform path handling - Add tests for CRLF and empty frontmatter cases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(help): add full-text search functionality (#175) * fix(test): use manual cleanup for TestDevOps_Boot_Good_FreshWithNoExisting Fixes flaky test that fails with "TempDir RemoveAll cleanup: directory not empty" by using os.MkdirTemp with t.Cleanup instead of t.TempDir(). This is the same fix applied to TestDevOps_Boot_Good_Success in 8effbda. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(help): add full-text search functionality Implements #139: full-text search for help topics. 
- Add searchIndex with inverted index for fast lookups - Add tokenize() for case-insensitive word extraction - Add Search() with relevance ranking: - Exact word matches score 1.0 - Prefix matches score 0.5 - Title matches get 2.0 boost - Add snippet extraction for search result context - Add section-level matching for precise results - Add comprehensive tests following _Good/_Bad naming Search features: - Case-insensitive matching - Partial word matching (prefix) - Title boost (matches in title rank higher) - Section-level results - Snippet extraction with context Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(help): address CodeRabbit review feedback - Add CRLF line ending support to frontmatter regex - Add empty frontmatter block support - Use filepath.Base/Ext for cross-platform path handling - Add tests for CRLF and empty frontmatter cases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> * fix(help): use rune-based slicing for UTF-8 safe snippets Address CodeRabbit feedback: byte-based slicing can corrupt multi-byte UTF-8 characters. Now uses rune-based indexing for snippet extraction. - Convert content to []rune before slicing - Convert byte position to rune position for match location - Add UTF-8 validation tests with Japanese text Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(help): use correct string for byte-to-rune conversion in extractSnippet strings.ToLower can change byte lengths for certain Unicode characters (e.g., K U+212A 3 bytes → k 1 byte). Since matchPos is a byte index from strings.Index(contentLower, word), the rune conversion must also use contentLower to maintain correct index alignment. Fixes CodeRabbit review feedback. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
174 lines
4.2 KiB
Go
174 lines
4.2 KiB
Go
package help
|
|
|
|
import (
|
|
"path/filepath"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
var (
	// frontmatterRegex matches a YAML frontmatter block at the very start of
	// the input, delimited by lines consisting of "---".
	//
	// Capture group 1 holds the YAML between the delimiters (empty for an
	// empty block). Both LF and CRLF line endings are accepted. The closing
	// delimiter must sit on a line of its own (followed by a line break or the
	// end of input), so a "---" embedded in a YAML value such as
	// "title: a---b" does not terminate the block early. The optional group is
	// lazy ("??") so that an immediately following "---" line is preferred as
	// the closing delimiter, which keeps empty frontmatter blocks from
	// swallowing a later horizontal rule in the body.
	frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(?:(.*?)\r?\n)??---(?:\r?\n|$)`)

	// headingRegex matches a single-line ATX markdown heading (# to ######).
	// Group 1 is the run of '#' characters, group 2 the heading text.
	headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`)
)
|
|
|
|
// ParseTopic parses a markdown file into a Topic.
|
|
func ParseTopic(path string, content []byte) (*Topic, error) {
|
|
contentStr := string(content)
|
|
|
|
topic := &Topic{
|
|
Path: path,
|
|
ID: GenerateID(pathToTitle(path)),
|
|
Sections: []Section{},
|
|
Tags: []string{},
|
|
Related: []string{},
|
|
}
|
|
|
|
// Extract YAML frontmatter if present
|
|
fm, body := ExtractFrontmatter(contentStr)
|
|
if fm != nil {
|
|
topic.Title = fm.Title
|
|
topic.Tags = fm.Tags
|
|
topic.Related = fm.Related
|
|
topic.Order = fm.Order
|
|
if topic.Title != "" {
|
|
topic.ID = GenerateID(topic.Title)
|
|
}
|
|
}
|
|
|
|
topic.Content = body
|
|
|
|
// Extract sections from headings
|
|
topic.Sections = ExtractSections(body)
|
|
|
|
// If no title from frontmatter, try first H1
|
|
if topic.Title == "" && len(topic.Sections) > 0 {
|
|
for _, s := range topic.Sections {
|
|
if s.Level == 1 {
|
|
topic.Title = s.Title
|
|
topic.ID = GenerateID(s.Title)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
return topic, nil
|
|
}
|
|
|
|
// ExtractFrontmatter extracts YAML frontmatter from markdown content.
|
|
// Returns the parsed frontmatter and the remaining content.
|
|
func ExtractFrontmatter(content string) (*Frontmatter, string) {
|
|
match := frontmatterRegex.FindStringSubmatch(content)
|
|
if match == nil {
|
|
return nil, content
|
|
}
|
|
|
|
var fm Frontmatter
|
|
if err := yaml.Unmarshal([]byte(match[1]), &fm); err != nil {
|
|
// Invalid YAML, return content as-is
|
|
return nil, content
|
|
}
|
|
|
|
// Return content without frontmatter
|
|
body := content[len(match[0]):]
|
|
return &fm, body
|
|
}
|
|
|
|
// ExtractSections parses markdown and returns sections.
|
|
func ExtractSections(content string) []Section {
|
|
lines := strings.Split(content, "\n")
|
|
sections := []Section{}
|
|
|
|
var currentSection *Section
|
|
var contentLines []string
|
|
|
|
for i, line := range lines {
|
|
lineNum := i + 1 // 1-indexed
|
|
|
|
match := headingRegex.FindStringSubmatch(line)
|
|
if match != nil {
|
|
// Save previous section's content
|
|
if currentSection != nil {
|
|
currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
|
|
}
|
|
|
|
// Start new section
|
|
level := len(match[1])
|
|
title := strings.TrimSpace(match[2])
|
|
|
|
section := Section{
|
|
ID: GenerateID(title),
|
|
Title: title,
|
|
Level: level,
|
|
Line: lineNum,
|
|
}
|
|
sections = append(sections, section)
|
|
currentSection = §ions[len(sections)-1]
|
|
contentLines = []string{}
|
|
} else if currentSection != nil {
|
|
contentLines = append(contentLines, line)
|
|
}
|
|
}
|
|
|
|
// Save last section's content
|
|
if currentSection != nil {
|
|
currentSection.Content = strings.TrimSpace(strings.Join(contentLines, "\n"))
|
|
}
|
|
|
|
return sections
|
|
}
|
|
|
|
// GenerateID derives a URL-safe anchor ID from a title.
//
// The title is lower-cased; letters and digits are kept, each run of
// whitespace, hyphens and underscores between kept characters becomes a
// single hyphen, and every other character is dropped. The result never
// starts or ends with a hyphen.
//
//	"Getting Started" -> "getting-started"
func GenerateID(title string) string {
	var id strings.Builder

	// A separator is only emitted once another letter or digit follows it,
	// which collapses runs and leaves no leading or trailing hyphen.
	pendingHyphen := false

	for _, ch := range strings.ToLower(title) {
		switch {
		case unicode.IsLetter(ch), unicode.IsDigit(ch):
			if pendingHyphen {
				id.WriteByte('-')
				pendingHyphen = false
			}
			id.WriteRune(ch)
		case ch == '-', ch == '_', unicode.IsSpace(ch):
			pendingHyphen = id.Len() > 0
		}
	}

	return id.String()
}
|
|
|
|
// pathToTitle converts a file path into a human-readable title.
//
// The directory and the file extension are removed, hyphens and underscores
// are treated as word separators, and each word is capitalised (first
// character upper-cased, the rest lower-cased). Capitalisation works on
// runes, so words starting with a multi-byte character are handled correctly.
//
//	"getting-started.md" -> "Getting Started"
func pathToTitle(path string) string {
	// File name without directory, using the host OS path separator.
	name := filepath.Base(path)
	// TrimSuffix is a no-op when there is no extension.
	name = strings.TrimSuffix(name, filepath.Ext(name))

	// Split on whitespace as well as '-' and '_'; empty fields are discarded.
	words := strings.FieldsFunc(name, func(r rune) bool {
		return r == '-' || r == '_' || unicode.IsSpace(r)
	})

	for i, word := range words {
		// Work on runes: slicing the string by byte would cut a multi-byte
		// first character in half.
		runes := []rune(word)
		words[i] = strings.ToUpper(string(runes[0])) + strings.ToLower(string(runes[1:]))
	}

	return strings.Join(words, " ")
}
|