diff --git a/pkg/help/parser.go b/pkg/help/parser.go
new file mode 100644
index 00000000..516afee2
--- /dev/null
+++ b/pkg/help/parser.go
@@ -0,0 +1,173 @@
+package help
+
+import (
+	"regexp"
+	"strings"
+	"unicode"
+
+	"gopkg.in/yaml.v3"
+)
+
+var (
+	// frontmatterRegex matches leading YAML frontmatter delimited by --- (LF or CRLF line endings)
+	frontmatterRegex = regexp.MustCompile(`(?s)^---\r?\n(.+?)\r?\n---\r?\n?`)
+
+	// headingRegex matches markdown headings (# to ######)
+	headingRegex = regexp.MustCompile(`^(#{1,6})\s+(.+)$`)
+)
+
+// ParseTopic parses a markdown file into a Topic.
+//
+// The title comes from the frontmatter or, failing that, the first H1; the ID is
+// derived from the title, or from the file name when no title is found.
+func ParseTopic(path string, content []byte) (*Topic, error) {
+	topic := &Topic{
+		Path:     path,
+		ID:       GenerateID(pathToTitle(path)),
+		Sections: []Section{},
+		Tags:     []string{},
+		Related:  []string{},
+	}
+
+	// Extract YAML frontmatter if present
+	fm, body := ExtractFrontmatter(string(content))
+	if fm != nil {
+		topic.Title = fm.Title
+		// Append so omitted keys keep the non-nil defaults ([] rather than null in JSON).
+		topic.Tags = append(topic.Tags, fm.Tags...)
+		topic.Related = append(topic.Related, fm.Related...)
+		topic.Order = fm.Order
+		if topic.Title != "" {
+			topic.ID = GenerateID(topic.Title)
+		}
+	}
+
+	topic.Content = body
+	topic.Sections = ExtractSections(body)
+
+	// If no title from frontmatter, try first H1
+	if topic.Title == "" {
+		for _, s := range topic.Sections {
+			if s.Level == 1 {
+				topic.Title = s.Title
+				topic.ID = GenerateID(s.Title)
+				break
+			}
+		}
+	}
+
+	return topic, nil
+}
+
+// ExtractFrontmatter extracts YAML frontmatter from markdown content.
+// Returns the parsed frontmatter and the remaining content.
+func ExtractFrontmatter(content string) (*Frontmatter, string) {
+	match := frontmatterRegex.FindStringSubmatch(content)
+	if match == nil {
+		return nil, content
+	}
+
+	fm := new(Frontmatter)
+	if err := yaml.Unmarshal([]byte(match[1]), fm); err != nil {
+		// Best effort: a block that is not valid YAML is left in the content.
+		return nil, content
+	}
+
+	// match[0] is the whole delimited block; the body is what follows it.
+	return fm, content[len(match[0]):]
+}
+
+// ExtractSections parses markdown and returns one Section per heading, in
+// document order. A section's Content is the trimmed text up to the next
+// heading of any level; text before the first heading is not kept. Lines inside
+// fenced code blocks (``` or ~~~) are content, never headings.
+func ExtractSections(content string) []Section {
+	sections := []Section{}
+	var pending []string // lines seen since the most recent heading
+	fence := ""          // marker that opened the current code fence; "" outside one
+
+	// finish stores the pending lines as the content of the most recent section.
+	finish := func() {
+		if n := len(sections); n > 0 {
+			sections[n-1].Content = strings.TrimSpace(strings.Join(pending, "\n"))
+		}
+		pending = pending[:0]
+	}
+
+	for i, line := range strings.Split(content, "\n") {
+		if t := strings.TrimSpace(line); strings.HasPrefix(t, "```") || strings.HasPrefix(t, "~~~") {
+			if fence == "" {
+				fence = t[:3]
+			} else if fence == t[:3] {
+				fence = ""
+			}
+		}
+
+		match := headingRegex.FindStringSubmatch(line)
+		if match == nil || fence != "" {
+			pending = append(pending, line)
+			continue
+		}
+
+		finish()
+		title := strings.TrimSpace(match[2])
+		sections = append(sections, Section{
+			ID:    GenerateID(title),
+			Title: title,
+			Level: len(match[1]),
+			Line:  i + 1, // 1-indexed
+		})
+	}
+	finish()
+	return sections
+}
+
+// GenerateID creates a URL-safe ID from a title.
+// "Getting Started" -> "getting-started"
+// Letters and digits are kept (lower-cased); spaces, hyphens and underscores
+// become single hyphens; every other character is dropped.
+func GenerateID(title string) string {
+	var result strings.Builder
+
+	for _, r := range strings.ToLower(title) {
+		switch {
+		case unicode.IsLetter(r) || unicode.IsDigit(r):
+			result.WriteRune(r)
+		case unicode.IsSpace(r) || r == '-' || r == '_':
+			// Collapse runs of separators into one hyphen and never start with one.
+			if s := result.String(); s != "" && !strings.HasSuffix(s, "-") {
+				result.WriteByte('-')
+			}
+		}
+	}
+
+	// The loop can leave one trailing hyphen; remove it.
+	return strings.Trim(result.String(), "-")
+}
+
+// pathToTitle converts a file path to a title.
+// "getting-started.md" -> "Getting Started"
+func pathToTitle(path string) string {
+	// Get filename without directory
+	parts := strings.Split(path, "/")
+	filename := parts[len(parts)-1]
+
+	// Remove extension
+	if idx := strings.LastIndex(filename, "."); idx != -1 {
+		filename = filename[:idx]
+	}
+
+	// Replace hyphens/underscores with spaces
+	filename = strings.ReplaceAll(filename, "-", " ")
+	filename = strings.ReplaceAll(filename, "_", " ")
+
+	// Title case
+	words := strings.Fields(filename)
+	for i, word := range words {
+		// Fields yields non-empty words; use runes so a multi-byte first letter stays intact.
+		runes := []rune(word)
+		words[i] = strings.ToUpper(string(runes[0])) + strings.ToLower(string(runes[1:]))
+	}
+
+	return strings.Join(words, " ")
+}
diff --git a/pkg/help/parser_test.go b/pkg/help/parser_test.go
new file mode 100644
index 00000000..a9ea1c2c
--- /dev/null
+++ b/pkg/help/parser_test.go
@@ -0,0 +1,317 @@
+package help
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGenerateID_Good(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "simple title",
+			input:    "Getting Started",
+			expected: "getting-started",
+		},
+		{
+			name:     "already lowercase",
+			input:    "installation",
+			expected: "installation",
+		},
+		{
+			name:     "multiple spaces",
+			input:    "Quick Start Guide",
+			expected: "quick-start-guide",
+		},
+		{
+			name:     "with numbers",
+			input:    "Chapter 1 Introduction",
+			expected: "chapter-1-introduction",
+		},
+		{
+			name:     "special characters",
+			input:    "What's New? (v2.0)",
+			expected: "whats-new-v20",
+		},
+		{
+			name:     "underscores",
+			input:    "config_file_reference",
+			expected: "config-file-reference",
+		},
+		{
+			name:     "hyphens preserved",
+			input:    "pre-commit hooks",
+			expected: "pre-commit-hooks",
+		},
+		{
+			name:     "leading trailing spaces",
+			input:    " Trimmed Title ",
+			expected: "trimmed-title",
+		},
+		{
+			name:     "unicode letters",
+			input:    "Configuración Básica",
+			expected: "configuración-básica",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := GenerateID(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestExtractFrontmatter_Good(t *testing.T) {
+	content := `---
+title: Getting Started
+tags: [intro, setup]
+order: 1
+related:
+ - installation
+ - configuration
+---
+
+# Welcome
+
+This is the content.
+`
+
+	fm, body := ExtractFrontmatter(content)
+
+	assert.NotNil(t, fm)
+	assert.Equal(t, "Getting Started", fm.Title)
+	assert.Equal(t, []string{"intro", "setup"}, fm.Tags)
+	assert.Equal(t, 1, fm.Order)
+	assert.Equal(t, []string{"installation", "configuration"}, fm.Related)
+	assert.Contains(t, body, "# Welcome")
+	assert.Contains(t, body, "This is the content.")
+}
+
+func TestExtractFrontmatter_Good_NoFrontmatter(t *testing.T) {
+	content := `# Just a Heading
+
+Some content here.
+`
+
+	fm, body := ExtractFrontmatter(content)
+
+	assert.Nil(t, fm)
+	assert.Equal(t, content, body)
+}
+
+func TestExtractFrontmatter_Bad_InvalidYAML(t *testing.T) {
+	content := `---
+title: [invalid yaml
+---
+
+# Content
+`
+
+	fm, body := ExtractFrontmatter(content)
+
+	// Invalid YAML should return nil frontmatter and original content
+	assert.Nil(t, fm)
+	assert.Equal(t, content, body)
+}
+
+func TestExtractSections_Good(t *testing.T) {
+	content := `# Main Title
+
+Introduction paragraph.
+
+## Installation
+
+Install instructions here.
+More details.
+
+### Prerequisites
+
+You need these things.
+
+## Configuration
+
+Config info here.
+`
+
+	sections := ExtractSections(content)
+
+	assert.Len(t, sections, 4)
+
+	// Main Title (H1)
+	assert.Equal(t, "main-title", sections[0].ID)
+	assert.Equal(t, "Main Title", sections[0].Title)
+	assert.Equal(t, 1, sections[0].Level)
+	assert.Equal(t, 1, sections[0].Line)
+	assert.Contains(t, sections[0].Content, "Introduction paragraph.")
+
+	// Installation (H2)
+	assert.Equal(t, "installation", sections[1].ID)
+	assert.Equal(t, "Installation", sections[1].Title)
+	assert.Equal(t, 2, sections[1].Level)
+	assert.Contains(t, sections[1].Content, "Install instructions here.")
+	assert.Contains(t, sections[1].Content, "More details.")
+
+	// Prerequisites (H3)
+	assert.Equal(t, "prerequisites", sections[2].ID)
+	assert.Equal(t, "Prerequisites", sections[2].Title)
+	assert.Equal(t, 3, sections[2].Level)
+	assert.Contains(t, sections[2].Content, "You need these things.")
+
+	// Configuration (H2)
+	assert.Equal(t, "configuration", sections[3].ID)
+	assert.Equal(t, "Configuration", sections[3].Title)
+	assert.Equal(t, 2, sections[3].Level)
+}
+
+func TestExtractSections_Good_AllHeadingLevels(t *testing.T) {
+	content := `# H1
+## H2
+### H3
+#### H4
+##### H5
+###### H6
+`
+
+	sections := ExtractSections(content)
+
+	assert.Len(t, sections, 6)
+	for i, level := range []int{1, 2, 3, 4, 5, 6} {
+		assert.Equal(t, level, sections[i].Level)
+	}
+}
+
+func TestExtractSections_Good_Empty(t *testing.T) {
+	content := `Just plain text.
+No headings here.
+`
+
+	sections := ExtractSections(content)
+
+	assert.Empty(t, sections)
+}
+
+func TestParseTopic_Good(t *testing.T) {
+	content := []byte(`---
+title: Quick Start Guide
+tags: [intro, quickstart]
+order: 5
+related:
+ - installation
+---
+
+# Quick Start Guide
+
+Welcome to the guide.
+
+## First Steps
+
+Do this first.
+
+## Next Steps
+
+Then do this.
+`)
+
+	topic, err := ParseTopic("docs/quick-start.md", content)
+
+	assert.NoError(t, err)
+	assert.NotNil(t, topic)
+
+	// Check metadata from frontmatter
+	assert.Equal(t, "quick-start-guide", topic.ID)
+	assert.Equal(t, "Quick Start Guide", topic.Title)
+	assert.Equal(t, "docs/quick-start.md", topic.Path)
+	assert.Equal(t, []string{"intro", "quickstart"}, topic.Tags)
+	assert.Equal(t, []string{"installation"}, topic.Related)
+	assert.Equal(t, 5, topic.Order)
+
+	// Check sections
+	assert.Len(t, topic.Sections, 3)
+	assert.Equal(t, "quick-start-guide", topic.Sections[0].ID)
+	assert.Equal(t, "first-steps", topic.Sections[1].ID)
+	assert.Equal(t, "next-steps", topic.Sections[2].ID)
+
+	// Content should not include frontmatter
+	assert.NotContains(t, topic.Content, "---")
+	assert.Contains(t, topic.Content, "# Quick Start Guide")
+}
+
+func TestParseTopic_Good_NoFrontmatter(t *testing.T) {
+	content := []byte(`# Getting Started
+
+This is a simple doc.
+
+## Installation
+
+Install it here.
+`)
+
+	topic, err := ParseTopic("getting-started.md", content)
+
+	assert.NoError(t, err)
+	assert.NotNil(t, topic)
+
+	// Title should come from first H1
+	assert.Equal(t, "Getting Started", topic.Title)
+	assert.Equal(t, "getting-started", topic.ID)
+
+	// Sections extracted
+	assert.Len(t, topic.Sections, 2)
+}
+
+func TestParseTopic_Good_NoHeadings(t *testing.T) {
+	content := []byte(`---
+title: Plain Content
+---
+
+Just some text without any headings.
+`)
+
+	topic, err := ParseTopic("plain.md", content)
+
+	assert.NoError(t, err)
+	assert.NotNil(t, topic)
+	assert.Equal(t, "Plain Content", topic.Title)
+	assert.Equal(t, "plain-content", topic.ID)
+	assert.Empty(t, topic.Sections)
+}
+
+func TestParseTopic_Good_IDFromPath(t *testing.T) {
+	content := []byte(`Just content, no frontmatter or headings.`)
+
+	topic, err := ParseTopic("commands/dev-workflow.md", content)
+
+	assert.NoError(t, err)
+	assert.NotNil(t, topic)
+
+	// Only the ID is derived from the path; the title stays empty
+	assert.Equal(t, "dev-workflow", topic.ID)
+	assert.Equal(t, "", topic.Title) // No title available
+}
+
+func TestPathToTitle_Good(t *testing.T) {
+	tests := []struct {
+		path     string
+		expected string
+	}{
+		{"getting-started.md", "Getting Started"},
+		{"commands/dev.md", "Dev"},
+		{"path/to/file_name.md", "File Name"},
+		{"UPPERCASE.md", "Uppercase"},
+		{"no-extension", "No Extension"},
+		{"élan-vital.md", "Élan Vital"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.path, func(t *testing.T) {
+			result := pathToTitle(tt.path)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
diff --git a/pkg/help/topic.go b/pkg/help/topic.go
new file mode 100644
index 00000000..b934e988
--- /dev/null
+++ b/pkg/help/topic.go
@@ -0,0 +1,31 @@
+// Package help provides display-agnostic help content management.
+package help
+
+// Topic represents a help topic/page.
+type Topic struct {
+	ID       string    `json:"id"`
+	Title    string    `json:"title"`
+	Path     string    `json:"path"`
+	Content  string    `json:"content"`
+	Sections []Section `json:"sections"`
+	Tags     []string  `json:"tags"`
+	Related  []string  `json:"related"`
+	Order    int       `json:"order"` // For sorting
+}
+
+// Section represents a heading within a topic.
+type Section struct {
+	ID      string `json:"id"`
+	Title   string `json:"title"`
+	Level   int    `json:"level"`
+	Line    int    `json:"line"`    // Start line in content (1-indexed)
+	Content string `json:"content"` // Content under heading
+}
+
+// Frontmatter represents YAML frontmatter metadata.
+type Frontmatter struct {
+	Title   string   `yaml:"title"`   // used as Topic.Title, ahead of the first H1
+	Tags    []string `yaml:"tags"`    // copied to Topic.Tags
+	Related []string `yaml:"related"` // copied to Topic.Related
+	Order   int      `yaml:"order"`   // copied to Topic.Order
+}