package help import ( "cmp" "iter" "regexp" "slices" "strings" "unicode" ) // Scoring weights for search result ranking. const ( scoreExactWord = 1.0 // Exact word match in the index scorePrefixWord = 0.5 // Prefix/partial word match scoreFuzzyWord = 0.3 // Fuzzy (Levenshtein) match scoreStemWord = 0.7 // Stemmed word match (between exact and prefix) scoreTitleBoost = 10.0 // Query word appears in topic title scoreSectionBoost = 5.0 // Query word appears in section title scoreTagBoost = 3.0 // Query word appears in topic tags scorePhraseBoost = 8.0 // Exact phrase match in content scoreAllWords = 2.0 // All query words present (multi-word bonus) fuzzyMaxDistance = 2 // Maximum edit distance for fuzzy matching ) // SearchResult represents a search match. type SearchResult struct { Topic *Topic Section *Section // nil if topic-level match Score float64 Snippet string // Context around match } // searchIndex provides full-text search. type searchIndex struct { topics map[string]*Topic // topicID -> Topic index map[string][]string // word -> []topicID } // newSearchIndex creates a new empty search index. func newSearchIndex() *searchIndex { return &searchIndex{ topics: make(map[string]*Topic), index: make(map[string][]string), } } // Add indexes a topic for searching. func (i *searchIndex) Add(topic *Topic) { i.topics[topic.ID] = topic // Index title words with boost for _, word := range tokenize(topic.Title) { i.addToIndex(word, topic.ID) } // Index content words for _, word := range tokenize(topic.Content) { i.addToIndex(word, topic.ID) } // Index section titles and content for _, section := range topic.Sections { for _, word := range tokenize(section.Title) { i.addToIndex(word, topic.ID) } for _, word := range tokenize(section.Content) { i.addToIndex(word, topic.ID) } } // Index tags for _, tag := range topic.Tags { for _, word := range tokenize(tag) { i.addToIndex(word, topic.ID) } } } // addToIndex adds a word-to-topic mapping. func (i *searchIndex) addToIndex(word, topicID string) { // Avoid duplicates if slices.Contains(i.index[word], topicID) { return } i.index[word] = append(i.index[word], topicID) } // Search finds topics matching the query. Supports: // - Single and multi-word keyword queries // - Quoted phrase search (e.g. `"rate limit"`) // - Fuzzy matching via Levenshtein distance for typo tolerance // - Prefix matching for partial words func (i *searchIndex) Search(query string) []*SearchResult { // Extract quoted phrases before tokenising. phrases, stripped := extractPhrases(query) queryWords := tokenize(stripped) if len(queryWords) == 0 && len(phrases) == 0 { return nil } // Track scores per topic scores := make(map[string]float64) // Build set of stemmed query variants for stem-aware scoring. stemmedWords := make(map[string]bool) for _, word := range queryWords { if s := stem(word); s != word { stemmedWords[s] = true } } for _, word := range queryWords { isStem := stemmedWords[word] // Exact matches — score stems lower than raw words. if topicIDs, ok := i.index[word]; ok { sc := scoreExactWord if isStem { sc = scoreStemWord } for _, topicID := range topicIDs { scores[topicID] += sc } } // Prefix matches (partial word matching) for indexWord, topicIDs := range i.index { if strings.HasPrefix(indexWord, word) && indexWord != word { for _, topicID := range topicIDs { scores[topicID] += scorePrefixWord } } } // Fuzzy matches (Levenshtein distance) if len(word) >= 3 { for indexWord, topicIDs := range i.index { if indexWord == word { continue // Already scored as exact match } if strings.HasPrefix(indexWord, word) { continue // Already scored as prefix match } dist := levenshtein(word, indexWord) if dist > 0 && dist <= fuzzyMaxDistance { for _, topicID := range topicIDs { scores[topicID] += scoreFuzzyWord } } } } } // Pre-compile regexes for snippets var res []*regexp.Regexp for _, word := range queryWords { if len(word) >= 2 { if re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(word)); err == nil { res = append(res, re) } } } // Also add phrase regexes for highlighting for _, phrase := range phrases { if re, err := regexp.Compile("(?i)" + regexp.QuoteMeta(phrase)); err == nil { res = append(res, re) } } // Phrase matching: boost topics that contain the exact phrase. for _, phrase := range phrases { phraseLower := strings.ToLower(phrase) for topicID, topic := range i.topics { var text strings.Builder text.WriteString(strings.ToLower(topic.Title + " " + topic.Content)) for _, section := range topic.Sections { text.WriteString(" " + strings.ToLower(section.Title+" "+section.Content)) } if strings.Contains(text.String(), phraseLower) { scores[topicID] += scorePhraseBoost } } } // Build results with title/section/tag boosts and snippet extraction var results []*SearchResult for topicID, score := range scores { topic := i.topics[topicID] if topic == nil { continue } // Title boost: if query words appear in title titleLower := strings.ToLower(topic.Title) titleMatchCount := 0 for _, word := range queryWords { if strings.Contains(titleLower, word) { titleMatchCount++ } } if titleMatchCount > 0 { score += scoreTitleBoost } // Tag boost: if query words match tags for _, tag := range topic.Tags { tagLower := strings.ToLower(tag) for _, word := range queryWords { if tagLower == word || strings.Contains(tagLower, word) { score += scoreTagBoost break } } } // Multi-word bonus: if all query words are present in the topic if len(queryWords) > 1 { allPresent := true fullText := strings.ToLower(topic.Title + " " + topic.Content) for _, word := range queryWords { if !strings.Contains(fullText, word) { allPresent = false break } } if allPresent { score += scoreAllWords } } // Find matching section and extract snippet section, snippet := i.findBestMatch(topic, queryWords, res) // Section title boost if section != nil { sectionTitleLower := strings.ToLower(section.Title) hasSectionTitleMatch := false for _, word := range queryWords { if strings.Contains(sectionTitleLower, word) { hasSectionTitleMatch = true break } } if hasSectionTitleMatch { score += scoreSectionBoost } } results = append(results, &SearchResult{ Topic: topic, Section: section, Score: score, Snippet: snippet, }) } // Sort by score (highest first) slices.SortFunc(results, func(a, b *SearchResult) int { if a.Score != b.Score { return cmp.Compare(b.Score, a.Score) // Reversed for highest first } return cmp.Compare(a.Topic.Title, b.Topic.Title) }) return results } // extractPhrases pulls quoted substrings from the query and returns them // alongside the remaining query text with quotes removed. // For example: `hello "rate limit" world` returns // phrases=["rate limit"], remaining="hello world". func extractPhrases(query string) (phrases []string, remaining string) { re := regexp.MustCompile(`"([^"]+)"`) matches := re.FindAllStringSubmatch(query, -1) for _, m := range matches { phrase := strings.TrimSpace(m[1]) if phrase != "" { phrases = append(phrases, phrase) } } remaining = re.ReplaceAllString(query, "") return phrases, remaining } // levenshtein computes the edit distance between two strings. // Used for fuzzy matching to tolerate typos in search queries. func levenshtein(a, b string) int { aRunes := []rune(a) bRunes := []rune(b) aLen := len(aRunes) bLen := len(bRunes) if aLen == 0 { return bLen } if bLen == 0 { return aLen } // Use two rows instead of full matrix to save memory. prev := make([]int, bLen+1) curr := make([]int, bLen+1) for j := range bLen + 1 { prev[j] = j } for i := 1; i <= aLen; i++ { curr[0] = i for j := 1; j <= bLen; j++ { cost := 1 if aRunes[i-1] == bRunes[j-1] { cost = 0 } curr[j] = min( prev[j]+1, // deletion curr[j-1]+1, // insertion prev[j-1]+cost, // substitution ) } prev, curr = curr, prev } return prev[bLen] } // findBestMatch finds the section with the best match and extracts a snippet. func (i *searchIndex) findBestMatch(topic *Topic, queryWords []string, res []*regexp.Regexp) (*Section, string) { var bestSection *Section var bestSnippet string bestScore := 0 // Check topic title titleScore := countMatches(topic.Title, queryWords) if titleScore > 0 { bestSnippet = extractSnippet(topic.Content, res) } // Check sections for idx := range topic.Sections { section := &topic.Sections[idx] sectionScore := countMatches(section.Title, queryWords) contentScore := countMatches(section.Content, queryWords) totalScore := sectionScore*2 + contentScore // Title matches worth more if totalScore > bestScore { bestScore = totalScore bestSection = section if contentScore > 0 { bestSnippet = extractSnippet(section.Content, res) } else { bestSnippet = extractSnippet(section.Content, nil) } } } // If no section matched, use topic content if bestSnippet == "" && topic.Content != "" { bestSnippet = extractSnippet(topic.Content, res) } return bestSection, bestSnippet } // tokenize splits text into lowercase words for indexing/searching. // For each word, it also emits the stemmed variant (if different from the // original) so the index contains both raw and stemmed forms. func tokenize(text string) []string { return slices.Collect(Tokens(text)) } // Tokens returns an iterator for lowercase words in text. func Tokens(text string) iter.Seq[string] { return func(yield func(string) bool) { text = strings.ToLower(text) var word strings.Builder emit := func(w string) bool { if len(w) < 2 { return true } if !yield(w) { return false } if s := stem(w); s != w { if !yield(s) { return false } } return true } for _, r := range text { if unicode.IsLetter(r) || unicode.IsDigit(r) { word.WriteRune(r) } else if word.Len() > 0 { if !emit(word.String()) { return } word.Reset() } } // Don't forget the last word if word.Len() > 0 { emit(word.String()) } } } // countMatches counts how many query words appear in the text. func countMatches(text string, queryWords []string) int { textLower := strings.ToLower(text) count := 0 for _, word := range queryWords { if strings.Contains(textLower, word) { count++ } } return count } // extractSnippet extracts a short snippet around the first match and highlights matches. func extractSnippet(content string, res []*regexp.Regexp) string { if content == "" { return "" } const snippetLen = 150 // If no regexes, return start of content without highlighting if len(res) == 0 { lines := strings.SplitSeq(content, "\n") for line := range lines { line = strings.TrimSpace(line) if line != "" && !strings.HasPrefix(line, "#") { runes := []rune(line) if len(runes) > snippetLen { return string(runes[:snippetLen]) + "..." } return line } } return "" } // Find first match position (byte-based) matchPos := -1 for _, re := range res { loc := re.FindStringIndex(content) if loc != nil && (matchPos == -1 || loc[0] < matchPos) { matchPos = loc[0] } } // Convert to runes for safe slicing runes := []rune(content) runeLen := len(runes) var start, end int if matchPos == -1 { // No match found, use start of content start = 0 end = min(snippetLen, runeLen) } else { // Convert byte position to rune position matchRunePos := len([]rune(content[:matchPos])) // Extract snippet around match (rune-based) start = max(matchRunePos-50, 0) end = min(start+snippetLen, runeLen) } snippet := string(runes[start:end]) // Trim to word boundaries prefix := "" suffix := "" if start > 0 { if idx := strings.Index(snippet, " "); idx != -1 { snippet = snippet[idx+1:] prefix = "..." } } if end < runeLen { if idx := strings.LastIndex(snippet, " "); idx != -1 { snippet = snippet[:idx] suffix = "..." } } snippet = strings.TrimSpace(snippet) if snippet == "" { return "" } // Apply highlighting highlighted := highlight(snippet, res) return prefix + highlighted + suffix } // highlight wraps matches in **bold**. func highlight(text string, res []*regexp.Regexp) string { if len(res) == 0 { return text } type match struct { start, end int } var matches []match for _, re := range res { indices := re.FindAllStringIndex(text, -1) for _, idx := range indices { matches = append(matches, match{idx[0], idx[1]}) } } if len(matches) == 0 { return text } // Sort matches by start position slices.SortFunc(matches, func(a, b match) int { if a.start != b.start { return cmp.Compare(a.start, b.start) } return cmp.Compare(b.end, a.end) }) // Merge overlapping or adjacent matches var merged []match if len(matches) > 0 { curr := matches[0] for i := 1; i < len(matches); i++ { if matches[i].start <= curr.end { if matches[i].end > curr.end { curr.end = matches[i].end } } else { merged = append(merged, curr) curr = matches[i] } } merged = append(merged, curr) } // Build highlighted string from back to front to avoid position shifts result := text for i := len(merged) - 1; i >= 0; i-- { m := merged[i] result = result[:m.end] + "**" + result[m.end:] result = result[:m.start] + "**" + result[m.start:] } return result }