2026-02-02 00:07:32 +00:00
|
|
|
package help
|
|
|
|
|
|
|
|
|
|
import (
|
feat(help): Implement full-text search (#294)
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with markdown bold highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Ensured all project files are correctly formatted.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections as specified.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Verified that `tokenize` is correctly defined and used within `pkg/help`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import and added `--repo` flag to `auto-merge` workflow.
2026-02-05 10:26:16 +00:00
|
|
|
"regexp"
|
2026-02-02 00:07:32 +00:00
|
|
|
"strings"
|
|
|
|
|
"testing"
|
|
|
|
|
"unicode/utf8"
|
|
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func TestTokenize_Good(t *testing.T) {
|
|
|
|
|
tests := []struct {
|
|
|
|
|
name string
|
|
|
|
|
input string
|
|
|
|
|
expected []string
|
|
|
|
|
}{
|
|
|
|
|
{
|
|
|
|
|
name: "simple words",
|
|
|
|
|
input: "hello world",
|
|
|
|
|
expected: []string{"hello", "world"},
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "mixed case",
|
|
|
|
|
input: "Hello World",
|
|
|
|
|
expected: []string{"hello", "world"},
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "with punctuation",
|
|
|
|
|
input: "Hello, world! How are you?",
|
|
|
|
|
expected: []string{"hello", "world", "how", "are", "you"},
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "single characters filtered",
|
|
|
|
|
input: "a b c hello d",
|
|
|
|
|
expected: []string{"hello"},
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "numbers included",
|
|
|
|
|
input: "version 2 release",
|
|
|
|
|
expected: []string{"version", "release"},
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "alphanumeric",
|
|
|
|
|
input: "v2.0 and config123",
|
|
|
|
|
expected: []string{"v2", "and", "config123"},
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
name: "empty string",
|
|
|
|
|
input: "",
|
|
|
|
|
expected: nil,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, tt := range tests {
|
|
|
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
|
|
|
result := tokenize(tt.input)
|
|
|
|
|
assert.Equal(t, tt.expected, result)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestSearchIndex_Add_Good(t *testing.T) {
|
|
|
|
|
idx := newSearchIndex()
|
|
|
|
|
|
|
|
|
|
topic := &Topic{
|
|
|
|
|
ID: "getting-started",
|
|
|
|
|
Title: "Getting Started",
|
|
|
|
|
Content: "Welcome to the guide.",
|
|
|
|
|
Tags: []string{"intro", "setup"},
|
|
|
|
|
Sections: []Section{
|
|
|
|
|
{ID: "installation", Title: "Installation", Content: "Install the CLI."},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
idx.Add(topic)
|
|
|
|
|
|
|
|
|
|
// Verify topic is stored
|
|
|
|
|
assert.NotNil(t, idx.topics["getting-started"])
|
|
|
|
|
|
|
|
|
|
// Verify words are indexed
|
|
|
|
|
assert.Contains(t, idx.index["getting"], "getting-started")
|
|
|
|
|
assert.Contains(t, idx.index["started"], "getting-started")
|
|
|
|
|
assert.Contains(t, idx.index["welcome"], "getting-started")
|
|
|
|
|
assert.Contains(t, idx.index["guide"], "getting-started")
|
|
|
|
|
assert.Contains(t, idx.index["intro"], "getting-started")
|
|
|
|
|
assert.Contains(t, idx.index["setup"], "getting-started")
|
|
|
|
|
assert.Contains(t, idx.index["installation"], "getting-started")
|
|
|
|
|
assert.Contains(t, idx.index["cli"], "getting-started")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestSearchIndex_Search_Good(t *testing.T) {
|
|
|
|
|
idx := newSearchIndex()
|
|
|
|
|
|
|
|
|
|
// Add test topics
|
|
|
|
|
idx.Add(&Topic{
|
|
|
|
|
ID: "getting-started",
|
|
|
|
|
Title: "Getting Started",
|
|
|
|
|
Content: "Welcome to the CLI guide. This covers installation and setup.",
|
|
|
|
|
Tags: []string{"intro"},
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
idx.Add(&Topic{
|
|
|
|
|
ID: "configuration",
|
|
|
|
|
Title: "Configuration",
|
|
|
|
|
Content: "Configure the CLI using environment variables.",
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
idx.Add(&Topic{
|
|
|
|
|
ID: "commands",
|
|
|
|
|
Title: "Commands Reference",
|
|
|
|
|
Content: "List of all available commands.",
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("single word query", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("configuration")
|
|
|
|
|
assert.NotEmpty(t, results)
|
|
|
|
|
assert.Equal(t, "configuration", results[0].Topic.ID)
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("multi-word query", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("cli guide")
|
|
|
|
|
assert.NotEmpty(t, results)
|
|
|
|
|
// Should match getting-started (has both "cli" and "guide")
|
|
|
|
|
assert.Equal(t, "getting-started", results[0].Topic.ID)
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("title boost", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("commands")
|
|
|
|
|
assert.NotEmpty(t, results)
|
|
|
|
|
// "commands" appears in title of commands topic
|
|
|
|
|
assert.Equal(t, "commands", results[0].Topic.ID)
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("partial word matching", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("config")
|
|
|
|
|
assert.NotEmpty(t, results)
|
|
|
|
|
// Should match "configuration" and "configure"
|
|
|
|
|
foundConfig := false
|
|
|
|
|
for _, r := range results {
|
|
|
|
|
if r.Topic.ID == "configuration" {
|
|
|
|
|
foundConfig = true
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
assert.True(t, foundConfig, "Should find configuration topic with prefix match")
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("no results", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("nonexistent")
|
|
|
|
|
assert.Empty(t, results)
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("empty query", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("")
|
|
|
|
|
assert.Nil(t, results)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestSearchIndex_Search_Good_WithSections(t *testing.T) {
|
|
|
|
|
idx := newSearchIndex()
|
|
|
|
|
|
|
|
|
|
idx.Add(&Topic{
|
|
|
|
|
ID: "installation",
|
|
|
|
|
Title: "Installation Guide",
|
|
|
|
|
Content: "Overview of installation process.",
|
|
|
|
|
Sections: []Section{
|
|
|
|
|
{
|
|
|
|
|
ID: "linux",
|
|
|
|
|
Title: "Linux Installation",
|
|
|
|
|
Content: "Run apt-get install core on Debian.",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
ID: "macos",
|
|
|
|
|
Title: "macOS Installation",
|
|
|
|
|
Content: "Use brew install core on macOS.",
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
ID: "windows",
|
|
|
|
|
Title: "Windows Installation",
|
|
|
|
|
Content: "Download the installer from the website.",
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("matches section content", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("debian")
|
|
|
|
|
assert.NotEmpty(t, results)
|
|
|
|
|
assert.Equal(t, "installation", results[0].Topic.ID)
|
|
|
|
|
// Should identify the Linux section as best match
|
|
|
|
|
if results[0].Section != nil {
|
|
|
|
|
assert.Equal(t, "linux", results[0].Section.ID)
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("matches section title", func(t *testing.T) {
|
|
|
|
|
results := idx.Search("windows")
|
|
|
|
|
assert.NotEmpty(t, results)
|
|
|
|
|
assert.Equal(t, "installation", results[0].Topic.ID)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestExtractSnippet_Good(t *testing.T) {
|
|
|
|
|
content := `This is the first paragraph with some introduction text.
|
|
|
|
|
|
|
|
|
|
Here is more content that talks about installation and setup.
|
|
|
|
|
The installation process is straightforward.
|
|
|
|
|
|
|
|
|
|
Finally, some closing remarks about the configuration.`
|
|
|
|
|
|
|
|
|
|
t.Run("finds match and extracts context", func(t *testing.T) {
|
feat(help): Implement full-text search (#294)
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with markdown bold highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Ensured all project files are correctly formatted.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections as specified.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Verified that `tokenize` is correctly defined and used within `pkg/help`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import and added `--repo` flag to `auto-merge` workflow.
2026-02-05 10:26:16 +00:00
|
|
|
snippet := extractSnippet(content, compileRegexes([]string{"installation"}))
|
|
|
|
|
assert.Contains(t, snippet, "**installation**")
|
|
|
|
|
assert.True(t, len(snippet) <= 250, "Snippet should be reasonably short")
|
2026-02-02 00:07:32 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("no query words returns start", func(t *testing.T) {
|
|
|
|
|
snippet := extractSnippet(content, nil)
|
|
|
|
|
assert.Contains(t, snippet, "first paragraph")
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("empty content", func(t *testing.T) {
|
feat(help): Implement full-text search (#294)
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with markdown bold highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Ensured all project files are correctly formatted.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections as specified.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Verified that `tokenize` is correctly defined and used within `pkg/help`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import and added `--repo` flag to `auto-merge` workflow.
2026-02-05 10:26:16 +00:00
|
|
|
snippet := extractSnippet("", compileRegexes([]string{"test"}))
|
2026-02-02 00:07:32 +00:00
|
|
|
assert.Empty(t, snippet)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
feat(help): Implement full-text search (#294)
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with markdown bold highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Ensured all project files are correctly formatted.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections as specified.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Verified that `tokenize` is correctly defined and used within `pkg/help`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import and added `--repo` flag to `auto-merge` workflow.
2026-02-05 10:26:16 +00:00
|
|
|
func TestExtractSnippet_Highlighting(t *testing.T) {
|
|
|
|
|
content := "The quick brown fox jumps over the lazy dog."
|
|
|
|
|
|
|
|
|
|
t.Run("simple highlighting", func(t *testing.T) {
|
|
|
|
|
snippet := extractSnippet(content, compileRegexes([]string{"quick", "fox"}))
|
|
|
|
|
assert.Contains(t, snippet, "**quick**")
|
|
|
|
|
assert.Contains(t, snippet, "**fox**")
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("case insensitive highlighting", func(t *testing.T) {
|
|
|
|
|
snippet := extractSnippet(content, compileRegexes([]string{"QUICK", "Fox"}))
|
|
|
|
|
assert.Contains(t, snippet, "**quick**")
|
|
|
|
|
assert.Contains(t, snippet, "**fox**")
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("partial word matching", func(t *testing.T) {
|
|
|
|
|
content := "The configuration is complete."
|
|
|
|
|
snippet := extractSnippet(content, compileRegexes([]string{"config"}))
|
|
|
|
|
assert.Contains(t, snippet, "**config**uration")
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("overlapping matches", func(t *testing.T) {
|
|
|
|
|
content := "Searching for something."
|
|
|
|
|
// Both "search" and "searching" match
|
|
|
|
|
snippet := extractSnippet(content, compileRegexes([]string{"search", "searching"}))
|
|
|
|
|
assert.Equal(t, "**Searching** for something.", snippet)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2026-02-02 00:07:32 +00:00
|
|
|
func TestExtractSnippet_Good_UTF8(t *testing.T) {
|
|
|
|
|
// Content with multi-byte UTF-8 characters
|
|
|
|
|
content := "日本語のテキストです。This contains Japanese text. 検索機能をテストします。"
|
|
|
|
|
|
|
|
|
|
t.Run("handles multi-byte characters without corruption", func(t *testing.T) {
|
feat(help): Implement full-text search (#294)
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with markdown bold highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Ensured all project files are correctly formatted.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections as specified.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Verified that `tokenize` is correctly defined and used within `pkg/help`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import and added `--repo` flag to `auto-merge` workflow.
2026-02-05 10:26:16 +00:00
|
|
|
snippet := extractSnippet(content, compileRegexes([]string{"japanese"}))
|
2026-02-02 00:07:32 +00:00
|
|
|
// Should not panic or produce invalid UTF-8
|
|
|
|
|
assert.True(t, len(snippet) > 0)
|
|
|
|
|
// Verify the result is valid UTF-8
|
|
|
|
|
assert.True(t, isValidUTF8(snippet), "Snippet should be valid UTF-8")
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("truncates multi-byte content safely", func(t *testing.T) {
|
|
|
|
|
// Long content that will be truncated
|
|
|
|
|
longContent := strings.Repeat("日本語", 100) // 300 characters
|
|
|
|
|
snippet := extractSnippet(longContent, nil)
|
|
|
|
|
assert.True(t, isValidUTF8(snippet), "Truncated snippet should be valid UTF-8")
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
feat(help): Implement full-text search (#294)
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with markdown bold highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Ensured all project files are correctly formatted.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections as specified.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized performance by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import in `internal/cmd/help/cmd.go`.
- Verified that `tokenize` is correctly defined and used within `pkg/help`.
* feat(help): implement full-text search with ranking and highlighting
- Implemented inverted index for help topics and sections.
- Added weighted scoring: Title (10.0), Section (5.0), Content (1.0).
- Implemented snippet extraction with robust markdown highlighting.
- Optimized search by pre-compiling regexes for match finding.
- Updated CLI help command to display matched sections and snippets with ANSI bold.
- Added comprehensive tests for search accuracy and highlighting.
- Fixed missing `strings` import and added `--repo` flag to `auto-merge` workflow.
2026-02-05 10:26:16 +00:00
|
|
|
// compileRegexes is a helper for tests. It turns each word into a
// case-insensitive regular expression that matches the word literally (regex
// metacharacters in the word are quoted).
//
// It returns nil when words is empty. It panics if a word cannot be compiled
// rather than silently dropping it, so a test never runs with fewer patterns
// than it asked for.
func compileRegexes(words []string) []*regexp.Regexp {
	if len(words) == 0 {
		return nil
	}
	res := make([]*regexp.Regexp, 0, len(words))
	for _, w := range words {
		res = append(res, regexp.MustCompile("(?i)"+regexp.QuoteMeta(w)))
	}
	return res
}
|
|
|
|
|
|
2026-02-02 00:07:32 +00:00
|
|
|
// isValidUTF8 reports whether s consists entirely of valid UTF-8-encoded
// runes. The empty string is valid.
func isValidUTF8(s string) bool {
	return utf8.ValidString(s)
}
|
|
|
|
|
|
|
|
|
|
func TestCountMatches_Good(t *testing.T) {
|
|
|
|
|
tests := []struct {
|
|
|
|
|
text string
|
|
|
|
|
words []string
|
|
|
|
|
expected int
|
|
|
|
|
}{
|
|
|
|
|
{"Hello world", []string{"hello"}, 1},
|
|
|
|
|
{"Hello world", []string{"hello", "world"}, 2},
|
|
|
|
|
{"Hello world", []string{"foo", "bar"}, 0},
|
|
|
|
|
{"The quick brown fox", []string{"quick", "fox", "dog"}, 2},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, tt := range tests {
|
|
|
|
|
result := countMatches(tt.text, tt.words)
|
|
|
|
|
assert.Equal(t, tt.expected, result)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestSearchResult_Score_Good(t *testing.T) {
|
|
|
|
|
idx := newSearchIndex()
|
|
|
|
|
|
|
|
|
|
// Topic with query word in title should score higher
|
|
|
|
|
idx.Add(&Topic{
|
|
|
|
|
ID: "topic-in-title",
|
|
|
|
|
Title: "Installation Guide",
|
|
|
|
|
Content: "Some content here.",
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
idx.Add(&Topic{
|
|
|
|
|
ID: "topic-in-content",
|
|
|
|
|
Title: "Some Other Topic",
|
|
|
|
|
Content: "This covers installation steps.",
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
results := idx.Search("installation")
|
|
|
|
|
assert.Len(t, results, 2)
|
|
|
|
|
|
|
|
|
|
// Title match should score higher
|
|
|
|
|
assert.Equal(t, "topic-in-title", results[0].Topic.ID)
|
|
|
|
|
assert.Greater(t, results[0].Score, results[1].Score)
|
|
|
|
|
}
|