Add RAG (Retrieval Augmented Generation) tools for storing documentation in a Qdrant vector database and querying it with semantic search. This replaces the Python tools/rag implementation with a native Go solution.

New commands:
- core rag ingest [directory] - Ingest markdown files into Qdrant
- core rag query [question] - Query the vector database with semantic search
- core rag collections - List and manage Qdrant collections

Features:
- Markdown chunking by sections and paragraphs with overlap
- UTF-8 safe text handling for international content
- Automatic category detection from file paths
- Multiple output formats: text, JSON, LLM context injection
- Environment variable support for host configuration

Dependencies:
- github.com/qdrant/go-client (gRPC client)
- github.com/ollama/ollama/api (embeddings API)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
120 lines
2.8 KiB
Go
120 lines
2.8 KiB
Go
package rag
|
|
|
|
import (
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
)
|
|
|
|
func TestChunkMarkdown_Good_SmallSection(t *testing.T) {
|
|
text := `# Title
|
|
|
|
This is a small section that fits in one chunk.
|
|
`
|
|
chunks := ChunkMarkdown(text, DefaultChunkConfig())
|
|
|
|
assert.Len(t, chunks, 1)
|
|
assert.Contains(t, chunks[0].Text, "small section")
|
|
}
|
|
|
|
func TestChunkMarkdown_Good_MultipleSections(t *testing.T) {
|
|
text := `# Main Title
|
|
|
|
Introduction paragraph.
|
|
|
|
## Section One
|
|
|
|
Content for section one.
|
|
|
|
## Section Two
|
|
|
|
Content for section two.
|
|
`
|
|
chunks := ChunkMarkdown(text, DefaultChunkConfig())
|
|
|
|
assert.GreaterOrEqual(t, len(chunks), 2)
|
|
}
|
|
|
|
func TestChunkMarkdown_Good_LargeSection(t *testing.T) {
|
|
// Create a section larger than chunk size
|
|
text := `## Large Section
|
|
|
|
` + repeatString("This is a test paragraph with some content. ", 50)
|
|
|
|
cfg := ChunkConfig{Size: 200, Overlap: 20}
|
|
chunks := ChunkMarkdown(text, cfg)
|
|
|
|
assert.Greater(t, len(chunks), 1)
|
|
for _, chunk := range chunks {
|
|
assert.NotEmpty(t, chunk.Text)
|
|
assert.Equal(t, "Large Section", chunk.Section)
|
|
}
|
|
}
|
|
|
|
func TestChunkMarkdown_Good_ExtractsTitle(t *testing.T) {
|
|
text := `## My Section Title
|
|
|
|
Some content here.
|
|
`
|
|
chunks := ChunkMarkdown(text, DefaultChunkConfig())
|
|
|
|
assert.Len(t, chunks, 1)
|
|
assert.Equal(t, "My Section Title", chunks[0].Section)
|
|
}
|
|
|
|
func TestCategory_Good_UIComponent(t *testing.T) {
|
|
tests := []struct {
|
|
path string
|
|
expected string
|
|
}{
|
|
{"docs/flux/button.md", "ui-component"},
|
|
{"ui/components/modal.md", "ui-component"},
|
|
{"brand/vi-personality.md", "brand"},
|
|
{"mascot/expressions.md", "brand"},
|
|
{"product-brief.md", "product-brief"},
|
|
{"tasks/2024-01-15-feature.md", "task"},
|
|
{"plans/architecture.md", "task"},
|
|
{"architecture/migration.md", "architecture"},
|
|
{"docs/api.md", "documentation"},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
t.Run(tc.path, func(t *testing.T) {
|
|
assert.Equal(t, tc.expected, Category(tc.path))
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestChunkID_Good_Deterministic(t *testing.T) {
|
|
id1 := ChunkID("test.md", 0, "hello world")
|
|
id2 := ChunkID("test.md", 0, "hello world")
|
|
|
|
assert.Equal(t, id1, id2)
|
|
}
|
|
|
|
func TestChunkID_Good_DifferentForDifferentInputs(t *testing.T) {
|
|
id1 := ChunkID("test.md", 0, "hello world")
|
|
id2 := ChunkID("test.md", 1, "hello world")
|
|
id3 := ChunkID("other.md", 0, "hello world")
|
|
|
|
assert.NotEqual(t, id1, id2)
|
|
assert.NotEqual(t, id1, id3)
|
|
}
|
|
|
|
func TestShouldProcess_Good_MarkdownFiles(t *testing.T) {
|
|
assert.True(t, ShouldProcess("doc.md"))
|
|
assert.True(t, ShouldProcess("doc.markdown"))
|
|
assert.True(t, ShouldProcess("doc.txt"))
|
|
assert.False(t, ShouldProcess("doc.go"))
|
|
assert.False(t, ShouldProcess("doc.py"))
|
|
assert.False(t, ShouldProcess("doc"))
|
|
}
|
|
|
|
// repeatString is a test helper that returns s concatenated with itself n
// times. A non-positive n yields the empty string.
func repeatString(s string, n int) string {
	// strings.Repeat panics on a negative count, whereas the hand-written
	// loop this replaces simply produced ""; keep that behaviour.
	if n <= 0 {
		return ""
	}
	return strings.Repeat(s, n)
}
|