LEM scoring pipeline, native MLX Metal bindings, Claude SDK wrapper, RAG with Qdrant/Ollama, unified AI facade, and MCP protocol server. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
120 lines
2.8 KiB
Go
120 lines
2.8 KiB
Go
package rag
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
)
|
|
|
|
func TestChunkMarkdown_Good_SmallSection(t *testing.T) {
|
|
text := `# Title
|
|
|
|
This is a small section that fits in one chunk.
|
|
`
|
|
chunks := ChunkMarkdown(text, DefaultChunkConfig())
|
|
|
|
assert.Len(t, chunks, 1)
|
|
assert.Contains(t, chunks[0].Text, "small section")
|
|
}
|
|
|
|
func TestChunkMarkdown_Good_MultipleSections(t *testing.T) {
|
|
text := `# Main Title
|
|
|
|
Introduction paragraph.
|
|
|
|
## Section One
|
|
|
|
Content for section one.
|
|
|
|
## Section Two
|
|
|
|
Content for section two.
|
|
`
|
|
chunks := ChunkMarkdown(text, DefaultChunkConfig())
|
|
|
|
assert.GreaterOrEqual(t, len(chunks), 2)
|
|
}
|
|
|
|
func TestChunkMarkdown_Good_LargeSection(t *testing.T) {
|
|
// Create a section larger than chunk size
|
|
text := `## Large Section
|
|
|
|
` + repeatString("This is a test paragraph with some content. ", 50)
|
|
|
|
cfg := ChunkConfig{Size: 200, Overlap: 20}
|
|
chunks := ChunkMarkdown(text, cfg)
|
|
|
|
assert.Greater(t, len(chunks), 1)
|
|
for _, chunk := range chunks {
|
|
assert.NotEmpty(t, chunk.Text)
|
|
assert.Equal(t, "Large Section", chunk.Section)
|
|
}
|
|
}
|
|
|
|
func TestChunkMarkdown_Good_ExtractsTitle(t *testing.T) {
|
|
text := `## My Section Title
|
|
|
|
Some content here.
|
|
`
|
|
chunks := ChunkMarkdown(text, DefaultChunkConfig())
|
|
|
|
assert.Len(t, chunks, 1)
|
|
assert.Equal(t, "My Section Title", chunks[0].Section)
|
|
}
|
|
|
|
func TestCategory_Good_UIComponent(t *testing.T) {
|
|
tests := []struct {
|
|
path string
|
|
expected string
|
|
}{
|
|
{"docs/flux/button.md", "ui-component"},
|
|
{"ui/components/modal.md", "ui-component"},
|
|
{"brand/vi-personality.md", "brand"},
|
|
{"mascot/expressions.md", "brand"},
|
|
{"product-brief.md", "product-brief"},
|
|
{"tasks/2024-01-15-feature.md", "task"},
|
|
{"plans/architecture.md", "task"},
|
|
{"architecture/migration.md", "architecture"},
|
|
{"docs/api.md", "documentation"},
|
|
}
|
|
|
|
for _, tc := range tests {
|
|
t.Run(tc.path, func(t *testing.T) {
|
|
assert.Equal(t, tc.expected, Category(tc.path))
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestChunkID_Good_Deterministic(t *testing.T) {
|
|
id1 := ChunkID("test.md", 0, "hello world")
|
|
id2 := ChunkID("test.md", 0, "hello world")
|
|
|
|
assert.Equal(t, id1, id2)
|
|
}
|
|
|
|
func TestChunkID_Good_DifferentForDifferentInputs(t *testing.T) {
|
|
id1 := ChunkID("test.md", 0, "hello world")
|
|
id2 := ChunkID("test.md", 1, "hello world")
|
|
id3 := ChunkID("other.md", 0, "hello world")
|
|
|
|
assert.NotEqual(t, id1, id2)
|
|
assert.NotEqual(t, id1, id3)
|
|
}
|
|
|
|
func TestShouldProcess_Good_MarkdownFiles(t *testing.T) {
|
|
assert.True(t, ShouldProcess("doc.md"))
|
|
assert.True(t, ShouldProcess("doc.markdown"))
|
|
assert.True(t, ShouldProcess("doc.txt"))
|
|
assert.False(t, ShouldProcess("doc.go"))
|
|
assert.False(t, ShouldProcess("doc.py"))
|
|
assert.False(t, ShouldProcess("doc"))
|
|
}
|
|
|
|
// Helper function
|
|
func repeatString(s string, n int) string {
|
|
result := ""
|
|
for i := 0; i < n; i++ {
|
|
result += s
|
|
}
|
|
return result
|
|
}
|