- Change module from forge.lthn.ai/core/go to forge.lthn.ai/core/cli - Remove pkg/ directory (now served from core/go) - Add require + replace for forge.lthn.ai/core/go => ../go - Update go.work to include ../go workspace module - Fix all internal/cmd/* imports: pkg/ refs → forge.lthn.ai/core/go/pkg/ - Rename internal/cmd/sdk package to sdkcmd (avoids conflict with pkg/sdk) - Remove SDK library files from internal/cmd/sdk/ (now in core/go/pkg/sdk/) - Remove duplicate RAG helper functions from internal/cmd/rag/ - Remove stale cmd/core-ide/ (now in core/ide repo) - Update IDE variant to remove core-ide import - Fix test assertion for new module name - Run go mod tidy to sync dependencies core/cli is now a pure CLI application importing core/go for packages. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: Claude <developers@lethean.io> Reviewed-on: #1
117 lines
2.7 KiB
Go
117 lines
2.7 KiB
Go
package rag
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"forge.lthn.ai/core/go/pkg/cli"
|
|
"forge.lthn.ai/core/go/pkg/i18n"
|
|
"forge.lthn.ai/core/go/pkg/rag"
|
|
"github.com/spf13/cobra"
|
|
)
|
|
|
|
// Options consumed by runIngest.
// NOTE(review): presumably bound to command-line flags elsewhere in this
// package — confirm against the flag registration code.
var (
	// collection is passed to rag.IngestConfig.Collection and echoed in the
	// ingestion summary.
	collection string

	// recreate is passed to rag.IngestConfig.Recreate.
	recreate bool

	// chunkSize and chunkOverlap populate rag.ChunkConfig. runIngest requires
	// chunkSize > 0 and 0 <= chunkOverlap < chunkSize.
	chunkSize, chunkOverlap int
)
|
|
|
|
var ingestCmd = &cobra.Command{
|
|
Use: "ingest [directory]",
|
|
Short: i18n.T("cmd.rag.ingest.short"),
|
|
Long: i18n.T("cmd.rag.ingest.long"),
|
|
Args: cobra.MaximumNArgs(1),
|
|
RunE: runIngest,
|
|
}
|
|
|
|
func runIngest(cmd *cobra.Command, args []string) error {
|
|
directory := "."
|
|
if len(args) > 0 {
|
|
directory = args[0]
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
// Connect to Qdrant
|
|
fmt.Printf("Connecting to Qdrant at %s:%d...\n", qdrantHost, qdrantPort)
|
|
qdrantClient, err := rag.NewQdrantClient(rag.QdrantConfig{
|
|
Host: qdrantHost,
|
|
Port: qdrantPort,
|
|
UseTLS: false,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to connect to Qdrant: %w", err)
|
|
}
|
|
defer func() { _ = qdrantClient.Close() }()
|
|
|
|
if err := qdrantClient.HealthCheck(ctx); err != nil {
|
|
return fmt.Errorf("qdrant health check failed: %w", err)
|
|
}
|
|
|
|
// Connect to Ollama
|
|
fmt.Printf("Using embedding model: %s (via %s:%d)\n", model, ollamaHost, ollamaPort)
|
|
ollamaClient, err := rag.NewOllamaClient(rag.OllamaConfig{
|
|
Host: ollamaHost,
|
|
Port: ollamaPort,
|
|
Model: model,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to connect to Ollama: %w", err)
|
|
}
|
|
|
|
if err := ollamaClient.VerifyModel(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Configure ingestion
|
|
if chunkSize <= 0 {
|
|
return fmt.Errorf("chunk-size must be > 0")
|
|
}
|
|
if chunkOverlap < 0 || chunkOverlap >= chunkSize {
|
|
return fmt.Errorf("chunk-overlap must be >= 0 and < chunk-size")
|
|
}
|
|
|
|
cfg := rag.IngestConfig{
|
|
Directory: directory,
|
|
Collection: collection,
|
|
Recreate: recreate,
|
|
Verbose: verbose,
|
|
BatchSize: 100,
|
|
Chunk: rag.ChunkConfig{
|
|
Size: chunkSize,
|
|
Overlap: chunkOverlap,
|
|
},
|
|
}
|
|
|
|
// Progress callback
|
|
progress := func(file string, chunks int, total int) {
|
|
if verbose {
|
|
fmt.Printf(" Processed: %s (%d chunks total)\n", file, chunks)
|
|
} else {
|
|
fmt.Printf("\r %s (%d chunks) ", cli.DimStyle.Render(file), chunks)
|
|
}
|
|
}
|
|
|
|
// Run ingestion
|
|
fmt.Printf("\nIngesting from: %s\n", directory)
|
|
if recreate {
|
|
fmt.Printf(" (recreating collection: %s)\n", collection)
|
|
}
|
|
|
|
stats, err := rag.Ingest(ctx, qdrantClient, ollamaClient, cfg, progress)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Summary
|
|
fmt.Printf("\n\n%s\n", cli.TitleStyle.Render("Ingestion complete!"))
|
|
fmt.Printf(" Files processed: %d\n", stats.Files)
|
|
fmt.Printf(" Chunks created: %d\n", stats.Chunks)
|
|
if stats.Errors > 0 {
|
|
fmt.Printf(" Errors: %s\n", cli.ErrorStyle.Render(fmt.Sprintf("%d", stats.Errors)))
|
|
}
|
|
fmt.Printf(" Collection: %s\n", collection)
|
|
|
|
return nil
|
|
}
|