* ci: consolidate duplicate workflows and merge CodeQL configs Remove 17 duplicate workflow files that were split copies of the combined originals. Each family (CI, CodeQL, Coverage, PR Build, Alpha Release) had the same job duplicated across separate push/pull_request/schedule/manual trigger files. Merge codeql.yml and codescan.yml into a single codeql.yml with a language matrix covering go, javascript-typescript, python, and actions — matching the previous default setup coverage. Remaining workflows (one per family): - ci.yml (push + PR + manual) - codeql.yml (push + PR + schedule, all languages) - coverage.yml (push + PR + manual) - alpha-release.yml (push + manual) - pr-build.yml (PR + manual) - release.yml (tag push) - agent-verify.yml, auto-label.yml, auto-project.yml Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat: add collect, config, crypt, plugin packages and fix all lint issues Add four new infrastructure packages with CLI commands: - pkg/config: layered configuration (defaults → file → env → flags) - pkg/crypt: crypto primitives (Argon2id, AES-GCM, ChaCha20, HMAC, checksums) - pkg/plugin: plugin system with GitHub-based install/update/remove - pkg/collect: collection subsystem (GitHub, BitcoinTalk, market, papers, excavate) Fix all golangci-lint issues across the entire codebase (~100 errcheck, staticcheck SA1012/SA1019/ST1005, unused, ineffassign fixes) so that `core go qa` passes with 0 issues. Closes #167, #168, #170, #250, #251, #252, #253, #254, #255, #256 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
171 lines
4.2 KiB
Go
171 lines
4.2 KiB
Go
package rag
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/host-uk/core/pkg/cli"
|
|
"github.com/host-uk/core/pkg/i18n"
|
|
"github.com/host-uk/core/pkg/rag"
|
|
"github.com/spf13/cobra"
|
|
)
|
|
|
|
// Options consumed by runIngest. They are presumably bound to command-line
// flags elsewhere in this package (the validation errors refer to
// "chunk-size" and "chunk-overlap") — confirm against the flag registration.
var (
	// collection is the name of the target collection handed to the ingester.
	collection string

	// recreate is forwarded as the ingest configuration's Recreate option.
	recreate bool

	// chunkSize and chunkOverlap configure chunking; runIngest requires
	// chunkSize > 0 and 0 <= chunkOverlap < chunkSize.
	chunkSize, chunkOverlap int
)
|
|
|
|
var ingestCmd = &cobra.Command{
|
|
Use: "ingest [directory]",
|
|
Short: i18n.T("cmd.rag.ingest.short"),
|
|
Long: i18n.T("cmd.rag.ingest.long"),
|
|
Args: cobra.MaximumNArgs(1),
|
|
RunE: runIngest,
|
|
}
|
|
|
|
func runIngest(cmd *cobra.Command, args []string) error {
|
|
directory := "."
|
|
if len(args) > 0 {
|
|
directory = args[0]
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
// Connect to Qdrant
|
|
fmt.Printf("Connecting to Qdrant at %s:%d...\n", qdrantHost, qdrantPort)
|
|
qdrantClient, err := rag.NewQdrantClient(rag.QdrantConfig{
|
|
Host: qdrantHost,
|
|
Port: qdrantPort,
|
|
UseTLS: false,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to connect to Qdrant: %w", err)
|
|
}
|
|
defer func() { _ = qdrantClient.Close() }()
|
|
|
|
if err := qdrantClient.HealthCheck(ctx); err != nil {
|
|
return fmt.Errorf("qdrant health check failed: %w", err)
|
|
}
|
|
|
|
// Connect to Ollama
|
|
fmt.Printf("Using embedding model: %s (via %s:%d)\n", model, ollamaHost, ollamaPort)
|
|
ollamaClient, err := rag.NewOllamaClient(rag.OllamaConfig{
|
|
Host: ollamaHost,
|
|
Port: ollamaPort,
|
|
Model: model,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to connect to Ollama: %w", err)
|
|
}
|
|
|
|
if err := ollamaClient.VerifyModel(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Configure ingestion
|
|
if chunkSize <= 0 {
|
|
return fmt.Errorf("chunk-size must be > 0")
|
|
}
|
|
if chunkOverlap < 0 || chunkOverlap >= chunkSize {
|
|
return fmt.Errorf("chunk-overlap must be >= 0 and < chunk-size")
|
|
}
|
|
|
|
cfg := rag.IngestConfig{
|
|
Directory: directory,
|
|
Collection: collection,
|
|
Recreate: recreate,
|
|
Verbose: verbose,
|
|
BatchSize: 100,
|
|
Chunk: rag.ChunkConfig{
|
|
Size: chunkSize,
|
|
Overlap: chunkOverlap,
|
|
},
|
|
}
|
|
|
|
// Progress callback
|
|
progress := func(file string, chunks int, total int) {
|
|
if verbose {
|
|
fmt.Printf(" Processed: %s (%d chunks total)\n", file, chunks)
|
|
} else {
|
|
fmt.Printf("\r %s (%d chunks) ", cli.DimStyle.Render(file), chunks)
|
|
}
|
|
}
|
|
|
|
// Run ingestion
|
|
fmt.Printf("\nIngesting from: %s\n", directory)
|
|
if recreate {
|
|
fmt.Printf(" (recreating collection: %s)\n", collection)
|
|
}
|
|
|
|
stats, err := rag.Ingest(ctx, qdrantClient, ollamaClient, cfg, progress)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Summary
|
|
fmt.Printf("\n\n%s\n", cli.TitleStyle.Render("Ingestion complete!"))
|
|
fmt.Printf(" Files processed: %d\n", stats.Files)
|
|
fmt.Printf(" Chunks created: %d\n", stats.Chunks)
|
|
if stats.Errors > 0 {
|
|
fmt.Printf(" Errors: %s\n", cli.ErrorStyle.Render(fmt.Sprintf("%d", stats.Errors)))
|
|
}
|
|
fmt.Printf(" Collection: %s\n", collection)
|
|
|
|
return nil
|
|
}
|
|
|
|
// IngestDirectory is exported for use by other packages (e.g., MCP).
|
|
func IngestDirectory(ctx context.Context, directory, collectionName string, recreateCollection bool) error {
|
|
qdrantClient, err := rag.NewQdrantClient(rag.DefaultQdrantConfig())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() { _ = qdrantClient.Close() }()
|
|
|
|
if err := qdrantClient.HealthCheck(ctx); err != nil {
|
|
return fmt.Errorf("qdrant health check failed: %w", err)
|
|
}
|
|
|
|
ollamaClient, err := rag.NewOllamaClient(rag.DefaultOllamaConfig())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := ollamaClient.VerifyModel(ctx); err != nil {
|
|
return err
|
|
}
|
|
|
|
cfg := rag.DefaultIngestConfig()
|
|
cfg.Directory = directory
|
|
cfg.Collection = collectionName
|
|
cfg.Recreate = recreateCollection
|
|
|
|
_, err = rag.Ingest(ctx, qdrantClient, ollamaClient, cfg, nil)
|
|
return err
|
|
}
|
|
|
|
// IngestFile is exported for use by other packages (e.g., MCP).
|
|
func IngestFile(ctx context.Context, filePath, collectionName string) (int, error) {
|
|
qdrantClient, err := rag.NewQdrantClient(rag.DefaultQdrantConfig())
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
defer func() { _ = qdrantClient.Close() }()
|
|
|
|
if err := qdrantClient.HealthCheck(ctx); err != nil {
|
|
return 0, fmt.Errorf("qdrant health check failed: %w", err)
|
|
}
|
|
|
|
ollamaClient, err := rag.NewOllamaClient(rag.DefaultOllamaConfig())
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
if err := ollamaClient.VerifyModel(ctx); err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
return rag.IngestFile(ctx, qdrantClient, ollamaClient, collectionName, filePath, rag.DefaultChunkConfig())
|
|
}
|