cli/tools/rag/query.py

#!/usr/bin/env python3
"""
RAG Query Tool for Host UK Documentation

Query the vector database and retrieve relevant documentation chunks.

Usage:
    python query.py "how do I create a Flux button"
    python query.py "what is Vi's personality" --collection hostuk-docs
    python query.py "path sandboxing" --top 10 --category architecture

Requirements:
    pip install qdrant-client ollama
"""

import argparse
import html
import json
import os
import sys
from typing import Optional

try:
    from qdrant_client import QdrantClient
    from qdrant_client.models import Filter, FieldCondition, MatchValue
    import ollama
except ImportError:
    print("Install dependencies: pip install qdrant-client ollama")
    sys.exit(1)


# Configuration
# Each value is read from the environment once, at import time, and falls
# back to the literal default when the variable is unset.
# QDRANT_HOST / QDRANT_PORT seed the --qdrant-host / --qdrant-port CLI defaults.
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
# Converted with int(): a non-numeric QDRANT_PORT raises ValueError on import.
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
# Model name passed to ollama.embeddings() by generate_embedding().
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")


def generate_embedding(text: str) -> list[float]:
    """Return the embedding Ollama produces for *text*.

    The text is sent as the prompt to ``ollama.embeddings`` using the
    module-level ``EMBEDDING_MODEL``; the ``"embedding"`` entry of the
    reply is returned as-is.
    """
    return ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)["embedding"]


def query_rag(
    query: str,
    client: QdrantClient,
    collection: str,
    top_k: int = 5,
    category: Optional[str] = None,
    score_threshold: float = 0.5,
) -> list[dict]:
    """Search *collection* for chunks matching *query*.

    Args:
        query: Free-text question; embedded via ``generate_embedding``.
        client: Qdrant client used to run the search.
        collection: Name of the collection to search.
        top_k: Passed to Qdrant as the result ``limit``.
        category: When truthy, only points whose ``category`` payload field
            equals this value are considered.
        score_threshold: Passed to Qdrant as ``score_threshold``.

    Returns:
        One dict per returned point with the keys ``score``, ``text``,
        ``source``, ``section`` and ``category``. ``text`` and ``source``
        are read with strict indexing (``KeyError`` if absent), while
        ``section`` and ``category`` default to ``""``.
    """
    vector = generate_embedding(query)

    # Only constrain the search when a category was actually requested.
    category_filter = (
        Filter(
            must=[
                FieldCondition(key="category", match=MatchValue(value=category))
            ]
        )
        if category
        else None
    )

    response = client.query_points(
        collection_name=collection,
        query=vector,
        query_filter=category_filter,
        limit=top_k,
        score_threshold=score_threshold,
    )

    chunks = []
    for point in response.points:
        score = point.score
        payload = point.payload
        chunks.append(
            {
                "score": score,
                "text": payload["text"],
                "source": payload["source"],
                "section": payload.get("section", ""),
                "category": payload.get("category", ""),
            }
        )
    return chunks


def format_results(results: list[dict], query: str, format: str = "text") -> str:
    """Render *results* as a printable string.

    Args:
        results: Result dicts carrying ``source``, ``score``, ``section``,
            ``category`` and ``text`` keys.
        query: The original query, echoed in the text output.
        format: ``"json"`` dumps *results* as indented JSON; any other
            value yields the human-readable text layout.

    Returns:
        The formatted string. In text mode an empty *results* list gives a
        single "No results found" line.
    """
    if format == "json":
        return json.dumps(results, indent=2)

    if not results:
        return f"No results found for: {query}"

    lines = [f"Query: {query}", f"Results: {len(results)}", "=" * 60]

    for rank, entry in enumerate(results, 1):
        lines.append(f"\n[{rank}] {entry['source']} (score: {entry['score']:.3f})")
        if entry['section']:
            lines.append(f"    Section: {entry['section']}")
        lines.extend((f"    Category: {entry['category']}", "-" * 40))
        # Long chunks are cut to 500 characters plus an ellipsis for display.
        body = entry['text']
        lines.append(body if len(body) <= 500 else body[:500] + "...")
        lines.append("")

    return "\n".join(lines)


def format_for_context(results: list[dict], query: str) -> str:
    """Wrap *results* in XML-like tags suitable for pasting into an LLM prompt.

    The query, each result's ``source`` and ``category`` and its ``text``
    are passed through ``html.escape`` before being embedded. An empty
    *results* list produces an empty string.
    """
    if not results:
        return ""

    escape = html.escape
    parts = [f'<retrieved_context query="{escape(query)}">']

    for doc in results:
        source_attr = escape(doc["source"])
        category_attr = escape(doc["category"])
        parts += [
            f'\n<document source="{source_attr}" category="{category_attr}">',
            escape(doc['text']),
            "</document>",
        ]

    parts.append("\n</retrieved_context>")
    return "\n".join(parts)

def main():
    """Command-line entry point: parse arguments and run the requested action.

    Actions are checked in this order; the first match runs and returns:

    1. ``--list-collections``: print every collection with its point count.
    2. ``--stats``: print the point count and status of ``--collection``.
    3. No query given: print the argparse help text.
    4. Otherwise: run the query against ``--collection`` and print the
       results in the layout selected by ``--format``.
    """
    parser = argparse.ArgumentParser(description="Query RAG documentation")
    parser.add_argument("query", nargs="?", help="Search query")
    parser.add_argument("--collection", default="hostuk-docs", help="Qdrant collection name")
    parser.add_argument("--top", "-k", type=int, default=5, help="Number of results")
    parser.add_argument("--category", "-c", help="Filter by category")
    parser.add_argument("--threshold", "-t", type=float, default=0.5, help="Score threshold")
    parser.add_argument("--format", "-f", choices=["text", "json", "context"], default="text")
    parser.add_argument("--qdrant-host", default=QDRANT_HOST)
    parser.add_argument("--qdrant-port", type=int, default=QDRANT_PORT)
    parser.add_argument("--list-collections", action="store_true", help="List available collections")
    parser.add_argument("--stats", action="store_true", help="Show collection stats")

    args = parser.parse_args()

    # Connect to Qdrant
    client = QdrantClient(host=args.qdrant_host, port=args.qdrant_port)

    # List collections
    if args.list_collections:
        collections = client.get_collections().collections
        print("Available collections:")
        for c in collections:
            info = client.get_collection(c.name)
            print(f"  - {c.name}: {info.points_count} vectors")
        return

    # Show stats
    if args.stats:
        # Only the lookup itself is guarded, so unrelated failures in the
        # printing code are not mistaken for a missing collection.
        try:
            info = client.get_collection(args.collection)
        except Exception as exc:
            # The stdout line is unchanged for existing consumers. The lookup
            # can also fail for reasons other than a missing collection
            # (e.g. the server being unreachable), so the underlying error
            # goes to stderr instead of being discarded.
            print(f"Collection not found: {args.collection}")
            print(f"  Error: {exc}", file=sys.stderr)
        else:
            print(f"Collection: {args.collection}")
            print(f"  Vectors: {info.points_count}")
            print(f"  Status: {info.status}")
        return

    # Query required
    if not args.query:
        parser.print_help()
        return

    # Execute query
    results = query_rag(
        query=args.query,
        client=client,
        collection=args.collection,
        top_k=args.top,
        category=args.category,
        score_threshold=args.threshold,
    )

    # Format output: "context" has its own renderer; "text" and "json" share one.
    if args.format == "context":
        print(format_for_context(results, args.query))
    else:
        print(format_results(results, args.query, args.format))


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
feat: wire release command, add tar.xz support, unified installers (#277) * feat(cli): wire release command and add installer scripts - Wire up `core build release` subcommand (was orphaned) - Wire up `core monitor` command (missing import in full variant) - Add installer scripts for Unix (.sh) and Windows (.bat) - setup: Interactive with variant selection - ci: Minimal for CI/CD environments - dev: Full development variant - go/php/agent: Targeted development variants - All scripts include security hardening: - Secure temp directories (mktemp -d) - Architecture validation - Version validation after GitHub API call - Proper cleanup on exit - PowerShell PATH updates on Windows (avoids setx truncation) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(build): add tar.xz support and unified installer scripts - Add tar.xz archive support using Borg's compress package - ArchiveXZ() and ArchiveWithFormat() for configurable compression - Better compression ratio than gzip for release artifacts - Consolidate 12 installer scripts into 2 unified scripts - install.sh and install.bat with BunnyCDN edge variable support - Subdomains: setup.core.help, ci.core.help, dev.core.help, etc. - MODE and VARIANT transformed at edge based on subdomain - Installers prefer tar.xz with automatic fallback to tar.gz - Fixed CodeRabbit issues: HTTP status patterns, tar error handling, verify_install params, VARIANT validation, CI PATH persistence Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * chore: add build and release config files - .core/build.yaml - cross-platform build configuration - .core/release.yaml - release workflow configuration Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * chore: move plans from docs/ to tasks/ Consolidate planning documents in tasks/plans/ directory. 
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(install): address CodeRabbit review feedback - Add curl timeout (--max-time) to prevent hanging on slow networks - Rename TMPDIR to WORK_DIR to avoid clobbering system env var - Add chmod +x to ensure binary has execute permissions - Add error propagation after subroutine calls in batch file - Remove System32 install attempt in CI mode (use consistent INSTALL_DIR) - Fix HTTP status regex for HTTP/2 compatibility Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(rag): add Go RAG implementation with Qdrant + Ollama Add RAG (Retrieval Augmented Generation) tools for storing documentation in Qdrant vector database and querying with semantic search. This replaces the Python tools/rag implementation with a native Go solution. New commands: - core rag ingest [directory] - Ingest markdown files into Qdrant - core rag query [question] - Query vector database with semantic search - core rag collections - List and manage Qdrant collections Features: - Markdown chunking by sections and paragraphs with overlap - UTF-8 safe text handling for international content - Automatic category detection from file paths - Multiple output formats: text, JSON, LLM context injection - Environment variable support for host configuration Dependencies: - github.com/qdrant/go-client (gRPC client) - github.com/ollama/ollama/api (embeddings API) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(deploy): add pure-Go Ansible executor and Coolify API integration Implement infrastructure deployment system with: - pkg/ansible: Pure Go Ansible executor - Playbook/inventory parsing (types.go, parser.go) - Full execution engine with variable templating, loops, blocks, conditionals, handlers, and fact gathering (executor.go) - SSH client with key/password auth and privilege escalation (ssh.go) - 35+ module implementations: shell, command, copy, template, file, apt, service, systemd, user, group, git, docker_compose, etc. 
(modules.go) - pkg/deploy/coolify: Coolify API client wrapping Python swagger client - List/get servers, projects, applications, databases, services - Generic Call() for any OpenAPI operation - pkg/deploy/python: Embedded Python runtime for swagger client integration - internal/cmd/deploy: CLI commands - core deploy servers/projects/apps/databases/services/team - core deploy call <operation> [params-json] This enables Docker-free infrastructure deployment with Ansible-compatible playbooks executed natively in Go. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(deploy): address linter warnings and build errors - Fix fmt.Sprintf format verb error in ssh.go (remove unused stat command) - Fix errcheck warnings by explicitly ignoring best-effort operations - Fix ineffassign warning in cmd_ansible.go All golangci-lint checks now pass for deploy packages. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * style(deploy): fix gofmt formatting Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(deploy): use known_hosts for SSH host key verification Address CodeQL security alert by using the user's known_hosts file for SSH host key verification when available. Falls back to accepting any key only when known_hosts doesn't exist (common in containerized or ephemeral environments). Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(ai,security,ide): add agentic MVP, security jobs, and Core IDE desktop app Wire up AI infrastructure with unified pkg/ai package (metrics JSONL, RAG integration), move RAG under `core ai rag`, add `core ai metrics` command, and enrich task context with Qdrant documentation. Add `--target` flag to all security commands for external repo scanning, `core security jobs` for distributing findings as GitHub Issues, and consistent error logging across scan/deps/alerts/secrets commands. 
Add Core IDE Wails v3 desktop app with Angular 20 frontend, MCP bridge (loopback-only HTTP server), WebSocket hub, and Claude Code bridge. Production-ready with Lethean CIC branding, macOS code signing support, and security hardening (origin validation, body size limits, URL scheme checks, memory leak prevention, XSS mitigation). Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: address PR review comments from CodeRabbit, Copilot, and Gemini Fixes across 25 files addressing 46+ review comments: - pkg/ai/metrics.go: handle error from Close() on writable file handle - pkg/ansible: restore loop vars after loop, restore become settings, fix Upload with become=true and no password (use sudo -n), honour SSH timeout config, use E() helper for contextual errors, quote git refs in checkout commands - pkg/rag: validate chunk config, guard negative-to-uint64 conversion, use E() helper for errors, add context timeout to Ollama HTTP calls - pkg/deploy/python: fix exec.ExitError type assertion (was os.PathError), handle os.UserHomeDir() error - pkg/build/buildcmd: use cmd.Context() instead of context.Background() for proper Ctrl+C cancellation - install.bat: add curl timeouts, CRLF line endings, use --connect-timeout for archive downloads - install.sh: use absolute path for version check in CI mode - tools/rag: fix broken ingest.py function def, escape HTML in query.py, pin qdrant-client version, add markdown code block languages - internal/cmd/rag: add chunk size validation, env override handling Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix(build): make release dry-run by default and remove darwin/amd64 target Replace --dry-run (default false) with --we-are-go-for-launch (default false) so `core build release` is safe by default. Remove darwin/amd64 from default build targets (arm64 only for macOS). Fix cmd_project.go to use command context instead of context.Background(). 
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com> 2026-02-04 00:49:57 +00:00			`#!/usr/bin/env python3`
			`"""`
			`RAG Query Tool for Host UK Documentation`

			`Query the vector database and retrieve relevant documentation chunks.`

			`Usage:`
			`python query.py "how do I create a Flux button"`
			`python query.py "what is Vi's personality" --collection hostuk-docs`
			`python query.py "path sandboxing" --top 10 --category architecture`

			`Requirements:`
			`pip install qdrant-client ollama`
			`"""`

			`import argparse`
			`import html`
			`import json`
			`import os`
			`import sys`
			`from typing import Optional`

			`try:`
			`from qdrant_client import QdrantClient`
			`from qdrant_client.models import Filter, FieldCondition, MatchValue`
			`import ollama`
			`except ImportError:`
			`print("Install dependencies: pip install qdrant-client ollama")`
			`sys.exit(1)`


			`# Configuration`
			`QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")`
			`QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))`
			`EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")`


			`def generate_embedding(text: str) -> list[float]:`
			`"""Generate embedding using Ollama."""`
			`response = ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)`
			`return response["embedding"]`


			`def query_rag(`
			`query: str,`
			`client: QdrantClient,`
			`collection: str,`
			`top_k: int = 5,`
			`category: Optional[str] = None,`
			`score_threshold: float = 0.5,`
			`) -> list[dict]:`
			`"""Query the RAG database and return relevant chunks."""`

			`# Generate query embedding`
			`query_embedding = generate_embedding(query)`

			`# Build filter if category specified`
			`query_filter = None`
			`if category:`
			`query_filter = Filter(`
			`must=[`
			`FieldCondition(key="category", match=MatchValue(value=category))`
			`]`
			`)`

			`# Search`
			`results = client.query_points(`
			`collection_name=collection,`
			`query=query_embedding,`
			`query_filter=query_filter,`
			`limit=top_k,`
			`score_threshold=score_threshold,`
			`).points`

			`return [`
			`{`
			`"score": hit.score,`
			`"text": hit.payload["text"],`
			`"source": hit.payload["source"],`
			`"section": hit.payload.get("section", ""),`
			`"category": hit.payload.get("category", ""),`
			`}`
			`for hit in results`
			`]`


			`def format_results(results: list[dict], query: str, format: str = "text") -> str:`
			`"""Format results for display."""`

			`if format == "json":`
			`return json.dumps(results, indent=2)`

			`if not results:`
			`return f"No results found for: {query}"`

			`output = []`
			`output.append(f"Query: {query}")`
			`output.append(f"Results: {len(results)}")`
			`output.append("=" * 60)`

			`for i, r in enumerate(results, 1):`
			`output.append(f"\n[{i}] {r['source']} (score: {r['score']:.3f})")`
			`if r['section']:`
			`output.append(f" Section: {r['section']}")`
			`output.append(f" Category: {r['category']}")`
			`output.append("-" * 40)`
			`# Truncate long text for display`
			`text = r['text']`
			`if len(text) > 500:`
			`text = text[:500] + "..."`
			`output.append(text)`
			`output.append("")`

			`return "\n".join(output)`


			`def format_for_context(results: list[dict], query: str) -> str:`
			`"""Format results as context for LLM injection."""`

			`if not results:`
			`return ""`

			`output = []`
			`output.append(f'<retrieved_context query="{html.escape(query)}">')`

			`for r in results:`
			`output.append(f'\n<document source="{html.escape(r["source"])}" category="{html.escape(r["category"])}">')`
			`output.append(html.escape(r['text']))`
			`output.append("</document>")`

			`output.append("\n</retrieved_context>")`

			`return "\n".join(output)`

			`def main():`
			`parser = argparse.ArgumentParser(description="Query RAG documentation")`
			`parser.add_argument("query", nargs="?", help="Search query")`
			`parser.add_argument("--collection", default="hostuk-docs", help="Qdrant collection name")`
			`parser.add_argument("--top", "-k", type=int, default=5, help="Number of results")`
			`parser.add_argument("--category", "-c", help="Filter by category")`
			`parser.add_argument("--threshold", "-t", type=float, default=0.5, help="Score threshold")`
			`parser.add_argument("--format", "-f", choices=["text", "json", "context"], default="text")`
			`parser.add_argument("--qdrant-host", default=QDRANT_HOST)`
			`parser.add_argument("--qdrant-port", type=int, default=QDRANT_PORT)`
			`parser.add_argument("--list-collections", action="store_true", help="List available collections")`
			`parser.add_argument("--stats", action="store_true", help="Show collection stats")`

			`args = parser.parse_args()`

			`# Connect to Qdrant`
			`client = QdrantClient(host=args.qdrant_host, port=args.qdrant_port)`

			`# List collections`
			`if args.list_collections:`
			`collections = client.get_collections().collections`
			`print("Available collections:")`
			`for c in collections:`
			`info = client.get_collection(c.name)`
			`print(f" - {c.name}: {info.points_count} vectors")`
			`return`

			`# Show stats`
			`if args.stats:`
			`try:`
			`info = client.get_collection(args.collection)`
			`print(f"Collection: {args.collection}")`
			`print(f" Vectors: {info.points_count}")`
			`print(f" Status: {info.status}")`
			`except Exception as e:`
			`print(f"Collection not found: {args.collection}")`
			`return`

			`# Query required`
			`if not args.query:`
			`parser.print_help()`
			`return`

			`# Execute query`
			`results = query_rag(`
			`query=args.query,`
			`client=client,`
			`collection=args.collection,`
			`top_k=args.top,`
			`category=args.category,`
			`score_threshold=args.threshold,`
			`)`

			`# Format output`
			`if args.format == "context":`
			`print(format_for_context(results, args.query))`
			`else:`
			`print(format_results(results, args.query, args.format))`


			`if __name__ == "__main__":`
			`main()`