diff --git a/go.mod b/go.mod
index 07b9aea..23bbf6c 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,7 @@ require (
 	dappco.re/go/process v0.8.0-alpha.1
 	dappco.re/go/store v0.8.0-alpha.1
 	dappco.re/go/ws v0.8.0-alpha.1
+	forge.lthn.ai/Snider/Poindexter v0.0.0-20260223032814-5ab751f16d06
 	github.com/gin-gonic/gin v1.12.0
 	github.com/gorilla/websocket v1.5.3
 	github.com/modelcontextprotocol/go-sdk v1.5.0
@@ -137,3 +138,5 @@ require (
 )
 
 replace dappco.re/go/mcp => ../mcp
+
+replace forge.lthn.ai/Snider/Poindexter => ../../snider/Poindexter
diff --git a/pkg/agentic/qa.go b/pkg/agentic/qa.go
index ab00bfe..c417e0e 100644
--- a/pkg/agentic/qa.go
+++ b/pkg/agentic/qa.go
@@ -83,6 +83,7 @@ type DispatchReport struct {
 	Workspace   string         `json:"workspace"`
 	Commit      string         `json:"commit,omitempty"`
 	Summary     map[string]any `json:"summary"`
+	SummaryText string         `json:"summary_text,omitempty"`
 	Findings    []QAFinding    `json:"findings,omitempty"`
 	Tools       []QAToolRun    `json:"tools,omitempty"`
 	BuildPassed bool           `json:"build_passed"`
@@ -283,22 +284,12 @@ func (s *PrepSubsystem) runQAWithReport(ctx context.Context, workspaceDir string
 	lintPassed := report.Summary.Errors == 0
 	workspaceName := WorkspaceName(workspaceDir)
 
-	previousCycles := readPreviousJournalCycles(storeInstance, workspaceName, persistentThreshold)
-
-	dispatchReport := DispatchReport{
-		Workspace:   workspaceName,
-		Summary:     workspace.Aggregate(),
-		Findings:    report.Findings,
-		Tools:       report.Tools,
-		BuildPassed: buildPassed,
-		TestPassed:  testPassed,
-		LintPassed:  lintPassed,
-		Passed:      buildPassed && testPassed,
-		GeneratedAt: time.Now().UTC(),
-		Clusters:    clusterFindings(report.Findings),
-	}
-
-	dispatchReport.New, dispatchReport.Resolved, dispatchReport.Persistent = diffFindingsAgainstJournal(report.Findings, previousCycles)
+	dispatchReport := s.analyseWorkspaceNamed(workspace, workspaceName)
+	dispatchReport.BuildPassed = buildPassed
+	dispatchReport.TestPassed = testPassed
+	dispatchReport.LintPassed = lintPassed
+	dispatchReport.Passed = buildPassed && testPassed
+	dispatchReport.GeneratedAt = time.Now().UTC()
 
 	writeDispatchReport(workspaceDir, dispatchReport)
 
@@ -351,8 +342,13 @@ func publishDispatchReport(storeInstance *store.Store, workspaceName string, dis
 		"test_passed":  dispatchReport.TestPassed,
 		"lint_passed":  dispatchReport.LintPassed,
 		"summary":      dispatchReport.Summary,
+		"summary_text": dispatchReport.SummaryText,
 		"findings":     findings,
 		"tools":        tools,
+		"clusters":     dispatchReport.Clusters,
+		"new":          dispatchReport.New,
+		"resolved":     dispatchReport.Resolved,
+		"persistent":   dispatchReport.Persistent,
 		"generated_at": dispatchReport.GeneratedAt.Format(time.RFC3339Nano),
 	}
 	tags := map[string]string{"workspace": workspaceName}
@@ -565,61 +561,8 @@ func findingToMap(finding QAFinding) map[string]any {
 //
 // Usage example: `newList, resolvedList, persistentList := diffFindingsAgainstJournal(current, previous)`
 func diffFindingsAgainstJournal(current []QAFinding, previous [][]map[string]any) (newList, resolvedList, persistentList []map[string]any) {
-	if len(previous) == 0 {
-		return nil, nil, nil
-	}
-
-	currentByKey := make(map[string]QAFinding, len(current))
-	for _, finding := range current {
-		currentByKey[findingFingerprint(finding)] = finding
-	}
-
-	lastCycle := previous[len(previous)-1]
-	lastCycleByKey := make(map[string]map[string]any, len(lastCycle))
-	for _, entry := range lastCycle {
-		lastCycleByKey[findingFingerprintFromMap(entry)] = entry
-	}
-
-	for key, finding := range currentByKey {
-		if _, ok := lastCycleByKey[key]; !ok {
-			newList = append(newList, findingToMap(finding))
-		}
-	}
-
-	for key, entry := range lastCycleByKey {
-		if _, ok := currentByKey[key]; !ok {
-			resolvedList = append(resolvedList, entry)
-		}
-	}
-
-	// Persistent findings must appear in every one of the last
-	// `persistentThreshold` cycles AND in the current cycle. We slice from the
-	// tail so shorter histories still participate — as the journal grows past
-	// the threshold the list becomes stricter.
-	window := previous
-	if len(window) > persistentThreshold-1 {
-		window = window[len(window)-(persistentThreshold-1):]
-	}
-	if len(window) == persistentThreshold-1 {
-		counts := make(map[string]int, len(currentByKey))
-		for _, cycle := range window {
-			seen := make(map[string]bool, len(cycle))
-			for _, entry := range cycle {
-				key := findingFingerprintFromMap(entry)
-				if seen[key] {
-					continue
-				}
-				seen[key] = true
-				counts[key]++
-			}
-		}
-		for key, finding := range currentByKey {
-			if counts[key] == len(window) {
-				persistentList = append(persistentList, findingToMap(finding))
-			}
-		}
-	}
-
+	newList, resolvedList = diffFindings(current, previous)
+	persistentList = persistentFindings(current, previous)
 	return newList, resolvedList, persistentList
 }
diff --git a/pkg/agentic/qa_analysis.go b/pkg/agentic/qa_analysis.go
new file mode 100644
index 0000000..3d3dc18
--- /dev/null
+++ b/pkg/agentic/qa_analysis.go
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: EUPL-1.2
+
+package agentic
+
+import (
+	"hash/fnv"
+	"maps"
+	"time"
+
+	core "dappco.re/go/core"
+	store "dappco.re/go/store"
+	poindexter "forge.lthn.ai/Snider/Poindexter"
+)
+
+const qaAnalysisClusterDistance = 0.15
+
+type qaAnalysisPoint struct {
+	Index     int
+	ToolID    float64
+	Severity  float64
+	FileHash  float64
+	Category  float64
+	Frequency float64
+}
+
+var qaAnalysisClusterer = qaAnalysisClusters
+
+// analyseWorkspace reads the buffered QA findings from the workspace DuckDB
+// and returns the RFC §7 dispatch report. When called without the caller's
+// original workspace name, the journal comparison falls back to the QA buffer
+// name with the `qa-` prefix removed.
+//
+// Usage example: `report := s.analyseWorkspace(workspace)`
+func (s *PrepSubsystem) analyseWorkspace(workspace *store.Workspace) DispatchReport {
+	return s.analyseWorkspaceNamed(workspace, qaAnalysisMeasurementName(qaAnalysisWorkspaceName(workspace)))
+}
+
+func (s *PrepSubsystem) analyseWorkspaceNamed(workspace *store.Workspace, workspaceName string) DispatchReport {
+	report := DispatchReport{
+		Workspace:   core.Trim(workspaceName),
+		Summary:     map[string]any{},
+		GeneratedAt: time.Now().UTC(),
+	}
+	if report.Workspace == "" {
+		report.Workspace = qaAnalysisMeasurementName(qaAnalysisWorkspaceName(workspace))
+	}
+	if workspace == nil {
+		report.SummaryText = qaAnalysisSummaryText(report)
+		return report
+	}
+
+	report.Findings = qaAnalysisWorkspaceFindings(workspace)
+	report.Tools = qaAnalysisWorkspaceToolRuns(workspace)
+	report.Clusters = qaAnalysisSafeClusters(report.Findings)
+
+	previousCycles := readPreviousJournalCycles(s.stateStoreInstance(), report.Workspace, persistentThreshold)
+	report.New, report.Resolved = diffFindings(report.Findings, previousCycles)
+	report.Persistent = persistentFindings(report.Findings, previousCycles)
+	report.Summary = qaAnalysisSummary(workspace.Aggregate(), report)
+	report.SummaryText = qaAnalysisSummaryText(report)
+	return report
+}
+
+func qaAnalysisWorkspaceName(workspace *store.Workspace) string {
+	if workspace == nil {
+		return ""
+	}
+	return workspace.Name()
+}
+
+func qaAnalysisMeasurementName(name string) string {
+	trimmed := core.Trim(name)
+	if core.HasPrefix(trimmed, "qa-") {
+		return core.TrimPrefix(trimmed, "qa-")
+	}
+	return trimmed
+}
+
+func qaAnalysisWorkspaceFindings(workspace *store.Workspace) []QAFinding {
+	rows := qaAnalysisWorkspaceRows(workspace, "finding")
+	findings := make([]QAFinding, 0, len(rows))
+	for _, row := range rows {
+		var finding QAFinding
+		if parseResult := core.JSONUnmarshalString(row, &finding); parseResult.OK {
+			findings = append(findings, finding)
+		}
+	}
+	return findings
+}
+
+func qaAnalysisWorkspaceToolRuns(workspace *store.Workspace) []QAToolRun {
+	rows := qaAnalysisWorkspaceRows(workspace, "tool_run")
+	toolRuns := make([]QAToolRun, 0, len(rows))
+	for _, row := range rows {
+		var toolRun QAToolRun
+		if parseResult := core.JSONUnmarshalString(row, &toolRun); parseResult.OK {
+			toolRuns = append(toolRuns, toolRun)
+		}
+	}
+	return toolRuns
+}
+
+func qaAnalysisWorkspaceRows(workspace *store.Workspace, kind string) []string {
+	if workspace == nil || kind == "" {
+		return nil
+	}
+
+	result := workspace.Query(
+		core.Sprintf(
+			"SELECT data FROM entries WHERE kind = '%s' ORDER BY id",
+			escapeJournalLiteral(kind),
+		),
+	)
+	if !result.OK || result.Value == nil {
+		return nil
+	}
+
+	rows, ok := result.Value.([]map[string]any)
+	if !ok {
+		return nil
+	}
+
+	values := make([]string, 0, len(rows))
+	for _, row := range rows {
+		if payload := stringValue(row["data"]); payload != "" {
+			values = append(values, payload)
+		}
+	}
+	return values
+}
+
+// findingToPoint projects a finding into the RFC §7 clustering dimensions.
+// Frequency defaults to 1 for direct callers; the cluster builder supplies the
+// observed per-fingerprint frequency for each point.
+//
+// Usage example: `coords := findingToPoint(QAFinding{Tool: "gosec", Severity: "error", File: "main.go", Category: "security"})`
+func findingToPoint(finding QAFinding) []float64 {
+	return qaAnalysisPointCoords(finding, 1)
+}
+
+func qaAnalysisPointCoords(finding QAFinding, frequency float64) []float64 {
+	return []float64{
+		qaAnalysisHash(core.Lower(finding.Tool)),
+		qaAnalysisSeverityScore(finding.Severity),
+		qaAnalysisHash(core.Lower(finding.File)),
+		qaAnalysisHash(core.Lower(firstNonEmpty(finding.Category, finding.Code, finding.RuleID))),
+		frequency,
+	}
+}
+
+func diffFindings(current []QAFinding, previous [][]map[string]any) (newList, resolvedList []map[string]any) {
+	if len(previous) == 0 {
+		return nil, nil
+	}
+
+	currentByKey := make(map[string]QAFinding, len(current))
+	for _, finding := range current {
+		currentByKey[findingFingerprint(finding)] = finding
+	}
+
+	lastCycle := previous[len(previous)-1]
+	lastCycleByKey := make(map[string]map[string]any, len(lastCycle))
+	for _, entry := range lastCycle {
+		lastCycleByKey[findingFingerprintFromMap(entry)] = entry
+	}
+
+	for key, finding := range currentByKey {
+		if _, ok := lastCycleByKey[key]; !ok {
+			newList = append(newList, findingToMap(finding))
+		}
+	}
+
+	for key, entry := range lastCycleByKey {
+		if _, ok := currentByKey[key]; !ok {
+			resolvedList = append(resolvedList, entry)
+		}
+	}
+
+	return newList, resolvedList
+}
+
+func persistentFindings(current []QAFinding, previous [][]map[string]any) []map[string]any {
+	if len(previous) < persistentThreshold-1 || len(current) == 0 {
+		return nil
+	}
+
+	currentByKey := make(map[string]QAFinding, len(current))
+	for _, finding := range current {
+		currentByKey[findingFingerprint(finding)] = finding
+	}
+
+	window := previous
+	if len(window) > persistentThreshold-1 {
+		window = window[len(window)-(persistentThreshold-1):]
+	}
+
+	counts := make(map[string]int, len(currentByKey))
+	for _, cycle := range window {
+		seen := make(map[string]bool, len(cycle))
+		for _, entry := range cycle {
+			key := findingFingerprintFromMap(entry)
+			if seen[key] {
+				continue
+			}
+			seen[key] = true
+			counts[key]++
+		}
+	}
+
+	persistentList := make([]map[string]any, 0, len(currentByKey))
+	for key, finding := range currentByKey {
+		if counts[key] == len(window) {
+			persistentList = append(persistentList, findingToMap(finding))
+		}
+	}
+	if len(persistentList) == 0 {
+		return nil
+	}
+	return persistentList
+}
+
+func qaAnalysisSafeClusters(findings []QAFinding) (clusters []DispatchCluster) {
+	if len(findings) == 0 {
+		return nil
+	}
+
+	defer func() {
+		if recovered := recover(); recovered != nil {
+			core.Warn("agentic: Poindexter workspace analysis panicked", "reason", recovered)
+			clusters = clusterFindingsFallback(findings)
+		}
+	}()
+
+	clusters = qaAnalysisClusterer(findings)
+	if len(clusters) > 0 {
+		return clusters
+	}
+	return clusterFindingsFallback(findings)
+}
+
+func qaAnalysisClusters(findings []QAFinding) []DispatchCluster {
+	if len(findings) == 0 {
+		return nil
+	}
+
+	frequencies := qaAnalysisFrequencies(findings)
+	items := make([]qaAnalysisPoint, len(findings))
+	for index, finding := range findings {
+		coords := qaAnalysisPointCoords(finding, frequencies[findingFingerprint(finding)])
+		items[index] = qaAnalysisPoint{
+			Index:     index,
+			ToolID:    coords[0],
+			Severity:  coords[1],
+			FileHash:  coords[2],
+			Category:  coords[3],
+			Frequency: coords[4],
+		}
+	}
+
+	points, err := poindexter.BuildND(items,
+		func(item qaAnalysisPoint) string { return core.Sprintf("finding-%d", item.Index) },
+		[]func(qaAnalysisPoint) float64{
+			func(item qaAnalysisPoint) float64 { return item.ToolID },
+			func(item qaAnalysisPoint) float64 { return item.Severity },
+			func(item qaAnalysisPoint) float64 { return item.FileHash },
+			func(item qaAnalysisPoint) float64 { return item.Category },
+			func(item qaAnalysisPoint) float64 { return item.Frequency },
+		},
+		[]float64{1, 1, 1, 1, 1},
+		[]bool{false, false, false, false, false},
+	)
+	if err != nil || len(points) == 0 {
+		return clusterFindingsFallback(findings)
+	}
+
+	union := qaAnalysisClusterByDistance(points, findings, qaAnalysisClusterDistance)
+	if union == nil {
+		return clusterFindingsFallback(findings)
+	}
+	return qaClusterDispatchClusters(findings, union)
+}
+
+func qaAnalysisClusterByDistance(points []poindexter.KDPoint[qaAnalysisPoint], findings []QAFinding, distance float64) *qaClusterUnion {
+	tree, err := poindexter.NewKDTree(points, poindexter.WithMetric(poindexter.EuclideanDistance{}))
+	if err != nil {
+		return nil
+	}
+
+	union := newQAClusterUnion(len(points))
+	for _, point := range points {
+		neighbours, _ := tree.Radius(point.Coords, distance)
+		for _, neighbour := range neighbours {
+			leftIndex := point.Value.Index
+			rightIndex := neighbour.Value.Index
+			if leftIndex == rightIndex {
+				continue
+			}
+			if !qaAnalysisCompatible(findings[leftIndex], findings[rightIndex]) {
+				continue
+			}
+			union.Union(leftIndex, rightIndex)
+		}
+	}
+	return union
+}
+
+func qaAnalysisCompatible(left, right QAFinding) bool {
+	if !qaClusterCompatible(left, right) {
+		return false
+	}
+
+	leftCategory := firstNonEmpty(left.Category, left.Code, left.RuleID)
+	rightCategory := firstNonEmpty(right.Category, right.Code, right.RuleID)
+	if leftCategory != "" && rightCategory != "" && leftCategory != rightCategory {
+		return false
+	}
+	return true
+}
+
+func qaAnalysisFrequencies(findings []QAFinding) map[string]float64 {
+	frequencies := make(map[string]float64, len(findings))
+	for _, finding := range findings {
+		frequencies[findingFingerprint(finding)]++
+	}
+	return frequencies
+}
+
+func qaAnalysisSummary(base map[string]any, report DispatchReport) map[string]any {
+	summary := maps.Clone(base)
+	if summary == nil {
+		summary = map[string]any{}
+	}
+	summary["clusters"] = len(report.Clusters)
+	summary["new"] = len(report.New)
+	summary["resolved"] = len(report.Resolved)
+	summary["persistent"] = len(report.Persistent)
+	return summary
+}
+
+func qaAnalysisSummaryText(report DispatchReport) string {
+	return core.Sprintf(
+		"%d findings across %d clusters; %d new, %d resolved, %d persistent",
+		len(report.Findings),
+		len(report.Clusters),
+		len(report.New),
+		len(report.Resolved),
+		len(report.Persistent),
+	)
+}
+
+func qaAnalysisSeverityScore(severity string) float64 {
+	switch core.Lower(core.Trim(severity)) {
+	case "critical", "error", "high":
+		return 1
+	case "warning", "warn", "medium":
+		return 0.6
+	case "info", "low":
+		return 0.3
+	default:
+		return 0.1
+	}
+}
+
+func qaAnalysisHash(value string) float64 {
+	if core.Trim(value) == "" {
+		return 0
+	}
+	hash := fnv.New32a()
+	_, _ = hash.Write([]byte(value))
+	return float64(hash.Sum32())
+}
diff --git a/pkg/agentic/qa_analysis_test.go b/pkg/agentic/qa_analysis_test.go
new file mode 100644
index 0000000..e36e239
--- /dev/null
+++ b/pkg/agentic/qa_analysis_test.go
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: EUPL-1.2
+
+package agentic
+
+import (
+	"testing"
+	"time"
+
+	core "dappco.re/go/core"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestAnalyseWorkspace_Good_EmptyFindings(t *testing.T) {
+	root := t.TempDir()
+	setTestWorkspace(t, root)
+
+	subsystem := newPrepWithProcess()
+	t.Cleanup(subsystem.closeStateStore)
+
+	workspaceDir := core.JoinPath(WorkspaceRoot(), "core", "go-io", "task-empty")
+	workspaceName := WorkspaceName(workspaceDir)
+	workspace, err := subsystem.stateStoreInstance().NewWorkspace(qaWorkspaceName(workspaceDir))
+	require.NoError(t, err)
+	t.Cleanup(workspace.Discard)
+
+	report := subsystem.analyseWorkspaceNamed(workspace, workspaceName)
+
+	assert.Equal(t, workspaceName, report.Workspace)
+	assert.Empty(t, report.Findings)
+	assert.Empty(t, report.Clusters)
+	assert.Empty(t, report.New)
+	assert.Empty(t, report.Resolved)
+	assert.Empty(t, report.Persistent)
+	assert.Equal(t, 0, report.Summary["clusters"])
+	assert.Equal(t, "0 findings across 0 clusters; 0 new, 0 resolved, 0 persistent", report.SummaryText)
+}
+
+func TestAnalyseWorkspace_Good_FiveClusters(t *testing.T) {
+	root := t.TempDir()
+	setTestWorkspace(t, root)
+
+	subsystem := newPrepWithProcess()
+	t.Cleanup(subsystem.closeStateStore)
+
+	workspaceDir := core.JoinPath(WorkspaceRoot(), "core", "go-io", "task-five")
+	workspaceName := WorkspaceName(workspaceDir)
+	workspace, err := subsystem.stateStoreInstance().NewWorkspace(qaWorkspaceName(workspaceDir))
+	require.NoError(t, err)
+	t.Cleanup(workspace.Discard)
+
+	repeated := QAFinding{Tool: "gosec", Severity: "error", Category: "security-secret", Code: "G101", File: "secret.go", Line: 10, Message: "hardcoded secret"}
+	for cycle := 0; cycle < persistentThreshold-1; cycle++ {
+		publishDispatchReport(subsystem.stateStoreInstance(), workspaceName, DispatchReport{
+			Workspace:   workspaceName,
+			Findings:    []QAFinding{repeated},
+			GeneratedAt: time.Now().UTC(),
+		})
+	}
+
+	currentFindings := []QAFinding{
+		repeated,
+		{Tool: "gosec", Severity: "error", Category: "security-path", Code: "G304", File: "path.go", Line: 20, Message: "tainted path"},
+		{Tool: "staticcheck", Severity: "warning", Category: "correctness-regexp", Code: "SA1000", File: "regexp.go", Line: 30, Message: "invalid regexp"},
+		{Tool: "govet", Severity: "warning", Category: "printf", Code: "printf", File: "printf.go", Line: 40, Message: "printf mismatch"},
+		{Tool: "revive", Severity: "info", Category: "var-naming", Code: "var-naming", File: "style.go", Line: 50, Message: "bad variable name"},
+	}
+	for _, finding := range currentFindings {
+		require.NoError(t, workspace.Put("finding", findingToMap(finding)))
+	}
+
+	report := subsystem.analyseWorkspaceNamed(workspace, workspaceName)
+
+	if assert.Len(t, report.Clusters, 5) {
+		for _, cluster := range report.Clusters {
+			assert.Equal(t, 1, cluster.Count)
+		}
+	}
+	assert.Len(t, report.New, 4)
+	assert.Empty(t, report.Resolved)
+	assert.Len(t, report.Persistent, 1)
+	assert.Equal(t, 5, report.Summary["clusters"])
+	assert.Equal(t, 1, report.Summary["persistent"])
+}
+
+func TestAnalyseWorkspace_Bad_NilWorkspace(t *testing.T) {
+	var subsystem *PrepSubsystem
+
+	assert.NotPanics(t, func() {
+		report := subsystem.analyseWorkspace(nil)
+		assert.Empty(t, report.Workspace)
+		assert.Empty(t, report.Findings)
+		assert.Empty(t, report.Clusters)
+		assert.Empty(t, report.New)
+		assert.Empty(t, report.Resolved)
+		assert.Empty(t, report.Persistent)
+		assert.Equal(t, "0 findings across 0 clusters; 0 new, 0 resolved, 0 persistent", report.SummaryText)
+	})
+}
+
+func TestAnalyseWorkspace_Ugly_PoindexterPanic(t *testing.T) {
+	root := t.TempDir()
+	setTestWorkspace(t, root)
+
+	subsystem := newPrepWithProcess()
+	t.Cleanup(subsystem.closeStateStore)
+
+	workspaceDir := core.JoinPath(WorkspaceRoot(), "core", "go-io", "task-panic")
+	workspaceName := WorkspaceName(workspaceDir)
+	workspace, err := subsystem.stateStoreInstance().NewWorkspace(qaWorkspaceName(workspaceDir))
+	require.NoError(t, err)
+	t.Cleanup(workspace.Discard)
+
+	require.NoError(t, workspace.Put("finding", findingToMap(QAFinding{
+		Tool:     "gosec",
+		Severity: "error",
+		Category: "security-secret",
+		Code:     "G101",
+		File:     "panic.go",
+		Line:     10,
+		Message:  "hardcoded secret",
+	})))
+
+	previousClusterer := qaAnalysisClusterer
+	qaAnalysisClusterer = func([]QAFinding) []DispatchCluster {
+		panic("poindexter panic")
+	}
+	t.Cleanup(func() { qaAnalysisClusterer = previousClusterer })
+
+	assert.NotPanics(t, func() {
+		report := subsystem.analyseWorkspaceNamed(workspace, workspaceName)
+		if assert.Len(t, report.Clusters, 1) {
+			assert.Equal(t, 1, report.Clusters[0].Count)
+		}
+		assert.Equal(t, 1, report.Summary["clusters"])
+	})
+}