diff --git a/agentci/clotho.go b/agentci/clotho.go
index ac70b04..52b8d88 100644
--- a/agentci/clotho.go
+++ b/agentci/clotho.go
@@ -5,6 +5,7 @@ package agentci
 import (
 	"context"
 	strings "dappco.re/go/core/scm/internal/ax/stringsx"
+	"math"
 
 	"dappco.re/go/core/scm/jobrunner"
 )
@@ -89,8 +90,72 @@ func (s *Spinner) FindByForgejoUser(forgejoUser string) (string, AgentConfig, bo
 }
 
 // Weave compares primary and verifier outputs. Returns true if they converge.
-// This is a placeholder for future semantic diff logic.
+// The comparison is a coarse token-overlap check controlled by the configured
+// validation threshold. It is intentionally deterministic and fast; richer
+// semantic diffing can replace it later without changing the signature.
+// Thresholds outside (0, 1], including NaN, fall back to 0.85.
 // Usage: Weave(...)
 func (s *Spinner) Weave(ctx context.Context, primaryOutput, signedOutput []byte) (bool, error) {
-	return string(primaryOutput) == string(signedOutput), nil
+	// Tolerate a nil ctx; otherwise stop early once it is already done.
+	if ctx != nil {
+		select {
+		case <-ctx.Done():
+			return false, ctx.Err()
+		default:
+		}
+	}
+
+	primary := tokenizeWeaveOutput(primaryOutput)
+	signed := tokenizeWeaveOutput(signedOutput)
+
+	// Two empty outputs trivially converge.
+	if len(primary) == 0 && len(signed) == 0 {
+		return true, nil
+	}
+
+	// NaN fails every ordered comparison, so it must be rejected explicitly or
+	// no pair of outputs could ever converge.
+	threshold := s.Config.ValidationThreshold
+	if math.IsNaN(threshold) || threshold <= 0 || threshold > 1 {
+		threshold = 0.85
+	}
+
+	similarity := weaveDiceSimilarity(primary, signed)
+	return similarity >= threshold, nil
+}
+
+// tokenizeWeaveOutput normalises CRLF line endings and splits output into
+// tokens with strings.Fields. It returns nil when there are no tokens.
+func tokenizeWeaveOutput(output []byte) []string {
+	fields := strings.Fields(strings.ReplaceAll(string(output), "\r\n", "\n"))
+	if len(fields) == 0 {
+		return nil
+	}
+	return fields
+}
+
+// weaveDiceSimilarity returns the Sorensen-Dice coefficient of two token
+// multisets: twice the number of shared tokens (each occurrence is matched at
+// most once) divided by the combined token count. Token order is ignored and
+// the result is 0 when either side is empty.
+func weaveDiceSimilarity(primary, signed []string) float64 {
+	if len(primary) == 0 || len(signed) == 0 {
+		return 0
+	}
+
+	counts := make(map[string]int, len(primary))
+	for _, token := range primary {
+		counts[token]++
+	}
+
+	common := 0
+	for _, token := range signed {
+		if counts[token] == 0 {
+			continue
+		}
+		counts[token]--
+		common++
+	}
+
+	return math.Min(1, (2*float64(common))/float64(len(primary)+len(signed)))
 }
diff --git a/agentci/clotho_test.go b/agentci/clotho_test.go
new file mode 100644
index 0000000..b2b9722
--- /dev/null
+++ b/agentci/clotho_test.go
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: EUPL-1.2
+
+package agentci
+
+import (
+	"context"
+	"errors"
+	"math"
+	"testing"
+
+	"dappco.re/go/core/scm/jobrunner"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestSpinner_Weave_Good_ExactMatch(t *testing.T) {
+	spinner := NewSpinner(ClothoConfig{ValidationThreshold: 0.85}, nil)
+
+	ok, err := spinner.Weave(context.Background(), []byte("alpha beta gamma"), []byte("alpha beta gamma"))
+	require.NoError(t, err)
+	assert.True(t, ok)
+}
+
+func TestSpinner_Weave_Good_ThresholdMatch(t *testing.T) {
+	spinner := NewSpinner(ClothoConfig{ValidationThreshold: 0.8}, nil)
+
+	ok, err := spinner.Weave(
+		context.Background(),
+		[]byte("alpha beta gamma delta epsilon zeta"),
+		[]byte("alpha beta gamma delta epsilon eta"),
+	)
+	require.NoError(t, err)
+	assert.True(t, ok)
+}
+
+func TestSpinner_Weave_Bad_ThresholdMismatch(t *testing.T) {
+	spinner := NewSpinner(ClothoConfig{ValidationThreshold: 0.9}, nil)
+
+	ok, err := spinner.Weave(
+		context.Background(),
+		[]byte("alpha beta gamma delta epsilon zeta"),
+		[]byte("alpha beta gamma delta epsilon eta"),
+	)
+	require.NoError(t, err)
+	assert.False(t, ok)
+}
+
+func TestSpinner_Weave_Good_EmptyOutputs(t *testing.T) {
+	spinner := NewSpinner(ClothoConfig{}, nil)
+
+	ok, err := spinner.Weave(context.Background(), nil, nil)
+	require.NoError(t, err)
+	assert.True(t, ok)
+}
+
+func TestSpinner_Weave_Good_NaNThresholdUsesDefault(t *testing.T) {
+	spinner := NewSpinner(ClothoConfig{ValidationThreshold: math.NaN()}, nil)
+
+	ok, err := spinner.Weave(context.Background(), []byte("alpha beta"), []byte("alpha beta"))
+	require.NoError(t, err)
+	assert.True(t, ok)
+}
+
+func TestSpinner_Weave_Bad_ContextCancelled(t *testing.T) {
+	spinner := NewSpinner(ClothoConfig{}, nil)
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	ok, err := spinner.Weave(ctx, []byte("alpha"), []byte("alpha"))
+	assert.False(t, ok)
+	require.Error(t, err)
+	assert.True(t, errors.Is(err, context.Canceled))
+}
+
+func TestSpinner_DeterminePlan_Good(t *testing.T) {
+	spinner := NewSpinner(ClothoConfig{Strategy: "clotho-verified"}, map[string]AgentConfig{
+		"charon": {DualRun: true},
+	})
+
+	mode := spinner.DeterminePlan(&jobrunner.PipelineSignal{RepoName: "docs"}, "charon")
+	assert.Equal(t, ModeDual, mode)
+}
diff --git a/docs/architecture.md b/docs/architecture.md
index 509826b..70ab3e6 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -350,7 +350,7 @@ agentci:
 3. If the repository name is `core` or contains `security`, dual (Axiom 1: critical repos always verified).
 4. Otherwise, standard.
 
-In dual-run mode, `DispatchHandler` populates `DispatchTicket.VerifyModel` and `DispatchTicket.DualRun=true`. The `Weave` method compares primary and verifier outputs for convergence (currently byte-equal; semantic diff reserved for a future phase).
+In dual-run mode, `DispatchHandler` populates `DispatchTicket.VerifyModel` and `DispatchTicket.DualRun=true`. The `Weave` method compares primary and verifier outputs for convergence using a deterministic token-overlap score against `validation_threshold`; richer semantic diffing remains a future phase.
 
 ### Dispatch Ticket Transfer
 
diff --git a/docs/history.md b/docs/history.md
index 13f64cd..0fe8e6f 100644
--- a/docs/history.md
+++ b/docs/history.md
@@ -122,9 +122,9 @@ Full signal-to-result flow tested for all five handlers via a mock Forgejo serve
 
 The Forgejo SDK v2 and Gitea SDK do not accept `context.Context`. All Forgejo/Gitea API calls are blocking with no cancellation path. When the SDK is updated to support context (v3 or later), a follow-up task should thread `ctx` through all forge/ and gitea/ wrapper signatures.
 
-**Clotho Weave — byte-equal only**
+**Clotho Weave — thresholded token overlap**
 
-`Spinner.Weave(ctx, primary, signed)` currently returns `string(primaryOutput) == string(signedOutput)`. This is a placeholder. Meaningful dual-run verification requires semantic diff logic (e.g., normalised AST comparison, embedding cosine similarity, or LLM-assisted diffing). The interface signature is stable; the implementation is not production-ready for divergent outputs.
+`Spinner.Weave(ctx, primary, signed)` now uses the configured `validation_threshold` to decide convergence from a deterministic token-overlap score. This is still a lightweight approximation rather than full semantic diffing, but it now honours the config knob already exposed by `ClothoConfig`. Thresholds outside (0, 1], including NaN, fall back to 0.85.
 
 **collect/ HTTP collectors — no retry**
 