feat(agentci): honour validation threshold in weave
Some checks failed
Security Scan / security (push) Failing after 13s
Test / test (push) Successful in 2m31s

Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
Virgil 2026-04-02 13:26:35 +00:00
parent c303abbd95
commit b65ec9f052
4 changed files with 132 additions and 5 deletions

View file

@ -5,6 +5,7 @@ package agentci
import ( import (
"context" "context"
strings "dappco.re/go/core/scm/internal/ax/stringsx" strings "dappco.re/go/core/scm/internal/ax/stringsx"
"math"
"dappco.re/go/core/scm/jobrunner" "dappco.re/go/core/scm/jobrunner"
) )
@ -89,8 +90,61 @@ func (s *Spinner) FindByForgejoUser(forgejoUser string) (string, AgentConfig, bo
} }
// Weave compares primary and verifier outputs. Returns true if they converge. // Weave compares primary and verifier outputs. Returns true if they converge.
// This is a placeholder for future semantic diff logic. // The comparison is a coarse token-overlap check controlled by the configured
// validation threshold. It is intentionally deterministic and fast; richer
// semantic diffing can replace it later without changing the signature.
// Usage: Weave(...) // Usage: Weave(...)
func (s *Spinner) Weave(ctx context.Context, primaryOutput, signedOutput []byte) (bool, error) { func (s *Spinner) Weave(ctx context.Context, primaryOutput, signedOutput []byte) (bool, error) {
return string(primaryOutput) == string(signedOutput), nil if ctx != nil {
select {
case <-ctx.Done():
return false, ctx.Err()
default:
}
}
primary := tokenizeWeaveOutput(primaryOutput)
signed := tokenizeWeaveOutput(signedOutput)
if len(primary) == 0 && len(signed) == 0 {
return true, nil
}
threshold := s.Config.ValidationThreshold
if threshold <= 0 || threshold > 1 {
threshold = 0.85
}
similarity := weaveDiceSimilarity(primary, signed)
return similarity >= threshold, nil
}
// tokenizeWeaveOutput converts raw output bytes into a list of
// whitespace-separated tokens. CRLF sequences are rewritten to LF before
// splitting. It returns nil when the output contains no tokens.
func tokenizeWeaveOutput(output []byte) []string {
	normalised := strings.ReplaceAll(string(output), "\r\n", "\n")
	if tokens := strings.Fields(normalised); len(tokens) > 0 {
		return tokens
	}
	return nil
}
// weaveDiceSimilarity scores the overlap between two token lists as
// 2*common / (len(primary) + len(signed)), where common counts tokens shared
// by both lists with multiplicity: each occurrence in primary can be matched
// by at most one occurrence in signed. Token order is ignored.
//
// It returns 0 when either list is empty; otherwise the result lies in [0, 1].
func weaveDiceSimilarity(primary, signed []string) float64 {
	if len(primary) == 0 || len(signed) == 0 {
		return 0
	}

	// remaining[token] is how many occurrences from primary are still unmatched.
	remaining := make(map[string]int, len(primary))
	for _, token := range primary {
		remaining[token]++
	}

	matched := 0
	for _, token := range signed {
		if remaining[token] > 0 {
			remaining[token]--
			matched++
		}
	}

	score := (2 * float64(matched)) / float64(len(primary)+len(signed))
	return math.Min(1, score)
}

73
agentci/clotho_test.go Normal file
View file

@ -0,0 +1,73 @@
// SPDX-License-Identifier: EUPL-1.2
package agentci
import (
"context"
"errors"
"testing"
"dappco.re/go/core/scm/jobrunner"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestSpinner_Weave_Good_ExactMatch checks that two identical outputs are
// reported as converged, without error, under a 0.85 threshold.
func TestSpinner_Weave_Good_ExactMatch(t *testing.T) {
	primary := []byte("alpha beta gamma")
	verifier := []byte("alpha beta gamma")
	sp := NewSpinner(ClothoConfig{ValidationThreshold: 0.85}, nil)

	converged, err := sp.Weave(context.Background(), primary, verifier)

	require.NoError(t, err)
	assert.True(t, converged)
}
// TestSpinner_Weave_Good_ThresholdMatch checks that two six-token outputs
// differing only in their final token converge under a 0.8 threshold.
func TestSpinner_Weave_Good_ThresholdMatch(t *testing.T) {
	primary := []byte("alpha beta gamma delta epsilon zeta")
	verifier := []byte("alpha beta gamma delta epsilon eta")
	sp := NewSpinner(ClothoConfig{ValidationThreshold: 0.8}, nil)

	converged, err := sp.Weave(context.Background(), primary, verifier)

	require.NoError(t, err)
	assert.True(t, converged)
}
// TestSpinner_Weave_Bad_ThresholdMismatch checks that two six-token outputs
// differing only in their final token do not converge under a 0.9 threshold,
// and that the mismatch is reported without an error.
func TestSpinner_Weave_Bad_ThresholdMismatch(t *testing.T) {
	primary := []byte("alpha beta gamma delta epsilon zeta")
	verifier := []byte("alpha beta gamma delta epsilon eta")
	sp := NewSpinner(ClothoConfig{ValidationThreshold: 0.9}, nil)

	converged, err := sp.Weave(context.Background(), primary, verifier)

	require.NoError(t, err)
	assert.False(t, converged)
}
// TestSpinner_Weave_Good_EmptyOutputs checks that two nil outputs are reported
// as converged, without error, when the config is left at its zero value.
func TestSpinner_Weave_Good_EmptyOutputs(t *testing.T) {
	sp := NewSpinner(ClothoConfig{}, nil)

	converged, err := sp.Weave(context.Background(), nil, nil)

	require.NoError(t, err)
	assert.True(t, converged)
}
// TestSpinner_Weave_Bad_ContextCancelled checks that Weave called with an
// already-cancelled context returns false and an error matching
// context.Canceled, even though the two outputs are identical.
func TestSpinner_Weave_Bad_ContextCancelled(t *testing.T) {
	sp := NewSpinner(ClothoConfig{}, nil)

	// Cancel before the call so Weave observes a finished context.
	cancelled, stop := context.WithCancel(context.Background())
	stop()

	converged, err := sp.Weave(cancelled, []byte("alpha"), []byte("alpha"))

	assert.False(t, converged)
	require.Error(t, err)
	assert.True(t, errors.Is(err, context.Canceled))
}
func TestSpinner_DeterminePlan_Good(t *testing.T) {
spinner := NewSpinner(ClothoConfig{Strategy: "clotho-verified"}, map[string]AgentConfig{
"charon": {DualRun: true},
})
ok := spinner.DeterminePlan(&jobrunner.PipelineSignal{RepoName: "docs"}, "charon")
assert.Equal(t, ModeDual, ok)
}

View file

@ -350,7 +350,7 @@ agentci:
3. If the repository name is `core` or contains `security`, dual (Axiom 1: critical repos always verified).
4. Otherwise, standard.
In dual-run mode, `DispatchHandler` populates `DispatchTicket.VerifyModel` and `DispatchTicket.DualRun=true`. The `Weave` method compares primary and verifier outputs for convergence (currently byte-equal; semantic diff reserved for a future phase). In dual-run mode, `DispatchHandler` populates `DispatchTicket.VerifyModel` and `DispatchTicket.DualRun=true`. The `Weave` method compares primary and verifier outputs for convergence using a deterministic token-overlap score against `validation_threshold`; richer semantic diffing remains a future phase.
### Dispatch Ticket Transfer ### Dispatch Ticket Transfer

View file

@ -122,9 +122,9 @@ Full signal-to-result flow tested for all five handlers via a mock Forgejo serve
The Forgejo SDK v2 and Gitea SDK do not accept `context.Context`. All Forgejo/Gitea API calls are blocking with no cancellation path. When the SDK is updated to support context (v3 or later), a follow-up task should thread `ctx` through all forge/ and gitea/ wrapper signatures.
**Clotho Weave — byte-equal only** **Clotho Weave — thresholded token overlap**
`Spinner.Weave(ctx, primary, signed)` currently returns `string(primaryOutput) == string(signedOutput)`. This is a placeholder. Meaningful dual-run verification requires semantic diff logic (e.g., normalised AST comparison, embedding cosine similarity, or LLM-assisted diffing). The interface signature is stable; the implementation is not production-ready for divergent outputs. `Spinner.Weave(ctx, primary, signed)` now uses the configured `validation_threshold` to decide convergence from a deterministic token-overlap score. This is still a lightweight approximation rather than full semantic diffing, but it now honours the config knob already exposed by `ClothoConfig`.
**collect/ HTTP collectors — no retry** **collect/ HTTP collectors — no retry**