feat(agentci): honour validation threshold in weave
Co-Authored-By: Virgil <virgil@lethean.io>
This commit is contained in:
parent
c303abbd95
commit
b65ec9f052
4 changed files with 132 additions and 5 deletions
|
|
@ -5,6 +5,7 @@ package agentci
|
|||
import (
|
||||
"context"
|
||||
strings "dappco.re/go/core/scm/internal/ax/stringsx"
|
||||
"math"
|
||||
|
||||
"dappco.re/go/core/scm/jobrunner"
|
||||
)
|
||||
|
|
@ -89,8 +90,61 @@ func (s *Spinner) FindByForgejoUser(forgejoUser string) (string, AgentConfig, bo
|
|||
}
|
||||
|
||||
// Weave compares primary and verifier outputs. Returns true if they converge.
|
||||
// This is a placeholder for future semantic diff logic.
|
||||
// The comparison is a coarse token-overlap check controlled by the configured
|
||||
// validation threshold. It is intentionally deterministic and fast; richer
|
||||
// semantic diffing can replace it later without changing the signature.
|
||||
// Usage: Weave(...)
|
||||
func (s *Spinner) Weave(ctx context.Context, primaryOutput, signedOutput []byte) (bool, error) {
|
||||
return string(primaryOutput) == string(signedOutput), nil
|
||||
if ctx != nil {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false, ctx.Err()
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
primary := tokenizeWeaveOutput(primaryOutput)
|
||||
signed := tokenizeWeaveOutput(signedOutput)
|
||||
|
||||
if len(primary) == 0 && len(signed) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
threshold := s.Config.ValidationThreshold
|
||||
if threshold <= 0 || threshold > 1 {
|
||||
threshold = 0.85
|
||||
}
|
||||
|
||||
similarity := weaveDiceSimilarity(primary, signed)
|
||||
return similarity >= threshold, nil
|
||||
}
|
||||
|
||||
// tokenizeWeaveOutput splits output into whitespace-separated tokens.
//
// CRLF sequences are rewritten to LF before the split. Input that yields no
// tokens returns nil, so callers can rely on len(result) == 0 for "no content".
func tokenizeWeaveOutput(output []byte) []string {
	fields := strings.Fields(strings.ReplaceAll(string(output), "\r\n", "\n"))
	if len(fields) == 0 {
		return nil
	}
	return fields
}
|
||||
|
||||
// weaveDiceSimilarity returns a Dice-style overlap score for two token lists:
// 2 * common / (len(primary) + len(signed)), capped at 1.
//
// Tokens are matched as a multiset, so a token repeated k times in primary can
// match at most k occurrences in signed; token order is ignored. If either
// list is empty the score is 0.
func weaveDiceSimilarity(primary, signed []string) float64 {
	if len(primary) == 0 || len(signed) == 0 {
		return 0
	}

	// Remaining unmatched occurrences of each primary token.
	counts := make(map[string]int, len(primary))
	for _, token := range primary {
		counts[token]++
	}

	common := 0
	for _, token := range signed {
		if counts[token] == 0 {
			continue
		}
		// Consume one occurrence so duplicates are not over-counted.
		counts[token]--
		common++
	}

	return math.Min(1, (2*float64(common))/float64(len(primary)+len(signed)))
}
|
||||
|
|
|
|||
73
agentci/clotho_test.go
Normal file
73
agentci/clotho_test.go
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// SPDX-License-Identifier: EUPL-1.2
|
||||
|
||||
package agentci
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"dappco.re/go/core/scm/jobrunner"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSpinner_Weave_Good_ExactMatch(t *testing.T) {
|
||||
spinner := NewSpinner(ClothoConfig{ValidationThreshold: 0.85}, nil)
|
||||
|
||||
ok, err := spinner.Weave(context.Background(), []byte("alpha beta gamma"), []byte("alpha beta gamma"))
|
||||
require.NoError(t, err)
|
||||
assert.True(t, ok)
|
||||
}
|
||||
|
||||
func TestSpinner_Weave_Good_ThresholdMatch(t *testing.T) {
|
||||
spinner := NewSpinner(ClothoConfig{ValidationThreshold: 0.8}, nil)
|
||||
|
||||
ok, err := spinner.Weave(
|
||||
context.Background(),
|
||||
[]byte("alpha beta gamma delta epsilon zeta"),
|
||||
[]byte("alpha beta gamma delta epsilon eta"),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, ok)
|
||||
}
|
||||
|
||||
func TestSpinner_Weave_Bad_ThresholdMismatch(t *testing.T) {
|
||||
spinner := NewSpinner(ClothoConfig{ValidationThreshold: 0.9}, nil)
|
||||
|
||||
ok, err := spinner.Weave(
|
||||
context.Background(),
|
||||
[]byte("alpha beta gamma delta epsilon zeta"),
|
||||
[]byte("alpha beta gamma delta epsilon eta"),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, ok)
|
||||
}
|
||||
|
||||
func TestSpinner_Weave_Good_EmptyOutputs(t *testing.T) {
|
||||
spinner := NewSpinner(ClothoConfig{}, nil)
|
||||
|
||||
ok, err := spinner.Weave(context.Background(), nil, nil)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, ok)
|
||||
}
|
||||
|
||||
func TestSpinner_Weave_Bad_ContextCancelled(t *testing.T) {
|
||||
spinner := NewSpinner(ClothoConfig{}, nil)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
ok, err := spinner.Weave(ctx, []byte("alpha"), []byte("alpha"))
|
||||
assert.False(t, ok)
|
||||
require.Error(t, err)
|
||||
assert.True(t, errors.Is(err, context.Canceled))
|
||||
}
|
||||
|
||||
func TestSpinner_DeterminePlan_Good(t *testing.T) {
|
||||
spinner := NewSpinner(ClothoConfig{Strategy: "clotho-verified"}, map[string]AgentConfig{
|
||||
"charon": {DualRun: true},
|
||||
})
|
||||
|
||||
ok := spinner.DeterminePlan(&jobrunner.PipelineSignal{RepoName: "docs"}, "charon")
|
||||
assert.Equal(t, ModeDual, ok)
|
||||
}
|
||||
|
|
@ -350,7 +350,7 @@ agentci:
|
|||
3. If the repository name is `core` or contains `security`, dual (Axiom 1: critical repos always verified).
|
||||
4. Otherwise, standard.
|
||||
|
||||
In dual-run mode, `DispatchHandler` populates `DispatchTicket.VerifyModel` and `DispatchTicket.DualRun=true`. The `Weave` method compares primary and verifier outputs for convergence (currently byte-equal; semantic diff reserved for a future phase).
|
||||
In dual-run mode, `DispatchHandler` populates `DispatchTicket.VerifyModel` and `DispatchTicket.DualRun=true`. The `Weave` method compares primary and verifier outputs for convergence using a deterministic token-overlap score against `validation_threshold`; richer semantic diffing remains a future phase.
|
||||
|
||||
### Dispatch Ticket Transfer
|
||||
|
||||
|
|
|
|||
|
|
@ -122,9 +122,9 @@ Full signal-to-result flow tested for all five handlers via a mock Forgejo serve
|
|||
|
||||
The Forgejo SDK v2 and Gitea SDK do not accept `context.Context`. All Forgejo/Gitea API calls are blocking with no cancellation path. When the SDK is updated to support context (v3 or later), a follow-up task should thread `ctx` through all forge/ and gitea/ wrapper signatures.
|
||||
|
||||
**Clotho Weave — byte-equal only**
|
||||
**Clotho Weave — thresholded token overlap**
|
||||
|
||||
`Spinner.Weave(ctx, primary, signed)` currently returns `string(primaryOutput) == string(signedOutput)`. This is a placeholder. Meaningful dual-run verification requires semantic diff logic (e.g., normalised AST comparison, embedding cosine similarity, or LLM-assisted diffing). The interface signature is stable; the implementation is not production-ready for divergent outputs.
|
||||
`Spinner.Weave(ctx, primary, signed)` now uses the configured `validation_threshold` to decide convergence from a deterministic token-overlap score. This is still a lightweight approximation rather than full semantic diffing, but it now honours the config knob already exposed by `ClothoConfig`.
|
||||
|
||||
**collect/ HTTP collectors — no retry**
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue