feat(agent/content): SEO Natural Progression scheduler with Googlebot trigger (#543)

Implements pkg/agentic/content_seo.go: SEORevision, ScheduleRevision,
GetPendingRevisions, OnGooglebotVisit, a Gin middleware hook, and the
content_seo_schedule MCP tool. On a Googlebot hit, each pending revision's
ScheduledAt is set to a random time 8-62 minutes after the visit. Revisions
are persisted in the seo_revisions group of .core/db.duckdb.

AX-10 coverage in content_seo_test.go (Good/Bad/Ugly + MCP tool registration).

Closes tasks.lthn.sh/view.php?id=543

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Snider 2026-04-26 00:16:49 +01:00
parent 7a9dbadb57
commit 084e69357f
3 changed files with 464 additions and 0 deletions

View file

@ -438,6 +438,7 @@ func (s *PrepSubsystem) registerContentTools(svc *coremcp.Service) {
Name: "content_schema_generate",
Description: "Generate SEO schema JSON-LD for article, FAQ, or how-to content.",
}, s.contentSchemaGenerate)
s.registerContentSEOTool(svc)
}
func (s *PrepSubsystem) contentGenerate(ctx context.Context, _ *mcp.CallToolRequest, input ContentGenerateInput) (*mcp.CallToolResult, ContentGenerateOutput, error) {

302
pkg/agentic/content_seo.go Normal file
View file

@ -0,0 +1,302 @@
// SPDX-License-Identifier: EUPL-1.2
package agentic
import (
"context"
"crypto/rand"
"encoding/hex"
"math/big"
"net/http"
"time"
core "dappco.re/go/core"
coremcp "dappco.re/go/mcp/pkg/mcp"
store "dappco.re/go/store"
"github.com/gin-gonic/gin"
"github.com/modelcontextprotocol/go-sdk/mcp"
)
// SEORevision is one stored content revision for a page.
//
// A revision is pending while ScheduledAt is nil; OnGooglebotVisit later fills
// ScheduledAt with the computed publish time.
//
// Usage example: `revision := agentic.SEORevision{PageID: "/help/hosting", Content: "Updated copy", ScheduledAt: nil, CreatedAt: time.Now()}`
type SEORevision struct {
// PageID identifies the page the revision belongs to (for example a URL path).
PageID string `json:"page_id"`
// Content is the replacement copy carried by the revision.
Content string `json:"content"`
// ScheduledAt is nil until a publish time has been assigned; it is serialised as JSON null while pending.
ScheduledAt *time.Time `json:"scheduled_at"`
// CreatedAt is the time the revision was created by ScheduleRevision.
CreatedAt time.Time `json:"created_at"`
}
// ContentSEOScheduleInput is the argument payload of the content_seo_schedule
// MCP tool; both fields are passed straight to ScheduleRevision.
//
// input := agentic.ContentSEOScheduleInput{PageID: "/help/hosting", Content: "Updated copy"}
type ContentSEOScheduleInput struct {
// PageID is the page to create a pending revision for; ScheduleRevision rejects it when blank.
PageID string `json:"page_id"`
// Content is the revision body to store.
Content string `json:"content"`
}
// ContentSEOScheduleOutput is the result payload of the content_seo_schedule
// MCP tool.
//
// out := agentic.ContentSEOScheduleOutput{Success: true, Revision: agentic.SEORevision{PageID: "/help/hosting"}}
type ContentSEOScheduleOutput struct {
// Success is set to true by the tool handler when the revision was persisted.
Success bool `json:"success"`
// Revision is the pending revision returned by ScheduleRevision.
Revision SEORevision `json:"revision"`
}
// seoRevisionRecord pairs a decoded revision with the store key it was read
// from, so the same entry can be overwritten when the revision is scheduled.
type seoRevisionRecord struct {
// Key is the entry key inside the seo_revisions store group.
Key string
// Revision is the decoded JSON value stored under Key.
Revision SEORevision
}
// contentSEORevisionGroup is the state-store group under which every SEO
// revision is written and from which revisions are iterated.
const contentSEORevisionGroup = "seo_revisions"
// Package-level seams for time and randomness. Tests in this package replace
// both variables to obtain deterministic results.
var (
	// contentSEONow supplies the current time for revision creation and scheduling.
	contentSEONow = time.Now

	// contentSEORandomDelay picks the publish delay for one revision: a whole
	// number of minutes drawn from crypto/rand, between 8 and 62 inclusive.
	contentSEORandomDelay = func() (time.Duration, error) {
		const (
			minDelayMinutes  = 8
			delaySpanMinutes = 55 // rand.Int yields [0, 55), i.e. offsets 0..54
		)
		offset, err := rand.Int(rand.Reader, big.NewInt(delaySpanMinutes))
		if err != nil {
			return 0, core.E("contentSEORandomDelay", "read random delay", err)
		}
		minutes := offset.Int64() + minDelayMinutes
		return time.Duration(minutes) * time.Minute, nil
	}
)
// registerContentSEOTool registers the content_seo_schedule MCP tool on svc
// under the "agentic" group, handled by contentSEOScheduleTool.
func (s *PrepSubsystem) registerContentSEOTool(svc *coremcp.Service) {
	scheduleTool := &mcp.Tool{
		Name:        "content_seo_schedule",
		Description: "Create a pending Natural Progression SEO revision that stays unpublished until a Googlebot visit schedules it.",
	}
	coremcp.AddToolRecorded(svc, svc.Server(), "agentic", scheduleTool, s.contentSEOScheduleTool)
}
// contentSEOScheduleTool is the MCP handler for content_seo_schedule. It
// forwards the input to ScheduleRevision and wraps the stored revision in a
// successful output; on failure it returns the error with a zero output.
func (s *PrepSubsystem) contentSEOScheduleTool(ctx context.Context, _ *mcp.CallToolRequest, input ContentSEOScheduleInput) (*mcp.CallToolResult, ContentSEOScheduleOutput, error) {
	scheduled, err := s.ScheduleRevision(ctx, input.PageID, input.Content)
	if err != nil {
		return nil, ContentSEOScheduleOutput{}, err
	}
	output := ContentSEOScheduleOutput{Success: true, Revision: scheduled}
	return nil, output, nil
}
// ScheduleRevision stores a new pending revision for pageID and returns it.
//
// The page ID is trimmed and must not be blank. The revision is written with a
// nil ScheduledAt and CreatedAt taken from contentSEONow, under a key derived
// from CreatedAt. An error is returned when ctx is already cancelled, the page
// ID is blank, the state store is unavailable, or the write fails.
//
// revision, err := subsystem.ScheduleRevision(ctx, "/help/hosting", "Updated copy")
func (s *PrepSubsystem) ScheduleRevision(ctx context.Context, pageID, content string) (SEORevision, error) {
	const operation = "scheduleRevision"

	if cancelled := contentSEOContextErr(operation, ctx); cancelled != nil {
		return SEORevision{}, cancelled
	}

	trimmedID := core.Trim(pageID)
	if trimmedID == "" {
		return SEORevision{}, core.E(operation, "page_id is required", nil)
	}

	backing, err := s.contentSEOStore()
	if err != nil {
		return SEORevision{}, err
	}

	// ScheduledAt stays nil: that is what marks the revision as pending.
	pending := SEORevision{
		PageID:    trimmedID,
		Content:   content,
		CreatedAt: contentSEONow(),
	}
	key := contentSEORevisionKey(pending.CreatedAt)
	payload := core.JSONMarshalString(pending)
	if err := backing.Set(contentSEORevisionGroup, key, payload); err != nil {
		return SEORevision{}, core.E(operation, "persist revision", err)
	}
	return pending, nil
}
// GetPendingRevisions returns every stored revision of pageID whose
// ScheduledAt is still nil. The page ID is trimmed and must not be blank. The
// result is an empty, non-nil slice when nothing is pending.
//
// revisions, err := subsystem.GetPendingRevisions("/help/hosting")
func (s *PrepSubsystem) GetPendingRevisions(pageID string) ([]SEORevision, error) {
	trimmedID := core.Trim(pageID)
	if trimmedID == "" {
		return nil, core.E("getPendingRevisions", "page_id is required", nil)
	}

	backing, err := s.contentSEOStore()
	if err != nil {
		return nil, err
	}

	matches, err := s.contentSEORevisionRecords(backing, trimmedID, true)
	if err != nil {
		return nil, err
	}

	// Strip the store keys; callers only need the revisions themselves.
	pending := make([]SEORevision, len(matches))
	for i := range matches {
		pending[i] = matches[i].Revision
	}
	return pending, nil
}
// OnGooglebotVisit assigns a publish time to every pending revision of pageID.
//
// Each pending revision gets its own delay from contentSEORandomDelay, added to
// a single base time read from contentSEONow, and is rewritten under its
// existing key inside one store transaction. It is a no-op when the page has no
// pending revisions. An error is returned when ctx is cancelled, the page ID is
// blank, the store is unavailable, or reading or writing revisions fails.
//
// NOTE(review): pending revisions are read before the transaction starts, so
// two overlapping visits could presumably both schedule the same revision;
// confirm whether the store serialises this.
//
// err := subsystem.OnGooglebotVisit(ctx, "/help/hosting")
func (s *PrepSubsystem) OnGooglebotVisit(ctx context.Context, pageID string) error {
	const operation = "onGooglebotVisit"

	if err := contentSEOContextErr(operation, ctx); err != nil {
		return err
	}
	pageID = core.Trim(pageID)
	if pageID == "" {
		return core.E(operation, "page_id is required", nil)
	}

	backing, err := s.contentSEOStore()
	if err != nil {
		return err
	}
	pending, err := s.contentSEORevisionRecords(backing, pageID, true)
	if err != nil {
		return err
	}
	if len(pending) == 0 {
		return nil
	}

	visitedAt := contentSEONow()
	assignPublishTimes := func(tx *store.StoreTransaction) error {
		for i := range pending {
			// Re-check cancellation between writes so a cancelled request aborts the batch.
			if err := contentSEOContextErr(operation, ctx); err != nil {
				return err
			}
			delay, err := contentSEORandomDelay()
			if err != nil {
				return core.E(operation, "compute publish delay", err)
			}
			publishAt := visitedAt.Add(delay)
			updated := pending[i].Revision
			updated.ScheduledAt = &publishAt
			if err := tx.Set(contentSEORevisionGroup, pending[i].Key, core.JSONMarshalString(updated)); err != nil {
				return core.E(operation, "persist scheduled revision", err)
			}
		}
		return nil
	}

	if err := backing.Transaction(assignPublishTimes); err != nil {
		return core.E(operation, "transaction", err)
	}
	return nil
}
// HandleGooglebotVisit runs OnGooglebotVisit for pageID when userAgent
// identifies itself as Googlebot; any other user agent is ignored and nil is
// returned.
//
// err := subsystem.HandleGooglebotVisit(ctx, "/help/hosting", request.UserAgent())
func (s *PrepSubsystem) HandleGooglebotVisit(ctx context.Context, pageID, userAgent string) error {
	if contentSEOIsGooglebot(userAgent) {
		return s.OnGooglebotVisit(ctx, pageID)
	}
	return nil
}
// ContentSEOGooglebotMiddleware returns a Gin handler that triggers
// OnGooglebotVisit after the remaining handlers in the chain have run.
//
// The trigger fires only for GET requests whose User-Agent contains
// "googlebot" and whose response status is below 400. The page ID comes from
// resolvePageID when it is non-nil and returns a non-empty value; otherwise the
// request URL path is used. Trigger failures are logged with core.Warn and
// never alter the response.
//
// middleware := subsystem.ContentSEOGooglebotMiddleware(nil)
func (s *PrepSubsystem) ContentSEOGooglebotMiddleware(resolvePageID func(*gin.Context) string) gin.HandlerFunc {
	return func(c *gin.Context) {
		// Guard before c.Next(): Next dereferences the context, so checking for
		// nil afterwards (as the previous version did) could never take effect.
		if c == nil {
			return
		}

		// Let the downstream handlers run first so the final status is known.
		c.Next()

		if c.Request == nil || c.Request.Method != http.MethodGet {
			return
		}
		if !contentSEOIsGooglebot(c.Request.UserAgent()) {
			return
		}
		// Error responses do not count as a crawl of the page.
		if c.Writer != nil && c.Writer.Status() >= http.StatusBadRequest {
			return
		}

		pageID := ""
		if resolvePageID != nil {
			pageID = resolvePageID(c)
		}
		if pageID == "" && c.Request.URL != nil {
			pageID = c.Request.URL.Path
		}
		pageID = core.Trim(pageID)
		if pageID == "" {
			return
		}

		if err := s.OnGooglebotVisit(c.Request.Context(), pageID); err != nil {
			core.Warn("content seo googlebot trigger failed", "page_id", pageID, "error", err)
		}
	}
}
// contentSEOStore returns the subsystem's state store, or an error when the
// receiver is nil or no store instance is available. When the subsystem
// reports a store error, that error is attached as the cause.
func (s *PrepSubsystem) contentSEOStore() (*store.Store, error) {
	const operation = "contentSEOStore"

	if s == nil {
		return nil, core.E(operation, "subsystem is nil", nil)
	}
	if instance := s.stateStoreInstance(); instance != nil {
		return instance, nil
	}

	cause := s.stateStoreErr()
	if cause == nil {
		return nil, core.E(operation, "state store unavailable", nil)
	}
	return nil, core.E(operation, "state store unavailable", cause)
}
// contentSEORevisionRecords iterates the seo_revisions group of storeInstance
// and returns the decoded entries together with their store keys.
//
// A non-blank pageID (after trimming) restricts the result to that page, and
// pendingOnly drops revisions that already have a ScheduledAt. Iteration stops
// with an error on the first entry that fails to iterate or decode, even if
// that entry belongs to another page.
func (s *PrepSubsystem) contentSEORevisionRecords(storeInstance *store.Store, pageID string, pendingOnly bool) ([]seoRevisionRecord, error) {
	wantedPage := core.Trim(pageID)
	matched := []seoRevisionRecord{}

	for entry, iterErr := range storeInstance.AllSeq(contentSEORevisionGroup) {
		if iterErr != nil {
			return nil, core.E("contentSEORevisionRecords", "iterate revisions", iterErr)
		}
		decoded, decodeErr := contentSEORevisionValue(entry.Value)
		if decodeErr != nil {
			return nil, decodeErr
		}

		otherPage := wantedPage != "" && decoded.PageID != wantedPage
		alreadyScheduled := pendingOnly && decoded.ScheduledAt != nil
		if otherPage || alreadyScheduled {
			continue
		}

		matched = append(matched, seoRevisionRecord{Key: entry.Key, Revision: decoded})
	}
	return matched, nil
}
// contentSEORevisionValue decodes one stored JSON value into an SEORevision.
// It fails when the JSON cannot be decoded or when the decoded page ID is
// blank after trimming.
func contentSEORevisionValue(value string) (SEORevision, error) {
	const operation = "contentSEORevisionValue"

	var decoded SEORevision
	if outcome := core.JSONUnmarshalString(value, &decoded); !outcome.OK {
		// Attach the underlying error when the result carries one; otherwise cause stays nil.
		cause, _ := outcome.Value.(error)
		return SEORevision{}, core.E(operation, "decode revision", cause)
	}
	if core.Trim(decoded.PageID) == "" {
		return SEORevision{}, core.E(operation, "revision page_id is empty", nil)
	}
	return decoded, nil
}
// contentSEORevisionKey builds the store key for a revision:
// "<createdAt in Unix nanoseconds>-<random hex suffix>".
func contentSEORevisionKey(createdAt time.Time) string {
	timestamp := core.Sprint(createdAt.UnixNano())
	suffix := contentSEORandomHex()
	return core.Concat(timestamp, "-", suffix)
}
// contentSEORandomHex returns three random bytes from crypto/rand as a
// six-character lowercase hex string, or "000000" if the random source fails.
func contentSEORandomHex() string {
	var suffix [3]byte
	if _, err := rand.Read(suffix[:]); err != nil {
		return "000000"
	}
	return hex.EncodeToString(suffix[:])
}
// contentSEOContextErr reports whether ctx has been cancelled or has expired.
// It returns nil for a nil or still-live context, otherwise a "cancelled"
// error attributed to operation that wraps ctx.Err().
func contentSEOContextErr(operation string, ctx context.Context) error {
	if ctx == nil {
		return nil
	}
	if cause := ctx.Err(); cause != nil {
		return core.E(operation, "cancelled", cause)
	}
	return nil
}
// contentSEOIsGooglebot reports whether the trimmed, lower-cased user agent
// contains the substring "googlebot". This is a plain string match; it does
// not verify that the request really originates from Google.
func contentSEOIsGooglebot(userAgent string) bool {
	normalised := core.Lower(core.Trim(userAgent))
	return core.Contains(normalised, "googlebot")
}

View file

@ -0,0 +1,161 @@
// SPDX-License-Identifier: EUPL-1.2
package agentic
import (
"context"
"testing"
"time"
coremcp "dappco.re/go/mcp/pkg/mcp"
mcpsdk "github.com/modelcontextprotocol/go-sdk/mcp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestScheduleRevision_Good_CreatesPendingRevision checks that a scheduled
// revision is returned and stored as pending: ScheduledAt is nil in memory and
// serialised as JSON null in the store.
func TestScheduleRevision_Good_CreatesPendingRevision(t *testing.T) {
	withStateStoreTempDir(t)
	fixedNow := time.Date(2026, time.April, 26, 12, 0, 0, 0, time.UTC)
	restoreContentSEONow(t, fixedNow)

	prep := &PrepSubsystem{}
	defer prep.closeStateStore()

	created, err := prep.ScheduleRevision(context.Background(), "/help/hosting", "Updated copy")
	require.NoError(t, err)
	assert.Equal(t, "/help/hosting", created.PageID)
	assert.Equal(t, "Updated copy", created.Content)
	assert.Nil(t, created.ScheduledAt)
	assert.True(t, created.CreatedAt.Equal(fixedNow))

	stillPending, err := prep.GetPendingRevisions("/help/hosting")
	require.NoError(t, err)
	require.Len(t, stillPending, 1)
	assert.Nil(t, stillPending[0].ScheduledAt)

	// Inspect the raw stored JSON to confirm the pending marker is persisted.
	var stored []string
	collect := func(_ string, value string) bool {
		stored = append(stored, value)
		return true
	}
	prep.stateStoreRestore(contentSEORevisionGroup, collect)
	require.Len(t, stored, 1)
	assert.Contains(t, stored[0], `"scheduled_at":null`)
}
// TestScheduleRevision_Bad_EmptyPageID checks that an empty page ID is
// rejected with a "page_id is required" error.
func TestScheduleRevision_Bad_EmptyPageID(t *testing.T) {
	withStateStoreTempDir(t)

	prep := &PrepSubsystem{}
	defer prep.closeStateStore()

	_, scheduleErr := prep.ScheduleRevision(context.Background(), "", "Updated copy")
	require.Error(t, scheduleErr)
	assert.Contains(t, scheduleErr.Error(), "page_id is required")
}
// TestOnGooglebotVisit_Good_SetsPublishTimeInRange checks that a Googlebot
// visit moves a pending revision out of the pending set and stamps it with the
// base time plus the (stubbed) delay, inside the 8-62 minute window.
func TestOnGooglebotVisit_Good_SetsPublishTimeInRange(t *testing.T) {
	const forcedDelay = 37 * time.Minute

	withStateStoreTempDir(t)
	visitTime := time.Date(2026, time.April, 26, 12, 0, 0, 0, time.UTC)
	restoreContentSEONow(t, visitTime)
	restoreContentSEORandomDelay(t, forcedDelay)

	prep := &PrepSubsystem{}
	defer prep.closeStateStore()

	_, err := prep.ScheduleRevision(context.Background(), "/help/hosting", "Updated copy")
	require.NoError(t, err)

	err = prep.OnGooglebotVisit(context.Background(), "/help/hosting")
	require.NoError(t, err)

	stillPending, err := prep.GetPendingRevisions("/help/hosting")
	require.NoError(t, err)
	assert.Len(t, stillPending, 0)

	// pendingOnly=false so the now-scheduled revision is included.
	all, err := prep.contentSEORevisionRecords(prep.stateStoreInstance(), "/help/hosting", false)
	require.NoError(t, err)
	require.Len(t, all, 1)
	require.NotNil(t, all[0].Revision.ScheduledAt)

	elapsed := all[0].Revision.ScheduledAt.Sub(visitTime)
	assert.GreaterOrEqual(t, elapsed, 8*time.Minute)
	assert.LessOrEqual(t, elapsed, 62*time.Minute)
	assert.Equal(t, forcedDelay, elapsed)
}
// TestOnGooglebotVisit_Bad_NoPendingRevision checks that a visit to a page
// with no pending revisions succeeds and writes nothing to the store group.
func TestOnGooglebotVisit_Bad_NoPendingRevision(t *testing.T) {
	withStateStoreTempDir(t)

	prep := &PrepSubsystem{}
	defer prep.closeStateStore()

	visitErr := prep.OnGooglebotVisit(context.Background(), "/help/hosting")
	require.NoError(t, visitErr)

	storedCount := prep.stateStoreCount(contentSEORevisionGroup)
	assert.Equal(t, 0, storedCount)
}
// TestOnGooglebotVisit_Ugly_StoreError points CORE_WORKSPACE at a regular file
// instead of a directory and expects OnGooglebotVisit to fail with
// "state store unavailable".
func TestOnGooglebotVisit_Ugly_StoreError(t *testing.T) {
	// A plain file where the workspace directory is expected.
	blockedPath := t.TempDir() + "/blocked"
	written := fs.Write(blockedPath, "blocked")
	require.True(t, written.OK)
	t.Setenv("CORE_WORKSPACE", blockedPath)

	prep := &PrepSubsystem{}
	defer prep.closeStateStore()

	visitErr := prep.OnGooglebotVisit(context.Background(), "/help/hosting")
	require.Error(t, visitErr)
	assert.Contains(t, visitErr.Error(), "state store unavailable")
}
// TestContentSEO_RegisterTools_Good_RegistersScheduleTool registers the
// subsystem's tools on an MCP service, connects an in-memory client, and
// checks that content_seo_schedule appears in the tool listing.
func TestContentSEO_RegisterTools_Good_RegistersScheduleTool(t *testing.T) {
	t.Setenv("CORE_MCP_FULL", "1")
	ctx := context.Background()

	svc, err := coremcp.New(coremcp.Options{Unrestricted: true})
	require.NoError(t, err)

	prep := &PrepSubsystem{}
	prep.RegisterTools(svc)

	// Wire a client and the server together over in-memory transports.
	mcpServer := svc.Server()
	mcpClient := mcpsdk.NewClient(&mcpsdk.Implementation{Name: "test", Version: "0.1.0"}, nil)
	clientSide, serverSide := mcpsdk.NewInMemoryTransports()

	serverConn, err := mcpServer.Connect(ctx, serverSide, nil)
	require.NoError(t, err)
	t.Cleanup(func() { _ = serverConn.Close() })

	clientConn, err := mcpClient.Connect(ctx, clientSide, nil)
	require.NoError(t, err)
	t.Cleanup(func() { _ = clientConn.Close() })

	listing, err := clientConn.ListTools(ctx, nil)
	require.NoError(t, err)

	var registered []string
	for i := range listing.Tools {
		registered = append(registered, listing.Tools[i].Name)
	}
	assert.Contains(t, registered, "content_seo_schedule")
}
// restoreContentSEONow pins contentSEONow to now for the duration of the test
// and restores the previous clock function during test cleanup.
func restoreContentSEONow(t *testing.T, now time.Time) {
	t.Helper()

	original := contentSEONow
	t.Cleanup(func() { contentSEONow = original })

	contentSEONow = func() time.Time {
		return now
	}
}
// restoreContentSEORandomDelay makes contentSEORandomDelay always return delay
// for the duration of the test and restores the previous function during test
// cleanup.
func restoreContentSEORandomDelay(t *testing.T, delay time.Duration) {
	t.Helper()

	original := contentSEORandomDelay
	t.Cleanup(func() { contentSEORandomDelay = original })

	contentSEORandomDelay = func() (time.Duration, error) {
		return delay, nil
	}
}