From 084e69357f6125be5e8c6953581ebcdff85304e5 Mon Sep 17 00:00:00 2001
From: Snider
Date: Sun, 26 Apr 2026 00:16:49 +0100
Subject: [PATCH] feat(agent/content): SEO Natural Progression scheduler with Googlebot trigger (#543)

Implements pkg/agentic/content_seo.go: SEORevision, ScheduleRevision,
GetPendingRevisions, OnGooglebotVisit, Gin middleware hook, and
content_seo_schedule MCP tool. ScheduledAt assigned random 8-62m window
on first Googlebot hit. Persistence via .core/db.duckdb seo_revisions
group. AX-10 coverage in content_seo_test.go (Good/Bad/Ugly + MCP tool
registration).

Closes tasks.lthn.sh/view.php?id=543

Co-authored-by: Codex
---
 pkg/agentic/content.go          |   1 +
 pkg/agentic/content_seo.go      | 352 ++++++++++++++++++++++++++++++++
 pkg/agentic/content_seo_test.go | 161 +++++++++++++++
 3 files changed, 514 insertions(+)
 create mode 100644 pkg/agentic/content_seo.go
 create mode 100644 pkg/agentic/content_seo_test.go

diff --git a/pkg/agentic/content.go b/pkg/agentic/content.go
index 66268d7..3147646 100644
--- a/pkg/agentic/content.go
+++ b/pkg/agentic/content.go
@@ -438,6 +438,7 @@ func (s *PrepSubsystem) registerContentTools(svc *coremcp.Service) {
 		Name:        "content_schema_generate",
 		Description: "Generate SEO schema JSON-LD for article, FAQ, or how-to content.",
 	}, s.contentSchemaGenerate)
+	s.registerContentSEOTool(svc)
 }
 
 func (s *PrepSubsystem) contentGenerate(ctx context.Context, _ *mcp.CallToolRequest, input ContentGenerateInput) (*mcp.CallToolResult, ContentGenerateOutput, error) {
diff --git a/pkg/agentic/content_seo.go b/pkg/agentic/content_seo.go
new file mode 100644
index 0000000..725e9ab
--- /dev/null
+++ b/pkg/agentic/content_seo.go
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: EUPL-1.2
+
+package agentic
+
+import (
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"math/big"
+	"net/http"
+	"time"
+
+	core "dappco.re/go/core"
+	coremcp "dappco.re/go/mcp/pkg/mcp"
+	store "dappco.re/go/store"
+	"github.com/gin-gonic/gin"
+	"github.com/modelcontextprotocol/go-sdk/mcp"
+)
+
+// SEORevision is one stored revision of a page's content. ScheduledAt is nil
+// while the revision is pending and is set once a Googlebot visit schedules it.
+//
+// Usage example: `revision := agentic.SEORevision{PageID: "/help/hosting", Content: "Updated copy", ScheduledAt: nil, CreatedAt: time.Now()}`
+type SEORevision struct {
+	PageID      string     `json:"page_id"`
+	Content     string     `json:"content"`
+	ScheduledAt *time.Time `json:"scheduled_at"`
+	CreatedAt   time.Time  `json:"created_at"`
+}
+
+// ContentSEOScheduleInput is the input of the content_seo_schedule MCP tool.
+//
+// input := agentic.ContentSEOScheduleInput{PageID: "/help/hosting", Content: "Updated copy"}
+type ContentSEOScheduleInput struct {
+	PageID  string `json:"page_id"`
+	Content string `json:"content"`
+}
+
+// ContentSEOScheduleOutput is the result of the content_seo_schedule MCP tool.
+//
+// out := agentic.ContentSEOScheduleOutput{Success: true, Revision: agentic.SEORevision{PageID: "/help/hosting"}}
+type ContentSEOScheduleOutput struct {
+	Success  bool        `json:"success"`
+	Revision SEORevision `json:"revision"`
+}
+
+// seoRevisionRecord pairs a decoded revision with the store key it was read from.
+type seoRevisionRecord struct {
+	Key      string
+	Revision SEORevision
+}
+
+// contentSEORevisionGroup is the state-store group that holds every SEO revision.
+const contentSEORevisionGroup = "seo_revisions"
+
+var (
+	// contentSEONow supplies the current time; tests swap in a fixed clock.
+	contentSEONow = time.Now
+
+	// contentSEORandomDelay draws a publish delay of 8-62 whole minutes from
+	// crypto/rand; tests swap in a fixed delay.
+	contentSEORandomDelay = func() (time.Duration, error) {
+		// rand.Int returns a value in [0, 55); adding 8 yields 8..62 inclusive.
+		window := big.NewInt(55)
+		value, err := rand.Int(rand.Reader, window)
+		if err != nil {
+			return 0, core.E("contentSEORandomDelay", "read random delay", err)
+		}
+		return time.Duration(value.Int64()+8) * time.Minute, nil
+	}
+)
+
+// registerContentSEOTool registers the content_seo_schedule tool on svc.
+func (s *PrepSubsystem) registerContentSEOTool(svc *coremcp.Service) {
+	coremcp.AddToolRecorded(svc, svc.Server(), "agentic", &mcp.Tool{
+		Name:        "content_seo_schedule",
+		Description: "Create a pending Natural Progression SEO revision that stays unpublished until a Googlebot visit schedules it.",
+	}, s.contentSEOScheduleTool)
+}
+
+// contentSEOScheduleTool is the MCP handler behind content_seo_schedule; it delegates to ScheduleRevision.
+func (s *PrepSubsystem) contentSEOScheduleTool(ctx context.Context, _ *mcp.CallToolRequest, input ContentSEOScheduleInput) (*mcp.CallToolResult, ContentSEOScheduleOutput, error) {
+	revision, err := s.ScheduleRevision(ctx, input.PageID, input.Content)
+	if err != nil {
+		return nil, ContentSEOScheduleOutput{}, err
+	}
+
+	return nil, ContentSEOScheduleOutput{
+		Success:  true,
+		Revision: revision,
+	}, nil
+}
+
+// ScheduleRevision stores a pending revision (ScheduledAt nil) for pageID and
+// returns it. It fails when ctx is already done, pageID is blank after
+// trimming, or the state store is unavailable or rejects the write.
+//
+// revision, err := subsystem.ScheduleRevision(ctx, "/help/hosting", "Updated copy")
+func (s *PrepSubsystem) ScheduleRevision(ctx context.Context, pageID, content string) (SEORevision, error) {
+	if err := contentSEOContextErr("scheduleRevision", ctx); err != nil {
+		return SEORevision{}, err
+	}
+
+	pageID = core.Trim(pageID)
+	if pageID == "" {
+		return SEORevision{}, core.E("scheduleRevision", "page_id is required", nil)
+	}
+
+	storeInstance, err := s.contentSEOStore()
+	if err != nil {
+		return SEORevision{}, err
+	}
+
+	revision := SEORevision{
+		PageID:      pageID,
+		Content:     content,
+		ScheduledAt: nil,
+		CreatedAt:   contentSEONow(),
+	}
+	if err := storeInstance.Set(contentSEORevisionGroup, contentSEORevisionKey(revision.CreatedAt), core.JSONMarshalString(revision)); err != nil {
+		return SEORevision{}, core.E("scheduleRevision", "persist revision", err)
+	}
+
+	return revision, nil
+}
+
+// GetPendingRevisions returns the stored revisions for pageID that have no
+// ScheduledAt yet. A blank pageID or an unavailable state store is an error.
+//
+// revisions, err := subsystem.GetPendingRevisions("/help/hosting")
+func (s *PrepSubsystem) GetPendingRevisions(pageID string) ([]SEORevision, error) {
+	pageID = core.Trim(pageID)
+	if pageID == "" {
+		return nil, core.E("getPendingRevisions", "page_id is required", nil)
+	}
+
+	storeInstance, err := s.contentSEOStore()
+	if err != nil {
+		return nil, err
+	}
+
+	records, err := s.contentSEORevisionRecords(storeInstance, pageID, true)
+	if err != nil {
+		return nil, err
+	}
+
+	revisions := make([]SEORevision, 0, len(records))
+	for _, record := range records {
+		revisions = append(revisions, record.Revision)
+	}
+	return revisions, nil
+}
+
+// OnGooglebotVisit schedules every pending revision of pageID: each one gets
+// ScheduledAt = now + its own contentSEORandomDelay() and is rewritten inside a
+// single store transaction. It is a no-op when nothing is pending.
+//
+// err := subsystem.OnGooglebotVisit(ctx, "/help/hosting")
+func (s *PrepSubsystem) OnGooglebotVisit(ctx context.Context, pageID string) error {
+	if err := contentSEOContextErr("onGooglebotVisit", ctx); err != nil {
+		return err
+	}
+
+	pageID = core.Trim(pageID)
+	if pageID == "" {
+		return core.E("onGooglebotVisit", "page_id is required", nil)
+	}
+
+	storeInstance, err := s.contentSEOStore()
+	if err != nil {
+		return err
+	}
+
+	records, err := s.contentSEORevisionRecords(storeInstance, pageID, true)
+	if err != nil {
+		return err
+	}
+	if len(records) == 0 {
+		return nil
+	}
+
+	baseTime := contentSEONow()
+	if err := storeInstance.Transaction(func(transaction *store.StoreTransaction) error {
+		for _, record := range records {
+			if err := contentSEOContextErr("onGooglebotVisit", ctx); err != nil {
+				return err
+			}
+
+			delay, err := contentSEORandomDelay()
+			if err != nil {
+				return core.E("onGooglebotVisit", "compute publish delay", err)
+			}
+
+			scheduledAt := baseTime.Add(delay)
+			record.Revision.ScheduledAt = &scheduledAt
+			if err := transaction.Set(contentSEORevisionGroup, record.Key, core.JSONMarshalString(record.Revision)); err != nil {
+				return core.E("onGooglebotVisit", "persist scheduled revision", err)
+			}
+		}
+		return nil
+	}); err != nil {
+		return core.E("onGooglebotVisit", "transaction", err)
+	}
+
+	return nil
+}
+
+// HandleGooglebotVisit calls OnGooglebotVisit when userAgent identifies
+// Googlebot and does nothing otherwise.
+//
+// err := subsystem.HandleGooglebotVisit(ctx, "/help/hosting", request.UserAgent())
+func (s *PrepSubsystem) HandleGooglebotVisit(ctx context.Context, pageID, userAgent string) error {
+	if !contentSEOIsGooglebot(userAgent) {
+		return nil
+	}
+	return s.OnGooglebotVisit(ctx, pageID)
+}
+
+// ContentSEOGooglebotMiddleware returns Gin middleware that, after the rest of
+// the chain has run, triggers OnGooglebotVisit for GET requests from a
+// Googlebot user agent whose response status is below 400. The page ID comes
+// from resolvePageID when it is non-nil and returns a non-blank value, and
+// from the request URL path otherwise. Trigger failures are logged, not returned.
+//
+// middleware := subsystem.ContentSEOGooglebotMiddleware(nil)
+func (s *PrepSubsystem) ContentSEOGooglebotMiddleware(resolvePageID func(*gin.Context) string) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		// Check c before c.Next(); the nil guard is useless once Next has run.
+		if c == nil {
+			return
+		}
+		c.Next()
+
+		if c.Request == nil || c.Request.Method != http.MethodGet {
+			return
+		}
+		if !contentSEOIsGooglebot(c.Request.UserAgent()) {
+			return
+		}
+		if c.Writer != nil && c.Writer.Status() >= http.StatusBadRequest {
+			return
+		}
+
+		pageID := ""
+		if resolvePageID != nil {
+			// Trim first so a result that trims to empty still falls back to the path.
+			pageID = core.Trim(resolvePageID(c))
+		}
+		if pageID == "" && c.Request.URL != nil {
+			pageID = core.Trim(c.Request.URL.Path)
+		}
+		if pageID == "" {
+			return
+		}
+
+		if err := s.OnGooglebotVisit(c.Request.Context(), pageID); err != nil {
+			core.Warn("content seo googlebot trigger failed", "page_id", pageID, "error", err)
+		}
+	}
+}
+
+// contentSEOStore returns the subsystem's state store, or an error when the receiver is nil or the store is unavailable.
+func (s *PrepSubsystem) contentSEOStore() (*store.Store, error) {
+	if s == nil {
+		return nil, core.E("contentSEOStore", "subsystem is nil", nil)
+	}
+
+	storeInstance := s.stateStoreInstance()
+	if storeInstance != nil {
+		return storeInstance, nil
+	}
+	if err := s.stateStoreErr(); err != nil {
+		return nil, core.E("contentSEOStore", "state store unavailable", err)
+	}
+	return nil, core.E("contentSEOStore", "state store unavailable", nil)
+}
+
+// contentSEORevisionRecords decodes the revisions in the seo_revisions group,
+// keeping those that match pageID (all pages when it is blank) and, with
+// pendingOnly, only those without a ScheduledAt. Any undecodable entry aborts the scan.
+func (s *PrepSubsystem) contentSEORevisionRecords(storeInstance *store.Store, pageID string, pendingOnly bool) ([]seoRevisionRecord, error) {
+	pageID = core.Trim(pageID)
+	records := make([]seoRevisionRecord, 0)
+
+	for entry, err := range storeInstance.AllSeq(contentSEORevisionGroup) {
+		if err != nil {
+			return nil, core.E("contentSEORevisionRecords", "iterate revisions", err)
+		}
+
+		revision, err := contentSEORevisionValue(entry.Value)
+		if err != nil {
+			return nil, err
+		}
+		if pageID != "" && revision.PageID != pageID {
+			continue
+		}
+		if pendingOnly && revision.ScheduledAt != nil {
+			continue
+		}
+
+		records = append(records, seoRevisionRecord{
+			Key:      entry.Key,
+			Revision: revision,
+		})
+	}
+
+	return records, nil
+}
+
+// contentSEORevisionValue decodes one stored JSON value and rejects revisions with a blank page ID.
+func contentSEORevisionValue(value string) (SEORevision, error) {
+	var revision SEORevision
+	result := core.JSONUnmarshalString(value, &revision)
+	if !result.OK {
+		if err, ok := result.Value.(error); ok {
+			return SEORevision{}, core.E("contentSEORevisionValue", "decode revision", err)
+		}
+		return SEORevision{}, core.E("contentSEORevisionValue", "decode revision", nil)
+	}
+	if core.Trim(revision.PageID) == "" {
+		return SEORevision{}, core.E("contentSEORevisionValue", "revision page_id is empty", nil)
+	}
+	return revision, nil
+}
+
+// contentSEORevisionKey builds a store key from createdAt in Unix nanoseconds plus a random hex suffix.
+func contentSEORevisionKey(createdAt time.Time) string {
+	return core.Concat(core.Sprint(createdAt.UnixNano()), "-", contentSEORandomHex())
+}
+
+// contentSEORandomHex returns six hex characters from crypto/rand, falling
+// back to "000000" if the random source fails.
+func contentSEORandomHex() string {
+	bytes := make([]byte, 3)
+	if _, err := rand.Read(bytes); err != nil {
+		return "000000"
+	}
+	return hex.EncodeToString(bytes)
+}
+
+// contentSEOContextErr wraps ctx.Err() for operation; it returns nil for a nil or still-active ctx.
+func contentSEOContextErr(operation string, ctx context.Context) error {
+	if ctx == nil || ctx.Err() == nil {
+		return nil
+	}
+	return core.E(operation, "cancelled", ctx.Err())
+}
+
+// contentSEOIsGooglebot reports whether userAgent contains "googlebot", ignoring case.
+func contentSEOIsGooglebot(userAgent string) bool {
+	return core.Contains(core.Lower(core.Trim(userAgent)), "googlebot")
+}
diff --git a/pkg/agentic/content_seo_test.go b/pkg/agentic/content_seo_test.go
new file mode 100644
index 0000000..3833442
--- /dev/null
+++ b/pkg/agentic/content_seo_test.go
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: EUPL-1.2
+
+package agentic
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	coremcp "dappco.re/go/mcp/pkg/mcp"
+	mcpsdk "github.com/modelcontextprotocol/go-sdk/mcp"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestScheduleRevision_Good_CreatesPendingRevision(t *testing.T) {
+	withStateStoreTempDir(t)
+
+	now := time.Date(2026, time.April, 26, 12, 0, 0, 0, time.UTC)
+	restoreContentSEONow(t, now)
+
+	subsystem := &PrepSubsystem{}
+	defer subsystem.closeStateStore()
+
+	revision, err := subsystem.ScheduleRevision(context.Background(), "/help/hosting", "Updated copy")
+	require.NoError(t, err)
+	assert.Equal(t, "/help/hosting", revision.PageID)
+	assert.Equal(t, "Updated copy", revision.Content)
+	assert.Nil(t, revision.ScheduledAt)
+	assert.True(t, revision.CreatedAt.Equal(now))
+
+	pending, err := subsystem.GetPendingRevisions("/help/hosting")
+	require.NoError(t, err)
+	require.Len(t, pending, 1)
+	assert.Nil(t, pending[0].ScheduledAt)
+
+	var rawEntries []string
+	subsystem.stateStoreRestore(contentSEORevisionGroup, func(_ string, value string) bool {
+		rawEntries = append(rawEntries, value)
+		return true
+	})
+	require.Len(t, rawEntries, 1)
+	assert.Contains(t, rawEntries[0], `"scheduled_at":null`)
+}
+
+func TestScheduleRevision_Bad_EmptyPageID(t *testing.T) {
+	withStateStoreTempDir(t)
+
+	subsystem := &PrepSubsystem{}
+	defer subsystem.closeStateStore()
+
+	_, err := subsystem.ScheduleRevision(context.Background(), "", "Updated copy")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "page_id is required")
+}
+
+func TestOnGooglebotVisit_Good_SetsPublishTimeInRange(t *testing.T) {
+	withStateStoreTempDir(t)
+
+	now := time.Date(2026, time.April, 26, 12, 0, 0, 0, time.UTC)
+	restoreContentSEONow(t, now)
+	restoreContentSEORandomDelay(t, 37*time.Minute)
+
+	subsystem := &PrepSubsystem{}
+	defer subsystem.closeStateStore()
+
+	_, err := subsystem.ScheduleRevision(context.Background(), "/help/hosting", "Updated copy")
+	require.NoError(t, err)
+	require.NoError(t, subsystem.OnGooglebotVisit(context.Background(), "/help/hosting"))
+
+	pending, err := subsystem.GetPendingRevisions("/help/hosting")
+	require.NoError(t, err)
+	assert.Len(t, pending, 0)
+
+	records, err := subsystem.contentSEORevisionRecords(subsystem.stateStoreInstance(), "/help/hosting", false)
+	require.NoError(t, err)
+	require.Len(t, records, 1)
+	require.NotNil(t, records[0].Revision.ScheduledAt)
+
+	delta := records[0].Revision.ScheduledAt.Sub(now)
+	assert.GreaterOrEqual(t, delta, 8*time.Minute)
+	assert.LessOrEqual(t, delta, 62*time.Minute)
+	assert.Equal(t, 37*time.Minute, delta)
+}
+
+func TestOnGooglebotVisit_Bad_NoPendingRevision(t *testing.T) {
+	withStateStoreTempDir(t)
+
+	subsystem := &PrepSubsystem{}
+	defer subsystem.closeStateStore()
+
+	require.NoError(t, subsystem.OnGooglebotVisit(context.Background(), "/help/hosting"))
+	assert.Equal(t, 0, subsystem.stateStoreCount(contentSEORevisionGroup))
+}
+
+func TestOnGooglebotVisit_Ugly_StoreError(t *testing.T) {
+	root := t.TempDir()
+	blocked := root + "/blocked"
+	writeResult := fs.Write(blocked, "blocked")
+	require.True(t, writeResult.OK)
+	t.Setenv("CORE_WORKSPACE", blocked)
+
+	subsystem := &PrepSubsystem{}
+	defer subsystem.closeStateStore()
+
+	err := subsystem.OnGooglebotVisit(context.Background(), "/help/hosting")
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "state store unavailable")
+}
+
+func TestContentSEO_RegisterTools_Good_RegistersScheduleTool(t *testing.T) {
+	t.Setenv("CORE_MCP_FULL", "1")
+
+	svc, err := coremcp.New(coremcp.Options{Unrestricted: true})
+	require.NoError(t, err)
+
+	subsystem := &PrepSubsystem{}
+	subsystem.RegisterTools(svc)
+
+	server := svc.Server()
+	client := mcpsdk.NewClient(&mcpsdk.Implementation{Name: "test", Version: "0.1.0"}, nil)
+	clientTransport, serverTransport := mcpsdk.NewInMemoryTransports()
+
+	serverSession, err := server.Connect(context.Background(), serverTransport, nil)
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = serverSession.Close() })
+
+	clientSession, err := client.Connect(context.Background(), clientTransport, nil)
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = clientSession.Close() })
+
+	result, err := clientSession.ListTools(context.Background(), nil)
+	require.NoError(t, err)
+
+	var toolNames []string
+	for _, tool := range result.Tools {
+		toolNames = append(toolNames, tool.Name)
+	}
+
+	assert.Contains(t, toolNames, "content_seo_schedule")
+}
+
+func restoreContentSEONow(t *testing.T, now time.Time) {
+	t.Helper()
+
+	previous := contentSEONow
+	contentSEONow = func() time.Time { return now }
+	t.Cleanup(func() {
+		contentSEONow = previous
+	})
+}
+
+func restoreContentSEORandomDelay(t *testing.T, delay time.Duration) {
+	t.Helper()
+
+	previous := contentSEORandomDelay
+	contentSEORandomDelay = func() (time.Duration, error) { return delay, nil }
+	t.Cleanup(func() {
+		contentSEORandomDelay = previous
+	})
+}