';
-
- return strip_tags($content, $allowedTags);
+ return app(HtmlSanitiser::class)->sanitise($content);
}
/**
diff --git a/Services/HtmlSanitiser.php b/Services/HtmlSanitiser.php
new file mode 100644
index 0000000..301e225
--- /dev/null
+++ b/Services/HtmlSanitiser.php
@@ -0,0 +1,151 @@
+set('HTML.Allowed', implode(',', [
+ // Structure
+ 'div[id|class]',
+ 'span[id|class]',
+ 'section[id|class]',
+ 'article[id|class]',
+
+ // Text
+ 'h1[id|class]',
+ 'h2[id|class]',
+ 'h3[id|class]',
+ 'h4[id|class]',
+ 'h5[id|class]',
+ 'h6[id|class]',
+ 'p[id|class]',
+ 'br',
+ 'hr[id|class]',
+ 'strong',
+ 'em',
+ 'b',
+ 'i',
+ 'u',
+ 'small',
+ 'mark',
+ 'del',
+ 'ins',
+ 'sub',
+ 'sup',
+ 'code',
+ 'pre[id|class]',
+ 'blockquote[id|class]',
+
+ // Lists
+ 'ul[id|class]',
+ 'ol[id|class]',
+ 'li[id|class]',
+
+ // Links and media
+ 'a[href|id|class|target|rel]',
+ 'img[src|alt|width|height|id|class]',
+ 'figure[id|class]',
+ 'figcaption[id|class]',
+
+ // Tables
+ 'table[id|class]',
+ 'thead[id|class]',
+ 'tbody[id|class]',
+ 'tr[id|class]',
+ 'th[id|class|colspan|rowspan]',
+ 'td[id|class|colspan|rowspan]',
+ ]));
+
+ // Safe link targets
+ $config->set('Attr.AllowedFrameTargets', ['_blank', '_self']);
+
+ // Add rel="nofollow" to outgoing links and rel="noopener" to links that set a target
+ $config->set('HTML.Nofollow', true);
+ $config->set('HTML.TargetNoopener', true);
+
+ // Use the configured cache directory when it exists and is writable; otherwise disable definition caching
+ $cacheDir = config('content.purifier_cache_dir');
+ if ($cacheDir && is_dir($cacheDir) && is_writable($cacheDir)) {
+ $config->set('Cache.SerializerPath', $cacheDir);
+ } else {
+ $config->set('Cache.DefinitionImpl', null);
+ }
+
+ // Safe URI schemes only
+ $config->set('URI.AllowedSchemes', [
+ 'http' => true,
+ 'https' => true,
+ 'mailto' => true,
+ 'tel' => true,
+ ]);
+
+ // data: URIs are rejected by the URI.AllowedSchemes whitelist above; these flags keep embedded resources (e.g. images) enabled
+ $config->set('URI.DisableExternalResources', false);
+ $config->set('URI.DisableResources', false);
+
+ $this->purifier = new HTMLPurifier($config);
+ }
+
+ /**
+ * Sanitise HTML content to prevent XSS attacks.
+ *
+ * Passes the input through the configured HTMLPurifier instance. Only a
+ * genuinely empty string short-circuits; falsy text such as "0" is purified.
+ *
+ * @param string $html The raw HTML content to sanitise
+ * @return string The purified HTML, or '' when $html is the empty string
+ */
+ public function sanitise(string $html): string
+ {
+ if ($html === '') {
+ return '';
+ }
+
+ return $this->purifier->purify($html);
+ }
+
+ /**
+ * Report whether the HTMLPurifier class can be resolved, so callers can
+ * confirm the dependency is installed before building a sanitiser.
+ */
+ public static function isAvailable(): bool
+ {
+ $purifierClass = HTMLPurifier::class;
+
+ return class_exists($purifierClass);
+ }
+}
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..d569462
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,318 @@
+# TODO - core-content
+
+Production quality improvements for the Content Module.
+
+**Legend:**
+- P1: Critical/Security - Must fix immediately
+- P2: High priority - Fix soon
+- P3: Medium priority - Important improvements
+- P4: Low priority - Nice to have
+- P5: Nice-to-have - When time permits
+- P6+: Future/Backlog - Long-term improvements
+
+---
+
+## P1 - Critical/Security
+
+### SEC-001: Require signature verification on webhook endpoints
+- **Status:** Open
+- **Description:** The webhook endpoint at `POST /api/content/webhooks/{endpoint}` accepts external requests but only validates via HMAC signature. If signature verification is skipped (when no secret is configured), the endpoint is vulnerable.
+- **File:** `Controllers/Api/ContentWebhookController.php:205-210`
+- **Fix:** Require signature verification always OR add explicit opt-in flag to disable it, with warning logs.
+- **Acceptance:** Webhooks without secrets must be explicitly enabled per-endpoint.
+
+### SEC-002: Sanitise HTML content before rendering
+- **Status:** Fixed
+- **Description:** `ContentItem::getSanitisedContent()` falls back to `strip_tags()` if HTMLPurifier is unavailable. This fallback is insufficient for XSS protection.
+- **File:** `Models/ContentItem.php:333-351`
+- **Fix:** Always require HTMLPurifier or a robust sanitiser. Add package dependency check in boot.
+- **Acceptance:** Content rendering always goes through proper XSS sanitisation.
+- **Resolution:** Created `Services/HtmlSanitiser.php` using HTMLPurifier as a required dependency. Added HTMLPurifier to composer.json require. Added boot-time validation that throws RuntimeException if dependency missing. Removed insecure strip_tags() fallback. Added comprehensive XSS prevention tests in `tests/Unit/HtmlSanitiserTest.php`.
+
+### SEC-003: Validate workspace access in MCP handlers
+- **Status:** Open
+- **Description:** MCP handlers check entitlements but workspace resolution via `orWhere('id', $slug)` could expose content across workspaces if numeric IDs are guessed.
+- **File:** `Mcp/Handlers/ContentCreateHandler.php:212-220`, `ContentSearchHandler.php:129-137`
+- **Fix:** Add explicit workspace ownership/membership check before returning data.
+- **Acceptance:** Users can only access content from workspaces they own or are members of.
+
+### SEC-004: Rate limit preview URL generation
+- **Status:** Open
+- **Description:** Preview token generation has no rate limiting. An attacker could enumerate valid content IDs by watching response times.
+- **File:** `Controllers/ContentPreviewController.php:26-49`
+- **Fix:** Add rate limiting to preview generation endpoint.
+- **Acceptance:** Preview generation limited to 30/minute per user.
+
+### SEC-005: Validate content_type enum in webhook payloads
+- **Status:** Open
+- **Description:** Webhook processing accepts arbitrary `content_type` strings from external sources without validation.
+- **File:** `Jobs/ProcessContentWebhook.php:288-289`
+- **Fix:** Validate against `ContentType` enum before assigning to model.
+- **Acceptance:** Invalid content types rejected with clear error message.
+
+---
+
+## P2 - High Priority
+
+### DX-001: Add missing type hints to scope methods
+- **Status:** Open
+- **Description:** Scope methods like `scopeForWorkspace`, `scopePublished` etc. use `$query` without `Builder` type hint.
+- **Files:** `Models/ContentItem.php:147-198`, `Models/ContentBrief.php:181-215`
+- **Fix:** Add `\Illuminate\Database\Eloquent\Builder` type hints.
+- **Acceptance:** All scope methods type-hint `$query` as `Builder` and declare a `Builder` return type.
+
+### DX-002: Document search service API response format
+- **Status:** Open
+- **Description:** `ContentSearchService::formatForApi()` returns a specific structure but it's not documented.
+- **File:** `Services/ContentSearchService.php:467-493`
+- **Fix:** Add PHPDoc with return type schema or create a Resource class.
+- **Acceptance:** API response format documented with example JSON.
+
+### TEST-001: Add integration tests for AI generation pipeline
+- **Status:** Open
+- **Description:** `AIGatewayService` has no tests. The two-stage Gemini+Claude pipeline is critical but untested.
+- **File:** `Services/AIGatewayService.php`
+- **Fix:** Add tests with mocked API responses for `generateDraft`, `refineDraft`, `generateAndRefine`.
+- **Acceptance:** 80%+ coverage on AIGatewayService with edge case tests.
+
+### TEST-002: Add tests for webhook signature verification
+- **Status:** Open
+- **Description:** `ContentWebhookEndpoint::verifySignature()` handles multiple formats but isn't fully tested.
+- **File:** `Models/ContentWebhookEndpoint.php:204-237`
+- **Fix:** Add unit tests for each signature format and grace period behaviour.
+- **Acceptance:** Tests cover: sha256= prefix, grace period rotation, empty signature handling.
+
+### PERF-001: Add database index for content search
+- **Status:** Open
+- **Description:** LIKE-based search on `content_html` has no fulltext index, causing table scans.
+- **File:** `Services/ContentSearchService.php:142-162`, `Migrations/0001_01_01_000001_create_content_tables.php`
+- **Fix:** Add MySQL fulltext index on title, excerpt, content_markdown columns OR document Meilisearch as required for production.
+- **Acceptance:** Search queries under 100ms for 10k+ content items.
+
+### PERF-002: Optimise revision pruning for large datasets
+- **Status:** Open
+- **Description:** `ContentRevision::pruneAll()` loads all content_item_ids into memory before iterating.
+- **File:** `Models/ContentRevision.php:595-609`
+- **Fix:** Use `chunk()` or cursor to process in batches.
+- **Acceptance:** Pruning handles 100k+ content items without memory issues.
+
+### BUG-001: Fix content_briefs migration schema mismatch
+- **Status:** Open
+- **Description:** Migration defines `content_briefs` with different columns than model fillable (e.g., `user_id` vs model relationships).
+- **File:** `Migrations/0001_01_01_000001_create_content_tables.php:215-238`, `Models/ContentBrief.php:49-75`
+- **Fix:** Align migration with actual model usage or add a migration to fix schema.
+- **Acceptance:** All ContentBrief columns are used and documented.
+
+### BUG-002: Fix ai_usage migration column naming
+- **Status:** Open
+- **Description:** Migration creates `feature` column but model uses `purpose`. Creates confusion.
+- **File:** `Migrations/0001_01_01_000001_create_content_tables.php:246`, `Models/AIUsage.php:46`
+- **Fix:** Add migration to rename column OR update model to use `feature`.
+- **Acceptance:** Column name matches model fillable property.
+
+---
+
+## P3 - Medium Priority
+
+### CODE-001: Extract webhook processing logic into service
+- **Status:** Open
+- **Description:** `ProcessContentWebhook` job contains 500+ lines of business logic that should be in a service.
+- **File:** `Jobs/ProcessContentWebhook.php`
+- **Fix:** Create `ContentWebhookProcessingService` with methods for each event type.
+- **Acceptance:** Job is under 100 lines, delegates to service.
+
+### CODE-002: Create ContentBriefResource for API responses
+- **Status:** Open
+- **Description:** Controllers manually format brief responses. A Resource class would ensure consistency.
+- **File:** `Controllers/Api/ContentBriefController.php` references `ContentBriefResource` which may not exist.
+- **Fix:** Create or verify `Resources/ContentBriefResource.php` exists with proper formatting.
+- **Acceptance:** All brief API responses use the Resource class.
+
+### CODE-003: Consolidate workspace resolution logic
+- **Status:** Open
+- **Description:** Three different `resolveWorkspace()` methods exist with similar but not identical logic.
+- **Files:** `Controllers/Api/ContentSearchController.php`, `Mcp/Handlers/*`, `Services/ContentRender.php`
+- **Fix:** Create trait or shared helper in core-tenant.
+- **Acceptance:** Single source of truth for workspace resolution.
+
+### TEST-003: Add tests for revision diff algorithm
+- **Status:** Open
+- **Description:** `ContentRevision::getDiff()` and LCS algorithm are complex but only lightly tested.
+- **File:** `Models/ContentRevision.php:233-509`
+- **Fix:** Add unit tests for edge cases: empty content, identical content, very long content.
+- **Acceptance:** Diff algorithm has 90%+ coverage with edge cases documented.
+
+### TEST-004: Add webhook retry service tests
+- **Status:** Open
+- **Description:** `WebhookRetryService` has retry logic with exponential backoff but no tests.
+- **File:** `Services/WebhookRetryService.php`
+- **Fix:** Add tests for retry scheduling, backoff intervals, exhaustion handling.
+- **Acceptance:** Full coverage of retry state transitions.
+
+### FEAT-001: Add content scheduling command
+- **Status:** Open
+- **Description:** `PublishScheduledContent` command is registered but implementation needs verification.
+- **File:** `Console/Commands/PublishScheduledContent.php`
+- **Fix:** Verify command works, add scheduler entry documentation.
+- **Acceptance:** Scheduled content publishes automatically at the correct time.
+
+### FEAT-002: Add media upload validation
+- **Status:** Open
+- **Description:** `ContentMediaController` store method should validate file types, sizes, dimensions.
+- **File:** `Controllers/Api/ContentMediaController.php`
+- **Fix:** Add comprehensive validation rules for media uploads.
+- **Acceptance:** Reject files over size limit, invalid types, malformed images.
+
+### FEAT-003: Add bulk operations for content items
+- **Status:** Open
+- **Description:** No bulk delete, bulk status change, or bulk category assignment endpoints.
+- **Files:** API routes, new controller methods needed
+- **Fix:** Add bulk endpoints with proper authorisation and rate limiting.
+- **Acceptance:** Can bulk-update up to 50 items per request.
+
+---
+
+## P4 - Low Priority
+
+### DX-003: Add IDE helper annotations to models
+- **Status:** Open
+- **Description:** Models lack `@property` annotations for dynamic attributes like `status_color`.
+- **Files:** All models in `Models/`
+- **Fix:** Add comprehensive `@property` PHPDoc blocks for all magic attributes.
+- **Acceptance:** IDE autocomplete works for all model properties.
+
+### DX-004: Document configuration options
+- **Status:** Open
+- **Description:** `config.php` has comments but no comprehensive documentation of all options and their effects.
+- **File:** `config.php`
+- **Fix:** Add CLAUDE.md section or dedicated config docs explaining each option.
+- **Acceptance:** Every config option documented with type, default, and example.
+
+### CODE-004: Remove deprecated WordPress-specific code paths
+- **Status:** Open
+- **Description:** Multiple methods have WordPress-specific handling that may be unused.
+- **Files:** `Models/ContentItem.php` (wp_id, wp_guid), various scopes
+- **Fix:** Audit usage, add deprecation notices if still needed, or remove.
+- **Acceptance:** Clear documentation of what is deprecated vs maintained.
+
+### CODE-005: Standardise error response format
+- **Status:** Open
+- **Description:** Error responses vary: `['error' => ...]`, `['message' => ...]`, different status codes.
+- **Files:** All controllers in `Controllers/Api/`
+- **Fix:** Use consistent error format: `{error: string, code: string, message: string}`.
+- **Acceptance:** All error responses follow documented schema.
+
+### PERF-003: Add eager loading hints to API responses
+- **Status:** Open
+- **Description:** Some API responses trigger N+1 queries for related data.
+- **Files:** `Controllers/Api/ContentBriefController.php:31-77`
+- **Fix:** Add `->with(['workspace', 'contentItem'])` where appropriate.
+- **Acceptance:** No N+1 queries in API responses (verified with debugbar).
+
+### TEST-005: Add factory states for all content statuses
+- **Status:** Open
+- **Description:** Factory states exist but may not cover all status/type combinations.
+- **Files:** `Database/Factories/*.php` (if they exist, or in test setup)
+- **Fix:** Ensure factories have states for: draft, publish, future, private, pending, trash.
+- **Acceptance:** Tests can easily create content in any status.
+
+---
+
+## P5 - Nice to Have
+
+### FEAT-004: Add content versioning comparison UI support
+- **Status:** Open
+- **Description:** `ContentRevision::getDiff()` returns data but no documented UI integration.
+- **File:** `Models/ContentRevision.php`
+- **Fix:** Document how to integrate diff data with frontend diff viewer.
+- **Acceptance:** Example Livewire component or documentation for diff display.
+
+### FEAT-005: Add webhook event deduplication
+- **Status:** Open
+- **Description:** Same webhook could be received multiple times (network retry). No dedup.
+- **File:** `Jobs/ProcessContentWebhook.php`
+- **Fix:** Add deduplication based on payload hash + timestamp window.
+- **Acceptance:** Duplicate webhooks within 5 minutes are skipped.
+
+### FEAT-006: Add content analytics tracking
+- **Status:** Open
+- **Description:** No tracking of content views, engagement, or performance metrics.
+- **Files:** New feature needed
+- **Fix:** Integrate with core-analytics or add simple view tracking.
+- **Acceptance:** Can see view counts and basic metrics per content item.
+
+### CODE-006: Add event dispatching for content lifecycle
+- **Status:** Open
+- **Description:** Content creation/update/publish doesn't dispatch domain events for other modules.
+- **Files:** `Models/ContentItem.php`, `Observers/ContentItemObserver.php`
+- **Fix:** Dispatch events like `ContentPublished`, `ContentUpdated` etc.
+- **Acceptance:** Other modules can listen for content events.
+
+### DOCS-001: Add API documentation
+- **Status:** Open
+- **Description:** API endpoints lack OpenAPI/Swagger documentation.
+- **Files:** `routes/api.php`
+- **Fix:** Add Scribe or OpenAPI annotations for all endpoints.
+- **Acceptance:** OpenAPI spec can be generated and used in API clients.
+
+---
+
+## P6 - Future/Backlog
+
+### FEAT-007: Add content workflow/approval system
+- **Status:** Backlog
+- **Description:** No formal review/approval workflow for content before publishing.
+- **Fix:** Add ContentWorkflow model with states and transitions.
+
+### FEAT-008: Add content localisation/translation support
+- **Status:** Backlog
+- **Description:** No i18n support for multilingual content.
+- **Fix:** Add locale field and translation linking to ContentItem.
+
+### FEAT-009: Add content A/B testing
+- **Status:** Backlog
+- **Description:** No ability to test content variations.
+- **Fix:** Add ContentVariant model for headline/content testing.
+
+### PERF-004: Add content caching layer
+- **Status:** Backlog
+- **Description:** CDN purge exists but no server-side caching strategy documented.
+- **Fix:** Document caching strategy, add Redis caching for hot content.
+
+### CODE-007: Extract prompts to database-driven system
+- **Status:** Backlog
+- **Description:** AI prompts are hardcoded in `AIGatewayService`. Prompts table exists but unused for this.
+- **File:** `Services/AIGatewayService.php:226-525`
+- **Fix:** Load prompts from database, allow admin editing.
+
+---
+
+## Completed
+
+### SEC-002: HTML sanitisation fallback vulnerability (2026-01-29)
+- Created `Services/HtmlSanitiser.php` using HTMLPurifier
+- Added `ezyang/htmlpurifier` as required dependency in composer.json
+- Updated `ContentItem::getSanitisedContent()` to use the new service
+- Added boot-time validation to throw exception if HTMLPurifier is missing
+- Removed insecure `strip_tags()` fallback that allowed XSS via event handlers
+- Added 30+ unit tests covering XSS attack vectors and safe HTML preservation
+
+---
+
+## Notes
+
+### Dependencies
+- Requires `core-php` for events and base infrastructure
+- Requires `core-tenant` for workspace and user models
+- Requires `ezyang/htmlpurifier` for XSS sanitisation (security-critical)
+- Optional: `core-agentic` for AI services (GeminiService, ClaudeService)
+- Optional: `core-mcp` for MCP tool registration
+
+### Testing
+Run tests with: `composer test` from package root.
+Run single test: `./vendor/bin/pest --filter=ContentSearchServiceTest`
+
+### Last Audit
+- **Date:** 2026-01-29
+- **By:** Claude Code (core-content audit)
+- **Files Reviewed:** ~70 PHP files
diff --git a/composer.json b/composer.json
index 2d22fa3..fe81244 100644
--- a/composer.json
+++ b/composer.json
@@ -5,7 +5,8 @@
"license": "EUPL-1.2",
"require": {
"php": "^8.2",
- "host-uk/core": "dev-main"
+ "host-uk/core": "dev-main",
+ "ezyang/htmlpurifier": "^4.17"
},
"require-dev": {
"laravel/pint": "^1.18",
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 0000000..405652d
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,422 @@
+---
+title: Architecture
+description: Technical architecture of the core-content package
+updated: 2026-01-29
+---
+
+# Architecture
+
+The `core-content` package provides headless CMS functionality for the Host UK platform. It handles content management, AI-powered generation, revision history, webhooks for external CMS integration, and search capabilities.
+
+## Package Overview
+
+**Namespace:** `Core\Mod\Content\`
+**Entry Point:** `Boot.php` (Laravel Service Provider)
+**Dependencies:**
+- `core-php` (Foundation framework, events)
+- `core-tenant` (Workspaces, users, entitlements)
+- `ezyang/htmlpurifier` (HTML sanitisation; required, security-critical)
+- Optional: `core-agentic` (AI services for content generation)
+- Optional: `core-mcp` (MCP tool handlers)
+
+## Directory Structure
+
+```
+core-content/
+├── Boot.php # Service provider with event listeners
+├── config.php # Package configuration
+├── Models/ # Eloquent models (10 models)
+├── Services/ # Business logic services
+├── Controllers/ # API and web controllers
+│ └── Api/ # REST API controllers
+├── Jobs/ # Queue jobs
+├── Mcp/ # MCP tool handlers
+│ └── Handlers/ # Individual MCP tools
+├── Concerns/ # Traits
+├── Console/ # Artisan commands
+│ └── Commands/ # Command implementations
+├── Enums/ # PHP enums
+├── Migrations/ # Database migrations
+├── Observers/ # Model observers
+├── routes/ # Route definitions
+├── View/ # Livewire components and Blade views
+│ ├── Modal/ # Livewire components
+│ └── Blade/ # Blade templates
+├── tests/ # Test suite
+└── docs/ # Documentation
+```
+
+## Core Concepts
+
+### Content Items
+
+The primary content model. Supports multiple content types and sources:
+
+```php
+// Content types (where content originates)
+enum ContentType: string {
+ case NATIVE = 'native'; // Created in Host Hub editor
+ case HOSTUK = 'hostuk'; // Alias for native (backwards compat)
+ case SATELLITE = 'satellite'; // Per-service content
+ case WORDPRESS = 'wordpress'; // Legacy synced content
+}
+```
+
+Content items belong to workspaces and have:
+- Title, slug, excerpt, content (HTML/Markdown/JSON)
+- Status (draft, publish, future, private, pending)
+- Author and last editor tracking
+- Revision history
+- Taxonomy (categories, tags)
+- SEO metadata
+- Preview tokens for sharing unpublished content
+- CDN cache invalidation tracking
+
+### Content Briefs
+
+Briefs drive AI-powered content generation. They define what content to create:
+
+```php
+// Brief content types (what to generate)
+enum BriefContentType: string {
+ case HELP_ARTICLE = 'help_article'; // Documentation
+ case BLOG_POST = 'blog_post'; // Blog articles
+ case LANDING_PAGE = 'landing_page'; // Marketing pages
+ case SOCIAL_POST = 'social_post'; // Social media
+}
+```
+
+Brief workflow: `pending` -> `queued` -> `generating` -> `review` -> `published`
+
+### Revisions
+
+Every content change creates an immutable revision snapshot. Revisions support:
+- Change type tracking (edit, autosave, restore, publish)
+- Word/character count tracking
+- Side-by-side diff comparison with LCS algorithm
+- Configurable retention policies (max count, max age)
+
+## Service Layer
+
+### AIGatewayService
+
+Orchestrates two-stage AI content generation:
+
+1. **Stage 1: Draft (Gemini)** - Fast, cost-effective initial generation
+2. **Stage 2: Refine (Claude)** - Quality refinement and brand voice alignment
+
+```php
+$gateway = app(AIGatewayService::class);
+
+// Two-stage pipeline
+$result = $gateway->generateAndRefine($brief);
+
+// Or individual stages
+$draft = $gateway->generateDraft($brief);
+$refined = $gateway->refineDraft($brief, $draftContent);
+
+// Direct Claude generation (skip Gemini)
+$content = $gateway->generateDirect($brief);
+```
+
+### ContentSearchService
+
+Full-text search with multiple backend support:
+
+```php
+// Backends (configured via CONTENT_SEARCH_BACKEND)
+const BACKEND_DATABASE = 'database'; // LIKE queries with relevance
+const BACKEND_SCOUT_DATABASE = 'scout_database'; // Laravel Scout
+const BACKEND_MEILISEARCH = 'meilisearch'; // Laravel Scout + Meilisearch
+```
+
+Features:
+- Relevance scoring (title > slug > excerpt > content)
+- Filters: type, status, category, tag, date range, content_type
+- Autocomplete suggestions
+- Re-indexing support for Scout backends
+
+### WebhookRetryService
+
+Handles failed webhook processing with exponential backoff:
+
+```
+Retry intervals: 1m, 5m, 15m, 1h, 4h
+Max retries: 5 (configurable per webhook)
+```
+
+### ContentRender
+
+Public-facing content renderer with caching:
+- Homepage, blog listing, post, page rendering
+- Cache TTL: 1 hour production, 1 minute development
+- Cache key sanitisation for special characters
+
+### CdnPurgeService
+
+CDN cache invalidation via Bunny CDN:
+- Triggered by ContentItemObserver on publish/update
+- URL-based and tag-based purging
+- Workspace-level cache clearing
+
+## Event-Driven Architecture
+
+The package uses the event-driven module loading pattern from `core-php`:
+
+```php
+class Boot extends ServiceProvider
+{
+ public static array $listens = [
+ WebRoutesRegistering::class => 'onWebRoutes',
+ ApiRoutesRegistering::class => 'onApiRoutes',
+ ConsoleBooting::class => 'onConsole',
+ McpToolsRegistering::class => 'onMcpTools',
+ ];
+}
+```
+
+Handlers register:
+- **Web Routes:** Public blog, help pages, content preview
+- **API Routes:** REST API for briefs, media, search, generation
+- **Console:** Artisan commands for scheduling, pruning
+- **MCP Tools:** AI agent content management tools
+
+## API Structure
+
+### Authenticated Endpoints (Session or API Key)
+
+```
+# Content Briefs
+GET /api/content/briefs # List briefs
+POST /api/content/briefs # Create brief
+GET /api/content/briefs/{id} # Get brief
+PUT /api/content/briefs/{id} # Update brief
+DELETE /api/content/briefs/{id} # Delete brief
+POST /api/content/briefs/bulk # Bulk create
+GET /api/content/briefs/next # Next ready for processing
+
+# AI Generation (rate limited: 10/min)
+POST /api/content/generate/draft # Generate draft (Gemini)
+POST /api/content/generate/refine # Refine draft (Claude)
+POST /api/content/generate/full # Full pipeline
+POST /api/content/generate/social # Social posts from content
+
+# Content Search (rate limited: 60/min)
+GET /api/content/search # Full-text search
+GET /api/content/search/suggest # Autocomplete
+GET /api/content/search/info # Backend info
+POST /api/content/search/reindex # Trigger re-index
+
+# Revisions
+GET /api/content/items/{id}/revisions # List revisions
+GET /api/content/revisions/{id} # Get revision
+POST /api/content/revisions/{id}/restore # Restore revision
+GET /api/content/revisions/{id}/compare/{other} # Compare
+
+# Preview
+POST /api/content/items/{id}/preview/generate # Generate preview link
+DELETE /api/content/items/{id}/preview/revoke # Revoke preview link
+```
+
+### Public Endpoints
+
+```
+# Webhooks (signature verified, no auth)
+POST /api/content/webhooks/{endpoint} # Receive external webhooks
+
+# Web Routes
+GET /blog # Blog listing
+GET /blog/{slug} # Blog post
+GET /help # Help centre
+GET /help/{slug} # Help article
+GET /content/preview/{id} # Preview content
+```
+
+## Rate Limiting
+
+Defined in `Boot::configureRateLimiting()`:
+
+| Limiter | Authenticated | Unauthenticated |
+|---------|---------------|-----------------|
+| `content-generate` | 10/min per user/workspace | 2/min per IP |
+| `content-briefs` | 30/min per user | 5/min per IP |
+| `content-webhooks` | 60/min per endpoint | 30/min per IP |
+| `content-search` | Configurable (default 60/min) | 20/min per IP |
+
+## MCP Tools
+
+Seven MCP tools for AI agent integration:
+
+| Tool | Description |
+|------|-------------|
+| `content_list` | List content items with filters |
+| `content_read` | Read content by ID or slug |
+| `content_search` | Full-text search |
+| `content_create` | Create new content |
+| `content_update` | Update existing content |
+| `content_delete` | Soft delete content |
+| `content_taxonomies` | List categories and tags |
+
+All tools:
+- Require workspace resolution
+- Check entitlements (`content.mcp_access`, `content.items`)
+- Log actions to MCP session
+- Return structured responses
+
+## Data Flow
+
+### Content Creation via MCP
+
+```
+Agent Request
+ ↓
+ContentCreateHandler::handle()
+ ↓
+resolveWorkspace() → Workspace model
+ ↓
+checkEntitlement() → EntitlementService
+ ↓
+ContentItem::create()
+ ↓
+createRevision() → ContentRevision
+ ↓
+recordUsage() → EntitlementService
+ ↓
+Response with content ID
+```
+
+### Webhook Processing
+
+```
+External CMS
+ ↓
+POST /api/content/webhooks/{endpoint}
+ ↓
+ContentWebhookController::receive()
+ ↓
+Verify signature → ContentWebhookEndpoint::verifySignature()
+ ↓
+Check type allowed → ContentWebhookEndpoint::isTypeAllowed()
+ ↓
+Create ContentWebhookLog
+ ↓
+Dispatch ProcessContentWebhook job
+ ↓
+Job::handle()
+ ↓
+Process based on event type (wordpress.*, cms.*, generic.*)
+ ↓
+Create/Update/Delete ContentItem
+ ↓
+Mark log completed
+```
+
+### AI Generation Pipeline
+
+```
+ContentBrief
+ ↓
+GenerateContentJob dispatched
+ ↓
+Stage 1: AIGatewayService::generateDraft()
+ ↓
+GeminiService::generate() → Draft content
+ ↓
+Brief::markDraftComplete()
+ ↓
+Stage 2: AIGatewayService::refineDraft()
+ ↓
+ClaudeService::generate() → Refined content
+ ↓
+Brief::markRefined()
+ ↓
+AIUsage records created for each stage
+```
+
+## Configuration
+
+Key settings in `config.php`:
+
+```php
+return [
+ 'generation' => [
+ 'default_timeout' => env('CONTENT_GENERATION_TIMEOUT', 300),
+ 'timeouts' => [
+ 'help_article' => 180,
+ 'blog_post' => 240,
+ 'landing_page' => 300,
+ 'social_post' => 60,
+ ],
+ 'max_retries' => 3,
+ 'backoff' => [30, 60, 120],
+ ],
+ 'revisions' => [
+ 'max_per_item' => env('CONTENT_MAX_REVISIONS', 50),
+ 'max_age_days' => 180,
+ 'preserve_published' => true,
+ ],
+ 'cache' => [
+ 'ttl' => env('CONTENT_CACHE_TTL', 3600),
+ 'prefix' => 'content:render',
+ ],
+ 'search' => [
+ 'backend' => env('CONTENT_SEARCH_BACKEND', 'database'),
+ 'min_query_length' => 2,
+ 'max_per_page' => 50,
+ 'default_per_page' => 20,
+ 'rate_limit' => 60,
+ ],
+];
+```
+
+## Database Schema
+
+### Primary Tables
+
+| Table | Purpose |
+|-------|---------|
+| `content_items` | Content storage (posts, pages) |
+| `content_revisions` | Version history |
+| `content_taxonomies` | Categories and tags |
+| `content_item_taxonomy` | Pivot table |
+| `content_media` | Media attachments |
+| `content_authors` | Author profiles |
+| `content_briefs` | AI generation briefs |
+| `content_tasks` | Scheduled content tasks |
+| `content_webhook_endpoints` | Webhook configurations |
+| `content_webhook_logs` | Webhook processing logs |
+| `ai_usage` | AI API usage tracking |
+| `prompts` | AI prompt templates |
+| `prompt_versions` | Prompt version history |
+
+### Key Indexes
+
+- `content_items`: Composite indexes on `(workspace_id, slug, type)`, `(workspace_id, status, type)`, `(workspace_id, status, content_type)`
+- `content_revisions`: Index on `(content_item_id, revision_number)`
+- `content_webhook_logs`: Index on `(workspace_id, status)`, `(status, created_at)`
+
+## Extension Points
+
+### Adding New Content Types
+
+1. Add value to `ContentType` enum
+2. Update `ContentType::isNative()` if applicable
+3. Add any type-specific scopes to `ContentItem`
+
+### Adding New AI Generation Types
+
+1. Add value to `BriefContentType` enum
+2. Add timeout to `config.php` generation.timeouts
+3. Add prompt in `AIGatewayService::getDraftSystemPrompt()`
+
+### Adding New Webhook Event Types
+
+1. Add to `ContentWebhookEndpoint::ALLOWED_TYPES`
+2. Add handler in `ProcessContentWebhook::processWordPress()` or `processCms()`
+3. Add event type mapping in `ContentWebhookController::normaliseEventType()`
+
+### Adding New MCP Tools
+
+1. Create handler in `Mcp/Handlers/` implementing `McpToolHandler`
+2. Define `schema()` with tool name, description, input schema
+3. Implement `handle()` with workspace resolution and entitlement checks
+4. Register in `Boot::onMcpTools()`
diff --git a/docs/security.md b/docs/security.md
new file mode 100644
index 0000000..0b3d220
--- /dev/null
+++ b/docs/security.md
@@ -0,0 +1,389 @@
+---
+title: Security
+description: Security considerations and audit notes for core-content
+updated: 2026-01-29
+---
+
+# Security
+
+This document covers security considerations, known risks, and recommended mitigations for the `core-content` package.
+
+## Authentication and Authorisation
+
+### API Authentication
+
+The content API supports two authentication methods:
+
+1. **Session Authentication** (`auth` middleware)
+ - For browser-based access
+ - CSRF protection via Laravel's standard middleware
+
+2. **API Key Authentication** (`api.auth` middleware)
+ - For programmatic access
+ - Keys prefixed with `hk_`
+ - Scope enforcement via `api.scope.enforce` middleware
+
+### Webhook Authentication
+
+Webhooks use HMAC signature verification instead of session/API key auth:
+
+```php
+// Signature verification in ContentWebhookEndpoint
+public function verifySignature(string $payload, ?string $signature): bool
+{
+ $expectedSignature = hash_hmac('sha256', $payload, $this->secret);
+ return hash_equals($expectedSignature, $signature);
+}
+```
+
+**Supported signature headers:**
+- `X-Signature`
+- `X-Hub-Signature-256` (GitHub format)
+- `X-WP-Webhook-Signature` (WordPress format)
+- `X-Content-Signature`
+- `Signature`
+
+### MCP Tool Authentication
+
+MCP tools authenticate via the MCP session context. Workspace access is verified through:
+- Workspace resolution (by slug or ID)
+- Entitlement checks (`content.mcp_access`, `content.items`)
+
+## Known Security Considerations
+
+### HIGH: HTML Sanitisation Fallback
+
+**Location:** `Models/ContentItem.php:333-351`
+
+**Issue:** The `getSanitisedContent()` method falls back to `strip_tags()` if HTMLPurifier is unavailable. This is insufficient for XSS protection.
+
+```php
+// Current fallback (insufficient)
+$allowedTags = '......';
+return strip_tags($content, $allowedTags);
+```
+
+**Risk:** XSS attacks via crafted HTML in content body.
+
+**Mitigation:**
+1. Ensure HTMLPurifier is installed in production
+2. Add package check in boot to fail loudly if missing
+3. Consider using `voku/anti-xss` as a lighter alternative
+
+### HIGH: Webhook Signature Optional
+
+**Location:** `Models/ContentWebhookEndpoint.php:205-210`
+
+**Issue:** When no secret is configured, signature verification is skipped:
+
+```php
+if (empty($this->secret)) {
+ return true; // Accepts all requests
+}
+```
+
+**Risk:** Unauthenticated webhook injection if endpoint has no secret.
+
+**Mitigation:**
+1. Require secrets for all production endpoints
+2. Add explicit `allow_unsigned` flag if intentional
+3. Log warning when unsigned webhooks are accepted
+4. Rate limit unsigned endpoints more aggressively
+
+### MEDIUM: Workspace Access in MCP Handlers
+
+**Location:** `Mcp/Handlers/*.php`
+
+**Issue:** Workspace resolution allows lookup by ID:
+
+```php
+return Workspace::where('slug', $slug)
+ ->orWhere('id', $slug)
+ ->first();
+```
+
+**Risk:** If an attacker knows a workspace ID, they could potentially access content without being a workspace member.
+
+**Mitigation:**
+1. Always verify workspace membership after resolution
+2. Use entitlement checks (already present but verify coverage)
+3. Consider removing ID-based lookup for MCP
+
+### MEDIUM: Preview Token Enumeration
+
+**Location:** `Controllers/ContentPreviewController.php`
+
+**Issue:** There is no rate limiting on the preview token generation endpoint. An attacker could probe for valid content IDs.
+
+**Mitigation:**
+1. Add rate limiting (30/min per user)
+2. Use constant-time responses regardless of content existence
+3. Consider using UUIDs instead of sequential IDs for preview URLs
+
+### LOW: Webhook Payload Content Types
+
+**Location:** `Jobs/ProcessContentWebhook.php:288-289`
+
+**Issue:** Content type from external webhook is assigned directly:
+
+```php
+$contentItem->content_type = ContentType::NATIVE;
+```
+
+**Risk:** External systems could potentially inject invalid content types.
+
+**Mitigation:**
+1. Validate against `ContentType` enum
+2. Default to a safe type if validation fails
+3. Log invalid types for monitoring
+
+## Input Validation
+
+### API Request Validation
+
+All API controllers use Laravel's validation:
+
+```php
+$validated = $request->validate([
+ 'q' => 'required|string|min:2|max:500',
+ 'type' => 'nullable|string|in:post,page',
+ 'status' => 'nullable',
+ // ...
+]);
+```
+
+**Validated inputs:**
+- Search queries (min/max length, string type)
+- Content types (enum validation)
+- Pagination (min/max values)
+- Date ranges (date format, logical order)
+
+### MCP Input Validation
+
+MCP handlers validate via JSON schema:
+
+```php
+'inputSchema' => [
+ 'type' => 'object',
+ 'properties' => [
+ 'workspace' => ['type' => 'string'],
+ 'title' => ['type' => 'string'],
+ 'type' => ['type' => 'string', 'enum' => ['post', 'page']],
+ ],
+ 'required' => ['workspace', 'title'],
+]
+```
+
+### Webhook Payload Validation
+
+Webhook payloads undergo:
+- JSON decode validation
+- Event type normalisation
+- Content ID extraction with fallbacks
+
+**Note:** Payload content is stored in a JSON column without full validation. The processing logic handles missing or invalid fields gracefully.
+
+## Rate Limiting
+
+### Configured Limiters
+
+| Endpoint | Authenticated | Unauthenticated | Key |
+|----------|------|-----------------|-----|
+| AI Generation | 10/min | 2/min | `content-generate` |
+| Brief Creation | 30/min | 5/min | `content-briefs` |
+| Webhooks | 60/min | 30/min | `content-webhooks` |
+| Search | 60/min | 20/min | `content-search` |
+
+### Rate Limit Bypass Risks
+
+1. **IP Spoofing:** Ensure `X-Forwarded-For` handling is configured correctly
+2. **Workspace Switching:** Workspace-based limits should use user ID as fallback
+3. **API Key Sharing:** Each key should have independent limits
+
+## Data Protection
+
+### Sensitive Data Handling
+
+**Encrypted at rest:**
+- `ContentWebhookEndpoint.secret` (cast to `encrypted`)
+- `ContentWebhookEndpoint.previous_secret` (cast to `encrypted`)
+
+**Hidden from serialisation:**
+- Webhook secrets (via `$hidden` property)
+
+### PII Considerations
+
+Content may contain PII in:
+- Article body content
+- Author information
+- Webhook payloads
+
+**Recommendations:**
+1. Implement content retention policies
+2. Add GDPR data export/deletion support
+3. Log access to PII-containing content
+
+## Webhook Security
+
+### Circuit Breaker
+
+Endpoints automatically disable after 10 consecutive failures:
+
+```php
+const MAX_FAILURES = 10;
+
+public function incrementFailureCount(): void
+{
+ $this->increment('failure_count');
+ if ($this->failure_count >= self::MAX_FAILURES) {
+ $this->update(['is_enabled' => false]);
+ }
+}
+```
+
+### Secret Rotation
+
+Grace period support for secret rotation:
+
+```php
+public function isInGracePeriod(): bool
+{
+ // Accepts both current and previous secret during grace
+}
+```
+
+Default grace period: 24 hours
+
+### Allowed Event Types
+
+Endpoints can restrict which event types they accept:
+
+```php
+const ALLOWED_TYPES = [
+ 'wordpress.post_created',
+ 'wordpress.post_updated',
+ // ...
+ 'generic.payload',
+];
+```
+
+Wildcard support: `wordpress.*` matches all WordPress events.
+
+## Content Security
+
+### XSS Prevention
+
+1. **Input:** Content stored as-is to preserve formatting
+2. **Output:** `getSanitisedContent()` for public rendering
+3. **Admin:** Trusted content displayed with proper escaping
+
+**Blade template guidelines:**
+- Use `{{ $title }}` for plain text (auto-escaped)
+- Use `{!! $content !!}` only for sanitised HTML
+- Comments document which fields need which treatment
+
+### SQL Injection
+
+All database queries use:
+- Eloquent ORM (parameterised queries)
+- Query builder with bindings
+- No raw SQL with user input
+
+### CSRF Protection
+
+Web routes include CSRF middleware automatically. API routes are exempt because they authenticate with API keys rather than session cookies.
+
+## Audit Logging
+
+### Logged Events
+
+- Webhook receipt and processing
+- AI generation requests and results
+- Content creation/update/deletion via MCP
+- CDN cache purges
+- Authentication failures
+
+### Log Levels
+
+| Event | Level |
+|-------|-------|
+| Webhook signature failure | WARNING |
+| Circuit breaker triggered | WARNING |
+| Processing failure | ERROR |
+| Successful operations | INFO |
+| Skipped operations | DEBUG |
+
+## Recommendations
+
+### Immediate (P1)
+
+1. [ ] Require HTMLPurifier or equivalent in production
+2. [ ] Make webhook signature verification mandatory
+3. [ ] Add rate limiting to preview generation
+4. [ ] Validate content_type from webhook payloads
+
+### Short-term (P2)
+
+1. [ ] Add comprehensive audit logging
+2. [ ] Implement content access logging
+3. [ ] Add IP allowlisting option for webhooks
+4. [ ] Create security-focused test suite
+
+### Long-term (P3+)
+
+1. [ ] Implement content encryption at rest option
+2. [ ] Add GDPR compliance features
+3. [ ] Create security monitoring dashboard
+4. [ ] Add anomaly detection for webhook patterns
+
+## Security Testing
+
+### Manual Testing Checklist
+
+```
+[ ] Verify webhook signature rejection with invalid signature
+[ ] Test rate limiting enforcement
+[ ] Confirm XSS payloads are sanitised
+[ ] Verify workspace isolation in API responses
+[ ] Test preview token expiration
+[ ] Verify CSRF protection on web routes
+[ ] Test SQL injection attempts in search
+[ ] Verify file type validation on media uploads
+```
+
+### Automated Testing
+
+```bash
+# Run security-focused tests
+./vendor/bin/pest --filter=Security
+
+# Check code style (Pint is a formatter/linter, not a vulnerability scanner)
+./vendor/bin/pint --test
+```
+
+## Incident Response
+
+### Webhook Compromise
+
+1. Disable affected endpoint
+2. Rotate the affected endpoint's secret immediately
+3. Review webhook logs for suspicious patterns
+4. Regenerate secrets for all other endpoints as a precaution
+
+### Content Injection
+
+1. Identify affected content items
+2. Restore from revision history
+3. Review webhook source
+4. Add additional validation
+
+### API Key Leak
+
+1. Revoke compromised key
+2. Review access logs
+3. Generate new key with reduced scope
+4. Monitor for unauthorised access
+
+## Contact
+
+Security issues should be reported to the security team. Do not create public issues for security vulnerabilities.
diff --git a/tests/Unit/HtmlSanitiserTest.php b/tests/Unit/HtmlSanitiserTest.php
new file mode 100644
index 0000000..1b0aa78
--- /dev/null
+++ b/tests/Unit/HtmlSanitiserTest.php
@@ -0,0 +1,322 @@
+sanitiser = new HtmlSanitiser;
+ }
+
+ // -------------------------------------------------------------------------
+ // XSS Attack Prevention Tests
+ // -------------------------------------------------------------------------
+
+ public function test_removes_script_tags(): void
+ {
+ $malicious = 'Hello
World
';
+ $result = $this->sanitiser->sanitise($malicious);
+
+ $this->assertStringNotContainsString('">Click ';
+ $result = $this->sanitiser->sanitise($malicious);
+
+ $this->assertStringNotContainsString('data:text/html', $result);
+ $this->assertStringNotContainsString('">';
+ $result = $this->sanitiser->sanitise($malicious);
+
+ $this->assertStringNotContainsString('assertStringNotContainsString('