fix(security): require HTMLPurifier for XSS sanitisation

The previous getSanitisedContent() method fell back to strip_tags() when HTMLPurifier was unavailable. This fallback was insecure as strip_tags() does not sanitise attributes, allowing XSS via onclick, onerror, and javascript: URLs. Changes: - Created Services/HtmlSanitiser.php using HTMLPurifier as the sole sanitiser - Added ezyang/htmlpurifier as a required dependency in composer.json - Added boot-time validation that throws RuntimeException if missing - Removed insecure strip_tags() fallback from ContentItem model - Added 30+ unit tests covering XSS attack vectors Closes SEC-002 from TODO.md Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 12:34:35 +00:00 · 2026-01-29 12:34:35 +00:00 · fa4893d064
commit fa4893d064
parent 5c92e92a29
8 changed files with 1631 additions and 10 deletions
--- a/Boot.php
+++ b/Boot.php
@ -12,6 +12,8 @@ use Illuminate\Cache\RateLimiting\Limit;
 use Illuminate\Http\Request;
 use Illuminate\Support\Facades\RateLimiter;
 use Illuminate\Support\ServiceProvider;
+use Core\Mod\Content\Services\HtmlSanitiser;
+use RuntimeException;

 /**
 * Content Module Boot
@ -38,12 +40,31 @@ class Boot extends ServiceProvider
    public function register(): void
    {
        $this->mergeConfigFrom(__DIR__.'/config.php', 'content');
+
+        // Register HtmlSanitiser as a singleton for performance
+        $this->app->singleton(HtmlSanitiser::class);
    }

    public function boot(): void
    {
        $this->loadMigrationsFrom(__DIR__.'/Migrations');
        $this->configureRateLimiting();
+        $this->validateSecurityDependencies();
+    }
+
+    /**
+     * Validate that security-critical dependencies are available.
+     *
+     * @throws RuntimeException If HTMLPurifier is not installed
+     */
+    protected function validateSecurityDependencies(): void
+    {
+        if (! HtmlSanitiser::isAvailable()) {
+            throw new RuntimeException(
+                'core-content requires HTMLPurifier for secure HTML sanitisation. '.
+                'Install it with: composer require ezyang/htmlpurifier'
+            );
+        }
    }

    /**
--- a/Models/ContentItem.php
+++ b/Models/ContentItem.php
@ -16,6 +16,7 @@ use Illuminate\Database\Eloquent\Relations\HasMany;
 use Illuminate\Database\Eloquent\SoftDeletes;
 use Core\Mod\Content\Enums\ContentType;
 use Core\Mod\Content\Observers\ContentItemObserver;
+use Core\Mod\Content\Services\HtmlSanitiser;

 #[ObservedBy([ContentItemObserver::class])]
 class ContentItem extends Model
@ -330,6 +331,10 @@ class ContentItem extends Model
     *
     * Uses HTMLPurifier to remove XSS vectors while preserving
     * safe HTML elements like paragraphs, headings, lists, etc.
+     *
+     * SECURITY: This method uses HTMLPurifier which is a required dependency.
+     * Never fall back to strip_tags() as it does not sanitise attributes
+     * (e.g., onclick, onerror) which can still execute JavaScript.
     */
    public function getSanitisedContent(): string
    {
@ -339,15 +344,7 @@ class ContentItem extends Model
            return '';
        }

-        // Use the StaticPageSanitiser if available
-        if (class_exists(\Mod\Bio\Services\StaticPageSanitiser::class)) {
-            return app(\Mod\Bio\Services\StaticPageSanitiser::class)->sanitiseHtml($content);
-        }
-
-        // Fallback: basic sanitisation using strip_tags with allowed tags
-        $allowedTags = '<p><br><strong><b><em><i><u><h1><h2><h3><h4><h5><h6><ul><ol><li><a><blockquote><pre><code><img><table><thead><tbody><tr><th><td><div><span><hr>';
-
-        return strip_tags($content, $allowedTags);
+        return app(HtmlSanitiser::class)->sanitise($content);
    }

    /**
--- a/Services/HtmlSanitiser.php
+++ b/Services/HtmlSanitiser.php
@ -0,0 +1,151 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Core\Mod\Content\Services;
+
+use HTMLPurifier;
+use HTMLPurifier_Config;
+use RuntimeException;
+
+/**
+ * HTML sanitiser for content rendering.
+ *
+ * Uses HTMLPurifier to remove XSS vectors while preserving safe HTML elements.
+ * This is a security-critical service - all user-generated HTML content must
+ * be sanitised before rendering.
+ *
+ * @see https://htmlpurifier.org/
+ */
+class HtmlSanitiser
+{
+    private HTMLPurifier $purifier;
+
+    /**
+     * Create a new HTML sanitiser instance.
+     *
+     * @throws RuntimeException If HTMLPurifier is not installed
+     */
+    public function __construct()
+    {
+        if (! class_exists(HTMLPurifier::class)) {
+            throw new RuntimeException(
+                'HTMLPurifier is required for HTML sanitisation. '.
+                'Install it with: composer require ezyang/htmlpurifier'
+            );
+        }
+
+        $config = HTMLPurifier_Config::createDefault();
+
+        // Allow a safe set of HTML5 elements for content rendering
+        $config->set('HTML.Allowed', implode(',', [
+            // Structure
+            'div[id|class]',
+            'span[id|class]',
+            'section[id|class]',
+            'article[id|class]',
+
+            // Text
+            'h1[id|class]',
+            'h2[id|class]',
+            'h3[id|class]',
+            'h4[id|class]',
+            'h5[id|class]',
+            'h6[id|class]',
+            'p[id|class]',
+            'br',
+            'hr[id|class]',
+            'strong',
+            'em',
+            'b',
+            'i',
+            'u',
+            'small',
+            'mark',
+            'del',
+            'ins',
+            'sub',
+            'sup',
+            'code',
+            'pre[id|class]',
+            'blockquote[id|class]',
+
+            // Lists
+            'ul[id|class]',
+            'ol[id|class]',
+            'li[id|class]',
+
+            // Links and media
+            'a[href|id|class|target|rel]',
+            'img[src|alt|width|height|id|class]',
+            'figure[id|class]',
+            'figcaption[id|class]',
+
+            // Tables
+            'table[id|class]',
+            'thead[id|class]',
+            'tbody[id|class]',
+            'tr[id|class]',
+            'th[id|class|colspan|rowspan]',
+            'td[id|class|colspan|rowspan]',
+        ]));
+
+        // Safe link targets
+        $config->set('Attr.AllowedFrameTargets', ['_blank', '_self']);
+
+        // Add rel="noopener" to external links for security
+        $config->set('HTML.Nofollow', true);
+        $config->set('HTML.TargetNoopener', true);
+
+        // Disable cache in development, enable via config in production
+        $cacheDir = config('content.purifier_cache_dir');
+        if ($cacheDir && is_dir($cacheDir) && is_writable($cacheDir)) {
+            $config->set('Cache.SerializerPath', $cacheDir);
+        } else {
+            $config->set('Cache.DefinitionImpl', null);
+        }
+
+        // Safe URI schemes only
+        $config->set('URI.AllowedSchemes', [
+            'http' => true,
+            'https' => true,
+            'mailto' => true,
+            'tel' => true,
+        ]);
+
+        // Do not allow data: URIs (can contain XSS)
+        $config->set('URI.DisableExternalResources', false);
+        $config->set('URI.DisableResources', false);
+
+        $this->purifier = new HTMLPurifier($config);
+    }
+
+    /**
+     * Sanitise HTML content to prevent XSS attacks.
+     *
+     * This method removes dangerous HTML, JavaScript, and CSS while preserving
+     * safe formatting elements. Always use this before rendering user content.
+     *
+     * @param  string  $html  The raw HTML content to sanitise
+     * @return string The sanitised HTML, safe for rendering
+     */
+    public function sanitise(string $html): string
+    {
+        if (empty($html)) {
+            return '';
+        }
+
+        return $this->purifier->purify($html);
+    }
+
+    /**
+     * Check if HTMLPurifier is available.
+     *
+     * Use this method to verify the dependency is installed before attempting
+     * to create a sanitiser instance.
+     */
+    public static function isAvailable(): bool
+    {
+        return class_exists(HTMLPurifier::class);
+    }
+}
--- a/TODO.md
+++ b/TODO.md
@ -0,0 +1,318 @@
+# TODO - core-content
+
+Production quality improvements for the Content Module.
+
+**Legend:**
+- P1: Critical/Security - Must fix immediately
+- P2: High priority - Fix soon
+- P3: Medium priority - Important improvements
+- P4: Low priority - Nice to have
+- P5: Nice-to-have - When time permits
+- P6+: Future/Backlog - Long-term improvements
+
+---
+
+## P1 - Critical/Security
+
+### SEC-001: Add CSRF protection to webhook endpoints
+- **Status:** Open
+- **Description:** The webhook endpoint at `POST /api/content/webhooks/{endpoint}` accepts external requests but only validates via HMAC signature. If signature verification is skipped (when no secret is configured), the endpoint is vulnerable.
+- **File:** `Controllers/Api/ContentWebhookController.php:205-210`
+- **Fix:** Require signature verification always OR add explicit opt-in flag to disable it, with warning logs.
+- **Acceptance:** Webhooks without secrets must be explicitly enabled per-endpoint.
+
+### SEC-002: Sanitise HTML content before rendering
+- **Status:** Fixed
+- **Description:** `ContentItem::getSanitisedContent()` falls back to `strip_tags()` if HTMLPurifier is unavailable. This fallback is insufficient for XSS protection.
+- **File:** `Models/ContentItem.php:333-351`
+- **Fix:** Always require HTMLPurifier or a robust sanitiser. Add package dependency check in boot.
+- **Acceptance:** Content rendering always goes through proper XSS sanitisation.
+- **Resolution:** Created `Services/HtmlSanitiser.php` using HTMLPurifier as a required dependency. Added HTMLPurifier to composer.json require. Added boot-time validation that throws RuntimeException if dependency missing. Removed insecure strip_tags() fallback. Added comprehensive XSS prevention tests in `tests/Unit/HtmlSanitiserTest.php`.
+
+### SEC-003: Validate workspace access in MCP handlers
+- **Status:** Open
+- **Description:** MCP handlers check entitlements but workspace resolution via `orWhere('id', $slug)` could expose content across workspaces if numeric IDs are guessed.
+- **File:** `Mcp/Handlers/ContentCreateHandler.php:212-220`, `ContentSearchHandler.php:129-137`
+- **Fix:** Add explicit workspace ownership/membership check before returning data.
+- **Acceptance:** Users can only access content from workspaces they own or are members of.
+
+### SEC-004: Rate limit preview URL generation
+- **Status:** Open
+- **Description:** Preview token generation has no rate limiting. An attacker could enumerate valid content IDs by watching response times.
+- **File:** `Controllers/ContentPreviewController.php:26-49`
+- **Fix:** Add rate limiting to preview generation endpoint.
+- **Acceptance:** Preview generation limited to 30/minute per user.
+
+### SEC-005: Validate content_type enum in webhook payloads
+- **Status:** Open
+- **Description:** Webhook processing accepts arbitrary `content_type` strings from external sources without validation.
+- **File:** `Jobs/ProcessContentWebhook.php:288-289`
+- **Fix:** Validate against `ContentType` enum before assigning to model.
+- **Acceptance:** Invalid content types rejected with clear error message.
+
+---
+
+## P2 - High Priority
+
+### DX-001: Add missing type hints to scope methods
+- **Status:** Open
+- **Description:** Scope methods like `scopeForWorkspace`, `scopePublished` etc. use `$query` without `Builder` type hint.
+- **Files:** `Models/ContentItem.php:147-198`, `Models/ContentBrief.php:181-215`
+- **Fix:** Add `\Illuminate\Database\Eloquent\Builder` type hints.
+- **Acceptance:** All scope methods have proper return types.
+
+### DX-002: Document search service API response format
+- **Status:** Open
+- **Description:** `ContentSearchService::formatForApi()` returns a specific structure but it's not documented.
+- **File:** `Services/ContentSearchService.php:467-493`
+- **Fix:** Add PHPDoc with return type schema or create a Resource class.
+- **Acceptance:** API response format documented with example JSON.
+
+### TEST-001: Add integration tests for AI generation pipeline
+- **Status:** Open
+- **Description:** `AIGatewayService` has no tests. The two-stage Gemini+Claude pipeline is critical but untested.
+- **File:** `Services/AIGatewayService.php`
+- **Fix:** Add tests with mocked API responses for `generateDraft`, `refineDraft`, `generateAndRefine`.
+- **Acceptance:** 80%+ coverage on AIGatewayService with edge case tests.
+
+### TEST-002: Add tests for webhook signature verification
+- **Status:** Open
+- **Description:** `ContentWebhookEndpoint::verifySignature()` handles multiple formats but isn't fully tested.
+- **File:** `Models/ContentWebhookEndpoint.php:204-237`
+- **Fix:** Add unit tests for each signature format and grace period behaviour.
+- **Acceptance:** Tests cover: sha256= prefix, grace period rotation, empty signature handling.
+
+### PERF-001: Add database index for content search
+- **Status:** Open
+- **Description:** LIKE-based search on `content_html` has no fulltext index, causing table scans.
+- **File:** `Services/ContentSearchService.php:142-162`, `Migrations/0001_01_01_000001_create_content_tables.php`
+- **Fix:** Add MySQL fulltext index on title, excerpt, content_markdown columns OR document Meilisearch as required for production.
+- **Acceptance:** Search queries under 100ms for 10k+ content items.
+
+### PERF-002: Optimise revision pruning for large datasets
+- **Status:** Open
+- **Description:** `ContentRevision::pruneAll()` loads all content_item_ids into memory before iterating.
+- **File:** `Models/ContentRevision.php:595-609`
+- **Fix:** Use `chunk()` or cursor to process in batches.
+- **Acceptance:** Pruning handles 100k+ content items without memory issues.
+
+### BUG-001: Fix content_briefs migration schema mismatch
+- **Status:** Open
+- **Description:** Migration defines `content_briefs` with different columns than model fillable (e.g., `user_id` vs model relationships).
+- **File:** `Migrations/0001_01_01_000001_create_content_tables.php:215-238`, `Models/ContentBrief.php:49-75`
+- **Fix:** Align migration with actual model usage or add a migration to fix schema.
+- **Acceptance:** All ContentBrief columns are used and documented.
+
+### BUG-002: Fix ai_usage migration column naming
+- **Status:** Open
+- **Description:** Migration creates `feature` column but model uses `purpose`. Creates confusion.
+- **File:** `Migrations/0001_01_01_000001_create_content_tables.php:246`, `Models/AIUsage.php:46`
+- **Fix:** Add migration to rename column OR update model to use `feature`.
+- **Acceptance:** Column name matches model fillable property.
+
+---
+
+## P3 - Medium Priority
+
+### CODE-001: Extract webhook processing logic into service
+- **Status:** Open
+- **Description:** `ProcessContentWebhook` job contains 500+ lines of business logic that should be in a service.
+- **File:** `Jobs/ProcessContentWebhook.php`
+- **Fix:** Create `ContentWebhookProcessingService` with methods for each event type.
+- **Acceptance:** Job is under 100 lines, delegates to service.
+
+### CODE-002: Create ContentBriefResource for API responses
+- **Status:** Open
+- **Description:** Controllers manually format brief responses. A Resource class would ensure consistency.
+- **File:** `Controllers/Api/ContentBriefController.php` references `ContentBriefResource` which may not exist.
+- **Fix:** Create or verify `Resources/ContentBriefResource.php` exists with proper formatting.
+- **Acceptance:** All brief API responses use the Resource class.
+
+### CODE-003: Consolidate workspace resolution logic
+- **Status:** Open
+- **Description:** Three different `resolveWorkspace()` methods exist with similar but not identical logic.
+- **Files:** `Controllers/Api/ContentSearchController.php`, `Mcp/Handlers/*`, `Services/ContentRender.php`
+- **Fix:** Create trait or shared helper in core-tenant.
+- **Acceptance:** Single source of truth for workspace resolution.
+
+### TEST-003: Add tests for revision diff algorithm
+- **Status:** Open
+- **Description:** `ContentRevision::getDiff()` and LCS algorithm are complex but only lightly tested.
+- **File:** `Models/ContentRevision.php:233-509`
+- **Fix:** Add unit tests for edge cases: empty content, identical content, very long content.
+- **Acceptance:** Diff algorithm has 90%+ coverage with edge cases documented.
+
+### TEST-004: Add webhook retry service tests
+- **Status:** Open
+- **Description:** `WebhookRetryService` has retry logic with exponential backoff but no tests.
+- **File:** `Services/WebhookRetryService.php`
+- **Fix:** Add tests for retry scheduling, backoff intervals, exhaustion handling.
+- **Acceptance:** Full coverage of retry state transitions.
+
+### FEAT-001: Add content scheduling command
+- **Status:** Open
+- **Description:** `PublishScheduledContent` command is registered but implementation needs verification.
+- **File:** `Console/Commands/PublishScheduledContent.php`
+- **Fix:** Verify command works, add scheduler entry documentation.
+- **Acceptance:** Scheduled content publishes automatically at the correct time.
+
+### FEAT-002: Add media upload validation
+- **Status:** Open
+- **Description:** `ContentMediaController` store method should validate file types, sizes, dimensions.
+- **File:** `Controllers/Api/ContentMediaController.php`
+- **Fix:** Add comprehensive validation rules for media uploads.
+- **Acceptance:** Reject files over size limit, invalid types, malformed images.
+
+### FEAT-003: Add bulk operations for content items
+- **Status:** Open
+- **Description:** No bulk delete, bulk status change, or bulk category assignment endpoints.
+- **Files:** API routes, new controller methods needed
+- **Fix:** Add bulk endpoints with proper authorisation and rate limiting.
+- **Acceptance:** Can bulk-update up to 50 items per request.
+
+---
+
+## P4 - Low Priority
+
+### DX-003: Add IDE helper annotations to models
+- **Status:** Open
+- **Description:** Models lack `@property` annotations for dynamic attributes like `status_color`.
+- **Files:** All models in `Models/`
+- **Fix:** Add comprehensive `@property` PHPDoc blocks for all magic attributes.
+- **Acceptance:** IDE autocomplete works for all model properties.
+
+### DX-004: Document configuration options
+- **Status:** Open
+- **Description:** `config.php` has comments but no comprehensive documentation of all options and their effects.
+- **File:** `config.php`
+- **Fix:** Add CLAUDE.md section or dedicated config docs explaining each option.
+- **Acceptance:** Every config option documented with type, default, and example.
+
+### CODE-004: Remove deprecated WordPress-specific code paths
+- **Status:** Open
+- **Description:** Multiple methods have WordPress-specific handling that may be unused.
+- **Files:** `Models/ContentItem.php` (wp_id, wp_guid), various scopes
+- **Fix:** Audit usage, add deprecation notices if still needed, or remove.
+- **Acceptance:** Clear documentation of what is deprecated vs maintained.
+
+### CODE-005: Standardise error response format
+- **Status:** Open
+- **Description:** Error responses vary: `['error' => ...]`, `['message' => ...]`, different status codes.
+- **Files:** All controllers in `Controllers/Api/`
+- **Fix:** Use consistent error format: `{error: string, code: string, message: string}`.
+- **Acceptance:** All error responses follow documented schema.
+
+### PERF-003: Add eager loading hints to API responses
+- **Status:** Open
+- **Description:** Some API responses trigger N+1 queries for related data.
+- **Files:** `Controllers/Api/ContentBriefController.php:31-77`
+- **Fix:** Add `->with(['workspace', 'contentItem'])` where appropriate.
+- **Acceptance:** No N+1 queries in API responses (verified with debugbar).
+
+### TEST-005: Add factory states for all content statuses
+- **Status:** Open
+- **Description:** Factory states exist but may not cover all status/type combinations.
+- **Files:** `Database/Factories/*.php` (if they exist, or in test setup)
+- **Fix:** Ensure factories have states for: draft, publish, future, private, pending, trash.
+- **Acceptance:** Tests can easily create content in any status.
+
+---
+
+## P5 - Nice to Have
+
+### FEAT-004: Add content versioning comparison UI support
+- **Status:** Open
+- **Description:** `ContentRevision::getDiff()` returns data but no documented UI integration.
+- **File:** `Models/ContentRevision.php`
+- **Fix:** Document how to integrate diff data with frontend diff viewer.
+- **Acceptance:** Example Livewire component or documentation for diff display.
+
+### FEAT-005: Add webhook event deduplication
+- **Status:** Open
+- **Description:** Same webhook could be received multiple times (network retry). No dedup.
+- **File:** `Jobs/ProcessContentWebhook.php`
+- **Fix:** Add deduplication based on payload hash + timestamp window.
+- **Acceptance:** Duplicate webhooks within 5 minutes are skipped.
+
+### FEAT-006: Add content analytics tracking
+- **Status:** Open
+- **Description:** No tracking of content views, engagement, or performance metrics.
+- **Files:** New feature needed
+- **Fix:** Integrate with core-analytics or add simple view tracking.
+- **Acceptance:** Can see view counts and basic metrics per content item.
+
+### CODE-006: Add event dispatching for content lifecycle
+- **Status:** Open
+- **Description:** Content creation/update/publish doesn't dispatch domain events for other modules.
+- **Files:** `Models/ContentItem.php`, `Observers/ContentItemObserver.php`
+- **Fix:** Dispatch events like `ContentPublished`, `ContentUpdated` etc.
+- **Acceptance:** Other modules can listen for content events.
+
+### DOCS-001: Add API documentation
+- **Status:** Open
+- **Description:** API endpoints lack OpenAPI/Swagger documentation.
+- **Files:** `routes/api.php`
+- **Fix:** Add Scribe or OpenAPI annotations for all endpoints.
+- **Acceptance:** OpenAPI spec can be generated and used in API clients.
+
+---
+
+## P6 - Future/Backlog
+
+### FEAT-007: Add content workflow/approval system
+- **Status:** Backlog
+- **Description:** No formal review/approval workflow for content before publishing.
+- **Fix:** Add ContentWorkflow model with states and transitions.
+
+### FEAT-008: Add content localisation/translation support
+- **Status:** Backlog
+- **Description:** No i18n support for multilingual content.
+- **Fix:** Add locale field and translation linking to ContentItem.
+
+### FEAT-009: Add content A/B testing
+- **Status:** Backlog
+- **Description:** No ability to test content variations.
+- **Fix:** Add ContentVariant model for headline/content testing.
+
+### PERF-004: Add content caching layer
+- **Status:** Backlog
+- **Description:** CDN purge exists but no server-side caching strategy documented.
+- **Fix:** Document caching strategy, add Redis caching for hot content.
+
+### CODE-007: Extract prompts to database-driven system
+- **Status:** Backlog
+- **Description:** AI prompts are hardcoded in `AIGatewayService`. Prompts table exists but unused for this.
+- **File:** `Services/AIGatewayService.php:226-525`
+- **Fix:** Load prompts from database, allow admin editing.
+
+---
+
+## Completed
+
+### SEC-002: HTML sanitisation fallback vulnerability (2026-01-29)
+- Created `Services/HtmlSanitiser.php` using HTMLPurifier
+- Added `ezyang/htmlpurifier` as required dependency in composer.json
+- Updated `ContentItem::getSanitisedContent()` to use the new service
+- Added boot-time validation to throw exception if HTMLPurifier is missing
+- Removed insecure `strip_tags()` fallback that allowed XSS via event handlers
+- Added 30+ unit tests covering XSS attack vectors and safe HTML preservation
+
+---
+
+## Notes
+
+### Dependencies
+- Requires `core-php` for events and base infrastructure
+- Requires `core-tenant` for workspace and user models
+- Requires `ezyang/htmlpurifier` for XSS sanitisation (security-critical)
+- Optional: `core-agentic` for AI services (GeminiService, ClaudeService)
+- Optional: `core-mcp` for MCP tool registration
+
+### Testing
+Run tests with: `composer test` from package root.
+Run single test: `./vendor/bin/pest --filter=ContentSearchServiceTest`
+
+### Last Audit
+- **Date:** 2026-01-29
+- **By:** Claude Code (core-content audit)
+- **Files Reviewed:** ~70 PHP files
--- a/composer.json
+++ b/composer.json
@ -5,7 +5,8 @@
    "license": "EUPL-1.2",
    "require": {
        "php": "^8.2",
-        "host-uk/core": "dev-main"
+        "host-uk/core": "dev-main",
+        "ezyang/htmlpurifier": "^4.17"
    },
    "require-dev": {
        "laravel/pint": "^1.18",
--- a/docs/architecture.md
+++ b/docs/architecture.md
@ -0,0 +1,422 @@
+---
+title: Architecture
+description: Technical architecture of the core-content package
+updated: 2026-01-29
+---
+
+# Architecture
+
+The `core-content` package provides headless CMS functionality for the Host UK platform. It handles content management, AI-powered generation, revision history, webhooks for external CMS integration, and search capabilities.
+
+## Package Overview
+
+**Namespace:** `Core\Mod\Content\`
+**Entry Point:** `Boot.php` (Laravel Service Provider)
+**Dependencies:**
+- `core-php` (Foundation framework, events)
+- `core-tenant` (Workspaces, users, entitlements)
+- Optional: `core-agentic` (AI services for content generation)
+- Optional: `core-mcp` (MCP tool handlers)
+
+## Directory Structure
+
+```
+core-content/
+├── Boot.php              # Service provider with event listeners
+├── config.php            # Package configuration
+├── Models/               # Eloquent models (10 models)
+├── Services/             # Business logic services
+├── Controllers/          # API and web controllers
+│   └── Api/              # REST API controllers
+├── Jobs/                 # Queue jobs
+├── Mcp/                  # MCP tool handlers
+│   └── Handlers/         # Individual MCP tools
+├── Concerns/             # Traits
+├── Console/              # Artisan commands
+│   └── Commands/         # Command implementations
+├── Enums/                # PHP enums
+├── Migrations/           # Database migrations
+├── Observers/            # Model observers
+├── routes/               # Route definitions
+├── View/                 # Livewire components and Blade views
+│   ├── Modal/            # Livewire components
+│   └── Blade/            # Blade templates
+├── tests/                # Test suite
+└── docs/                 # Documentation
+```
+
+## Core Concepts
+
+### Content Items
+
+The primary content model. Supports multiple content types and sources:
+
+```php
+// Content types (where content originates)
+enum ContentType: string {
+    case NATIVE = 'native';       // Created in Host Hub editor
+    case HOSTUK = 'hostuk';       // Alias for native (backwards compat)
+    case SATELLITE = 'satellite'; // Per-service content
+    case WORDPRESS = 'wordpress'; // Legacy synced content
+}
+```
+
+Content items belong to workspaces and have:
+- Title, slug, excerpt, content (HTML/Markdown/JSON)
+- Status (draft, publish, future, private, pending)
+- Author and last editor tracking
+- Revision history
+- Taxonomy (categories, tags)
+- SEO metadata
+- Preview tokens for sharing unpublished content
+- CDN cache invalidation tracking
+
+### Content Briefs
+
+Briefs drive AI-powered content generation. They define what content to create:
+
+```php
+// Brief content types (what to generate)
+enum BriefContentType: string {
+    case HELP_ARTICLE = 'help_article';   // Documentation
+    case BLOG_POST = 'blog_post';         // Blog articles
+    case LANDING_PAGE = 'landing_page';   // Marketing pages
+    case SOCIAL_POST = 'social_post';     // Social media
+}
+```
+
+Brief workflow: `pending` -> `queued` -> `generating` -> `review` -> `published`
+
+### Revisions
+
+Every content change creates an immutable revision snapshot. Revisions support:
+- Change type tracking (edit, autosave, restore, publish)
+- Word/character count tracking
+- Side-by-side diff comparison with LCS algorithm
+- Configurable retention policies (max count, max age)
+
+## Service Layer
+
+### AIGatewayService
+
+Orchestrates two-stage AI content generation:
+
+1. **Stage 1: Draft (Gemini)** - Fast, cost-effective initial generation
+2. **Stage 2: Refine (Claude)** - Quality refinement and brand voice alignment
+
+```php
+$gateway = app(AIGatewayService::class);
+
+// Two-stage pipeline
+$result = $gateway->generateAndRefine($brief);
+
+// Or individual stages
+$draft = $gateway->generateDraft($brief);
+$refined = $gateway->refineDraft($brief, $draftContent);
+
+// Direct Claude generation (skip Gemini)
+$content = $gateway->generateDirect($brief);
+```
+
+### ContentSearchService
+
+Full-text search with multiple backend support:
+
+```php
+// Backends (configured via CONTENT_SEARCH_BACKEND)
+const BACKEND_DATABASE = 'database';        // LIKE queries with relevance
+const BACKEND_SCOUT_DATABASE = 'scout_database';  // Laravel Scout
+const BACKEND_MEILISEARCH = 'meilisearch';  // Laravel Scout + Meilisearch
+```
+
+Features:
+- Relevance scoring (title > slug > excerpt > content)
+- Filters: type, status, category, tag, date range, content_type
+- Autocomplete suggestions
+- Re-indexing support for Scout backends
+
+### WebhookRetryService
+
+Handles failed webhook processing with exponential backoff:
+
+```
+Retry intervals: 1m, 5m, 15m, 1h, 4h
+Max retries: 5 (configurable per webhook)
+```
+
+### ContentRender
+
+Public-facing content renderer with caching:
+- Homepage, blog listing, post, page rendering
+- Cache TTL: 1 hour production, 1 minute development
+- Cache key sanitisation for special characters
+
+### CdnPurgeService
+
+CDN cache invalidation via Bunny CDN:
+- Triggered by ContentItemObserver on publish/update
+- URL-based and tag-based purging
+- Workspace-level cache clearing
+
+## Event-Driven Architecture
+
+The package uses the event-driven module loading pattern from `core-php`:
+
+```php
+class Boot extends ServiceProvider
+{
+    public static array $listens = [
+        WebRoutesRegistering::class => 'onWebRoutes',
+        ApiRoutesRegistering::class => 'onApiRoutes',
+        ConsoleBooting::class => 'onConsole',
+        McpToolsRegistering::class => 'onMcpTools',
+    ];
+}
+```
+
+Handlers register:
+- **Web Routes:** Public blog, help pages, content preview
+- **API Routes:** REST API for briefs, media, search, generation
+- **Console:** Artisan commands for scheduling, pruning
+- **MCP Tools:** AI agent content management tools
+
+## API Structure
+
+### Authenticated Endpoints (Session or API Key)
+
+```
+# Content Briefs
+GET    /api/content/briefs           # List briefs
+POST   /api/content/briefs           # Create brief
+GET    /api/content/briefs/{id}      # Get brief
+PUT    /api/content/briefs/{id}      # Update brief
+DELETE /api/content/briefs/{id}      # Delete brief
+POST   /api/content/briefs/bulk      # Bulk create
+GET    /api/content/briefs/next      # Next ready for processing
+
+# AI Generation (rate limited: 10/min)
+POST   /api/content/generate/draft   # Generate draft (Gemini)
+POST   /api/content/generate/refine  # Refine draft (Claude)
+POST   /api/content/generate/full    # Full pipeline
+POST   /api/content/generate/social  # Social posts from content
+
+# Content Search (rate limited: 60/min)
+GET    /api/content/search           # Full-text search
+GET    /api/content/search/suggest   # Autocomplete
+GET    /api/content/search/info      # Backend info
+POST   /api/content/search/reindex   # Trigger re-index
+
+# Revisions
+GET    /api/content/items/{id}/revisions    # List revisions
+GET    /api/content/revisions/{id}          # Get revision
+POST   /api/content/revisions/{id}/restore  # Restore revision
+GET    /api/content/revisions/{id}/compare/{other}  # Compare
+
+# Preview
+POST   /api/content/items/{id}/preview/generate  # Generate preview link
+DELETE /api/content/items/{id}/preview/revoke    # Revoke preview link
+```
+
+### Public Endpoints
+
+```
+# Webhooks (signature verified, no auth)
+POST   /api/content/webhooks/{endpoint}  # Receive external webhooks
+
+# Web Routes
+GET    /blog                             # Blog listing
+GET    /blog/{slug}                      # Blog post
+GET    /help                             # Help centre
+GET    /help/{slug}                      # Help article
+GET    /content/preview/{id}             # Preview content
+```
+
+## Rate Limiting
+
+Defined in `Boot::configureRateLimiting()`:
+
+| Limiter | Authenticated | Unauthenticated |
+|---------|---------------|-----------------|
+| `content-generate` | 10/min per user/workspace | 2/min per IP |
+| `content-briefs` | 30/min per user | 5/min per IP |
+| `content-webhooks` | 60/min per endpoint | 30/min per IP |
+| `content-search` | Configurable (default 60/min) | 20/min per IP |
+
+## MCP Tools
+
+Seven MCP tools for AI agent integration:
+
+| Tool | Description |
+|------|-------------|
+| `content_list` | List content items with filters |
+| `content_read` | Read content by ID or slug |
+| `content_search` | Full-text search |
+| `content_create` | Create new content |
+| `content_update` | Update existing content |
+| `content_delete` | Soft delete content |
+| `content_taxonomies` | List categories and tags |
+
+All tools:
+- Require workspace resolution
+- Check entitlements (`content.mcp_access`, `content.items`)
+- Log actions to MCP session
+- Return structured responses
+
+## Data Flow
+
+### Content Creation via MCP
+
+```
+Agent Request
+    ↓
+ContentCreateHandler::handle()
+    ↓
+resolveWorkspace() → Workspace model
+    ↓
+checkEntitlement() → EntitlementService
+    ↓
+ContentItem::create()
+    ↓
+createRevision() → ContentRevision
+    ↓
+recordUsage() → EntitlementService
+    ↓
+Response with content ID
+```
+
+### Webhook Processing
+
+```
+External CMS
+    ↓
+POST /api/content/webhooks/{endpoint}
+    ↓
+ContentWebhookController::receive()
+    ↓
+Verify signature → ContentWebhookEndpoint::verifySignature()
+    ↓
+Check type allowed → ContentWebhookEndpoint::isTypeAllowed()
+    ↓
+Create ContentWebhookLog
+    ↓
+Dispatch ProcessContentWebhook job
+    ↓
+Job::handle()
+    ↓
+Process based on event type (wordpress.*, cms.*, generic.*)
+    ↓
+Create/Update/Delete ContentItem
+    ↓
+Mark log completed
+```
+
+### AI Generation Pipeline
+
+```
+ContentBrief
+    ↓
+GenerateContentJob dispatched
+    ↓
+Stage 1: AIGatewayService::generateDraft()
+    ↓
+GeminiService::generate() → Draft content
+    ↓
+Brief::markDraftComplete()
+    ↓
+Stage 2: AIGatewayService::refineDraft()
+    ↓
+ClaudeService::generate() → Refined content
+    ↓
+Brief::markRefined()
+    ↓
+AIUsage records created for each stage
+```
+
+## Configuration
+
+Key settings in `config.php`:
+
+```php
+return [
+    'generation' => [
+        'default_timeout' => env('CONTENT_GENERATION_TIMEOUT', 300),
+        'timeouts' => [
+            'help_article' => 180,
+            'blog_post' => 240,
+            'landing_page' => 300,
+            'social_post' => 60,
+        ],
+        'max_retries' => 3,
+        'backoff' => [30, 60, 120],
+    ],
+    'revisions' => [
+        'max_per_item' => env('CONTENT_MAX_REVISIONS', 50),
+        'max_age_days' => 180,
+        'preserve_published' => true,
+    ],
+    'cache' => [
+        'ttl' => env('CONTENT_CACHE_TTL', 3600),
+        'prefix' => 'content:render',
+    ],
+    'search' => [
+        'backend' => env('CONTENT_SEARCH_BACKEND', 'database'),
+        'min_query_length' => 2,
+        'max_per_page' => 50,
+        'default_per_page' => 20,
+        'rate_limit' => 60,
+    ],
+];
+```
+
+## Database Schema
+
+### Primary Tables
+
+| Table | Purpose |
+|-------|---------|
+| `content_items` | Content storage (posts, pages) |
+| `content_revisions` | Version history |
+| `content_taxonomies` | Categories and tags |
+| `content_item_taxonomy` | Pivot table |
+| `content_media` | Media attachments |
+| `content_authors` | Author profiles |
+| `content_briefs` | AI generation briefs |
+| `content_tasks` | Scheduled content tasks |
+| `content_webhook_endpoints` | Webhook configurations |
+| `content_webhook_logs` | Webhook processing logs |
+| `ai_usage` | AI API usage tracking |
+| `prompts` | AI prompt templates |
+| `prompt_versions` | Prompt version history |
+
+### Key Indexes
+
+- `content_items`: Composite indexes on `(workspace_id, slug, type)`, `(workspace_id, status, type)`, `(workspace_id, status, content_type)`
+- `content_revisions`: Index on `(content_item_id, revision_number)`
+- `content_webhook_logs`: Index on `(workspace_id, status)`, `(status, created_at)`
+
+## Extension Points
+
+### Adding New Content Types
+
+1. Add value to `ContentType` enum
+2. Update `ContentType::isNative()` if applicable
+3. Add any type-specific scopes to `ContentItem`
+
+### Adding New AI Generation Types
+
+1. Add value to `BriefContentType` enum
+2. Add timeout to `config.php` generation.timeouts
+3. Add prompt in `AIGatewayService::getDraftSystemPrompt()`
+
+### Adding New Webhook Event Types
+
+1. Add to `ContentWebhookEndpoint::ALLOWED_TYPES`
+2. Add handler in `ProcessContentWebhook::processWordPress()` or `processCms()`
+3. Add event type mapping in `ContentWebhookController::normaliseEventType()`
+
+### Adding New MCP Tools
+
+1. Create handler in `Mcp/Handlers/` implementing `McpToolHandler`
+2. Define `schema()` with tool name, description, input schema
+3. Implement `handle()` with workspace resolution and entitlement checks
+4. Register in `Boot::onMcpTools()`
--- a/docs/security.md
+++ b/docs/security.md
@ -0,0 +1,389 @@
+---
+title: Security
+description: Security considerations and audit notes for core-content
+updated: 2026-01-29
+---
+
+# Security
+
+This document covers security considerations, known risks, and recommended mitigations for the `core-content` package.
+
+## Authentication and Authorisation
+
+### API Authentication
+
+The content API supports two authentication methods:
+
+1. **Session Authentication** (`auth` middleware)
+   - For browser-based access
+   - CSRF protection via Laravel's standard middleware
+
+2. **API Key Authentication** (`api.auth` middleware)
+   - For programmatic access
+   - Keys prefixed with `hk_`
+   - Scope enforcement via `api.scope.enforce` middleware
+
+### Webhook Authentication
+
+Webhooks use HMAC signature verification instead of session/API key auth:
+
+```php
+// Signature verification in ContentWebhookEndpoint
+public function verifySignature(string $payload, ?string $signature): bool
+{
+    $expectedSignature = hash_hmac('sha256', $payload, $this->secret);
+    return hash_equals($expectedSignature, $signature);
+}
+```
+
+**Supported signature headers:**
+- `X-Signature`
+- `X-Hub-Signature-256` (GitHub format)
+- `X-WP-Webhook-Signature` (WordPress format)
+- `X-Content-Signature`
+- `Signature`
+
+### MCP Tool Authentication
+
+MCP tools authenticate via the MCP session context. Workspace access is verified through:
+- Workspace resolution (by slug or ID)
+- Entitlement checks (`content.mcp_access`, `content.items`)
+
+## Known Security Considerations
+
+### HIGH: HTML Sanitisation Fallback
+
+**Location:** `Models/ContentItem.php:333-351`
+
+**Issue:** The `getSanitisedContent()` method falls back to `strip_tags()` if HTMLPurifier is unavailable. This is insufficient for XSS protection.
+
+```php
+// Current fallback (insufficient)
+$allowedTags = '<p><br><strong>...<a>...';
+return strip_tags($content, $allowedTags);
+```
+
+**Risk:** XSS attacks via crafted HTML in content body.
+
+**Mitigation:**
+1. Ensure HTMLPurifier is installed in production
+2. Add package check in boot to fail loudly if missing
+3. Consider using `voku/anti-xss` as a lighter alternative
+
+### HIGH: Webhook Signature Optional
+
+**Location:** `Models/ContentWebhookEndpoint.php:205-210`
+
+**Issue:** When no secret is configured, signature verification is skipped:
+
+```php
+if (empty($this->secret)) {
+    return true;  // Accepts all requests
+}
+```
+
+**Risk:** Unauthenticated webhook injection if endpoint has no secret.
+
+**Mitigation:**
+1. Require secrets for all production endpoints
+2. Add explicit `allow_unsigned` flag if intentional
+3. Log warning when unsigned webhooks are accepted
+4. Rate limit unsigned endpoints more aggressively
+
+### MEDIUM: Workspace Access in MCP Handlers
+
+**Location:** `Mcp/Handlers/*.php`
+
+**Issue:** Workspace resolution allows lookup by ID:
+
+```php
+return Workspace::where('slug', $slug)
+    ->orWhere('id', $slug)
+    ->first();
+```
+
+**Risk:** If an attacker knows a workspace ID, they could potentially access content without being a workspace member.
+
+**Mitigation:**
+1. Always verify workspace membership after resolution
+2. Use entitlement checks (already present but verify coverage)
+3. Consider removing ID-based lookup for MCP
+
+### MEDIUM: Preview Token Enumeration
+
+**Location:** `Controllers/ContentPreviewController.php`
+
+**Issue:** No rate limiting on preview token generation endpoint. An attacker could probe for valid content IDs.
+
+**Mitigation:**
+1. Add rate limiting (30/min per user)
+2. Use constant-time responses regardless of content existence
+3. Consider using UUIDs instead of sequential IDs for preview URLs
+
+### LOW: Webhook Payload Content Types
+
+**Location:** `Jobs/ProcessContentWebhook.php:288-289`
+
+**Issue:** Content type from external webhook is assigned directly:
+
+```php
+$contentItem->content_type = ContentType::NATIVE;
+```
+
+**Risk:** External systems could potentially inject invalid content types.
+
+**Mitigation:**
+1. Validate against `ContentType` enum
+2. Default to a safe type if validation fails
+3. Log invalid types for monitoring
+
+## Input Validation
+
+### API Request Validation
+
+All API controllers use Laravel's validation:
+
+```php
+$validated = $request->validate([
+    'q' => 'required|string|min:2|max:500',
+    'type' => 'nullable|string|in:post,page',
+    'status' => 'nullable',
+    // ...
+]);
+```
+
+**Validated inputs:**
+- Search queries (min/max length, string type)
+- Content types (enum validation)
+- Pagination (min/max values)
+- Date ranges (date format, logical order)
+
+### MCP Input Validation
+
+MCP handlers validate via JSON schema:
+
+```php
+'inputSchema' => [
+    'type' => 'object',
+    'properties' => [
+        'workspace' => ['type' => 'string'],
+        'title' => ['type' => 'string'],
+        'type' => ['type' => 'string', 'enum' => ['post', 'page']],
+    ],
+    'required' => ['workspace', 'title'],
+]
+```
+
+### Webhook Payload Validation
+
+Webhook payloads undergo:
+- JSON decode validation
+- Event type normalisation
+- Content ID extraction with fallbacks
+
+**Note:** Payload content is stored in JSON column without full validation. Processing logic handles missing/invalid fields gracefully.
+
+## Rate Limiting
+
+### Configured Limiters
+
+| Endpoint | Auth | Unauthenticated | Key |
+|----------|------|-----------------|-----|
+| AI Generation | 10/min | 2/min | `content-generate` |
+| Brief Creation | 30/min | 5/min | `content-briefs` |
+| Webhooks | 60/min | 30/min | `content-webhooks` |
+| Search | 60/min | 20/min | `content-search` |
+
+### Rate Limit Bypass Risks
+
+1. **IP Spoofing:** Ensure `X-Forwarded-For` handling is configured correctly
+2. **Workspace Switching:** Workspace-based limits should use user ID as fallback
+3. **API Key Sharing:** Each key should have independent limits
+
+## Data Protection
+
+### Sensitive Data Handling
+
+**Encrypted at rest:**
+- `ContentWebhookEndpoint.secret` (cast to `encrypted`)
+- `ContentWebhookEndpoint.previous_secret` (cast to `encrypted`)
+
+**Hidden from serialisation:**
+- Webhook secrets (via `$hidden` property)
+
+### PII Considerations
+
+Content may contain PII in:
+- Article body content
+- Author information
+- Webhook payloads
+
+**Recommendations:**
+1. Implement content retention policies
+2. Add GDPR data export/deletion support
+3. Log access to PII-containing content
+
+## Webhook Security
+
+### Circuit Breaker
+
+Endpoints automatically disable after 10 consecutive failures:
+
+```php
+const MAX_FAILURES = 10;
+
+public function incrementFailureCount(): void
+{
+    $this->increment('failure_count');
+    if ($this->failure_count >= self::MAX_FAILURES) {
+        $this->update(['is_enabled' => false]);
+    }
+}
+```
+
+### Secret Rotation
+
+Grace period support for secret rotation:
+
+```php
+public function isInGracePeriod(): bool
+{
+    // Accepts both current and previous secret during grace
+}
+```
+
+Default grace period: 24 hours
+
+### Allowed Event Types
+
+Endpoints can restrict which event types they accept:
+
+```php
+const ALLOWED_TYPES = [
+    'wordpress.post_created',
+    'wordpress.post_updated',
+    // ...
+    'generic.payload',
+];
+```
+
+Wildcard support: `wordpress.*` matches all WordPress events.
+
+## Content Security
+
+### XSS Prevention
+
+1. **Input:** Content stored as-is to preserve formatting
+2. **Output:** `getSanitisedContent()` for public rendering
+3. **Admin:** Trusted content displayed with proper escaping
+
+**Blade template guidelines:**
+- Use `{{ $title }}` for plain text (auto-escaped)
+- Use `{!! $content !!}` only for sanitised HTML
+- Comments document which fields need which treatment
+
+### SQL Injection
+
+All database queries use:
+- Eloquent ORM (parameterised queries)
+- Query builder with bindings
+- No raw SQL with user input
+
+### CSRF Protection
+
+Web routes include CSRF middleware automatically. API routes exempt (use API key auth).
+
+## Audit Logging
+
+### Logged Events
+
+- Webhook receipt and processing
+- AI generation requests and results
+- Content creation/update/deletion via MCP
+- CDN cache purges
+- Authentication failures
+
+### Log Levels
+
+| Event | Level |
+|-------|-------|
+| Webhook signature failure | WARNING |
+| Circuit breaker triggered | WARNING |
+| Processing failure | ERROR |
+| Successful operations | INFO |
+| Skipped operations | DEBUG |
+
+## Recommendations
+
+### Immediate (P1)
+
+1. [ ] Require HTMLPurifier or equivalent in production
+2. [ ] Make webhook signature verification mandatory
+3. [ ] Add rate limiting to preview generation
+4. [ ] Validate content_type from webhook payloads
+
+### Short-term (P2)
+
+1. [ ] Add comprehensive audit logging
+2. [ ] Implement content access logging
+3. [ ] Add IP allowlisting option for webhooks
+4. [ ] Create security-focused test suite
+
+### Long-term (P3+)
+
+1. [ ] Implement content encryption at rest option
+2. [ ] Add GDPR compliance features
+3. [ ] Create security monitoring dashboard
+4. [ ] Add anomaly detection for webhook patterns
+
+## Security Testing
+
+### Manual Testing Checklist
+
+```
+[ ] Verify webhook signature rejection with invalid signature
+[ ] Test rate limiting enforcement
+[ ] Confirm XSS payloads are sanitised
+[ ] Verify workspace isolation in API responses
+[ ] Test preview token expiration
+[ ] Verify CSRF protection on web routes
+[ ] Test SQL injection attempts in search
+[ ] Verify file type validation on media uploads
+```
+
+### Automated Testing
+
+```bash
+# Run security-focused tests
+./vendor/bin/pest --filter=Security
+
+# Check for common vulnerabilities
+./vendor/bin/pint --test  # Code style (includes some security patterns)
+```
+
+## Incident Response
+
+### Webhook Compromise
+
+1. Disable affected endpoint
+2. Rotate all secrets
+3. Review webhook logs for suspicious patterns
+4. Regenerate secrets for all endpoints
+
+### Content Injection
+
+1. Identify affected content items
+2. Restore from revision history
+3. Review webhook source
+4. Add additional validation
+
+### API Key Leak
+
+1. Revoke compromised key
+2. Review access logs
+3. Generate new key with reduced scope
+4. Monitor for unauthorised access
+
+## Contact
+
+Security issues should be reported to the security team. Do not create public issues for security vulnerabilities.
--- a/tests/Unit/HtmlSanitiserTest.php
+++ b/tests/Unit/HtmlSanitiserTest.php
@ -0,0 +1,322 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Core\Mod\Content\Tests\Unit;
+
+use Core\Mod\Content\Services\HtmlSanitiser;
+use Tests\TestCase;
+
+/**
+ * Security tests for HTML sanitisation.
+ *
+ * These tests verify that XSS attack vectors are properly neutralised
+ * while preserving safe HTML formatting.
+ */
+class HtmlSanitiserTest extends TestCase
+{
+    protected HtmlSanitiser $sanitiser;
+
+    protected function setUp(): void
+    {
+        parent::setUp();
+        $this->sanitiser = new HtmlSanitiser;
+    }
+
+    // -------------------------------------------------------------------------
+    // XSS Attack Prevention Tests
+    // -------------------------------------------------------------------------
+
+    public function test_removes_script_tags(): void
+    {
+        $malicious = '<p>Hello</p><script>alert("XSS")</script><p>World</p>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('<script>', $result);
+        $this->assertStringNotContainsString('alert', $result);
+        $this->assertStringContainsString('<p>Hello</p>', $result);
+        $this->assertStringContainsString('<p>World</p>', $result);
+    }
+
+    public function test_removes_onclick_attributes(): void
+    {
+        $malicious = '<a href="#" onclick="alert(\'XSS\')">Click me</a>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('onclick', $result);
+        $this->assertStringContainsString('Click me', $result);
+    }
+
+    public function test_removes_onerror_attributes(): void
+    {
+        $malicious = '<img src="x" onerror="alert(\'XSS\')">';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('onerror', $result);
+        $this->assertStringNotContainsString('alert', $result);
+    }
+
+    public function test_removes_onload_attributes(): void
+    {
+        $malicious = '<body onload="alert(\'XSS\')">';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('onload', $result);
+        $this->assertStringNotContainsString('alert', $result);
+    }
+
+    public function test_removes_javascript_protocol_in_href(): void
+    {
+        $malicious = '<a href="javascript:alert(\'XSS\')">Click me</a>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('javascript:', $result);
+        $this->assertStringContainsString('Click me', $result);
+    }
+
+    public function test_removes_javascript_protocol_in_src(): void
+    {
+        $malicious = '<img src="javascript:alert(\'XSS\')">';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('javascript:', $result);
+    }
+
+    public function test_removes_data_uri_xss(): void
+    {
+        $malicious = '<a href="data:text/html,<script>alert(\'XSS\')</script>">Click</a>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('data:text/html', $result);
+        $this->assertStringNotContainsString('<script>', $result);
+    }
+
+    public function test_removes_style_expression_xss(): void
+    {
+        $malicious = '<div style="background:url(javascript:alert(\'XSS\'))">Test</div>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('javascript:', $result);
+        $this->assertStringContainsString('Test', $result);
+    }
+
+    public function test_removes_svg_xss(): void
+    {
+        $malicious = '<svg onload="alert(\'XSS\')"><circle r="50"/></svg>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('<svg', $result);
+        $this->assertStringNotContainsString('onload', $result);
+    }
+
+    public function test_removes_iframe_by_default(): void
+    {
+        $malicious = '<iframe src="https://evil.com"></iframe>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('<iframe', $result);
+    }
+
+    public function test_removes_form_action_xss(): void
+    {
+        $malicious = '<form action="javascript:alert(\'XSS\')"><input type="submit"></form>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('javascript:', $result);
+        $this->assertStringNotContainsString('<form', $result);
+    }
+
+    public function test_removes_meta_refresh_xss(): void
+    {
+        $malicious = '<meta http-equiv="refresh" content="0;url=javascript:alert(\'XSS\')">';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('<meta', $result);
+        $this->assertStringNotContainsString('javascript:', $result);
+    }
+
+    public function test_removes_object_tag(): void
+    {
+        $malicious = '<object data="data:text/html,<script>alert(\'XSS\')</script>"></object>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('<object', $result);
+        $this->assertStringNotContainsString('<script>', $result);
+    }
+
+    public function test_removes_embed_tag(): void
+    {
+        $malicious = '<embed src="javascript:alert(\'XSS\')">';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('<embed', $result);
+        $this->assertStringNotContainsString('javascript:', $result);
+    }
+
+    public function test_removes_base_tag(): void
+    {
+        $malicious = '<base href="javascript:alert(\'XSS\')//"/>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('<base', $result);
+    }
+
+    // -------------------------------------------------------------------------
+    // Safe HTML Preservation Tests
+    // -------------------------------------------------------------------------
+
+    public function test_preserves_paragraphs(): void
+    {
+        $html = '<p>Hello World</p>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('<p>Hello World</p>', $result);
+    }
+
+    public function test_preserves_headings(): void
+    {
+        $html = '<h1>Title</h1><h2>Subtitle</h2><h3>Section</h3>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('<h1>Title</h1>', $result);
+        $this->assertStringContainsString('<h2>Subtitle</h2>', $result);
+        $this->assertStringContainsString('<h3>Section</h3>', $result);
+    }
+
+    public function test_preserves_formatting(): void
+    {
+        $html = '<p><strong>Bold</strong> and <em>italic</em> and <u>underline</u></p>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('<strong>Bold</strong>', $result);
+        $this->assertStringContainsString('<em>italic</em>', $result);
+        $this->assertStringContainsString('<u>underline</u>', $result);
+    }
+
+    public function test_preserves_lists(): void
+    {
+        $html = '<ul><li>Item 1</li><li>Item 2</li></ul>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('<ul>', $result);
+        $this->assertStringContainsString('<li>Item 1</li>', $result);
+        $this->assertStringContainsString('<li>Item 2</li>', $result);
+    }
+
+    public function test_preserves_safe_links(): void
+    {
+        $html = '<a href="https://example.com">Link</a>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('href="https://example.com"', $result);
+        $this->assertStringContainsString('Link</a>', $result);
+    }
+
+    public function test_preserves_mailto_links(): void
+    {
+        $html = '<a href="mailto:test@example.com">Email</a>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('mailto:test@example.com', $result);
+    }
+
+    public function test_preserves_tel_links(): void
+    {
+        $html = '<a href="tel:+1234567890">Call</a>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('tel:+1234567890', $result);
+    }
+
+    public function test_preserves_safe_images(): void
+    {
+        $html = '<img src="https://example.com/image.jpg" alt="Test image">';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('src="https://example.com/image.jpg"', $result);
+        $this->assertStringContainsString('alt="Test image"', $result);
+    }
+
+    public function test_preserves_tables(): void
+    {
+        $html = '<table><tr><th>Header</th></tr><tr><td>Data</td></tr></table>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('<table>', $result);
+        $this->assertStringContainsString('<th>Header</th>', $result);
+        $this->assertStringContainsString('<td>Data</td>', $result);
+    }
+
+    public function test_preserves_code_blocks(): void
+    {
+        $html = '<pre><code>function test() {}</code></pre>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('<pre>', $result);
+        $this->assertStringContainsString('<code>', $result);
+        $this->assertStringContainsString('function test() {}', $result);
+    }
+
+    public function test_preserves_blockquotes(): void
+    {
+        $html = '<blockquote>A famous quote</blockquote>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('<blockquote>A famous quote</blockquote>', $result);
+    }
+
+    public function test_preserves_id_and_class_attributes(): void
+    {
+        $html = '<div id="main" class="container"><p class="intro">Content</p></div>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('id="main"', $result);
+        $this->assertStringContainsString('class="container"', $result);
+        $this->assertStringContainsString('class="intro"', $result);
+    }
+
+    // -------------------------------------------------------------------------
+    // Edge Cases
+    // -------------------------------------------------------------------------
+
+    public function test_handles_empty_string(): void
+    {
+        $result = $this->sanitiser->sanitise('');
+
+        $this->assertSame('', $result);
+    }
+
+    public function test_handles_plain_text(): void
+    {
+        $text = 'Just plain text without any HTML';
+        $result = $this->sanitiser->sanitise($text);
+
+        $this->assertSame($text, $result);
+    }
+
+    public function test_handles_unicode_content(): void
+    {
+        $html = '<p>Caf?? au lait and ????????</p>';
+        $result = $this->sanitiser->sanitise($html);
+
+        $this->assertStringContainsString('Caf??', $result);
+        $this->assertStringContainsString('????????', $result);
+    }
+
+    public function test_handles_nested_xss_attempts(): void
+    {
+        $malicious = '<div><p onclick="alert(1)"><a href="javascript:void(0)" onmouseover="alert(2)">Text</a></p></div>';
+        $result = $this->sanitiser->sanitise($malicious);
+
+        $this->assertStringNotContainsString('onclick', $result);
+        $this->assertStringNotContainsString('onmouseover', $result);
+        $this->assertStringNotContainsString('javascript:', $result);
+        $this->assertStringContainsString('Text', $result);
+    }
+
+    public function test_is_available_returns_true(): void
+    {
+        // HTMLPurifier should be installed as a required dependency
+        $this->assertTrue(HtmlSanitiser::isAvailable());
+    }
+}