2026-01-21 14:11:45 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
|
|
namespace Core\Tests\Feature;
|
|
|
|
|
|
|
|
|
|
use Core\Input\Sanitiser;
|
|
|
|
|
use Core\Tests\TestCase;
|
2026-01-21 20:20:14 +00:00
|
|
|
use Normalizer;
|
|
|
|
|
use Psr\Log\LoggerInterface;
|
2026-01-21 14:11:45 +00:00
|
|
|
|
|
|
|
|
class SanitiserTest extends TestCase
|
|
|
|
|
{
|
|
|
|
|
protected Sanitiser $sanitiser;
|
|
|
|
|
|
|
|
|
|
protected function setUp(): void
|
|
|
|
|
{
|
|
|
|
|
parent::setUp();
|
|
|
|
|
$this->sanitiser = new Sanitiser;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_returns_empty_array_for_empty_input(): void
|
|
|
|
|
{
|
|
|
|
|
$result = $this->sanitiser->filter([]);
|
|
|
|
|
|
|
|
|
|
$this->assertIsArray($result);
|
|
|
|
|
$this->assertEmpty($result);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_preserves_normal_strings(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'name' => 'John Doe',
|
|
|
|
|
'email' => 'john@example.com',
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('John Doe', $result['name']);
|
|
|
|
|
$this->assertEquals('john@example.com', $result['email']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_strips_null_bytes(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'data' => "Hello\x00World",
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('HelloWorld', $result['data']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_strips_control_characters(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'text' => "Line1\x01\x02\x03Line2",
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('Line1Line2', $result['text']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_preserves_newlines_and_tabs(): void
|
|
|
|
|
{
|
|
|
|
|
// Note: FILTER_FLAG_STRIP_LOW strips ASCII 0-31
|
|
|
|
|
// which includes \n (10) and \t (9)
|
|
|
|
|
// This test documents actual behavior
|
|
|
|
|
$input = [
|
|
|
|
|
'text' => "Line1\nLine2\tTabbed",
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
// Control characters are stripped
|
|
|
|
|
$this->assertIsString($result['text']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_preserves_unicode(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'name' => '日本語テスト',
|
2026-01-21 20:20:14 +00:00
|
|
|
'emoji' => 'Hello',
|
|
|
|
|
'accents' => 'Cafe resume naive',
|
2026-01-21 14:11:45 +00:00
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('日本語テスト', $result['name']);
|
2026-01-21 20:20:14 +00:00
|
|
|
$this->assertEquals('Hello', $result['emoji']);
|
|
|
|
|
$this->assertEquals('Cafe resume naive', $result['accents']);
|
2026-01-21 14:11:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_handles_nested_arrays(): void
|
|
|
|
|
{
|
|
|
|
|
// Note: filter_var_array processes top-level keys only
|
|
|
|
|
// Nested arrays are returned as-is (or filtered based on definition)
|
|
|
|
|
$input = [
|
|
|
|
|
'simple' => 'value',
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('value', $result['simple']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_handles_numeric_values_as_strings(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'number' => '12345',
|
|
|
|
|
'float' => '123.45',
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('12345', $result['number']);
|
|
|
|
|
$this->assertEquals('123.45', $result['float']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_handles_special_html_characters(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'html' => '<script>alert("xss")</script>',
|
|
|
|
|
'entities' => '<div>',
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
// FILTER_UNSAFE_RAW doesn't strip HTML, just control characters
|
|
|
|
|
$this->assertEquals('<script>alert("xss")</script>', $result['html']);
|
|
|
|
|
$this->assertEquals('<div>', $result['entities']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_processes_multiple_keys(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'key1' => 'value1',
|
|
|
|
|
'key2' => 'value2',
|
|
|
|
|
'key3' => 'value3',
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertCount(3, $result);
|
|
|
|
|
$this->assertEquals('value1', $result['key1']);
|
|
|
|
|
$this->assertEquals('value2', $result['key2']);
|
|
|
|
|
$this->assertEquals('value3', $result['key3']);
|
|
|
|
|
}
|
2026-01-21 20:20:14 +00:00
|
|
|
|
|
|
|
|
// ========================================
|
|
|
|
|
// New tests for configurable filter rules
|
|
|
|
|
// ========================================
|
|
|
|
|
|
|
|
|
|
public function test_with_schema_returns_new_instance(): void
|
|
|
|
|
{
|
|
|
|
|
$original = new Sanitiser;
|
|
|
|
|
$withSchema = $original->withSchema(['email' => ['filters' => [FILTER_SANITIZE_EMAIL]]]);
|
|
|
|
|
|
|
|
|
|
$this->assertNotSame($original, $withSchema);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_schema_applies_additional_filters_to_specified_fields(): void
|
|
|
|
|
{
|
|
|
|
|
$sanitiser = new Sanitiser([
|
|
|
|
|
'email' => ['filters' => [FILTER_SANITIZE_EMAIL]],
|
|
|
|
|
]);
|
|
|
|
|
|
|
|
|
|
$input = [
|
|
|
|
|
'email' => 'test (at) example.com',
|
|
|
|
|
'name' => 'test (at) example.com', // Same input, but not email field
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
// Email field gets sanitized
|
|
|
|
|
$this->assertEquals('testatexample.com', $result['email']);
|
|
|
|
|
// Name field keeps original (minus any control chars)
|
|
|
|
|
$this->assertEquals('test (at) example.com', $result['name']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_schema_can_skip_control_character_stripping(): void
|
|
|
|
|
{
|
|
|
|
|
$sanitiser = new Sanitiser([
|
|
|
|
|
'raw' => ['skip_control_strip' => true],
|
|
|
|
|
]);
|
|
|
|
|
|
|
|
|
|
$input = [
|
|
|
|
|
'raw' => "Has\x00Null",
|
|
|
|
|
'normal' => "Has\x00Null",
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
// Raw field keeps null byte
|
|
|
|
|
$this->assertEquals("Has\x00Null", $result['raw']);
|
|
|
|
|
// Normal field has null byte stripped
|
|
|
|
|
$this->assertEquals('HasNull', $result['normal']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_constructor_accepts_schema(): void
|
|
|
|
|
{
|
|
|
|
|
$schema = ['email' => ['filters' => [FILTER_SANITIZE_EMAIL]]];
|
|
|
|
|
$sanitiser = new Sanitiser($schema);
|
|
|
|
|
|
|
|
|
|
$input = ['email' => 'test (at) example.com'];
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('testatexample.com', $result['email']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ========================================
|
|
|
|
|
// Tests for Unicode NFC normalization
|
|
|
|
|
// ========================================
|
|
|
|
|
|
|
|
|
|
public function test_unicode_normalization_is_enabled_by_default(): void
|
|
|
|
|
{
|
2026-01-26 21:21:53 +00:00
|
|
|
if (! class_exists(Normalizer::class)) {
|
2026-01-21 20:20:14 +00:00
|
|
|
$this->markTestSkipped('intl extension not available');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$sanitiser = new Sanitiser;
|
|
|
|
|
|
|
|
|
|
// NFD: e + combining acute accent (two code points)
|
|
|
|
|
$nfd = "cafe\xCC\x81"; // 'cafe' + combining acute accent
|
|
|
|
|
// NFC: e with acute (single code point)
|
|
|
|
|
$nfc = "caf\xC3\xA9"; // 'cafe' as single accented char
|
|
|
|
|
|
|
|
|
|
$input = ['text' => $nfd];
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
// Should be normalized to NFC
|
|
|
|
|
$this->assertEquals($nfc, $result['text']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_with_normalization_false_disables_nfc(): void
|
|
|
|
|
{
|
2026-01-26 21:21:53 +00:00
|
|
|
if (! class_exists(Normalizer::class)) {
|
2026-01-21 20:20:14 +00:00
|
|
|
$this->markTestSkipped('intl extension not available');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$sanitiser = (new Sanitiser)->withNormalization(false);
|
|
|
|
|
|
|
|
|
|
// NFD form
|
|
|
|
|
$nfd = "cafe\xCC\x81";
|
|
|
|
|
|
|
|
|
|
$input = ['text' => $nfd];
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
// Should NOT be normalized (stays NFD)
|
|
|
|
|
$this->assertEquals($nfd, $result['text']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_schema_can_skip_normalization_per_field(): void
|
|
|
|
|
{
|
2026-01-26 21:21:53 +00:00
|
|
|
if (! class_exists(Normalizer::class)) {
|
2026-01-21 20:20:14 +00:00
|
|
|
$this->markTestSkipped('intl extension not available');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$sanitiser = new Sanitiser([
|
|
|
|
|
'raw' => ['skip_normalize' => true],
|
|
|
|
|
]);
|
|
|
|
|
|
|
|
|
|
// NFD form
|
|
|
|
|
$nfd = "cafe\xCC\x81";
|
|
|
|
|
$nfc = "caf\xC3\xA9";
|
|
|
|
|
|
|
|
|
|
$input = [
|
|
|
|
|
'raw' => $nfd,
|
|
|
|
|
'normal' => $nfd,
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
// Raw field keeps NFD
|
|
|
|
|
$this->assertEquals($nfd, $result['raw']);
|
|
|
|
|
// Normal field gets NFC
|
|
|
|
|
$this->assertEquals($nfc, $result['normal']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ========================================
|
|
|
|
|
// Tests for audit logging
|
|
|
|
|
// ========================================
|
|
|
|
|
|
|
|
|
|
public function test_with_logger_returns_new_instance(): void
|
|
|
|
|
{
|
|
|
|
|
$logger = $this->createMock(LoggerInterface::class);
|
|
|
|
|
$original = new Sanitiser;
|
|
|
|
|
$withLogger = $original->withLogger($logger);
|
|
|
|
|
|
|
|
|
|
$this->assertNotSame($original, $withLogger);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_audit_logging_logs_when_content_modified(): void
|
|
|
|
|
{
|
|
|
|
|
$logger = $this->createMock(LoggerInterface::class);
|
|
|
|
|
$logger->expects($this->once())
|
|
|
|
|
->method('info')
|
|
|
|
|
->with(
|
|
|
|
|
'Input sanitised',
|
|
|
|
|
$this->callback(function ($context) {
|
|
|
|
|
return $context['field'] === 'data'
|
|
|
|
|
&& $context['sanitised'] === 'HelloWorld'
|
|
|
|
|
&& $context['original_length'] === 11
|
|
|
|
|
&& $context['sanitised_length'] === 10;
|
|
|
|
|
})
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
$sanitiser = new Sanitiser([], $logger, true);
|
|
|
|
|
|
|
|
|
|
$input = ['data' => "Hello\x00World"];
|
|
|
|
|
$sanitiser->filter($input);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_audit_logging_does_not_log_when_content_unchanged(): void
|
|
|
|
|
{
|
|
|
|
|
$logger = $this->createMock(LoggerInterface::class);
|
|
|
|
|
$logger->expects($this->never())->method('info');
|
|
|
|
|
|
|
|
|
|
$sanitiser = new Sanitiser([], $logger, true);
|
|
|
|
|
|
|
|
|
|
$input = ['data' => 'HelloWorld'];
|
|
|
|
|
$sanitiser->filter($input);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_audit_logging_disabled_by_default(): void
|
|
|
|
|
{
|
|
|
|
|
$logger = $this->createMock(LoggerInterface::class);
|
|
|
|
|
$logger->expects($this->never())->method('info');
|
|
|
|
|
|
|
|
|
|
// Logger provided but audit not enabled
|
|
|
|
|
$sanitiser = new Sanitiser([], $logger, false);
|
|
|
|
|
|
|
|
|
|
$input = ['data' => "Hello\x00World"];
|
|
|
|
|
$sanitiser->filter($input);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_audit_logging_requires_logger(): void
|
|
|
|
|
{
|
|
|
|
|
// Audit enabled but no logger - should not crash
|
|
|
|
|
$sanitiser = new Sanitiser([], null, true);
|
|
|
|
|
|
|
|
|
|
$input = ['data' => "Hello\x00World"];
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('HelloWorld', $result['data']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_audit_logging_includes_nested_path(): void
|
|
|
|
|
{
|
|
|
|
|
$logger = $this->createMock(LoggerInterface::class);
|
|
|
|
|
$logger->expects($this->once())
|
|
|
|
|
->method('info')
|
|
|
|
|
->with(
|
|
|
|
|
'Input sanitised',
|
|
|
|
|
$this->callback(function ($context) {
|
|
|
|
|
return $context['field'] === 'nested.deep.value';
|
|
|
|
|
})
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
$sanitiser = new Sanitiser([], $logger, true);
|
|
|
|
|
|
|
|
|
|
$input = [
|
|
|
|
|
'nested' => [
|
|
|
|
|
'deep' => [
|
|
|
|
|
'value' => "Has\x00Null",
|
|
|
|
|
],
|
|
|
|
|
],
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$sanitiser->filter($input);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ========================================
|
|
|
|
|
// Tests for backwards compatibility
|
|
|
|
|
// ========================================
|
|
|
|
|
|
|
|
|
|
public function test_default_constructor_works_with_no_arguments(): void
|
|
|
|
|
{
|
|
|
|
|
$sanitiser = new Sanitiser;
|
|
|
|
|
|
|
|
|
|
$input = ['test' => "Hello\x00World"];
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('HelloWorld', $result['test']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_fluent_interface_chains_correctly(): void
|
|
|
|
|
{
|
|
|
|
|
$logger = $this->createMock(LoggerInterface::class);
|
|
|
|
|
|
|
|
|
|
$sanitiser = (new Sanitiser)
|
|
|
|
|
->withSchema(['email' => ['filters' => [FILTER_SANITIZE_EMAIL]]])
|
|
|
|
|
->withLogger($logger, true)
|
|
|
|
|
->withNormalization(false);
|
|
|
|
|
|
|
|
|
|
$input = ['email' => 'test (at) example.com'];
|
|
|
|
|
$result = $sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('testatexample.com', $result['email']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_handles_deeply_nested_arrays(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'level1' => [
|
|
|
|
|
'level2' => [
|
|
|
|
|
'level3' => [
|
|
|
|
|
'data' => "Hello\x00World",
|
|
|
|
|
],
|
|
|
|
|
],
|
|
|
|
|
],
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertEquals('HelloWorld', $result['level1']['level2']['level3']['data']);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public function test_filter_preserves_non_string_values(): void
|
|
|
|
|
{
|
|
|
|
|
$input = [
|
|
|
|
|
'int' => 123,
|
|
|
|
|
'float' => 45.67,
|
|
|
|
|
'bool' => true,
|
|
|
|
|
'null' => null,
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
$result = $this->sanitiser->filter($input);
|
|
|
|
|
|
|
|
|
|
$this->assertSame(123, $result['int']);
|
|
|
|
|
$this->assertSame(45.67, $result['float']);
|
|
|
|
|
$this->assertSame(true, $result['bool']);
|
|
|
|
|
$this->assertNull($result['null']);
|
|
|
|
|
}
|
2026-01-21 14:11:45 +00:00
|
|
|
}
|