sanitiser = new Sanitiser; } public function test_filter_returns_empty_array_for_empty_input(): void { $result = $this->sanitiser->filter([]); $this->assertIsArray($result); $this->assertEmpty($result); } public function test_filter_preserves_normal_strings(): void { $input = [ 'name' => 'John Doe', 'email' => 'john@example.com', ]; $result = $this->sanitiser->filter($input); $this->assertEquals('John Doe', $result['name']); $this->assertEquals('john@example.com', $result['email']); } public function test_filter_strips_null_bytes(): void { $input = [ 'data' => "Hello\x00World", ]; $result = $this->sanitiser->filter($input); $this->assertEquals('HelloWorld', $result['data']); } public function test_filter_strips_control_characters(): void { $input = [ 'text' => "Line1\x01\x02\x03Line2", ]; $result = $this->sanitiser->filter($input); $this->assertEquals('Line1Line2', $result['text']); } public function test_filter_preserves_newlines_and_tabs(): void { // Note: FILTER_FLAG_STRIP_LOW strips ASCII 0-31 // which includes \n (10) and \t (9) // This test documents actual behavior $input = [ 'text' => "Line1\nLine2\tTabbed", ]; $result = $this->sanitiser->filter($input); // Control characters are stripped $this->assertIsString($result['text']); } public function test_filter_preserves_unicode(): void { $input = [ 'name' => '日本語テスト', 'emoji' => 'Hello', 'accents' => 'Cafe resume naive', ]; $result = $this->sanitiser->filter($input); $this->assertEquals('日本語テスト', $result['name']); $this->assertEquals('Hello', $result['emoji']); $this->assertEquals('Cafe resume naive', $result['accents']); } public function test_filter_handles_nested_arrays(): void { // Note: filter_var_array processes top-level keys only // Nested arrays are returned as-is (or filtered based on definition) $input = [ 'simple' => 'value', ]; $result = $this->sanitiser->filter($input); $this->assertEquals('value', $result['simple']); } public function test_filter_handles_numeric_values_as_strings(): void { $input = [ 'number' => '12345', 'float' => '123.45', ]; $result = $this->sanitiser->filter($input); $this->assertEquals('12345', $result['number']); $this->assertEquals('123.45', $result['float']); } public function test_filter_handles_special_html_characters(): void { $input = [ 'html' => '', 'entities' => '<div>', ]; $result = $this->sanitiser->filter($input); // FILTER_UNSAFE_RAW doesn't strip HTML, just control characters $this->assertEquals('', $result['html']); $this->assertEquals('<div>', $result['entities']); } public function test_filter_processes_multiple_keys(): void { $input = [ 'key1' => 'value1', 'key2' => 'value2', 'key3' => 'value3', ]; $result = $this->sanitiser->filter($input); $this->assertCount(3, $result); $this->assertEquals('value1', $result['key1']); $this->assertEquals('value2', $result['key2']); $this->assertEquals('value3', $result['key3']); } // ======================================== // New tests for configurable filter rules // ======================================== public function test_with_schema_returns_new_instance(): void { $original = new Sanitiser; $withSchema = $original->withSchema(['email' => ['filters' => [FILTER_SANITIZE_EMAIL]]]); $this->assertNotSame($original, $withSchema); } public function test_schema_applies_additional_filters_to_specified_fields(): void { $sanitiser = new Sanitiser([ 'email' => ['filters' => [FILTER_SANITIZE_EMAIL]], ]); $input = [ 'email' => 'test (at) example.com', 'name' => 'test (at) example.com', // Same input, but not email field ]; $result = $sanitiser->filter($input); // Email field gets sanitized $this->assertEquals('testatexample.com', $result['email']); // Name field keeps original (minus any control chars) $this->assertEquals('test (at) example.com', $result['name']); } public function test_schema_can_skip_control_character_stripping(): void { $sanitiser = new Sanitiser([ 'raw' => ['skip_control_strip' => true], ]); $input = [ 'raw' => "Has\x00Null", 'normal' => "Has\x00Null", ]; $result = $sanitiser->filter($input); // Raw field keeps null byte $this->assertEquals("Has\x00Null", $result['raw']); // Normal field has null byte stripped $this->assertEquals('HasNull', $result['normal']); } public function test_constructor_accepts_schema(): void { $schema = ['email' => ['filters' => [FILTER_SANITIZE_EMAIL]]]; $sanitiser = new Sanitiser($schema); $input = ['email' => 'test (at) example.com']; $result = $sanitiser->filter($input); $this->assertEquals('testatexample.com', $result['email']); } // ======================================== // Tests for Unicode NFC normalization // ======================================== public function test_unicode_normalization_is_enabled_by_default(): void { if (! class_exists(Normalizer::class)) { $this->markTestSkipped('intl extension not available'); } $sanitiser = new Sanitiser; // NFD: e + combining acute accent (two code points) $nfd = "cafe\xCC\x81"; // 'cafe' + combining acute accent // NFC: e with acute (single code point) $nfc = "caf\xC3\xA9"; // 'cafe' as single accented char $input = ['text' => $nfd]; $result = $sanitiser->filter($input); // Should be normalized to NFC $this->assertEquals($nfc, $result['text']); } public function test_with_normalization_false_disables_nfc(): void { if (! class_exists(Normalizer::class)) { $this->markTestSkipped('intl extension not available'); } $sanitiser = (new Sanitiser)->withNormalization(false); // NFD form $nfd = "cafe\xCC\x81"; $input = ['text' => $nfd]; $result = $sanitiser->filter($input); // Should NOT be normalized (stays NFD) $this->assertEquals($nfd, $result['text']); } public function test_schema_can_skip_normalization_per_field(): void { if (! class_exists(Normalizer::class)) { $this->markTestSkipped('intl extension not available'); } $sanitiser = new Sanitiser([ 'raw' => ['skip_normalize' => true], ]); // NFD form $nfd = "cafe\xCC\x81"; $nfc = "caf\xC3\xA9"; $input = [ 'raw' => $nfd, 'normal' => $nfd, ]; $result = $sanitiser->filter($input); // Raw field keeps NFD $this->assertEquals($nfd, $result['raw']); // Normal field gets NFC $this->assertEquals($nfc, $result['normal']); } // ======================================== // Tests for audit logging // ======================================== public function test_with_logger_returns_new_instance(): void { $logger = $this->createMock(LoggerInterface::class); $original = new Sanitiser; $withLogger = $original->withLogger($logger); $this->assertNotSame($original, $withLogger); } public function test_audit_logging_logs_when_content_modified(): void { $logger = $this->createMock(LoggerInterface::class); $logger->expects($this->once()) ->method('info') ->with( 'Input sanitised', $this->callback(function ($context) { return $context['field'] === 'data' && $context['sanitised'] === 'HelloWorld' && $context['original_length'] === 11 && $context['sanitised_length'] === 10; }) ); $sanitiser = new Sanitiser([], $logger, true); $input = ['data' => "Hello\x00World"]; $sanitiser->filter($input); } public function test_audit_logging_does_not_log_when_content_unchanged(): void { $logger = $this->createMock(LoggerInterface::class); $logger->expects($this->never())->method('info'); $sanitiser = new Sanitiser([], $logger, true); $input = ['data' => 'HelloWorld']; $sanitiser->filter($input); } public function test_audit_logging_disabled_by_default(): void { $logger = $this->createMock(LoggerInterface::class); $logger->expects($this->never())->method('info'); // Logger provided but audit not enabled $sanitiser = new Sanitiser([], $logger, false); $input = ['data' => "Hello\x00World"]; $sanitiser->filter($input); } public function test_audit_logging_requires_logger(): void { // Audit enabled but no logger - should not crash $sanitiser = new Sanitiser([], null, true); $input = ['data' => "Hello\x00World"]; $result = $sanitiser->filter($input); $this->assertEquals('HelloWorld', $result['data']); } public function test_audit_logging_includes_nested_path(): void { $logger = $this->createMock(LoggerInterface::class); $logger->expects($this->once()) ->method('info') ->with( 'Input sanitised', $this->callback(function ($context) { return $context['field'] === 'nested.deep.value'; }) ); $sanitiser = new Sanitiser([], $logger, true); $input = [ 'nested' => [ 'deep' => [ 'value' => "Has\x00Null", ], ], ]; $sanitiser->filter($input); } // ======================================== // Tests for backwards compatibility // ======================================== public function test_default_constructor_works_with_no_arguments(): void { $sanitiser = new Sanitiser; $input = ['test' => "Hello\x00World"]; $result = $sanitiser->filter($input); $this->assertEquals('HelloWorld', $result['test']); } public function test_fluent_interface_chains_correctly(): void { $logger = $this->createMock(LoggerInterface::class); $sanitiser = (new Sanitiser) ->withSchema(['email' => ['filters' => [FILTER_SANITIZE_EMAIL]]]) ->withLogger($logger, true) ->withNormalization(false); $input = ['email' => 'test (at) example.com']; $result = $sanitiser->filter($input); $this->assertEquals('testatexample.com', $result['email']); } public function test_filter_handles_deeply_nested_arrays(): void { $input = [ 'level1' => [ 'level2' => [ 'level3' => [ 'data' => "Hello\x00World", ], ], ], ]; $result = $this->sanitiser->filter($input); $this->assertEquals('HelloWorld', $result['level1']['level2']['level3']['data']); } public function test_filter_preserves_non_string_values(): void { $input = [ 'int' => 123, 'float' => 45.67, 'bool' => true, 'null' => null, ]; $result = $this->sanitiser->filter($input); $this->assertSame(123, $result['int']); $this->assertSame(45.67, $result['float']); $this->assertSame(true, $result['bool']); $this->assertNull($result['null']); } }