From 44a9e75ad497f6462281605c02147a4782341107 Mon Sep 17 00:00:00 2001 From: Danny van der Sluijs Date: Wed, 5 Mar 2025 21:22:01 +0100 Subject: [PATCH] fix: replace filter_var for uri and uri-reference to userland code to be RFC 3986 compliant https://github.com/jsonrainbow/json-schema/issues/685 --- .../Constraints/FormatConstraint.php | 27 ++------ .../Validator/RelativeReferenceValidator.php | 53 +++++++++++++++ .../Tool/Validator/UriValidator.php | 65 +++++++++++++++++++ .../RelativeReferenceValidatorTest.php | 40 ++++++++++++ tests/Tool/Validator/UriValidatorTest.php | 49 ++++++++++++++ 5 files changed, 212 insertions(+), 22 deletions(-) create mode 100644 src/JsonSchema/Tool/Validator/RelativeReferenceValidator.php create mode 100644 src/JsonSchema/Tool/Validator/UriValidator.php create mode 100644 tests/Tool/Validator/RelativeReferenceValidatorTest.php create mode 100644 tests/Tool/Validator/UriValidatorTest.php diff --git a/src/JsonSchema/Constraints/FormatConstraint.php b/src/JsonSchema/Constraints/FormatConstraint.php index 5b07c0b5..9ed4df9d 100644 --- a/src/JsonSchema/Constraints/FormatConstraint.php +++ b/src/JsonSchema/Constraints/FormatConstraint.php @@ -14,6 +14,8 @@ use JsonSchema\ConstraintError; use JsonSchema\Entity\JsonPointer; use JsonSchema\Rfc3339; +use JsonSchema\Tool\Validator\RelativeReferenceValidator; +use JsonSchema\Tool\Validator\UriValidator; /** * Validates against the "format" property @@ -101,34 +103,15 @@ public function check(&$element, $schema = null, ?JsonPointer $path = null, $i = break; case 'uri': - if (is_string($element) && null === filter_var($element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE)) { + if (is_string($element) && !UriValidator::isValid($element)) { $this->addError(ConstraintError::FORMAT_URL(), $path, ['format' => $schema->format]); } break; case 'uriref': case 'uri-reference': - if (is_string($element) && null === filter_var($element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE)) { - // 
FILTER_VALIDATE_URL does not conform to RFC-3986, and cannot handle relative URLs, but - // the json-schema spec uses RFC-3986, so need a bit of hackery to properly validate them. - // See https://tools.ietf.org/html/rfc3986#section-4.2 for additional information. - if (substr($element, 0, 2) === '//') { // network-path reference - $validURL = filter_var('scheme:' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE); - } elseif (substr($element, 0, 1) === '/') { // absolute-path reference - $validURL = filter_var('scheme://host' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE); - } elseif (strlen($element)) { // relative-path reference - $pathParts = explode('/', $element, 2); - if (strpos($pathParts[0], ':') !== false) { - $validURL = null; - } else { - $validURL = filter_var('scheme://host/' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE); - } - } else { - $validURL = null; - } - if ($validURL === null) { - $this->addError(ConstraintError::FORMAT_URL_REF(), $path, ['format' => $schema->format]); - } + if (is_string($element) && !(UriValidator::isValid($element) || RelativeReferenceValidator::isValid($element))) { + $this->addError(ConstraintError::FORMAT_URL_REF(), $path, ['format' => $schema->format]); } break; diff --git a/src/JsonSchema/Tool/Validator/RelativeReferenceValidator.php b/src/JsonSchema/Tool/Validator/RelativeReferenceValidator.php new file mode 100644 index 00000000..2409f144 --- /dev/null +++ b/src/JsonSchema/Tool/Validator/RelativeReferenceValidator.php @@ -0,0 +1,53 @@ + 65535)) { + return false; + } + + // Validate path (reject illegal characters: < > { } | \ ^ `) + if (!empty($matches[6]) && preg_match('/[<>{}|\\\^`]/', $matches[6])) { + return false; + } + + return true; + } + + // If not hierarchical, check non-hierarchical URIs + if (preg_match($nonHierarchicalPattern, $uri, $matches) === 1) { + $scheme = strtolower($matches[1]); // Extract the scheme + + // Special case: `mailto:` must contain a **valid email address** + 
if ($scheme === 'mailto') { + return preg_match($emailPattern, $matches[2]) === 1; + } + + return true; // Valid non-hierarchical URI + } + + return false; + } +} diff --git a/tests/Tool/Validator/RelativeReferenceValidatorTest.php b/tests/Tool/Validator/RelativeReferenceValidatorTest.php new file mode 100644 index 00000000..2a9b6d66 --- /dev/null +++ b/tests/Tool/Validator/RelativeReferenceValidatorTest.php @@ -0,0 +1,40 @@ + ['ref' => '/relative/path']; + yield 'Relative path up one level' => ['ref' => '../up-one-level']; + yield 'Relative path from current' => ['ref' => 'foo/bar']; + } + + public function invalidRelativeReferenceDataProvider(): \Generator + { + yield 'Absolute URI' => ['ref' => 'http://example.com']; + yield 'Three slashes' => ['ref' => '///three/slashes']; + yield 'Path with spaces' => ['ref' => '/path with spaces']; + yield 'No path having query and fragment' => ['ref' => '?#invalid']; + yield 'Missing path having fragment' => ['ref' => '#']; + yield 'Missing path having query' => ['ref' => '?']; + } +} diff --git a/tests/Tool/Validator/UriValidatorTest.php b/tests/Tool/Validator/UriValidatorTest.php new file mode 100644 index 00000000..80a45c81 --- /dev/null +++ b/tests/Tool/Validator/UriValidatorTest.php @@ -0,0 +1,49 @@ + ['uri' => 'https://example.com']; + yield 'Subdomain HTTP URI' => ['uri' => 'https://sub.domain.example.com']; + yield 'Full HTTP URI' => ['uri' => 'https://example.com:8080/path/to/resource?query=string#fragment']; + yield 'Full FTP URI' => ['uri' => 'ftp://user:pass@ftp.example.com:21/path']; + yield 'IPV6 HTTP URI' => ['uri' => 'http://[2001:db8::ff00:42:8329]']; + yield 'Mailto URI' => ['uri' => 'mailto:user@example.com']; + yield 'Data URI' => ['uri' => 'data:text/plain;charset=utf-8,Hello%20World!']; + yield 'ISBN URN URI' => ['uri' => 'urn:isbn:0451450523']; + yield 'OASIS URN URI' => ['uri' => 'urn:oasis:names:specification:docbook:dtd:xml:4.1.2']; + } + + public function invalidUriDataProvider(): \Generator + { + 
yield 'Invalid schema' => ['uri' => 'ht!tp://example.com']; + yield 'Missing schema' => ['uri' => '://example.com']; + yield 'Double dot in domain' => ['uri' => 'https://example..com']; + yield 'To high of a port number' => ['uri' => 'https://example.com:65536']; + yield 'Invalid path characters with "<>"' => ['uri' => 'http://example.com/<>']; + yield 'Invalid path characters with "{}"' => ['uri' => 'http://example.com/{bad}']; + yield 'Invalid path characters with "^"' => ['uri' => 'http://example.com/^invalid']; + yield 'Only mailto:' => ['uri' => 'mailto:']; + yield 'Invalid email used in mailto:' => ['uri' => 'mailto:user@.com']; + } +}