Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: replace filter_var for uri and uri-reference to userland code to be RFC 3986 compliant #800

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 5 additions & 22 deletions src/JsonSchema/Constraints/FormatConstraint.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
use JsonSchema\ConstraintError;
use JsonSchema\Entity\JsonPointer;
use JsonSchema\Rfc3339;
use JsonSchema\Tool\Validator\RelativeReferenceValidator;
use JsonSchema\Tool\Validator\UriValidator;

/**
* Validates against the "format" property
Expand Down Expand Up @@ -101,34 +103,15 @@ public function check(&$element, $schema = null, ?JsonPointer $path = null, $i =
break;

case 'uri':
if (is_string($element) && null === filter_var($element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE)) {
if (is_string($element) && !UriValidator::isValid($element)) {
$this->addError(ConstraintError::FORMAT_URL(), $path, ['format' => $schema->format]);
}
break;

case 'uriref':
case 'uri-reference':
if (is_string($element) && null === filter_var($element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE)) {
// FILTER_VALIDATE_URL does not conform to RFC-3986, and cannot handle relative URLs, but
// the json-schema spec uses RFC-3986, so need a bit of hackery to properly validate them.
// See https://tools.ietf.org/html/rfc3986#section-4.2 for additional information.
if (substr($element, 0, 2) === '//') { // network-path reference
$validURL = filter_var('scheme:' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE);
} elseif (substr($element, 0, 1) === '/') { // absolute-path reference
$validURL = filter_var('scheme://host' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE);
} elseif (strlen($element)) { // relative-path reference
$pathParts = explode('/', $element, 2);
if (strpos($pathParts[0], ':') !== false) {
$validURL = null;
} else {
$validURL = filter_var('scheme://host/' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE);
}
} else {
$validURL = null;
}
if ($validURL === null) {
$this->addError(ConstraintError::FORMAT_URL_REF(), $path, ['format' => $schema->format]);
}
if (is_string($element) && !(UriValidator::isValid($element) || RelativeReferenceValidator::isValid($element))) {
$this->addError(ConstraintError::FORMAT_URL(), $path, ['format' => $schema->format]);
}
break;

Expand Down
53 changes: 53 additions & 0 deletions src/JsonSchema/Tool/Validator/RelativeReferenceValidator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php

declare(strict_types=1);

namespace JsonSchema\Tool\Validator;

/**
 * Heuristic validator for relative references (RFC 3986, section 4.2).
 *
 * The check is a deny-list: a string is accepted unless one of the rejection
 * rules in isValid() applies. Some rules are stricter than RFC 3986, which
 * allows an empty path (so "#" and "?" alone are syntactically legal there).
 */
class RelativeReferenceValidator
{
    /**
     * Tells whether the given string is acceptable as a relative reference.
     *
     * @param string $ref Candidate reference, e.g. "/relative/path" or "../up-one-level"
     *
     * @return bool False when any rejection rule matches, true otherwise
     */
    public static function isValid(string $ref): bool
    {
        // Generic URI-reference decomposition, adapted from RFC 3986, Appendix B.
        $pattern = '/^(([^\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/';

        if (preg_match($pattern, $ref) !== 1) {
            return false;
        }

        // References made of nothing but a delimiter: a lone fragment marker,
        // a lone query marker, or a network-path prefix with an empty authority.
        if (in_array($ref, ['#', '?', '//'], true)) {
            return false;
        }

        $rejections = [
            // Absolute http(s) URI. Schemes are case-insensitive (RFC 3986,
            // section 3.1), so "HTTP://..." must be rejected like "http://...".
            '/^https?:\/\//i',
            // Missing scheme in front of an authority
            '/^:\/\//',
            // Scheme separator without a scheme
            '/^:\//',
            // Three leading slashes followed by a path segment
            '/^\/\/\/[^\/]/',
            // Whitespace is never allowed in a URI reference
            '/\s/',
            // No path, an empty query and then a fragment
            '/^\?#/',
        ];

        foreach ($rejections as $rejection) {
            if (preg_match($rejection, $ref) === 1) {
                return false;
            }
        }

        return true;
    }
}
65 changes: 65 additions & 0 deletions src/JsonSchema/Tool/Validator/UriValidator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php

declare(strict_types=1);

namespace JsonSchema\Tool\Validator;

/**
 * Userland validator for absolute URIs, loosely following RFC 3986.
 *
 * Two shapes are recognised:
 *  - hierarchical URIs of the form "scheme://[userinfo@]host[:port][/path][?query][#fragment]";
 *  - non-hierarchical URIs for the "mailto", "data" and "urn" schemes only.
 */
class UriValidator
{
    /**
     * Tells whether the given string is an acceptable absolute URI.
     *
     * @param string $uri Candidate URI
     *
     * @return bool True when the string matches one of the supported URI shapes
     */
    public static function isValid(string $uri): bool
    {
        // Whitespace is never legal inside a URI; it has to be percent-encoded.
        // Checking up front also covers userinfo, query and fragment, whose
        // negated character classes below would otherwise let it through.
        if (preg_match('/\s/', $uri) === 1) {
            return false;
        }

        // The "D" modifier makes "$" match only at the very end of the subject,
        // so a trailing newline can never be accepted.
        // The path is written as slash-led segments whose character class does
        // not contain the slash itself; every path then has exactly one way to
        // match, which avoids exponential backtracking on hostile input.
        $hierarchicalPattern = '/^
            ([a-z][a-z0-9+\-.]*):\/\/                 # Scheme, then colon and two slashes
            (?:([^:@\/?#]+)(?::([^@\/?#]*))?@)?       # Optional userinfo (user:pass@)
            ([a-z0-9.-]+|\[[a-f0-9:.]+\])             # Hostname or IPv6 in brackets
            (?::(\d{1,5}))?                           # Optional port
            ((?:\/[a-z0-9._~!$&\'()*+,;=:@%-]*)*)     # Path (valid characters only)
            (\?([^#]*))?                              # Optional query
            (\#(.*))?                                 # Optional fragment
        $/ixD';

        // Only these non-hierarchical schemes are recognised; at least one
        // character must follow the scheme separator.
        $nonHierarchicalPattern = '/^(mailto|data|urn):(.+)$/iD';

        // Simplified single-address check for "mailto:" URIs. This is not a
        // full RFC 5322 or RFC 6068 parser: address lists and header fields
        // such as "?subject=" are rejected.
        $emailPattern = '/^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/D';

        if (preg_match($hierarchicalPattern, $uri, $matches) === 1) {
            // Reject empty domain labels such as "example..com".
            if (strpos($matches[4], '..') !== false) {
                return false;
            }

            // Trailing unmatched groups are absent from $matches, hence the
            // default. The port is compared as a string against '' because
            // empty('0') is true and would let port 0 skip the range check.
            $port = $matches[5] ?? '';
            if ($port !== '' && ((int) $port < 1 || (int) $port > 65535)) {
                return false;
            }

            // No separate check for illegal path characters is needed: the
            // path character class above already excludes them.
            return true;
        }

        if (preg_match($nonHierarchicalPattern, $uri, $matches) === 1) {
            // "mailto:" additionally requires something that looks like an address.
            if (strtolower($matches[1]) === 'mailto') {
                return preg_match($emailPattern, $matches[2]) === 1;
            }

            return true;
        }

        return false;
    }
}
40 changes: 40 additions & 0 deletions tests/Tool/Validator/RelativeReferenceValidatorTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

declare(strict_types=1);

namespace Tool\Validator;

use JsonSchema\Tool\Validator\RelativeReferenceValidator;
use PHPUnit\Framework\TestCase;

/**
 * Exercises RelativeReferenceValidator::isValid() with accepted and rejected references.
 */
class RelativeReferenceValidatorTest extends TestCase
{
    /** @dataProvider validRelativeReferenceDataProvider */
    public function testValidRelativeReferencesAreValidatedAsSuch(string $ref): void
    {
        self::assertTrue(RelativeReferenceValidator::isValid($ref));
    }

    /** @dataProvider invalidRelativeReferenceDataProvider */
    public function testInvalidRelativeReferencesAreValidatedAsSuch(string $ref): void
    {
        self::assertFalse(RelativeReferenceValidator::isValid($ref));
    }

    /**
     * References the validator must accept.
     *
     * Declared static: non-static data providers are deprecated as of
     * PHPUnit 10, while static ones work on older versions as well.
     */
    public static function validRelativeReferenceDataProvider(): \Generator
    {
        yield 'Relative path from root' => ['ref' => '/relative/path'];
        yield 'Relative path up one level' => ['ref' => '../up-one-level'];
        yield 'Relative path from current' => ['ref' => 'foo/bar'];
    }

    /**
     * References the validator must reject.
     */
    public static function invalidRelativeReferenceDataProvider(): \Generator
    {
        yield 'Absolute URI' => ['ref' => 'http://example.com'];
        yield 'Three slashes' => ['ref' => '///three/slashes'];
        yield 'Path with spaces' => ['ref' => '/path with spaces'];
        yield 'No path having query and fragment' => ['ref' => '?#invalid'];
        yield 'Missing path having fragment' => ['ref' => '#'];
        yield 'Missing path having query' => ['ref' => '?'];
    }
}
49 changes: 49 additions & 0 deletions tests/Tool/Validator/UriValidatorTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

declare(strict_types=1);

namespace Tool\Validator;

use JsonSchema\Tool\Validator\UriValidator;
use PHPUnit\Framework\TestCase;

/**
 * Exercises UriValidator::isValid() with accepted and rejected URIs.
 */
class UriValidatorTest extends TestCase
{
    /** @dataProvider validUriDataProvider */
    public function testValidUrisAreValidatedAsSuch(string $uri): void
    {
        self::assertTrue(UriValidator::isValid($uri));
    }

    /** @dataProvider invalidUriDataProvider */
    public function testInvalidUrisAreValidatedAsSuch(string $uri): void
    {
        self::assertFalse(UriValidator::isValid($uri));
    }

    /**
     * URIs the validator must accept.
     *
     * Declared static: non-static data providers are deprecated as of
     * PHPUnit 10, while static ones work on older versions as well.
     */
    public static function validUriDataProvider(): \Generator
    {
        yield 'Simple HTTP URI' => ['uri' => 'https://example.com'];
        yield 'Subdomain HTTP URI' => ['uri' => 'https://sub.domain.example.com'];
        yield 'Full HTTP URI' => ['uri' => 'https://example.com:8080/path/to/resource?query=string#fragment'];
        yield 'Full FTP URI' => ['uri' => 'ftp://user:pass@ftp.example.com:21/path'];
        yield 'IPV6 HTTP URI' => ['uri' => 'http://[2001:db8::ff00:42:8329]'];
        yield 'Mailto URI' => ['uri' => 'mailto:user@example.com'];
        yield 'Data URI' => ['uri' => 'data:text/plain;charset=utf-8,Hello%20World!'];
        yield 'ISBN URN URI' => ['uri' => 'urn:isbn:0451450523'];
        yield 'OASIS URN URI' => ['uri' => 'urn:oasis:names:specification:docbook:dtd:xml:4.1.2'];
    }

    /**
     * URIs the validator must reject.
     */
    public static function invalidUriDataProvider(): \Generator
    {
        yield 'Invalid schema' => ['uri' => 'ht!tp://example.com'];
        yield 'Missing schema' => ['uri' => '://example.com'];
        yield 'Double dot in domain' => ['uri' => 'https://example..com'];
        yield 'To high of a port number' => ['uri' => 'https://example.com:65536'];
        yield 'Invalid path characters with "<>"' => ['uri' => 'http://example.com/<>'];
        yield 'Invalid path characters with "{}"' => ['uri' => 'http://example.com/{bad}'];
        yield 'Invalid path characters with "^"' => ['uri' => 'http://example.com/^invalid'];
        yield 'Only mailto:' => ['uri' => 'mailto:'];
        yield 'Invalid email used in mailto:' => ['uri' => 'mailto:user@.com'];
    }
}