Skip to content

Commit

Permalink
Issue #11495 - Add UriCompliance rules that follow the HTTP / URI / S…
Browse files Browse the repository at this point in the history
…ervlet specs for illegal & suspicious characters
  • Loading branch information
joakime committed Mar 7, 2024
1 parent 369d9f7 commit da02801
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.io.Serial;
import java.io.Serializable;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
Expand Down Expand Up @@ -548,6 +549,87 @@ private enum State
.with("%u002e%u002e", Boolean.TRUE)
.build();

/**
* Encoded character sequences that violate the Servlet 6.0 spec
* https://jakarta.ee/specifications/servlet/6.0/jakarta-servlet-spec-6.0.html#uri-path-canonicalization
*/
private static final Index<Boolean> __suspiciousEncodedPathSequences;

/**
* Unencoded US-ASCII character sequences not allowed by HTTP or URI specs in path segments.
*/
private static final boolean[] __illegalPathCharacters;

static
{
// Establish allowed and disallowed characters per the path rules of
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
// ABNF
// path = path-abempty ; begins with "/" or is empty
// / path-absolute ; begins with "/" but not "//"
// / path-noscheme ; begins with a non-colon segment
// / path-rootless ; begins with a segment
// / path-empty ; zero characters
// path-abempty = *( "/" segment )
// path-absolute = "/" [ segment-nz *( "/" segment ) ]
// path-noscheme = segment-nz-nc *( "/" segment )
// path-rootless = segment-nz *( "/" segment )
// path-empty = 0<pchar>
//
// segment = *pchar
// segment-nz = 1*pchar
// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
// ; non-zero-length segment without any colon ":"
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// pct-encoded = "%" HEXDIG HEXDIG
//
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
// reserved = gen-delims / sub-delims
// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
// / "*" / "+" / "," / ";" / "="

// we are limited to US-ASCII per https://datatracker.ietf.org/doc/html/rfc3986#section-2
boolean[] illegalChars = new boolean[128];
Arrays.fill(illegalChars, true);
// pct-encoded
illegalChars['%'] = false;
// unreserved
for (int i = 0; i < illegalChars.length; i++)
{
if (((i >= 'a') && (i <= 'z')) || // ALPHA (lower)
((i >= 'A') && (i <= 'Z')) || // ALPHA (upper)
((i >= '0') && (i <= '9')) || // DIGIT
(i == '-') || (i == '.') || (i == '_') || (i == '_') || (i == '~')
)
{
illegalChars[i] = false;
}
}
// reserved
String reserved = ":/?#[]@!$&'()*+,=";
for (char c: reserved.toCharArray())
illegalChars[c] = false;
__illegalPathCharacters = illegalChars;
// anything else in the US-ASCII space is not allowed

// suspicious path sequences
Index.Builder<Boolean> suspiciousSequencesBuilder = new Index.Builder<Boolean>();
suspiciousSequencesBuilder.caseSensitive(false);
// backslash
suspiciousSequencesBuilder.with("%5c", Boolean.TRUE);
suspiciousSequencesBuilder.with("%u005c", Boolean.TRUE);
// control characters
suspiciousSequencesBuilder.with("%7f", Boolean.TRUE);
suspiciousSequencesBuilder.with("%u007f", Boolean.TRUE);
for (int i = 0x00; i <= 0x1F; i++)
{
suspiciousSequencesBuilder.with(String.format("%%%02x", i), Boolean.TRUE);
suspiciousSequencesBuilder.with(String.format("%%u00%02x", i), Boolean.TRUE);
}
__suspiciousEncodedPathSequences = suspiciousSequencesBuilder.build();
}

private String _scheme;
private String _user;
private String _host;
Expand Down Expand Up @@ -895,8 +977,9 @@ public Mutable path(String path)
if (!URIUtil.isPathValid(path))
throw new IllegalArgumentException("Path not correctly encoded: " + path);
_uri = null;
_path = path;
_path = null;
_canonicalPath = null;
parse(State.PATH, path);

// If the passed path does not have a parameter, then keep the current parameter
// else delete the current parameter
Expand Down Expand Up @@ -1483,6 +1566,25 @@ private void checkSegment(String uri, int segment, int end, boolean param)
if (param)
addViolation(Violation.AMBIGUOUS_PATH_PARAMETER);
}

for (int i = segment; i < end; i++)
{
char c = uri.charAt(i);
// The RFC does not allow raw path characters that are outside the ABNF {@code unreserved / pct-encoded / sub-delims / ":" / "@"}
if (c > __illegalPathCharacters.length || __illegalPathCharacters[c])
addViolation(Violation.ILLEGAL_PATH_CHARACTERS);
// Look for suspicious encoded sequence
if (c == '%')
{
Boolean suspicious = __suspiciousEncodedPathSequences.getBest(uri, i, end - i);
if (suspicious != null)
{
// The segment is suspicious.
if (Boolean.TRUE.equals(suspicious))
addViolation(Violation.SUSPICIOUS_PATH_CHARACTERS);
}
}
}
}

private void addViolation(Violation violation)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,18 @@ public enum Violation implements ComplianceViolation
/**
* Allow Bad UTF-8 encodings to be substituted by the replacement character.
*/
BAD_UTF8_ENCODING("https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1", "Bad UTF-8 encoding");
BAD_UTF8_ENCODING("https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1", "Bad UTF-8 encoding"),

/**
* Allow encoded path characters not allowed by the Servlet spec rules.
*/
SUSPICIOUS_PATH_CHARACTERS("https://jakarta.ee/specifications/servlet/6.0/jakarta-servlet-spec-6.0.html#uri-path-canonicalization", "Suspicious Path Character"),

/**
* Allow path characters not allowed in the path portion of the URI and HTTP specs.
* <p>This would allow characters that fall outside of the {@code unreserved / pct-encoded / sub-delims / ":" / "@"} ABNF</p>
*/
ILLEGAL_PATH_CHARACTERS("https://datatracker.ietf.org/doc/html/rfc3986#section-3.3", "Illegal Path Character");

private final String _url;
private final String _description;
Expand Down Expand Up @@ -167,7 +178,7 @@ public String getDescription()
Violation.UTF16_ENCODINGS));

/**
* Compliance mode that allows all URI Violations, including allowing ambiguous paths in non-canonical form.
* Compliance mode that allows all URI Violations, including allowing ambiguous paths in non-canonical form, and illegal characters
*/
public static final UriCompliance UNSAFE = new UriCompliance("UNSAFE", allOf(Violation.class));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.hamcrest.MatcherAssert.assertThat;
Expand Down Expand Up @@ -641,6 +642,91 @@ public void testPathQuery(String input, String canonicalPath, String decodedPath
assertThat(uri.hasAmbiguousEncoding(), is(expected.contains(Violation.AMBIGUOUS_PATH_ENCODING)));
}

@ParameterizedTest
@ValueSource(strings = {
"/a%2Fb",
"/a%2F",
"/%2f",
"/%2f/"
})
public void testAmbiguousViaBuilderPath(String input)
{
HttpURI uri = HttpURI.build().path(input);
assertThat("has any violation", uri.hasViolations(), is(true));
assertThat("is ambiguous", uri.isAmbiguous(), is(true));
}

public static Stream<Arguments> suspiciousPathCharacterData()
{
return Stream.of(
// backslash
Arguments.of("/a%5Cb"),
Arguments.of("/foo/bar/zed/%5c"),
Arguments.of("/foo/bar/zed/%5C"),
// TAB
Arguments.of("/%09b"),
Arguments.of("/%09"),
// CR / LF
Arguments.of("/%0A"),
Arguments.of("/%0a"),
Arguments.of("/%0D"),
Arguments.of("/%0d"),
Arguments.of("/%0d%0a")
);
}

@ParameterizedTest
@MethodSource("suspiciousPathCharacterData")
public void testSuspiciousPathCharacterBuilderPath(String input)
{
HttpURI uri = HttpURI.build().path(input);
assertThat("has any violations", uri.hasViolations(), is(true));
assertThat("has SUSPICIOUS_PATH_CHARACTERS violation", uri.hasViolation(Violation.SUSPICIOUS_PATH_CHARACTERS), is(true));
}

@ParameterizedTest
@MethodSource("suspiciousPathCharacterData")
public void testSuspiciousPathCharacterFromString(String input)
{
HttpURI uri = HttpURI.from(input);
assertThat("has any violations", uri.hasViolations(), is(true));
assertThat("has SUSPICIOUS_PATH_CHARACTERS violation", uri.hasViolation(Violation.SUSPICIOUS_PATH_CHARACTERS), is(true));
}

public static Stream<Arguments> illegalPathCharacterData()
{
return Stream.of(
// backslash
Arguments.of("/a\\b"),
Arguments.of("/a/..\\b"),
// control character
Arguments.of("/a\tb"),
Arguments.of("/a\r\nb"),
// Pipe symbol
Arguments.of("/a|b"),
// space character
Arguments.of("/a b")
);
}

@ParameterizedTest
@MethodSource("illegalPathCharacterData")
public void testIllegalPathCharacterBuilderPath(String input)
{
HttpURI uri = HttpURI.build().path(input);
assertThat("has any violations", uri.hasViolations(), is(true));
assertThat("has ILLEGAL_PATH_CHARACTERS violation", uri.hasViolation(Violation.ILLEGAL_PATH_CHARACTERS), is(true));
}

@ParameterizedTest
@MethodSource("illegalPathCharacterData")
public void testIllegalPathCharacterFromString(String input)
{
HttpURI uri = HttpURI.from(input);
assertThat("has any violations", uri.hasViolations(), is(true));
assertThat("has ILLEGAL_PATH_CHARACTERS violation", uri.hasViolation(Violation.ILLEGAL_PATH_CHARACTERS), is(true));
}

public static Stream<Arguments> parseData()
{
return Stream.of(
Expand Down

0 comments on commit da02801

Please sign in to comment.