From da02801947bdd22af7f3fed5ab2f96b34bff4bd2 Mon Sep 17 00:00:00 2001 From: Joakim Erdfelt Date: Thu, 7 Mar 2024 07:44:42 -0600 Subject: [PATCH] Issue #11495 - Add UriCompliance rules that follow the HTTP / URI / Servlet specs for illegal & suspicious characters --- .../java/org/eclipse/jetty/http/HttpURI.java | 104 +++++++++++++++++- .../org/eclipse/jetty/http/UriCompliance.java | 15 ++- .../org/eclipse/jetty/http/HttpURITest.java | 86 +++++++++++++++ 3 files changed, 202 insertions(+), 3 deletions(-) diff --git a/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java b/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java index c22beca65c00..66e65315a46a 100644 --- a/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java +++ b/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java @@ -16,6 +16,7 @@ import java.io.Serial; import java.io.Serializable; import java.net.URI; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.EnumSet; @@ -548,6 +549,87 @@ private enum State .with("%u002e%u002e", Boolean.TRUE) .build(); + /** + * Encoded character sequences that violate the Servlet 6.0 spec + * https://jakarta.ee/specifications/servlet/6.0/jakarta-servlet-spec-6.0.html#uri-path-canonicalization + */ + private static final Index __suspiciousEncodedPathSequences; + + /** + * Unencoded US-ASCII character sequences not allowed by HTTP or URI specs in path segments. + */ + private static final boolean[] __illegalPathCharacters; + + static + { + // Establish allowed and disallowed characters per the path rules of + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 + // ABNF + // path = path-abempty ; begins with "/" or is empty + // / path-absolute ; begins with "/" but not "//" + // / path-noscheme ; begins with a non-colon segment + // / path-rootless ; begins with a segment + // / path-empty ; zero characters + // path-abempty = *( "/" segment ) + // path-absolute = "/" [ segment-nz *( "/" segment ) ] + // path-noscheme = segment-nz-nc *( "/" segment ) + // path-rootless = segment-nz *( "/" segment ) + // path-empty = 0 + // + // segment = *pchar + // segment-nz = 1*pchar + // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + // ; non-zero-length segment without any colon ":" + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + // pct-encoded = "%" HEXDIG HEXDIG + // + // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + // reserved = gen-delims / sub-delims + // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + // / "*" / "+" / "," / ";" / "=" + + // we are limited to US-ASCII per https://datatracker.ietf.org/doc/html/rfc3986#section-2 + boolean[] illegalChars = new boolean[128]; + Arrays.fill(illegalChars, true); + // pct-encoded + illegalChars['%'] = false; + // unreserved + for (int i = 0; i < illegalChars.length; i++) + { + if (((i >= 'a') && (i <= 'z')) || // ALPHA (lower) + ((i >= 'A') && (i <= 'Z')) || // ALPHA (upper) + ((i >= '0') && (i <= '9')) || // DIGIT + (i == '-') || (i == '.') || (i == '_') || (i == '_') || (i == '~') + ) + { + illegalChars[i] = false; + } + } + // reserved + String reserved = ":/?#[]@!$&'()*+,="; + for (char c: reserved.toCharArray()) + illegalChars[c] = false; + __illegalPathCharacters = illegalChars; + // anything else in the US-ASCII space is not allowed + + // suspicious path sequences + Index.Builder suspiciousSequencesBuilder = new Index.Builder(); + suspiciousSequencesBuilder.caseSensitive(false); + // backslash + suspiciousSequencesBuilder.with("%5c", Boolean.TRUE); + suspiciousSequencesBuilder.with("%u005c", Boolean.TRUE); + // control characters + suspiciousSequencesBuilder.with("%7f", Boolean.TRUE); + suspiciousSequencesBuilder.with("%u007f", Boolean.TRUE); + for (int i = 0x00; i <= 0x1F; i++) + { + suspiciousSequencesBuilder.with(String.format("%%%02x", i), Boolean.TRUE); + suspiciousSequencesBuilder.with(String.format("%%u00%02x", i), Boolean.TRUE); + } + __suspiciousEncodedPathSequences = suspiciousSequencesBuilder.build(); + } + private String _scheme; private String _user; private String _host; @@ -895,8 +977,9 @@ public Mutable path(String path) if (!URIUtil.isPathValid(path)) throw new IllegalArgumentException("Path not correctly encoded: " + path); _uri = null; - _path = path; + _path = null; _canonicalPath = null; + parse(State.PATH, path); // If the passed path does not have a parameter, then keep the current parameter // else delete the current parameter @@ -1483,6 +1566,25 @@ private void checkSegment(String uri, int segment, int end, boolean param) if (param) addViolation(Violation.AMBIGUOUS_PATH_PARAMETER); } + + for (int i = segment; i < end; i++) + { + char c = uri.charAt(i); + // The RFC does not allow raw path characters that are outside the ABNF {@code unreserved / pct-encoded / sub-delims / ":" / "@"} + if (c > __illegalPathCharacters.length || __illegalPathCharacters[c]) + addViolation(Violation.ILLEGAL_PATH_CHARACTERS); + // Look for suspicious encoded sequence + if (c == '%') + { + Boolean suspicious = __suspiciousEncodedPathSequences.getBest(uri, i, end - i); + if (suspicious != null) + { + // The segment is suspicious. + if (Boolean.TRUE.equals(suspicious)) + addViolation(Violation.SUSPICIOUS_PATH_CHARACTERS); + } + } + } } private void addViolation(Violation violation) diff --git a/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/UriCompliance.java b/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/UriCompliance.java index e1d16ed11e85..574530ddb098 100644 --- a/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/UriCompliance.java +++ b/jetty-core/jetty-http/src/main/java/org/eclipse/jetty/http/UriCompliance.java @@ -98,7 +98,18 @@ public enum Violation implements ComplianceViolation /** * Allow Bad UTF-8 encodings to be substituted by the replacement character. */ - BAD_UTF8_ENCODING("https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1", "Bad UTF-8 encoding"); + BAD_UTF8_ENCODING("https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1", "Bad UTF-8 encoding"), + + /** + * Allow encoded path characters not allowed by the Servlet spec rules. + */ + SUSPICIOUS_PATH_CHARACTERS("https://jakarta.ee/specifications/servlet/6.0/jakarta-servlet-spec-6.0.html#uri-path-canonicalization", "Suspicious Path Character"), + + /** + * Allow path characters not allowed in the path portion of the URI and HTTP specs. + *

This would allow characters that fall outside of the {@code unreserved / pct-encoded / sub-delims / ":" / "@"} ABNF

+ */ + ILLEGAL_PATH_CHARACTERS("https://datatracker.ietf.org/doc/html/rfc3986#section-3.3", "Illegal Path Character"); private final String _url; private final String _description; @@ -167,7 +178,7 @@ public String getDescription() Violation.UTF16_ENCODINGS)); /** - * Compliance mode that allows all URI Violations, including allowing ambiguous paths in non-canonical form. + * Compliance mode that allows all URI Violations, including allowing ambiguous paths in non-canonical form, and illegal characters */ public static final UriCompliance UNSAFE = new UriCompliance("UNSAFE", allOf(Violation.class)); diff --git a/jetty-core/jetty-http/src/test/java/org/eclipse/jetty/http/HttpURITest.java b/jetty-core/jetty-http/src/test/java/org/eclipse/jetty/http/HttpURITest.java index ba9339183a3c..97aee38e2c15 100644 --- a/jetty-core/jetty-http/src/test/java/org/eclipse/jetty/http/HttpURITest.java +++ b/jetty-core/jetty-http/src/test/java/org/eclipse/jetty/http/HttpURITest.java @@ -25,6 +25,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import static java.nio.charset.StandardCharsets.UTF_8; import static org.hamcrest.MatcherAssert.assertThat; @@ -641,6 +642,91 @@ public void testPathQuery(String input, String canonicalPath, String decodedPath assertThat(uri.hasAmbiguousEncoding(), is(expected.contains(Violation.AMBIGUOUS_PATH_ENCODING))); } + @ParameterizedTest + @ValueSource(strings = { + "/a%2Fb", + "/a%2F", + "/%2f", + "/%2f/" + }) + public void testAmbiguousViaBuilderPath(String input) + { + HttpURI uri = HttpURI.build().path(input); + assertThat("has any violation", uri.hasViolations(), is(true)); + assertThat("is ambiguous", uri.isAmbiguous(), is(true)); + } + + public static Stream suspiciousPathCharacterData() + { + return Stream.of( + // backslash + Arguments.of("/a%5Cb"), + Arguments.of("/foo/bar/zed/%5c"), + Arguments.of("/foo/bar/zed/%5C"), + // TAB + Arguments.of("/%09b"), + Arguments.of("/%09"), + // CR / LF + Arguments.of("/%0A"), + Arguments.of("/%0a"), + Arguments.of("/%0D"), + Arguments.of("/%0d"), + Arguments.of("/%0d%0a") + ); + } + + @ParameterizedTest + @MethodSource("suspiciousPathCharacterData") + public void testSuspiciousPathCharacterBuilderPath(String input) + { + HttpURI uri = HttpURI.build().path(input); + assertThat("has any violations", uri.hasViolations(), is(true)); + assertThat("has SUSPICIOUS_PATH_CHARACTERS violation", uri.hasViolation(Violation.SUSPICIOUS_PATH_CHARACTERS), is(true)); + } + + @ParameterizedTest + @MethodSource("suspiciousPathCharacterData") + public void testSuspiciousPathCharacterFromString(String input) + { + HttpURI uri = HttpURI.from(input); + assertThat("has any violations", uri.hasViolations(), is(true)); + assertThat("has SUSPICIOUS_PATH_CHARACTERS violation", uri.hasViolation(Violation.SUSPICIOUS_PATH_CHARACTERS), is(true)); + } + + public static Stream illegalPathCharacterData() + { + return Stream.of( + // backslash + Arguments.of("/a\\b"), + Arguments.of("/a/..\\b"), + // control character + Arguments.of("/a\tb"), + Arguments.of("/a\r\nb"), + // Pipe symbol + Arguments.of("/a|b"), + // space character + Arguments.of("/a b") + ); + } + + @ParameterizedTest + @MethodSource("illegalPathCharacterData") + public void testIllegalPathCharacterBuilderPath(String input) + { + HttpURI uri = HttpURI.build().path(input); + assertThat("has any violations", uri.hasViolations(), is(true)); + assertThat("has ILLEGAL_PATH_CHARACTERS violation", uri.hasViolation(Violation.ILLEGAL_PATH_CHARACTERS), is(true)); + } + + @ParameterizedTest + @MethodSource("illegalPathCharacterData") + public void testIllegalPathCharacterFromString(String input) + { + HttpURI uri = HttpURI.from(input); + assertThat("has any violations", uri.hasViolations(), is(true)); + assertThat("has ILLEGAL_PATH_CHARACTERS violation", uri.hasViolation(Violation.ILLEGAL_PATH_CHARACTERS), is(true)); + } + public static Stream parseData() { return Stream.of(