Skip to content

Commit

Permalink
Reject FHIR resources with the control chars #2605
Browse files Browse the repository at this point in the history
Signed-off-by: Paul Bastide <[email protected]>
  • Loading branch information
prb112 committed Oct 1, 2021
1 parent 0f07c83 commit 459d60a
Show file tree
Hide file tree
Showing 9 changed files with 721 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public Validator initialValue() {
}
};
private static final Set<Character> WHITESPACE = new HashSet<>(Arrays.asList(' ', '\t', '\r', '\n'));
private static final Set<Character> UNSUPPORTED_UNICODE = buildUnsupportedUnicodeCharacterSet();
private static final char [] BASE64_CHARS = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
Expand All @@ -80,6 +81,22 @@ public Validator initialValue() {

private ValidationSupport() { }

/**
 * Creates the lookup set of control characters that are not accepted in string values.
 *
 * @implNote The set holds every code point below 32 other than 9 (horizontal tab),
 *           10 (line feed) and 13 (carriage return). These numbers are decimal values.
 * @return a new mutable set with the disallowed characters
 */
private static Set<Character> buildUnsupportedUnicodeCharacterSet() {
    Set<Character> unsupported = new HashSet<>();
    for (char c = 0; c < 32; c++) {
        switch (c) {
        case '\t':
        case '\n':
        case '\r':
            // Tab, line feed and carriage return stay out of the set.
            break;
        default:
            unsupported.add(c);
            break;
        }
    }
    return unsupported;
}

private static Map<Character, Integer> buildBase64IndexMap() {
Map<Character, Integer> base64IndexMap = new LinkedHashMap<>();
for (int i = 0; i < BASE64_CHARS.length; i++) {
Expand All @@ -106,17 +123,29 @@ public static void checkString(String s) {
int count = 0;
for (int i = 0; i < s.length(); i++) {
char ch = s.charAt(i);
Character character = Character.valueOf(ch);
checkUnsupportedUnicode(s, ch);
if (!Character.isWhitespace(ch)) {
count++;
} else if (!WHITESPACE.contains(ch)) {
throw new IllegalStateException(String.format("String value: '%s' is not valid with respect to pattern: [ \\r\\n\\t\\S]+", s));
} else if (!WHITESPACE.contains(character)) {
throw new IllegalStateException(String.format("String value: '%s' is not valid with respect to pattern: [\\r\\n\\t\\S]+", s));
}
}
if (count < MIN_STRING_LENGTH) {
throw new IllegalStateException(String.format("Trimmed String value length: %d is less than minimum required length: %d", count, MIN_STRING_LENGTH));
}
}

/**
 * Rejects a character that is a member of the unsupported control character set.
 *
 * @param s  the complete string under validation; it is echoed in the error message
 * @param ch the character to look up in {@code UNSUPPORTED_UNICODE}
 * @throws IllegalStateException if {@code ch} is contained in {@code UNSUPPORTED_UNICODE}
 */
private static void checkUnsupportedUnicode(String s, Character ch) {
    boolean supported = !UNSUPPORTED_UNICODE.contains(ch);
    if (supported) {
        return;
    }
    String message = String.format("String value contains unsupported unicode values: [\\0000-0008,0011,0012,0014-0031] value=[%s]", s);
    throw new IllegalStateException(message);
}

/**
* A string which has at least one character and no leading or trailing whitespace and where there is no whitespace other
* than single spaces in the contents.
Expand Down Expand Up @@ -147,6 +176,7 @@ public static void checkCode(String s) {
}
previousIsSpace = true;
} else {
checkUnsupportedUnicode(s, current);
if (previousIsSpace) {
previousIsSpace = false;
}
Expand Down Expand Up @@ -175,6 +205,7 @@ public static void checkId(String s) {
}
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
// @implNote By implication, this excludes invalid unicode.
//45 = '-'
//46 = '.'
//48 = '0'
Expand Down Expand Up @@ -205,7 +236,9 @@ public static void checkUri(String s) {
throw new IllegalStateException(String.format("Uri value length: %d is greater than maximum allowed length: %d", s.length(), MAX_STRING_LENGTH));
}
for (int i = 0; i < s.length(); i++) {
if (Character.isWhitespace(s.charAt(i))) {
char ch = s.charAt(i);
checkUnsupportedUnicode(s, ch);
if (Character.isWhitespace(ch)) {
throw new IllegalStateException(String.format("Uri value: '%s' must not contain whitespace", s));
}
}
Expand Down Expand Up @@ -800,4 +833,4 @@ private static boolean hasDataAbsentReasonExtension(Element element) {
return false;
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* (C) Copyright IBM Corp. 2021
*
* SPDX-License-Identifier: Apache-2.0
*/

package com.ibm.fhir.model.util.test.unicode;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;

import com.ibm.fhir.model.util.test.unicode.strategy.CharacterControlStrategy;

/**
 * InjectCharacterChannel wraps a {@link ReadableByteChannel} and modifies the bytes read from it
 * based on a {@link CharacterControlStrategy}.
 *
 * @implNote The injection (today) happens once: the first byte of {@code strategy.pre()} is
 *           inserted directly after the first double-quote byte (34) seen in a chunk for which
 *           the strategy reports itself applicable. It's designed to be extensible, such that
 *           other hooks on the stream can be used for injection.
 */
public class InjectCharacterChannel implements ReadableByteChannel {

    /** Upper bound on the number of bytes pulled from the wrapped channel per read. */
    private static final int CHUNK_SIZE = 4096;

    /** Byte value of the double quote that marks the injection point. */
    private static final byte DOUBLE_QUOTE = 34;

    private final ReadableByteChannel channel;
    private final CharacterControlStrategy strategy;

    /** Stays true until the single injection has been performed. */
    private boolean first = true;

    /** Bytes already produced that did not fit into the caller's buffer on an earlier read. */
    private byte[] carry = new byte[0];

    /**
     * @param channel  the channel the bytes are read from
     * @param strategy decides whether a chunk is modified and supplies the byte to inject
     */
    public InjectCharacterChannel(ReadableByteChannel channel, CharacterControlStrategy strategy) {
        this.channel = channel;
        this.strategy = strategy;
    }

    @Override
    public void close() throws IOException {
        channel.close();
    }

    @Override
    public boolean isOpen() {
        return channel.isOpen();
    }

    /**
     * Reads a chunk from the wrapped channel, injects the strategy's byte when applicable, and
     * copies the result into {@code dst}.
     *
     * @param dst the buffer that receives the (possibly modified) bytes
     * @return the number of bytes written to {@code dst}; otherwise the value returned by the
     *         wrapped channel when it is zero or negative
     * @throws IOException if the wrapped channel fails to read
     */
    @Override
    public int read(ByteBuffer dst) throws IOException {
        // Hand out leftovers first so the byte order of the stream is preserved.
        if (carry.length > 0) {
            return deliver(carry, dst);
        }
        if (!dst.hasRemaining()) {
            return 0;
        }

        // Never pull more than the caller can take; only an injected byte can exceed it, and
        // that byte is carried over to the next call.
        ByteBuffer temp = ByteBuffer.allocate(Math.min(CHUNK_SIZE, dst.remaining()));
        int len = channel.read(temp);

        // The strategy is consulted on every read, including empty reads and end-of-stream.
        boolean applicable = strategy.isApplicable(len, temp);
        if (len <= 0) {
            return len;
        }

        // Copy through the backing array so the result does not depend on temp's position.
        byte[] body = new byte[len];
        System.arraycopy(temp.array(), 0, body, 0, len);

        if (applicable && first) {
            int quote = indexOf(body, DOUBLE_QUOTE);
            // Grow the chunk only when there is actually a place to inject.
            if (quote >= 0) {
                body = insertAfter(body, quote, strategy.pre()[0]);
                first = false;
            }
        }
        return deliver(body, dst);
    }

    /** Returns the index of the first occurrence of {@code target} in {@code bytes}, or -1. */
    private static int indexOf(byte[] bytes, byte target) {
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] == target) {
                return i;
            }
        }
        return -1;
    }

    /** Returns a copy of {@code bytes} with {@code extra} inserted right after {@code position}. */
    private static byte[] insertAfter(byte[] bytes, int position, byte extra) {
        byte[] result = new byte[bytes.length + 1];
        System.arraycopy(bytes, 0, result, 0, position + 1);
        result[position + 1] = extra;
        System.arraycopy(bytes, position + 1, result, position + 2, bytes.length - position - 1);
        return result;
    }

    /** Writes as much of {@code bytes} as fits into {@code dst} and keeps the remainder in {@code carry}. */
    private int deliver(byte[] bytes, ByteBuffer dst) {
        int count = Math.min(bytes.length, dst.remaining());
        dst.put(bytes, 0, count);
        byte[] rest = new byte[bytes.length - count];
        System.arraycopy(bytes, count, rest, 0, rest.length);
        carry = rest;
        return count;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* (C) Copyright IBM Corp. 2021
*
* SPDX-License-Identifier: Apache-2.0
*/

package com.ibm.fhir.model.util.test.unicode;

import java.util.ArrayList;
import java.util.List;

/**
 * The UnicodeChar class wraps a code point value, and when called converts the character to a
 * set of bytes. In UTF-8 these bytes are either 4, 3, 2 or 1 byte segments.
 *
 * A useful reference for Unicode is https://www.utf8-chartable.de/
 */
public class UnicodeChar {

    private final int val;

    /**
     * public constructor
     * @param val the codepoint value of the unicode character
     */
    public UnicodeChar(int val) {
        this.val = val;
    }

    /**
     * Generates the UTF-8 byte representation of the unicode character at the code point.
     * The charset is given explicitly so the result does not vary with the platform default.
     *
     * @return the UTF-8 encoded bytes of {@link #getCharactersString()}
     */
    public byte[] getBytes() {
        return getCharactersString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Gets the characters referenced by the character code point.
     *
     * @return a string made of the {@code char} values produced by {@link Character#toChars(int)}
     * @throws IllegalArgumentException if the wrapped value is not a valid Unicode code point
     */
    public String getCharactersString() {
        return new String(Character.toChars(val));
    }

    /**
     * Gets the value formatted as an ampersand, the decimal code point and a semicolon,
     * for example {@code &9;}.
     *
     * NOTE(review): an HTML numeric character reference is written {@code &#9;}; the '#' is
     * absent here. Confirm whether callers rely on the current form before changing it.
     *
     * @return the formatted value
     */
    public String getHtmlEntityValue() {
        return "&" + val + ";";
    }

    /**
     * Gets the value as a backslash followed by the decimal code point, left-padded with zeros
     * to at least five digits, for example {@code \00009}.
     *
     * NOTE(review): this is not the {@code \}{@code uXXXX} hexadecimal escape form; confirm the
     * intended format with the callers.
     *
     * @return the escaped value
     */
    public String getEscapedValue() {
        String digits = Integer.toString(val);
        StringBuilder builder = new StringBuilder("\\");
        for (int width = digits.length(); width < 5; width++) {
            builder.append('0');
        }
        return builder.append(digits).toString();
    }

    /**
     * Generates a list of the forbidden unicode characters in the specification.
     *
     * @return a new list with one UnicodeChar per code point below 32, leaving out
     *         9 (horizontal tab), 10 (line feed) and 13 (carriage return)
     */
    public static List<UnicodeChar> forbidden() {
        // Strings SHOULD not contain Unicode character points below 32,
        // except for horizontal tab (9), line feed (10) and carriage return (13).
        List<UnicodeChar> forbidden = new ArrayList<>();
        for (int i = 0; i < 32; i++) {
            if (i != 9 && i != 10 && i != 13) {
                forbidden.add(new UnicodeChar(i));
            }
        }
        return forbidden;
    }
}
Loading

0 comments on commit 459d60a

Please sign in to comment.