From 3999818903da417138b297ceb32db37d51a0a139 Mon Sep 17 00:00:00 2001 From: Leonid Startsev Date: Wed, 10 Nov 2021 13:59:05 +0300 Subject: [PATCH] Correctly handle buffer boundaries while decoding escape sequences from json stream (#1706) Fixes #1702 --- .../json/internal/lexer/AbstractJsonLexer.kt | 32 ++++++++++++------- .../json/internal/lexer/StringJsonLexer.kt | 6 +++- .../serialization/json/JsonUnicodeTest.kt | 24 +++----------- .../kotlinx/serialization/test/TestHelpers.kt | 20 +++++++++++- .../json/internal/JsonLexerJvm.kt | 8 ++--- .../features/JsonJvmStreamsTest.kt | 28 +++++++++++++--- 6 files changed, 77 insertions(+), 41 deletions(-) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt index bc72ec3cc7..5ff983d019 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt @@ -140,7 +140,7 @@ internal abstract class AbstractJsonLexer { open fun ensureHaveChars() {} // Used as bound check in loops - abstract fun definitelyNotEof(position: Int): Int + abstract fun prefetchOrEof(position: Int): Int abstract fun tryConsumeComma(): Boolean @@ -182,7 +182,7 @@ internal abstract class AbstractJsonLexer { val source = source var cpos = currentPosition while (true) { - cpos = definitelyNotEof(cpos) + cpos = prefetchOrEof(cpos) if (cpos == -1) break // could be inline function but KT-1436 val c = source[cpos++] if (c == ' ' || c == '\n' || c == '\r' || c == '\t') continue @@ -223,7 +223,7 @@ internal abstract class AbstractJsonLexer { val source = source var cpos = currentPosition while (true) { - cpos = definitelyNotEof(cpos) + cpos = prefetchOrEof(cpos) if (cpos == -1) break val ch = source[cpos] if (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t') { @@ -244,7 +244,7 @@ internal abstract class AbstractJsonLexer { */ fun tryConsumeNotNull(): Boolean { var current = skipWhitespaces() - current = definitelyNotEof(current) + current = prefetchOrEof(current) // Cannot consume null due to EOF, maybe something else val len = source.length - current if (len < 4 || current == -1) return true @@ -264,7 +264,7 @@ internal abstract class AbstractJsonLexer { var current = currentPosition // Skip whitespaces while (true) { - current = definitelyNotEof(current) + current = prefetchOrEof(current) if (current == -1) break val c = source[current] // Faster than char2TokenClass actually @@ -317,13 +317,15 @@ internal abstract class AbstractJsonLexer { while (char != STRING) { if (char == STRING_ESC) { usedAppend = true - currentPosition = appendEscape(lastPosition, currentPosition) + currentPosition = prefetchOrEof(appendEscape(lastPosition, currentPosition)) + if (currentPosition == -1) + fail("EOF", currentPosition) lastPosition = currentPosition } else if (++currentPosition >= source.length) { usedAppend = true // end of chunk appendRange(lastPosition, currentPosition) - currentPosition = definitelyNotEof(currentPosition) + currentPosition = prefetchOrEof(currentPosition) if (currentPosition == -1) fail("EOF", currentPosition) lastPosition = currentPosition @@ -395,7 +397,7 @@ internal abstract class AbstractJsonLexer { if (current >= source.length) { usedAppend = true appendRange(currentPosition, current) - val eof = definitelyNotEof(current) + val eof = prefetchOrEof(current) if (eof == -1) { // to handle plain lenient strings, such as top-level currentPosition = current @@ -421,7 +423,7 @@ internal abstract class AbstractJsonLexer { private fun appendEsc(startPosition: Int): Int { var currentPosition = startPosition - currentPosition = definitelyNotEof(currentPosition) + currentPosition = prefetchOrEof(currentPosition) if (currentPosition == -1) fail("Expected escape sequence to continue, got EOF") val currentChar = source[currentPosition++] if (currentChar == UNICODE_ESC) { @@ -435,7 +437,13 @@ internal abstract class AbstractJsonLexer { } private fun appendHex(source: CharSequence, startPos: Int): Int { - if (startPos + 4 >= source.length) fail("Unexpected EOF during unicode escape") + if (startPos + 4 >= source.length) { + currentPosition = startPos + ensureHaveChars() + if (currentPosition + 4 >= source.length) + fail("Unexpected EOF during unicode escape") + return appendHex(source, currentPosition) + } escapedString.append( ((fromHexChar(source, startPos) shl 12) + (fromHexChar(source, startPos + 1) shl 8) + @@ -520,7 +528,7 @@ internal abstract class AbstractJsonLexer { * that doesn't allocate and also doesn't support any radix but 10 */ var current = skipWhitespaces() - current = definitelyNotEof(current) + current = prefetchOrEof(current) if (current >= source.length || current == -1) fail("EOF") val hasQuotation = if (source[current] == STRING) { // Check it again @@ -598,7 +606,7 @@ internal abstract class AbstractJsonLexer { * in 6-th bit and we leverage this fact, our implementation consumes boolean literals * in a case-insensitive manner. */ - var current = definitelyNotEof(start) + var current = prefetchOrEof(start) if (current >= source.length || current == -1) fail("EOF") return when (source[current++].code or asciiCaseMask) { 't'.code -> { diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt index fc6a6c1ac0..0ff980a2c6 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt @@ -1,8 +1,12 @@ +/* + * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. + */ + package kotlinx.serialization.json.internal internal class StringJsonLexer(override val source: String) : AbstractJsonLexer() { - override fun definitelyNotEof(position: Int): Int = if (position < source.length) position else -1 + override fun prefetchOrEof(position: Int): Int = if (position < source.length) position else -1 override fun consumeNextToken(): Byte { val source = source diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt index 51da529a0f..1f6f814f14 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt @@ -1,8 +1,11 @@ +/* + * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. + */ + package kotlinx.serialization.json import kotlinx.serialization.* import kotlinx.serialization.builtins.* -import kotlinx.serialization.json.internal.* import kotlinx.serialization.test.* import kotlin.random.* import kotlin.test.* @@ -59,7 +62,7 @@ class JsonUnicodeTest : JsonTestBase() { @Test fun testRandomEscapeSequences() = noJs { // Too slow on JS repeat(10_000) { - val s = generateRandomString() + val s = generateRandomUnicodeString(Random.nextInt(1, 2047)) try { assertSerializedAndRestored(s, String.serializer()) } catch (e: Throwable) { @@ -68,21 +71,4 @@ class JsonUnicodeTest : JsonTestBase() { } } } - - private fun generateRandomString(): String { - val size = Random.nextInt(1, 2047) - return buildString(size) { - repeat(size) { - val pickEscape = Random.nextBoolean() - if (pickEscape) { - // Definitely escape symbol - // null can be appended as well, completely okay - append(ESCAPE_STRINGS.random()) - } else { - // Any symbol, including escaping one - append(Char(Random.nextInt(Char.MIN_VALUE.code..Char.MAX_VALUE.code))) - } - } - } - } } diff --git a/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt b/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt index 9829503b6a..d178c871c4 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt @@ -1,10 +1,13 @@ /* - * Copyright 2017-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. + * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license. */ package kotlinx.serialization.test import kotlinx.serialization.* import kotlinx.serialization.descriptors.* +import kotlinx.serialization.json.internal.ESCAPE_STRINGS +import kotlin.random.Random +import kotlin.random.nextInt import kotlin.test.* fun SerialDescriptor.assertDescriptorEqualsTo(other: SerialDescriptor) { @@ -40,3 +43,18 @@ inline fun assertFailsWithMissingField(block: () -> Unit) { val e = assertFailsWith(block = block) assertTrue(e.message?.contains("but it was missing") ?: false) } + +fun generateRandomUnicodeString(size: Int): String { + return buildString(size) { + repeat(size) { + val pickEscape = Random.nextBoolean() + if (pickEscape) { + // Definitely an escape symbol + append(ESCAPE_STRINGS.random().takeIf { it != null } ?: 'N') + } else { + // Any symbol, including escaping one + append(Char(Random.nextInt(Char.MIN_VALUE.code..Char.MAX_VALUE.code)).takeIf { it.isDefined() && !it.isSurrogate()} ?: 'U') + } + } + } +} diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt index 6a4c19c7c2..eabfd08869 100644 --- a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt +++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt @@ -58,7 +58,7 @@ internal class ReaderJsonLexer( ensureHaveChars() var current = currentPosition while (true) { - current = definitelyNotEof(current) + current = prefetchOrEof(current) if (current == -1) break // could be inline function but KT-1436 val c = source[current] // Inlined skipWhitespaces without field spill and nested loop. Also faster then char2TokenClass @@ -93,7 +93,7 @@ internal class ReaderJsonLexer( currentPosition = 0 } - override fun definitelyNotEof(position: Int): Int { + override fun prefetchOrEof(position: Int): Int { if (position < source.length) return position currentPosition = position ensureHaveChars() @@ -106,7 +106,7 @@ internal class ReaderJsonLexer( val source = source var cpos = currentPosition while (true) { - cpos = definitelyNotEof(cpos) + cpos = prefetchOrEof(cpos) if (cpos == -1) break val ch = source[cpos++] return when (val tc = charToTokenClass(ch)) { @@ -141,7 +141,7 @@ internal class ReaderJsonLexer( var current = currentPosition val closingQuote = indexOf('"', current) if (closingQuote == -1) { - current = definitelyNotEof(current) + current = prefetchOrEof(current) if (current == -1) fail(TC_STRING) // it's also possible just to resize buffer, // instead of falling back to slow path, diff --git a/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt b/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt index 1ed4078faa..b576a2c1ba 100644 --- a/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt +++ b/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt @@ -4,13 +4,15 @@ package kotlinx.serialization.features -import kotlinx.serialization.* +import kotlinx.serialization.SerializationException +import kotlinx.serialization.StringData import kotlinx.serialization.builtins.serializer -import kotlinx.serialization.json.Json +import kotlinx.serialization.json.* import kotlinx.serialization.json.internal.BATCH_SIZE -import kotlinx.serialization.test.decodeViaStream -import kotlinx.serialization.test.encodeViaStream +import kotlinx.serialization.test.* import org.junit.Test +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream import kotlin.test.assertEquals import kotlin.test.assertFailsWith @@ -65,4 +67,22 @@ class JsonJvmStreamsTest { Json.decodeViaStream(String.serializer(), "\"") } } + + @Test + fun testRandomEscapeSequences() { + repeat(1000) { + val s = generateRandomUnicodeString(strLen) + try { + val serializer = String.serializer() + val b = ByteArrayOutputStream() + Json.encodeToStream(serializer, s, b) + val restored = Json.decodeFromStream(serializer, ByteArrayInputStream(b.toByteArray())) + assertEquals(s, restored) + } catch (e: Throwable) { + // Not assertion error to preserve cause + throw IllegalStateException("Unexpectedly failed test, cause string: $s", e) + } + } + } + }