From 0ef1440f4da6cd0a7b75bf4b1dd1d1355225fd72 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Thu, 25 Feb 2021 15:41:30 +0300 Subject: [PATCH 1/6] Add more benchmarks --- .../benchmarks/json/JacksonComparisonBenchmark.kt | 14 ++++++++++++-- .../kotlinx/benchmarks/json/TwitterBenchmark.kt | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt index ad6fbda75..84f811ef8 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt @@ -33,7 +33,7 @@ open class JacksonComparisonBenchmark { private val objectMapper: ObjectMapper = jacksonObjectMapper() - private val data = DefaultPixelEvent( + private val data = DefaultPixelEvent( version = 1, dateTime2 = System.currentTimeMillis().toString(), serverName = "some-endpoint-qwer", @@ -48,15 +48,25 @@ open class JacksonComparisonBenchmark { cookies = "_ga=GA1.2.971852807.1546968515" ) - private val stringData = Json.encodeToString(DefaultPixelEvent.serializer(), data) + private val stringData = Json.encodeToString(DefaultPixelEvent.serializer(), data) + @Serializable + private class SmallDataClass(val id: Int, val name: String) + + private val smallData = SmallDataClass(42, "Vincent") @Benchmark fun jacksonToString(): String = objectMapper.writeValueAsString(data) + @Benchmark + fun jacksonSmallToString(): String = objectMapper.writeValueAsString(smallData) + @Benchmark fun kotlinToString(): String = Json.encodeToString(DefaultPixelEvent.serializer(), data) + @Benchmark + fun kotlinSmallToString(): String = Json.encodeToString(SmallDataClass.serializer(), smallData) + @Benchmark fun jacksonFromString(): DefaultPixelEvent = objectMapper.readValue(stringData, DefaultPixelEvent::class.java) diff --git 
a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt index 4505c28bd..9556f0f95 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt @@ -20,8 +20,12 @@ open class TwitterBenchmark { * with Kotlin classes generated by Json2Kotlin plugin (and also manually adjusted) */ private val input = TwitterBenchmark::class.java.getResource("/twitter.json").readBytes().decodeToString() + private val twitter = Json.decodeFromString(Twitter.serializer(), input) // Order of magnitude: 4-7 op/ms @Benchmark fun parseTwitter() = Json.decodeFromString(Twitter.serializer(), input) + + @Benchmark + fun writeTwitter() = Json.encodeToString(Twitter.serializer(), twitter) } From 962d89e87b6b9bc362763c738ed64f7236c3a03b Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Fri, 26 Feb 2021 13:14:02 +0300 Subject: [PATCH 2/6] Add Jackson comparison with escape symbols --- .../json/JacksonComparisonBenchmark.kt | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt index 84f811ef8..c0609a75b 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt @@ -48,6 +48,21 @@ open class JacksonComparisonBenchmark { cookies = "_ga=GA1.2.971852807.1546968515" ) + private val dataWithEscapes = DefaultPixelEvent( + version = 1, + dateTime2 = System.currentTimeMillis().toString(), + serverName = "some-endp\"oint-qwer", + domain = "", + method = "POST", + clientIp = "127.0.0.1", + queryString = 
"anxa=CASCative&anxv=13.901.16.34566&anxe=\"FoolbarActive\"&anxt=E7AFBF15-1761-4343-92C1-78167ED19B1C&anxtv=13.901.16.34566&anxp=%5ECQ6%5Expt292%5ES33656%5Eus&anxsi&anxd=2019-10-08T17%3A03%3A57.246Z&f=00400000&anxr=1571945992297&coid=\"66abafd0d49f42e58dc7536109395306\"&userSegment&cwsid=opgkcnbminncdgghighmimmphiooeohh", + userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:70.0) Gecko/20100101 Firefox/70.0", + contentType = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + browserLanguage = "\"en\"-\"US\",en;\\q=0.5", + postData = "-", + cookies = "_ga=GA1.2.971852807.1546968515" + ) + private val stringData = Json.encodeToString(DefaultPixelEvent.serializer(), data) @Serializable @@ -58,12 +73,18 @@ open class JacksonComparisonBenchmark { @Benchmark fun jacksonToString(): String = objectMapper.writeValueAsString(data) + @Benchmark + fun jacksonToStringWithEscapes(): String = objectMapper.writeValueAsString(dataWithEscapes) + @Benchmark fun jacksonSmallToString(): String = objectMapper.writeValueAsString(smallData) @Benchmark fun kotlinToString(): String = Json.encodeToString(DefaultPixelEvent.serializer(), data) + @Benchmark + fun kotlinToStringWithEscapes(): String = Json.encodeToString(DefaultPixelEvent.serializer(), dataWithEscapes) + @Benchmark fun kotlinSmallToString(): String = Json.encodeToString(SmallDataClass.serializer(), smallData) @@ -73,3 +94,7 @@ open class JacksonComparisonBenchmark { @Benchmark fun kotlinFromString(): DefaultPixelEvent = Json.decodeFromString(DefaultPixelEvent.serializer(), stringData) } + +fun main() { + println(JacksonComparisonBenchmark().kotlinToString()) +} From 81194b5e4d7bbc789ab417fd1a930264c03ee2e7 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Fri, 26 Feb 2021 17:38:45 +0300 Subject: [PATCH 3/6] More benchmarks --- .../kotlinx/benchmarks/json/CitmBenchmark.kt | 16 ++++++++++++++-- .../json/JacksonComparisonBenchmark.kt | 4 ---- .../kotlinx/benchmarks/json/TwitterBenchmark.kt | 12 
++++++++++-- .../benchmarks/json/TwitterFeedBenchmark.kt | 13 ++++++++++++- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CitmBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CitmBenchmark.kt index 5a388fc01..1ba896684 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CitmBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/CitmBenchmark.kt @@ -1,7 +1,10 @@ package kotlinx.benchmarks.json import kotlinx.benchmarks.model.* +import kotlinx.serialization.* import kotlinx.serialization.json.* +import kotlinx.serialization.json.Json.Default.decodeFromString +import kotlinx.serialization.json.Json.Default.encodeToString import org.openjdk.jmh.annotations.* import java.util.concurrent.* @@ -16,8 +19,17 @@ open class CitmBenchmark { * For some reason Citm is kind of de-facto standard cross-language benchmark. * Order of magnitude: 200 ops/sec */ - private val citm = CitmBenchmark::class.java.getResource("/citm_catalog.json").readBytes().decodeToString() + private val input = CitmBenchmark::class.java.getResource("/citm_catalog.json").readBytes().decodeToString() + private val citm = Json.decodeFromString(CitmCatalog.serializer(), input) + + @Setup + fun init() { + require(citm == Json.decodeFromString(CitmCatalog.serializer(), Json.encodeToString(citm))) + } + + @Benchmark + fun decodeCitm(): CitmCatalog = Json.decodeFromString(CitmCatalog.serializer(), input) @Benchmark - fun decodeCitm(): CitmCatalog = Json.decodeFromString(CitmCatalog.serializer(), citm) + fun encodeCitm(): String = Json.encodeToString(CitmCatalog.serializer(), citm) } diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt index c0609a75b..b81250014 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt +++ 
b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/JacksonComparisonBenchmark.kt @@ -94,7 +94,3 @@ open class JacksonComparisonBenchmark { @Benchmark fun kotlinFromString(): DefaultPixelEvent = Json.decodeFromString(DefaultPixelEvent.serializer(), stringData) } - -fun main() { - println(JacksonComparisonBenchmark().kotlinToString()) -} diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt index 9556f0f95..15e9ea46b 100644 --- a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterBenchmark.kt @@ -1,7 +1,10 @@ package kotlinx.benchmarks.json import kotlinx.benchmarks.model.* +import kotlinx.serialization.* import kotlinx.serialization.json.* +import kotlinx.serialization.json.Json.Default.decodeFromString +import kotlinx.serialization.json.Json.Default.encodeToString import org.openjdk.jmh.annotations.* import java.util.concurrent.* @@ -22,10 +25,15 @@ open class TwitterBenchmark { private val input = TwitterBenchmark::class.java.getResource("/twitter.json").readBytes().decodeToString() private val twitter = Json.decodeFromString(Twitter.serializer(), input) + @Setup + fun init() { + require(twitter == Json.decodeFromString(Twitter.serializer(), Json.encodeToString(Twitter.serializer(), twitter))) + } + // Order of magnitude: 4-7 op/ms @Benchmark - fun parseTwitter() = Json.decodeFromString(Twitter.serializer(), input) + fun decodeTwitter() = Json.decodeFromString(Twitter.serializer(), input) @Benchmark - fun writeTwitter() = Json.encodeToString(Twitter.serializer(), twitter) + fun encodeTwitter() = Json.encodeToString(Twitter.serializer(), twitter) } diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedBenchmark.kt index d15630a86..22ae35f5a 100644 --- 
a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedBenchmark.kt +++ b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedBenchmark.kt @@ -1,7 +1,10 @@ package kotlinx.benchmarks.json import kotlinx.benchmarks.model.* +import kotlinx.serialization.* import kotlinx.serialization.json.* +import kotlinx.serialization.json.Json.Default.decodeFromString +import kotlinx.serialization.json.Json.Default.encodeToString import org.openjdk.jmh.annotations.* import java.util.concurrent.* @@ -20,9 +23,17 @@ open class TwitterFeedBenchmark { * with Kotlin classes generated by Json2Kotlin plugin (and also manually adjusted) */ private val input = TwitterFeedBenchmark::class.java.getResource("/twitter_macro.json").readBytes().decodeToString() + private val twitter = Json.decodeFromString(MacroTwitterFeed.serializer(), input) + @Setup + fun init() { + require(twitter == Json.decodeFromString(MacroTwitterFeed.serializer(), Json.encodeToString(MacroTwitterFeed.serializer(), twitter))) + } // Order of magnitude: ~400 op/s @Benchmark - fun parseTwitter() = Json.decodeFromString(MacroTwitterFeed.serializer(), input) + fun decodeTwitter() = Json.decodeFromString(MacroTwitterFeed.serializer(), input) + + @Benchmark + fun encodeTwitter() = Json.encodeToString(MacroTwitterFeed.serializer(), twitter) } From 67b156bb2078db4363925b6be2d4308839281a64 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Fri, 26 Feb 2021 17:40:25 +0300 Subject: [PATCH 4/6] Draft implementation --- .../src/kotlinx/serialization/json/Json.kt | 4 +- .../serialization/json/internal/Composers.kt | 62 +++++++++++++ .../json/internal/JsonStringBuilder.kt | 9 ++ .../json/internal/StreamingJsonEncoder.kt | 63 +------------- .../serialization/json/internal/StringOps.kt | 41 +++++---- .../json/internal/JsonStringBuilder.kt | 25 ++++++ .../json/internal/JsonStringBuilder.kt | 86 +++++++++++++++++++ .../JsonStringBuilder.kt | 25 ++++++ 8 files changed, 238 insertions(+), 77 deletions(-) create mode 
100644 formats/json/commonMain/src/kotlinx/serialization/json/internal/Composers.kt create mode 100644 formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt create mode 100644 formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt create mode 100644 formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt create mode 100644 formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt index 465859527..a363ac877 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt @@ -63,8 +63,10 @@ public sealed class Json(internal val configuration: JsonConf) : StringFormat { * * @throws [SerializationException] if the given value cannot be serialized to JSON. */ + private val result = JsonStringBuilder() + public final override fun encodeToString(serializer: SerializationStrategy, value: T): String { - val result = StringBuilder() + val result = result val encoder = StreamingJsonEncoder( result, this, WriteMode.OBJ, diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/Composers.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/Composers.kt new file mode 100644 index 000000000..4ddac0ce8 --- /dev/null +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/Composers.kt @@ -0,0 +1,62 @@ +package kotlinx.serialization.json.internal + +import kotlinx.serialization.json.* +import kotlin.jvm.* + +internal open class Composer(@JvmField internal val sb: JsonStringBuilder, @JvmField internal val json: Json) { + private var level = 0 + var writingFirst = true + private set + + fun indent() { + writingFirst = true + level++ + } + + fun unIndent() { + level-- + } + + fun nextItem() { + writingFirst = 
false + if (json.configuration.prettyPrint) { + print("\n") + repeat(level) { print(json.configuration.prettyPrintIndent) } + } + } + + fun space() { + if (json.configuration.prettyPrint) + print(' ') + } + + fun print(v: Char) = sb.append(v) + fun print(v: String) = sb.append(v) + open fun print(v: Float) = sb.append(v.toString()) + open fun print(v: Double) = sb.append(v.toString()) + open fun print(v: Byte) = sb.append(v.toLong()) + open fun print(v: Short) = sb.append(v.toLong()) + open fun print(v: Int) = sb.append(v.toLong()) + open fun print(v: Long) = sb.append(v) + open fun print(v: Boolean) = sb.append(v.toString()) + fun printQuoted(value: String): Unit = sb.appendQuoted(value) +} + +@ExperimentalUnsignedTypes +internal class ComposerForUnsignedNumbers(sb: JsonStringBuilder, json: Json) : Composer(sb, json) { + override fun print(v: Int) { + return super.print(v.toUInt().toString()) + } + + override fun print(v: Long) { + return super.print(v.toULong().toString()) + } + + override fun print(v: Byte) { + return super.print(v.toUByte().toString()) + } + + override fun print(v: Short) { + return super.print(v.toUShort().toString()) + } +} diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt new file mode 100644 index 000000000..8dac7eb9c --- /dev/null +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt @@ -0,0 +1,9 @@ +package kotlinx.serialization.json.internal + +internal expect class JsonStringBuilder constructor() { + fun append(value: Long) + fun append(ch: Char) + fun append(string: String) + fun appendQuoted(string: String) + override fun toString(): String +} diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt index bc9d69efb..15c8dbc6a 100644 
--- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt @@ -36,11 +36,11 @@ internal class StreamingJsonEncoder( ) : JsonEncoder, AbstractEncoder() { internal constructor( - output: StringBuilder, json: Json, mode: WriteMode, + output: JsonStringBuilder, json: Json, mode: WriteMode, modeReuseCache: Array ) : this(Composer(output, json), json, mode, modeReuseCache) - public override val serializersModule: SerializersModule = json.serializersModule + override val serializersModule: SerializersModule = json.serializersModule private val configuration = json.configuration // Forces serializer to wrap all values into quotes @@ -152,7 +152,7 @@ internal class StreamingJsonEncoder( return if (inlineDescriptor.isUnsignedNumber) StreamingJsonEncoder( ComposerForUnsignedNumbers( composer.sb, - composer.json + json ), json, mode, null ) else this @@ -207,61 +207,4 @@ internal class StreamingJsonEncoder( override fun encodeEnum(enumDescriptor: SerialDescriptor, index: Int) { encodeString(enumDescriptor.getElementName(index)) } - - internal open class Composer(@JvmField internal val sb: StringBuilder, @JvmField internal val json: Json) { - private var level = 0 - var writingFirst = true - private set - - fun indent() { - writingFirst = true; level++ - } - - fun unIndent() { - level-- - } - - fun nextItem() { - writingFirst = false - if (json.configuration.prettyPrint) { - print("\n") - repeat(level) { print(json.configuration.prettyPrintIndent) } - } - } - - fun space() { - if (json.configuration.prettyPrint) - print(' ') - } - - open fun print(v: Char) = sb.append(v) - open fun print(v: String) = sb.append(v) - open fun print(v: Float) = sb.append(v) - open fun print(v: Double) = sb.append(v) - open fun print(v: Byte) = sb.append(v) - open fun print(v: Short) = sb.append(v) - open fun print(v: Int) = sb.append(v) - open fun print(v: Long) = 
sb.append(v) - open fun print(v: Boolean) = sb.append(v) - open fun printQuoted(value: String): Unit = sb.printQuoted(value) - } - - @ExperimentalUnsignedTypes - internal class ComposerForUnsignedNumbers(sb: StringBuilder, json: Json) : Composer(sb, json) { - override fun print(v: Int): StringBuilder { - return super.print(v.toUInt().toString()) - } - - override fun print(v: Long): StringBuilder { - return super.print(v.toULong().toString()) - } - - override fun print(v: Byte): StringBuilder { - return super.print(v.toUByte().toString()) - } - - override fun print(v: Short): StringBuilder { - return super.print(v.toUShort().toString()) - } - } } diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt index 29e463f54..462d93571 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt @@ -4,7 +4,7 @@ package kotlinx.serialization.json.internal -import kotlin.native.concurrent.SharedImmutable +import kotlin.native.concurrent.* private fun toHexChar(i: Int) : Char { val d = i and 0xf @@ -12,12 +12,8 @@ private fun toHexChar(i: Int) : Char { else (d - 10 + 'a'.toInt()).toChar() } -/* - * Even though the actual size of this array is 92, it has to be the power of two, otherwise - * JVM cannot perform advanced range-check elimination and vectorization in printQuoted - */ @SharedImmutable -private val ESCAPE_CHARS: Array = arrayOfNulls(128).apply { +internal val ESCAPE_STRINGS: Array = arrayOfNulls(93).apply { for (c in 0..0x1f) { val c1 = toHexChar(c shr 12) val c2 = toHexChar(c shr 8) @@ -34,21 +30,34 @@ private val ESCAPE_CHARS: Array = arrayOfNulls(128).apply { this[0x0c] = "\\f" } +@SharedImmutable +internal val ESCAPE_MARKERS: CharArray = CharArray(93).apply { + for (c in 0..0x1f) { + this[c] = 0.toChar() + } + this['"'.toInt()] = '"' + 
this['\\'.toInt()] = '\\' + this['\t'.toInt()] = 't' + this['\b'.toInt()] = 'b' + this['\n'.toInt()] = 'n' + this['\r'.toInt()] = 'r' + this[0x0c] = 'f' +} + internal fun StringBuilder.printQuoted(value: String) { append(STRING) var lastPos = 0 - val length = value.length - for (i in 0 until length) { + for (i in value.indices) { val c = value[i].toInt() - // Do not replace this constant with C2ESC_MAX (which is smaller than ESCAPE_CHARS size), - // otherwise JIT won't eliminate range check and won't vectorize this loop - if (c >= ESCAPE_CHARS.size) continue // no need to escape - val esc = ESCAPE_CHARS[c] ?: continue - append(value, lastPos, i) // flush prev - append(esc) - lastPos = i + 1 + if (c < ESCAPE_STRINGS.size && ESCAPE_STRINGS[c] != null) { + append(value, lastPos, i) // flush prev + append(ESCAPE_STRINGS[c]) + lastPos = i + 1 + } } - append(value, lastPos, length) + + if (lastPos != 0) append(value, lastPos, value.length) + else append(value) append(STRING) } diff --git a/formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt b/formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt new file mode 100644 index 000000000..45abc9994 --- /dev/null +++ b/formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt @@ -0,0 +1,25 @@ +package kotlinx.serialization.json.internal + +internal actual class JsonStringBuilder actual constructor() { + private val sb = StringBuilder(128) + + actual fun append(value: Long) { + sb.append(value) + } + + actual fun append(ch: Char) { + sb.append(ch) + } + + actual fun append(string: String) { + sb.append(string) + } + + actual fun appendQuoted(string: String) { + sb.printQuoted(string) + } + + actual override fun toString(): String { + return sb.toString() + } +} diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt new file mode 
100644 index 000000000..7a827dc2e --- /dev/null +++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt @@ -0,0 +1,86 @@ +package kotlinx.serialization.json.internal + +/* + * Optimized version of StringBuilder that is specific to JSON-encoding + */ +internal actual class JsonStringBuilder { + private var array = CharArray(32) + private var size = 0 + + actual fun append(value: Long) { + ensureAdditionalCapacity(20) // Long length + append(value.toString()) + } + + actual fun append(ch: Char) { + ensureAdditionalCapacity(1) + array[size++] = ch + } + + actual fun append(string: String) { + val length = string.length + ensureAdditionalCapacity(length) + string.toCharArray(array, size, 0, string.length) + size += length + } + + actual fun appendQuoted(string: String) { + ensureAdditionalCapacity(string.length + 2) + val arr = array + var sz = size + arr[sz++] = '"' + val length = string.length + string.toCharArray(arr, sz, 0, length) + for (i in sz until sz + length) { + val ch = arr[i].toInt() + // Do we have unescaped symbols? + if (ch < ESCAPE_MARKERS.size && ESCAPE_MARKERS[ch] != 0.toChar()) { + // Go to slow path + return appendStringSlowPath(i - sz, i, string) + } + } + // Update the state + sz += length + arr[sz++] = '"' + size = sz + } + + private fun appendStringSlowPath(firstEscapedChar: Int, currentSize: Int, string: String) { + var sz = currentSize + for (i in firstEscapedChar until string.length) { + val ch = string[i].toInt() + // Do we have unescaped symbols? + var marker: Char = 0.toChar() + if (ch < ESCAPE_MARKERS.size && ESCAPE_MARKERS[ch].also { marker = it } != 0.toChar()) { + if (marker != 0.toChar()) { + array[sz] = '\\' + array[sz + 1] = marker + sz += 2 + } else { + val escapedString = ESCAPE_STRINGS[ch]!! 
+ ensureTotalCapacity(sz + escapedString.length) + escapedString.toCharArray(array, sz, 0, escapedString.length) + sz += escapedString.length + } + } else { + array[sz++] = string[i] + } + } + array[sz++] = '"' + size = sz + } + + actual override fun toString(): String { + return String(array, 0, size).also { size = 0 } + } + + private fun ensureAdditionalCapacity(expected: Int) { + ensureTotalCapacity(size + expected) + } + + private fun ensureTotalCapacity(newSize: Int) { + if (array.size <= newSize) { + array = array.copyOf(newSize.coerceAtLeast(size * 2)) + } + } +} diff --git a/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt b/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt new file mode 100644 index 000000000..45abc9994 --- /dev/null +++ b/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt @@ -0,0 +1,25 @@ +package kotlinx.serialization.json.internal + +internal actual class JsonStringBuilder actual constructor() { + private val sb = StringBuilder(128) + + actual fun append(value: Long) { + sb.append(value) + } + + actual fun append(ch: Char) { + sb.append(ch) + } + + actual fun append(string: String) { + sb.append(string) + } + + actual fun appendQuoted(string: String) { + sb.printQuoted(string) + } + + actual override fun toString(): String { + return sb.toString() + } +} From 034e1213a92ef6b3c0061c95a554b61a4e447dd3 Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Fri, 26 Feb 2021 18:08:15 +0300 Subject: [PATCH 5/6] Properly pool char arrays --- .../src/kotlinx/serialization/json/Json.kt | 22 +++++++++-------- .../json/internal/JsonStringBuilder.kt | 1 + .../json/internal/JsonStringBuilder.kt | 3 +++ .../json/internal/CharArrayPool.kt | 24 +++++++++++++++++++ .../json/internal/JsonStringBuilder.kt | 8 +++++-- .../JsonStringBuilder.kt | 3 +++ 6 files changed, 49 insertions(+), 12 deletions(-) create mode 100644 
formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt index a363ac877..4dd0cf6d1 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt @@ -63,17 +63,19 @@ public sealed class Json(internal val configuration: JsonConf) : StringFormat { * * @throws [SerializationException] if the given value cannot be serialized to JSON. */ - private val result = JsonStringBuilder() - public final override fun encodeToString(serializer: SerializationStrategy, value: T): String { - val result = result - val encoder = StreamingJsonEncoder( - result, this, - WriteMode.OBJ, - arrayOfNulls(WriteMode.values().size) - ) - encoder.encodeSerializableValue(serializer, value) - return result.toString() + val result = JsonStringBuilder() + try { + val encoder = StreamingJsonEncoder( + result, this, + WriteMode.OBJ, + arrayOfNulls(WriteMode.values().size) + ) + encoder.encodeSerializableValue(serializer, value) + return result.toString() + } finally { + result.release() + } } /** diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt index 8dac7eb9c..f9245d584 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt @@ -6,4 +6,5 @@ internal expect class JsonStringBuilder constructor() { fun append(string: String) fun appendQuoted(string: String) override fun toString(): String + fun release() } diff --git a/formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt b/formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt index 
45abc9994..1b79e27ef 100644 --- a/formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt +++ b/formats/json/jsMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt @@ -22,4 +22,7 @@ internal actual class JsonStringBuilder actual constructor() { actual override fun toString(): String { return sb.toString() } + + actual fun release() { + } } diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt new file mode 100644 index 000000000..02b5138e6 --- /dev/null +++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt @@ -0,0 +1,24 @@ +package kotlinx.serialization.json.internal + +import java.util.concurrent.* + +internal object CharArrayPool { + private val arrays = ArrayDeque() + private var charsTotal = 0 + private val MAX_CHARS_IN_POOL = runCatching { + System.getProperty("kotlinx.serialization.json.pool.size").toIntOrNull() + }.getOrNull() ?: 1024 * 1024 // 2 MB seems to be a reasonable contraint + + public fun take(): CharArray { + val candidate = synchronized(this) { + arrays.lastOrNull()?.also { charsTotal -= it.size } + } + return candidate ?: CharArray(128) + } + + public fun release(array: CharArray) = synchronized(this) { + if (charsTotal + array.size >= MAX_CHARS_IN_POOL) return@synchronized + charsTotal += array.size + arrays.addLast(array) + } +} diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt index 7a827dc2e..49c9a3678 100644 --- a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt +++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt @@ -4,7 +4,7 @@ package kotlinx.serialization.json.internal * Optimized version of StringBuilder that is specific to JSON-encoding */ internal actual 
class JsonStringBuilder { - private var array = CharArray(32) + private var array = CharArrayPool.take() private var size = 0 actual fun append(value: Long) { @@ -71,7 +71,7 @@ internal actual class JsonStringBuilder { } actual override fun toString(): String { - return String(array, 0, size).also { size = 0 } + return String(array, 0, size) } private fun ensureAdditionalCapacity(expected: Int) { @@ -83,4 +83,8 @@ internal actual class JsonStringBuilder { array = array.copyOf(newSize.coerceAtLeast(size * 2)) } } + + actual fun release() { + CharArrayPool.release(array) + } } diff --git a/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt b/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt index 45abc9994..1b79e27ef 100644 --- a/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt +++ b/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt @@ -22,4 +22,7 @@ internal actual class JsonStringBuilder actual constructor() { actual override fun toString(): String { return sb.toString() } + + actual fun release() { + } } From ec06ec6b746635911b5cb3582f013ca4fbd778be Mon Sep 17 00:00:00 2001 From: Vsevolod Tolstopyatov Date: Thu, 4 Mar 2021 14:54:09 +0300 Subject: [PATCH 6/6] ~comments and fixes --- .../src/kotlinx/serialization/json/Json.kt | 1 - .../json/internal/StreamingJsonEncoder.kt | 1 - .../serialization/json/internal/StringOps.kt | 18 ++--- .../serialization/json/JsonParserTest.kt | 18 ++++- .../json/internal/CharArrayPool.kt | 14 +++- .../json/internal/JsonStringBuilder.kt | 65 ++++++++++++++----- .../json/internal}/JsonStringBuilder.kt | 0 7 files changed, 84 insertions(+), 33 deletions(-) rename formats/json/nativeMain/src/{kotlinx.serialization.json.internal => kotlinx/serialization/json/internal}/JsonStringBuilder.kt (100%) diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt 
b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt index 4dd0cf6d1..afcbd902e 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt @@ -7,7 +7,6 @@ package kotlinx.serialization.json import kotlinx.serialization.* import kotlinx.serialization.json.internal.* import kotlinx.serialization.modules.* -import kotlin.js.* /** * The main entry point to work with JSON serialization. diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt index 15c8dbc6a..e6307e607 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonEncoder.kt @@ -10,7 +10,6 @@ import kotlinx.serialization.descriptors.* import kotlinx.serialization.encoding.* import kotlinx.serialization.json.* import kotlinx.serialization.modules.* -import kotlin.jvm.* import kotlin.native.concurrent.* @ExperimentalSerializationApi diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt index 462d93571..2cfa7bf5b 100644 --- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt +++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StringOps.kt @@ -31,17 +31,17 @@ internal val ESCAPE_STRINGS: Array = arrayOfNulls(93).apply { } @SharedImmutable -internal val ESCAPE_MARKERS: CharArray = CharArray(93).apply { +internal val ESCAPE_MARKERS: ByteArray = ByteArray(93).apply { for (c in 0..0x1f) { - this[c] = 0.toChar() + this[c] = 1.toByte() } - this['"'.toInt()] = '"' - this['\\'.toInt()] = '\\' - this['\t'.toInt()] = 't' - this['\b'.toInt()] = 'b' - this['\n'.toInt()] = 'n' - this['\r'.toInt()] = 'r' - this[0x0c] 
= 'f' + this['"'.toInt()] = '"'.toByte() + this['\\'.toInt()] = '\\'.toByte() + this['\t'.toInt()] = 't'.toByte() + this['\b'.toInt()] = 'b'.toByte() + this['\n'.toInt()] = 'n'.toByte() + this['\r'.toInt()] = 'r'.toByte() + this[0x0c] = 'f'.toByte() } internal fun StringBuilder.printQuoted(value: String) { diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt index fc8d3383a..8117fbbd7 100644 --- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt +++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonParserTest.kt @@ -5,6 +5,7 @@ package kotlinx.serialization.json import kotlinx.serialization.* +import kotlinx.serialization.builtins.* import kotlinx.serialization.json.internal.* import kotlinx.serialization.test.* import kotlin.test.* @@ -41,7 +42,6 @@ class JsonParserTest : JsonTestBase() { } } - @Test fun testParseEscapedSymbols() { assertEquals( @@ -58,11 +58,23 @@ class JsonParserTest : JsonTestBase() { assertStringFormAndRestored( """{"data":"Русские Буквы 🤔"}""", StringData("Русские Буквы \uD83E\uDD14"), - StringData.serializer(), - printResult = false + StringData.serializer() ) } + @Test + fun testUnicodeEscapes() { + val data = buildString { + append(1.toChar()) + append(".") + append(0x20.toChar()) + append(".") + append("\n") + } + + assertJsonFormAndRestored(String.serializer(), data, "\"\\u0001. 
.\\n\"") + } + @Test fun testTrailingComma() { testTrailingComma("{\"id\":0,}") diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt index 02b5138e6..2f910fc3a 100644 --- a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt +++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/CharArrayPool.kt @@ -5,18 +5,28 @@ import java.util.concurrent.* internal object CharArrayPool { private val arrays = ArrayDeque() private var charsTotal = 0 + /* + * Not really documented kill switch as a workaround for potential + * (unlikely) problems with memory consumption. + */ private val MAX_CHARS_IN_POOL = runCatching { System.getProperty("kotlinx.serialization.json.pool.size").toIntOrNull() - }.getOrNull() ?: 1024 * 1024 // 2 MB seems to be a reasonable contraint + }.getOrNull() ?: 1024 * 1024 // 2 MB seems to be a reasonable constraint, (1M of chars) - public fun take(): CharArray { + fun take(): CharArray { + /* + * Initially the pool is empty, so an instance will be allocated + * and the pool will be populated in the 'release' + * Note: a pooled array must be removed from the deque (not merely peeked at), + * otherwise several builders could end up sharing one buffer. + */ val candidate = synchronized(this) { - arrays.lastOrNull()?.also { charsTotal -= it.size } + arrays.removeLastOrNull()?.also { charsTotal -= it.size } } return candidate ?: CharArray(128) } - public fun release(array: CharArray) = synchronized(this) { + fun release(array: CharArray) = synchronized(this) { if (charsTotal + array.size >= MAX_CHARS_IN_POOL) return@synchronized charsTotal += array.size arrays.addLast(array) diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt index 49c9a3678..a28a43e79 100644 --- a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt +++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt @@ -1,14 +1,36 @@ package
kotlinx.serialization.json.internal -/* - * Optimized version of StringBuilder that is specific to JSON-encoding +/** + * Optimized version of StringBuilder that is specific to JSON-encoding. + * + * ## Implementation note + * + * In order to encode a single string, it should be processed symbol-per-symbol, + * in order to detect and escape unicode symbols. + * + * Done naively, it drastically slows down strings processing due to two factors: + * * Byte-by-byte copying that does not leverage optimized array copying + * * A lot of range and flags checks due to Java's compact strings + * + * The following technique is used: + * 1) Instead of storing intermediate result in `StringBuilder`, we store it in + * `CharArray` directly, skipping compact strings checks in `StringBuilder` + * 2) Instead of copying symbols one-by-one, we optimistically copy it in batch using + * optimized and intrinsified `string.toCharArray(destination)`. + * It copies the content up to 8 times faster. + * Then we iterate over the char-array and execute a single check over + * each character that is easily unrolled and vectorized by the inliner. + * If an escape character is found, we fall back to per-symbol processing. + * + * 3) We pool char arrays in order to save excess resizes, allocations + * and nulls-out of arrays. */ internal actual class JsonStringBuilder { private var array = CharArrayPool.take() private var size = 0 actual fun append(value: Long) { - ensureAdditionalCapacity(20) // Long length + // Can be hand-rolled, but requires a lot of code and corner-cases handling append(value.toString()) } @@ -34,7 +56,7 @@ internal actual class JsonStringBuilder { for (i in sz until sz + length) { val ch = arr[i].toInt() // Do we have unescaped symbols?
- if (ch < ESCAPE_MARKERS.size && ESCAPE_MARKERS[ch] != 0.toChar()) { + if (ch < ESCAPE_MARKERS.size && ESCAPE_MARKERS[ch] != 0.toByte()) { // Go to slow path return appendStringSlowPath(i - sz, i, string) } @@ -50,20 +72,31 @@ internal actual class JsonStringBuilder { for (i in firstEscapedChar until string.length) { val ch = string[i].toInt() // Do we have unescaped symbols? - var marker: Char = 0.toChar() - if (ch < ESCAPE_MARKERS.size && ESCAPE_MARKERS[ch].also { marker = it } != 0.toChar()) { - if (marker != 0.toChar()) { - array[sz] = '\\' - array[sz + 1] = marker - sz += 2 - } else { - val escapedString = ESCAPE_STRINGS[ch]!! - ensureTotalCapacity(sz + escapedString.length) - escapedString.toCharArray(array, sz, 0, escapedString.length) - sz += escapedString.length + if (ch < ESCAPE_MARKERS.size) { + /* + * Escape markers are populated for backslash-escaped symbols. + * E.g. ESCAPE_MARKERS['\b'] == 'b'.toByte() + * Everything else is populated with either zeros (no escapes) + * or ones (unicode escape) + */ + when (val marker = ESCAPE_MARKERS[ch]) { + 0.toByte() -> { + array[sz++] = ch.toChar() + } + 1.toByte() -> { + val escapedString = ESCAPE_STRINGS[ch]!! + ensureTotalCapacity(sz + escapedString.length) + escapedString.toCharArray(array, sz, 0, escapedString.length) + sz += escapedString.length + } + else -> { + array[sz] = '\\' + array[sz + 1] = marker.toChar() + sz += 2 + } } } else { - array[sz++] = string[i] + array[sz++] = ch.toChar() } } array[sz++] = '"' diff --git a/formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt b/formats/json/nativeMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt similarity index 100% rename from formats/json/nativeMain/src/kotlinx.serialization.json.internal/JsonStringBuilder.kt rename to formats/json/nativeMain/src/kotlinx/serialization/json/internal/JsonStringBuilder.kt