Kotlin · sandwwraith · Mar 26, 2024 · Feb 26, 2024 · Mar 5, 2024 · Mar 14, 2024
diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedCommentsBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedCommentsBenchmark.kt
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
+package kotlinx.benchmarks.json
+
+import kotlinx.benchmarks.model.*
+import kotlinx.serialization.json.*
+import org.openjdk.jmh.annotations.*
+import java.io.*
+import java.util.concurrent.*
+
+/**
+ * Benchmarks decoding of the `/twitter_macro.json` resource into [MicroTwitterFeed] with the
+ * `allowComments` setting switched on and off, both from a [String] and from a byte stream.
+ *
+ * Two inputs are used: the resource as-is, and a copy in which some lines carry a trailing
+ * `// json comment` (see [prepareInputWithComments]).
+ */
+@Warmup(iterations = 7, time = 1)
+@Measurement(iterations = 7, time = 1)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@State(Scope.Benchmark)
+@Fork(2)
+open class TwitterFeedCommentsBenchmark {
+    val inputBytes = TwitterFeedBenchmark::class.java.getResource("/twitter_macro.json").readBytes()
+    private val input = inputBytes.decodeToString()
+    private val inputWithComments = prepareInputWithComments(input)
+    private val inputWithCommentsBytes = inputWithComments.encodeToByteArray()
+
+    private val jsonComments = Json { ignoreUnknownKeys = true; allowComments = true; }
+    private val jsonNoComments = Json { ignoreUnknownKeys = true; allowComments = false; }
+
+    /**
+     * Returns a copy of [inp] where every line whose first non-whitespace characters are `"i`
+     * gets ` // json comment` appended; all lines are re-joined with `\n`.
+     *
+     * @throws IllegalStateException if no line of [inp] received a comment, because the
+     * "comments in data" benchmarks would then silently measure comment-free input.
+     */
+    fun prepareInputWithComments(inp: String): String {
+        val result = inp.lineSequence().map { s ->
+            // "id", "in_...", "is_...", etc
+            if (!s.trimStart().startsWith("\"i")) s else "$s // json comment"
+        }.joinToString("\n")
+        // check() instead of assert(): JVM assertions are disabled unless -ea is passed,
+        // so assert() would never actually validate the prepared input in a benchmark run.
+        check(result.contains("// json comment")) { "Prepared input does not contain any comment" }
+        return result
+    }
+
+    @Setup
+    fun init() {
+        // Explicitly invoking both variants before benchmarking so we know that both parser implementation classes are loaded
+        require("foobar" == jsonComments.decodeFromString<String>("\"foobar\""))
+        require("foobar" == jsonNoComments.decodeFromString<String>("\"foobar\""))
+    }
+
+    // The difference with TwitterFeedBenchmark.decodeMicroTwitter shows if we slow down when both StringJsonLexer and CommentsJsonLexer
+    // are loaded by JVM. Should be almost non-existent on modern JVMs (but on OpenJDK-Corretto-11.0.14.1 there is one. 17 is fine.)
+    @Benchmark
+    fun decodeMicroTwitter() = jsonNoComments.decodeFromString(MicroTwitterFeed.serializer(), input)
+
+    // The difference with this.decodeMicroTwitter shows if we slow down when comments are enabled but no comments present
+    // in the input. It is around 13% slower than without comments support, mainly because skipWhitespaces is a separate function
+    // that sometimes is not inlined by JIT.
+    @Benchmark
+    fun decodeMicroTwitterCommentSupport() = jsonComments.decodeFromString(MicroTwitterFeed.serializer(), input)
+
+    // Shows how much actual skipping of the comments takes: around 10%.
+    @Benchmark
+    fun decodeMicroTwitterCommentInData() = jsonComments.decodeFromString(MicroTwitterFeed.serializer(), inputWithComments)
+
+    // Same as decodeMicroTwitterCommentSupport, but reads the comment-free input from a byte stream.
+    @Benchmark
+    fun decodeMicroTwitterCommentSupportStream() = jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), ByteArrayInputStream(inputBytes))
+
+    // Same as decodeMicroTwitterCommentInData, but reads the commented input from a byte stream.
+    @Benchmark
+    fun decodeMicroTwitterCommentInDataStream() = jsonComments.decodeFromStream(MicroTwitterFeed.serializer(), ByteArrayInputStream(inputWithCommentsBytes))
+}
diff --git a/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedStreamBenchmark.kt b/benchmark/src/jmh/kotlin/kotlinx/benchmarks/json/TwitterFeedStreamBenchmark.kt
@@ -30,6 +30,14 @@ open class TwitterFeedStreamBenchmark {
         jacksonObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
 
 
+    @Setup
+    fun init() {
+        // Explicitly invoking both decodeFromStream and decodeFromString before benchmarking
+        // so we know that both parser implementation classes are loaded
+        require("foobar" == Json.decodeFromStream<String>(ByteArrayInputStream("\"foobar\"".encodeToByteArray())))
+        require("foobar" == Json.decodeFromString<String>("\"foobar\""))
+    }
+
+
     private val inputStream: InputStream
         get() = ByteArrayInputStream(bytes)
     private val outputStream: OutputStream
@@ -59,6 +67,7 @@ open class TwitterFeedStreamBenchmark {
         }
     }
 
+    // Difference with TwitterFeedBenchmark.decodeMicroTwitter shows how heavy Java's standard UTF-8 decoding actually is.
     @Benchmark
     fun decodeMicroTwitterReadText(): MicroTwitterFeed {
         return inputStream.use {

diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/features/JsonCommentsTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/features/JsonCommentsTest.kt
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2017-2024 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
+package kotlinx.serialization.features
+
+import kotlinx.serialization.*
+import kotlinx.serialization.json.*
+import kotlin.test.*
+
+class JsonCommentsTest: JsonTestBase() {
+    val json = Json(default) {
+        allowComments = true
+        allowTrailingComma = true
+    }
+
+    val withLenient = Json(json) {
+        isLenient = true
+        ignoreUnknownKeys = true
+    }
+
+    @Test
+    fun testBasic() = parametrizedTest { mode ->
+        val inputBlock = """{"data": "b" /*value b*/ }"""
+        val inputLine = "{\"data\": \"b\" // value b \n }"
+        assertEquals(StringData("b"), json.decodeFromString(inputBlock, mode))
+        assertEquals(StringData("b"), json.decodeFromString(inputLine, mode))
+    }
+
+    @Serializable
+    data class Target(val key: String, val key2: List<Int>, val key3: NestedTarget, val key4: String)
+
+    @Serializable
+    data class NestedTarget(val nestedKey: String)
+
+    private fun target(key4: String): Target = Target("value", listOf(1, 2), NestedTarget("foo"), key4)
+
+    @Test
+    fun testAllBlocks() = parametrizedTest { mode ->
+        val input = """{ /*beginning*/
+            /*before key*/ "key" /*after key*/ : /*after colon*/ "value" /*before comma*/,
+            "key2": [ /*array1*/ 1, /*array2*/ 2, /*end array*/],
+            "key3": { /*nested obj*/ "nestedKey": "foo"} /*after nested*/,
+            "key4": "/*comment inside quotes is a part of value*/",
+            /*before end*/
+        }"""
+        assertEquals(target("/*comment inside quotes is a part of value*/"), json.decodeFromString(input, mode))
+    }
+
+    @Test
+    fun testAllLines() = parametrizedTest { mode ->
+        val input = """{ //beginning
+            //before key
+            "key" // after key
+             : // after colon
+              "value" //before comma
+              ,
+            "key2": [ //array1
+             1, //array2
+              2, //end array
+              ],
+            "key3": { //nested obj
+            "nestedKey": "foo"
+            } , //after nested
+            "key4": "//comment inside quotes is a part of value",
+            //before end
+        }"""
+        assertEquals(target("//comment inside quotes is a part of value"), json.decodeFromString(input, mode))
+    }
+
+    @Test
+    fun testMixed() = parametrizedTest { mode ->
+        val input = """{ // begin
+           "key": "value", // after
+            "key2": /* array */ /*another comment */ [1, 2],
+            "key3": /* //this is a block comment */ { "nestedKey": // /*this is a line comment*/ "bar"
+                "foo" },
+            "key4": /* nesting block comments /* not supported */ "*/"
+        /* end */}"""
+        assertEquals(target("*/"), json.decodeFromString(input, mode))
+    }
+
+    @Test
+    fun testWeirdKeys() {
+        val map = mapOf(
+            "// comment inside quotes is a part of key" to "/* comment inside quotes is a part of value */",
+            "/*key */" to "/* value",
+            "/* key" to "*/ value"
+        )
+        val input = """/* before begin */
+            {
+            ${map.entries.joinToString(separator = ",\n") { (k, v) -> "\"$k\" : \"$v\"" }}
+            } // after end
+        """.trimIndent()
+        val afterMap = json.parseToJsonElement(input).jsonObject.mapValues { (_, v) ->
+            v as JsonPrimitive
+            assertTrue(v.isString)
+            v.content
+        }
+        assertEquals(map, afterMap)
+    }
+
+    @Test
+    fun testWithLenient() = parametrizedTest { mode ->
+        val input = """{ //beginning
+            //before key
+            key // after key
+             : // after colon
+              value //before comma
+              ,
+            key2: [ //array1
+             1, //array2
+              2, //end array
+              ],
+            key3: { //nested obj
+            nestedKey: "foo"
+            } , //after nested
+            key4: value//comment_cannot_break_value_apart, 
+            key5: //comment without quotes where new token expected is still a comment
+            value5,
+            //before end
+        }"""
+        assertEquals(target("value//comment_cannot_break_value_apart"), withLenient.decodeFromString(input, mode))
+    }
+
+    @Test
+    fun testUnclosedCommentsErrorMsg() = parametrizedTest { mode ->
+        // A line comment does not need a terminating newline: the end of input closes it.
+        val input = """{"data": "x"} // no newline"""
+        assertEquals(StringData("x"),  json.decodeFromString<StringData>(input, mode))
+        // A block comment that is never closed must be rejected.
+        val input2 = """{"data": "x"} /* no endblock"""
+        // The String parameter of assertFailsWith is the assertion's own failure message, not the expected
+        // exception text, so the thrown exception is captured and its message is verified explicitly.
+        val failure = assertFailsWith<SerializationException> {
+            json.decodeFromString<StringData>(input2, mode)
+        }
+        assertContains(failure.message.orEmpty(), "Expected end of the block comment: \"*/\", but had EOF instead")
+    }
+
+    private val lexerBatchSize = 16 * 1024
+
+    @Test
+    fun testVeryLargeComments() = parametrizedTest { mode ->
+        val strLen = lexerBatchSize * 2 + 42
+        val inputLine = """{"data":  //a""" + "a".repeat(strLen) + "\n\"x\"}"
+        assertEquals(StringData("x"),  json.decodeFromString<StringData>(inputLine, mode))
+        val inputBlock = """{"data":  /*a""" + "a".repeat(strLen) + "*/\"x\"}"
+        assertEquals(StringData("x"),  json.decodeFromString<StringData>(inputBlock, mode))
+    }
+
+    @Test
+    fun testCommentsOnThresholdEdge() = parametrizedTest { mode ->
+        val inputPrefix = """{"data":  /*a"""
+        // Here, we test the situation when closing */ is divided in buffer:
+        // * fits in the initial buffer, but / is not.
+        // E.g. situation with batches looks like this: ['{', '"', 'd', ..., '*'], ['/', ...]
+        val bloatSize = lexerBatchSize - inputPrefix.length - 1
+        val inputLine = inputPrefix + "a".repeat(bloatSize) + "*/\"x\"}"
+        assertEquals(StringData("x"),  json.decodeFromString<StringData>(inputLine, mode))
+
+        // Test when * is unclosed and last in buffer:
+        val inputLine2 = inputPrefix + "a".repeat(bloatSize) + "*"
+        // The String parameter of assertFailsWith is the assertion's own failure message, not the expected
+        // exception text, so the thrown exception is captured and its message is verified explicitly.
+        val failure = assertFailsWith<SerializationException> {
+            json.decodeFromString<StringData>(inputLine2, mode)
+        }
+        assertContains(failure.message.orEmpty(), "Expected end of the block comment: \"*/\", but had EOF instead")
+    }
+
+}
diff --git a/formats/json-tests/jvmTest/resources/spec_cases/listing.txt b/formats/json-tests/jvmTest/resources/spec_cases/listing.txt
@@ -228,4 +228,4 @@ y_structure_lonely_true.json
 y_structure_string_empty.json
 y_structure_trailing_newline.json
 y_structure_true_in_array.json
-y_structure_whitespace_array.json
+y_structure_whitespace_array.json
diff --git a/formats/json-tests/jvmTest/resources/spec_cases/listing_comments.txt b/formats/json-tests/jvmTest/resources/spec_cases/listing_comments.txt
@@ -0,0 +1,4 @@
+// Non-spec inputs that we accept with allowComments = true setting:
+n_object_trailing_comment.json
+n_object_trailing_comment_slash_open.json
+n_structure_object_with_comment.json
diff --git a/formats/json-tests/jvmTest/src/kotlinx/serialization/json/SpecConformanceTest.kt b/formats/json-tests/jvmTest/src/kotlinx/serialization/json/SpecConformanceTest.kt
diff --git a/formats/json/api/kotlinx-serialization-json.api b/formats/json/api/kotlinx-serialization-json.api
@@ -94,6 +94,7 @@ public final class kotlinx/serialization/json/JsonArraySerializer : kotlinx/seri
 }
 
 public final class kotlinx/serialization/json/JsonBuilder {
+	public final fun getAllowComments ()Z
 	public final fun getAllowSpecialFloatingPointValues ()Z
 	public final fun getAllowStructuredMapKeys ()Z
 	public final fun getAllowTrailingComma ()Z
@@ -111,6 +112,7 @@ public final class kotlinx/serialization/json/JsonBuilder {
 	public final fun getUseAlternativeNames ()Z
 	public final fun getUseArrayPolymorphism ()Z
 	public final fun isLenient ()Z
+	public final fun setAllowComments (Z)V
 	public final fun setAllowSpecialFloatingPointValues (Z)V
 	public final fun setAllowStructuredMapKeys (Z)V
 	public final fun setAllowTrailingComma (Z)V
@@ -141,6 +143,7 @@ public synthetic class kotlinx/serialization/json/JsonClassDiscriminator$Impl :
 
 public final class kotlinx/serialization/json/JsonConfiguration {
 	public fun <init> ()V
+	public final fun getAllowComments ()Z
 	public final fun getAllowSpecialFloatingPointValues ()Z
 	public final fun getAllowStructuredMapKeys ()Z
 	public final fun getAllowTrailingComma ()Z

diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt b/formats/json/commonMain/src/kotlinx/serialization/json/Json.kt
@@ -102,12 +102,13 @@ public sealed class Json(
      * @throws [IllegalArgumentException] if the decoded input cannot be represented as a valid instance of type [T]
      */
     public final override fun <T> decodeFromString(deserializer: DeserializationStrategy<T>, @FormatLanguage("json", "", "") string: String): T {
-        val lexer = StringJsonLexer(string)
+        val lexer = StringJsonLexer(this, string)
         val input = StreamingJsonDecoder(this, WriteMode.OBJ, lexer, deserializer.descriptor, null)
         val result = input.decodeSerializableValue(deserializer)
         lexer.expectEof()
         return result
     }
+
     /**
      * Serializes the given [value] into an equivalent [JsonElement] using the given [serializer]
      *
@@ -385,6 +386,22 @@ public class JsonBuilder internal constructor(json: Json) {
     @ExperimentalSerializationApi
     public var allowTrailingComma: Boolean = json.configuration.allowTrailingComma
 
+    /**
+     * Allows parser to accept C/Java-style comments in JSON input.
+     *
+     * Comments are skipped and are not stored anywhere; this setting does not affect encoding in any way.
+     *
+     * More specifically, a comment is a substring that is not a part of a JSON key or value and matches one of the following:
+     *
+     * 1. Starts with `//` characters and ends with a newline character `\n` or with the end of the input.
+     * 2. Starts with `/*` characters and ends with `*/` characters. Nesting block comments
+     *  is not supported: no matter how many `/*` characters you have, the first `*/` will end the comment.
+     *
+     *  `false` by default.
+     */
+    @ExperimentalSerializationApi
+    public var allowComments: Boolean = json.configuration.allowComments
+
     /**
      * Module with contextual and polymorphic serializers to be used in the resulting [Json] instance.
      *
@@ -422,7 +439,7 @@ public class JsonBuilder internal constructor(json: Json) {
             allowStructuredMapKeys, prettyPrint, explicitNulls, prettyPrintIndent,
             coerceInputValues, useArrayPolymorphism,
             classDiscriminator, allowSpecialFloatingPointValues, useAlternativeNames,
-            namingStrategy, decodeEnumsCaseInsensitive, allowTrailingComma, classDiscriminatorMode
+            namingStrategy, decodeEnumsCaseInsensitive, allowTrailingComma, allowComments, classDiscriminatorMode
         )
     }
 }

diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/JsonConfiguration.kt b/formats/json/commonMain/src/kotlinx/serialization/json/JsonConfiguration.kt
@@ -38,6 +38,8 @@ public class JsonConfiguration @OptIn(ExperimentalSerializationApi::class) inter
     @ExperimentalSerializationApi
     public val allowTrailingComma: Boolean = false,
     @ExperimentalSerializationApi
+    public val allowComments: Boolean = false,
+    @ExperimentalSerializationApi
     public var classDiscriminatorMode: ClassDiscriminatorMode = ClassDiscriminatorMode.POLYMORPHIC,
 ) {
 
@@ -49,7 +51,7 @@ public class JsonConfiguration @OptIn(ExperimentalSerializationApi::class) inter
                 "prettyPrintIndent='$prettyPrintIndent', coerceInputValues=$coerceInputValues, useArrayPolymorphism=$useArrayPolymorphism, " +
                 "classDiscriminator='$classDiscriminator', allowSpecialFloatingPointValues=$allowSpecialFloatingPointValues, " +
                 "useAlternativeNames=$useAlternativeNames, namingStrategy=$namingStrategy, decodeEnumsCaseInsensitive=$decodeEnumsCaseInsensitive, " +
-                "allowTrailingComma=$allowTrailingComma, classDiscriminatorMode=$classDiscriminatorMode)"
+                "allowTrailingComma=$allowTrailingComma, allowComments=$allowComments, classDiscriminatorMode=$classDiscriminatorMode)"
     }
 }
 

diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStreams.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/JsonStreams.kt
@@ -37,7 +37,7 @@ public fun <T> decodeByReader(
     deserializer: DeserializationStrategy<T>,
     reader: InternalJsonReader
 ): T {
-    val lexer = ReaderJsonLexer(reader)
+    val lexer = ReaderJsonLexer(json, reader)
     try {
         val input = StreamingJsonDecoder(json, WriteMode.OBJ, lexer, deserializer.descriptor, null)
         val result = input.decodeSerializableValue(deserializer)
@@ -56,7 +56,7 @@ public fun <T> decodeToSequenceByReader(
     deserializer: DeserializationStrategy<T>,
     format: DecodeSequenceMode = DecodeSequenceMode.AUTO_DETECT
 ): Sequence<T> {
-    val lexer = ReaderJsonLexer(reader, CharArray(BATCH_SIZE)) // Unpooled buffer due to lazy nature of sequence
+    val lexer = ReaderJsonLexer(json, reader, CharArray(BATCH_SIZE)) // Unpooled buffer due to lazy nature of sequence
     val iter = JsonIterator(format, json, lexer, deserializer)
     return Sequence { iter }.constrainOnce()
 }

diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/StreamingJsonDecoder.kt
@@ -359,7 +359,7 @@ public fun <T> decodeStringToJsonTree(
     deserializer: DeserializationStrategy<T>,
     source: String
 ): JsonElement {
-    val lexer = StringJsonLexer(source)
+    val lexer = StringJsonLexer(json, source)
     val input = StreamingJsonDecoder(json, WriteMode.OBJ, lexer, deserializer.descriptor, null)
     val tree = input.decodeJsonElement()
     lexer.expectEof()

diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/TreeJsonDecoder.kt
@@ -153,9 +153,12 @@ private sealed class AbstractJsonTreeDecoder(
         return this as? JsonLiteral ?: throw JsonDecodingException(-1, "Unexpected 'null' literal when non-nullable $type was expected")
     }
 
-    override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder =
-        if (inlineDescriptor.isUnsignedNumber) JsonDecoderForUnsignedTypes(StringJsonLexer(getPrimitiveValue(tag).content), json)
-        else super.decodeTaggedInline(tag, inlineDescriptor)
+    // Unsigned-number inline descriptors are decoded by JsonDecoderForUnsignedTypes, fed by a lexer over the
+    // tagged primitive's content; every other inline descriptor is delegated to the superclass.
+    override fun decodeTaggedInline(tag: String, inlineDescriptor: SerialDescriptor): Decoder {
+        return if (inlineDescriptor.isUnsignedNumber) {
+            val lexer = StringJsonLexer(json, getPrimitiveValue(tag).content)
+            JsonDecoderForUnsignedTypes(lexer, json)
+        } else super.decodeTaggedInline(tag, inlineDescriptor)
+    }
 
     override fun decodeInline(descriptor: SerialDescriptor): Decoder {
         return if (currentTagOrNull != null) super.decodeInline(descriptor)