Kotlin · IgnatBeresnev · Oct 31, 2023 · Oct 17, 2023 · Oct 18, 2023 · Oct 26, 2023
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
@@ -26,7 +26,7 @@ korlibs-template = "4.0.10"
 kotlinx-html = "0.9.1"
 
 ## Markdown
-jetbrains-markdown = "0.3.1"
+jetbrains-markdown = "0.5.2"
 
 ## JSON
 jackson = "2.12.7" # jackson 2.13.X does not support kotlin language version 1.4, check before updating

diff --git a/plugins/base/src/test/kotlin/markdown/ParserTest.kt b/plugins/base/src/test/kotlin/markdown/ParserTest.kt
@@ -1573,7 +1573,60 @@ class ParserTest : KDocTest() {
             P(listOf(Text(" sdsdsds sdd"))),
             P(listOf(Text(" eweww  ")))
         )
-        print(expectedDocumentationNode)
+        assertEquals(actualDocumentationNode, expectedDocumentationNode)
+    }
+
+    @Test // exists due to #3231; checks that whitespace between the hash symbols and the text is dropped
+    fun `should ignore the leading whitespace in header in-between the hash symbol and header text`() {
+        val markdown = """
+        | #   first header
+        | ##     second header
+        | ###                third header
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(Text("second header"))),
+            H3(listOf(Text("third header"))),
+        )
+        assertEquals(actualDocumentationNode, expectedDocumentationNode)
+    }
+
+    @Test // exists due to #3231; checks that whitespace after the header text is dropped
+    fun `should ignore trailing whitespace in header`() {
+        val markdown = """
+        | # first header     
+        | ## second header        
+        | ### third header                                          
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(Text("second header"))),
+            H3(listOf(Text("third header"))),
+        )
+        assertEquals(actualDocumentationNode, expectedDocumentationNode)
+    }
+
+    @Test // exists due to #3231; checks that only the edges are trimmed, inner whitespace and inline markup stay
+    fun `should ignore leading and trailing whitespace in header, but not whitespace in the middle`() {
+        val markdown = """
+        | #          first header     
+        | ##     second ~~header~~   in a **long** sentence ending     with whitespaces   
+        | ###                third      header        
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(
+                Text("second "),
+                Strikethrough(listOf(Text("header"))),
+                Text("   in a "), // whitespace next to inline markup is kept verbatim
+                B(listOf(Text("long"))),
+                Text(" sentence ending     with whitespaces") // inner run of spaces kept, trailing run gone
+            )),
+            H3(listOf(Text("third      header"))),
+        )
         assertEquals(actualDocumentationNode, expectedDocumentationNode)
     }
 }

diff --git a/...is-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt b/...is-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
@@ -77,6 +77,44 @@ public open class MarkdownParser(
             ).flatMap { it.children }
         )
 
+    /**
+     * Handles [MarkdownTokenTypes.ATX_CONTENT], the content part of header elements
+     * such as [MarkdownElementTypes.ATX_1], [MarkdownElementTypes.ATX_2] and so on.
+     *
+     * For example, a header line like `# Header text` is expected to be parsed into:
+     * - a single [MarkdownTokenTypes.ATX_HEADER] with startOffset = 0, endOffset = 1 (just the `#` symbol)
+     * - a composite [MarkdownTokenTypes.ATX_CONTENT] with four children: WHITE_SPACE, TEXT, WHITE_SPACE, TEXT.
+     */
+    private fun headerContentHandler(node: ASTNode): List<DocTag> {
+        // ATX_CONTENT covers everything after the `#` symbol, so the space separating `#` from the text
+        // (as in `# header`) is one of its children, and so are any spaces that trail the text.
+        // Neither is part of the header itself: left in place, they would turn `header` into
+        // `<whitespace>header`, which is why both edges are trimmed before the children are evaluated.
+        // A wider gap, like `#     header`, is still expected to be a single (longer) WHITE_SPACE element,
+        // so dropping the edge elements is enough; whitespace in the middle of the header is left alone.
+        val headerContent = node.children
+            .trimWhitespaceToken()
+            .evaluateChildren()
+        return DocTagsFromIElementFactory.getInstance(
+            MarkdownElementTypes.PARAGRAPH, // not TEXT: PARAGRAPH keeps compatibility with previous versions
+            children = headerContent
+        )
+    }
+
+    /**
+     * @return a sublist of [this] list without leading and trailing [MarkdownTokenTypes.WHITE_SPACE]
+     *         elements; a list that has no other elements (or no elements at all) is returned unchanged
+     */
+    private fun List<ASTNode>.trimWhitespaceToken(): List<ASTNode> {
+        val isNotWhitespace = { child: ASTNode -> child.type != MarkdownTokenTypes.WHITE_SPACE }
+
+        val contentStart = indexOfFirst(isNotWhitespace)
+        return when (contentStart) {
+            -1 -> this // whitespace only (or empty): nothing to cut, so the list is handed back as is
+            else -> subList(contentStart, indexOfLast(isNotWhitespace) + 1)
+        }
+    }
+
     private fun horizontalRulesHandler() =
         DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
 
@@ -365,6 +403,7 @@ public open class MarkdownParser(
             MarkdownElementTypes.ATX_5,
             MarkdownElementTypes.ATX_6,
             -> headersHandler(node)
+            MarkdownTokenTypes.ATX_CONTENT -> headerContentHandler(node)
             MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler()
             MarkdownElementTypes.STRONG -> strongHandler(node)
             MarkdownElementTypes.EMPH -> emphasisHandler(node)