Skip to content

Commit

Permalink
Merge pull request #32843 from ppalaga/230423-javadoc2asciidoc
Browse files Browse the repository at this point in the history
Improve JavaDoc -> AsciiDoc transformation for lists, paragraphs and code blocks
  • Loading branch information
gsmet authored Jun 22, 2023
2 parents 7e8156e + 1616f28 commit ae7a5cf
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ final class JavaDocParser {
// HTML element names; appendHtml compares them against Node#nodeName()
// <ol> - ordered list
private static final String ORDERED_LIST_NODE = "ol";
// <sup> - superscript
private static final String SUPER_SCRIPT_NODE = "sup";
// <ul> - unordered list
private static final String UN_ORDERED_LIST_NODE = "ul";
// <pre> - preformatted text, rendered as an AsciiDoc code block
private static final String PREFORMATED_NODE = "pre";
// <blockquote> - rendered as an AsciiDoc quote block
private static final String BLOCKQUOTE_NODE = "blockquote";

private static final String BIG_ASCIDOC_STYLE = "[.big]";
private static final String LINK_ATTRIBUTE_FORMAT = "[%s]";
Expand All @@ -62,6 +64,10 @@ final class JavaDocParser {
// Marker written in front of every item of an unordered list
private static final String UNORDERED_LIST_ITEM_ASCIDOC_STYLE = " - ";
private static final String UNDERLINE_ASCIDOC_STYLE = "[.underline]";
private static final String LINE_THROUGH_ASCIDOC_STYLE = "[.line-through]";
// AsciiDoc hard line break: a space, a plus sign and a newline; JavaDocParser.trim() treats it as whitespace
private static final String HARD_LINE_BREAK_ASCIDOC_STYLE = " +\n";
// Fence written before and after the content of a <pre> element
private static final String CODE_BLOCK_ASCIDOC_STYLE = "```";
// Opening of a quote block: the [quote] attribute line followed by the four-underscore delimiter
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE = "[quote]\n____";
// Closing delimiter of a quote block
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END = "____";

private final boolean inlineMacroMode;

Expand Down Expand Up @@ -185,25 +191,51 @@ private String htmlJavadocToAsciidoc(JavadocDescription javadocDescription) {
}
}

return sb.toString().trim();
return trim(sb);
}

private void appendHtml(StringBuilder sb, Node node) {
for (Node childNode : node.childNodes()) {
switch (childNode.nodeName()) {
case PARAGRAPH_NODE:
sb.append(NEW_LINE);
newLine(sb);
newLine(sb);
appendHtml(sb, childNode);
break;
case PREFORMATED_NODE:
newLine(sb);
newLine(sb);
sb.append(CODE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
for (Node grandChildNode : childNode.childNodes()) {
unescapeHtmlEntities(sb, grandChildNode.toString());
}
newLineIfNeeded(sb);
sb.append(CODE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
newLine(sb);
break;
case BLOCKQUOTE_NODE:
newLine(sb);
newLine(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
appendHtml(sb, childNode);
newLineIfNeeded(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END);
newLine(sb);
newLine(sb);
break;
case ORDERED_LIST_NODE:
case UN_ORDERED_LIST_NODE:
newLine(sb);
appendHtml(sb, childNode);
break;
case LIST_ITEM_NODE:
final String marker = childNode.parent().nodeName().equals(ORDERED_LIST_NODE)
? ORDERED_LIST_ITEM_ASCIDOC_STYLE
: UNORDERED_LIST_ITEM_ASCIDOC_STYLE;
sb.append(NEW_LINE);
newLine(sb);
sb.append(marker);
appendHtml(sb, childNode);
break;
Expand All @@ -213,7 +245,7 @@ private void appendHtml(StringBuilder sb, Node node) {
sb.append(link);
final StringBuilder caption = new StringBuilder();
appendHtml(caption, childNode);
sb.append(String.format(LINK_ATTRIBUTE_FORMAT, caption.toString().trim()));
sb.append(String.format(LINK_ATTRIBUTE_FORMAT, trim(caption)));
break;
case CODE_NODE:
sb.append(BACKTICK);
Expand Down Expand Up @@ -269,7 +301,7 @@ private void appendHtml(StringBuilder sb, Node node) {
sb.append(HASH);
break;
case NEW_LINE_NODE:
sb.append(NEW_LINE);
sb.append(HARD_LINE_BREAK_ASCIDOC_STYLE);
break;
case TEXT_NODE:
String text = ((TextNode) childNode).text();
Expand All @@ -295,6 +327,142 @@ private void appendHtml(StringBuilder sb, Node node) {
}
}

/**
 * Trims the content of the given {@link StringBuilder} in place and returns the result as a string.
 * <p>
 * In addition to the characters {@code <= ' '} stripped by {@link String#trim()}, the AsciiDoc hard line
 * break {@code " +\n"} is treated as whitespace at both ends of the content.
 *
 * @param sb the {@link StringBuilder} to trim; its content is modified
 * @return the trimmed content of the given {@link StringBuilder}
 */
static String trim(StringBuilder sb) {
    final int length = sb.length();

    /* Index of the first character to keep */
    int start = 0;
    while (start < length) {
        final char current = sb.charAt(start);
        final boolean hardBreak = current == ' '
                && start + 2 < length
                && sb.charAt(start + 1) == '+'
                && sb.charAt(start + 2) == '\n';
        if (hardBreak) {
            /* Space followed by + and newline is an AsciiDoc hard break that we consider whitespace */
            start += 3;
        } else if (current <= ' ') {
            /* Whitespace as defined by String.trim() */
            start++;
        } else {
            break;
        }
    }

    /* Index just past the last character to keep; the backward scan never crosses start */
    int end = length;
    while (end > start) {
        final char current = sb.charAt(end - 1);
        final boolean hardBreak = current == '\n'
                && end - 3 >= start
                && sb.charAt(end - 2) == '+'
                && sb.charAt(end - 3) == ' ';
        if (hardBreak) {
            /* Same hard break sequence, recognized from its terminating newline */
            end -= 3;
        } else if (current <= ' ') {
            end--;
        } else {
            break;
        }
    }

    /* Cut the tail first so that the indices computed above stay valid for the head removal */
    sb.setLength(end);
    sb.delete(0, start);
    return sb.toString();
}

/**
 * Strips all trailing spaces, tabs, carriage returns and line feeds from the given buffer and then appends
 * a single {@code NEW_LINE}, so that any trailing run of whitespace and line ends is collapsed into one
 * {@code NEW_LINE}.
 *
 * @param sb the buffer to terminate
 * @return the same {@code sb} instance
 */
private static StringBuilder newLineIfNeeded(StringBuilder sb) {
    return trimText(sb, " \t\r\n").append(NEW_LINE);
}

/**
 * Appends {@code NEW_LINE} to the given buffer after removing the spaces and tabs at the end of the
 * current line. Earlier line ends are kept, so repeated calls produce blank lines.
 *
 * @param sb the buffer to append to
 * @return the same {@code sb} instance
 */
private static StringBuilder newLine(StringBuilder sb) {
    return trimText(sb, " \t").append(NEW_LINE);
}

/**
 * Removes from the end of the given buffer every character that occurs in {@code charsToTrim}.
 *
 * @param sb the buffer to shorten in place
 * @param charsToTrim the set of characters to strip, given as a string
 * @return the same {@code sb} instance
 */
private static StringBuilder trimText(StringBuilder sb, String charsToTrim) {
    int keep = sb.length();
    while (keep > 0 && charsToTrim.indexOf(sb.charAt(keep - 1)) >= 0) {
        keep--;
    }
    sb.setLength(keep);
    return sb;
}

/**
 * Appends the given HTML text to {@code sb}, skipping its leading whitespace, replacing HTML character
 * references by the characters they denote and normalizing line ends to {@code \n}.
 * <p>
 * Supported references are {@code &lt;}, {@code &gt;}, {@code &amp;}, {@code &quot;}, {@code &apos;},
 * {@code &nbsp;} (rendered as the AsciiDoc attribute <code>{nbsp}</code>) and numeric references in decimal
 * ({@code &#60;}) or hexadecimal ({@code &#x3C;}) notation. An empty reference ({@code &;}) is dropped.
 *
 * @param sb the buffer to append to
 * @param text the HTML text to unescape
 * @return the same {@code sb} instance
 * @throws RuntimeException if {@code text} contains a reference that is neither one of the supported names
 *         nor a valid numeric reference
 */
private StringBuilder unescapeHtmlEntities(StringBuilder sb, String text) {
    int i = 0;
    /* Skip leading whitespace */
    while (i < text.length() && " \t\r\n".indexOf(text.charAt(i)) >= 0) {
        i++;
    }
    for (; i < text.length(); i++) {
        final char ch = text.charAt(i);
        switch (ch) {
            case '&':
                final int start = ++i;
                /* Leave i on the terminating ';' (or on the end of text); the for loop steps over it */
                while (i < text.length() && text.charAt(i) != ';') {
                    i++;
                }
                if (i > start) {
                    appendUnescapedEntity(sb, text.substring(start, i), text);
                }
                break;
            case '\r':
                if (i + 1 < text.length() && text.charAt(i + 1) == '\n') {
                    /* Ignore \r followed by \n; the \n is appended on the next iteration */
                } else {
                    /* A Mac single \r: replace by \n */
                    sb.append('\n');
                }
                break;
            default:
                sb.append(ch);
                break;
        }
    }
    return sb;
}

/* Appends the character(s) denoted by the given reference, i.e. the text found between '&' and ';'. */
private static void appendUnescapedEntity(StringBuilder sb, String entity, String text) {
    switch (entity) {
        case "lt":
            sb.append('<');
            break;
        case "gt":
            sb.append('>');
            break;
        case "nbsp":
            sb.append("{nbsp}");
            break;
        case "amp":
            sb.append('&');
            break;
        case "quot":
            sb.append('"');
            break;
        case "apos":
            sb.append('\'');
            break;
        default:
            /* appendCodePoint (rather than a char cast) keeps characters outside of the BMP intact */
            sb.appendCodePoint(parseNumericEntity(entity, text));
            break;
    }
}

/* Parses a numeric reference (#NNN, #xHHH, or the bare decimal NNN accepted historically) to a code point. */
private static int parseNumericEntity(String entity, String text) {
    final String message = "Could not parse HTML entity &" + entity + "; in\n\n" + text + "\n\n";
    final int codePoint;
    try {
        if (entity.startsWith("#x") || entity.startsWith("#X")) {
            codePoint = Integer.parseInt(entity.substring(2), 16);
        } else if (entity.startsWith("#")) {
            codePoint = Integer.parseInt(entity.substring(1));
        } else {
            /* Not valid HTML, but accepted by earlier versions of this method */
            codePoint = Integer.parseInt(entity);
        }
    } catch (NumberFormatException e) {
        throw new RuntimeException(message, e);
    }
    if (!Character.isValidCodePoint(codePoint)) {
        throw new RuntimeException(message);
    }
    return codePoint;
}

private StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text) {
boolean escaping = false;
for (int i = 0; i < text.length(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void parseNullJavaDoc() {
// Checks how two consecutive <br> elements followed by an indented text are rendered:
// the space in front of "Second Paragraph" must not show up in the output.
@Test
public void removeParagraphIndentation() {
String parsed = parser.parseConfigDescription("First paragraph<br><br> Second Paragraph");
// NOTE(review): the two assertions below expect different results for the same value and cannot both
// pass; they look like the pre- and post-change lines of a diff shown together. The second one matches
// the hard line break " +\n" that appendHtml emits for NEW_LINE_NODE - confirm and drop the stale one.
assertEquals("First paragraph\n\nSecond Paragraph", parsed);
assertEquals("First paragraph +\n +\nSecond Paragraph", parsed);
}

@Test
Expand All @@ -50,13 +50,13 @@ public void parseSimpleJavaDoc() {
@Test
public void parseJavaDocWithParagraph() {
String javaDoc = "hello<p>world</p>";
String expectedOutput = "hello\nworld";
String expectedOutput = "hello\n\nworld";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);

javaDoc = "hello world<p>bonjour </p><p>le monde</p>";
expectedOutput = "hello world\nbonjour \nle monde";
expectedOutput = "hello world\n\nbonjour\n\nle monde";
parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
Expand Down Expand Up @@ -118,21 +118,6 @@ public void parseJavaDocWithStyles() {
assertEquals(expectedOutput, parsed);
}

// Expects the text placed directly inside <ul> elements (without any <li>) to be rendered inline,
// separated from the surrounding text by single spaces only.
// NOTE(review): appendHtml now starts a new line for every <ol>/<ul>, and the enclosing hunk header
// (-118,21 +118,6) suggests this test is removed by the change - confirm before relying on it.
@Test
public void parseJavaDocWithUlTags() {
String javaDoc = "hello <ul>world</ul>";
String expectedOutput = "hello world";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);

// Two adjacent lists, the first one with leading and trailing blanks around its text
javaDoc = "hello world<ul> bonjour </ul><ul>le monde</ul>";
expectedOutput = "hello world bonjour le monde";
parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
}

@Test
public void parseJavaDocWithLiTagsInsideUlTag() {
String javaDoc = "List:" +
Expand All @@ -141,7 +126,7 @@ public void parseJavaDocWithLiTagsInsideUlTag() {
"<li>2</li>\n" +
"</ul>" +
"";
String expectedOutput = "List: \n - 1 \n - 2";
String expectedOutput = "List:\n\n - 1\n - 2";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
Expand All @@ -155,7 +140,7 @@ public void parseJavaDocWithLiTagsInsideOlTag() {
"<li>2</li>\n" +
"</ol>" +
"";
String expectedOutput = "List: \n . 1 \n . 2";
String expectedOutput = "List:\n\n . 1\n . 2";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
Expand Down Expand Up @@ -224,6 +209,49 @@ public void parseJavaDocWithUnknownNode() {
assertEquals(expectedOutput, parsed);
}

/**
 * A {@code <blockquote>} element must be rendered as an AsciiDoc {@code [quote]} block delimited by
 * {@code ____} lines, separated from the surrounding text by blank lines, with the wrapped lines of the
 * quotation joined into a single line.
 */
@Test
public void parseJavaDocWithBlockquoteBlock() {
    assertEquals("See Section 4.5.5 of the JSR 380 specification, specifically\n"
            + "\n"
            + "[quote]\n"
            + "____\n"
            + "In sub types (be it sub classes/interfaces or interface implementations), no parameter constraints may be declared on overridden or implemented methods, nor may parameters be marked for cascaded validation. This would pose a strengthening of preconditions to be fulfilled by the caller.\n"
            + "____\n"
            + "\n"
            + "That was interesting, wasn't it?",
            parser.parseConfigDescription("See Section 4.5.5 of the JSR 380 specification, specifically\n"
                    + "\n"
                    + "<blockquote>\n"
                    + "In sub types (be it sub classes/interfaces or interface implementations), no parameter constraints may\n"
                    + "be declared on overridden or implemented methods, nor may parameters be marked for cascaded validation.\n"
                    + "This would pose a strengthening of preconditions to be fulfilled by the caller.\n"
                    + "</blockquote>\nThat was interesting, wasn't it?"));
}

/**
 * A {@code <pre>} element must be rendered as a fenced code block surrounded by blank lines, with the HTML
 * entities of its content unescaped.
 */
@Test
public void parseJavaDocWithCodeBlock() {
    /* Plain multi-line content */
    final String expectedPlain = "Example:\n\n```\nfoo\nbar\n```\n\nbaz";
    final String actualPlain = parser.parseConfigDescription("Example:\n\n<pre>\nfoo\nbar\n</pre>\n\nbaz");
    assertEquals(expectedPlain, actualPlain);

    /* &amp; outside and &lt; / &gt; inside of the <pre> element */
    final String expectedEntities = "Some HTML entities & special characters:\n\n```\n<os>|<arch>[/variant]|<os>/<arch>[/variant]\n```\n\nbaz";
    final String actualEntities = parser.parseConfigDescription(
            "Some HTML entities &amp; special characters:\n\n<pre>&lt;os&gt;|&lt;arch&gt;[/variant]|&lt;os&gt;/&lt;arch&gt;[/variant]\n</pre>\n\nbaz");
    assertEquals(expectedEntities, actualEntities);

    // TODO: {@code ...} nested in <pre> is not covered yet
    // assertEquals("Example:\n\n```\nfoo\nbar\n```",
    // parser.parseConfigDescription("Example:\n\n<pre>{@code\nfoo\nbar\n}</pre>"));
}

@Test
public void asciidoc() {
String asciidoc = "== My Asciidoc\n" +
Expand Down Expand Up @@ -308,4 +336,25 @@ public void escapeBrackets(String ch) {
assertEquals(expected, actual);
}

/**
 * {@code JavaDocParser.trim()} must strip plain whitespace and the AsciiDoc hard line break {@code " +\n"}
 * from both ends, and must leave look-alike sequences untouched.
 */
@Test
void trim() {
    /* Nothing to trim */
    assertEquals("foo", JavaDocParser.trim(new StringBuilder("foo")));
    assertEquals("", JavaDocParser.trim(new StringBuilder("")));

    /* Plain whitespace, as handled by String.trim() */
    assertEquals("foo", JavaDocParser.trim(new StringBuilder(" \n\tfoo")));
    assertEquals("foo", JavaDocParser.trim(new StringBuilder("foo \n\t")));
    assertEquals("foo", JavaDocParser.trim(new StringBuilder(" \n\tfoo \n\t")));
    assertEquals("", JavaDocParser.trim(new StringBuilder(" \n\t")));

    /* Hard line breaks at the start and/or at the end */
    assertEquals("foo", JavaDocParser.trim(new StringBuilder(" +\nfoo")));
    assertEquals("", JavaDocParser.trim(new StringBuilder(" +\n")));
    assertEquals("", JavaDocParser.trim(new StringBuilder(" +\n +\n")));
    assertEquals("foo +\nbar", JavaDocParser.trim(new StringBuilder(" foo +\nbar +\n")));

    /* " +" without the newline is not a hard line break, so the plus sign stays */
    assertEquals("+", JavaDocParser.trim(new StringBuilder(" +")));
    assertEquals("foo +", JavaDocParser.trim(new StringBuilder("foo +")));

    /* "+ \n" has the plus sign and the space swapped, so only the whitespace is trimmed */
    assertEquals("+ \nfoo", JavaDocParser.trim(new StringBuilder("+ \nfoo")));
    assertEquals("+", JavaDocParser.trim(new StringBuilder("+ \n")));
    assertEquals("+", JavaDocParser.trim(new StringBuilder(" +\n+ \n")));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,13 @@ public class JibConfig {
* List of target platforms. Each platform is defined using the pattern:
*
* <pre>
* {@literal <os>|<arch>[/variant]|<os>/<arch>[/variant]}
* &lt;os>|&lt;arch>[/variant]|&lt;os>/&lt;arch>[/variant]
* </pre>
*
* for example:
*
* <pre>
* {@literal linux/amd64,linux/arm64/v8}
* linux/amd64,linux/arm64/v8
* </pre>
*
* If not specified, OS default is linux and architecture default is {@code amd64}.
Expand Down
Loading

0 comments on commit ae7a5cf

Please sign in to comment.