Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve JavaDoc -> AsciiDoc transformation for lists, paragraphs and code blocks #32843

Merged
merged 1 commit into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ final class JavaDocParser {
/* HTML element names that are compared against Node.nodeName() while walking the parsed Javadoc */
private static final String ORDERED_LIST_NODE = "ol";
private static final String SUPER_SCRIPT_NODE = "sup";
private static final String UN_ORDERED_LIST_NODE = "ul";
/* <pre>: its content is written between two CODE_BLOCK_ASCIDOC_STYLE fences */
private static final String PREFORMATED_NODE = "pre";
/* <blockquote>: its content is written between the BLOCKQUOTE_BLOCK_ASCIDOC_STYLE delimiters */
private static final String BLOCKQUOTE_NODE = "blockquote";

private static final String BIG_ASCIDOC_STYLE = "[.big]";
private static final String LINK_ATTRIBUTE_FORMAT = "[%s]";
Expand All @@ -62,6 +64,10 @@ final class JavaDocParser {
private static final String UNORDERED_LIST_ITEM_ASCIDOC_STYLE = " - ";
private static final String UNDERLINE_ASCIDOC_STYLE = "[.underline]";
private static final String LINE_THROUGH_ASCIDOC_STYLE = "[.line-through]";
/* AsciiDoc hard line break: a space, a plus sign and the line terminator; trim() treats it as whitespace */
private static final String HARD_LINE_BREAK_ASCIDOC_STYLE = " +\n";
/* Fence written on its own line before and after the content of a <pre> element */
private static final String CODE_BLOCK_ASCIDOC_STYLE = "```";
/* Opening of the quote block written for <blockquote>: the [quote] line followed by the block delimiter */
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE = "[quote]\n____";
/* Closing delimiter of the quote block */
private static final String BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END = "____";

private final boolean inlineMacroMode;

Expand Down Expand Up @@ -185,25 +191,51 @@ private String htmlJavadocToAsciidoc(JavadocDescription javadocDescription) {
}
}

return sb.toString().trim();
return trim(sb);
}

private void appendHtml(StringBuilder sb, Node node) {
for (Node childNode : node.childNodes()) {
switch (childNode.nodeName()) {
case PARAGRAPH_NODE:
sb.append(NEW_LINE);
newLine(sb);
newLine(sb);
appendHtml(sb, childNode);
break;
case PREFORMATED_NODE:
newLine(sb);
newLine(sb);
sb.append(CODE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
for (Node grandChildNode : childNode.childNodes()) {
unescapeHtmlEntities(sb, grandChildNode.toString());
}
newLineIfNeeded(sb);
sb.append(CODE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
newLine(sb);
break;
case BLOCKQUOTE_NODE:
newLine(sb);
newLine(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE);
newLine(sb);
appendHtml(sb, childNode);
newLineIfNeeded(sb);
sb.append(BLOCKQUOTE_BLOCK_ASCIDOC_STYLE_END);
newLine(sb);
newLine(sb);
break;
case ORDERED_LIST_NODE:
case UN_ORDERED_LIST_NODE:
newLine(sb);
appendHtml(sb, childNode);
break;
case LIST_ITEM_NODE:
final String marker = childNode.parent().nodeName().equals(ORDERED_LIST_NODE)
? ORDERED_LIST_ITEM_ASCIDOC_STYLE
: UNORDERED_LIST_ITEM_ASCIDOC_STYLE;
sb.append(NEW_LINE);
newLine(sb);
sb.append(marker);
appendHtml(sb, childNode);
break;
Expand All @@ -213,7 +245,7 @@ private void appendHtml(StringBuilder sb, Node node) {
sb.append(link);
final StringBuilder caption = new StringBuilder();
appendHtml(caption, childNode);
sb.append(String.format(LINK_ATTRIBUTE_FORMAT, caption.toString().trim()));
sb.append(String.format(LINK_ATTRIBUTE_FORMAT, trim(caption)));
break;
case CODE_NODE:
sb.append(BACKTICK);
Expand Down Expand Up @@ -269,7 +301,7 @@ private void appendHtml(StringBuilder sb, Node node) {
sb.append(HASH);
break;
case NEW_LINE_NODE:
sb.append(NEW_LINE);
sb.append(HARD_LINE_BREAK_ASCIDOC_STYLE);
break;
case TEXT_NODE:
String text = ((TextNode) childNode).text();
Expand All @@ -295,6 +327,142 @@ private void appendHtml(StringBuilder sb, Node node) {
}
}

/**
 * Strips leading and trailing whitespace from the given {@link StringBuilder} <em>in place</em> and returns
 * the remaining content.
 * <p>
 * Whitespace is every character {@code <= ' '} (the same definition {@link String#trim()} uses) and, in
 * addition, the AsciiDoc hard line break sequence {@code " +\n"}. A {@code '+'} that is not part of that
 * exact three character sequence is regular content and stops the trimming.
 *
 * @param sb the {@link StringBuilder} to trim; it is modified so that it holds the trimmed content afterwards
 * @return the trimmed content of the given {@link StringBuilder}
 */
static String trim(StringBuilder sb) {
    /* Phase 1: locate the first character that is neither whitespace nor part of a hard line break */
    final int total = sb.length();
    int start = 0;
    while (start < total) {
        if (start + 2 < total
                && sb.charAt(start) == ' '
                && sb.charAt(start + 1) == '+'
                && sb.charAt(start + 2) == '\n') {
            /* " +\n" is consumed as a single unit */
            start += 3;
        } else if (sb.charAt(start) <= ' ') {
            start++;
        } else {
            break;
        }
    }
    if (start != 0) {
        sb.delete(0, start);
    }

    /*
     * Phase 2: walk backwards over what is left. The indices are relative to the already shortened builder,
     * so a trailing hard line break can never reach into the removed leading part.
     */
    int last = sb.length() - 1;
    while (last >= 0) {
        if (last >= 2
                && sb.charAt(last) == '\n'
                && sb.charAt(last - 1) == '+'
                && sb.charAt(last - 2) == ' ') {
            last -= 3;
        } else if (sb.charAt(last) <= ' ') {
            last--;
        } else {
            break;
        }
    }
    /* A no-op when nothing had to be removed at the end */
    sb.setLength(last + 1);
    return sb.toString();
}

/**
 * Removes every trailing space, tab, carriage return and line feed from the given builder and then appends
 * {@code NEW_LINE}, so that line terminators already present at the end are replaced rather than accumulated.
 *
 * @param sb the builder to terminate
 * @return the same builder, for chaining
 */
private static StringBuilder newLineIfNeeded(StringBuilder sb) {
    final StringBuilder withoutTrailingWhitespace = trimText(sb, " \t\r\n");
    return withoutTrailingWhitespace.append(NEW_LINE);
}

/**
 * Appends {@code NEW_LINE} to the given builder after removing trailing spaces and tabs, so that the line
 * being closed does not end in blanks. Line terminators that are already present are kept.
 *
 * @param sb the builder to append to
 * @return the same builder, for chaining
 */
private static StringBuilder newLine(StringBuilder sb) {
    final StringBuilder withoutTrailingBlanks = trimText(sb, " \t");
    return withoutTrailingBlanks.append(NEW_LINE);
}

/**
 * Removes from the end of the given builder every character that occurs in {@code charsToTrim}, stopping at
 * the first character that does not.
 *
 * @param sb the builder to shorten in place
 * @param charsToTrim the set of characters to remove, given as a string
 * @return the same builder, for chaining
 */
private static StringBuilder trimText(StringBuilder sb, String charsToTrim) {
    int end = sb.length();
    while (end > 0 && charsToTrim.indexOf(sb.charAt(end - 1)) != -1) {
        end--;
    }
    /* A no-op when no trailing character matched */
    sb.setLength(end);
    return sb;
}

/**
 * Appends {@code text} to {@code sb} with leading whitespace removed, HTML character references decoded and
 * line terminators normalized to {@code '\n'}.
 * <p>
 * Supported references are {@code &lt;}, {@code &gt;}, {@code &amp;}, {@code &quot;}, {@code &apos;},
 * {@code &nbsp;} (written as the AsciiDoc attribute reference <code>{nbsp}</code>) and numeric references in
 * decimal ({@code &#60;}) or hexadecimal ({@code &#x3C;}) notation. An empty reference ({@code &;}) is
 * dropped silently.
 *
 * @param sb the builder to append to
 * @param text the HTML text to decode
 * @return the given builder, for chaining
 * @throws RuntimeException if {@code text} contains a reference that is neither one of the supported names
 *         nor a valid numeric reference
 */
private StringBuilder unescapeHtmlEntities(StringBuilder sb, String text) {
    int i = 0;
    /* Skip leading whitespace */
    while (i < text.length() && " \t\r\n".indexOf(text.charAt(i)) >= 0) {
        i++;
    }
    for (; i < text.length(); i++) {
        final char ch = text.charAt(i);
        switch (ch) {
            case '&':
                final int start = ++i;
                while (i < text.length() && text.charAt(i) != ';') {
                    i++;
                }
                /* i now points at the terminating ';' which the loop increment skips */
                if (i > start) {
                    appendHtmlEntity(sb, text.substring(start, i), text);
                }
                break;
            case '\r':
                if (i + 1 < text.length() && text.charAt(i + 1) == '\n') {
                    /* Ignore \r followed by \n */
                } else {
                    /* A Mac single \r: replace by \n */
                    sb.append('\n');
                }
                break;
            default:
                sb.append(ch);
                break;
        }
    }
    return sb;
}

/** Appends the character(s) denoted by the HTML reference {@code entity}, given without {@code '&'} and {@code ';'}. */
private static void appendHtmlEntity(StringBuilder sb, String entity, String text) {
    switch (entity) {
        case "lt":
            sb.append('<');
            break;
        case "gt":
            sb.append('>');
            break;
        case "nbsp":
            sb.append("{nbsp}");
            break;
        case "amp":
            sb.append('&');
            break;
        case "quot":
            sb.append('"');
            break;
        case "apos":
            sb.append('\'');
            break;
        default:
            try {
                final int codePoint;
                if (entity.startsWith("#x") || entity.startsWith("#X")) {
                    codePoint = Integer.parseInt(entity.substring(2), 16);
                } else if (entity.startsWith("#")) {
                    codePoint = Integer.parseInt(entity.substring(1));
                } else {
                    /* A bare number without '#' was accepted before; keep accepting it */
                    codePoint = Integer.parseInt(entity);
                }
                /* appendCodePoint also handles characters outside the Basic Multilingual Plane */
                sb.appendCodePoint(codePoint);
            } catch (IllegalArgumentException e) {
                /* Covers NumberFormatException as well as code points rejected by appendCodePoint */
                throw new RuntimeException(
                        "Could not parse HTML entity &" + entity + "; in\n\n" + text + "\n\n", e);
            }
            break;
    }
}

private StringBuilder appendEscapedAsciiDoc(StringBuilder sb, String text) {
boolean escaping = false;
for (int i = 0; i < text.length(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void parseNullJavaDoc() {
/**
 * Verifies that consecutive {@code <br>} tags are rendered as AsciiDoc hard line breaks ({@code " +\n"}) and
 * that the whitespace in front of the text following them is dropped.
 */
@Test
public void removeParagraphIndentation() {
    String parsed = parser.parseConfigDescription("First paragraph<br><br> Second Paragraph");
    /* The former plain "\n\n" expectation contradicted this one and has been removed */
    assertEquals("First paragraph +\n +\nSecond Paragraph", parsed);
}

@Test
Expand All @@ -50,13 +50,13 @@ public void parseSimpleJavaDoc() {
@Test
public void parseJavaDocWithParagraph() {
String javaDoc = "hello<p>world</p>";
String expectedOutput = "hello\nworld";
String expectedOutput = "hello\n\nworld";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);

javaDoc = "hello world<p>bonjour </p><p>le monde</p>";
expectedOutput = "hello world\nbonjour \nle monde";
expectedOutput = "hello world\n\nbonjour\n\nle monde";
parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
Expand Down Expand Up @@ -118,21 +118,6 @@ public void parseJavaDocWithStyles() {
assertEquals(expectedOutput, parsed);
}

/**
 * Feeds {@code <ul>} elements that contain plain text instead of {@code <li>} items to the parser and
 * expects the text to be emitted inline, separated by single spaces.
 */
// NOTE(review): the list branch of appendHtml starts a new line for <ul>/<ol>, so these single-line
// expectations look stale - confirm whether this test should be updated or dropped.
@Test
public void parseJavaDocWithUlTags() {
String javaDoc = "hello <ul>world</ul>";
String expectedOutput = "hello world";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);

javaDoc = "hello world<ul> bonjour </ul><ul>le monde</ul>";
expectedOutput = "hello world bonjour le monde";
parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
}

@Test
public void parseJavaDocWithLiTagsInsideUlTag() {
String javaDoc = "List:" +
Expand All @@ -141,7 +126,7 @@ public void parseJavaDocWithLiTagsInsideUlTag() {
"<li>2</li>\n" +
"</ul>" +
"";
String expectedOutput = "List: \n - 1 \n - 2";
String expectedOutput = "List:\n\n - 1\n - 2";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
Expand All @@ -155,7 +140,7 @@ public void parseJavaDocWithLiTagsInsideOlTag() {
"<li>2</li>\n" +
"</ol>" +
"";
String expectedOutput = "List: \n . 1 \n . 2";
String expectedOutput = "List:\n\n . 1\n . 2";
String parsed = parser.parseConfigDescription(javaDoc);

assertEquals(expectedOutput, parsed);
Expand Down Expand Up @@ -224,6 +209,49 @@ public void parseJavaDocWithUnknownNode() {
assertEquals(expectedOutput, parsed);
}

/**
 * Verifies that a {@code <blockquote>} element is rendered as an AsciiDoc {@code [quote]} block that is
 * separated from the surrounding text by blank lines, with the quoted text joined into a single line.
 */
@Test
public void parseJavaDocWithBlockquoteBlock() {
    assertEquals("See Section 4.5.5 of the JSR 380 specification, specifically\n"
            + "\n"
            + "[quote]\n"
            + "____\n"
            + "In sub types (be it sub classes/interfaces or interface implementations), no parameter constraints may be declared on overridden or implemented methods, nor may parameters be marked for cascaded validation. This would pose a strengthening of preconditions to be fulfilled by the caller.\n"
            + "____\n"
            + "\n"
            + "That was interesting, wasn't it?",
            parser.parseConfigDescription("See Section 4.5.5 of the JSR 380 specification, specifically\n"
                    + "\n"
                    + "<blockquote>\n"
                    + "In sub types (be it sub classes/interfaces or interface implementations), no parameter constraints may\n"
                    + "be declared on overridden or implemented methods, nor may parameters be marked for cascaded validation.\n"
                    + "This would pose a strengthening of preconditions to be fulfilled by the caller.\n"
                    + "</blockquote>\nThat was interesting, wasn't it?"));
    /* The <pre> assertions that were copy-pasted here are covered by parseJavaDocWithCodeBlock */
}

/**
 * Verifies that the content of a {@code <pre>} element is rendered between two code fences that are
 * separated from the surrounding text by blank lines, and that HTML entities inside it are decoded.
 */
@Test
public void parseJavaDocWithCodeBlock() {
    final String plainPre = "Example:\n\n<pre>\nfoo\nbar\n</pre>\n\nbaz";
    final String plainExpected = "Example:\n\n```\nfoo\nbar\n```\n\nbaz";
    assertEquals(plainExpected, parser.parseConfigDescription(plainPre));

    final String escapedPre = "Some HTML entities &amp; special characters:\n\n<pre>&lt;os&gt;|&lt;arch&gt;[/variant]|&lt;os&gt;/&lt;arch&gt;[/variant]\n</pre>\n\nbaz";
    final String escapedExpected = "Some HTML entities & special characters:\n\n```\n<os>|<arch>[/variant]|<os>/<arch>[/variant]\n```\n\nbaz";
    assertEquals(escapedExpected, parser.parseConfigDescription(escapedPre));

    // TODO
    // assertEquals("Example:\n\n```\nfoo\nbar\n```",
    // parser.parseConfigDescription("Example:\n\n<pre>{@code\nfoo\nbar\n}</pre>"));
}

@Test
public void asciidoc() {
String asciidoc = "== My Asciidoc\n" +
Expand Down Expand Up @@ -308,4 +336,25 @@ public void escapeBrackets(String ch) {
assertEquals(expected, actual);
}

/**
 * Exercises {@code JavaDocParser.trim(StringBuilder)} with plain whitespace, AsciiDoc hard line breaks
 * ({@code " +\n"}) and look-alike sequences that must be kept.
 */
@Test
void trim() {
    /* Each row is { input, expected result }; the rows are checked in the listed order */
    final String[][] cases = {
            { "+ \nfoo", "+ \nfoo" },
            { " +", "+" },
            { " +\nfoo", "foo" },
            { "foo +", "foo +" },
            { "foo", "foo" },
            { "+ \n", "+" },
            { " +\n+ \n", "+" },
            { " +\n", "" },
            { " \n\tfoo", "foo" },
            { "foo \n\t", "foo" },
            { " \n\tfoo \n\t", "foo" },
            { "", "" },
            { " \n\t", "" },
            { " +", "+" },
            { " +\n", "" },
            { " +\n +\n", "" },
            { " foo +\nbar +\n", "foo +\nbar" },
    };
    for (String[] row : cases) {
        assertEquals(row[1], JavaDocParser.trim(new StringBuilder(row[0])));
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,13 @@ public class JibConfig {
* List of target platforms. Each platform is defined using the pattern:
*
* <pre>
* {@literal <os>|<arch>[/variant]|<os>/<arch>[/variant]}
* &lt;os>|&lt;arch>[/variant]|&lt;os>/&lt;arch>[/variant]
* </pre>
*
* for example:
*
* <pre>
* {@literal linux/amd64,linux/arm64/v8}
* linux/amd64,linux/arm64/v8
* </pre>
*
* If not specified, OS default is linux and architecture default is {@code amd64}.
Expand Down
Loading