-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed issue with confusion between html and already markdown documentation. Wrote unit tests Fixes #245 Signed-off-by: Nikolas Komonen <[email protected]>
- Loading branch information
1 parent
c8193b2
commit 9d8e118
Showing
9 changed files
with
269 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
80 changes: 80 additions & 0 deletions
80
org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/MarkdownConverter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2016-2017 Red Hat Inc. and others. | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Public License v1.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/legal/epl-v10.html | ||
* | ||
* Contributors: | ||
* Red Hat, Inc. - initial API and implementation | ||
*******************************************************************************/ | ||
package org.eclipse.lsp4xml.utils; | ||
|
||
import static org.apache.commons.lang3.StringEscapeUtils.unescapeJava; | ||
import static org.apache.commons.lang3.StringEscapeUtils.unescapeXml; | ||
|
||
import java.lang.reflect.Field; | ||
import java.util.logging.Logger; | ||
import java.util.regex.Pattern; | ||
|
||
import com.overzealous.remark.Options; | ||
import com.overzealous.remark.Options.FencedCodeBlocks; | ||
import com.overzealous.remark.Options.Tables; | ||
import com.overzealous.remark.Remark; | ||
|
||
import org.jsoup.safety.Cleaner; | ||
import org.jsoup.safety.Whitelist; | ||
|
||
/** | ||
* Converts HTML content into Markdown equivalent. | ||
* | ||
* @author Fred Bricon | ||
*/ | ||
public class MarkdownConverter { | ||
|
||
private static final Logger LOGGER = Logger.getLogger(MarkdownConverter.class.getName()); | ||
|
||
private static Remark remark; | ||
|
||
//Pattern looking for any form of tag eg: <head> | ||
private static final Pattern markdownPattern = Pattern.compile("`[^`]*<[a-z][\\s\\S]*>[^`]*`"); | ||
|
||
private MarkdownConverter(){ | ||
//no public instanciation | ||
} | ||
|
||
static { | ||
Options options = new Options(); | ||
options.tables = Tables.CONVERT_TO_CODE_BLOCK; | ||
options.hardwraps = true; | ||
options.inlineLinks = true; | ||
options.autoLinks = true; | ||
options.reverseHtmlSmartPunctuation = true; | ||
options.fencedCodeBlocks = FencedCodeBlocks.ENABLED_BACKTICK; | ||
remark = new Remark(options); | ||
//Stop remark from stripping file protocol in an href | ||
try { | ||
Field cleanerField = Remark.class.getDeclaredField("cleaner"); | ||
cleanerField.setAccessible(true); | ||
|
||
Cleaner c = (Cleaner) cleanerField.get(remark); | ||
|
||
Field whitelistField = Cleaner.class.getDeclaredField("whitelist"); | ||
whitelistField.setAccessible(true); | ||
|
||
Whitelist w = (Whitelist) whitelistField.get(c); | ||
|
||
w.addProtocols("a", "href", "file"); | ||
} catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) { | ||
LOGGER.severe("Unable to modify jsoup to include file protocols "+ e.getMessage()); | ||
} | ||
} | ||
|
||
public static String convert(String html) { | ||
if(!StringUtils.isTagOutsideOfBackticks(html)) { | ||
return unescapeXml(html); // is not html so it can be returned as is (aside from unescaping) | ||
} | ||
return unescapeJava(remark.convert(html)); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
70 changes: 70 additions & 0 deletions
70
org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/utils/MarkdownConverterTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2019 Red Hat Inc. and others. | ||
* All rights reserved. This program and the accompanying materials | ||
* are made available under the terms of the Eclipse Public License v2.0 | ||
* which accompanies this distribution, and is available at | ||
* http://www.eclipse.org/legal/epl-v20.html | ||
* | ||
* Contributors: | ||
* Red Hat Inc. - initial API and implementation | ||
*******************************************************************************/ | ||
|
||
package org.eclipse.lsp4xml.utils; | ||
|
||
import static org.eclipse.lsp4xml.utils.MarkdownConverter.convert; | ||
import static org.junit.Assert.assertEquals; | ||
|
||
import org.junit.Test; | ||
|
||
/** | ||
* MarkdownConverterTest | ||
*/ | ||
public class MarkdownConverterTest { | ||
|
||
@Test | ||
public void testHTMLConversion() { | ||
assertEquals("This is `my code`", convert("This is <code>my code</code>")); | ||
assertEquals("This is\n**bold**", convert("This is<br><b>bold</b>")); | ||
assertEquals("The `<project>` element is the root of the descriptor.", convert("The <code><project></code> element is the root of the descriptor.")); | ||
assertEquals("# Hey Man #", convert("<h1>Hey Man</h1>")); | ||
assertEquals("[Placeholder](https://www.xml.com)", convert("<a href=\"https://www.xml.com\">Placeholder</a>")); | ||
|
||
String htmlList = | ||
"<ul>\n" + | ||
" <li>Coffee</li>\n" + | ||
" <li>Tea</li>\n" + | ||
" <li>Milk</li>\n" + | ||
"</ul>"; | ||
String expectedList = | ||
" * Coffee\n" + | ||
" * Tea\n" + | ||
" * Milk"; | ||
assertEquals(expectedList, convert(htmlList)); | ||
assertEquals("ONLY_THIS_TEXT", convert("<p>ONLY_THIS_TEXT</p>")); | ||
|
||
String multilineHTML = | ||
"multi\n" + | ||
"line\n" + | ||
"<code>HTML</code>\n" + | ||
"stuff"; | ||
assertEquals("multi line `HTML` stuff", convert(multilineHTML)); | ||
|
||
String multilineHTML2 = | ||
"<p>multi<p>\n" + | ||
"line\n" + | ||
"<code>HTML</code>\n" + | ||
"stuff"; | ||
String multilineHTML2Expected = | ||
"multi\n" + | ||
"\n" + | ||
"line `HTML` stuff"; | ||
assertEquals(multilineHTML2Expected, convert(multilineHTML2)); | ||
} | ||
|
||
@Test | ||
public void testMarkdownConversion() { | ||
assertEquals("This is `my code`", convert("This is `my code`")); | ||
assertEquals("The `<thing>` element is the root of the descriptor.", convert("The `<thing>` element is the root of the descriptor.")); | ||
assertEquals("The `<project>` element is the root of the descriptor.", convert("The `<project>` element is the root of the descriptor.")); | ||
} | ||
|
||
} |
Oops, something went wrong.