diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a3367d903e2..366ff4f78f6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by - Added integrity check detecting HTML-encoded characters ### Fixed +- Fixed [#405](https://github.com/JabRef/jabref/issues/405): Added more {} around capital letters in Unicode/HTML to LaTeX conversion to preserve them ### Removed @@ -91,7 +92,6 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by - Swedish is added as a language option (still not a complete translation) ### Fixed -- Fixed [#405](https://github.com/JabRef/jabref/issues/405): Added more {} around capital letters in Unicode/HTML to LaTeX conversion to preserve them - Fixed [#318](https://github.com/JabRef/jabref/issues/318): Improve normalization of author names - Fixed [#598](https://github.com/JabRef/jabref/issues/598) and [#402](https://github.com/JabRef/jabref/issues/402): No more issues with invalid icons for ExternalFileTypes in global search or after editing the settings - Fixed [#883](https://github.com/JabRef/jabref/issues/883): No NPE during cleanup diff --git a/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java b/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java index 6a391a6fa904..bfd53437312e 100644 --- a/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java +++ b/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java @@ -22,7 +22,6 @@ import net.sf.jabref.logic.layout.format.HTMLChars; import net.sf.jabref.logic.layout.format.LatexToUnicodeFormatter; import net.sf.jabref.logic.search.SearchQuery; -import net.sf.jabref.logic.util.strings.HTMLUnicodeConversionMaps; import net.sf.jabref.model.database.BibDatabase; import net.sf.jabref.model.database.BibDatabaseMode; import net.sf.jabref.model.database.BibDatabaseModeDetection; @@ -40,8 +39,8 @@ public class Benchmarks { private String bibtexString; private final BibDatabase database = new 
BibDatabase(); - private final List latexConversionStrings = new ArrayList<>(); - private final List htmlConversionStrings = new ArrayList<>(); + private String latexConversionString; + private String htmlConversionString; @Setup public void init() throws IOException, SaveException { @@ -66,27 +65,9 @@ public void init() throws IOException, SaveException { new SavePreferences()); bibtexString = stringWriter.toString(); - List latexSymbols = new ArrayList<>(HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.values()); - int symbolcount = latexSymbols.size(); - StringBuilder sb = new StringBuilder(); - sb.append("{A} \\textbf{bold} "); - sb.append(latexSymbols.get(Math.abs(randomizer.nextInt() % symbolcount))); - sb.append(" {\\it italic} {"); - sb.append(latexSymbols.get(Math.abs(randomizer.nextInt() % symbolcount))); - sb.append(latexSymbols.get(Math.abs(randomizer.nextInt() % symbolcount))); - sb.append("} abc"); - latexConversionStrings.add(sb.toString()); - - List htmlSymbols = new ArrayList<>(HTMLUnicodeConversionMaps.HTML_LATEX_CONVERSION_MAP.keySet()); - symbolcount = htmlSymbols.size(); - sb = new StringBuilder(); - sb.append("A bold "); - sb.append(htmlSymbols.get(Math.abs(randomizer.nextInt() % symbolcount))); - sb.append(" italic "); - sb.append(htmlSymbols.get(Math.abs(randomizer.nextInt() % symbolcount))); - sb.append(htmlSymbols.get(Math.abs(randomizer.nextInt() % symbolcount))); - sb.append("– abc"); - htmlConversionStrings.add(sb.toString()); + latexConversionString = "{A} \\textbf{bold} approach {\\it to} ${{\\Sigma}}{\\Delta}$ modulator \\textsuperscript{2} \\$"; + + htmlConversionString = "Österreich – & characters ⪢ italic"; } @Benchmark @@ -122,33 +103,21 @@ public BibDatabaseMode inferBibDatabaseMode() { } @Benchmark - public List latexToUnicodeConversion() { - List result = new ArrayList<>(1000); + public String latexToUnicodeConversion() { LatexToUnicodeFormatter f = new LatexToUnicodeFormatter(); - for (String s : latexConversionStrings) 
{ - result.add(f.format(s)); - } - return result; + return f.format(latexConversionString); } @Benchmark - public List latexToHTMLConversion() { - List result = new ArrayList<>(1000); + public String latexToHTMLConversion() { HTMLChars f = new HTMLChars(); - for (String s : latexConversionStrings) { - result.add(f.format(s)); - } - return result; + return f.format(latexConversionString); } @Benchmark - public List htmlToLatexConversion() { - List result = new ArrayList<>(1000); + public String htmlToLatexConversion() { HtmlToLatexFormatter f = new HtmlToLatexFormatter(); - for (String s : htmlConversionStrings) { - result.add(f.format(s)); - } - return result; + return f.format(htmlConversionString); } public static void main(String[] args) throws IOException, RunnerException { diff --git a/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java b/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java index 56165bb7bad8..170dd1132125 100644 --- a/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java +++ b/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java @@ -211,38 +211,38 @@ public class HTMLUnicodeConversionMaps { /* Greek */ {"913", "Alpha", "{{$\\Alpha$}}"}, // greek capital letter alpha, U+0391 - {"914", "Beta", "{$\\Beta$}"}, // greek capital letter beta, U+0392 - {"915", "Gamma", "{$\\Gamma$}"}, // greek capital letter gamma, + {"914", "Beta", "{{$\\Beta$}}"}, // greek capital letter beta, U+0392 + {"915", "Gamma", "{{$\\Gamma$}}"}, // greek capital letter gamma, // U+0393 ISOgrk3 - {"916", "Delta", "{$\\Delta$}"}, // greek capital letter delta, + {"916", "Delta", "{{$\\Delta$}}"}, // greek capital letter delta, // U+0394 ISOgrk3 - {"917", "Epsilon", "{$\\Epsilon$}"}, // greek capital letter epsilon, U+0395 - {"918", "Zeta", "{$\\Zeta$}"}, // greek capital letter zeta, U+0396 - {"919", "Eta", "{$\\Eta$}"}, // greek capital letter eta, U+0397 - {"920", "Theta", 
"{$\\Theta$}"}, // greek capital letter theta, + {"917", "Epsilon", "{{$\\Epsilon$}}"}, // greek capital letter epsilon, U+0395 + {"918", "Zeta", "{{$\\Zeta$}}"}, // greek capital letter zeta, U+0396 + {"919", "Eta", "{{$\\Eta$}}"}, // greek capital letter eta, U+0397 + {"920", "Theta", "{{$\\Theta$}}"}, // greek capital letter theta, // U+0398 ISOgrk3 - {"921", "Iota", "{$\\Iota$}"}, // greek capital letter iota, U+0399 - {"922", "Kappa", "{$\\Kappa$}"}, // greek capital letter kappa, U+039A - {"923", "Lambda", "{$\\Lambda$}"}, // greek capital letter lambda, + {"921", "Iota", "{{$\\Iota$}}"}, // greek capital letter iota, U+0399 + {"922", "Kappa", "{{$\\Kappa$}}"}, // greek capital letter kappa, U+039A + {"923", "Lambda", "{{$\\Lambda$}}"}, // greek capital letter lambda, // U+039B ISOgrk3 - {"924", "Mu", "{$\\Mu$}"}, // greek capital letter mu, U+039C - {"925", "Nu", "{$\\Nu$}"}, // greek capital letter nu, U+039D - {"926", "Xi", "{$\\Xi$}"}, // greek capital letter xi, U+039E ISOgrk3 - {"927", "Omicron", "{$\\Omicron$}"}, // greek capital letter omicron, U+039F - {"928", "Pi", "{$\\Pi$}"}, // greek capital letter pi, U+03A0 ISOgrk3 - {"929", "Rho", "{$\\Rho$}"}, // greek capital letter rho, U+03A1 + {"924", "Mu", "{{$\\Mu$}}"}, // greek capital letter mu, U+039C + {"925", "Nu", "{{$\\Nu$}}"}, // greek capital letter nu, U+039D + {"926", "Xi", "{{$\\Xi$}}"}, // greek capital letter xi, U+039E ISOgrk3 + {"927", "Omicron", "{{$\\Omicron$}}"}, // greek capital letter omicron, U+039F + {"928", "Pi", "{{$\\Pi$}}"}, // greek capital letter pi, U+03A0 ISOgrk3 + {"929", "Rho", "{{$\\Rho$}}"}, // greek capital letter rho, U+03A1 /* there is no Sigmaf, and no U+03A2 character either */ - {"931", "Sigma", "{$\\Sigma$}"}, // greek capital letter sigma, + {"931", "Sigma", "{{$\\Sigma$}}"}, // greek capital letter sigma, // U+03A3 ISOgrk3 - {"932", "Tau", "{$\\Tau$}"}, // greek capital letter tau, U+03A4 - {"933", "Upsilon", "{$\\Upsilon$}"}, // greek capital letter upsilon, 
+ {"932", "Tau", "{{$\\Tau$}}"}, // greek capital letter tau, U+03A4 + {"933", "Upsilon", "{{$\\Upsilon$}}"}, // greek capital letter upsilon, // U+03A5 ISOgrk3 - {"934", "Phi", "{$\\Phi$}"}, // greek capital letter phi, + {"934", "Phi", "{{$\\Phi$}}"}, // greek capital letter phi, // U+03A6 ISOgrk3 - {"935", "Chi", "{$\\Chi$}"}, // greek capital letter chi, U+03A7 - {"936", "Psi", "{$\\Psi$}"}, // greek capital letter psi, + {"935", "Chi", "{{$\\Chi$}}"}, // greek capital letter chi, U+03A7 + {"936", "Psi", "{{$\\Psi$}}"}, // greek capital letter psi, // U+03A8 ISOgrk3 - {"937", "Omega", "{$\\Omega$}"}, // greek capital letter omega, + {"937", "Omega", "{{$\\Omega$}}"}, // greek capital letter omega, // U+03A9 ISOgrk3 {"945", "alpha", "$\\alpha$"}, // greek small letter alpha, @@ -286,7 +286,7 @@ public class HTMLUnicodeConversionMaps { {"", "thetav", "$\\vartheta$"}, // greek small letter theta symbol, {"", "vartheta", "$\\vartheta$"}, // greek small letter theta symbol, // U+03D1 NEW - {"978", "upsih", "{$\\Upsilon$}"}, // greek upsilon with hook symbol, + {"978", "upsih", "{{$\\Upsilon$}}"}, // greek upsilon with hook symbol, // U+03D2 NEW {"982", "piv", "$\\varphi$"}, // greek pi symbol, U+03D6 ISOgrk3 @@ -306,9 +306,9 @@ public class HTMLUnicodeConversionMaps { /* Letterlike Symbols */ {"8472", "weierp", "$\\wp$"}, // script capital P = power set // = Weierstrass p, U+2118 ISOamso - {"8465", "image", "{$\\Im$}"}, // blackletter capital I = imaginary part, + {"8465", "image", "{{$\\Im$}}"}, // blackletter capital I = imaginary part, // U+2111 ISOamso - {"8476", "real", "{$\\Re$}"}, // blackletter capital R = real part symbol, + {"8476", "real", "{{$\\Re$}}"}, // blackletter capital R = real part symbol, // U+211C ISOamso {"8482", "trade", "{\\texttrademark}"}, // trade mark sign, U+2122 ISOnum {"8501", "alefsym", "$\\aleph$"}, // alef symbol = first transfinite cardinal, @@ -323,18 +323,18 @@ public class HTMLUnicodeConversionMaps { {"8596", "harr", 
"$\\leftrightarrow$"}, // left right arrow, U+2194 ISOamsa {"8629", "crarr", "$\\dlsh$"}, // downwards arrow with corner leftwards // = carriage return, U+21B5 NEW - require mathabx - {"8656", "lArr", "{$\\Leftarrow$}"}, // leftwards double arrow, U+21D0 ISOtech + {"8656", "lArr", "{{$\\Leftarrow$}}"}, // leftwards double arrow, U+21D0 ISOtech /* ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests */ - {"8657", "uArr", "{$\\Uparrow$}"}, // upwards double arrow, U+21D1 ISOamsa - {"8658", "rArr", "{$\\Rightarrow$}"}, // rightwards double arrow, + {"8657", "uArr", "{{$\\Uparrow$}}"}, // upwards double arrow, U+21D1 ISOamsa + {"8658", "rArr", "{{$\\Rightarrow$}}"}, // rightwards double arrow, // U+21D2 ISOtech /* ISO 10646 does not say this is the 'implies' character but does not have another character with this function so ? rArr can be used for 'implies' as ISOtech suggests */ - {"8659", "dArr", "{$\\Downarrow$}"}, // downwards double arrow, U+21D3 ISOamsa - {"8660", "hArr", "{$\\Leftrightarrow$}"}, // left right double arrow, + {"8659", "dArr", "{{$\\Downarrow$}}"}, // downwards double arrow, U+21D3 ISOamsa + {"8660", "hArr", "{{$\\Leftrightarrow$}}"}, // left right double arrow, // U+21D4 ISOamsa /* Mathematical Operators */ @@ -575,10 +575,10 @@ public class HTMLUnicodeConversionMaps { {"729", "dot", "{\\\\.{}}"}, // Dot above {"730", "ring", "{\\r{}}"}, // Ring above {"731", "ogon", "{\\k{}}"}, // Ogonek - {"733", "dblac", "{\\H{}}"}, // Double acute + {"733", "dblac", "{{\\H{}}}"}, // Double acute {"949", "epsi", "$\\epsilon$"}, // Epsilon - double check {"1013", "epsiv", "$\\varepsilonup$"}, // lunate epsilon, requires txfonts - {"1055", "", "{\\cyrchar\\CYRP}"}, // Cyrillic capital Pe + {"1055", "", "{{\\cyrchar\\CYRP}}"}, // Cyrillic capital Pe {"1082", "", "{\\cyrchar\\cyrk}"}, // Cyrillic small Ka // {"2013", "", 
""}, // NKO letter FA -- Maybe en dash = 0x2013? // {"2014", "", ""}, // NKO letter FA -- Maybe em dash = 0x2014? @@ -593,17 +593,17 @@ public class HTMLUnicodeConversionMaps { {"8244", "", "{\\prime\\prime\\prime}"}, // triple prime {"8251", "", "{\\textreferencemark}"}, {"8253", "", "{\\textinterrobang}"}, {"8450", "complexes", "$\\mathbb{C}$"}, // double struck capital C -- requires e.g. amsfonts - {"8451", "", "$\\deg${C}"}, // Degree Celsius - {"8459", "Hscr", "$\\mathcal{H}$"}, // script capital H -- possibly use \mathscr - {"8460", "Hfr", "$\\mathbb{H}$"}, // black letter capital H -- requires e.g. amsfonts - {"8466", "Lscr", "$\\mathcal{L}$"}, // script capital L -- possibly use \mathscr + {"8451", "", "$\\deg${{C}}"}, // Degree Celsius + {"8459", "Hscr", "{{$\\mathcal{H}$}}"}, // script capital H -- possibly use \mathscr + {"8460", "Hfr", "{{$\\mathbb{H}$}}"}, // black letter capital H -- requires e.g. amsfonts + {"8466", "Lscr", "{{$\\mathcal{L}$}}"}, // script capital L -- possibly use \mathscr {"8467", "ell", "{\\ell}"}, // script small l - {"8469", "naturals", "$\\mathbb{N}$"}, // double struck capital N -- requires e.g. amsfonts - {"8474", "Qopf", "$\\mathbb{Q}$"}, // double struck capital Q -- requires e.g. amsfonts - {"8477", "reals", "$\\mathbb{R}$"}, // double struck capital R -- requires e.g. 
amsfonts + {"8486", "", "${{\\Omega}}$"}, // Omega {"8491", "angst", "{{\\AA}}"}, // Angstrom - {"8496", "Escr", "$\\mathcal{E}$"}, // script capital E + {"8496", "Escr", "{{$\\mathcal{E}$}}"}, // script capital E {"8531", "frac13", "$\\sfrac{1}{3}$"}, // Vulgar fraction one third {"8532", "frac23", "$\\sfrac{2}{3}$"}, // Vulgar fraction two thirds {"8533", "frac15", "$\\sfrac{1}{5}$"}, // Vulgar fraction one fifth diff --git a/src/test/java/net/sf/jabref/logic/cleanup/CleanupWorkerTest.java b/src/test/java/net/sf/jabref/logic/cleanup/CleanupWorkerTest.java index 2100a13f603a..33a46a8e3a69 100644 --- a/src/test/java/net/sf/jabref/logic/cleanup/CleanupWorkerTest.java +++ b/src/test/java/net/sf/jabref/logic/cleanup/CleanupWorkerTest.java @@ -39,7 +39,7 @@ public class CleanupWorkerTest { @Rule public TemporaryFolder bibFolder = new TemporaryFolder(); - private CleanupPreset emptyPreset = new CleanupPreset(EnumSet.noneOf(CleanupPreset.CleanupStep.class)); + private final CleanupPreset emptyPreset = new CleanupPreset(EnumSet.noneOf(CleanupPreset.CleanupStep.class)); private CleanupWorker worker; private File pdfFolder; @@ -248,7 +248,7 @@ public void cleanupHtmlToLatexConvertsEpsilonToLatex() { entry.setField("title", "Ε"); worker.cleanup(preset, entry); - Assert.assertEquals("{$\\Epsilon$}", entry.getField("title")); + Assert.assertEquals("{{$\\Epsilon$}}", entry.getField("title")); } @Test diff --git a/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/HtmlToLatexFormatterTest.java b/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/HtmlToLatexFormatterTest.java index 3db99a42d85a..75785edb2365 100644 --- a/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/HtmlToLatexFormatterTest.java +++ b/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/HtmlToLatexFormatterTest.java @@ -45,7 +45,7 @@ public void testHTML() { assertEquals("{\\\"{a}}", formatter.format("ä")); assertEquals("{\\\"{a}}", formatter.format("ä")); assertEquals("{\\\"{a}}", 
formatter.format("ä")); - assertEquals("{$\\Epsilon$}", formatter.format("Ε")); + assertEquals("{{$\\Epsilon$}}", formatter.format("Ε")); } @Test diff --git a/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeConverterTest.java b/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeConverterTest.java index 9a222b6bbe66..4d14e4aa2895 100644 --- a/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeConverterTest.java +++ b/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeConverterTest.java @@ -35,7 +35,7 @@ public void testUnicodeCombiningAccents() { @Test public void testUnicode() { assertEquals("{\\\"{a}}", formatter.format("รค")); - assertEquals("{$\\Epsilon$}", formatter.format("\u0395")); + assertEquals("{{$\\Epsilon$}}", formatter.format("\u0395")); } @Test