Skip to content

Commit

Permalink
Added double {} around all symbols with capital letters to avoid lowercasing in titles. Fixed JabRef#405
Browse files Browse the repository at this point in the history
  • Loading branch information
oscargus committed Apr 19, 2016
1 parent 0266a8e commit e7205f5
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 89 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
- Added integrity check detecting HTML-encoded characters

### Fixed
- Fixed [#405](https://github.com/JabRef/jabref/issues/405): Added more {} around capital letters in Unicode/HTML to LaTeX conversion to preserve them

### Removed

Expand Down Expand Up @@ -91,7 +92,6 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
- Swedish is added as a language option (still not a complete translation)

### Fixed
- Fixed [#405](https://github.com/JabRef/jabref/issues/405): Added more {} around capital letters in Unicode/HTML to LaTeX conversion to preserve them
- Fixed [#318](https://github.com/JabRef/jabref/issues/318): Improve normalization of author names
- Fixed [#598](https://github.com/JabRef/jabref/issues/598) and [#402](https://github.com/JabRef/jabref/issues/402): No more issues with invalid icons for ExternalFileTypes in global search or after editing the settings
- Fixed [#883](https://github.com/JabRef/jabref/issues/883): No NPE during cleanup
Expand Down
53 changes: 11 additions & 42 deletions src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import net.sf.jabref.logic.layout.format.HTMLChars;
import net.sf.jabref.logic.layout.format.LatexToUnicodeFormatter;
import net.sf.jabref.logic.search.SearchQuery;
import net.sf.jabref.logic.util.strings.HTMLUnicodeConversionMaps;
import net.sf.jabref.model.database.BibDatabase;
import net.sf.jabref.model.database.BibDatabaseMode;
import net.sf.jabref.model.database.BibDatabaseModeDetection;
Expand All @@ -40,8 +39,8 @@ public class Benchmarks {

private String bibtexString;
private final BibDatabase database = new BibDatabase();
private final List<String> latexConversionStrings = new ArrayList<>();
private final List<String> htmlConversionStrings = new ArrayList<>();
private String latexConversionString;
private String htmlConversionString;

@Setup
public void init() throws IOException, SaveException {
Expand All @@ -66,27 +65,9 @@ public void init() throws IOException, SaveException {
new SavePreferences());
bibtexString = stringWriter.toString();

List<String> latexSymbols = new ArrayList<>(HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.values());
int symbolcount = latexSymbols.size();
StringBuilder sb = new StringBuilder();
sb.append("{A} \\textbf{bold} ");
sb.append(latexSymbols.get(Math.abs(randomizer.nextInt() % symbolcount)));
sb.append(" {\\it italic} {");
sb.append(latexSymbols.get(Math.abs(randomizer.nextInt() % symbolcount)));
sb.append(latexSymbols.get(Math.abs(randomizer.nextInt() % symbolcount)));
sb.append("} abc");
latexConversionStrings.add(sb.toString());

List<String> htmlSymbols = new ArrayList<>(HTMLUnicodeConversionMaps.HTML_LATEX_CONVERSION_MAP.keySet());
symbolcount = htmlSymbols.size();
sb = new StringBuilder();
sb.append("A <b>bold</b> ");
sb.append(htmlSymbols.get(Math.abs(randomizer.nextInt() % symbolcount)));
sb.append(" <it>italic</it> ");
sb.append(htmlSymbols.get(Math.abs(randomizer.nextInt() % symbolcount)));
sb.append(htmlSymbols.get(Math.abs(randomizer.nextInt() % symbolcount)));
sb.append("&#8211; abc");
htmlConversionStrings.add(sb.toString());
latexConversionString = "{A} \\textbf{bold} approach {\\it to} ${{\\Sigma}}{\\Delta}$ modulator \\textsuperscript{2} \\$";

htmlConversionString = "<b>&Ouml;sterreich</b> &#8211; &amp; characters &#x2aa2; <i>italic</i>";
}

@Benchmark
Expand Down Expand Up @@ -122,33 +103,21 @@ public BibDatabaseMode inferBibDatabaseMode() {
}

@Benchmark
public List<String> latexToUnicodeConversion() {
List<String> result = new ArrayList<>(1000);
public String latexToUnicodeConversion() {
LatexToUnicodeFormatter f = new LatexToUnicodeFormatter();
for (String s : latexConversionStrings) {
result.add(f.format(s));
}
return result;
return f.format(latexConversionString);
}

@Benchmark
public List<String> latexToHTMLConversion() {
List<String> result = new ArrayList<>(1000);
public String latexToHTMLConversion() {
HTMLChars f = new HTMLChars();
for (String s : latexConversionStrings) {
result.add(f.format(s));
}
return result;
return f.format(latexConversionString);
}

@Benchmark
public List<String> htmlToLatexConversion() {
List<String> result = new ArrayList<>(1000);
public String htmlToLatexConversion() {
HtmlToLatexFormatter f = new HtmlToLatexFormatter();
for (String s : htmlConversionStrings) {
result.add(f.format(s));
}
return result;
return f.format(htmlConversionString);
}

public static void main(String[] args) throws IOException, RunnerException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,38 +211,38 @@ public class HTMLUnicodeConversionMaps {

/* Greek */
{"913", "Alpha", "{{$\\Alpha$}}"}, // greek capital letter alpha, U+0391
{"914", "Beta", "{$\\Beta$}"}, // greek capital letter beta, U+0392
{"915", "Gamma", "{$\\Gamma$}"}, // greek capital letter gamma,
{"914", "Beta", "{{$\\Beta$}}"}, // greek capital letter beta, U+0392
{"915", "Gamma", "{{$\\Gamma$}}"}, // greek capital letter gamma,
// U+0393 ISOgrk3
{"916", "Delta", "{$\\Delta$}"}, // greek capital letter delta,
{"916", "Delta", "{{$\\Delta$}}"}, // greek capital letter delta,
// U+0394 ISOgrk3
{"917", "Epsilon", "{$\\Epsilon$}"}, // greek capital letter epsilon, U+0395
{"918", "Zeta", "{$\\Zeta$}"}, // greek capital letter zeta, U+0396
{"919", "Eta", "{$\\Eta$}"}, // greek capital letter eta, U+0397
{"920", "Theta", "{$\\Theta$}"}, // greek capital letter theta,
{"917", "Epsilon", "{{$\\Epsilon$}}"}, // greek capital letter epsilon, U+0395
{"918", "Zeta", "{{$\\Zeta$}}"}, // greek capital letter zeta, U+0396
{"919", "Eta", "{{$\\Eta$}}"}, // greek capital letter eta, U+0397
{"920", "Theta", "{{$\\Theta$}}"}, // greek capital letter theta,
// U+0398 ISOgrk3
{"921", "Iota", "{$\\Iota$}"}, // greek capital letter iota, U+0399
{"922", "Kappa", "{$\\Kappa$}"}, // greek capital letter kappa, U+039A
{"923", "Lambda", "{$\\Lambda$}"}, // greek capital letter lambda,
{"921", "Iota", "{{$\\Iota$}}"}, // greek capital letter iota, U+0399
{"922", "Kappa", "{{$\\Kappa$}}"}, // greek capital letter kappa, U+039A
{"923", "Lambda", "{{$\\Lambda$}}"}, // greek capital letter lambda,
// U+039B ISOgrk3
{"924", "Mu", "{$\\Mu$}"}, // greek capital letter mu, U+039C
{"925", "Nu", "{$\\Nu$}"}, // greek capital letter nu, U+039D
{"926", "Xi", "{$\\Xi$}"}, // greek capital letter xi, U+039E ISOgrk3
{"927", "Omicron", "{$\\Omicron$}"}, // greek capital letter omicron, U+039F
{"928", "Pi", "{$\\Pi$}"}, // greek capital letter pi, U+03A0 ISOgrk3
{"929", "Rho", "{$\\Rho$}"}, // greek capital letter rho, U+03A1
{"924", "Mu", "{{$\\Mu$}}"}, // greek capital letter mu, U+039C
{"925", "Nu", "{{$\\Nu$}}"}, // greek capital letter nu, U+039D
{"926", "Xi", "{{$\\Xi$}}"}, // greek capital letter xi, U+039E ISOgrk3
{"927", "Omicron", "{{$\\Omicron$}}"}, // greek capital letter omicron, U+039F
{"928", "Pi", "{{$\\Pi$}}"}, // greek capital letter pi, U+03A0 ISOgrk3
{"929", "Rho", "{{$\\Rho$}}"}, // greek capital letter rho, U+03A1
/* there is no Sigmaf, and no U+03A2 character either */
{"931", "Sigma", "{$\\Sigma$}"}, // greek capital letter sigma,
{"931", "Sigma", "{{$\\Sigma$}}"}, // greek capital letter sigma,
// U+03A3 ISOgrk3
{"932", "Tau", "{$\\Tau$}"}, // greek capital letter tau, U+03A4
{"933", "Upsilon", "{$\\Upsilon$}"}, // greek capital letter upsilon,
{"932", "Tau", "{{$\\Tau$}}"}, // greek capital letter tau, U+03A4
{"933", "Upsilon", "{{$\\Upsilon$}}"}, // greek capital letter upsilon,
// U+03A5 ISOgrk3
{"934", "Phi", "{$\\Phi$}"}, // greek capital letter phi,
{"934", "Phi", "{{$\\Phi$}}"}, // greek capital letter phi,
// U+03A6 ISOgrk3
{"935", "Chi", "{$\\Chi$}"}, // greek capital letter chi, U+03A7
{"936", "Psi", "{$\\Psi$}"}, // greek capital letter psi,
{"935", "Chi", "{{$\\Chi$}}"}, // greek capital letter chi, U+03A7
{"936", "Psi", "{{$\\Psi$}}"}, // greek capital letter psi,
// U+03A8 ISOgrk3
{"937", "Omega", "{$\\Omega$}"}, // greek capital letter omega,
{"937", "Omega", "{{$\\Omega$}}"}, // greek capital letter omega,
// U+03A9 ISOgrk3

{"945", "alpha", "$\\alpha$"}, // greek small letter alpha,
Expand Down Expand Up @@ -286,7 +286,7 @@ public class HTMLUnicodeConversionMaps {
{"", "thetav", "$\\vartheta$"}, // greek small letter theta symbol,
{"", "vartheta", "$\\vartheta$"}, // greek small letter theta symbol,
// U+03D1 NEW
{"978", "upsih", "{$\\Upsilon$}"}, // greek upsilon with hook symbol,
{"978", "upsih", "{{$\\Upsilon$}}"}, // greek upsilon with hook symbol,
// U+03D2 NEW
{"982", "piv", "$\\varphi$"}, // greek pi symbol, U+03D6 ISOgrk3

Expand All @@ -306,9 +306,9 @@ public class HTMLUnicodeConversionMaps {
/* Letterlike Symbols */
{"8472", "weierp", "$\\wp$"}, // script capital P = power set
// = Weierstrass p, U+2118 ISOamso
{"8465", "image", "{$\\Im$}"}, // blackletter capital I = imaginary part,
{"8465", "image", "{{$\\Im$}}"}, // blackletter capital I = imaginary part,
// U+2111 ISOamso
{"8476", "real", "{$\\Re$}"}, // blackletter capital R = real part symbol,
{"8476", "real", "{{$\\Re$}}"}, // blackletter capital R = real part symbol,
// U+211C ISOamso
{"8482", "trade", "{\\texttrademark}"}, // trade mark sign, U+2122 ISOnum
{"8501", "alefsym", "$\\aleph$"}, // alef symbol = first transfinite cardinal,
Expand All @@ -323,18 +323,18 @@ public class HTMLUnicodeConversionMaps {
{"8596", "harr", "$\\leftrightarrow$"}, // left right arrow, U+2194 ISOamsa
{"8629", "crarr", "$\\dlsh$"}, // downwards arrow with corner leftwards
// = carriage return, U+21B5 NEW - require mathabx
{"8656", "lArr", "{$\\Leftarrow$}"}, // leftwards double arrow, U+21D0 ISOtech
{"8656", "lArr", "{{$\\Leftarrow$}}"}, // leftwards double arrow, U+21D0 ISOtech
/* ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
but also does not have any other character for that function. So ? lArr can
be used for 'is implied by' as ISOtech suggests */
{"8657", "uArr", "{$\\Uparrow$}"}, // upwards double arrow, U+21D1 ISOamsa
{"8658", "rArr", "{$\\Rightarrow$}"}, // rightwards double arrow,
{"8657", "uArr", "{{$\\Uparrow$}}"}, // upwards double arrow, U+21D1 ISOamsa
{"8658", "rArr", "{{$\\Rightarrow$}}"}, // rightwards double arrow,
// U+21D2 ISOtech
/* ISO 10646 does not say this is the 'implies' character but does not have
another character with this function so ?
rArr can be used for 'implies' as ISOtech suggests */
{"8659", "dArr", "{$\\Downarrow$}"}, // downwards double arrow, U+21D3 ISOamsa
{"8660", "hArr", "{$\\Leftrightarrow$}"}, // left right double arrow,
{"8659", "dArr", "{{$\\Downarrow$}}"}, // downwards double arrow, U+21D3 ISOamsa
{"8660", "hArr", "{{$\\Leftrightarrow$}}"}, // left right double arrow,
// U+21D4 ISOamsa

/* Mathematical Operators */
Expand Down Expand Up @@ -575,10 +575,10 @@ public class HTMLUnicodeConversionMaps {
{"729", "dot", "{\\\\.{}}"}, // Dot above
{"730", "ring", "{\\r{}}"}, // Ring above
{"731", "ogon", "{\\k{}}"}, // Ogonek
{"733", "dblac", "{\\H{}}"}, // Double acute
{"733", "dblac", "{{\\H{}}}"}, // Double acute
{"949", "epsi", "$\\epsilon$"}, // Epsilon - double check
{"1013", "epsiv", "$\\varepsilonup$"}, // lunate epsilon, requires txfonts
{"1055", "", "{\\cyrchar\\CYRP}"}, // Cyrillic capital Pe
{"1055", "", "{{\\cyrchar\\CYRP}}"}, // Cyrillic capital Pe
{"1082", "", "{\\cyrchar\\cyrk}"}, // Cyrillic small Ka
// {"2013", "", ""}, // NKO letter FA -- Maybe en dash = 0x2013?
// {"2014", "", ""}, // NKO letter FA -- Maybe em dash = 0x2014?
Expand All @@ -593,17 +593,17 @@ public class HTMLUnicodeConversionMaps {
{"8244", "", "{\\prime\\prime\\prime}"}, // triple prime
{"8251", "", "{\\textreferencemark}"}, {"8253", "", "{\\textinterrobang}"},
{"8450", "complexes", "$\\mathbb{C}$"}, // double struck capital C -- requires e.g. amsfonts
{"8451", "", "$\\deg${C}"}, // Degree Celsius
{"8459", "Hscr", "$\\mathcal{H}$"}, // script capital H -- possibly use \mathscr
{"8460", "Hfr", "$\\mathbb{H}$"}, // black letter capital H -- requires e.g. amsfonts
{"8466", "Lscr", "$\\mathcal{L}$"}, // script capital L -- possibly use \mathscr
{"8451", "", "$\\deg${{C}}"}, // Degree Celsius
{"8459", "Hscr", "{{$\\mathcal{H}$}}"}, // script capital H -- possibly use \mathscr
{"8460", "Hfr", "{{$\\mathbb{H}$}}"}, // black letter capital H -- requires e.g. amsfonts
{"8466", "Lscr", "{{$\\mathcal{L}$}}"}, // script capital L -- possibly use \mathscr
{"8467", "ell", "{\\ell}"}, // script small l
{"8469", "naturals", "$\\mathbb{N}$"}, // double struck capital N -- requires e.g. amsfonts
{"8474", "Qopf", "$\\mathbb{Q}$"}, // double struck capital Q -- requires e.g. amsfonts
{"8477", "reals", "$\\mathbb{R}$"}, // double struck capital R -- requires e.g. amsfonts
{"8486", "", "${\\Omega}$"}, // Omega
{"8469", "naturals", "{{$\\mathbb{N}$}}"}, // double struck capital N -- requires e.g. amsfonts
{"8474", "Qopf", "{{$\\mathbb{Q}$}}"}, // double struck capital Q -- requires e.g. amsfonts
{"8477", "reals", "{{$\\mathbb{R}$}}"}, // double struck capital R -- requires e.g. amsfonts
{"8486", "", "${{\\Omega}}$"}, // Omega
{"8491", "angst", "{{\\AA}}"}, // Angstrom
{"8496", "Escr", "$\\mathcal{E}$"}, // script capital E
{"8496", "Escr", "{{$\\mathcal{E}$}}"}, // script capital E
{"8531", "frac13", "$\\sfrac{1}{3}$"}, // Vulgar fraction one third
{"8532", "frac23", "$\\sfrac{2}{3}$"}, // Vulgar fraction two thirds
{"8533", "frac15", "$\\sfrac{1}{5}$"}, // Vulgar fraction one fifth
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class CleanupWorkerTest {
@Rule
public TemporaryFolder bibFolder = new TemporaryFolder();

private CleanupPreset emptyPreset = new CleanupPreset(EnumSet.noneOf(CleanupPreset.CleanupStep.class));
private final CleanupPreset emptyPreset = new CleanupPreset(EnumSet.noneOf(CleanupPreset.CleanupStep.class));
private CleanupWorker worker;
private File pdfFolder;

Expand Down Expand Up @@ -248,7 +248,7 @@ public void cleanupHtmlToLatexConvertsEpsilonToLatex() {
entry.setField("title", "&Epsilon;");

worker.cleanup(preset, entry);
Assert.assertEquals("{$\\Epsilon$}", entry.getField("title"));
Assert.assertEquals("{{$\\Epsilon$}}", entry.getField("title"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public void testHTML() {
assertEquals("{\\\"{a}}", formatter.format("&auml;"));
assertEquals("{\\\"{a}}", formatter.format("&#228;"));
assertEquals("{\\\"{a}}", formatter.format("&#xe4;"));
assertEquals("{$\\Epsilon$}", formatter.format("&Epsilon;"));
assertEquals("{{$\\Epsilon$}}", formatter.format("&Epsilon;"));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public void testUnicodeCombiningAccents() {
@Test
public void testUnicode() {
assertEquals("{\\\"{a}}", formatter.format("ä"));
assertEquals("{$\\Epsilon$}", formatter.format("\u0395"));
assertEquals("{{$\\Epsilon$}}", formatter.format("\u0395"));
}

@Test
Expand Down

0 comments on commit e7205f5

Please sign in to comment.