diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dd3c72536d..7f00571c81f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - [#1026](https://github.com/JabRef/jabref/issues/1026) JabRef does no longer delete user comments outside of BibTeX entries and strings ### Fixed +- Fixed [#1264](https://github.com/JabRef/jabref/issues/1264): S with caron does not render correctly +- LaTeX to Unicode converter now handles combining accents ### Removed diff --git a/src/main/java/net/sf/jabref/Globals.java b/src/main/java/net/sf/jabref/Globals.java index 6e8577b5ed1..7fb2cf06a5b 100644 --- a/src/main/java/net/sf/jabref/Globals.java +++ b/src/main/java/net/sf/jabref/Globals.java @@ -53,8 +53,8 @@ public class Globals { public static final ImportFormatReader IMPORT_FORMAT_READER = new ImportFormatReader(); - - public static final String SPECIAL_COMMAND_CHARS = "\"`^~'c="; + // Non-letters which are used to denote accents in LaTeX-commands, e.g., in {\"{a}} + public static final String SPECIAL_COMMAND_CHARS = "\"`^~'=.|"; // In the main program, this field is initialized in JabRef.java // Each test case initializes this field if required diff --git a/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java b/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java index 7c366134364..793fc1deecf 100644 --- a/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java +++ b/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java @@ -15,8 +15,8 @@ */ package net.sf.jabref.logic.formatter.bibtexfields; +import java.util.Map; import java.util.Objects; -import java.util.Set; import net.sf.jabref.logic.formatter.Formatter; import net.sf.jabref.logic.l10n.Localization; @@ -39,10 +39,9 @@ public String format(String text) { } // Standard symbols - Set chars = HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.keySet(); - for (Character character : chars) { - result = result.replace(character.toString(), - HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.get(character)); + for (Map.Entry unicodeLatexPair : HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP + .entrySet()) { + result = result.replace(unicodeLatexPair.getKey(), unicodeLatexPair.getValue()); } // Combining accents diff --git a/src/main/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatter.java b/src/main/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatter.java index abe2818fc41..821c70b882c 100644 --- a/src/main/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatter.java +++ b/src/main/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatter.java @@ -31,6 +31,7 @@ public class LatexToUnicodeFormatter implements LayoutFormatter, Formatter { private static final Map CHARS = HTMLUnicodeConversionMaps.LATEX_UNICODE_CONVERSION_MAP; + private static final Map ACCENTS = HTMLUnicodeConversionMaps.UNICODE_ESCAPED_ACCENTS; @Override public String getName() { @@ -74,6 +75,7 @@ public String format(String inField) { } else { sb.append((String) result); } + } escaped = true; incommand = true; @@ -88,28 +90,34 @@ public String format(String inField) { sb.append(c); } else { currentCommand.append(c); - testCharCom: if ((currentCommand.length() == 1) - && Globals.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())) { + if ((currentCommand.length() == 1) + && Globals.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString()) + && !(i >= (field.length() - 1))) { // This indicates that we are in a command of the type // \^o or \~{n} - if (i >= (field.length() - 1)) { - break testCharCom; - } String command = currentCommand.toString(); i++; c = field.charAt(i); - String combody; + String commandBody; if (c == '{') { String part = StringUtil.getPart(field, i, false); i += part.length(); - combody = part; + commandBody = part; } else { - combody = field.substring(i, i + 1); + commandBody = field.substring(i, i + 1); } - Object result = LatexToUnicodeFormatter.CHARS.get(command + combody); + Object result = LatexToUnicodeFormatter.CHARS.get(command + commandBody); - if (result != null) { + if (result == null) { + // Use combining accents if argument is single character or empty + if (commandBody.length() <= 1) { + String accent = LatexToUnicodeFormatter.ACCENTS.get(command); + if (accent != null) { + sb.append(commandBody).append(accent); + } + } + } else { sb.append((String) result); } @@ -152,28 +160,43 @@ public String format(String inField) { if (argument != null) { // handle common case of general latex command Object result = LatexToUnicodeFormatter.CHARS.get(command + argument); + // If found, then use translated version. If not, then keep // the // text of the parameter intact. if (result == null) { - sb.append(argument); + // Use combining accents if argument is single character or empty + if (argument.length() <= 1) { + String accent = LatexToUnicodeFormatter.ACCENTS.get(command); + if (accent != null) { + sb.append(argument).append(accent); + } else { + sb.append(argument); + } + } else { + sb.append(argument); + } } else { sb.append((String) result); } + } } else if (c == '}') { // This end brace terminates a command. This can be the case in // constructs like {\aa}. The correct behaviour should be to // substitute the evaluated command and swallow the brace: Object result = LatexToUnicodeFormatter.CHARS.get(command); + if (result == null) { // If the command is unknown, just print it: sb.append(command); } else { sb.append((String) result); } + } else { Object result = LatexToUnicodeFormatter.CHARS.get(command); + if (result == null) { sb.append(command); } else { diff --git a/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java b/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java index 93695cfa27e..c74213c7900 100644 --- a/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java +++ b/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java @@ -509,14 +509,14 @@ public class HTMLUnicodeConversionMaps { {"", "VerticalLine", "|"}, // Vertical bar {"125", "rbrace", "\\}"}, // Right curly bracket {"", "rcub", "\\}"}, // Right curly bracket - {"138", "", "{{\\v{S}}}"}, // Line tabulation set + // {"138", "", "{{\\v{S}}}"}, // Line tabulation set // {"141", "", ""}, // Reverse line feed {"145", "", "`"}, // Apostrophe {"146", "", "'"}, // Apostrophe {"147", "", "``"}, // Quotation mark {"148", "", "''"}, // Quotation mark {"150", "", "--"}, // En dash - {"154", "", "{\\v{s}}"}, // Single character introducer + // {"154", "", "{\\v{s}}"}, // Single character introducer {"260", "Aogon", "{{\\k{A}}}"}, // capital A with ogonek {"261", "aogon", "{\\k{a}}"}, // small a with ogonek {"262", "Cacute", "{{\\'{C}}}"}, // capital C with acute @@ -574,7 +574,7 @@ public class HTMLUnicodeConversionMaps { {"", "Hacek", "{\\v{}}"}, // Caron {"728", "breve", "{\\u{}}"}, // Breve {"", "Breve", "{\\u{}}"}, // Breve - {"729", "dot", "{\\\\.{}}"}, // Dot above + {"729", "dot", "{\\.{}}"}, // Dot above {"730", "ring", "{\\r{}}"}, // Ring above {"731", "ogon", "{\\k{}}"}, // Ogonek {"733", "dblac", "{{\\H{}}}"}, // Double acute @@ -744,8 +744,9 @@ public class HTMLUnicodeConversionMaps { public static final Map HTML_LATEX_CONVERSION_MAP = new HashMap<>(); public static final Map ESCAPED_ACCENTS = new HashMap<>(); + public static final Map UNICODE_ESCAPED_ACCENTS = new HashMap<>(); public static final Map NUMERICAL_LATEX_CONVERSION_MAP = new HashMap<>(); - public static final Map UNICODE_LATEX_CONVERSION_MAP = new HashMap<>(); + public static final Map UNICODE_LATEX_CONVERSION_MAP = new HashMap<>(); public static final Map LATEX_HTML_CONVERSION_MAP = new HashMap<>(); public static final Map LATEX_UNICODE_CONVERSION_MAP = new HashMap<>(); @@ -765,10 +766,10 @@ public class HTMLUnicodeConversionMaps { if (!(aConversionList[0].isEmpty())) { NUMERICAL_LATEX_CONVERSION_MAP.put(Integer.decode(aConversionList[0]), aConversionList[2]); if (Integer.decode(aConversionList[0]) > 128) { - Character c = (char) Integer.decode(aConversionList[0]).intValue(); - UNICODE_LATEX_CONVERSION_MAP.put(c, aConversionList[2]); + String unicodeSymbol = String.valueOf(Character.toChars(Integer.decode(aConversionList[0]))); + UNICODE_LATEX_CONVERSION_MAP.put(unicodeSymbol, aConversionList[2]); if (!strippedLaTeX.isEmpty()) { - LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, c.toString()); + LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, unicodeSymbol); } } } @@ -776,6 +777,8 @@ public class HTMLUnicodeConversionMaps { } for (String[] anAccentList : ACCENT_LIST) { ESCAPED_ACCENTS.put(Integer.decode(anAccentList[0]), anAccentList[1]); + UNICODE_ESCAPED_ACCENTS.put(anAccentList[1], + String.valueOf(Character.toChars(Integer.decode(anAccentList[0])))); } // Manually added values which are killed by cleanLaTeX LATEX_HTML_CONVERSION_MAP.put("$", "$"); @@ -784,6 +787,12 @@ public class HTMLUnicodeConversionMaps { // Manual corrections LATEX_HTML_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Å which is less supported LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol + + // Manual additions + // Support relax to the extent that it is simply removed + LATEX_HTML_CONVERSION_MAP.put("relax", ""); + LATEX_UNICODE_CONVERSION_MAP.put("relax", ""); + } private static String cleanLaTeX(String escapedString) { diff --git a/src/test/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatterTest.java b/src/test/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatterTest.java index a894506579b..2c6a98a92c8 100644 --- a/src/test/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatterTest.java +++ b/src/test/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatterTest.java @@ -51,9 +51,23 @@ public void testFormatStripLatexCommands() { } @Test - public void testEquations() { + public void testFormatTextit() { + // See #1464 + assertEquals("text", formatter.format("\\textit{text}")); + } + + @Test + public void testEscapedDollarSign() { assertEquals("$", formatter.format("\\$")); + } + + @Test + public void testEquationsSingleSymbol() { assertEquals("σ", formatter.format("$\\sigma$")); + } + + @Test + public void testEquationsMoreComplicatedFormatting() { assertEquals("A 32\u00A0mA ΣΔ-modulator", formatter.format("A 32~{mA} {$\\Sigma\\Delta$}-modulator")); } @@ -61,4 +75,26 @@ public void testEquations() { public void formatExample() { assertEquals("Mönch", formatter.format(formatter.getExampleInput())); } + + @Test + public void testChi() { + // See #1464 + assertEquals("χ", formatter.format("$\\chi$")); + } + + @Test + public void testSWithCaron() { + // Bug #1264 + assertEquals("Š", formatter.format("{\\v{S}}")); + } + + @Test + public void testCombiningAccentsCase1() { + assertEquals("ḩ", formatter.format("{\\c{h}}")); + } + + @Test + public void testCombiningAccentsCase2() { + assertEquals("a͍", formatter.format("\\spreadlips{a}")); + } }