Fixed #1264 (#1581)

* Fixed #1264 * Code improvement * Added tests * Fixed comments and removed a code label
JabRef · Jul 15, 2016 · bc56ab6 · bc56ab6
1 parent aa42c16
commit bc56ab6
Show file tree

Hide file tree

Showing 6 changed files with 95 additions and 26 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
 - [#1026](https://github.com/JabRef/jabref/issues/1026) JabRef does no longer delete user comments outside of BibTeX entries and strings
 
 ### Fixed
+- Fixed [#1264](https://github.com/JabRef/jabref/issues/1264): S with caron does not render correctly
+- LaTeX to Unicode converter now handles combining accents
 
 ### Removed
 

diff --git a/src/main/java/net/sf/jabref/Globals.java b/src/main/java/net/sf/jabref/Globals.java
@@ -53,8 +53,8 @@ public class Globals {
     public static final ImportFormatReader IMPORT_FORMAT_READER = new ImportFormatReader();
 
 
-
-    public static final String SPECIAL_COMMAND_CHARS = "\"`^~'c=";
+    // Non-letters which are used to denote accents in LaTeX-commands, e.g., in {\"{a}}
+    public static final String SPECIAL_COMMAND_CHARS = "\"`^~'=.|";
 
     // In the main program, this field is initialized in JabRef.java
     // Each test case initializes this field if required

diff --git a/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java b/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/UnicodeToLatexFormatter.java
@@ -15,8 +15,8 @@
  */
 package net.sf.jabref.logic.formatter.bibtexfields;
 
+import java.util.Map;
 import java.util.Objects;
-import java.util.Set;
 
 import net.sf.jabref.logic.formatter.Formatter;
 import net.sf.jabref.logic.l10n.Localization;
@@ -39,10 +39,9 @@ public String format(String text) {
         }
 
         // Standard symbols
-        Set<Character> chars = HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.keySet();
-        for (Character character : chars) {
-            result = result.replace(character.toString(),
-                    HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.get(character));
+        for (Map.Entry<String, String> unicodeLatexPair : HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP
+                .entrySet()) {
+            result = result.replace(unicodeLatexPair.getKey(), unicodeLatexPair.getValue());
         }
 
         // Combining accents

diff --git a/src/main/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatter.java b/src/main/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatter.java
@@ -31,6 +31,7 @@
 public class LatexToUnicodeFormatter implements LayoutFormatter, Formatter {
 
     private static final Map<String, String> CHARS = HTMLUnicodeConversionMaps.LATEX_UNICODE_CONVERSION_MAP;
+    private static final Map<String, String> ACCENTS = HTMLUnicodeConversionMaps.UNICODE_ESCAPED_ACCENTS;
 
     @Override
     public String getName() {
@@ -74,6 +75,7 @@ public String format(String inField) {
                     } else {
                         sb.append((String) result);
                     }
+
                 }
                 escaped = true;
                 incommand = true;
@@ -88,28 +90,34 @@ public String format(String inField) {
                     sb.append(c);
                 } else {
                     currentCommand.append(c);
-                    testCharCom: if ((currentCommand.length() == 1)
-                            && Globals.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())) {
+                    if ((currentCommand.length() == 1)
+                            && Globals.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())
+                            && !(i >= (field.length() - 1))) {
                         // This indicates that we are in a command of the type
                         // \^o or \~{n}
-                        if (i >= (field.length() - 1)) {
-                            break testCharCom;
-                        }
 
                         String command = currentCommand.toString();
                         i++;
                         c = field.charAt(i);
-                        String combody;
+                        String commandBody;
                         if (c == '{') {
                             String part = StringUtil.getPart(field, i, false);
                             i += part.length();
-                            combody = part;
+                            commandBody = part;
                         } else {
-                            combody = field.substring(i, i + 1);
+                            commandBody = field.substring(i, i + 1);
                         }
-                        Object result = LatexToUnicodeFormatter.CHARS.get(command + combody);
+                        Object result = LatexToUnicodeFormatter.CHARS.get(command + commandBody);
 
-                        if (result != null) {
+                        if (result == null) {
+                            // Use combining accents if argument is single character or empty
+                            if (commandBody.length() <= 1) {
+                                String accent = LatexToUnicodeFormatter.ACCENTS.get(command);
+                                if (accent != null) {
+                                    sb.append(commandBody).append(accent);
+                                }
+                            }
+                        } else {
                             sb.append((String) result);
                         }
 
@@ -152,28 +160,43 @@ public String format(String inField) {
                         if (argument != null) {
                             // handle common case of general latex command
                             Object result = LatexToUnicodeFormatter.CHARS.get(command + argument);
+
                             // If found, then use translated version. If not, then keep
                             // the
                             // text of the parameter intact.
                             if (result == null) {
-                                sb.append(argument);
+                                // Use combining accents if argument is single character or empty
+                                if (argument.length() <= 1) {
+                                    String accent = LatexToUnicodeFormatter.ACCENTS.get(command);
+                                    if (accent != null) {
+                                        sb.append(argument).append(accent);
+                                    } else {
+                                        sb.append(argument);
+                                    }
+                                } else {
+                                    sb.append(argument);
+                                }
                             } else {
                                 sb.append((String) result);
                             }
+
                         }
                     } else if (c == '}') {
                         // This end brace terminates a command. This can be the case in
                         // constructs like {\aa}. The correct behaviour should be to
                         // substitute the evaluated command and swallow the brace:
                         Object result = LatexToUnicodeFormatter.CHARS.get(command);
+
                         if (result == null) {
                             // If the command is unknown, just print it:
                             sb.append(command);
                         } else {
                             sb.append((String) result);
                         }
+
                     } else {
                         Object result = LatexToUnicodeFormatter.CHARS.get(command);
+
                         if (result == null) {
                             sb.append(command);
                         } else {

diff --git a/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java b/src/main/java/net/sf/jabref/logic/util/strings/HTMLUnicodeConversionMaps.java
@@ -509,14 +509,14 @@ public class HTMLUnicodeConversionMaps {
             {"", "VerticalLine", "|"}, // Vertical bar
             {"125", "rbrace", "\\}"}, // Right curly bracket
             {"", "rcub", "\\}"}, // Right curly bracket
-            {"138", "", "{{\\v{S}}}"}, // Line tabulation set
+            // {"138", "", "{{\\v{S}}}"}, // Line tabulation set
             // {"141", "", ""}, // Reverse line feed
             {"145", "", "`"}, // Apostrophe
             {"146", "", "'"}, // Apostrophe
             {"147", "", "``"}, // Quotation mark
             {"148", "", "''"}, // Quotation mark
             {"150", "", "--"}, // En dash
-            {"154", "", "{\\v{s}}"}, // Single character introducer
+            // {"154", "", "{\\v{s}}"}, // Single character introducer
             {"260", "Aogon", "{{\\k{A}}}"}, // capital A with ogonek
             {"261", "aogon", "{\\k{a}}"}, // small a with ogonek
             {"262", "Cacute", "{{\\'{C}}}"}, // capital C with acute
@@ -574,7 +574,7 @@ public class HTMLUnicodeConversionMaps {
             {"", "Hacek", "{\\v{}}"}, // Caron
             {"728", "breve", "{\\u{}}"}, // Breve
             {"", "Breve", "{\\u{}}"}, // Breve
-            {"729", "dot", "{\\\\.{}}"}, // Dot above
+            {"729", "dot", "{\\.{}}"}, // Dot above
             {"730", "ring", "{\\r{}}"}, // Ring above
             {"731", "ogon", "{\\k{}}"}, // Ogonek
             {"733", "dblac", "{{\\H{}}}"}, // Double acute
@@ -744,8 +744,9 @@ public class HTMLUnicodeConversionMaps {
 
     public static final Map<String, String> HTML_LATEX_CONVERSION_MAP = new HashMap<>();
     public static final Map<Integer, String> ESCAPED_ACCENTS = new HashMap<>();
+    public static final Map<String, String> UNICODE_ESCAPED_ACCENTS = new HashMap<>();
     public static final Map<Integer, String> NUMERICAL_LATEX_CONVERSION_MAP = new HashMap<>();
-    public static final Map<Character, String> UNICODE_LATEX_CONVERSION_MAP = new HashMap<>();
+    public static final Map<String, String> UNICODE_LATEX_CONVERSION_MAP = new HashMap<>();
     public static final Map<String, String> LATEX_HTML_CONVERSION_MAP = new HashMap<>();
     public static final Map<String, String> LATEX_UNICODE_CONVERSION_MAP = new HashMap<>();
 
@@ -765,17 +766,19 @@ public class HTMLUnicodeConversionMaps {
                 if (!(aConversionList[0].isEmpty())) {
                     NUMERICAL_LATEX_CONVERSION_MAP.put(Integer.decode(aConversionList[0]), aConversionList[2]);
                     if (Integer.decode(aConversionList[0]) > 128) {
-                        Character c = (char) Integer.decode(aConversionList[0]).intValue();
-                        UNICODE_LATEX_CONVERSION_MAP.put(c, aConversionList[2]);
+                        String unicodeSymbol = String.valueOf(Character.toChars(Integer.decode(aConversionList[0])));
+                        UNICODE_LATEX_CONVERSION_MAP.put(unicodeSymbol, aConversionList[2]);
                         if (!strippedLaTeX.isEmpty()) {
-                            LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, c.toString());
+                            LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, unicodeSymbol);
                         }
                     }
                 }
             }
         }
         for (String[] anAccentList : ACCENT_LIST) {
             ESCAPED_ACCENTS.put(Integer.decode(anAccentList[0]), anAccentList[1]);
+            UNICODE_ESCAPED_ACCENTS.put(anAccentList[1],
+                    String.valueOf(Character.toChars(Integer.decode(anAccentList[0]))));
         }
         // Manually added values which are killed by cleanLaTeX
         LATEX_HTML_CONVERSION_MAP.put("$", "&dollar;");
@@ -784,6 +787,12 @@ public class HTMLUnicodeConversionMaps {
         // Manual corrections
         LATEX_HTML_CONVERSION_MAP.put("AA", "&Aring;"); // Overwritten by &angst; which is less supported
         LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol
+
+        // Manual additions
+        // Support relax to the extent that it is simply removed
+        LATEX_HTML_CONVERSION_MAP.put("relax", "");
+        LATEX_UNICODE_CONVERSION_MAP.put("relax", "");
+
     }
 
     private static String cleanLaTeX(String escapedString) {

diff --git a/src/test/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatterTest.java b/src/test/java/net/sf/jabref/logic/layout/format/LatexToUnicodeFormatterTest.java
@@ -51,14 +51,50 @@ public void testFormatStripLatexCommands() {
     }
 
     @Test
-    public void testEquations() {
+    public void testFormatTextit() {
+        // See #1464
+        assertEquals("text", formatter.format("\\textit{text}"));
+    }
+
+    @Test
+    public void testEscapedDollarSign() {
         assertEquals("$", formatter.format("\\$"));
+    }
+
+    @Test
+    public void testEquationsSingleSymbol() {
         assertEquals("σ", formatter.format("$\\sigma$"));
+    }
+
+    @Test
+    public void testEquationsMoreComplicatedFormatting() {
         assertEquals("A 32\u00A0mA ΣΔ-modulator", formatter.format("A 32~{mA} {$\\Sigma\\Delta$}-modulator"));
     }
 
     @Test
     public void formatExample() {
         assertEquals("Mönch", formatter.format(formatter.getExampleInput()));
     }
+
+    @Test
+    public void testChi() {
+        // See #1464
+        assertEquals("χ", formatter.format("$\\chi$"));
+    }
+
+    @Test
+    public void testSWithCaron() {
+        // Bug #1264
+        assertEquals("Š", formatter.format("{\\v{S}}"));
+    }
+
+    @Test
+    public void testCombiningAccentsCase1() {
+        assertEquals("ḩ", formatter.format("{\\c{h}}"));
+    }
+
+    @Test
+    public void testCombiningAccentsCase2() {
+        assertEquals("a͍", formatter.format("\\spreadlips{a}"));
+    }
 }