Skip to content

Commit

Permalink
Fixed #1264 (#1581)
Browse files Browse the repository at this point in the history
* Fixed #1264

* Code improvement

* Added tests

* Fixed comments and removed a code label
  • Loading branch information
oscargus authored Jul 15, 2016
1 parent aa42c16 commit bc56ab6
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 26 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- [#1026](https://github.com/JabRef/jabref/issues/1026) JabRef no longer deletes user comments outside of BibTeX entries and strings

### Fixed
- Fixed [#1264](https://github.com/JabRef/jabref/issues/1264): S with caron does not render correctly
- LaTeX to Unicode converter now handles combining accents

### Removed

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/net/sf/jabref/Globals.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ public class Globals {
public static final ImportFormatReader IMPORT_FORMAT_READER = new ImportFormatReader();



public static final String SPECIAL_COMMAND_CHARS = "\"`^~'c=";
// Non-letters which are used to denote accents in LaTeX-commands, e.g., in {\"{a}}
public static final String SPECIAL_COMMAND_CHARS = "\"`^~'=.|";

// In the main program, this field is initialized in JabRef.java
// Each test case initializes this field if required
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
package net.sf.jabref.logic.formatter.bibtexfields;

import java.util.Map;
import java.util.Objects;
import java.util.Set;

import net.sf.jabref.logic.formatter.Formatter;
import net.sf.jabref.logic.l10n.Localization;
Expand All @@ -39,10 +39,9 @@ public String format(String text) {
}

// Standard symbols
Set<Character> chars = HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.keySet();
for (Character character : chars) {
result = result.replace(character.toString(),
HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP.get(character));
for (Map.Entry<String, String> unicodeLatexPair : HTMLUnicodeConversionMaps.UNICODE_LATEX_CONVERSION_MAP
.entrySet()) {
result = result.replace(unicodeLatexPair.getKey(), unicodeLatexPair.getValue());
}

// Combining accents
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
public class LatexToUnicodeFormatter implements LayoutFormatter, Formatter {

private static final Map<String, String> CHARS = HTMLUnicodeConversionMaps.LATEX_UNICODE_CONVERSION_MAP;
private static final Map<String, String> ACCENTS = HTMLUnicodeConversionMaps.UNICODE_ESCAPED_ACCENTS;

@Override
public String getName() {
Expand Down Expand Up @@ -74,6 +75,7 @@ public String format(String inField) {
} else {
sb.append((String) result);
}

}
escaped = true;
incommand = true;
Expand All @@ -88,28 +90,34 @@ public String format(String inField) {
sb.append(c);
} else {
currentCommand.append(c);
testCharCom: if ((currentCommand.length() == 1)
&& Globals.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())) {
if ((currentCommand.length() == 1)
&& Globals.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())
&& !(i >= (field.length() - 1))) {
// This indicates that we are in a command of the type
// \^o or \~{n}
if (i >= (field.length() - 1)) {
break testCharCom;
}

String command = currentCommand.toString();
i++;
c = field.charAt(i);
String combody;
String commandBody;
if (c == '{') {
String part = StringUtil.getPart(field, i, false);
i += part.length();
combody = part;
commandBody = part;
} else {
combody = field.substring(i, i + 1);
commandBody = field.substring(i, i + 1);
}
Object result = LatexToUnicodeFormatter.CHARS.get(command + combody);
Object result = LatexToUnicodeFormatter.CHARS.get(command + commandBody);

if (result != null) {
if (result == null) {
// Use combining accents if argument is single character or empty
if (commandBody.length() <= 1) {
String accent = LatexToUnicodeFormatter.ACCENTS.get(command);
if (accent != null) {
sb.append(commandBody).append(accent);
}
}
} else {
sb.append((String) result);
}

Expand Down Expand Up @@ -152,28 +160,43 @@ public String format(String inField) {
if (argument != null) {
// handle common case of general latex command
Object result = LatexToUnicodeFormatter.CHARS.get(command + argument);

// If found, then use translated version. If not, then keep
// the
// text of the parameter intact.
if (result == null) {
sb.append(argument);
// Use combining accents if argument is single character or empty
if (argument.length() <= 1) {
String accent = LatexToUnicodeFormatter.ACCENTS.get(command);
if (accent != null) {
sb.append(argument).append(accent);
} else {
sb.append(argument);
}
} else {
sb.append(argument);
}
} else {
sb.append((String) result);
}

}
} else if (c == '}') {
// This end brace terminates a command. This can be the case in
// constructs like {\aa}. The correct behaviour should be to
// substitute the evaluated command and swallow the brace:
Object result = LatexToUnicodeFormatter.CHARS.get(command);

if (result == null) {
// If the command is unknown, just print it:
sb.append(command);
} else {
sb.append((String) result);
}

} else {
Object result = LatexToUnicodeFormatter.CHARS.get(command);

if (result == null) {
sb.append(command);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -509,14 +509,14 @@ public class HTMLUnicodeConversionMaps {
{"", "VerticalLine", "|"}, // Vertical bar
{"125", "rbrace", "\\}"}, // Right curly bracket
{"", "rcub", "\\}"}, // Right curly bracket
{"138", "", "{{\\v{S}}}"}, // Line tabulation set
// {"138", "", "{{\\v{S}}}"}, // Line tabulation set
// {"141", "", ""}, // Reverse line feed
{"145", "", "`"}, // Apostrophe
{"146", "", "'"}, // Apostrophe
{"147", "", "``"}, // Quotation mark
{"148", "", "''"}, // Quotation mark
{"150", "", "--"}, // En dash
{"154", "", "{\\v{s}}"}, // Single character introducer
// {"154", "", "{\\v{s}}"}, // Single character introducer
{"260", "Aogon", "{{\\k{A}}}"}, // capital A with ogonek
{"261", "aogon", "{\\k{a}}"}, // small a with ogonek
{"262", "Cacute", "{{\\'{C}}}"}, // capital C with acute
Expand Down Expand Up @@ -574,7 +574,7 @@ public class HTMLUnicodeConversionMaps {
{"", "Hacek", "{\\v{}}"}, // Caron
{"728", "breve", "{\\u{}}"}, // Breve
{"", "Breve", "{\\u{}}"}, // Breve
{"729", "dot", "{\\\\.{}}"}, // Dot above
{"729", "dot", "{\\.{}}"}, // Dot above
{"730", "ring", "{\\r{}}"}, // Ring above
{"731", "ogon", "{\\k{}}"}, // Ogonek
{"733", "dblac", "{{\\H{}}}"}, // Double acute
Expand Down Expand Up @@ -744,8 +744,9 @@ public class HTMLUnicodeConversionMaps {

public static final Map<String, String> HTML_LATEX_CONVERSION_MAP = new HashMap<>();
public static final Map<Integer, String> ESCAPED_ACCENTS = new HashMap<>();
public static final Map<String, String> UNICODE_ESCAPED_ACCENTS = new HashMap<>();
public static final Map<Integer, String> NUMERICAL_LATEX_CONVERSION_MAP = new HashMap<>();
public static final Map<Character, String> UNICODE_LATEX_CONVERSION_MAP = new HashMap<>();
public static final Map<String, String> UNICODE_LATEX_CONVERSION_MAP = new HashMap<>();
public static final Map<String, String> LATEX_HTML_CONVERSION_MAP = new HashMap<>();
public static final Map<String, String> LATEX_UNICODE_CONVERSION_MAP = new HashMap<>();

Expand All @@ -765,17 +766,19 @@ public class HTMLUnicodeConversionMaps {
if (!(aConversionList[0].isEmpty())) {
NUMERICAL_LATEX_CONVERSION_MAP.put(Integer.decode(aConversionList[0]), aConversionList[2]);
if (Integer.decode(aConversionList[0]) > 128) {
Character c = (char) Integer.decode(aConversionList[0]).intValue();
UNICODE_LATEX_CONVERSION_MAP.put(c, aConversionList[2]);
String unicodeSymbol = String.valueOf(Character.toChars(Integer.decode(aConversionList[0])));
UNICODE_LATEX_CONVERSION_MAP.put(unicodeSymbol, aConversionList[2]);
if (!strippedLaTeX.isEmpty()) {
LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, c.toString());
LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, unicodeSymbol);
}
}
}
}
}
for (String[] anAccentList : ACCENT_LIST) {
ESCAPED_ACCENTS.put(Integer.decode(anAccentList[0]), anAccentList[1]);
UNICODE_ESCAPED_ACCENTS.put(anAccentList[1],
String.valueOf(Character.toChars(Integer.decode(anAccentList[0]))));
}
// Manually added values which are killed by cleanLaTeX
LATEX_HTML_CONVERSION_MAP.put("$", "&dollar;");
Expand All @@ -784,6 +787,12 @@ public class HTMLUnicodeConversionMaps {
// Manual corrections
LATEX_HTML_CONVERSION_MAP.put("AA", "&Aring;"); // Overwritten by &angst; which is less supported
LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol

// Manual additions
// Support relax to the extent that it is simply removed
LATEX_HTML_CONVERSION_MAP.put("relax", "");
LATEX_UNICODE_CONVERSION_MAP.put("relax", "");

}

private static String cleanLaTeX(String escapedString) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,50 @@ public void testFormatStripLatexCommands() {
}

@Test
public void testEquations() {
public void testFormatTextit() {
// See #1464
assertEquals("text", formatter.format("\\textit{text}"));
}

/**
 * An escaped dollar sign ({@code \$}) must be emitted as a plain {@code $}.
 */
@Test
public void testEscapedDollarSign() {
    String expected = "$";
    String actual = formatter.format("\\$");
    assertEquals(expected, actual);
}

/**
 * A single symbol command inside inline math ({@code $\sigma$}) must be
 * converted to the corresponding Unicode character, without the dollar signs.
 */
@Test
public void testEquationsSingleSymbol() {
    String expected = "σ";
    String actual = formatter.format("$\\sigma$");
    assertEquals(expected, actual);
}

/**
 * Mixed input: the tilde becomes a non-breaking space (U+00A0), grouping
 * braces and math delimiters are dropped, and consecutive symbol commands
 * inside math mode are each converted to their Unicode character.
 */
@Test
public void testEquationsMoreComplicatedFormatting() {
    String latexInput = "A 32~{mA} {$\\Sigma\\Delta$}-modulator";
    String expected = "A 32\u00A0mA ΣΔ-modulator";
    assertEquals(expected, formatter.format(latexInput));
}

/**
 * The example input advertised by the formatter itself must format to "Mönch".
 */
@Test
public void formatExample() {
    String exampleInput = formatter.getExampleInput();
    String formatted = formatter.format(exampleInput);
    assertEquals("Mönch", formatted);
}

/**
 * {@code $\chi$} must be converted to the Greek letter χ. See #1464.
 */
@Test
public void testChi() {
    String expected = "χ";
    String actual = formatter.format("$\\chi$");
    assertEquals(expected, actual);
}

/**
 * Regression test for bug #1264: {@code {\v{S}}} must be converted to Š.
 */
@Test
public void testSWithCaron() {
    String expected = "Š";
    String actual = formatter.format("{\\v{S}}");
    assertEquals(expected, actual);
}

/**
 * Combining accents, case 1: the braced accent command {@code {\c{h}}}
 * must be converted to ḩ.
 */
@Test
public void testCombiningAccentsCase1() {
    String expected = "ḩ";
    String actual = formatter.format("{\\c{h}}");
    assertEquals(expected, actual);
}

/**
 * Combining accents, case 2: the named accent command
 * {@code \spreadlips{a}} must be converted to the letter followed by its
 * accent mark.
 */
@Test
public void testCombiningAccentsCase2() {
    String expected = "a͍";
    String actual = formatter.format("\\spreadlips{a}");
    assertEquals(expected, actual);
}
}

0 comments on commit bc56ab6

Please sign in to comment.