Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix integrity check for tilde accents in author names #9097

Merged
merged 14 commits into from
Sep 3, 2022
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Fixed

- We fixed an issue where author names with tilde accents (for example ñ) were marked as "Names are not in the standard BibTeX format." [#8071](https://github.com/JabRef/jabref/issues/8071)
- We fixed an issue where generating a subdatabase from an aux file wrote empty files when called from the command line [#9115](https://github.com/JabRef/jabref/issues/9115), [forum#3516](https://discourse.jabref.org/t/export-subdatabase-from-aux-file-on-macos-command-line/3516)
- We fixed the display of issue, number, eid and pages fields in the entry preview. [#8607](https://github.com/JabRef/jabref/pull/8607), [#8372](https://github.com/JabRef/jabref/issues/8372), [Koppor#514](https://github.com/koppor/jabref/issues/514), [forum#2390](https://discourse.jabref.org/t/unable-to-edit-my-bibtex-file-that-i-used-before-vers-5-1/2390), [forum#3462](https://discourse.jabref.org/t/jabref-5-6-need-help-with-export-from-jabref-to-microsoft-word-entry-preview-of-apa-7-not-rendering-correctly/3462)
- We fixed the page ranges checker to detect article numbers in the pages field (used at [Check Integrity](https://docs.jabref.org/finding-sorting-and-cleaning-entries/checkintegrity)). [#8607](https://github.com/JabRef/jabref/pull/8607)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ private Token getToken() {
if (c == '\\') {
currentBackslash = tokenEnd;
}
if ((bracesLevel == 0) && ((",;~-".indexOf(c) != -1) || Character.isWhitespace(c))) {
if ((bracesLevel == 0) && ((",;-".indexOf(c) != -1) || Character.isWhitespace(c))) {
break;
}
tokenEnd++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ public class HTMLUnicodeConversionMaps {
{"119978", "Oscr", "$\\mathcal{O}$"}, // script capital O -- possibly use \mathscr
{"119984", "Uscr", "$\\mathcal{U}$"}, // script capital U -- possibly use \mathscr
{"120598", "", "$\\epsilon$"}, // mathematical italic epsilon U+1D716 -- requires amsmath
{"120599", "", "{{\\˜{n}}}"}, // n with tilde
};

// List of combining accents
Expand Down Expand Up @@ -888,7 +889,6 @@ public class HTMLUnicodeConversionMaps {
// Manual corrections
LATEX_HTML_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Å which is less supported
LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol
LATEX_UNICODE_CONVERSION_MAP.put("'n", "ń");

// Manual additions
// Support relax to the extent that it is simply removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

public class RtfCharMap {

private HashMap<String, String> rtfMap = new HashMap<>();
private final HashMap<String, String> rtfMap = new HashMap<>();

public RtfCharMap() {
put("`a", "\\'e0");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,60 +1,40 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.stream.Stream;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

public class HtmlToUnicodeFormatterTest {

private HtmlToUnicodeFormatter formatter;

private static Stream<Arguments> data() {
return Stream.of(
Arguments.of("abc", "abc"),
Arguments.of("åäö", "&aring;&auml;&ouml;"),
Arguments.of("í", "i&#x301;"),
Arguments.of("Ε", "&Epsilon;"),
Arguments.of("ä", "&auml;"),
Arguments.of("ä", "&#228;"),
Arguments.of("ä", "&#xe4;"),
Arguments.of("ñ", "&#241;"),
Arguments.of("aaa", "<p>aaa</p>"),
Arguments.of("bread & butter", "<b>bread</b> &amp; butter"));
}

@BeforeEach
public void setUp() {
formatter = new HtmlToUnicodeFormatter();
}

@Test
public void formatWithoutHtmlCharactersReturnsSameString() {
assertEquals("abc", formatter.format("abc"));
}

@Test
public void formatMultipleHtmlCharacters() {
assertEquals("åäö", formatter.format("&aring;&auml;&ouml;"));
}

@Test
public void formatCombinedAccent() {
assertEquals("í", formatter.format("i&#x301;"));
}

@Test
public void testBasic() {
assertEquals("aaa", formatter.format("aaa"));
}

@Test
public void testUmlauts() {
assertEquals("ä", formatter.format("&auml;"));
assertEquals("ä", formatter.format("&#228;"));
assertEquals("ä", formatter.format("&#xe4;"));
}

@Test
public void testGreekLetter() {
assertEquals("Ε", formatter.format("&Epsilon;"));
}

@Test
public void testHTMLRemoveTags() {
assertEquals("aaa", formatter.format("<p>aaa</p>"));
}

@Test
public void formatExample() {
assertEquals("bread & butter", formatter.format(formatter.getExampleInput()));
@ParameterizedTest
@MethodSource("data")
void testFormatterWorksCorrectly(String expected, String input) {
assertEquals(expected, formatter.format(input));
}
}


Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ private static Stream<Arguments> data() {
Arguments.of("de la Vallée Poussin, Jean Charles Gabriel", new Author("Jean Charles Gabriel", "J. C. G.", "de la", "Vallée Poussin", null)),
Arguments.of("de la Vallée Poussin, J. C. G.", new Author("J. C. G.", "J. C. G.", "de la", "Vallée Poussin", null)),
Arguments.of("{K}ent-{B}oswell, E. S.", new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null)),
Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null))
Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null)),
Arguments.of("Nu{\\~{n}}ez, Jose", new Author("Jose", "J.", null, "Nu{\\~{n}}ez", null))
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,48 +29,35 @@ public void setUp() throws Exception {
checkerb = new PersonNamesChecker(database);
}

@Test
public void validNameFirstnameAuthor() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Kolb, Stefan"));
@ParameterizedTest
@MethodSource("provideValidNames")
public void validNames(String name) {
assertEquals(Optional.empty(), checker.checkValue(name));
}

@Test
public void validNameFirstnameAuthors() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Kolb, Stefan and Harrer, Simon"));
}
private static Stream<String> provideValidNames() {
return Stream.of(
"Kolb, Stefan", // single [Name, Firstname]
"Kolb, Stefan and Harrer, Simon", // multiple [Name, Firstname]
"Stefan Kolb", // single [Firstname Name]
"Stefan Kolb and Simon Harrer", // multiple [Firstname Name]

@Test
public void validFirstnameNameAuthor() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Stefan Kolb"));
}
"M. J. Gotay", // second name in front

@Test
public void validFirstnameNameAuthors() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("Stefan Kolb and Simon Harrer"));
"{JabRef}", // corporate name in brackets
"{JabRef} and Stefan Kolb", // mixed corporate name with name
"{JabRef} and Kolb, Stefan",

"hugo Para{\\~n}os" // tilde in name
);
}

@Test
public void complainAboutPersonStringWithTwoManyCommas() throws Exception {
public void complainAboutPersonStringWithTwoManyCommas() {
assertEquals(Optional.of("Names are not in the standard BibTeX format."),
checker.checkValue("Test1, Test2, Test3, Test4, Test5, Test6"));
}

@Test
public void doNotComplainAboutSecondNameInFront() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("M. J. Gotay"));
}

@Test
public void validCorporateNameInBrackets() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("{JabRef}"));
}

@Test
public void validCorporateNameAndPerson() throws Exception {
assertEquals(Optional.empty(), checker.checkValue("{JabRef} and Stefan Kolb"));
assertEquals(Optional.empty(), checker.checkValue("{JabRef} and Kolb, Stefan"));
}

@ParameterizedTest
@MethodSource("provideCorrectFormats")
public void authorNameInCorrectFormatsShouldNotComplain(String input) {
Expand All @@ -84,13 +71,19 @@ public void authorNameInIncorrectFormatsShouldComplain(String input) {
}

private static Stream<String> provideCorrectFormats() {
return Stream.of("", "Knuth", "Donald E. Knuth and Kurt Cobain and A. Einstein");
return Stream.of(
"",
"Knuth",
"Donald E. Knuth and Kurt Cobain and A. Einstein");
}

private static Stream<String> provideIncorrectFormats() {
return Stream.of(" Knuth, Donald E. ",
"Knuth, Donald E. and Kurt Cobain and A. Einstein",
", and Kurt Cobain and A. Einstein", "Donald E. Knuth and Kurt Cobain and ,",
"and Kurt Cobain and A. Einstein", "Donald E. Knuth and Kurt Cobain and");
return Stream.of(
" Knuth, Donald E. ",
"Knuth, Donald E. and Kurt Cobain and A. Einstein",
", and Kurt Cobain and A. Einstein",
"Donald E. Knuth and Kurt Cobain and ,",
"and Kurt Cobain and A. Einstein",
"Donald E. Knuth and Kurt Cobain and");
}
}