Skip to content

Commit

Permalink
Fixes JabRef#1181 and JabRef#1504: Improved "Normalize to BibTeX name…
Browse files Browse the repository at this point in the history
… format"

Added the jr, sr,... special cases for semicolon partition.
Fixed to avoid the "and", "{", ";" cases.
Added Test for every case.
  • Loading branch information
bruehldev committed Sep 8, 2016
1 parent cc38019 commit 7486d51
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 2 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- Fixed: the `key` field was not exported to the MS-Office 2008 XML format

### Removed
- The non-supported feature of being able to define file directories for any extension is removed. Still, it should work for older databases using the legacy `ps` and `pdf` fields, although we strongly encourage using the `file` field.
- The non-supported feature of being able to define file directories for any extension is removed. Still, it should work for older databases using the legacy `ps` and `pdf` fields, although we strongly encourage using the `file` field.



Expand Down Expand Up @@ -200,6 +200,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- Manage content selectors now saves edited existing lists again and only marks database as changed when the content selectors are changed
- When inserting a duplicate the right entry will be selected
- Preview panel height is now saved immediately, thus is shown correctly if the panel height is changed, closed and opened again
- Fixed [#1181](https://github.com/JabRef/jabref/issues/1181) and [#1504](https://github.com/JabRef/jabref/issues/1504): Improved "Normalize to BibTeX name format": names separated by commas and colons are now supported, and name affixes such as "Jr" are taken into account.

### Removed
- [#1610](https://github.com/JabRef/jabref/issues/1610) Removed the possibility to auto show or hide the groups interface
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
package net.sf.jabref.logic.formatter.bibtexfields;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;
import java.util.Objects;
import java.util.StringJoiner;

import net.sf.jabref.logic.formatter.Formatter;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.model.entry.AuthorList;
Expand All @@ -9,6 +15,9 @@
*/
public class NormalizeNamesFormatter implements Formatter {

// Lower-case name affixes and particles (e.g. "jr", "von"). When format() splits
// comma-separated input, a part that equals one of these terms (ignoring case) is kept
// attached to the current name instead of being counted as a first or last name.
private final Collection<String> avoidTermsInLowerCase = Arrays.asList("jr", "sr", "jnr", "snr", "von", "zu", "van", "der");

@Override
public String getName() {
return Localization.lang("Normalize names of persons");
Expand All @@ -21,6 +30,73 @@ public String getKey() {

/**
 * Normalizes a list of person names by parsing it with {@code AuthorList.parse} and
 * returning {@code getAsLastFirstNamesWithAnd(false)}.
 * <p>
 * Input that contains none of {@code " and "}, <code>"{"</code> or {@code ";"} is first
 * inspected for names that are delimited by commas only and, where the pattern is
 * recognized, rewritten with explicit separators before it is parsed. All other input is
 * handed to the parser unchanged.
 *
 * @param value the raw names; must not be {@code null}
 * @return the parsed names in "Last, First and Last, First" form
 * @throws NullPointerException if {@code value} is {@code null}
 */
@Override
public String format(String value) {
    Objects.requireNonNull(value);

    String namesToParse = value;
    boolean hasExplicitStructure = value.contains(" and ") || value.contains("{") || value.contains(";");
    if (!hasExplicitStructure) {
        // E.g., Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H.
        namesToParse = separateCommaDelimitedNames(value);
    }

    AuthorList authorList = AuthorList.parse(namesToParse);
    return authorList.getAsLastFirstNamesWithAnd(false);
}

/**
 * Rewrites comma-only input so that the boundaries between full names are explicit.
 * Returns {@code value} unchanged when the comma-separated parts cannot be paired up.
 */
private String separateCommaDelimitedNames(String value) {
    String[] parts = value.split(",");
    // Trim so that the space check and the affix lookup see the bare token.
    for (int i = 0; i < parts.length; i++) {
        parts[i] = parts[i].trim();
    }

    if (everyPartContainsSpace(parts)) {
        // Every part looks like "First Last", so each comma separates two full names.
        // NOTE(review): a single "Last, First" name whose halves both contain a space
        // (e.g. "van der Vliet, H. M.") also takes this branch - confirm this is intended.
        return value.replace(",", " and");
    }

    // Mark the parts that are name affixes; they do not count as a first or last name.
    boolean[] isAffix = new boolean[parts.length];
    int namePartCount = parts.length;
    for (int i = 0; i < parts.length; i++) {
        // Locale.ROOT keeps the lookup independent of the JVM's default locale.
        if (avoidTermsInLowerCase.contains(parts[i].toLowerCase(Locale.ROOT))) {
            isAffix[i] = true;
            namePartCount--;
        }
    }

    if ((namePartCount % 2) != 0) {
        // An odd number of name parts cannot be grouped into "Last, First" pairs.
        return value;
    }

    StringBuilder rebuilt = new StringBuilder();
    int affixesSeen = 0;
    for (int i = 0; i < parts.length; i++) {
        rebuilt.append(parts[i]);
        if (isAffix[i]) {
            // An affix stays inside the current name ("Last, Jr, First").
            affixesSeen++;
            rebuilt.append(',');
        } else if (((i - affixesSeen) % 2) == 0) {
            // Even position among the name parts: a last name, so the comma before the
            // first name has to be kept.
            rebuilt.append(',');
        } else {
            // Odd position: a first name, which ends a full name. A semicolon is emitted
            // here on the assumption that AuthorList.parse treats it as a name separator.
            rebuilt.append(';');
        }
    }
    return rebuilt.toString();
}

/**
 * Tells whether there is at least one part and every part contains a space.
 */
private static boolean everyPartContainsSpace(String[] parts) {
    if (parts.length == 0) {
        return false;
    }
    for (String part : parts) {
        if (!part.contains(" ")) {
            return false;
        }
    }
    return true;
}
Expand All @@ -35,4 +111,14 @@ public String getExampleInput() {
return "Albert Einstein and Alan Turing";
}

/**
 * Checks whether any element of {@code array} equals any element of {@code searchTerms}
 * once both are trimmed and lower-cased.
 *
 * @param array       the terms to look for
 * @param searchTerms the terms to compare against
 * @return {@code true} if at least one pair matches, {@code false} otherwise
 */
private static boolean contains(final String[] array, final String[] searchTerms) {
    // Nothing can match when either side is empty.
    if ((array.length == 0) || (searchTerms.length == 0)) {
        return false;
    }
    for (String currentTerm : array) {
        // Normalize once per outer element rather than once per comparison; Locale.ROOT
        // keeps the result independent of the JVM's default locale.
        final String normalizedTerm = currentTerm.trim().toLowerCase(Locale.ROOT);
        for (String beCompared : searchTerms) {
            if (beCompared.trim().toLowerCase(Locale.ROOT).equals(normalizedTerm)) {
                return true;
            }
        }
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,18 @@ public void lastThenJuniorThenFirst() {
expectCorrect("Name, della, first", "Name, della, first");
}

/**
 * A list of "Last, First" names delimited only by commas is expected to come back joined
 * with " and "; a single "Last, First" name is expected to come back as given.
 */
@Test
public void testConcatenationOfAuthorsWithCommas() {
    String commaDelimitedAuthors = "Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H.";
    String authorsJoinedWithAnd = "Ali Babar, M. and Dingsøyr, T. and Lago, P. and van der Vliet, H.";
    expectCorrect(commaDelimitedAuthors, authorsJoinedWithAnd);

    String singleAuthor = "Ali Babar, M.";
    expectCorrect(singleAuthor, singleAuthor);
}

/**
 * Pins the formatter's output for comma-delimited input whose parts cannot be grouped
 * into "Last, First" pairs (five parts).
 */
@Test
public void testOddCountOfCommas() {
    String unpairedInput = "Ali Babar, M., Dingsøyr, T., Lago P.";
    String expectedOutput = "Ali Babar, M., Dingsøyr T. Lago P.";
    expectCorrect(unpairedInput, expectedOutput);
}

/**
 * Formats {@code input} with the formatter under test and asserts that the result equals
 * {@code expected}.
 */
private void expectCorrect(String input, String expected) {
    String actual = formatter.format(input);
    Assert.assertEquals(expected, actual);
}
Expand All @@ -107,4 +119,35 @@ public void formatExample() {
assertEquals("Einstein, Albert and Turing, Alan", formatter.format(formatter.getExampleInput()));
}

}
/**
 * A "jr" affix between last and first name is expected to stay with its name while the
 * two comma-delimited authors are joined with " and ".
 */
@Test
public void testNameAffixe() {
    String inputWithAffix = "Surname, jr, First, Surname2, First2";
    String expectedOutput = "Surname, jr, First and Surname2, First2";
    expectCorrect(inputWithAffix, expectedOutput);
}

/**
 * Input containing an opening brace and a semicolon is expected to come back unchanged.
 */
@Test
public void testAvoidSpecialCharacter() {
    String inputWithBraceAndSemicolon = "Surname, {, First; Surname2, First2";
    expectCorrect(inputWithBraceAndSemicolon, inputWithBraceAndSemicolon);
}

/**
 * Pins the formatter's output for comma-delimited input in which one of the parts is the
 * word "and".
 */
@Test
public void testAndInName() {
    String inputWithAnd = "Surname, and , First, Surname2, First2";
    String expectedOutput = "Surname and , First, Surname2 First2";
    expectCorrect(inputWithAnd, expectedOutput);
}

/**
 * Two comma-delimited authors that each carry an affix ("Jr", "Sr") are expected to keep
 * their affixes and be joined with " and ".
 */
@Test
public void testMultipleNameAffixes() {
    String inputWithTwoAffixes = "Mair, Jr, Daniel, Brühl, Sr, Daniel";
    String expectedOutput = "Mair, Jr, Daniel and Brühl, Sr, Daniel";
    expectCorrect(inputWithTwoAffixes, expectedOutput);
}

/**
 * Full names in "First Last" order delimited by commas are expected to be converted to
 * "Last, First" and joined with " and ".
 */
@Test
public void testCommaSeperatedNames() {
    String firstLastNames = "Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez, Mélanie Tremblay, Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur";
    String lastFirstNames = "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. and Bemeur, Chantal";
    expectCorrect(firstLastNames, lastFirstNames);
}

/**
 * Same names as the comma-separated case, but with extra whitespace around some commas;
 * the expected output is the same "Last, First and ..." list.
 */
@Test
public void testMultipleSpaces() {
    String firstLastNamesWithExtraSpaces = "Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez , Mélanie Tremblay , Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur";
    String lastFirstNames = "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. and Bemeur, Chantal";
    expectCorrect(firstLastNamesWithExtraSpaces, lastFirstNames);
}
}
10 changes: 10 additions & 0 deletions src/test/java/net/sf/jabref/model/entry/AuthorListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,15 @@ public void testGetAuthorsLastFirstAnds() {

}

/**
 * Calls getAsLastFirstNamesWithAnd twice per argument on the same AuthorList and checks
 * that the repeated call returns the same text, and that switching the argument between
 * calls yields the text for the new argument.
 */
@Test
public void testGetAuthorsLastFirstAndsCaching() {
    // The method is expected to cache its result, so it is called twice with the same
    // argument. One parsed instance is reused for all calls so that the repeated calls
    // hit that instance's state regardless of whether AuthorList.parse itself hands out
    // shared instances.
    AuthorList authorList = AuthorList.parse("John Smith");
    Assert.assertEquals("Smith, John", authorList.getAsLastFirstNamesWithAnd(false));
    Assert.assertEquals("Smith, John", authorList.getAsLastFirstNamesWithAnd(false));
    Assert.assertEquals("Smith, J.", authorList.getAsLastFirstNamesWithAnd(true));
    Assert.assertEquals("Smith, J.", authorList.getAsLastFirstNamesWithAnd(true));
}

@Test
public void testGetAuthorsFirstFirst() {

Expand Down Expand Up @@ -636,4 +645,5 @@ public void parseNameWithBraces() throws Exception {
Author expected = new Author("H{e}lene", "H.", null, "Fiaux", null);
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("H{e}lene Fiaux"));
}

}

0 comments on commit 7486d51

Please sign in to comment.