Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: bibkey generated does not handle diacritics #4713

Merged
merged 8 commits into from
Mar 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ public class BibtexKeyGenerator extends BracketedPattern {
*/
public static final String APPENDIX_CHARACTERS = "abcdefghijklmnopqrstuvwxyz";
private static final Logger LOGGER = LoggerFactory.getLogger(BibtexKeyGenerator.class);
private static final String KEY_ILLEGAL_CHARACTERS = "{}(),\\\"#~^':`";
private static final String KEY_UNWANTED_CHARACTERS = "{}(),\\\"";
private static final String KEY_ILLEGAL_CHARACTERS = "{}(),\\\"-#~^':`";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chelseyong Do you, by chance, remember why you introduced - here? We have issues with that. See #6295 for details.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is due to a suggestion made in the issue.

Al-Ketan, 2019 -> Al-19 (dash in bibtex key is awkward, should probably be ignored in favor of the next letter and become AlK19)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your reply. I think there was some misunderstanding at #4709 (comment)

Well, dash is a perfectly legal char

We will fix it at #6300

private static final String KEY_UNWANTED_CHARACTERS = "{}(),\\\"-";
private final AbstractBibtexKeyPattern citeKeyPattern;
private final BibDatabase database;
private final BibtexKeyPatternPreferences bibtexKeyPatternPreferences;
Expand Down Expand Up @@ -72,15 +72,15 @@ private static String getAppendix(int number) {
}
}

public static String cleanKey(String key, boolean enforceLegalKey) {
public static String removeUnwantedCharacters(String key, boolean enforceLegalKey) {
if (!enforceLegalKey) {
// User doesn't want us to enforce legal characters. We must still look
// for whitespace and some characters such as commas, since these would
// interfere with parsing:
StringBuilder newKey = new StringBuilder();
for (int i = 0; i < key.length(); i++) {
char c = key.charAt(i);
if (!Character.isWhitespace(c) && (KEY_UNWANTED_CHARACTERS.indexOf(c) == -1)) {
if (KEY_UNWANTED_CHARACTERS.indexOf(c) == -1) {
newKey.append(c);
}
}
Expand All @@ -90,7 +90,7 @@ public static String cleanKey(String key, boolean enforceLegalKey) {
StringBuilder newKey = new StringBuilder();
for (int i = 0; i < key.length(); i++) {
char c = key.charAt(i);
if (!Character.isWhitespace(c) && (KEY_ILLEGAL_CHARACTERS.indexOf(c) == -1)) {
if (KEY_ILLEGAL_CHARACTERS.indexOf(c) == -1) {
newKey.append(c);
}
}
Expand All @@ -100,6 +100,10 @@ public static String cleanKey(String key, boolean enforceLegalKey) {
return StringUtil.replaceSpecialCharacters(newKey.toString());
}

/**
 * Cleans a key by first delegating to {@link #removeUnwantedCharacters(String, boolean)}
 * and then deleting every whitespace character from the result.
 *
 * @param key             the raw key to clean
 * @param enforceLegalKey passed through unchanged to {@code removeUnwantedCharacters}
 * @return the filtered key with all whitespace removed
 */
public static String cleanKey(String key, boolean enforceLegalKey) {
    String withoutUnwantedCharacters = removeUnwantedCharacters(key, enforceLegalKey);
    // Whitespace is stripped here, after the character filtering has run.
    return withoutUnwantedCharacters.replaceAll("\\s", "");
}

public String generateKey(BibEntry entry) {
String key;
StringBuilder stringBuilder = new StringBuilder();
Expand All @@ -123,7 +127,7 @@ public String generateKey(BibEntry entry) {
List<String> parts = parseFieldMarker(typeListEntry);
Character delimiter = bibtexKeyPatternPreferences.getKeywordDelimiter();
String pattern = "[" + parts.get(0) + "]";
String label = expandBrackets(pattern, delimiter, entry, database);
String label = expandBrackets(pattern, delimiter, entry, database, bibtexKeyPatternPreferences.isEnforceLegalKey());
// apply modifier if present
if (parts.size() > 1) {
label = applyModifiers(label, parts, 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ public String expand(BibEntry bibentry, Character keywordDelimiter, BibDatabase
return expandBrackets(this.pattern, keywordDelimiter, bibentry, database);
}

/**
 * Convenience overload that expands a pattern without enforcing legal keys.
 * Forwards to the five-argument {@code expandBrackets} with {@code isEnforceLegalKey}
 * set to {@code false}.
 *
 * @param pattern          the pattern to expand
 * @param keywordDelimiter the keyword delimiter, forwarded unchanged
 * @param entry            the entry to take field values from
 * @param database         the database for field resolving, forwarded unchanged
 * @return the result of the five-argument overload
 */
public static String expandBrackets(String pattern, Character keywordDelimiter, BibEntry entry, BibDatabase database) {
    final boolean enforceLegalKey = false;
    return expandBrackets(pattern, keywordDelimiter, entry, database, enforceLegalKey);
}

/**
* Expands a pattern
*
Expand All @@ -101,7 +105,7 @@ public String expand(BibEntry bibentry, Character keywordDelimiter, BibDatabase
* @param database The database for field resolving. May be null.
* @return The expanded pattern. Not null.
*/
public static String expandBrackets(String pattern, Character keywordDelimiter, BibEntry entry, BibDatabase database) {
public static String expandBrackets(String pattern, Character keywordDelimiter, BibEntry entry, BibDatabase database, boolean isEnforceLegalKey) {
Objects.requireNonNull(pattern);
Objects.requireNonNull(entry);
StringBuilder sb = new StringBuilder();
Expand All @@ -122,10 +126,10 @@ public static String expandBrackets(String pattern, Character keywordDelimiter,
// check whether there is a modifier on the end such as
// ":lower":
if (fieldParts.size() <= 1) {
sb.append(getFieldValue(entry, token, keywordDelimiter, database));
sb.append(getFieldValue(entry, token, keywordDelimiter, database, isEnforceLegalKey));
} else {
// apply modifiers:
String fieldValue = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database);
String fieldValue = getFieldValue(entry, fieldParts.get(0), keywordDelimiter, database, isEnforceLegalKey);
sb.append(applyModifiers(fieldValue, fieldParts, 1));
}
// Fetch and discard the closing ']'
Expand Down Expand Up @@ -156,7 +160,7 @@ public static String expandBrackets(String pattern, Character keywordDelimiter,
*
* @return String containing the evaluation result. Empty string if the pattern cannot be resolved.
*/
public static String getFieldValue(BibEntry entry, String value, Character keywordDelimiter, BibDatabase database) {
public static String getFieldValue(BibEntry entry, String value, Character keywordDelimiter, BibDatabase database, boolean isEnforceLegalKey) {

String val = value;
try {
Expand Down Expand Up @@ -224,15 +228,8 @@ else if ("authorLast".equals(val)) {
return authNofMth(authString, Integer.parseInt(nums[0]),
Integer.parseInt(nums[1]));
} else if (val.matches("auth\\d+")) {
// authN. First N chars of the first author's last
// name.

String fa = firstAuthor(authString);
int num = Integer.parseInt(val.substring(4));
if (num > fa.length()) {
num = fa.length();
}
return fa.substring(0, num);
return authN(authString, num, isEnforceLegalKey);
} else if (val.matches("authors\\d+")) {
return nAuthors(authString, Integer.parseInt(val.substring(7)));
} else {
Expand Down Expand Up @@ -840,6 +837,18 @@ public static String authNofMth(String authorField, int n, int m) {
}
}

/**
 * Returns the first N characters of the first author's last name.
 * <p>
 * The author string is first passed through
 * {@code BibtexKeyGenerator.removeUnwantedCharacters}, so the prefix is taken
 * from the already-filtered name. If the name is shorter than {@code num},
 * the whole name is returned.
 *
 * @param authString        the author field content
 * @param num               the maximum number of characters to return
 * @param isEnforceLegalKey passed through unchanged to {@code removeUnwantedCharacters}
 * @return at most {@code num} leading characters of the value returned by {@code firstAuthor}
 */
public static String authN(String authString, int num, boolean isEnforceLegalKey) {
    String cleanedAuthors = BibtexKeyGenerator.removeUnwantedCharacters(authString, isEnforceLegalKey);
    String lastName = firstAuthor(cleanedAuthors);
    // Never ask for more characters than the name actually has.
    int prefixLength = Math.min(num, lastName.length());
    return lastName.substring(0, prefixLength);
}

/**
* authshort format:
* added by Kolja Brix, [email protected]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,73 +119,73 @@ public void testMakeLabelAndCheckLegalKeys() throws ParseException {

Optional<BibEntry> entry0 = BibtexParser.singleFromString(
"@ARTICLE{kohn, author={Andreas Köning}, year={2000}}", importFormatPreferences, fileMonitor);
assertEquals("Koen",
assertEquals("Koe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Áöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Aoen",
assertEquals("Aoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Éöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Eoen",
assertEquals("Eoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Íöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Ioen",
assertEquals("Ioe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Ĺöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Loen",
assertEquals("Loe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Ńöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Noen",
assertEquals("Noe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Óöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Ooen",
assertEquals("Ooe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Ŕöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Roen",
assertEquals("Roe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Śöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Soen",
assertEquals("Soe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Úöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Uoen",
assertEquals("Uoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Ýöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Yoen",
assertEquals("Yoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Źöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Zoen",
assertEquals("Zoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));
}
Expand All @@ -197,31 +197,43 @@ public void testMakeLabelAndCheckLegalKeys() throws ParseException {
public void testMakeLabelAndCheckLegalKeysAccentGrave() throws ParseException {
Optional<BibEntry> entry0 = BibtexParser.singleFromString(
"@ARTICLE{kohn, author={Andreas Àöning}, year={2000}}", importFormatPreferences, fileMonitor);
assertEquals("Aoen",
assertEquals("Aoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Èöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Eoen",
assertEquals("Eoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Ìöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Ioen",
assertEquals("Ioe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Òöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Ooen",
assertEquals("Ooe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andreas Ùöning}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("Uoen",
assertEquals("Uoe",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Oraib Al-Ketan}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("AlK",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));

entry0 = BibtexParser.singleFromString("@ARTICLE{kohn, author={Andrés D'Alessandro}, year={2000}}",
importFormatPreferences, fileMonitor);
assertEquals("DAl",
BibtexKeyGenerator.cleanKey(BibtexKeyGenerator.generateKey(entry0.get(), "auth3",
new BibDatabase()), true));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ void generateKeyTitleRegexe() {
bibtexKeyPattern.setDefaultValue("[title:regex(\" \",\"-\")]");
entry.setField("title", "Please replace the spaces");
new BibtexKeyGenerator(bibtexKeyPattern, database, preferences).generateAndSetKey(entry);
assertEquals(Optional.of("Please-Replace-the-Spaces"), entry.getCiteKeyOptional());
assertEquals(Optional.of("PleaseReplacetheSpaces"), entry.getCiteKeyOptional());
}

@Test
Expand Down