Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing of save actions #9122

Merged
merged 12 commits into from
Sep 3, 2022
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed some visual glitches with the linked files editor field in the entry editor and increased its height. [#8823](https://github.com/JabRef/jabref/issues/8823)
- We fixed several bugs regarding the manual and the autosave of library files that sometimes led to exceptions or data loss. [#9067](https://github.com/JabRef/jabref/pull/9067), [#8448](https://github.com/JabRef/jabref/issues/8484), [#8746](https://github.com/JabRef/jabref/issues/8746), [#6684](https://github.com/JabRef/jabref/issues/6684), [#6644](https://github.com/JabRef/jabref/issues/6644), [#6102](https://github.com/JabRef/jabref/issues/6102), [#6002](https://github.com/JabRef/jabref/issues/6000)
- We fixed an issue where applied save actions on saving the library file would lead to the dialog "The library has been modified by another program" popping up. [#4877](https://github.com/JabRef/jabref/issues/4877)
- We fixed an issue where title case didn't capitalize words after en-dash characters [#9068]
- We fixed an issue where JabRef would not exit when a connection to a LibreOffice document was established previously and the document is still open [#9075](https://github.com/JabRef/jabref/issues/9075)
- We fixed issues with save actions not correctly loaded when opening the library. [#9122](https://github.com/JabRef/jabref/pull/9122)
- We fixed an issue where title case didn't capitalize words after en-dash characters. [#9068](https://github.com/JabRef/jabref/pull/9068)
- We fixed an issue where JabRef would not exit when a connection to a LibreOffice document was established previously and the document is still open. [#9075](https://github.com/JabRef/jabref/issues/9075)

### Removed

Expand Down
76 changes: 35 additions & 41 deletions src/main/java/org/jabref/logic/cleanup/Cleanups.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package org.jabref.logic.cleanup;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.logic.formatter.Formatters;
import org.jabref.logic.formatter.IdentityFormatter;
Expand All @@ -14,6 +18,7 @@
import org.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter;
import org.jabref.logic.layout.format.LatexToUnicodeFormatter;
import org.jabref.logic.layout.format.ReplaceUnicodeLigaturesFormatter;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.FieldFactory;
import org.jabref.model.entry.field.InternalField;
import org.jabref.model.entry.field.StandardField;
Expand All @@ -25,6 +30,19 @@ public class Cleanups {
public static final FieldFormatterCleanups RECOMMEND_BIBTEX_ACTIONS;
public static final FieldFormatterCleanups RECOMMEND_BIBLATEX_ACTIONS;

/**
* This parses the key/list map of fields and clean up actions for the field.
*
* General format for one key/list map: <code>...[...]</code> - <code>field[formatter1,formatter2,...]</code>
* Multiple are written as <code>...[...]...[...]...[...]</code>
* <code>field1[formatter1,formatter2,...]field2[formatter3,formatter4,...]</code>
*
* The idea is that characters are field names until <code>[</code> is reached and that formatter lists are terminated by <code>]</code>
*
* Example: <code>pages[normalize_page_numbers]title[escapeAmpersands,escapeDollarSign,escapeUnderscores,latex_cleanup]</code>
*/
private static final Pattern FIELD_FORMATTER_CLEANUP_PATTERN = Pattern.compile("([^\\[]+)\\[([^\\]]+)\\]");

static {
List<FieldFormatterCleanup> defaultFormatters = new ArrayList<>();
defaultFormatters.add(new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()));
Expand Down Expand Up @@ -58,52 +76,28 @@ public static List<Formatter> getBuiltInFormatters() {
public static List<FieldFormatterCleanup> parse(String formatterString) {
if ((formatterString == null) || formatterString.isEmpty()) {
// no save actions defined in the meta data
return new ArrayList<>();
return Collections.emptyList();
}

List<FieldFormatterCleanup> actions = new ArrayList<>();
// first remove all newlines for easier parsing
String formatterStringWithoutLineBreaks = StringUtil.unifyLineBreaks(formatterString, "");

// read concrete actions
int startIndex = 0;
List<FieldFormatterCleanup> result = new ArrayList<>();

// first remove all newlines for easier parsing
String remainingString = StringUtil.unifyLineBreaks(formatterString, "");

try {
while (startIndex < formatterString.length()) {
// read the field name
int currentIndex = remainingString.indexOf('[');
String fieldKey = remainingString.substring(0, currentIndex);
int endIndex = remainingString.indexOf(']');
startIndex += endIndex + 1;

// read each formatter
int tokenIndex = remainingString.indexOf(',');
do {
boolean doBreak = false;
if ((tokenIndex == -1) || (tokenIndex > endIndex)) {
tokenIndex = remainingString.indexOf(']');
doBreak = true;
}

String formatterKey = remainingString.substring(currentIndex + 1, tokenIndex);
actions.add(new FieldFormatterCleanup(FieldFactory.parseField(fieldKey), getFormatterFromString(formatterKey)));

remainingString = remainingString.substring(tokenIndex + 1);
if (remainingString.startsWith("]") || doBreak) {
break;
}
tokenIndex = remainingString.indexOf(',');

currentIndex = -1;
} while (true);
}
} catch (StringIndexOutOfBoundsException ignore) {
// if this exception occurs, the remaining part of the save actions string is invalid.
// Thus we stop parsing and take what we have parsed until now
return actions;
Matcher matcher = FIELD_FORMATTER_CLEANUP_PATTERN.matcher(formatterStringWithoutLineBreaks);
while (matcher.find()) {
String fieldKey = matcher.group(1);
Field field = FieldFactory.parseField(fieldKey);

String fieldString = matcher.group(2);

List<FieldFormatterCleanup> fieldFormatterCleanups = Arrays.stream(fieldString.split(","))
.map(formatterKey -> getFormatterFromString(formatterKey))
.map(formatter -> new FieldFormatterCleanup(field, formatter))
.toList();
result.addAll(fieldFormatterCleanups);
}
return actions;
return result;
}

public static FieldFormatterCleanups parse(List<String> formatterMetaList) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public FieldFormatterCleanups(boolean enabled, List<FieldFormatterCleanup> actio
this.actions = Objects.requireNonNull(actions);
}

/**
* Note: String parsing is done at {@link Cleanups#parse(String)}
*/
private static String getMetaDataString(List<FieldFormatterCleanup> actionList, String newLineSeparator) {
// first, group all formatters by the field for which they apply
Map<Field, List<String>> groupedByField = new TreeMap<>(Comparator.comparing(Field::getName));
Expand All @@ -52,7 +55,7 @@ private static String getMetaDataString(List<FieldFormatterCleanup> actionList,

StringJoiner joiner = new StringJoiner(",", "[", "]" + newLineSeparator);
entry.getValue().forEach(joiner::add);
result.append(joiner.toString());
result.append(joiner);
}

return result.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,14 +288,13 @@ private void parseAndAddEntry(String type) {
}

private void parseJabRefComment(Map<String, String> meta) {
StringBuilder buffer = null;
StringBuilder buffer;
try {
buffer = parseBracketedTextExactly();
} catch (IOException e) {
/* if we get an IO Exception here, than we have an unbracketed comment,
* which means that we should just return and the comment will be picked up as arbitrary text
* by the parser
*/
// if we get an IO Exception here, then we have an unbracketed comment,
// which means that we should just return and the comment will be picked up as arbitrary text
// by the parser
LOGGER.info("Found unbracketed comment");
return;
}
Expand Down
183 changes: 183 additions & 0 deletions src/test/java/org/jabref/logic/cleanup/CleanupsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
package org.jabref.logic.cleanup;

import java.util.ArrayList;
import java.util.List;

import org.jabref.logic.formatter.bibtexfields.EscapeAmpersandsFormatter;
import org.jabref.logic.formatter.bibtexfields.EscapeDollarSignFormatter;
import org.jabref.logic.formatter.bibtexfields.EscapeUnderscoresFormatter;
import org.jabref.logic.formatter.bibtexfields.LatexCleanupFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeMonthFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter;
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.StandardField;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests for {@link Cleanups#parse(String)}.
 *
 * Every test hands the parser a text block with one {@code field[formatter1,formatter2,...]}
 * group per line and asserts that the returned list contains one {@link FieldFormatterCleanup}
 * per field/formatter pair, in the order in which the pairs appear in the input.
 */
class CleanupsTest {

// Seven fields: month and pages with one formatter each, the remaining five fields with the
// same four formatters each. Expects all 22 field/formatter pairs in input order.
@Test
void parserKeepsSaveActions() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
month[normalize_month]
pages[normalize_page_numbers]
title[escapeAmpersands,escapeDollarSign,escapeUnderscores,latex_cleanup]
booktitle[escapeAmpersands,escapeDollarSign,escapeUnderscores,latex_cleanup]
publisher[escapeAmpersands,escapeDollarSign,escapeUnderscores,latex_cleanup]
journal[escapeAmpersands,escapeDollarSign,escapeUnderscores,latex_cleanup]
abstract[escapeAmpersands,escapeDollarSign,escapeUnderscores,latex_cleanup]
""");

// 30 is only the initial capacity of the list; 22 elements are added below
List<FieldFormatterCleanup> expected = new ArrayList<>(30);
expected.add(new FieldFormatterCleanup(StandardField.MONTH, new NormalizeMonthFormatter()));
expected.add(new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()));
for (Field field : List.of(StandardField.TITLE, StandardField.BOOKTITLE, StandardField.PUBLISHER, StandardField.JOURNAL, StandardField.ABSTRACT)) {
expected.add(new FieldFormatterCleanup(field, new EscapeAmpersandsFormatter()));
expected.add(new FieldFormatterCleanup(field, new EscapeDollarSignFormatter()));
expected.add(new FieldFormatterCleanup(field, new EscapeUnderscoresFormatter()));
expected.add(new FieldFormatterCleanup(field, new LatexCleanupFormatter()));
}

assertEquals(expected, fieldFormatterCleanups);
}

// Smallest case: a single field with a single formatter.
@Test
void parserParsesLatexCleanupFormatter() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
title[latex_cleanup]
""");
assertEquals(
List.of(new FieldFormatterCleanup(StandardField.TITLE, new LatexCleanupFormatter())),
fieldFormatterCleanups);
}

// One field with two comma-separated formatters.
@Test
void parserParsesTwoFormatters() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
title[escapeUnderscores,latex_cleanup]
""");
assertEquals(
List.of(
new FieldFormatterCleanup(StandardField.TITLE, new EscapeUnderscoresFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new LatexCleanupFormatter())
),
fieldFormatterCleanups);
}

// One field with four comma-separated formatters.
@Test
void parserParsesFourFormatters() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
title[escapeAmpersands,escapeDollarSign,escapeUnderscores,latex_cleanup]
""");
assertEquals(
List.of(
new FieldFormatterCleanup(StandardField.TITLE, new EscapeAmpersandsFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new EscapeDollarSignFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new EscapeUnderscoresFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new LatexCleanupFormatter())
),
fieldFormatterCleanups);
}

// Two fields, each with a comma-separated list of two formatters (four cleanups in total).
@Test
void parserParsesTwoFormattersWithCommas() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
title[escapeUnderscores,latex_cleanup]
booktitle[escapeAmpersands,escapeDollarSign]
""");
assertEquals(
List.of(
new FieldFormatterCleanup(StandardField.TITLE, new EscapeUnderscoresFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new LatexCleanupFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeAmpersandsFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeDollarSignFormatter())
),
fieldFormatterCleanups);
}

// Two fields: the first with a single formatter (no comma), the second with two formatters.
@Test
void parserParsesTwoFormattersOneWithComma() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
pages[normalize_page_numbers]
booktitle[escapeAmpersands,escapeDollarSign]
""");
assertEquals(
List.of(
new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeAmpersandsFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeDollarSignFormatter())
),
fieldFormatterCleanups);
}

// Three fields: a single-formatter field followed by two fields with two formatters each.
@Test
void parserParsesThreeFormattersTwoWithComma() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
pages[normalize_page_numbers]
title[escapeUnderscores,latex_cleanup]
booktitle[escapeAmpersands,escapeDollarSign]
""");
assertEquals(
List.of(
new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new EscapeUnderscoresFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new LatexCleanupFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeAmpersandsFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeDollarSignFormatter())
),
fieldFormatterCleanups);
}

// NOTE(review): despite the method name, both fields in this input carry the same three
// formatters, so six cleanups are expected.
@Test
void parserWithTwoAndThree() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
title[escapeAmpersands,escapeUnderscores,latex_cleanup]
booktitle[escapeAmpersands,escapeUnderscores,latex_cleanup]
""");

// 30 is only the initial capacity of the list; six elements are added below
List<FieldFormatterCleanup> expected = new ArrayList<>(30);
for (Field field : List.of(StandardField.TITLE, StandardField.BOOKTITLE)) {
expected.add(new FieldFormatterCleanup(field, new EscapeAmpersandsFormatter()));
expected.add(new FieldFormatterCleanup(field, new EscapeUnderscoresFormatter()));
expected.add(new FieldFormatterCleanup(field, new LatexCleanupFormatter()));
}

assertEquals(expected, fieldFormatterCleanups);
}

// NOTE(review): despite the method name, this input yields five cleanups:
// two for title followed by three for booktitle.
@Test
void parserWithFourEntries() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
title[escapeUnderscores,latex_cleanup]
booktitle[escapeAmpersands,escapeUnderscores,latex_cleanup]
""");
assertEquals(
List.of(
new FieldFormatterCleanup(StandardField.TITLE, new EscapeUnderscoresFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new LatexCleanupFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeAmpersandsFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeUnderscoresFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new LatexCleanupFormatter())
),
fieldFormatterCleanups);
}

// Mirror of the previous case: three formatters for title followed by two for booktitle.
@Test
void parserTest() {
List<FieldFormatterCleanup> fieldFormatterCleanups = Cleanups.parse("""
title[escapeAmpersands,escapeUnderscores,latex_cleanup]
booktitle[escapeAmpersands,latex_cleanup]
""");
assertEquals(
List.of(
new FieldFormatterCleanup(StandardField.TITLE, new EscapeAmpersandsFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new EscapeUnderscoresFormatter()),
new FieldFormatterCleanup(StandardField.TITLE, new LatexCleanupFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new EscapeAmpersandsFormatter()),
new FieldFormatterCleanup(StandardField.BOOKTITLE, new LatexCleanupFormatter())
),
fieldFormatterCleanups);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,16 @@ public class FieldFormatterCleanupsTest {

@BeforeEach
public void setUp() {
entry = new BibEntry();
entry.setType(StandardEntryType.InProceedings);
entry.setCitationKey("6055279");
entry.setField(StandardField.TITLE, "Educational session 1");
entry.setField(StandardField.BOOKTITLE, "Custom Integrated Circuits Conference (CICC), 2011 IEEE");
entry.setField(StandardField.YEAR, "2011");
entry.setField(StandardField.MONTH, "Sept.");
entry.setField(StandardField.PAGES, "1-7");
entry.setField(StandardField.ABSTRACT, "Start of the above-titled section of the conference proceedings record.");
entry.setField(StandardField.DOI, "10.1109/CICC.2011.6055279");
entry.setField(StandardField.ISSN, "0886-5930");
entry = new BibEntry(StandardEntryType.InProceedings)
.withCitationKey("6055279")
.withField(StandardField.TITLE, "Educational session 1")
.withField(StandardField.BOOKTITLE, "Custom Integrated Circuits Conference (CICC), 2011 IEEE")
.withField(StandardField.YEAR, "2011")
.withField(StandardField.MONTH, "Sept.")
.withField(StandardField.PAGES, "1-7")
.withField(StandardField.ABSTRACT, "Start of the above-titled section of the conference proceedings record.")
.withField(StandardField.DOI, "10.1109/CICC.2011.6055279")
.withField(StandardField.ISSN, "0886-5930");
}

@Test
Expand Down
Loading