Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parsing of backslashes at the end of field content #9677

Merged
merged 6 commits into from
Mar 18, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
### Changed

- 'Get full text' now also checks the file url. [#568](https://github.com/koppor/jabref/issues/568)
- `log.txt` now contains an entry if a BibTeX entry could not be parsed.
- We refined the 'main directory not found' error message. [#9625](https://github.com/JabRef/jabref/pull/9625)
- We modified the `Add Group` dialog to use the most recently selected group hierarchical context [#9141](https://github.com/JabRef/jabref/issues/9141)

Expand All @@ -37,6 +38,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where the command line export using `--exportMatches` flag does not create an output bib file [#9581](https://github.com/JabRef/jabref/issues/9581)
- We fixed an issue where custom field in the custom entry types could not be set to mulitline [#9609](https://github.com/JabRef/jabref/issues/9609)
- We fixed an issue where the Office XML exporter did not resolve BibTeX-Strings when exporting entries [forum#3741](https://discourse.jabref.org/t/exporting-bibtex-constant-strings-to-ms-office-2007-xml/3741)
- JabRef is now more relaxed when parsing field content: In case a field content ended with `\`, the combination `\}` was treated as plain `}`. [#9668](https://github.com/JabRef/jabref/issues/9668)


### Removed
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/gui/ClipBoardManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public static String getContents() {
return result;
}

public Optional<String> getBibTeXEntriesFromClipbaord() {
public Optional<String> getBibTeXEntriesFromClipboard() {
return Optional.ofNullable(clipboard.getContent(DragAndDropDataFormats.ENTRIES)).map(String.class::cast);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ private void generateKeys(List<BibEntry> entries) {
}

public List<BibEntry> handleBibTeXData(String entries) {
BibtexParser parser = new BibtexParser(preferencesService.getImportFormatPreferences(), Globals.getFileUpdateMonitor());
BibtexParser parser = new BibtexParser(preferencesService.getImportFormatPreferences(), fileUpdateMonitor);
try {
return parser.parseEntries(new ByteArrayInputStream(entries.getBytes(StandardCharsets.UTF_8)));
} catch (ParseException ex) {
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/jabref/gui/maintable/MainTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,9 @@ private void clearAndSelectLast() {

public void paste() {
List<BibEntry> entriesToAdd;
entriesToAdd = this.clipBoardManager.getBibTeXEntriesFromClipbaord()
entriesToAdd = this.clipBoardManager.getBibTeXEntriesFromClipboard()
.map(importHandler::handleBibTeXData)
.orElseGet(this::handleNonBibteXStringData);
.orElseGet(this::handleNonBibTeXStringData);

for (BibEntry entry : entriesToAdd) {
importHandler.importEntryWithDuplicateCheck(database, entry);
Expand All @@ -334,7 +334,7 @@ public void paste() {
}
}

private List<BibEntry> handleNonBibteXStringData() {
private List<BibEntry> handleNonBibTeXStringData() {
String data = ClipBoardManager.getContents();
List<BibEntry> entries = new ArrayList<>();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,10 +278,10 @@ private void parseAndAddEntry(String type) {

database.insertEntry(entry);
} catch (IOException ex) {
// Trying to make the parser more robust.
// This makes the parser more robust:
// If an exception is thrown when parsing an entry, drop the entry and try to resume parsing.

LOGGER.debug("Could not parse entry", ex);
LOGGER.warn("Could not parse entry", ex);
parserResult.addWarning(Localization.lang("Error occurred when parsing entry") + ": '" + ex.getMessage()
+ "'. " + "\n\n" + Localization.lang("JabRef skipped the entry."));
}
Expand All @@ -290,7 +290,7 @@ private void parseAndAddEntry(String type) {
private void parseJabRefComment(Map<String, String> meta) {
StringBuilder buffer;
try {
buffer = parseBracketedTextExactly();
buffer = parseBracketedFieldContent();
} catch (IOException e) {
// if we get an IO Exception here, then we have an unbracketed comment,
// which means that we should just return and the comment will be picked up as arbitrary text
Expand Down Expand Up @@ -500,6 +500,16 @@ private int peek() throws IOException {
return character;
}

private char[] peek2() throws IOException {
char character1 = (char) read();
char character2 = (char) read();
unread(character2);
unread(character1);
return new char[] {
character1, character2
};
}

private int read() throws IOException {
int character = pushbackReader.read();

Expand Down Expand Up @@ -635,7 +645,7 @@ private String parseFieldContent(Field field) throws IOException {
// Value is a string enclosed in brackets. There can be pairs
// of brackets inside a field, so we need to count the
// brackets to know when the string is finished.
StringBuilder text = parseBracketedTextExactly();
StringBuilder text = parseBracketedFieldContent();
value.append(fieldContentFormatter.format(text, field));
} else if (Character.isDigit((char) character)) { // value is a number
String number = parseTextToken();
Expand Down Expand Up @@ -668,7 +678,6 @@ private String parseTextToken() throws IOException {
int character = read();
if (character == -1) {
eof = true;

return token.toString();
}

Expand Down Expand Up @@ -886,7 +895,11 @@ private boolean isClosingBracketNext() {
}
}

private StringBuilder parseBracketedTextExactly() throws IOException {
/**
* This is called if a field in the form of <code>field = {content}</code> is parsed.
* The global variable <code>character</code> contains <code>{</code>.
*/
private StringBuilder parseBracketedFieldContent() throws IOException {
StringBuilder value = new StringBuilder();

consume('{');
Expand All @@ -898,7 +911,32 @@ private StringBuilder parseBracketedTextExactly() throws IOException {
while (true) {
character = (char) read();

boolean isClosingBracket = (character == '}') && (lastCharacter != '\\');
boolean isClosingBracket = false;
if (character == '}') {
if (lastCharacter == '\\') {
// We hit `\}`
// It could be that a user has a backslash at the end of the entry, but intended to put a file path
// We want to be relaxed at that case
// First described at https://github.com/JabRef/jabref/issues/9668
char[] peek2 = peek2();
koppor marked this conversation as resolved.
Show resolved Hide resolved
if ((peek2[0] == ',') && ((peek2[1] == OS.NEWLINE.charAt(0)) || (peek2[1] == '\n'))) {
// We hit '\}\r` or `\}\n`
// Heuristics: Unwanted escaping of }
//
// Two consequences:
//
// 1. Keep `\` as read
// This is already done
//
// 2. Treat `}` as closing bracket
isClosingBracket = true;
} else {
isClosingBracket = false;
}
} else {
isClosingBracket = true;
}
}

if (isClosingBracket && (brackets == 0)) {
return value;
Expand Down
57 changes: 57 additions & 0 deletions src/test/java/org/jabref/gui/externalfiles/ImportHandlerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.jabref.gui.externalfiles;

import java.util.List;

import javax.swing.undo.UndoManager;

import org.jabref.gui.DialogService;
import org.jabref.gui.StateManager;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ImportFormatReader;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.preferences.FilePreferences;
import org.jabref.preferences.PreferencesService;

import org.junit.jupiter.api.Test;
import org.mockito.Answers;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

class ImportHandlerTest {

@Test
void handleBibTeXData() {
ImportFormatPreferences importFormatPreferences = mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS);

PreferencesService preferencesService = mock(PreferencesService.class);
when(preferencesService.getImportFormatPreferences()).thenReturn(importFormatPreferences);
when(preferencesService.getFilePreferences()).thenReturn(mock(FilePreferences.class));

ImportHandler importHandler = new ImportHandler(
mock(BibDatabaseContext.class),
preferencesService,
new DummyFileUpdateMonitor(),
mock(UndoManager.class),
mock(StateManager.class),
mock(DialogService.class),
mock(ImportFormatReader.class));

List<BibEntry> bibEntries = importHandler.handleBibTeXData("""
@InProceedings{Wen2013,
library = {Tagungen\\2013\\KWTK45\\},
}
""");

BibEntry expected = new BibEntry(StandardEntryType.InProceedings)
.withCitationKey("Wen2013")
.withField(StandardField.LIBRARY, "Tagungen\\2013\\KWTK45\\");

assertEquals(List.of(expected), bibEntries.stream().toList());
}
}
59 changes: 57 additions & 2 deletions src/test/java/org/jabref/logic/bibtex/BibEntryWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ void writeReallyUnknownTypeTest() throws Exception {

@Test
void roundTripTest() throws IOException {
// @formatter:off
String bibtexEntry = """
@Article{test,
Author = {Foo Bar},
Expand All @@ -193,7 +192,63 @@ void roundTripTest() throws IOException {
Number = {1}
}
""".replaceAll("\n", OS.NEWLINE);
// @formatter:on

// read in bibtex string
ParserResult result = new BibtexParser(importFormatPreferences, fileMonitor).parse(new StringReader(bibtexEntry));
Collection<BibEntry> entries = result.getDatabase().getEntries();
BibEntry entry = entries.iterator().next();

// write out bibtex string
bibEntryWriter.write(entry, bibWriter, BibDatabaseMode.BIBTEX);

assertEquals(bibtexEntry, stringWriter.toString());
}

@Test
void roundTripKeepsFilePathWithBackslashes() throws IOException {
String bibtexEntry = """
@Article{,
file = {Tagungen\\2013\\KWTK45},
}
""".replaceAll("\n", OS.NEWLINE);

// read in bibtex string
ParserResult result = new BibtexParser(importFormatPreferences, fileMonitor).parse(new StringReader(bibtexEntry));
Collection<BibEntry> entries = result.getDatabase().getEntries();
BibEntry entry = entries.iterator().next();

// write out bibtex string
bibEntryWriter.write(entry, bibWriter, BibDatabaseMode.BIBTEX);

assertEquals(bibtexEntry, stringWriter.toString());
}

@Test
void roundTripKeepsEscapedCharacters() throws IOException {
String bibtexEntry = """
@Article{,
demofield = {Tagungen\\2013\\KWTK45},
}
""".replaceAll("\n", OS.NEWLINE);

// read in bibtex string
ParserResult result = new BibtexParser(importFormatPreferences, fileMonitor).parse(new StringReader(bibtexEntry));
Collection<BibEntry> entries = result.getDatabase().getEntries();
BibEntry entry = entries.iterator().next();

// write out bibtex string
bibEntryWriter.write(entry, bibWriter, BibDatabaseMode.BIBTEX);

assertEquals(bibtexEntry, stringWriter.toString());
}

@Test
void roundTripKeepsFilePathEndingWithBackslash() throws IOException {
String bibtexEntry = """
@Article{,
file = {dir\\},
}
""".replaceAll("\n", OS.NEWLINE);

// read in bibtex string
ParserResult result = new BibtexParser(importFormatPreferences, fileMonitor).parse(new StringReader(bibtexEntry));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,32 @@ void parseRecognizesAbsoluteFile() throws IOException {
assertEquals(Optional.of("D:\\Documents\\literature\\Tansel-PRL2006.pdf"), entry.getField(StandardField.FILE));
}

@Test
void parseRecognizesFinalSlashAsSlash() throws Exception {
ParserResult result = parser
.parse(new StringReader("""
@misc{,
test = {wired\\},
}
"""));

assertEquals(
List.of(new BibEntry()
.withField(new UnknownField("test"), "wired\\")),
result.getDatabase().getEntries()
);
}

/**
* JabRef's heuristics is not able to parse this special case.
*/
@Test
void parseFailsWithFinalSlashAsSlashWhenSingleLine() throws Exception {
ParserResult parserResult = parser.parse(new StringReader("@misc{, test = {wired\\}}"));
// In case JabRef was more relaxed, `assertFalse` would be provided here.
assertTrue(parserResult.hasWarnings());
}

@Test
void parseRecognizesDateFieldWithConcatenation() throws IOException {
ParserResult result = parser
Expand Down