Skip to content

Commit

Permalink
Fix expansion of bracketed expressions in RegExpBasedFileFinder (#7338)
Browse files Browse the repository at this point in the history
  • Loading branch information
k3KAW8Pnf7mkmdSMPHz27 authored Jan 24, 2021
1 parent 86d52cd commit 7672b2d
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 112 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where the "Find unlinked files" dialog would freeze JabRef on importing. [#7205](https://github.com/JabRef/jabref/issues/7205)
- We fixed an issue where the "Find unlinked files" would stop importing when importing a single file failed. [#7206](https://github.com/JabRef/jabref/issues/7206)
- We fixed an issue where an exception would be displayed for previewing and preferences when a custom theme has been configured but is missing [#7177](https://github.com/JabRef/jabref/issues/7177)
- We fixed an issue where the regex-based file search misinterpreted specific symbols [#4342](https://github.com/JabRef/jabref/issues/4342)
- We fixed an issue where the Harvard RTF exporter used the wrong default file extension. [4508](https://github.com/JabRef/jabref/issues/4508)
- We fixed an issue where the Harvard RTF exporter did not use the new authors formatter and therefore did not export "organization" authors correctly. [4508](https://github.com/JabRef/jabref/issues/4508)
- We fixed an issue where the field `urldate` was not exported to the corresponding fields `YearAccessed`, `MonthAccessed`, `DayAccessed` in MS Office XML [#7354](https://github.com/JabRef/jabref/issues/7354)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ public static String expandBrackets(String pattern, Character keywordDelimiter,
* @param database The {@link BibDatabase} for field resolving. May be null.
* @return a function accepting a bracketed expression and returning the result of expanding it
*/
private static Function<String, String> expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) {
public static Function<String, String> expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) {
return (String bracket) -> {
String expandedPattern;
List<String> fieldParts = parseFieldAndModifiers(bracket);
Expand Down
78 changes: 41 additions & 37 deletions src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.jabref.logic.citationkeypattern.BracketedPattern;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;

Expand All @@ -28,7 +27,6 @@ class RegExpBasedFileFinder implements FileFinder {

private static final Pattern ESCAPE_PATTERN = Pattern.compile("([^\\\\])\\\\([^\\\\])");

private static final Pattern SQUARE_BRACKETS_PATTERN = Pattern.compile("\\[.*?\\]");
private final String regExp;
private final Character keywordDelimiter;

Expand All @@ -41,21 +39,41 @@ class RegExpBasedFileFinder implements FileFinder {
}

/**
* Takes a string that contains bracketed expression and expands each of these using getFieldAndFormat.
* <p>
* Unknown Bracket expressions are silently dropped.
* Creates a Pattern that matches the file name corresponding to the last element of {@code fileParts} with any bracketed patterns expanded.
*
* @throws IOException if the expanded file name pattern is not a valid regular expression (wraps the PatternSyntaxException)
*/
public static String expandBrackets(String bracketString, BibEntry entry, BibDatabase database,
Character keywordDelimiter) {
Matcher matcher = SQUARE_BRACKETS_PATTERN.matcher(bracketString);
StringBuilder expandedStringBuffer = new StringBuilder();
while (matcher.find()) {
String replacement = BracketedPattern.expandBrackets(matcher.group(), keywordDelimiter, entry, database);
matcher.appendReplacement(expandedStringBuffer, replacement);
private Pattern createFileNamePattern(String[] fileParts, String extensionRegExp, BibEntry entry) throws IOException {
// Protect the extension marker so that it isn't treated as a bracketed pattern
String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER);

// We need to supply a custom function to deal with the content of a bracketed expression and expandBracketContent is the default function
Function<String, String> expandBracket = BracketedPattern.expandBracketContent(keywordDelimiter, entry, null);
// but, we want to post-process the expanded content so that it can be used as a regex for finding a file name
Function<String, String> bracketToFileNameRegex = expandBracket.andThen(RegExpBasedFileFinder::toFileNameRegex);

String expandedBracketAsFileNameRegex = BracketedPattern.expandBrackets(filePart, bracketToFileNameRegex);

String fileNamePattern = expandedBracketAsFileNameRegex
.replaceAll(EXT_MARKER, extensionRegExp) // Replace the extension marker
.replaceAll("\\\\\\\\", "\\\\");
try {
return Pattern.compile('^' + fileNamePattern + '$', Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
throw new IOException(String.format("There is a syntax error in the regular expression %s used to search for files", fileNamePattern), e);
}
matcher.appendTail(expandedStringBuffer);
}

return expandedStringBuffer.toString();
/**
 * Helper method for both exact matching (if the file name was not created by JabRef) and cleaned file name matching.
 * <p>
 * The expanded content is always quoted so that it is matched literally. If cleaning the content for file name use
 * changes it, the result is an alternation of the quoted original and the quoted cleaned content.
 *
 * @param expandedContent the expanded content of a bracketed expression
 * @return a String representation of a regex matching the expanded content and the expanded content cleaned for file name use
 */
private static String toFileNameRegex(String expandedContent) {
    var cleanedContent = FileNameCleaner.cleanFileName(expandedContent);
    String exactMatch = Pattern.quote(expandedContent);
    if (expandedContent.equals(cleanedContent)) {
        return exactMatch;
    }
    // The alternation must be enclosed in its own group: '|' has the lowest precedence in a regex, so without
    // the enclosing group it would split the whole surrounding file name pattern (prefix, suffix and anchors)
    // instead of only choosing between the two spellings of this bracket. The enclosing group is non-capturing
    // and the two inner groups are kept so that the numbering of capturing groups stays unchanged.
    return "(?:(" + exactMatch + ")|(" + Pattern.quote(cleanedContent) + "))";
}

/**
Expand Down Expand Up @@ -142,9 +160,7 @@ private List<Path> findFile(final BibEntry entry, final Path directory, final St
}

for (int index = 0; index < (fileParts.length - 1); index++) {

String dirToProcess = fileParts[index];
dirToProcess = expandBrackets(dirToProcess, entry, null, keywordDelimiter);

if (dirToProcess.matches("^.:$")) { // Windows Drive Letter
actualDirectory = Path.of(dirToProcess + '/');
Expand Down Expand Up @@ -179,33 +195,21 @@ private List<Path> findFile(final BibEntry entry, final Path directory, final St
resultFiles.addAll(findFile(entry, path, restOfFileString, extensionRegExp));
}
} catch (UncheckedIOException ioe) {
throw new IOException(ioe);
throw ioe.getCause();
}
} // End process directory information
}

// Last step: check if the given file can be found in this directory
String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER);
String filenameToLookFor = expandBrackets(filePart, entry, null, keywordDelimiter).replaceAll(EXT_MARKER, extensionRegExp);

try {
final Pattern toMatch = Pattern.compile('^' + filenameToLookFor.replaceAll("\\\\\\\\", "\\\\") + '$',
Pattern.CASE_INSENSITIVE);
BiPredicate<Path, BasicFileAttributes> matcher = (path, attributes) -> toMatch.matcher(path.getFileName().toString()).matches();
resultFiles.addAll(collectFilesWithMatcher(actualDirectory, matcher));
} catch (UncheckedIOException | PatternSyntaxException e) {
throw new IOException("Could not look for " + filenameToLookFor, e);
}

return resultFiles;
}

private List<Path> collectFilesWithMatcher(Path actualDirectory, BiPredicate<Path, BasicFileAttributes> matcher) {
Pattern toMatch = createFileNamePattern(fileParts, extensionRegExp, entry);
BiPredicate<Path, BasicFileAttributes> matcher = (path, attributes) -> toMatch.matcher(path.getFileName().toString()).matches();
try (Stream<Path> pathStream = Files.find(actualDirectory, 1, matcher, FileVisitOption.FOLLOW_LINKS)) {
return pathStream.collect(Collectors.toList());
} catch (UncheckedIOException | IOException ioe) {
return Collections.emptyList();
resultFiles.addAll(pathStream.collect(Collectors.toList()));
} catch (UncheckedIOException uncheckedIOException) {
// Previously, an empty list was returned here on both IOException and UncheckedIOException
throw uncheckedIOException.getCause();
}
return resultFiles;
}

private boolean isSubDirectory(Path rootDirectory, Path path) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,4 +324,44 @@ void expandBracketsLastNameWithChineseCharacters() {

assertEquals("杨秀群", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null));
}

/**
 * Checks {@code BracketedPattern.expandBrackets} against the bracket expansion cases listed below:
 * empty input, an unknown key, a single field, a field followed by plain text, and several fields in one pattern.
 */
@Test
void expandBracketsWithTestCasesFromRegExpBasedFileFinder() {
    BibEntry entry = new BibEntry(StandardEntryType.Article).withCitationKey("HipKro03");
    entry.setField(StandardField.AUTHOR, "Eric von Hippel and Georg von Krogh");
    entry.setField(StandardField.TITLE, "Open Source Software and the \"Private-Collective\" Innovation Model: Issues for Organization Science");
    entry.setField(StandardField.JOURNAL, "Organization Science");
    entry.setField(StandardField.YEAR, "2003");
    entry.setField(StandardField.VOLUME, "14");
    entry.setField(StandardField.PAGES, "209--223");
    entry.setField(StandardField.NUMBER, "2");
    entry.setField(StandardField.ADDRESS, "Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA");
    entry.setField(StandardField.DOI, "http://dx.doi.org/10.1287/orsc.14.2.209.14992");
    entry.setField(StandardField.ISSN, "1526-5455");
    entry.setField(StandardField.PUBLISHER, "INFORMS");

    BibDatabase database = new BibDatabase();
    database.insertEntry(entry);

    // An empty pattern expands to an empty string
    assertEquals("", BracketedPattern.expandBrackets("", ',', entry, database));

    // Unknown bracketed keys are dropped from the result
    assertEquals("dropped", BracketedPattern.expandBrackets("drop[unknownkey]ped", ',', entry, database));

    // A single field
    assertEquals("Eric von Hippel and Georg von Krogh",
            BracketedPattern.expandBrackets("[author]", ',', entry, database));

    // A field followed by plain text
    assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.",
            BracketedPattern.expandBrackets("[author] are two famous authors.", ',', entry, database));

    // Several fields in one pattern; [fulltitle] keeps the hyphen in "Private-Collective"
    assertEquals(
            "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private-Collective\" Innovation Model: Issues for Organization Science in Organization Science.",
            BracketedPattern.expandBrackets("[author] have published [fulltitle] in [journal].", ',', entry, database));

    // [title] is expected to yield "Private Collective" (without the hyphen)
    assertEquals(
            "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private Collective\" Innovation Model: Issues for Organization Science in Organization Science.",
            BracketedPattern.expandBrackets("[author] have published [title] in [journal].", ',', entry, database));
}
}
Loading

0 comments on commit 7672b2d

Please sign in to comment.