diff --git a/CHANGELOG.md b/CHANGELOG.md index d676057f908..122bea5f52a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We fixed an issue where the option "Move file to file directory" was disabled in the entry editor for all files [#7194](https://github.com/JabRef/jabref/issues/7194) - We fixed an issue where application dialogs were opening in the wrong display when using multiple screens [#7273](https://github.com/JabRef/jabref/pull/7273) - We fixed an issue where an exception would be displayed for previewing and preferences when a custom theme has been configured but is missing [#7177](https://github.com/JabRef/jabref/issues/7177) +- We fixed an issue where the regex-based file search misinterpreted specific symbols [#4342](https://github.com/JabRef/jabref/issues/4342) - We fixed an issue where the Harvard RTF exporter used the wrong default file extension. [4508](https://github.com/JabRef/jabref/issues/4508) - We fixed an issue where the Harvard RTF exporter did not use the new authors formatter and therefore did not export "organization" authors correctly. [4508](https://github.com/JabRef/jabref/issues/4508) - We fixed an issue where the field `urldate` was not exported to the corresponding fields `YearAccessed`, `MonthAccessed`, `DayAccessed` in MS Office XML [#7354](https://github.com/JabRef/jabref/issues/7354) diff --git a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java index 0e5a7d4a518..b4d4c203be1 100644 --- a/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java +++ b/src/main/java/org/jabref/logic/citationkeypattern/BracketedPattern.java @@ -187,7 +187,7 @@ public static String expandBrackets(String pattern, Character keywordDelimiter, * @param database The {@link BibDatabase} for field resolving. May be null. 
* @return a function accepting a bracketed expression and returning the result of expanding it */ - private static Function expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) { + public static Function expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) { return (String bracket) -> { String expandedPattern; List fieldParts = parseFieldAndModifiers(bracket); diff --git a/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java b/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java index cdb2d9a03f9..01bcbbf0ba8 100644 --- a/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java +++ b/src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java @@ -8,9 +8,9 @@ import java.nio.file.Path; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.function.BiPredicate; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -18,7 +18,6 @@ import java.util.stream.Stream; import org.jabref.logic.citationkeypattern.BracketedPattern; -import org.jabref.model.database.BibDatabase; import org.jabref.model.entry.BibEntry; import org.jabref.model.strings.StringUtil; @@ -28,7 +27,6 @@ class RegExpBasedFileFinder implements FileFinder { private static final Pattern ESCAPE_PATTERN = Pattern.compile("([^\\\\])\\\\([^\\\\])"); - private static final Pattern SQUARE_BRACKETS_PATTERN = Pattern.compile("\\[.*?\\]"); private final String regExp; private final Character keywordDelimiter; @@ -41,21 +39,41 @@ class RegExpBasedFileFinder implements FileFinder { } /** - * Takes a string that contains bracketed expression and expands each of these using getFieldAndFormat. - *

- * Unknown Bracket expressions are silently dropped. + * Creates a Pattern that matches the file name corresponding to the last element of {@code fileParts} with any bracketed patterns expanded. + * + * @throws IOException throws an IOException if a PatternSyntaxException occurs */ - public static String expandBrackets(String bracketString, BibEntry entry, BibDatabase database, - Character keywordDelimiter) { - Matcher matcher = SQUARE_BRACKETS_PATTERN.matcher(bracketString); - StringBuilder expandedStringBuffer = new StringBuilder(); - while (matcher.find()) { - String replacement = BracketedPattern.expandBrackets(matcher.group(), keywordDelimiter, entry, database); - matcher.appendReplacement(expandedStringBuffer, replacement); + private Pattern createFileNamePattern(String[] fileParts, String extensionRegExp, BibEntry entry) throws IOException { + // Protect the extension marker so that it isn't treated as a bracketed pattern + String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER); + + // We need to supply a custom function to deal with the content of a bracketed expression and expandBracketContent is the default function + Function expandBracket = BracketedPattern.expandBracketContent(keywordDelimiter, entry, null); + // but, we want to post-process the expanded content so that it can be used as a regex for finding a file name + Function bracketToFileNameRegex = expandBracket.andThen(RegExpBasedFileFinder::toFileNameRegex); + + String expandedBracketAsFileNameRegex = BracketedPattern.expandBrackets(filePart, bracketToFileNameRegex); + + String fileNamePattern = expandedBracketAsFileNameRegex + .replaceAll(EXT_MARKER, extensionRegExp) // Replace the extension marker + .replaceAll("\\\\\\\\", "\\\\"); + try { + return Pattern.compile('^' + fileNamePattern + '$', Pattern.CASE_INSENSITIVE); + } catch (PatternSyntaxException e) { + throw new IOException(String.format("There is a syntax error in the regular expression %s used to search 
for files", fileNamePattern), e); } - matcher.appendTail(expandedStringBuffer); + } - return expandedStringBuffer.toString(); + /** + * Helper method for both exact matching (if the file name was not created by JabRef) and cleaned file name matching. + * + * @param expandedContent the expanded content of a bracketed expression + * @return a String representation of a regex matching the expanded content and the expanded content cleaned for file name use + */ + private static String toFileNameRegex(String expandedContent) { + var cleanedContent = FileNameCleaner.cleanFileName(expandedContent); + return expandedContent.equals(cleanedContent) ? Pattern.quote(expandedContent) : + "(" + Pattern.quote(expandedContent) + ")|(" + Pattern.quote(cleanedContent) + ")"; } /** @@ -142,9 +160,7 @@ private List findFile(final BibEntry entry, final Path directory, final St } for (int index = 0; index < (fileParts.length - 1); index++) { - String dirToProcess = fileParts[index]; - dirToProcess = expandBrackets(dirToProcess, entry, null, keywordDelimiter); if (dirToProcess.matches("^.:$")) { // Windows Drive Letter actualDirectory = Path.of(dirToProcess + '/'); @@ -179,33 +195,21 @@ private List findFile(final BibEntry entry, final Path directory, final St resultFiles.addAll(findFile(entry, path, restOfFileString, extensionRegExp)); } } catch (UncheckedIOException ioe) { - throw new IOException(ioe); + throw ioe.getCause(); } } // End process directory information } // Last step: check if the given file can be found in this directory - String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER); - String filenameToLookFor = expandBrackets(filePart, entry, null, keywordDelimiter).replaceAll(EXT_MARKER, extensionRegExp); - - try { - final Pattern toMatch = Pattern.compile('^' + filenameToLookFor.replaceAll("\\\\\\\\", "\\\\") + '$', - Pattern.CASE_INSENSITIVE); - BiPredicate matcher = (path, attributes) -> 
toMatch.matcher(path.getFileName().toString()).matches(); - resultFiles.addAll(collectFilesWithMatcher(actualDirectory, matcher)); - } catch (UncheckedIOException | PatternSyntaxException e) { - throw new IOException("Could not look for " + filenameToLookFor, e); - } - - return resultFiles; - } - - private List collectFilesWithMatcher(Path actualDirectory, BiPredicate matcher) { + Pattern toMatch = createFileNamePattern(fileParts, extensionRegExp, entry); + BiPredicate matcher = (path, attributes) -> toMatch.matcher(path.getFileName().toString()).matches(); try (Stream pathStream = Files.find(actualDirectory, 1, matcher, FileVisitOption.FOLLOW_LINKS)) { - return pathStream.collect(Collectors.toList()); - } catch (UncheckedIOException | IOException ioe) { - return Collections.emptyList(); + resultFiles.addAll(pathStream.collect(Collectors.toList())); + } catch (UncheckedIOException uncheckedIOException) { + // Previously, an empty list was returned here on both IOException and UncheckedIOException + throw uncheckedIOException.getCause(); } + return resultFiles; } private boolean isSubDirectory(Path rootDirectory, Path path) { diff --git a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java index bbc0e6eb3ea..16b0ebe59f8 100644 --- a/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java +++ b/src/test/java/org/jabref/logic/citationkeypattern/BracketedPatternTest.java @@ -324,4 +324,44 @@ void expandBracketsLastNameWithChineseCharacters() { assertEquals("杨秀群", BracketedPattern.expandBrackets("[auth]", null, bibEntry, null)); } + + @Test + void expandBracketsWithTestCasesFromRegExpBasedFileFinder() { + BibEntry entry = new BibEntry(StandardEntryType.Article).withCitationKey("HipKro03"); + entry.setField(StandardField.AUTHOR, "Eric von Hippel and Georg von Krogh"); + entry.setField(StandardField.TITLE, "Open Source Software and the 
\"Private-Collective\" Innovation Model: Issues for Organization Science"); + entry.setField(StandardField.JOURNAL, "Organization Science"); + entry.setField(StandardField.YEAR, "2003"); + entry.setField(StandardField.VOLUME, "14"); + entry.setField(StandardField.PAGES, "209--223"); + entry.setField(StandardField.NUMBER, "2"); + entry.setField(StandardField.ADDRESS, "Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA"); + entry.setField(StandardField.DOI, "http://dx.doi.org/10.1287/orsc.14.2.209.14992"); + entry.setField(StandardField.ISSN, "1526-5455"); + entry.setField(StandardField.PUBLISHER, "INFORMS"); + + BibDatabase database = new BibDatabase(); + database.insertEntry(entry); + + assertEquals("", BracketedPattern.expandBrackets("", ',', entry, database)); + + assertEquals("dropped", BracketedPattern.expandBrackets("drop[unknownkey]ped", ',', entry, database)); + + assertEquals("Eric von Hippel and Georg von Krogh", + BracketedPattern.expandBrackets("[author]", ',', entry, database)); + + assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.", + BracketedPattern.expandBrackets("[author] are two famous authors.", ',', entry, database)); + + assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.", + BracketedPattern.expandBrackets("[author] are two famous authors.", ',', entry, database)); + + assertEquals( + "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private-Collective\" Innovation Model: Issues for Organization Science in Organization Science.", + BracketedPattern.expandBrackets("[author] have published [fulltitle] in [journal].", ',', entry, database)); + + assertEquals( + "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private Collective\" Innovation Model: Issues for Organization Science in Organization Science.", + BracketedPattern.expandBrackets("[author] have published [title] in [journal].", 
',', entry, database)); + } } diff --git a/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java b/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java index 0cc53fdbc37..0c776e52fdd 100644 --- a/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java +++ b/src/test/java/org/jabref/logic/util/io/RegExpBasedFileFinderTests.java @@ -5,26 +5,33 @@ import java.util.Collections; import java.util.List; -import org.jabref.model.database.BibDatabase; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.types.StandardEntryType; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; class RegExpBasedFileFinderTests { - - private static final String FILES_DIRECTORY = "src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder"; - private BibDatabase database; + private static final List PDF_EXTENSION = Collections.singletonList("pdf"); + private static final List FILE_NAMES = List.of( + "ACM_IEEE-CS.pdf", + "pdfInDatabase.pdf", + "Regexp from [A-Z].pdf", + "directory/subdirectory/2003_Hippel_209.pdf", + "directory/subdirectory/2017_Gražulis_726.pdf", + "directory/subdirectory/pdfInSubdirectory.pdf", + "directory/subdirectory/GUO ea - INORG CHEM COMMUN 2010 - Ferroelectric Metal Organic Framework (MOF).pdf" + ); + private Path directory; private BibEntry entry; @BeforeEach - void setUp() { - + void setUp(@TempDir Path tempDir) throws Exception { entry = new BibEntry(); entry.setType(StandardEntryType.Article); entry.setCitationKey("HipKro03"); @@ -40,69 +47,98 @@ void setUp() { entry.setField(StandardField.ISSN, "1526-5455"); entry.setField(StandardField.PUBLISHER, "INFORMS"); - database = new BibDatabase(); - database.insertEntry(entry); + // Create default directories 
and files + directory = tempDir; + Files.createDirectories(directory.resolve("directory/subdirectory")); + for (String fileName : FILE_NAMES) { + Files.createFile(directory.resolve(fileName)); + } } @Test void testFindFiles() throws Exception { // given - BibEntry localEntry = new BibEntry(StandardEntryType.Article); - localEntry.setCitationKey("pdfInDatabase"); - localEntry.setField(StandardField.YEAR, "2001"); - - List extensions = Collections.singletonList("pdf"); + BibEntry localEntry = new BibEntry(StandardEntryType.Article).withCitationKey("pdfInDatabase"); - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[citationkey].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("pdfInDatabase.pdf")); // then - assertEquals(Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/pdfInDatabase.pdf")), - result); + assertEquals(expected, result); } @Test void testYearAuthFirstPageFindFiles() throws Exception { // given - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[year]_[auth]_[firstpage].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(entry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(entry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("directory/subdirectory/2003_Hippel_209.pdf")); // then - assertEquals(Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2003_Hippel_209.pdf")), - result); + assertEquals(expected, result); + } + + @Test + 
void findAssociatedFilesFindFileContainingBracketsFromBracketedExpression() throws Exception { + var bibEntry = new BibEntry().withField(StandardField.TITLE, "Regexp from [A-Z]"); + + RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("[TITLE]\\\\.[extension]", ','); + + List result = fileFinder.findAssociatedFiles(bibEntry, List.of(directory), PDF_EXTENSION); + List pdfFile = List.of(directory.resolve("Regexp from [A-Z].pdf")); + + assertEquals(pdfFile, result); + } + + @Test + void findAssociatedFilesFindCleanedFileFromBracketedExpression() throws Exception { + var bibEntry = new BibEntry().withField(StandardField.JOURNAL, "ACM/IEEE-CS"); + + RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("[JOURNAL]\\\\.[extension]", ','); + + List result = fileFinder.findAssociatedFiles(bibEntry, List.of(directory), PDF_EXTENSION); + List pdfFile = List.of(directory.resolve("ACM_IEEE-CS.pdf")); + + assertEquals(pdfFile, result); + } + + @Test + void findAssociatedFilesFindFileContainingParenthesizesFromBracketedExpression() throws Exception { + var bibEntry = new BibEntry().withCitationKey("Guo_ICC_2010") + .withField(StandardField.TITLE, "Ferroelectric Metal Organic Framework (MOF)") + .withField(StandardField.AUTHOR, "Guo, M. and Cai, H.-L. 
and Xiong, R.-G.") + .withField(StandardField.JOURNAL, "Inorganic Chemistry Communications") + .withField(StandardField.YEAR, "2010"); + + RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/.*[TITLE].*\\\\.[extension]", ','); + + List result = fileFinder.findAssociatedFiles(bibEntry, List.of(directory), PDF_EXTENSION); + List pdfFile = List.of(directory.resolve("directory/subdirectory/GUO ea - INORG CHEM COMMUN 2010 - Ferroelectric Metal Organic Framework (MOF).pdf")); + + assertEquals(pdfFile, result); } @Test void testAuthorWithDiacritics() throws Exception { // given - BibEntry localEntry = new BibEntry(StandardEntryType.Article); - localEntry.setCitationKey("Grazulis2017"); + BibEntry localEntry = new BibEntry(StandardEntryType.Article).withCitationKey("Grazulis2017"); localEntry.setField(StandardField.YEAR, "2017"); localEntry.setField(StandardField.AUTHOR, "Gražulis, Saulius and O. Kitsune"); localEntry.setField(StandardField.PAGES, "726--729"); - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[year]_[auth]_[firstpage]\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); - List expected = Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2017_Gražulis_726.pdf")); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("directory/subdirectory/2017_Gražulis_726.pdf")); // then - assertEquals(expected.size(), result.size()); - for (int i = 0; i < expected.size(); i++) { - assertTrue(Files.isSameFile(expected.get(i), result.get(i))); - } + assertEquals(expected, result); } @Test @@ -112,17 +148,14 @@ void testFindFileInSubdirectory() throws Exception { localEntry.setCitationKey("pdfInSubdirectory"); 
localEntry.setField(StandardField.YEAR, "2017"); - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("**/[citationkey].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); + List expected = List.of(directory.resolve("directory/subdirectory/pdfInSubdirectory.pdf")); // then - assertEquals(Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf")), - result); + assertEquals(expected, result); } @Test @@ -132,45 +165,12 @@ void testFindFileNonRecursive() throws Exception { localEntry.setCitationKey("pdfInSubdirectory"); localEntry.setField(StandardField.YEAR, "2017"); - List extensions = Collections.singletonList("pdf"); - - List dirs = Collections.singletonList(Path.of(FILES_DIRECTORY)); RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("*/[citationkey].*\\\\.[extension]", ','); // when - List result = fileFinder.findAssociatedFiles(localEntry, dirs, extensions); + List result = fileFinder.findAssociatedFiles(localEntry, List.of(directory), PDF_EXTENSION); // then assertTrue(result.isEmpty()); } - - @Test - void testExpandBrackets() { - - assertEquals("", RegExpBasedFileFinder.expandBrackets("", entry, database, ',')); - - assertEquals("dropped", RegExpBasedFileFinder.expandBrackets("drop[unknownkey]ped", entry, database, - ',')); - - assertEquals("Eric von Hippel and Georg von Krogh", - RegExpBasedFileFinder.expandBrackets("[author]", entry, database, ',')); - - assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.", - RegExpBasedFileFinder.expandBrackets("[author] are two famous authors.", entry, database, - ',')); - - assertEquals("Eric von Hippel and Georg von 
Krogh are two famous authors.", - RegExpBasedFileFinder.expandBrackets("[author] are two famous authors.", entry, database, - ',')); - - assertEquals( - "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private-Collective\" Innovation Model: Issues for Organization Science in Organization Science.", - RegExpBasedFileFinder.expandBrackets("[author] have published [fulltitle] in [journal].", entry, database, - ',')); - - assertEquals( - "Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private Collective\" Innovation Model: Issues for Organization Science in Organization Science.", - RegExpBasedFileFinder.expandBrackets("[author] have published [title] in [journal].", entry, database, - ',')); - } } diff --git a/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2003_Hippel_209.pdf b/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2003_Hippel_209.pdf deleted file mode 100644 index e69de29bb2d..00000000000 diff --git "a/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2017_Gra\305\276ulis_726.pdf" "b/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/2017_Gra\305\276ulis_726.pdf" deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf b/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf deleted file mode 100644 index 3ac0b7d0dd9..00000000000 Binary files a/src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/directory/subdirectory/pdfInSubdirectory.pdf and /dev/null differ