TEX_NAMES = Set.of(
"aa", "ae", "l", "o", "oe", "i", "AA", "AE", "L", "O", "OE", "j");
+
+ private static final Pattern STARTS_WITH_CAPITAL_LETTER_DOT = Pattern.compile("^[A-Z]\\. ");
+
/**
* the raw bibtex author/editor field
*/
@@ -108,6 +114,8 @@ public AuthorList parse(@NonNull String listOfNames) {
andOthersPresent = false;
}
+ listOfNames = checkNamesCommaSeparated(listOfNames);
+
// Handle case names in order lastname, firstname and separated by ","
// E.g., Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H.
final boolean authorsContainAND = listOfNames.toUpperCase(Locale.ENGLISH).contains(" AND ");
@@ -170,6 +178,29 @@ public AuthorList parse(@NonNull String listOfNames) {
return AuthorList.of(authors);
}
+ /**
+ * Handles names given in the order "Firstname Lastname", separated by "," and ending with a final ", and ".
+ * E.g., "I. Podadera, J. M. Carmona, A. Ibarra, and J. Molla" becomes "I. Podadera and J. M. Carmona and A. Ibarra and J. Molla".
+ * The rewrite only takes place if every name starts with an abbreviated first name (a capital letter followed by ". ").
+ * @return the original or patched version of listOfNames
+ */
+ private static String checkNamesCommaSeparated(String listOfNames) {
+ int commaAndPos = listOfNames.lastIndexOf(", and ");
+ if (commaAndPos >= 0) {
+ String lastContainedName = listOfNames.substring(commaAndPos + ", and ".length());
+ Matcher matcher = STARTS_WITH_CAPITAL_LETTER_DOT.matcher(lastContainedName);
+ if (matcher.find()) {
+ String namesBeforeAndString = listOfNames.substring(0, commaAndPos);
+ String[] namesBeforeAnd = namesBeforeAndString.split(", ");
+ if (Arrays.stream(namesBeforeAnd).allMatch(name -> STARTS_WITH_CAPITAL_LETTER_DOT.matcher(name).find())) {
+ // Format found: replace every ", " (and the final ", and ") by the separator " and "
+ listOfNames = Arrays.stream(namesBeforeAnd).collect(Collectors.joining(" and ", "", " and " + lastContainedName));
+ }
+ }
+ }
+ return listOfNames;
+ }
+
/**
* Parses one author name and returns preformatted information.
*
diff --git a/src/main/java/org/jabref/logic/importer/Importer.java b/src/main/java/org/jabref/logic/importer/Importer.java
index fa4159dc404..d991b087d3b 100644
--- a/src/main/java/org/jabref/logic/importer/Importer.java
+++ b/src/main/java/org/jabref/logic/importer/Importer.java
@@ -168,22 +168,6 @@ public static BufferedReader getReader(InputStream stream) {
return new BufferedReader(reader);
}
- /**
- * Returns the name of this import format.
- *
- * The name must be unique.
- *
- * @return format name, must be unique and not null
- */
- public abstract String getName();
-
- /**
- * Returns the type of files that this importer can read
- *
- * @return {@link FileType} corresponding to the importer
- */
- public abstract FileType getFileType();
-
/**
* Returns a one-word ID which identifies this importer. Used for example, to identify the importer when used from
* the command line.
@@ -202,6 +186,15 @@ public String getId() {
return result.toString();
}
+ /**
+ * Returns the name of this import format.
+ *
+ * The name must be unique.
+ *
+ * @return format name, must be unique and not null
+ */
+ public abstract String getName();
+
/**
* Returns the description of the import format.
*
@@ -216,6 +209,13 @@ public String getId() {
*/
public abstract String getDescription();
+ /**
+ * Returns the type of files that this importer can read
+ *
+ * @return {@link FileType} corresponding to the importer
+ */
+ public abstract FileType getFileType();
+
@Override
public int hashCode() {
return getName().hashCode();
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter.java
new file mode 100644
index 00000000000..f2605594e13
--- /dev/null
+++ b/src/main/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporter.java
@@ -0,0 +1,304 @@
+package org.jabref.logic.importer.fileformat;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jabref.logic.citationkeypattern.CitationKeyGenerator;
+import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
+import org.jabref.logic.importer.Importer;
+import org.jabref.logic.importer.ParserResult;
+import org.jabref.logic.l10n.Localization;
+import org.jabref.logic.util.FileType;
+import org.jabref.logic.util.StandardFileType;
+import org.jabref.logic.xmp.EncryptedPdfsNotSupportedException;
+import org.jabref.logic.xmp.XmpUtilReader;
+import org.jabref.model.entry.AuthorList;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.Date;
+import org.jabref.model.entry.field.Field;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.StandardEntryType;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Parses the references from the "References" section of a PDF.
+ *
+ * Currently, the IEEE two-column format is supported, and only the text of the last page of the PDF is read.
+ *
+ */
+public class BibliographyFromPdfImporter extends Importer {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(BibliographyFromPdfImporter.class);
+
+ private static final Pattern REFERENCE_PATTERN = Pattern.compile("\\[(\\d+)\\](.*?)(?=\\[|$)", Pattern.DOTALL);
+ private static final Pattern YEAR_AT_END = Pattern.compile(", (\\d{4})\\.$");
+ private static final Pattern PAGES = Pattern.compile(", pp\\. (\\d+--?\\d+)\\.?(.*)");
+ private static final Pattern PAGE = Pattern.compile(", p\\. (\\d+)(.*)");
+ private static final Pattern MONTH_RANGE_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.?)-[A-Z][a-z]{2,7}\\.? (\\d+)(.*)");
+ private static final Pattern MONTH_AND_YEAR = Pattern.compile(", ([A-Z][a-z]{2,7}\\.? \\d+),? ?(.*)");
+ private static final Pattern VOLUME = Pattern.compile(", vol\\. (\\d+)(.*)");
+ private static final Pattern NO = Pattern.compile(", no\\. (\\d+)(.*)");
+ private static final Pattern AUTHORS_AND_TITLE_AT_BEGINNING = Pattern.compile("^([^“]+), “(.*?)”, ");
+ private static final Pattern TITLE = Pattern.compile("“(.*?)”, (.*)");
+
+ private final CitationKeyPatternPreferences citationKeyPatternPreferences;
+
+ public BibliographyFromPdfImporter(CitationKeyPatternPreferences citationKeyPatternPreferences) {
+ this.citationKeyPatternPreferences = citationKeyPatternPreferences;
+ }
+
+ @Override
+ public boolean isRecognizedFormat(BufferedReader input) throws IOException {
+ return Objects.requireNonNullElse(input.readLine(), "").startsWith("%PDF"); // readLine() returns null for an empty stream
+ }
+
+ @Override
+ public ParserResult importDatabase(BufferedReader reader) throws IOException {
+ Objects.requireNonNull(reader); // a null reader is reported as such, not as an unsupported operation
+ throw new UnsupportedOperationException("BibliographyFromPdfImporter does not support importDatabase(BufferedReader reader). "
+ + "Instead use importDatabase(Path filePath).");
+ }
+
+ @Override
+ public String getName() {
+ return "Bibliography from PDF";
+ }
+
+ @Override
+ public String getDescription() {
+ return "Reads the references from the 'References' section of a PDF file.";
+ }
+
+ @Override
+ public FileType getFileType() {
+ return StandardFileType.PDF;
+ }
+
+ @Override
+ public ParserResult importDatabase(Path filePath) {
+ List<BibEntry> result;
+ // Only the text of the last page of the PDF is searched for references
+ try (PDDocument document = new XmpUtilReader().loadWithAutomaticDecryption(filePath)) {
+ String contents = getLastPageContents(document);
+ result = getEntriesFromPDFContent(contents);
+ } catch (EncryptedPdfsNotSupportedException e) {
+ return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
+ } catch (IOException exception) {
+ return ParserResult.fromError(exception);
+ }
+
+ ParserResult parserResult = new ParserResult(result);
+
+ // Generate citation keys for result
+ CitationKeyGenerator citationKeyGenerator = new CitationKeyGenerator(parserResult.getDatabaseContext(), citationKeyPatternPreferences);
+ parserResult.getDatabase().getEntries().forEach(citationKeyGenerator::generateAndSetKey);
+
+ return parserResult;
+ }
+
+ private record IntermediateData(String number, String reference) {
+ }
+
+ /**
+ * In: "[1] ...\n...\n...[2]...\n...\n...\n[3]..."
+ * Out: one parsed BibEntry per reference found ("[1] ...", "[2]...", "[3]..."), in order of appearance; line breaks inside a reference become spaces
+ */
+ private List<BibEntry> getEntriesFromPDFContent(String contents) {
+ List<IntermediateData> referencesStrings = new ArrayList<>();
+ Matcher matcher = REFERENCE_PATTERN.matcher(contents);
+ while (matcher.find()) {
+ String reference = matcher.group(2).replaceAll("\\r?\\n", " ").trim();
+ referencesStrings.add(new IntermediateData(matcher.group(1), reference));
+ }
+
+ return referencesStrings.stream()
+ .map(data -> parseReference(data.number(), data.reference()))
+ .toList();
+ }
+
+ private String getLastPageContents(PDDocument document) throws IOException {
+ // the page count doubles as the (1-based) number of the last page
+ final int lastPageNumber = document.getNumberOfPages();
+ PDFTextStripper textStripper = new PDFTextStripper();
+ textStripper.setStartPage(lastPageNumber);
+ textStripper.setEndPage(lastPageNumber);
+ StringWriter extractedText = new StringWriter();
+ textStripper.writeText(document, extractedText);
+
+ return extractedText.toString();
+ }
+
+ /**
+ * Parses a single reference into a {@link BibEntry} by stripping off DOI, year, pages, date, volume, number, authors, and title one after another.
+ * Example: J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. doi:10.1088/ 1741-4326/aa6a6a
+ * @param number The number of the reference - only used to rebuild the original reference text, which is stored in the comment field
+ * @param reference The text of the reference without the leading "[number]"
+ * @return the parsed entry; its comment field always contains the original reference
+ */
+ @VisibleForTesting
+ BibEntry parseReference(String number, String reference) {
+ String originalReference = "[" + number + "] " + reference;
+ BibEntry result = new BibEntry(StandardEntryType.Article);
+
+ reference = reference.replace(".-", "-");
+
+ // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. doi:10.1088/ 1741-4326/aa6a6a
+ // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979. doi:10.18429/ JACoW-IPAC2019-MOPTS051
+ int pos = reference.indexOf("doi:");
+ if (pos >= 0) {
+ String doi = reference.substring(pos + "doi:".length()).trim();
+ doi = doi.replace(" ", "");
+ result.setField(StandardField.DOI, doi);
+ reference = reference.substring(0, pos).trim();
+ }
+
+ // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017.
+ // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979
+ Matcher matcher = YEAR_AT_END.matcher(reference);
+ if (matcher.find()) {
+ result.setField(StandardField.YEAR, matcher.group(1));
+ reference = reference.substring(0, matcher.start()).trim();
+ }
+
+ reference = updateEntryAndReferenceIfMatches(reference, PAGES, result, StandardField.PAGES);
+
+ // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016
+ // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019
+ reference = updateEntryAndReferenceIfMatches(reference, PAGE, result, StandardField.PAGES);
+
+ matcher = MONTH_RANGE_AND_YEAR.matcher(reference);
+ if (matcher.find()) {
+ // strip out second month
+ reference = reference.substring(0, matcher.start()) + ", " + matcher.group(1) + " " + matcher.group(2) + matcher.group(3);
+ }
+
+ // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57
+ // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019
+ matcher = MONTH_AND_YEAR.matcher(reference);
+ if (matcher.find()) {
+ Optional<Date> parsedDate = Date.parse(matcher.group(1));
+ if (parsedDate.isPresent()) {
+ Date date = parsedDate.get();
+ date.getYear().ifPresent(year -> result.setField(StandardField.YEAR, year.toString()));
+ date.getMonth().ifPresent(month -> result.setField(StandardField.MONTH, month.getJabRefFormat()));
+
+ String prefix = reference.substring(0, matcher.start()).trim();
+ String suffix = matcher.group(2);
+ if (!suffix.isEmpty() && !".".equals(suffix)) {
+ suffix = ", " + suffix.replaceAll("^\\. ", "");
+ } else {
+ suffix = "";
+ }
+ reference = prefix + suffix;
+ }
+ }
+
+ // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57
+ // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia
+ reference = updateEntryAndReferenceIfMatches(reference, VOLUME, result, StandardField.VOLUME);
+
+ reference = updateEntryAndReferenceIfMatches(reference, NO, result, StandardField.NUMBER);
+
+ // J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion
+ // Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia
+ matcher = AUTHORS_AND_TITLE_AT_BEGINNING.matcher(reference);
+ if (matcher.find()) {
+ String authors = matcher.group(1)
+ .replace("- ", "")
+ .replaceAll("et al\\.?", "and others");
+ result.setField(StandardField.AUTHOR, AuthorList.fixAuthorFirstNameFirst(authors));
+ result.setField(StandardField.TITLE, matcher.group(2)
+ .replace("- ", "")
+ .replaceAll("et al\\.?", "and others"));
+ reference = reference.substring(matcher.end()).trim();
+ } else {
+ // No authors present
+ // Example: “AF4.1.1 SRF Linac Engineering Design Report”, Internal note.
+ reference = updateEntryAndReferenceIfMatches(reference, TITLE, result, StandardField.TITLE);
+ }
+
+ // Nucl. Fusion
+ // in Proc. IPAC’19, Mel- bourne, Australia
+ // presented at th 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzreland
+ List<String> stringsToRemove = List.of("presented at", "to be presented at");
+ // need to use "iterator()" instead of "stream().foreach", because "reference" is modified inside the loop
+ Iterator<String> iterator = stringsToRemove.iterator();
+ while (iterator.hasNext()) {
+ String check = iterator.next();
+ if (reference.startsWith(check)) {
+ reference = reference.substring(check.length()).trim();
+ result.setType(StandardEntryType.InProceedings);
+ }
+ }
+
+ boolean startsWithInProc = reference.startsWith("in Proc.");
+ boolean containsWorkshop = reference.contains("Workshop");
+ if (startsWithInProc || containsWorkshop) {
+ int beginIndex = startsWithInProc ? "in ".length() : 0;
+ result.setField(StandardField.BOOKTITLE, reference.substring(beginIndex).replace("- ", "").trim());
+ result.setType(StandardEntryType.InProceedings);
+ reference = "";
+ }
+
+ // Nucl. Fusion
+ reference = reference.trim().replace("- ", "").replaceAll("\\.$", "");
+ if (!reference.contains(",") && !reference.isEmpty()) {
+ if (reference.endsWith(" Note") || reference.endsWith(" note")) {
+ result.setField(StandardField.NOTE, reference);
+ result.setType(StandardEntryType.TechReport);
+ } else {
+ result.setField(StandardField.JOURNAL, reference.replace("- ", ""));
+ }
+ reference = "";
+ } else {
+ String toAdd = reference;
+ result.setType(StandardEntryType.InProceedings);
+ if (result.hasField(StandardField.BOOKTITLE)) {
+ String oldTitle = result.getField(StandardField.BOOKTITLE).get();
+ result.setField(StandardField.BOOKTITLE, oldTitle + toAdd);
+ } else {
+ result.setField(StandardField.BOOKTITLE, toAdd);
+ }
+ LOGGER.debug("InProceedings fallback used for current state of handled string {}", reference);
+ reference = "";
+ }
+
+ if (reference.isEmpty()) {
+ result.setField(StandardField.COMMENT, originalReference);
+ } else {
+ result.setField(StandardField.COMMENT, "Unprocessed: " + reference + "\n\n" + originalReference);
+ }
+ return result;
+ }
+
+ /**
+ * Searches {@code reference} for {@code pattern}; on a match, group 1 (with "- " removed) is stored in {@code field} of {@code result}.
+ * @param pattern A pattern matching two groups: The first one to take, the second one to leave at the end of the string
+ * @return the text before the match followed by group 2, or the unchanged reference if the pattern does not match
+ */
+ private static String updateEntryAndReferenceIfMatches(String reference, Pattern pattern, BibEntry result, Field field) {
+ Matcher matcher = pattern.matcher(reference);
+ if (!matcher.find()) {
+ return reference;
+ }
+ result.setField(field, matcher.group(1).replace("- ", ""));
+ String remainder = matcher.group(2);
+ String head = reference.substring(0, matcher.start()).trim();
+ // whatever followed the extracted value is kept, separated by a single space
+ return remainder.isEmpty() ? head : head + " " + remainder;
+ }
+}
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java
index 3d2e4f7aa86..40233fc288d 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java
@@ -5,6 +5,7 @@
import java.io.StringWriter;
import java.nio.file.Path;
import java.util.ArrayList;
+import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
@@ -182,16 +183,9 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {
+ "Instead use importDatabase(Path filePath, Charset defaultEncoding).");
}
- @Override
- public ParserResult importDatabase(String data) throws IOException {
- Objects.requireNonNull(data);
- throw new UnsupportedOperationException("PdfContentImporter does not support importDatabase(String data)."
- + "Instead use importDatabase(Path filePath, Charset defaultEncoding).");
- }
-
@Override
public ParserResult importDatabase(Path filePath) {
- final ArrayList result = new ArrayList<>(1);
+ List result = new ArrayList<>(1);
try (PDDocument document = new XmpUtilReader().loadWithAutomaticDecryption(filePath)) {
String firstPageContents = getFirstPageContents(document);
Optional entry = getEntryFromPDFContent(firstPageContents, OS.NEWLINE);
diff --git a/src/main/java/org/jabref/model/entry/Date.java b/src/main/java/org/jabref/model/entry/Date.java
index 8c127f6bf2f..3509b8cc304 100644
--- a/src/main/java/org/jabref/model/entry/Date.java
+++ b/src/main/java/org/jabref/model/entry/Date.java
@@ -41,10 +41,13 @@ public class Date {
"M/uu", // covers 9/15
"MMMM d, uuuu", // covers September 1, 2015
"MMMM, uuuu", // covers September, 2015
+ "MMMM uuuu", // covers September 2015
"d.M.uuuu", // covers 15.1.2015
"uuuu.M.d", // covers 2015.1.15
"uuuu", // covers 2015
"MMM, uuuu", // covers Jan, 2020
+ "MMM. uuuu", // covers Oct. 2020
+ "MMM uuuu", // covers Jan 2020
"uuuu.MM.d", // covers 2015.10.15
"d MMMM u/d MMMM u", // covers 20 January 2015/20 February 2015
"d MMMM u", // covers 20 January 2015
@@ -141,7 +144,7 @@ public static Optional parse(String dateString) {
TemporalAccessor parsedEndDate = SIMPLE_DATE_FORMATS.parse(strDates[1].strip());
return Optional.of(new Date(parsedDate, parsedEndDate));
} catch (DateTimeParseException e) {
- LOGGER.debug("Invalid Date format for range", e);
+ LOGGER.warn("Invalid Date format for range", e);
return Optional.empty();
}
} else if (dateString.matches(
@@ -162,7 +165,7 @@ public static Optional parse(String dateString) {
TemporalAccessor parsedEndDate = SIMPLE_DATE_FORMATS.parse(strDates[1].strip());
return Optional.of(new Date(parsedDate, parsedEndDate));
} catch (DateTimeParseException e) {
- LOGGER.debug("Invalid Date format range", e);
+ LOGGER.warn("Invalid Date format range", e);
return Optional.empty();
}
} else if (dateString.matches(
@@ -179,7 +182,7 @@ public static Optional parse(String dateString) {
TemporalAccessor parsedEndDate = parseDateWithEraIndicator(strDates[1]);
return Optional.of(new Date(parsedDate, parsedEndDate));
} catch (DateTimeParseException e) {
- LOGGER.debug("Invalid Date format range", e);
+ LOGGER.warn("Invalid Date format range", e);
return Optional.empty();
}
} else if (dateString.matches(
@@ -196,13 +199,13 @@ public static Optional parse(String dateString) {
TemporalAccessor parsedEndDate = parseDateWithEraIndicator(strDates[1]);
return Optional.of(new Date(parsedDate, parsedEndDate));
} catch (DateTimeParseException e) {
- LOGGER.debug("Invalid Date format range", e);
+ LOGGER.warn("Invalid Date format range", e);
return Optional.empty();
}
}
// if dateString is single year
- if (dateString.matches("\\d{4}-|" + "\\d{4}\\?")) {
+ if (dateString.matches("\\d{4}-|\\d{4}\\?")) {
try {
String year = dateString.substring(0, dateString.length() - 1);
TemporalAccessor parsedDate = SIMPLE_DATE_FORMATS.parse(year);
@@ -225,7 +228,7 @@ public static Optional parse(String dateString) {
TemporalAccessor date = parseDateWithEraIndicator(dateString);
return Optional.of(new Date(date));
} catch (DateTimeParseException e) {
- LOGGER.debug("Invalid Date format with era indicator", e);
+ LOGGER.warn("Invalid Date format with era indicator", e);
return Optional.empty();
}
}
diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties
index ce06acfe80b..d11e5c11275 100644
--- a/src/main/resources/l10n/JabRef_en.properties
+++ b/src/main/resources/l10n/JabRef_en.properties
@@ -329,8 +329,10 @@ Export\ preferences\ to\ file=Export preferences to file
Export\ to\ clipboard=Export to clipboard
Export\ to\ text\ file.=Export to text file.
-Extract\ references\ from\ file=Extract references from file
-Extract\ References=Extract References
+Extract\ references\ from\ file\ (online)=Extract references from file (online)
+Extract\ references\ from\ file\ (offline)=Extract references from file (offline)
+Extract\ References\ (online)=Extract References (online)
+Extract\ References\ (offline)=Extract References (offline)
Processing\ PDF(s)=Processing PDF(s)
Processing\ a\ large\ number\ of\ files=Processing a large number of files
You\ are\ about\ to\ process\ %0\ files.\ Continue?=You are about to process %0 files. Continue?
diff --git a/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java b/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java
index 2943fc5fec6..aa983ab6ab3 100644
--- a/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java
+++ b/src/test/java/org/jabref/logic/importer/AuthorListParserTest.java
@@ -11,6 +11,9 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
+/**
+ * Similar tests are available in {@link org.jabref.model.entry.AuthorListTest}
+ */
class AuthorListParserTest {
AuthorListParser parser = new AuthorListParser();
@@ -50,7 +53,15 @@ private static Stream parseMultipleCorrectly() {
new Author("Alexander", "A.", null, "Artemenko", null),
Author.OTHERS
),
- "Alexander Artemenko and others")
+ "Alexander Artemenko and others"),
+ Arguments.of(
+ AuthorList.of(
+ new Author("I.", "I.", null, "Podadera", null),
+ new Author("J. M.", "J. M.", null, "Carmona", null),
+ new Author("A.", "A.", null, "Ibarra", null),
+ new Author("J.", "J.", null, "Molla", null)
+ ),
+ "I. Podadera, J. M. Carmona, A. Ibarra, and J. Molla")
);
}
diff --git a/src/test/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporterTest.java b/src/test/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporterTest.java
new file mode 100644
index 00000000000..9318d99e977
--- /dev/null
+++ b/src/test/java/org/jabref/logic/importer/fileformat/BibliographyFromPdfImporterTest.java
@@ -0,0 +1,274 @@
+package org.jabref.logic.importer.fileformat;
+
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Stream;
+
+import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
+import org.jabref.logic.citationkeypattern.GlobalCitationKeyPattern;
+import org.jabref.logic.importer.ParserResult;
+import org.jabref.model.entry.BibEntry;
+import org.jabref.model.entry.field.StandardField;
+import org.jabref.model.entry.types.StandardEntryType;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import static org.jabref.logic.citationkeypattern.CitationKeyGenerator.DEFAULT_UNWANTED_CHARACTERS;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class BibliographyFromPdfImporterTest {
+
+ private static final BibEntry KNASTER_2017 = new BibEntry(StandardEntryType.Article)
+ .withField(StandardField.AUTHOR, "J. Knaster and others")
+ .withField(StandardField.TITLE, "Overview of the IFMIF/EVEDA project")
+ .withField(StandardField.JOURNAL, "Nucl. Fusion")
+ .withField(StandardField.VOLUME, "57")
+ .withField(StandardField.PAGES, "102016")
+ .withField(StandardField.YEAR, "2017")
+ .withField(StandardField.DOI, "10.1088/1741-4326/aa6a6a")
+ .withField(StandardField.COMMENT, "[1] J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. doi:10.1088/ 1741-4326/aa6a6a");
+ private static final BibEntry SHIMOSAKI_2019 = new BibEntry(StandardEntryType.InProceedings)
+ .withField(StandardField.AUTHOR, "Y. Shimosaki and others")
+ .withField(StandardField.TITLE, "Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc")
+ .withField(StandardField.BOOKTITLE, "Proc. IPAC’19, Melbourne, Australia")
+ .withField(StandardField.MONTH, "#may#")
+ .withField(StandardField.YEAR, "2019")
+ .withField(StandardField.PAGES, "977-979")
+ .withField(StandardField.DOI, "10.18429/JACoW-IPAC2019-MOPTS051")
+ .withField(StandardField.COMMENT, "[3] Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979. doi:10.18429/ JACoW-IPAC2019-MOPTS051");
+ private static final BibEntry BELLAN_2021 = new BibEntry(StandardEntryType.InProceedings)
+ .withField(StandardField.AUTHOR, "L. Bellan and others")
+ .withField(StandardField.TITLE, "Acceleration of the high current deuteron beam through the IFMIF-EVEDA beam dynamics performances")
+ .withField(StandardField.BOOKTITLE, "Proc. HB’21, Batavia, IL, USA")
+ .withField(StandardField.MONTH, "#oct#")
+ .withField(StandardField.YEAR, "2021")
+ .withField(StandardField.PAGES, "197-202")
+ .withField(StandardField.DOI, "10.18429/JACoW-HB2021-WEDC2")
+ .withField(StandardField.COMMENT, "[6] L. Bellan et al., “Acceleration of the high current deuteron beam through the IFMIF-EVEDA beam dynamics perfor- mances”, in Proc. HB’21, Batavia, IL, USA, Oct. 2021, pp. 197-202. doi:10.18429/JACoW-HB2021-WEDC2");
+ private static final BibEntry MASUDA_2022 = new BibEntry(StandardEntryType.InProceedings)
+ .withField(StandardField.AUTHOR, "K. Masuda and others")
+ .withField(StandardField.TITLE, "Commissioning of IFMIF Prototype Accelerator towards CW operation")
+ .withField(StandardField.BOOKTITLE, "Proc. LINAC’22, Liverpool, UK")
+ .withField(StandardField.MONTH, "#aug#")
+ .withField(StandardField.YEAR, "2022")
+ .withField(StandardField.PAGES, "319-323")
+ .withField(StandardField.DOI, "10.18429/JACoW-LINAC2022-TU2AA04")
+ .withField(StandardField.COMMENT, "[7] K. Masuda et al., “Commissioning of IFMIF Prototype Ac- celerator towards CW operation”, in Proc. LINAC’22, Liv- erpool, UK, Aug.-Sep. 2022, pp. 319-323. doi:10.18429/ JACoW-LINAC2022-TU2AA04");
+ private static final BibEntry PODADERA_2012 = new BibEntry(StandardEntryType.InProceedings)
+ .withField(StandardField.AUTHOR, "I. Podadera and J. M. Carmona and A. Ibarra and J. Molla")
+ .withField(StandardField.TITLE, "Beam position monitor development for LIPAc")
+ .withField(StandardField.BOOKTITLE, "th 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzreland")
+ .withField(StandardField.MONTH, "#jan#")
+ .withField(StandardField.YEAR, "2012")
+ .withField(StandardField.COMMENT, "[11] I. Podadera, J. M. Carmona, A. Ibarra, and J. Molla, “Beam position monitor development for LIPAc”, presented at th 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzreland, Jan. 2012.");
+ private static final BibEntry AKAGI_2023 = new BibEntry(StandardEntryType.InProceedings)
+ .withField(StandardField.AUTHOR, "T. Akagi and others")
+ .withField(StandardField.TITLE, "Achievement of high-current continuouswave deuteron injector for Linear IFMIF Prototype Accelerator (LIPAc)")
+ .withField(StandardField.BOOKTITLE, "IAEA FEC’23, London, UK, https://www.iaea.org/events/fec2023")
+ .withField(StandardField.MONTH, "#oct#")
+ .withField(StandardField.YEAR, "2023")
+ .withField(StandardField.COMMENT, "[15] T. Akagi et al., “Achievement of high-current continuous- wave deuteron injector for Linear IFMIF Prototype Accelera- tor (LIPAc)”, to be presented at IAEA FEC’23, London, UK, Oct. 2023. https://www.iaea.org/events/fec2023");
+ private static final BibEntry INTERNAL_NOTE = new BibEntry(StandardEntryType.TechReport)
+ .withField(StandardField.TITLE, "AF4.1.1 SRF Linac Engineering Design Report")
+ .withField(StandardField.NOTE, "Internal note")
+ .withField(StandardField.COMMENT, "[16] “AF4.1.1 SRF Linac Engineering Design Report”, Internal note.");
+ private static final BibEntry KWON_2023 = new BibEntry(StandardEntryType.InProceedings)
+ .withField(StandardField.AUTHOR, "S. Kwon and others")
+ .withField(StandardField.TITLE, "High beam current operation with beam di-agnostics at LIPAc")
+ .withField(StandardField.BOOKTITLE, "HB’23, Geneva, Switzerland, paper FRC1I2, this conference")
+ .withField(StandardField.MONTH, "#oct#")
+ .withField(StandardField.YEAR, "2023")
+ .withField(StandardField.COMMENT, "[14] S. Kwon et al., “High beam current operation with beam di-agnostics at LIPAc”, presented at HB’23, Geneva, Switzer- land, Oct. 2023, paper FRC1I2, this conference.");
+ private BibliographyFromPdfImporter bibliographyFromPdfImporter;
+
+ @BeforeEach
+ void setup() {
+ GlobalCitationKeyPattern globalCitationKeyPattern = GlobalCitationKeyPattern.fromPattern("[auth][year]");
+ CitationKeyPatternPreferences citationKeyPatternPreferences = new CitationKeyPatternPreferences(
+ false,
+ false,
+ false,
+ CitationKeyPatternPreferences.KeySuffix.SECOND_WITH_A,
+ "",
+ "",
+ DEFAULT_UNWANTED_CHARACTERS,
+ globalCitationKeyPattern,
+ "",
+ ',');
+ bibliographyFromPdfImporter = new BibliographyFromPdfImporter(citationKeyPatternPreferences);
+ }
+
+ @Test
+ void tua3i2refpage() throws Exception {
+ Path file = Path.of(BibliographyFromPdfImporterTest.class.getResource("tua3i2refpage.pdf").toURI());
+ ParserResult parserResult = bibliographyFromPdfImporter.importDatabase(file);
+ BibEntry entry02 = new BibEntry(StandardEntryType.Article)
+ .withCitationKey("Kondo2020")
+ .withField(StandardField.AUTHOR, "K. Kondo and others")
+ .withField(StandardField.TITLE, "Validation of the Linear IFMIF Prototype Accelerator (LIPAc) in Rokkasho")
+ .withField(StandardField.JOURNAL, "Fusion Eng. Des") // TODO: Final dot should be kept
+ .withField(StandardField.VOLUME, "153")
+ .withField(StandardField.YEAR, "2020")
+ .withField(StandardField.PAGES, "111503")
+ .withField(StandardField.DOI, "10.1016/j.fusengdes.2020.111503")
+ .withField(StandardField.COMMENT, "[2] K. Kondo et al., “Validation of the Linear IFMIF Prototype Accelerator (LIPAc) in Rokkasho”, Fusion Eng. Des., vol. 153, p. 111503, 2020. doi:10.1016/j.fusengdes.2020. 111503");
+
+ BibEntry entry04 = new BibEntry(StandardEntryType.InProceedings)
+ .withCitationKey("Devanz2017")
+ .withField(StandardField.AUTHOR, "G. Devanz and others")
+ .withField(StandardField.TITLE, "Manufacturing and validation tests of IFMIF low-beta HWRs")
+ .withField(StandardField.BOOKTITLE, "Proc. IPAC’17, Copenhagen, Denmark")
+ .withField(StandardField.MONTH, "#may#")
+ .withField(StandardField.YEAR, "2017")
+ .withField(StandardField.PAGES, "942-944")
+ .withField(StandardField.DOI, "10.18429/JACoW-IPAC2017-MOPVA039")
+ .withField(StandardField.COMMENT, "[4] G. Devanz et al., “Manufacturing and validation tests of IFMIF low-beta HWRs”, in Proc. IPAC’17, Copen- hagen, Denmark, May 2017, pp. 942-944. doi:10.18429/ JACoW-IPAC2017-MOPVA039");
+
+ BibEntry entry05 = new BibEntry(StandardEntryType.Article)
+ .withCitationKey("Branas2018")
+ .withField(StandardField.AUTHOR, "B. Brañas and others")
+ .withField(StandardField.TITLE, "The LIPAc Beam Dump")
+ .withField(StandardField.JOURNAL, "Fusion Eng. Des")
+ .withField(StandardField.VOLUME, "127")
+ .withField(StandardField.PAGES, "127-138")
+ .withField(StandardField.YEAR, "2018")
+ .withField(StandardField.DOI, "10.1016/j.fusengdes.2017.12.018")
+ .withField(StandardField.COMMENT, "[5] B. Brañas et al., “The LIPAc Beam Dump”, Fusion Eng. Des., vol. 127, pp. 127-138, 2018. doi:10.1016/j.fusengdes. 2017.12.018");
+
+ BibEntry entry08 = new BibEntry(StandardEntryType.InProceedings)
+ .withCitationKey("Scantamburlo2023")
+ .withField(StandardField.AUTHOR, "F. Scantamburlo and others")
+ .withField(StandardField.TITLE, "Linear IFMIF Prototype Accelera-tor (LIPAc) Radio Frequency Quadrupole’s (RFQ) RF couplers enhancement towards CW operation at nominal voltage")
+ .withField(StandardField.BOOKTITLE, "Proc. ISFNT’23, Las Palmas de Gran Canaria, Spain.")
+ .withField(StandardField.MONTH, "#sep#")
+ .withField(StandardField.YEAR, "2023")
+ .withField(StandardField.COMMENT, "[8] F. Scantamburlo et al., “Linear IFMIF Prototype Accelera-tor (LIPAc) Radio Frequency Quadrupole’s (RFQ) RF couplers enhancement towards CW operation at nominal voltage”, in Proc. ISFNT’23, Sep. 2023, Las Palmas de Gran Canaria, Spain.");
+
+ BibEntry entry09 = new BibEntry(StandardEntryType.InProceedings)
+ .withCitationKey("Franco2023")
+ .withField(StandardField.AUTHOR, "A. De Franco and others")
+ .withField(StandardField.BOOKTITLE, "Proc. IPAC’23, Venice, Italy")
+ .withField(StandardField.TITLE, "RF conditioning towards continuous wave of the FRQ of the Linear IFMIF Prototype Accelerator")
+ .withField(StandardField.PAGES, "2345-2348")
+ .withField(StandardField.MONTH, "#may#")
+ .withField(StandardField.YEAR, "2023")
+ .withField(StandardField.DOI, "10.18429/JACoW-IPAC2023-TUPM065")
+ .withField(StandardField.COMMENT, "[9] A. De Franco et al., “RF conditioning towards continuous wave of the FRQ of the Linear IFMIF Prototype Accelerator”, in Proc. IPAC’23, Venice, Italy, May 2023, pp. 2345-2348. doi:10.18429/JACoW-IPAC2023-TUPM065");
+
+ BibEntry entry10 = new BibEntry(StandardEntryType.InProceedings)
+ .withCitationKey("Hirosawa")
+ .withField(StandardField.AUTHOR, "K. Hirosawa and others")
+ .withField(StandardField.BOOKTITLE, "Proc. PASJ’23, 2023, Japan.")
+ .withField(StandardField.TITLE, "High-Power RF tests of repaired circulator for LIPAc RFQ")
+ .withField(StandardField.COMMENT, "[10] K. Hirosawa et al., “High-Power RF tests of repaired circu- lator for LIPAc RFQ”, in Proc. PASJ’23, 2023, Japan.");
+
+ BibEntry entry12 = new BibEntry(StandardEntryType.InProceedings)
+ .withCitationKey("Podadera2019")
+ .withField(StandardField.AUTHOR, "I. Podadera and others")
+ .withField(StandardField.TITLE, "Beam commissioning of beam position and phase monitors for LIPAc")
+ .withField(StandardField.BOOKTITLE, "Proc. IBIC’19, Malmö, Sweden")
+ .withField(StandardField.PAGES, "534-538")
+ .withField(StandardField.MONTH, "#sep#")
+ .withField(StandardField.YEAR, "2019")
+ .withField(StandardField.DOI, "10.18429/JACoW-IBIC2019-WEPP013")
+ .withField(StandardField.COMMENT, "[12] I. Podadera et al., “Beam commissioning of beam posi- tion and phase monitors for LIPAc”, in Proc. IBIC’19, Malmö, Sweden, Sep. 2019, pp. 534-538. doi:10.18429/ JACoW-IBIC2019-WEPP013");
+
+ BibEntry entry13 = new BibEntry(StandardEntryType.Article)
+ .withCitationKey("Kondo2021")
+ .withField(StandardField.AUTHOR, "K. Kondo and others")
+ .withField(StandardField.TITLE, "Neutron production measurement in the 125 mA 5 MeV Deuteron beam commissioning of Linear IFMIF Prototype Accelerator (LIPAc) RFQ")
+ .withField(StandardField.JOURNAL, "Nucl. Fusion")
+ .withField(StandardField.VOLUME, "61")
+ .withField(StandardField.NUMBER, "1")
+ .withField(StandardField.PAGES, "116002")
+ .withField(StandardField.YEAR, "2021")
+ .withField(StandardField.DOI, "82310.1088/1741-4326/ac233c")
+ .withField(StandardField.COMMENT, "[13] K. Kondo et al., “Neutron production measurement in the 125 mA 5 MeV Deuteron beam commissioning of Linear IFMIF Prototype Accelerator (LIPAc) RFQ”, Nucl. Fusion, vol. 61, no. 1, p. 116002, 2021. doi:82310.1088/1741-4326/ ac233c");
+
+ BibEntry entry17 = new BibEntry(StandardEntryType.InProceedings)
+ .withCitationKey("Bellan2021a")
+ .withField(StandardField.AUTHOR, "L. Bellan and others")
+ .withField(StandardField.BOOKTITLE, "Proc. ICIS’21, TRIUMF, Vancouver, BC, Canada, https://indico.cern.ch/event/1027296/")
+ .withField(StandardField.COMMENT, "[17] L. Bellan et al., “Extraction and low energy beam transport models used for the IFMIF/EVEDA RFQ commissioning”, in Proc. ICIS’21, TRIUMF, Vancouver, BC, Canada, Sep. 2021. https://indico.cern.ch/event/1027296/")
+ .withField(StandardField.MONTH, "#sep#")
+ .withField(StandardField.TITLE, "Extraction and low energy beam transport models used for the IFMIF/EVEDA RFQ commissioning")
+ .withField(StandardField.YEAR, "2021");
+
+ // We use the existing test entries, but add a citation key (which is added by the importer)
+ // We need to clone to keep the static entries unmodified
+ assertEquals(List.of(
+ ((BibEntry) KNASTER_2017.clone()).withCitationKey("Knaster2017"),
+ entry02,
+ ((BibEntry) SHIMOSAKI_2019.clone()).withCitationKey("Shimosaki2019"),
+ entry04,
+ entry05,
+ ((BibEntry) BELLAN_2021.clone()).withCitationKey("Bellan2021"),
+ ((BibEntry) MASUDA_2022.clone()).withCitationKey("Masuda2022"),
+ entry08,
+ entry09,
+ entry10,
+ ((BibEntry) PODADERA_2012.clone()).withCitationKey("Podadera2012"),
+ entry12,
+ entry13,
+ ((BibEntry) KWON_2023.clone()).withCitationKey("Kwon2023"),
+ ((BibEntry) AKAGI_2023.clone()).withCitationKey("Akagi2023"),
+ ((BibEntry) INTERNAL_NOTE.clone()),
+ entry17),
+ parserResult.getDatabase().getEntries());
+ }
+
+ static Stream<Arguments> references() { // argument factory for the parameterized references(BibEntry, String, String) test
+     return Stream.of( // each tuple: expected entry, reference number, raw reference text (odd hyphenation/typos look like raw PDF extraction output; confirm before "fixing" them)
+         Arguments.of(
+             KNASTER_2017,
+             "1",
+             "J. Knaster et al., “Overview of the IFMIF/EVEDA project”, Nucl. Fusion, vol. 57, p. 102016, 2017. doi:10.1088/ 1741-4326/aa6a6a"
+         ),
+         Arguments.of(
+             SHIMOSAKI_2019,
+             "3",
+             "Y. Shimosaki et al., “Lattice design for 5 MeV – 125 mA CW RFQ operation in LIPAc”, in Proc. IPAC’19, Mel- bourne, Australia, May 2019, pp. 977-979. doi:10.18429/ JACoW-IPAC2019-MOPTS051"
+         ),
+         Arguments.of(
+             BELLAN_2021,
+             "6",
+             "L. Bellan et al., “Acceleration of the high current deuteron beam through the IFMIF-EVEDA beam dynamics perfor- mances”, in Proc. HB’21, Batavia, IL, USA, Oct. 2021, pp. 197-202. doi:10.18429/JACoW-HB2021-WEDC2"
+         ),
+         Arguments.of(
+             MASUDA_2022,
+             "7",
+             "K. Masuda et al., “Commissioning of IFMIF Prototype Ac- celerator towards CW operation”, in Proc. LINAC’22, Liv- erpool, UK, Aug.-Sep. 2022, pp. 319-323. doi:10.18429/ JACoW-LINAC2022-TU2AA04"
+         ),
+         Arguments.of(
+             PODADERA_2012,
+             "11",
+             "I. Podadera, J. M. Carmona, A. Ibarra, and J. Molla, “Beam position monitor development for LIPAc”, presented at th 8th DITANET Topical Workshop on Beam Position Monitors, CERN, Geneva, Switzreland, Jan. 2012."
+         ),
+         Arguments.of(
+             KWON_2023,
+             "14",
+             "S. Kwon et al., “High beam current operation with beam di-agnostics at LIPAc”, presented at HB’23, Geneva, Switzer- land, Oct. 2023, paper FRC1I2, this conference."
+         ),
+         Arguments.of(
+             AKAGI_2023,
+             "15",
+             "T. Akagi et al., “Achievement of high-current continuous- wave deuteron injector for Linear IFMIF Prototype Accelera- tor (LIPAc)”, to be presented at IAEA FEC’23, London, UK, Oct. 2023. https://www.iaea.org/events/fec2023"
+         ),
+         Arguments.of(
+             INTERNAL_NOTE,
+             "16",
+             "“AF4.1.1 SRF Linac Engineering Design Report”, Internal note."
+         )
+     );
+ }
+
+ @ParameterizedTest
+ @MethodSource // no factory name given: JUnit Jupiter falls back to the static factory method with the same name, references()
+ void references(BibEntry expectedEntry, String number, String reference) { // checks that parsing one numbered reference string yields the expected entry
+ assertEquals(expectedEntry, bibliographyFromPdfImporter.parseReference(number, reference));
+ }
+}
diff --git a/src/test/java/org/jabref/model/entry/AuthorListTest.java b/src/test/java/org/jabref/model/entry/AuthorListTest.java
index 6f49c711bae..57617913ddd 100644
--- a/src/test/java/org/jabref/model/entry/AuthorListTest.java
+++ b/src/test/java/org/jabref/model/entry/AuthorListTest.java
@@ -14,6 +14,9 @@
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
+/**
+ * Other parsing tests are available in {@link org.jabref.logic.importer.AuthorListParserTest}
+ */
public class AuthorListTest {
/*
diff --git a/src/test/resources/org/jabref/logic/importer/fileformat/tua3i2refpage.pdf b/src/test/resources/org/jabref/logic/importer/fileformat/tua3i2refpage.pdf
new file mode 100644
index 00000000000..5f98c97a533
Binary files /dev/null and b/src/test/resources/org/jabref/logic/importer/fileformat/tua3i2refpage.pdf differ