-
-
Notifications
You must be signed in to change notification settings.
Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Co-authored-by: Carl Christian Snethlage <[email protected]>
- Loading branch information
Showing 1 changed file with 69 additions and 73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
package org.jabref.logic.importer.fileformat; | ||
|
||
import java.nio.file.Path; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Optional; | ||
|
||
|
@@ -10,88 +9,84 @@ | |
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
|
||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
class PdfContentImporterTest { | ||
|
||
private PdfContentImporter importer; | ||
|
||
@BeforeEach | ||
void setUp() { | ||
importer = new PdfContentImporter(); | ||
} | ||
private PdfContentImporter importer = new PdfContentImporter(); | ||
|
||
@Test | ||
void doesNotHandleEncryptedPdfs() throws Exception { | ||
Path file = Path.of(PdfContentImporter.class.getResource("/pdfs/encrypted.pdf").toURI()); | ||
List<BibEntry> result = importer.importDatabase(file).getDatabase().getEntries(); | ||
assertEquals(Collections.emptyList(), result); | ||
assertEquals(List.of(), result); | ||
} | ||
|
||
@Test | ||
void importTwiceWorksAsExpected() throws Exception { | ||
Path file = Path.of(PdfContentImporter.class.getResource("/pdfs/minimal.pdf").toURI()); | ||
List<BibEntry> result = importer.importDatabase(file).getDatabase().getEntries(); | ||
|
||
BibEntry expected = new BibEntry(StandardEntryType.InProceedings); | ||
expected.setField(StandardField.AUTHOR, "1 "); | ||
expected.setField(StandardField.TITLE, "Hello World"); | ||
expected.setFiles(Collections.singletonList(new LinkedFile("", file.toAbsolutePath(), "PDF"))); | ||
BibEntry expected = new BibEntry(StandardEntryType.InProceedings) | ||
.withField(StandardField.AUTHOR, "1 ") | ||
.withField(StandardField.TITLE, "Hello World") | ||
.withFiles(List.of(new LinkedFile("", file.toAbsolutePath(), "PDF"))); | ||
assertEquals(List.of(expected), result); | ||
|
||
List<BibEntry> resultSecondImport = importer.importDatabase(file).getDatabase().getEntries(); | ||
assertEquals(Collections.singletonList(expected), result); | ||
assertEquals(Collections.singletonList(expected), resultSecondImport); | ||
assertEquals(List.of(expected), resultSecondImport); | ||
} | ||
|
||
@Test | ||
void parsingEditorWithoutPagesorSeriesInformation() { | ||
BibEntry entry = new BibEntry(StandardEntryType.InProceedings); | ||
entry.setField(StandardField.AUTHOR, "Anke Lüdeling and Merja Kytö (Eds.)"); | ||
entry.setField(StandardField.EDITOR, "Anke Lüdeling and Merja Kytö"); | ||
entry.setField(StandardField.PUBLISHER, "Springer"); | ||
entry.setField(StandardField.TITLE, "Corpus Linguistics – An International Handbook – Lüdeling, Anke, Kytö, Merja (Eds.)"); | ||
|
||
String firstPageContents = "Corpus Linguistics – An International Handbook – Lüdeling, Anke,\n" + | ||
"Kytö, Merja (Eds.)\n" + | ||
"\n" + | ||
"Anke Lüdeling, Merja Kytö (Eds.)\n" + | ||
"\n" + | ||
"VOLUME 2\n" + | ||
"\n" + | ||
"This handbook provides an up-to-date survey of the field of corpus linguistics, a Handbücher zur Sprach- und\n" + | ||
"field whose methodology has revolutionized much of the empirical work done in Kommunikationswissenschaft / Handbooks\n" + | ||
"\n" + | ||
"of Linguistics and Communication Science\n" + | ||
"most fields of linguistic study over the past decade. (HSK) 29/2\n" + | ||
"\n" + | ||
"vii, 578 pages\n" + | ||
"Corpus linguistics investigates human language by starting out from large\n"; | ||
BibEntry entry = new BibEntry(StandardEntryType.InProceedings) | ||
.withField(StandardField.AUTHOR, "Anke Lüdeling and Merja Kytö (Eds.)") | ||
.withField(StandardField.EDITOR, "Anke Lüdeling and Merja Kytö") | ||
.withField(StandardField.PUBLISHER, "Springer") | ||
.withField(StandardField.TITLE, "Corpus Linguistics – An International Handbook – Lüdeling, Anke, Kytö, Merja (Eds.)"); | ||
|
||
String firstPageContents = """ | ||
Corpus Linguistics – An International Handbook – Lüdeling, Anke, | ||
Kytö, Merja (Eds.) | ||
Anke Lüdeling, Merja Kytö (Eds.) | ||
VOLUME 2 | ||
This handbook provides an up-to-date survey of the field of corpus linguistics, a Handbücher zur Sprach- und | ||
field whose methodology has revolutionized much of the empirical work done in Kommunikationswissenschaft / Handbooks | ||
of Linguistics and Communication Science | ||
most fields of linguistic study over the past decade. (HSK) 29/2 | ||
vii, 578 pages | ||
Corpus linguistics investigates human language by starting out from large | ||
"""; | ||
|
||
assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContents, "\n")); | ||
} | ||
|
||
@Test | ||
void parsingWithoutActualDOINumber() { | ||
BibEntry entry = new BibEntry(StandardEntryType.InProceedings); | ||
entry.withField(StandardField.AUTHOR, "Link to record in KAR and http://kar.kent.ac.uk/51043/ and Document Version and UNSPECIFIED and Master of Research (MRes) thesis and University of Kent") | ||
.withField(StandardField.TITLE, "Kent Academic Repository Full text document (pdf) Citation for published version Smith, Lucy Anna (2014) Mortality in the Ornamental Fish Retail Sector: an Analysis of Stock Losses and Stakeholder Opinions. DOI") | ||
.withField(StandardField.YEAR, "5104"); | ||
|
||
String firstPageContents = "Kent Academic Repository Full text document (pdf)\n" + | ||
"Citation for published version\n" + | ||
"Smith, Lucy Anna (2014) Mortality in the Ornamental Fish Retail Sector: an Analysis of Stock\n" + | ||
"Losses and Stakeholder Opinions.\n" + | ||
"DOI\n" + | ||
"\n" + | ||
"\n" + | ||
"Link to record in KAR\n" + | ||
"http://kar.kent.ac.uk/51043/\n" + | ||
"Document Version\n" + | ||
"UNSPECIFIED\n" + | ||
"Master of Research (MRes) thesis, University of Kent,."; | ||
BibEntry entry = new BibEntry(StandardEntryType.InProceedings) | ||
.withField(StandardField.AUTHOR, "Link to record in KAR and http://kar.kent.ac.uk/51043/ and Document Version and UNSPECIFIED and Master of Research (MRes) thesis and University of Kent") | ||
.withField(StandardField.TITLE, "Kent Academic Repository Full text document (pdf) Citation for published version Smith, Lucy Anna (2014) Mortality in the Ornamental Fish Retail Sector: an Analysis of Stock Losses and Stakeholder Opinions. DOI") | ||
.withField(StandardField.YEAR, "5104"); | ||
|
||
String firstPageContents = """ | ||
Kent Academic Repository Full text document (pdf) | ||
Citation for published version | ||
Smith, Lucy Anna (2014) Mortality in the Ornamental Fish Retail Sector: an Analysis of Stock | ||
Losses and Stakeholder Opinions. | ||
DOI | ||
Link to record in KAR | ||
http://kar.kent.ac.uk/51043/ | ||
Document Version | ||
UNSPECIFIED | ||
Master of Research (MRes) thesis, University of Kent,."""; | ||
|
||
assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContents, "\n")); | ||
} | ||
|
@@ -104,26 +99,27 @@ void extractDOIFromPage1() { | |
.withField(StandardField.TITLE, "British Journal of Nutrition (2008), 99, 1–11 doi: 10.1017/S0007114507795296 q The Authors") | ||
.withField(StandardField.YEAR, "2008"); | ||
|
||
String firstPageContent = "British Journal of Nutrition (2008), 99, 1–11 doi: 10.1017/S0007114507795296\n" + | ||
"q The Authors 2008\n" + | ||
"\n" + | ||
"Review Article\n" + | ||
"\n" + | ||
"Cocoa and health: a decade of research\n" + | ||
"\n" + | ||
"Karen A. Cooper1, Jennifer L. Donovan2, Andrew L. Waterhouse3 and Gary Williamson1*\n" + | ||
"1Nestlé Research Center, Vers-Chez-les-Blanc, PO Box 44, CH-1000 Lausanne 26, Switzerland\n" + | ||
"2Department of Psychiatry and Behavioural Sciences, Medical University of South Carolina, Charleston, SC 29425, USA\n" + | ||
"3Department of Viticulture & Enology, University of California, Davis, CA 95616, USA\n" + | ||
"\n" + | ||
"(Received 5 December 2006 – Revised 29 May 2007 – Accepted 31 May 2007)\n" + | ||
"\n" + | ||
"Abbreviations: FMD, flow-mediated dilation; NO, nitirc oxide.\n" + | ||
"\n" + | ||
"*Corresponding author: Dr Gary Williamson, fax þ41 21 785 8544, email [email protected]\n" + | ||
"\n" + | ||
"British Journal of Nutrition\n" + | ||
"https://doi.org/10.1017/S0007114507795296 Published online by Cambridge University Press"; | ||
String firstPageContent = """ | ||
British Journal of Nutrition (2008), 99, 1–11 doi: 10.1017/S0007114507795296 | ||
q The Authors 2008 | ||
Review Article | ||
Cocoa and health: a decade of research | ||
Karen A. Cooper1, Jennifer L. Donovan2, Andrew L. Waterhouse3 and Gary Williamson1* | ||
1Nestlé Research Center, Vers-Chez-les-Blanc, PO Box 44, CH-1000 Lausanne 26, Switzerland | ||
2Department of Psychiatry and Behavioural Sciences, Medical University of South Carolina, Charleston, SC 29425, USA | ||
3Department of Viticulture & Enology, University of California, Davis, CA 95616, USA | ||
(Received 5 December 2006 – Revised 29 May 2007 – Accepted 31 May 2007) | ||
Abbreviations: FMD, flow-mediated dilation; NO, nitirc oxide. | ||
*Corresponding author: Dr Gary Williamson, fax þ41 21 785 8544, email [email protected] | ||
British Journal of Nutrition | ||
https://doi.org/10.1017/S0007114507795296 Published online by Cambridge University Press"""; | ||
|
||
assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContent, "\n")); | ||
} | ||
|