Skip to content

Commit

Permalink
Fix month creation. #name# is really used within JabRef to determine …
Browse files Browse the repository at this point in the history
…a BibTeX string content (and not plain text)

- Fix tests
- Sort BibEntryTests: default constructor, keywords, all others
- Fix name of "CanonicalBibEntry" (from CanonicalBibtexEntry)
- Revert "fix jabref format of month", this also reverts commit 77c25ca.
- Format some code
- Add comments
  • Loading branch information
koppor committed Nov 9, 2019
1 parent fafc402 commit 28bd008
Show file tree
Hide file tree
Showing 20 changed files with 699 additions and 608 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ private <T> void parse(T entryType, Map<Field, String> fields) {
putNumber(fields, (BigInteger) method.invoke(entryType));
continue;
} else if (method.getName().equals("getMonth")) {
putMonth(fields, Month.parse((String)method.invoke(entryType)));
putMonth(fields, Month.parse((String) method.invoke(entryType)));
continue;
} else if (isMethodToIgnore(method.getName())) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,6 @@ private String parseFieldContent(Field field) throws IOException {
int character;

while (((character = peek()) != ',') && (character != '}') && (character != ')')) {

if (eof) {
throw new IOException("Error in line " + line + ": EOF in mid-string");
}
Expand All @@ -602,7 +601,6 @@ private String parseFieldContent(Field field) throws IOException {
// brackets to know when the string is finished.
StringBuilder text = parseBracketedTextExactly();
value.append(fieldContentParser.format(text, field));

} else if (Character.isDigit((char) character)) { // value is a number
String number = parseTextToken();
value.append(number);
Expand All @@ -614,16 +612,11 @@ private String parseFieldContent(Field field) throws IOException {
throw new IOException("Error in line " + line + " or above: "
+ "Empty text token.\nThis could be caused " + "by a missing comma between two fields.");
}
if (field != StandardField.MONTH) {
value.append('#').append(textToken).append('#');
} else {
value.append(textToken);
}
value.append('#').append(textToken).append('#');
}
skipWhitespace();
}
return value.toString();

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ private static void processCapitalization(Map<Field, String> map) {
public ParserResult importDatabase(BufferedReader reader) throws IOException {
Objects.requireNonNull(reader);

List<BibEntry> bibitems = new ArrayList<>();
List<BibEntry> bibEntries = new ArrayList<>();
StringBuilder sb = new StringBuilder();

// Pattern fieldPattern = Pattern.compile("^AU |^TI |^SO |^DT |^C1 |^AB
Expand Down Expand Up @@ -233,7 +233,6 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {
} else if ("SO".equals(beg) || "JA".equals(beg)) {
hm.put(StandardField.JOURNAL, EOL_PATTERN.matcher(value).replaceAll(" "));
} else if ("ID".equals(beg) || "KW".equals(beg)) {

value = EOL_PATTERN.matcher(value).replaceAll(" ");
String existingKeywords = hm.get(StandardField.KEYWORDS);
if ((existingKeywords == null) || existingKeywords.contains(value)) {
Expand All @@ -242,7 +241,6 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {
existingKeywords += ", " + value;
}
hm.put(StandardField.KEYWORDS, existingKeywords);

} else if ("AB".equals(beg)) {
hm.put(StandardField.ABSTRACT, EOL_PATTERN.matcher(value).replaceAll(" "));
} else if ("BP".equals(beg) || "BR".equals(beg) || "SP".equals(beg)) {
Expand Down Expand Up @@ -271,12 +269,10 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {
} else if ("DI".equals(beg)) {
hm.put(StandardField.DOI, value);
} else if ("PD".equals(beg)) {

String month = IsiImporter.parseMonth(value);
if (month != null) {
hm.put(StandardField.MONTH, month);
}

} else if ("DT".equals(beg)) {
if ("Review".equals(value)) {
type = StandardEntryType.Article; // set "Review" in Note/Comment?
Expand Down Expand Up @@ -327,17 +323,19 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {

b.setField(hm);

bibitems.add(b);
bibEntries.add(b);
}
return new ParserResult(bibitems);
return new ParserResult(bibEntries);
}

/**
* Replaces every hyphen in the given page string with a double hyphen.
* <p>
* NOTE(review): the replacement is not idempotent; a value that already contains "--" comes back with "----".
* Confirm that callers never pass an already converted value.
*
* @param value the page string to convert
* @return the given value with each "-" replaced by "--"
*/
private static String parsePages(String value) {
return value.replace("-", "--");
}

public static String parseMonth(String value) {

/**
* Parses the month and returns it in the JabRef format
*/
static String parseMonth(String value) {
String[] parts = value.split("\\s|\\-");
for (String part1 : parts) {
Optional<Month> month = Month.getMonthByShortName(part1.toLowerCase(Locale.ROOT));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,22 +330,22 @@ private void parseAdditionalFields(BibEntry be, boolean multilineUrlFieldAllowed
// skip keyword
this.lastLine = "".equals(this.lastLine) ? "" : this.lastLine.substring(this.lastLine.indexOf(':') + 1).trim();

// parse keywords field
if ("Keywords".equals(keyword)) {
// parse keywords field
String content = readMultipleLines(in);
String[] keywords = content.split("[,;]");
be.addKeywords(Arrays.asList(keywords),
importFormatPreferences.getKeywordSeparator());
// parse JEL field
} else if ("JEL".equals(keyword)) {
// parse JEL field
be.setField(new UnknownField("jel"), readMultipleLines(in));

} else if (keyword.startsWith("Date")) {
// parse date field
String content = readMultipleLines(in);
Date.parse(content).ifPresent(be::setDate);
// parse URL field
} else if (keyword.startsWith("URL")) {
// parse URL field
String content;
if (multilineUrlFieldAllowed) {
content = readMultipleLines(in);
Expand All @@ -354,10 +354,8 @@ private void parseAdditionalFields(BibEntry be, boolean multilineUrlFieldAllowed
readLine(in);
}
be.setField(StandardField.URL, content);

// authors field
} else if (keyword.startsWith("By")) {
// parse authors
// parse authors field
parseAuthors(be, in);
} else {
readLine(in);
Expand Down
58 changes: 27 additions & 31 deletions src/main/java/org/jabref/logic/xmp/DublinCoreExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,11 @@ private void extractAuthor() {

/**
* Year in BibTeX - Date in DublinCore is only the year information, because dc interprets empty months as January.
* Tries to extract the month as well.
* In JabRef the bibtex/month/value is prioritized.
* <br/>
* The problem is the default value of the calendar, which is always January, also if there is no month information in
* the xmp metdata. The idea is, to reject all information with YYYY-01-01. In cases, where xmp is written with JabRef
* the month property filled with jan will override this behavior and no data is lost. In the cases, where xmp
* is written by another service, the assumption is, that the 1st January is not a publication date at all.
* Tries to extract the month as well. In JabRef the bibtex/month/value is prioritized. <br/> The problem is the
* default value of the calendar, which is always January, even if there is no month information in the XMP metadata.
* The idea is to reject all information with YYYY-01-01. In cases where the XMP is written with JabRef, the month
* property filled with jan will override this behavior and no data is lost. In cases where the XMP is written by
* another service, the assumption is that the 1st of January is not a publication date at all.
*/
private void extractYearAndMonth() {
List<String> dates = dcSchema.getUnqualifiedSequenceValueList("date");
Expand All @@ -87,12 +85,11 @@ private void extractYearAndMonth() {
}
if (calender != null) {
bibEntry.setField(StandardField.YEAR, String.valueOf(calender.get(Calendar.YEAR)));
int monthNumber = calender.get(Calendar.MONTH) + 1;
// not the 1st of January
if (!((calender.get(Calendar.MONTH) == 0) && (calender.get(Calendar.DAY_OF_MONTH) == 1))) {
Optional<Month> month = Month.getMonthByNumber(calender.get(Calendar.MONTH) + 1);
if (month.isPresent()) {
bibEntry.setField(StandardField.MONTH, month.get().getShortName());
}
if (!((monthNumber == 1) && (calender.get(Calendar.DAY_OF_MONTH) == 1))) {
Month.getMonthByNumber(monthNumber)
.ifPresent(month -> bibEntry.setMonth(month));
}
}
}
Expand Down Expand Up @@ -129,15 +126,13 @@ private void extractPublisher() {
}

/**
* This method sets all fields, which are custom in bibtext and therefore supported by jabref, but which are not included in the DublinCore format.
* <p/>
* This method sets all fields, which are custom in BibTeX and therefore supported by JabRef, but which are not
* included in the DublinCore format.
* <p>
* The relation attribute of DublinCore is abused to insert these custom fields.
*/
private void extractBibTexFields() {
List<String> relationships = dcSchema.getRelations();

Predicate<String> isBibTeXElement = s -> s.startsWith("bibtex/");

Consumer<String> splitBibTeXElement = s -> {
// the default pattern is bibtex/key/value, but some fields contain URLs etc.
// so the value property contains additional slashes, which makes the usage of
Expand All @@ -154,15 +149,15 @@ private void extractBibTexFields() {
// see also DublinCoreExtractor#extractYearAndMonth
if (StandardField.MONTH.equals(key)) {
Optional<Month> parsedMonth = Month.parse(value);
parsedMonth.ifPresent(month -> bibEntry.setField(key, month.getShortName()));
parsedMonth.ifPresent(bibEntry::setMonth);
}
}

};
List<String> relationships = dcSchema.getRelations();
if (relationships != null) {
relationships.stream()
.filter(isBibTeXElement)
.forEach(splitBibTeXElement);
.filter(isBibTeXElement)
.forEach(splitBibTeXElement);
}
}

Expand Down Expand Up @@ -220,26 +215,27 @@ private void extractType() {
}

/**
* Helper function for retrieving a BibEntry from the DublinCore metadata
* in a PDF file.
* Helper function for retrieving a BibEntry from the DublinCore metadata in a PDF file.
* <p>
* To understand how to get hold of a DublinCore have a look in the test cases for XMPUtil.
* <p>
* The BibEntry is built by mapping individual fields in the dublin core (like creator, title, subject) to fields in
* a bibtex bibEntry. In case special "bibtex/" entries are contained, the normal dublin core fields take
* precedence. For instance, the dublin core date takes precedence over bibtex/month.
*
* To understand how to get hold of a DublinCore have a look in the
* test cases for XMPUtil.
*
* The BibEntry is build by mapping individual fields in the dublin core
* (like creator, title, subject) to fields in a bibtex bibEntry.
*
* @return The bibtex bibEntry found in the document information.
* @return The bibEntry extracted from the document information.
*/
public Optional<BibEntry> extractBibtexEntry() {
// first extract "bibtex/" entries
this.extractBibTexFields();

// then extract all "standard" dublin core entries
this.extractEditor();
this.extractAuthor();
this.extractYearAndMonth();
this.extractAbstract();
this.extractDOI();
this.extractPublisher();
this.extractBibTexFields();
this.extractRights();
this.extractSource();
this.extractSubject();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/model/entry/BibEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ public Object clone() {
*/
@Override
public String toString() {
return CanonicalBibtexEntry.getCanonicalRepresentation(this);
return CanonicalBibEntry.getCanonicalRepresentation(this);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,19 @@
import org.jabref.model.entry.field.Field;
import org.jabref.model.entry.field.InternalField;

public class CanonicalBibtexEntry {
public class CanonicalBibEntry {

private CanonicalBibtexEntry() {
private CanonicalBibEntry() {
}

/**
* This returns a canonical BibTeX serialization. Special characters such as "{" or "&" are NOT escaped, but written
* as is
* This returns a canonical BibTeX serialization. Serializes all fields, even the JabRef internal ones. Does NOT
* serialize "KEY_FIELD" as field, but as key
*
* Serializes all fields, even the JabRef internal ones. Does NOT serialize "KEY_FIELD" as field, but as key
* <ul>
* <li>Special characters such as "{" or "&" are NOT escaped, but written as is</li>
* <li>String constants are not handled. That means, <code>month = apr</code> in a bib file is serialized as <code>month = {#apr#}</code>. This indicates that the month field is correctly stored</li>
* </ul>
*/
public static String getCanonicalRepresentation(BibEntry entry) {
StringBuilder sb = new StringBuilder();
Expand Down Expand Up @@ -50,7 +53,7 @@ public static String getCanonicalRepresentation(BibEntry entry) {
// generate field entries
StringJoiner sj = new StringJoiner(",\n", "", "\n");
for (String fieldName : sortedFields) {
String line = String.format(" %s = {%s}", fieldName, String.valueOf(mapFieldToValue.get(fieldName)).replaceAll("\\r\\n","\n"));
String line = String.format(" %s = {%s}", fieldName, String.valueOf(mapFieldToValue.get(fieldName)).replaceAll("\\r\\n", "\n"));
sj.add(line);
}
sb.append(sj);
Expand All @@ -59,5 +62,4 @@ public static String getCanonicalRepresentation(BibEntry entry) {
sb.append('}');
return sb.toString();
}

}
7 changes: 4 additions & 3 deletions src/main/java/org/jabref/model/entry/Month.java
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,14 @@ public String getShortName() {
* Returns the month in JabRef format. The format is the short three-letter name surrounded by '#' characters.
* Example: #jan#, #feb#, etc.
*
* See https://github.com/JabRef/jabref/issues/263#issuecomment-151246595 for a discussion on that thing.
* This seems to be an <em>invalid</em> format in terms of plain BiBTeX, but a <em>valid</em> format in the case of JabRef
* See <a href="https://github.com/JabRef/jabref/issues/263#issuecomment-151246595">Issue 263</a> for a discussion of this format.
* This seems to be an <em>invalid</em> format in terms of plain BibTeX, but a <em>valid</em> format in the case of JabRef.
* The documentation is available at the <a href="http://help.jabref.org/en/Strings">Strings help</a> of JabRef.
*
* @return Month in JabRef format
*/
public String getJabRefFormat() {
return String.format("%s", shortName);
return String.format("#%s#", shortName);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ void monthFieldSpecialSyntax() throws IOException {
// modify month field
Set<Field> fields = entry.getFields();
assertTrue(fields.contains(StandardField.MONTH));
assertEquals("mar", entry.getField(StandardField.MONTH).get());
assertEquals("#mar#", entry.getField(StandardField.MONTH).get());

//write out bibtex string
StringWriter stringWriter = new StringWriter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ void cleanupMonthChangesNumberToBibtex() {
entry.setField(StandardField.MONTH, "01");

worker.cleanup(preset, entry);
assertEquals(Optional.of("jan"), entry.getField(StandardField.MONTH));
assertEquals(Optional.of("#jan#"), entry.getField(StandardField.MONTH));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public void cleanupMovesDateToYearAndMonth() {

assertEquals(Optional.empty(), entry.getField(StandardField.DATE));
assertEquals(Optional.of("2011"), entry.getField(StandardField.YEAR));
assertEquals(Optional.of("jan"), entry.getField(StandardField.MONTH));
assertEquals(Optional.of("#jan#"), entry.getField(StandardField.MONTH));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ public void setUp() {

@Test
public void formatExample() {
assertEquals("dec", formatter.format(formatter.getExampleInput()));
assertEquals("#dec#", formatter.format(formatter.getExampleInput()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1784,4 +1784,11 @@ void bibTeXConstantAprilIsDisplayedAsConstant() throws ParseException {

assertEquals("#apr#", result.get().getField(StandardField.MONTH).get());
}

// The month value is given without braces or quotes (month = apr). The test expects the parsed field to hold
// the token wrapped in '#' characters ("#apr#") and not the plain text "apr".
@Test
void bibTeXConstantAprilIsParsedAsStringMonthAprilWhenReadingTheField() throws ParseException {
Optional<BibEntry> result = parser.parseSingleEntry("@Misc{m, month = apr }" );

assertEquals(Optional.of("#apr#"), result.get().getField(StandardField.MONTH));
}
}
Loading

0 comments on commit 28bd008

Please sign in to comment.