Skip to content

Commit

Permalink
Fix location field not exported correctly to office 2007 xml (#1909)
Browse files Browse the repository at this point in the history
* Fix location field not exported to office 2007 xml
* Add some test for exporting location and address field
Address in xml field is now imported as location
add some javadoc
  • Loading branch information
Siedlerchr authored Sep 4, 2016
1 parent a9eb978 commit ce67502
Show file tree
Hide file tree
Showing 15 changed files with 190 additions and 30 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- Made it possible to download multiple entries in one action
- [#1813](https://github.com/JabRef/jabref/issues/1813) Import/Export preferences dialog default directory set to working directory
- [#1897](https://github.com/JabRef/jabref/issues/1897) Implemented integrity check for `year` field: Last four nonpunctuation characters should be numerals
- Address in MS-Office 2007 xml format is now imported as `location`


### Fixed
- Fixed NullPointerException when opening search result window for an untitled database
- Fixed entry table traversal with Tab (no column traversal thus no double jump)
- Fixed [#1757](https://github.com/JabRef/jabref/issues/1757): Crash after saving illegal argument in entry editor
- Fixed [#1663](https://github.com/JabRef/jabref/issues/1663): Better multi-monitor support
- Fixed [#1882](https://github.com/JabRef/jabref/issues/1882): Crash after saving illegal bibtexkey in entry editor
- Fixed: a `location` field containing only a city was not exported correctly to the MS-Office 2007 xml format

### Removed
- The non-supported feature of being able to define file directories for any extension is removed. Still, it should work for older databases using the legacy `ps` and `pdf` fields, although we strongly encourage using the `file` field.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public void performExport(final BibDatabaseContext databaseContext, final String

try (VerifyingWriter ps = session.getWriter()) {
try {
DOMSource source = new DOMSource(msBibDatabase.getDOM());
DOMSource source = new DOMSource(msBibDatabase.getDomForExport());
StreamResult result = new StreamResult(ps);
Transformer trans = TransformerFactory.newInstance().newTransformer();
trans.setOutputProperty(OutputKeys.INDENT, "yes");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {
Objects.requireNonNull(reader);

MSBibDatabase dbase = new MSBibDatabase();
return new ParserResult(dbase.importEntries(reader));
return new ParserResult(dbase.importEntriesFromXml(reader));
}

@Override
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/net/sf/jabref/logic/msbib/BibTeXConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ public class BibTeXConverter {
private static final String MSBIB_PREFIX = "msbib-";


/**
* Converts an {@link MSBibEntry} to a {@link BibEntry} for import
* @param entry The {@link MSBibEntry} to convert
* @return The bib entry
*/
public static BibEntry convert(MSBibEntry entry) {
BibEntry result;
Map<String, String> fieldValues = new HashMap<>();
Expand Down Expand Up @@ -72,7 +77,7 @@ public static BibEntry convert(MSBibEntry entry) {
parseStandardNumber(entry.standardNumber, fieldValues);

if (entry.address != null) {
fieldValues.put(FieldName.ADDRESS, entry.address);
fieldValues.put(FieldName.LOCATION, entry.address);
}
// TODO: ConferenceName is saved as booktitle when converting from MSBIB to BibTeX
if (entry.conferenceName != null) {
Expand Down
31 changes: 24 additions & 7 deletions src/main/java/net/sf/jabref/logic/msbib/MSBibDatabase.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

/**
* Microsoft Word bibliography.
*
* The class is used both for import and export.
* See http://www.ecma-international.org/publications/standards/Ecma-376.htm
*/
public class MSBibDatabase {
Expand All @@ -39,20 +39,33 @@ public class MSBibDatabase {
private Set<MSBibEntry> entries;


/**
 * Creates a {@link MSBibDatabase} for <b>import</b>, starting with an empty set of entries.
 */
public MSBibDatabase() {
    this.entries = new HashSet<>();
}

// TODO: why an additional entry list? Entries are included inside the database!
/**
* Creates a new {@link MSBibDatabase} for <b>export</b>
* @param database The bib database
* @param entries List of {@link BibEntry}
*/
public MSBibDatabase(BibDatabase database, List<BibEntry> entries) {
if (entries == null) {
addEntries(database.getEntries());
addEntriesForExport(database.getEntries());
} else {
addEntries(entries);
addEntriesForExport(entries);
}
}

public List<BibEntry> importEntries(BufferedReader reader) {
/**
* Imports entries from an office xml file
* @param reader
* @return List of {@link BibEntry}
*/
public List<BibEntry> importEntriesFromXml(BufferedReader reader) {
entries = new HashSet<>();
Document inputDocument;
try {
Expand Down Expand Up @@ -83,15 +96,19 @@ public List<BibEntry> importEntries(BufferedReader reader) {
return bibitems;
}

private void addEntries(List<BibEntry> entriesToAdd) {
private void addEntriesForExport(List<BibEntry> entriesToAdd) {
entries = new HashSet<>();
for (BibEntry entry : entriesToAdd) {
MSBibEntry newMods = MSBibConverter.convert(entry);
entries.add(newMods);
}
}

public Document getDOM() {
/**
* Gets the assembled dom for export
* @return XML Document
*/
public Document getDomForExport() {
Document document = null;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
Expand All @@ -106,7 +123,7 @@ public Document getDOM() {
rootNode.setAttribute("SelectedStyle", "");

for (MSBibEntry entry : entries) {
Node node = entry.getDOM(document);
Node node = entry.getEntryDom(document);
rootNode.appendChild(node);
}
document.appendChild(rootNode);
Expand Down
56 changes: 36 additions & 20 deletions src/main/java/net/sf/jabref/logic/msbib/MSBibEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,24 +64,32 @@ class MSBibEntry {

private String bibtexEntryType;

// reduced subset, supports only "CITY , STATE, COUNTRY"
// \b(\w+)\s?[,]?\s?(\w+)\s?[,]?\s?(\w+)\b
// WORD SPACE , SPACE WORD SPACE , SPACE WORD
// tested using http://www.javaregex.com/test.html
private static final Pattern ADDRESS_PATTERN = Pattern.compile("\\b(\\w+)\\s?[,]?\\s?(\\w+)\\s?[,]?\\s?(\\w+)\\b");

// Allows 20.3-2007|||20/3- 2007 etc.
// (\d{1,2})\s*[.,-/]\s*(\d{1,2})\s*[.,-/]\s*(\d{2,4})
// 1-2 DIGITS SPACES SEPARATOR SPACES 1-2 DIGITS SPACES SEPARATOR SPACES 2-4 DIGITS
// tested using http://www.javaregex.com/test.html
/**
 * Reduced subset, supports only "CITY , STATE, COUNTRY" <br>
 * <b>\b(\w+)\s?[,]?\s?(\w*)\s?[,]?\s?(\w*)\b</b> <br>
 * WORD SPACE , SPACE WORD (can be empty) SPACE , SPACE WORD (can be empty) <br>
 * Matches both single locations (only city) like Berlin and full locations like Stroudsburg, PA, USA <br>
 * Each part is a single run of word characters; group 1 is the city, group 2 the state, group 3 the country. <br>
 * Declared {@code static} so the pattern is compiled once per class instead of once per entry. <br>
 * tested using http://www.regexpal.com/
 */
private static final Pattern ADDRESS_PATTERN = Pattern.compile("\\b(\\w+)\\s?[,]?\\s?(\\w*)\\s?[,]?\\s?(\\w*)\\b");

/**
* Allows 20.3-2007|||20/3- 2007 etc. <br>
* <b>(\d{1,2})\s*[.,-/]\s*(\d{1,2})\s*[.,-/]\s*(\d{2,4})</b> <br>
* 1-2 DIGITS SPACES SEPARATOR SPACES 1-2 DIGITS SPACES SEPARATOR SPACES 2-4 DIGITS
*/
private static final Pattern DATE_PATTERN = Pattern
.compile("(\\d{1,2})\\s*[.,-/]\\s*(\\d{1,2})\\s*[.,-/]\\s*(\\d{2,4})");


/**
 * Default constructor; the body performs no work of its own.
 */
public MSBibEntry() {
    // intentionally empty
}

/**
 * Creates a new {@link MSBibEntry} for import from an xml element.
 *
 * @param entry the xml element that is handed to {@code populateFromXml} to fill this entry
 */
public MSBibEntry(Element entry) {
    this.populateFromXml(entry);
}
Expand Down Expand Up @@ -128,14 +136,17 @@ private void populateFromXml(Element entry) {
String city = getXmlElementTextContent("City", entry);
String state = getXmlElementTextContent("StateProvince", entry);
String country = getXmlElementTextContent("CountryRegion", entry);

StringBuilder addressBuffer = new StringBuilder();
if (city != null) {
addressBuffer.append(city).append(", ");
addressBuffer.append(city);
}
if (state != null) {
addressBuffer.append(state).append(' ');
if (((state != null) && !state.isEmpty()) && ((city != null) && !city.isEmpty())) {
addressBuffer.append(",").append(' ');
addressBuffer.append(state);
}
if (country != null) {
if ((country != null) && !country.isEmpty()) {
addressBuffer.append(",").append(' ');
addressBuffer.append(country);
}
address = addressBuffer.toString().trim();
Expand Down Expand Up @@ -231,7 +242,12 @@ private List<PersonName> getSpecificAuthors(String type, Element authors) {
return result;
}

public Element getDOM(Document document) {
/**
* Gets the dom representation for one entry, used for export
* @param document XmlDocument
* @return XmlElement representation of one entry
*/
public Element getEntryDom(Document document) {
Element rootNode = document.createElementNS(MSBibDatabase.NAMESPACE, MSBibDatabase.PREFIX + "Source");

for (Map.Entry<String, String> entry : fields.entrySet()) {
Expand Down Expand Up @@ -318,18 +334,18 @@ private void addAuthor(Document document, Element allAuthors, String entryName,
allAuthors.appendChild(authorTop);
}

private void addAddress(Document document, Element parent, String address) {
if (address == null) {
private void addAddress(Document document, Element parent, String addressToSplit) {
if (addressToSplit == null) {
return;
}

Matcher matcher = ADDRESS_PATTERN.matcher(address);
Matcher matcher = ADDRESS_PATTERN.matcher(addressToSplit);
if (matcher.matches() && (matcher.groupCount() >= 3)) {
addField(document, parent, "City", matcher.group(1));
addField(document, parent, "StateProvince", matcher.group(2));
addField(document, parent, "CountryRegion", matcher.group(3));
} else {
addField(document, parent, "City", address);
addField(document, parent, "City", addressToSplit);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
<b:Pages>237-248</b:Pages>
<b:JournalName>Wirtschaftsinformatik</b:JournalName>
<b:City>a</b:City>
<b:StateProvince/>
<b:CountryRegion/>
<b:ThesisType>type</b:ThesisType>
</b:Source>
</b:Sources>
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
% Encoding: UTF-8
@InProceedings{LocationTest,
author = {LocationTest},
location = {Berlin},
owner = {Christoph Schwentker},
timestamp = {2016.09.04},
}

@Comment{jabref-meta: databaseType:biblatex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<b:Sources xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" SelectedStyle="">
<b:Source>
<b:BIBTEX_Entry>inproceedings</b:BIBTEX_Entry>
<b:SourceType>ConferenceProceedings</b:SourceType>
<b:Tag>LocationTest</b:Tag>
<b:Author>
<b:Author>
<b:NameList>
<b:Person>
<b:Last>LocationTest</b:Last>
</b:Person>
</b:NameList>
</b:Author>
</b:Author>
<b:City>Berlin</b:City>
<b:StateProvince/>
<b:CountryRegion/>
</b:Source>
</b:Sources>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
% Encoding: UTF-8
@InProceedings{MultiAddressTest,
author = {MultiAddressTest},
location = {Berlin},
address = {Stroudsburg, PA, USA},
owner = {Christoph Schwentker},
timestamp = {2016.09.04},
}

@Comment{jabref-meta: databaseType:biblatex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<b:Sources xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" SelectedStyle="">
<b:Source>
<b:BIBTEX_Entry>inproceedings</b:BIBTEX_Entry>
<b:SourceType>ConferenceProceedings</b:SourceType>
<b:Tag>MultiAddressTest</b:Tag>
<b:Author>
<b:Author>
<b:NameList>
<b:Person>
<b:Last>MultiAddressTest</b:Last>
</b:Person>
</b:NameList>
</b:Author>
</b:Author>
<b:City>Stroudsburg</b:City>
<b:StateProvince>PA</b:StateProvince>
<b:CountryRegion>USA</b:CountryRegion>
</b:Source>
</b:Sources>
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
% Encoding: UTF-8
@InProceedings{LocationTest,
author = {LocationTest},
location = {Berlin},
}

@Comment{jabref-meta: databaseType:biblatex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<b:Sources xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" SelectedStyle="">
<b:Source>
<b:BIBTEX_Entry>inproceedings</b:BIBTEX_Entry>
<b:SourceType>ConferenceProceedings</b:SourceType>
<b:Tag>LocationTest</b:Tag>
<b:Author>
<b:Author>
<b:NameList>
<b:Person>
<b:Last>LocationTest</b:Last>
</b:Person>
</b:NameList>
</b:Author>
</b:Author>
<b:City>Berlin</b:City>
<b:StateProvince/>
<b:CountryRegion/>
</b:Source>
</b:Sources>
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
% Encoding: UTF-8
@InProceedings{LocationTest,
author = {LocationTest},
location = {Stroudsburg, PA, USA},
}

@Comment{jabref-meta: databaseType:biblatex;}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<b:Sources xmlns:b="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" xmlns="http://schemas.openxmlformats.org/officeDocument/2006/bibliography" SelectedStyle="">
<b:Source>
<b:BIBTEX_Entry>inproceedings</b:BIBTEX_Entry>
<b:SourceType>ConferenceProceedings</b:SourceType>
<b:Tag>LocationTest</b:Tag>
<b:Author>
<b:Author>
<b:NameList>
<b:Person>
<b:Last>LocationTest</b:Last>
</b:Person>
</b:NameList>
</b:Author>
</b:Author>
<b:City>Stroudsburg</b:City>
<b:StateProvince>PA</b:StateProvince>
<b:CountryRegion>USA</b:CountryRegion>
</b:Source>
</b:Sources>

0 comments on commit ce67502

Please sign in to comment.