Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Citavi Importer - Import all knowledge items #9043

Merged
merged 17 commits into from
Aug 13, 2022
Merged
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Fixed

- The [HtmlToLatexFormatter](https://docs.jabref.org/finding-sorting-and-cleaning-entries/saveactions#html-to-latex) keeps single `<` characters.
- We fixed a performance regression when opening large libraries [#9041](https://github.com/JabRef/jabref/issues/9041)

### Removed
Original file line number Diff line number Diff line change
@@ -44,7 +44,12 @@ public String format(String text) {
int c = result.charAt(i);

if (c == '<') {
int oldI = i;
i = readTag(result, i);
if (oldI == i) {
// just a single <, which needs to be kept
sb.append('<');
}
} else {
sb.append((char) c);
}
Original file line number Diff line number Diff line change
@@ -20,6 +20,8 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.StringJoiner;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@@ -28,6 +30,7 @@
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.jabref.logic.formatter.bibtexfields.HtmlToLatexFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter;
import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.Parser;
@@ -59,6 +62,7 @@ public class CitaviXmlImporter extends Importer implements Parser {
private static final Logger LOGGER = LoggerFactory.getLogger(CitaviXmlImporter.class);
private static final byte UUID_LENGTH = 36;
private static final byte UUID_SEMICOLON_OFFSET_INDEX = 37;
private final HtmlToLatexFormatter htmlToLatexFormatter = new HtmlToLatexFormatter();
private final NormalizePagesFormatter pagesFormatter = new NormalizePagesFormatter();

private final Map<String, Author> knownPersons = new HashMap<>();
@@ -363,19 +367,33 @@ private String getPublisher(CitaviExchangeData.References.Reference data) {
}

private String getKnowledgeItem(CitaviExchangeData.References.Reference data) {
Optional<KnowledgeItem> knowledgeItem = knowledgeItems.getKnowledgeItem().stream().filter(p -> data.getId().equals(p.getReferenceID())).findFirst();
StringJoiner comment = new StringJoiner("\n\n");
List<KnowledgeItem> foundItems = knowledgeItems.getKnowledgeItem().stream().filter(p -> data.getId().equals(p.getReferenceID())).toList();
for (KnowledgeItem knowledgeItem : foundItems) {
Optional<String> title = Optional.ofNullable(knowledgeItem.getCoreStatement()).filter(Predicate.not(String::isEmpty));
title.ifPresent(t -> comment.add("# " + cleanUpText(t)));

StringBuilder comment = new StringBuilder();
Optional<String> title = knowledgeItem.map(item -> item.getCoreStatement());
title.ifPresent(t -> comment.append("# ").append(t).append("\n\n"));
Optional<String> text = knowledgeItem.map(item -> item.getText());
text.ifPresent(t -> comment.append(t).append("\n\n"));
Optional<Integer> pages = knowledgeItem.map(item -> item.getPageRangeNumber()).filter(range -> range != -1);
pages.ifPresent(p -> comment.append("page range: ").append(p));
Optional<String> text = Optional.ofNullable(knowledgeItem.getText()).filter(Predicate.not(String::isEmpty));
text.ifPresent(t -> comment.add(cleanUpText(t)));

Optional<Integer> pages = Optional.ofNullable(knowledgeItem.getPageRangeNumber()).filter(range -> range != -1);
pages.ifPresent(p -> comment.add("page range: " + p));
}
return comment.toString();
}

/**
 * Tidies a piece of knowledge-item text before it is added to the comment.
 * <p>
 * Two things happen, in this order:
 * <ol>
 *   <li>runs of spaces directly in front of a line break are dropped;</li>
 *   <li>every opening or closing curly brace that is not already preceded by a
 *       backslash gets a backslash put in front of it.</li>
 * </ol>
 *
 * @param text the raw text; must not be {@code null}
 * @return the text without spaces before line breaks and with all curly braces escaped
 */
String cleanUpText(String text) {
    // The negative lookbehind leaves braces alone that already carry a backslash.
    return removeSpacesBeforeLineBreak(text)
            .replaceAll("(?<!\\\\)\\{", "\\\\{")
            .replaceAll("(?<!\\\\)}", "\\\\}");
}

/**
 * Strips runs of space characters that sit immediately before a line break.
 * Windows (CRLF) and Unix (LF) line endings are both handled, and the line ending itself is kept.
 *
 * @param string the text to process; must not be {@code null}
 * @return the text with spaces before line breaks removed
 */
private String removeSpacesBeforeLineBreak(String string) {
    // CRLF has to be treated separately: there the spaces precede '\r', not '\n'.
    String withoutSpacesBeforeCrLf = string.replaceAll(" +\r\n", "\r\n");
    return withoutSpacesBeforeCrLf.replaceAll(" +\n", "\n");
}

private void initUnmarshaller() throws JAXBException {
if (unmarshaller == null) {
// Lazy init because this is expensive
@@ -453,8 +471,9 @@ private static InputStream checkForUtf8BOMAndDiscardIfAny(InputStream inputStrea
}

private String clean(String input) {
return StringUtil.unifyLineBreaks(input, " ")
String result = StringUtil.unifyLineBreaks(input, " ")
.trim()
.replaceAll(" +", " ");
return htmlToLatexFormatter.format(result);
}
}
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
import org.jabref.logic.layout.LayoutFormatter;

/**
* Remove non printable character formatter.
* Remove non-printable character formatter.
*/
public class RemoveWhitespace implements LayoutFormatter {

Original file line number Diff line number Diff line change
@@ -58,6 +58,12 @@ public void testHTMLCombiningAccents() {
assertEquals("{\\\"{a}}b", formatter.format("a&#x308;b"));
}

/**
 * A lone {@code <} that is not the start of a tag has to come out of the formatter unchanged.
 */
@Test
public void keepsSingleLessThan() {
    final String comparison = "(p < 0.01)";
    String formatted = formatter.format(comparison);
    assertEquals(comparison, formatted);
}

@Test
public void formatExample() {
assertEquals("JabRef", formatter.format(formatter.getExampleInput()));
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.jabref.logic.importer.fileformat;

import java.util.stream.Stream;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Unit tests for {@link CitaviXmlImporter#cleanUpText(String)}.
 */
class CitaviXmlImporterTest {

    private final CitaviXmlImporter citaviXmlImporter = new CitaviXmlImporter();

    /**
     * Supplies {@code (expected, input)} pairs for {@link #cleanUpText(String, String)}.
     * The name must stay equal to the test method's name because {@code @MethodSource}
     * is used without an explicit factory name.
     */
    public static Stream<Arguments> cleanUpText() {
        return Stream.of(
                // text without braces or trailing spaces passes through unchanged
                Arguments.of("no action", "no action"),
                // unescaped braces get a leading backslash
                Arguments.of("\\{action\\}", "{action}"),
                Arguments.of("\\}", "}"),
                // braces that are already escaped must not be escaped a second time
                Arguments.of("\\{action\\}", "\\{action\\}"),
                // spaces directly in front of a line break are removed (LF and CRLF)
                Arguments.of("first\nsecond", "first  \nsecond"),
                Arguments.of("first\r\nsecond", "first \r\nsecond"));
    }

    @ParameterizedTest
    @MethodSource
    void cleanUpText(String expected, String input) {
        assertEquals(expected, citaviXmlImporter.cleanUpText(input));
    }
}

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -3,12 +3,10 @@ @article{
author = {Harris, Robert},
comment = {# Criteria for assessing the trustworthiness of a source
"Credibility:trustworthy source, author’s credentials, evidence of quality control, known or respected authority, organizational support. Goal: an authoritative source, a source that supplies some good evidence that allows you to trust it.
Accuracy: up to date, factual, detailed, exact, comprehensive, audience and purpose reflect intentions of completeness and accuracy. Goal: a source that is correct today (not yesterday), a source that gives the whole truth.
Reasonableness:fair, balanced, objective, reasoned, no conflict of interest, absence of fallacies or slanted tone. Goal: a source that engages the subject thoughtfully and reasonably, concerned with the truth.
Support: listed sources, contact information, available corroboration, claims supported, documentation supplied. Goal: a source that provides convincing evidence for the claims made, a source you can triangulate (find at least two other sources that support it). "
},
"Credibility:trustworthy source, author’s credentials, evidence of quality control, known or respected authority, organizational support. Goal: an authoritative source, a source that supplies some good evidence that allows you to trust it.
Accuracy: up to date, factual, detailed, exact, comprehensive, audience and purpose reflect intentions of completeness and accuracy. Goal: a source that is correct today (not yesterday), a source that gives the whole truth.
Reasonableness:fair, balanced, objective, reasoned, no conflict of interest, absence of fallacies or slanted tone. Goal: a source that engages the subject thoughtfully and reasonably, concerned with the truth.
Support: listed sources, contact information, available corroboration, claims supported, documentation supplied. Goal: a source that provides convincing evidence for the claims made, a source you can triangulate (find at least two other sources that support it). "},
keywords = {CARS, information quality},
title = {Evaluating Internet Research Sources},
}