Skip to content

Commit

Permalink
Split loader into crawler and "real" loader
Browse files Browse the repository at this point in the history
  • Loading branch information
koppor committed Dec 4, 2023
1 parent e8c456c commit 9c86d44
Show file tree
Hide file tree
Showing 15 changed files with 85 additions and 86 deletions.
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,9 @@ tasks.register("generatePredatoryJournalListMV", JavaExec) {
group = "JabRef"
description = "Load predatory journal information from online sources to a H2 MVStore"
classpath = sourceSets.main.runtimeClasspath
mainClass = "org.jabref.cli.PredatoryJournalListMvGenerator"
mainClass = "org.jabref.cli.PredatoryJournalsMvGenerator"
onlyIf {
!file("build/resources/main/journals/predatoryJournal-list.mv").exists()
!file("build/resources/main/journals/predatory-journals.mv").exists()
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ It is also possible to use IntelliJ's internal build and run system to launch Ja
Due to [IDEA-119280](https://youtrack.jetbrains.com/issue/IDEA-119280), it is a bit more work.

1. Navigate to **File > Settings... > Build, Execution, Deployment > Build Tools > Gradle**.
2. Change the setting "Build an run using:" to "IntelliJ IDEA".
2. Change the setting "Build and run using:" to "IntelliJ IDEA".
3. Navigate to **File > Settings... > Build, Execution, Deployment > Compiler > Java Compiler**.
4. Uncheck `--Use 'release' option for cross-compilation`.
5. **Build > Build Project**
6. Open the project view (<kbd>Alt</kbd>+<kbd>1</kbd>, on mac <kbd>cmd</kbd>+<kbd>1</kbd>)
7. Copy all build resources to the folder of the build classes
1. Navigate to the folder `out/production/resources`
1. Navigate to the folder `build/resources/main`
2. Select all folders below (`bst`, `csl-locales`, ...)
3. Press <kbd>Ctrl</kbd>+<kbd>C</kbd> to mark them for copying
4. Select the folder `classes`
4. Select the folder `out/production/classes`
5. Press <kbd>Ctrl</kbd>+<kbd>V</kbd> to start the copy process
8. Locate the class `Launcher` (e.g., by <kbd>ctrl</kbd>+<kbd>N</kbd> and then typing `Launcher`). Press <kbd>Enter</kbd> to jump to that class.
<figure>
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/cli/Launcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.jabref.gui.Globals;
import org.jabref.gui.MainApplication;
import org.jabref.logic.journals.JournalAbbreviationLoader;
import org.jabref.logic.journals.PredatoryJournalLoader;
import org.jabref.logic.journals.predatory.PredatoryJournalListLoader;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.net.ProxyAuthenticator;
import org.jabref.logic.net.ProxyPreferences;
Expand Down Expand Up @@ -170,7 +170,7 @@ private static void initGlobals(PreferencesService preferences) {
// Read list(s) of journal names and abbreviations
Globals.journalAbbreviationRepository = JournalAbbreviationLoader
.loadRepository(preferences.getJournalAbbreviationPreferences());
Globals.predatoryJournalRepository = PredatoryJournalLoader
Globals.predatoryJournalRepository = PredatoryJournalListLoader
.loadRepository();

Globals.entryTypesManager = preferences.getCustomEntryTypesRepository();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,35 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Set;
import java.util.HashSet;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.jabref.logic.journals.PredatoryJournalInformation;
import org.jabref.logic.journals.PredatoryJournalLoader;
import org.jabref.logic.journals.predatory.PredatoryJournalInformation;
import org.jabref.logic.journals.predatory.PredatoryJournalListCrawler;

import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;

public class PredatoryJournalListMvGenerator {
public class PredatoryJournalsMvGenerator {
public static void main(String[] args) throws IOException {
boolean verbose = (args.length == 1) && ("--verbose".equals(args[0]));

PredatoryJournalLoader loader = new PredatoryJournalLoader();
loader.loadFromOnlineSources();

Path predatoryJournalMvFile = Path.of("build", "resources", "main", "journals", "predatoryJournal-list.mv");

Files.createDirectories(predatoryJournalMvFile.getParent());
Path predatoryJournalsMvFile = Path.of("build", "resources", "main", "journals", "predatory-journals.mv");
Files.createDirectories(predatoryJournalsMvFile.getParent());

try (MVStore store = new MVStore.Builder()
.fileName(predatoryJournalMvFile.toString())
.fileName(predatoryJournalsMvFile.toString())
.compressHigh()
.backgroundExceptionHandler((t, e) -> {
System.err.println("Exception occurred in Thread " + t + "with exception " + e);
e.printStackTrace();
})
.open()) {
MVMap<String, PredatoryJournalInformation> predatoryJournalsMap = store.openMap("PredatoryJournals");
Set<PredatoryJournalInformation> predatoryJournals = loader.getPredatoryJournalInformations();

PredatoryJournalListCrawler loader = new PredatoryJournalListCrawler();
HashSet<PredatoryJournalInformation> predatoryJournals = loader.loadFromOnlineSources();

var resultMap = predatoryJournals.stream().collect(Collectors.toMap(PredatoryJournalInformation::name, Function.identity(),
(predatoryJournalInformation, predatoryJournalInformation2) -> {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/gui/Globals.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import org.jabref.gui.util.DefaultTaskExecutor;
import org.jabref.gui.util.TaskExecutor;
import org.jabref.logic.journals.JournalAbbreviationRepository;
import org.jabref.logic.journals.PredatoryJournalRepository;
import org.jabref.logic.journals.predatory.PredatoryJournalRepository;
import org.jabref.logic.protectedterms.ProtectedTermsLoader;
import org.jabref.logic.remote.RemotePreferences;
import org.jabref.logic.remote.server.RemoteListenerServerManager;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/gui/MainMenu.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
import org.jabref.logic.importer.IdFetcher;
import org.jabref.logic.importer.WebFetchers;
import org.jabref.logic.journals.JournalAbbreviationRepository;
import org.jabref.logic.journals.PredatoryJournalRepository;
import org.jabref.logic.journals.predatory.PredatoryJournalRepository;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.OS;
import org.jabref.model.entry.BibEntryTypesManager;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import org.jabref.logic.integrity.IntegrityCheck;
import org.jabref.logic.integrity.IntegrityMessage;
import org.jabref.logic.journals.JournalAbbreviationRepository;
import org.jabref.logic.journals.PredatoryJournalRepository;
import org.jabref.logic.journals.predatory.PredatoryJournalRepository;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
import org.jabref.logic.journals.JournalAbbreviationRepository;
import org.jabref.logic.journals.PredatoryJournalRepository;
import org.jabref.logic.journals.predatory.PredatoryJournalRepository;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import java.util.List;
import java.util.Objects;

import org.jabref.logic.journals.PredatoryJournalRepository;
import org.jabref.logic.journals.predatory.PredatoryJournalRepository;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.Field;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.jabref.logic.journals;
package org.jabref.logic.journals.predatory;

import java.io.Serializable;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
package org.jabref.logic.journals;
package org.jabref.logic.journals.predatory;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand All @@ -27,35 +23,31 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PredatoryJournalLoader {
private static class PJSource {
URL url;
Optional<Pattern> elementPattern;
/**
* Crawls the (hard-coded) online sources and converts them into a set of predatory journal information. {@link #loadFromOnlineSources} is the method returning the result.
*/
public class PredatoryJournalListCrawler {

private record PJSource(URL url, Optional<Pattern> elementPattern) {

PJSource(String url, String regex) {
try {
this.url = new URI(url).toURL();
} catch (
MalformedURLException |
URISyntaxException ex) {
throw new IllegalArgumentException("Malformed URL has occurred in PJSource", ex);
}
this.elementPattern = Optional.of(Pattern.compile(regex));
this(createURL(url), Optional.of(Pattern.compile(regex)));
}

PJSource(String url) {
this(createURL(url), Optional.empty());
}

private static URL createURL(String urlString) {
try {
this.url = new URI(url).toURL();
} catch (
MalformedURLException |
URISyntaxException ex) {
return new URI(urlString).toURL();
} catch (MalformedURLException | URISyntaxException ex) {
throw new IllegalArgumentException("Malformed URL has occurred in PJSource", ex);
}
this.elementPattern = Optional.empty();
}
}

private static final Logger LOGGER = LoggerFactory.getLogger(PredatoryJournalLoader.class);
private static final Logger LOGGER = LoggerFactory.getLogger(PredatoryJournalListCrawler.class);
private static final Pattern PATTERN_NAME = Pattern.compile("(?<=\">).*?(?=<)");
private static final Pattern PATTERN_URL = Pattern.compile("http.*?(?=\")");
private static final Pattern PATTERN_ABBR = Pattern.compile("(?<=\\()[^ ]*(?=\\))");
Expand All @@ -69,40 +61,21 @@ private static class PJSource {
new PJSource("https://beallslist.net/hijacked-journals/",
"<tr>.*?</tr>")
);

private final List<String> linkElements = new ArrayList<>();

private final List<PredatoryJournalInformation> predatoryJournalInformation = new ArrayList<>();

public static PredatoryJournalRepository loadRepository() {
PredatoryJournalRepository repository = new PredatoryJournalRepository();

try (InputStream resourceAsStream = PredatoryJournalRepository.class.getResourceAsStream("/journals/predatoryJournal-list.mv")) {
if (resourceAsStream == null) {
LOGGER.warn("There is no predatoryJournalList.mv. We use a default predatory journal list");
} else {
Path tempDir = Files.createTempDirectory("jabref-journal");
Path tempJournalList = tempDir.resolve("predatoryJournal-list.mv");
Files.copy(resourceAsStream, tempJournalList);
repository = new PredatoryJournalRepository(tempJournalList);
tempDir.toFile().deleteOnExit();
tempJournalList.toFile().deleteOnExit();
}
} catch (IOException e) {
LOGGER.error("Error while copying predatory journal list", e);
return repository;
}
return repository;
}

/**
* Loads predatory journal information from online resources
* This method should be only called once when building JabRef
*
* @return the set of journal information
*/
public void loadFromOnlineSources() {
public HashSet<PredatoryJournalInformation> loadFromOnlineSources() {
predatorySources.forEach(this::crawl);
linkElements.forEach(this::clean);

LOGGER.info("Updated predatory journal list");
return new HashSet<>(predatoryJournalInformation);
}

private void crawl(PJSource source) {
Expand Down Expand Up @@ -185,8 +158,4 @@ private String decode(String s) {
.replace("&#8217;", "'")
.replace("&#8211;", "-");
}

public Set<PredatoryJournalInformation> getPredatoryJournalInformations() {
return new HashSet<>(predatoryJournalInformation);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package org.jabref.logic.journals.predatory;

import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Creates a {@link PredatoryJournalRepository} from the MV store bundled on the classpath.
 * <p>
 * If the bundled store cannot be located or its location cannot be converted to a {@link Path},
 * the repository created by the no-argument constructor is returned instead (called the
 * "demo list" in the log messages).
 */
public class PredatoryJournalListLoader {

    private static final Logger LOGGER = LoggerFactory.getLogger(PredatoryJournalListLoader.class);

    /** Classpath location of the MV store; also used in log messages so both always agree. */
    private static final String MV_STORE_RESOURCE = "/journals/predatory-journals.mv";

    /**
     * Loads the predatory journal repository from the bundled MV store.
     *
     * @return a repository backed by the bundled MV store, or the fallback repository created by
     *         the no-argument constructor if the store is missing or its path cannot be determined
     */
    public static PredatoryJournalRepository loadRepository() {
        URL resource = PredatoryJournalRepository.class.getResource(MV_STORE_RESOURCE);
        if (resource == null) {
            LOGGER.error("{} not found. Using demo list.", MV_STORE_RESOURCE);
            return new PredatoryJournalRepository();
        }

        Path path;
        try {
            path = Path.of(resource.toURI());
        } catch (URISyntaxException | java.nio.file.FileSystemNotFoundException e) {
            // Path.of(URI) throws the unchecked FileSystemNotFoundException when the URI's file
            // system does not exist and is not created automatically (e.g., a "jar:" URL when
            // running from a packaged JAR). Fall back instead of aborting the caller.
            // NOTE(review): to actually use the bundled list from inside a JAR, the resource
            // would have to be copied to a real file first - confirm whether that is needed.
            LOGGER.error("Could not determine path to {}. Using demo list.", MV_STORE_RESOURCE, e);
            return new PredatoryJournalRepository();
        }

        return new PredatoryJournalRepository(path);
    }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.jabref.logic.journals;
package org.jabref.logic.journals.predatory;

import java.nio.file.Path;
import java.util.HashMap;
Expand All @@ -25,9 +25,9 @@ public class PredatoryJournalRepository {
/**
* Initializes the internal data based on the predatory journals found in the given MV file
*/
public PredatoryJournalRepository(Path pjlist) {
public PredatoryJournalRepository(Path mvStore) {
MVMap<String, PredatoryJournalInformation> predatoryJournalsMap;
try (MVStore store = new MVStore.Builder().readOnly().fileName(pjlist.toAbsolutePath().toString()).open()) {
try (MVStore store = new MVStore.Builder().readOnly().fileName(mvStore.toAbsolutePath().toString()).open()) {
predatoryJournalsMap = store.openMap("PredatoryJournals");
predatoryJournals.putAll(predatoryJournalsMap);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
import org.jabref.logic.citationkeypattern.GlobalCitationKeyPattern;
import org.jabref.logic.journals.JournalAbbreviationLoader;
import org.jabref.logic.journals.PredatoryJournalLoader;
import org.jabref.logic.journals.predatory.PredatoryJournalListLoader;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.database.BibDatabaseMode;
Expand Down Expand Up @@ -141,7 +141,7 @@ void testEntryIsUnchangedAfterChecks() {
mock(FilePreferences.class),
createCitationKeyPatternPreferences(),
JournalAbbreviationLoader.loadBuiltInRepository(),
PredatoryJournalLoader.loadRepository(), false)
PredatoryJournalListLoader.loadRepository(), false)
.check();

assertEquals(clonedEntry, entry);
Expand Down Expand Up @@ -174,7 +174,7 @@ private void assertWrong(BibDatabaseContext context) {
mock(FilePreferences.class),
createCitationKeyPatternPreferences(),
JournalAbbreviationLoader.loadBuiltInRepository(),
PredatoryJournalLoader.loadRepository(), false)
PredatoryJournalListLoader.loadRepository(), false)
.check();
assertNotEquals(Collections.emptyList(), messages);
}
Expand All @@ -186,7 +186,7 @@ private void assertCorrect(BibDatabaseContext context) {
filePreferencesMock,
createCitationKeyPatternPreferences(),
JournalAbbreviationLoader.loadBuiltInRepository(),
PredatoryJournalLoader.loadRepository(), false)
PredatoryJournalListLoader.loadRepository(), false)
.check();
assertEquals(Collections.emptyList(), messages);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import java.util.Collections;
import java.util.List;

import org.jabref.logic.journals.PredatoryJournalLoader;
import org.jabref.logic.journals.predatory.PredatoryJournalListLoader;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;

Expand All @@ -18,10 +18,10 @@ class PredatoryJournalCheckerTest {

@BeforeAll
static void initChecker() {
checker = new PredatoryJournalChecker(PredatoryJournalLoader.loadRepository(),
checker = new PredatoryJournalChecker(PredatoryJournalListLoader.loadRepository(),
List.of(StandardField.JOURNAL, StandardField.PUBLISHER, StandardField.BOOKTITLE));
}

@Test
void journalIsNotPredatory() {
BibEntry entry = new BibEntry().withField(StandardField.JOURNAL, "IEEE Software");
Expand Down

0 comments on commit 9c86d44

Please sign in to comment.