Skip to content

Commit

Permalink
Incorporates reviewer feedback of 2.0.0 RC2 (#285)
Browse files Browse the repository at this point in the history
* Only add the log4j2.xml to the fat-jar for CLI-like modules, so it does not leak into Maven attached artifacts

* Move configuration samples into README files

* Fix some warning regarding the shaded artifact
  • Loading branch information
rzo1 authored Nov 22, 2023
1 parent 0917eec commit 4ab7b9d
Show file tree
Hide file tree
Showing 13 changed files with 160 additions and 363 deletions.
20 changes: 20 additions & 0 deletions dkpro-jwpl-datamachine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,26 @@
</dependencies>
<build>
<plugins>
<plugin>
  <!-- Sources artifact: the logging configuration must not be packaged here. -->
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-source-plugin</artifactId>
  <configuration>
    <excludes>
      <!-- log4j2.xml is intended to ship only inside the fat jar -->
      <exclude>**/log4j2.xml</exclude>
    </excludes>
  </configuration>
</plugin>
<plugin>
  <!-- Regular module jar: the logging configuration must not be packaged here. -->
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-jar-plugin</artifactId>
  <configuration>
    <excludes>
      <!-- log4j2.xml is intended to ship only inside the fat jar -->
      <exclude>**/log4j2.xml</exclude>
    </excludes>
  </configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
Expand Down
9 changes: 9 additions & 0 deletions dkpro-jwpl-datamachine/src/main/assembly/assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,13 @@
<handlerName>metaInf-services</handlerName>
</containerDescriptorHandler>
</containerDescriptorHandlers>
<fileSets>
  <!--
    Adds the logging configuration to the assembled (fat) jar.
    The module's regular jar and sources jar exclude log4j2.xml, so this
    file set is the only place where it gets packaged.
  -->
  <fileSet>
    <directory>${project.basedir}/src/main/resources</directory>
    <includes>
      <include>log4j2.xml</include>
    </includes>
    <!--
      Root of the archive. Written as "." rather than "/" because the path is
      defined as relative to the assembly root; a leading slash is reported by
      the assembly plugin as a non-portable filesystem-root reference.
    -->
    <outputDirectory>.</outputDirectory>
  </fileSet>
</fileSets>
</assembly>
13 changes: 11 additions & 2 deletions dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,18 @@
<include>org.sweble.wikitext:swc-engine:*</include>
</includes>
</artifactSet>
<filters>
  <!-- One filter matching every artifact that is merged into the shaded jar. -->
  <filter>
    <artifact>*:*</artifact>
    <excludes>
      <!-- NOTE(review): presumably excluded to silence shade-plugin warnings about
           overlapping resources; confirm against the build log. -->
      <exclude>META-INF/MANIFEST.MF</exclude>
      <exclude>META-INF/maven/**</exclude>
      <exclude>org.sweble.wikitext/swc-engine/git.properties</exclude>
    </excludes>
  </filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>
<relocations>
<relocation>
Expand Down
20 changes: 20 additions & 0 deletions dkpro-jwpl-revisionmachine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@

<build>
<plugins>
<plugin>
  <!-- Sources artifact: the logging configuration must not be packaged here. -->
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-source-plugin</artifactId>
  <configuration>
    <excludes>
      <!-- log4j2.xml is intended to ship only inside the fat jar -->
      <exclude>**/log4j2.xml</exclude>
    </excludes>
  </configuration>
</plugin>
<plugin>
  <!-- Regular module jar: the logging configuration must not be packaged here. -->
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-jar-plugin</artifactId>
  <configuration>
    <excludes>
      <!-- log4j2.xml is intended to ship only inside the fat jar -->
      <exclude>**/log4j2.xml</exclude>
    </excludes>
  </configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
Expand Down
9 changes: 9 additions & 0 deletions dkpro-jwpl-revisionmachine/src/main/assembly/assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,13 @@
<handlerName>metaInf-services</handlerName>
</containerDescriptorHandler>
</containerDescriptorHandlers>
<fileSets>
  <!--
    Adds the logging configuration to the assembled (fat) jar.
    The module's regular jar and sources jar exclude log4j2.xml, so this
    file set is the only place where it gets packaged.
  -->
  <fileSet>
    <directory>${project.basedir}/src/main/resources</directory>
    <includes>
      <include>log4j2.xml</include>
    </includes>
    <!--
      Root of the archive. Written as "." rather than "/" because the path is
      defined as relative to the assembly root; a leading slash is reported by
      the assembly plugin as a non-portable filesystem-root reference.
    -->
    <outputDirectory>.</outputDirectory>
  </fileSet>
</fileSets>
</assembly>
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
JWPLTimeMachine
# JWPLTimeMachine

USAGE:

StartDBMapping <configuration.xml>

EXAMPLE FILE:

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
Expand All @@ -22,6 +23,7 @@ EXAMPLE FILE:
<entry key="outputDirectory">/home/zesch/wiki_data/elwiki_test</entry>
<entry key="removeInputFilesAfterProcessing">false</entry>
</properties>
```

* language - The language to use. The language string must correspond to one of the values enumerated in WikiConstants.Language in JWPL. Examples: english, german, french, arabic.
* mainCategory - The title of the main category of the Wikipedia language version used. For example, "Categories" for the English Wikipedia or "!Hauptkategorie" for the German Wikipedia.
Expand All @@ -34,3 +36,47 @@ EXAMPLE FILE:
* categoryLinksFile - The absolute path to the categorylinks file. Only the .sql and .sql.gz extensions are supported.
* outputDirectory - The absolute path to the directory to which the transformed files will be written. The outputDirectory will be created if it does not exist. However, its parent directory must exist.
* removeInputFilesAfterProcessing - A boolean that specifies whether the meta-history file, the pagelinks file and the categorylinks file should be removed after the processing.

# Config Examples

## Greek

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>This is a configuration form for the JWPL TimeMachine</comment>
<entry key="language">greek</entry>
<entry key="mainCategory">Κατηγορίες</entry>
<entry key="disambiguationCategory">Αποσαφήνιση</entry>
<entry key="fromTimestamp">20060101000000</entry>
<entry key="toTimestamp">20060102000000</entry>
<entry key="each">1</entry>
<entry key="metaHistoryFile">/home/zesch/wiki_data/elwiki/elwiki-20080205-pages-meta-history.xml.bz2</entry>
<entry key="categoryLinksFile">/home/zesch/wiki_data/elwiki/elwiki-20080205-categorylinks.sql.gz</entry>
<entry key="pageLinksFile">/home/zesch/wiki_data/elwiki/elwiki-20080205-pagelinks.sql.gz</entry>
<entry key="outputDirectory">/home/zesch/wiki_data/elwiki_test</entry>
<entry key="removeInputFilesAfterProcessing">false</entry>
</properties>
```

## Arabic

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>This is a configuration form for the JWPL TimeMachine</comment>
<!-- NOTE(review): Arabic values reconstructed; verify the category titles and dump file names against the actual arwiki dump. -->
<entry key="language">arabic</entry>
<entry key="mainCategory">تصنيفات رئيسية</entry>
<entry key="disambiguationCategory">صفحات توضيح</entry>
<entry key="fromTimestamp">20060101000000</entry>
<entry key="toTimestamp">20060102000000</entry>
<entry key="each">1</entry>
<entry key="metaHistoryFile">/home/zesch/wiki_data/arwiki/arwiki-20080205-pages-meta-history.xml.bz2</entry>
<entry key="categoryLinksFile">/home/zesch/wiki_data/arwiki/arwiki-20080205-categorylinks.sql.gz</entry>
<entry key="pageLinksFile">/home/zesch/wiki_data/arwiki/arwiki-20080205-pagelinks.sql.gz</entry>
<entry key="outputDirectory">/home/zesch/wiki_data/arwiki_test</entry>
<entry key="removeInputFilesAfterProcessing">false</entry>
</properties>
```
34 changes: 0 additions & 34 deletions dkpro-jwpl-timemachine/config_file_arabic_one_snapshot.xml

This file was deleted.

34 changes: 0 additions & 34 deletions dkpro-jwpl-timemachine/config_file_greek_one_snapshot.xml

This file was deleted.

20 changes: 20 additions & 0 deletions dkpro-jwpl-timemachine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,26 @@

<build>
<plugins>
<plugin>
  <!-- Sources artifact: the logging configuration must not be packaged here. -->
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-source-plugin</artifactId>
  <configuration>
    <excludes>
      <!-- log4j2.xml is intended to ship only inside the fat jar -->
      <exclude>**/log4j2.xml</exclude>
    </excludes>
  </configuration>
</plugin>
<plugin>
  <!-- Regular module jar: the logging configuration must not be packaged here. -->
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-jar-plugin</artifactId>
  <configuration>
    <excludes>
      <!-- log4j2.xml is intended to ship only inside the fat jar -->
      <exclude>**/log4j2.xml</exclude>
    </excludes>
  </configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
Expand Down
9 changes: 9 additions & 0 deletions dkpro-jwpl-timemachine/src/main/assembly/assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,13 @@
<handlerName>metaInf-services</handlerName>
</containerDescriptorHandler>
</containerDescriptorHandlers>
<fileSets>
  <!--
    Adds the logging configuration to the assembled (fat) jar.
    The module's regular jar and sources jar exclude log4j2.xml, so this
    file set is the only place where it gets packaged.
  -->
  <fileSet>
    <directory>${project.basedir}/src/main/resources</directory>
    <includes>
      <include>log4j2.xml</include>
    </includes>
    <!--
      Root of the archive. Written as "." rather than "/" because the path is
      defined as relative to the assembly root; a leading slash is reported by
      the assembly plugin as a non-portable filesystem-root reference.
    -->
    <outputDirectory>.</outputDirectory>
  </fileSet>
</fileSets>
</assembly>
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# JWPL Util

## Template Schema

```sql
CREATE TABLE IF NOT EXISTS templateId_pageId (templateId INTEGER UNSIGNED NOT NULL,pageId INTEGER UNSIGNED NOT NULL, UNIQUE(templateId, pageId)) ENGINE = MYISAM;
CREATE TABLE IF NOT EXISTS templates (templateId INTEGER NOT NULL AUTO_INCREMENT,templateName TEXT NOT NULL,PRIMARY KEY(templateId)) ENGINE = MYISAM;
CREATE TABLE IF NOT EXISTS templateId_revisionId(templateId INTEGER UNSIGNED NOT NULL,revisionId INTEGER UNSIGNED NOT NULL, UNIQUE(templateId, revisionId)) ENGINE = MYISAM;
```

## Properties Sample

```properties
#host=dbhost
#db=revisiondb
#user=username
Expand Down Expand Up @@ -33,4 +46,5 @@ pages_black_list=
revisions_white_list=official_schprooche
revisions_white_prefix_list=
revisions_black_prefix_list=
revisions_black_list=
revisions_black_list=
```

This file was deleted.

Loading

0 comments on commit 4ab7b9d

Please sign in to comment.