diff --git a/dkpro-jwpl-datamachine/pom.xml b/dkpro-jwpl-datamachine/pom.xml index 6a89f4811..0430b003e 100644 --- a/dkpro-jwpl-datamachine/pom.xml +++ b/dkpro-jwpl-datamachine/pom.xml @@ -56,6 +56,26 @@ + + org.apache.maven.plugins + maven-source-plugin + + + + **/log4j2.xml + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + **/log4j2.xml + + + maven-assembly-plugin diff --git a/dkpro-jwpl-datamachine/src/main/assembly/assembly.xml b/dkpro-jwpl-datamachine/src/main/assembly/assembly.xml index 08f195a85..40250a7cb 100644 --- a/dkpro-jwpl-datamachine/src/main/assembly/assembly.xml +++ b/dkpro-jwpl-datamachine/src/main/assembly/assembly.xml @@ -39,4 +39,13 @@ metaInf-services + + + ${project.basedir}/src/main/resources + + log4j2.xml + + / + + \ No newline at end of file diff --git a/dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/pom.xml b/dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/pom.xml index 3c69328c0..6423b0fb7 100644 --- a/dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/pom.xml +++ b/dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/pom.xml @@ -115,9 +115,18 @@ org.sweble.wikitext:swc-engine:* + + + *:* + + META-INF/MANIFEST.MF + META-INF/maven/** + org.sweble.wikitext/swc-engine/git.properties + + + - + diff --git a/dkpro-jwpl-revisionmachine/pom.xml b/dkpro-jwpl-revisionmachine/pom.xml index a72d4f87d..c0a4419d1 100644 --- a/dkpro-jwpl-revisionmachine/pom.xml +++ b/dkpro-jwpl-revisionmachine/pom.xml @@ -84,6 +84,26 @@ + + org.apache.maven.plugins + maven-source-plugin + + + + **/log4j2.xml + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + **/log4j2.xml + + + maven-assembly-plugin diff --git a/dkpro-jwpl-revisionmachine/src/main/assembly/assembly.xml b/dkpro-jwpl-revisionmachine/src/main/assembly/assembly.xml index 08f195a85..40250a7cb 100644 --- a/dkpro-jwpl-revisionmachine/src/main/assembly/assembly.xml +++ b/dkpro-jwpl-revisionmachine/src/main/assembly/assembly.xml @@ -39,4 +39,13 @@ metaInf-services + + + ${project.basedir}/src/main/resources + 
+ log4j2.xml + + / + + \ No newline at end of file diff --git a/dkpro-jwpl-timemachine/README.TXT b/dkpro-jwpl-timemachine/README.md similarity index 55% rename from dkpro-jwpl-timemachine/README.TXT rename to dkpro-jwpl-timemachine/README.md index a831651ad..bd122a23d 100644 --- a/dkpro-jwpl-timemachine/README.TXT +++ b/dkpro-jwpl-timemachine/README.md @@ -1,4 +1,4 @@ -JWPLTimeMachine +# JWPLTimeMachine USAGE: @@ -6,6 +6,7 @@ StartDBMapping EXAMPLE FILE: +```xml @@ -22,6 +23,7 @@ EXAMPLE FILE: /home/zesch/wiki_data/elwiki_test false +``` * language - The used language. The language string must correspond to one of the values enumerated in WikiConstants.Language in the JWPL. Examples: english, german, frensh, arabic. * mainCategory - The title of the main category of the Wikipedia language version used. For example, "Categories" for the English Wikipedia or "!Hauptkategorie" for the German Wikipedia. @@ -34,3 +36,47 @@ EXAMPLE FILE: * categoryLinksFile - The absolute path to the categorylinks file only .sql and .sql.gz extensions are supported. * outputDirectory - The absolute path to the directory to which the transformed files will be written. The outputDirectory will be created if it does not exist. However its parent directory must exist. * removeInputFilesAfterProcessing - A boolean that specifies whether the meta-history file, the pagelinks file and the categorylinks file should be removed after the processing. 
+ +# Config Examples + +## Greek + +```xml + + + + This a configuration formular for the JWPL TimeMachine + greek + Κατηγορίες + Αποσαφήνιση + 20060101000000 + 20060102000000 + 1 + /home/zesch/wiki_data/elwiki/elwiki-20080205-pages-meta-history.xml.bz2 + /home/zesch/wiki_data/elwiki/elwiki-20080205-categorylinks.sql.gz + /home/zesch/wiki_data/elwiki/elwiki-20080205-pagelinks.sql.gz + /home/zesch/wiki_data/elwiki_test + false + +``` + +## Arabic + +```xml + + + + This a configuration formular for the JWPL TimeMachine + arabic + التصنيف الرئيسي + صفحات توضيح + 20080223000000 + 20080224000000 + 1 + /home/zesch/wiki_data/ar_historic/arwiki-20080224-pages-meta-history.xml + /home/zesch/wiki_data/ar_historic/arwiki-20080224-categorylinks.sql + /home/zesch/wiki_data/ar_historic/arwiki-20080224-pagelinks.sql + /home/zesch/wiki_data/ar_historic/arwiki_test + false + +``` \ No newline at end of file diff --git a/dkpro-jwpl-timemachine/config_file_arabic_one_snapshot.xml b/dkpro-jwpl-timemachine/config_file_arabic_one_snapshot.xml deleted file mode 100644 index 41961ec48..000000000 --- a/dkpro-jwpl-timemachine/config_file_arabic_one_snapshot.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - This a configuration formular for the JWPL TimeMachine - arabic - التصنيف الرئيسي - صفحات توضيح - 20080223000000 - 20080224000000 - 1 - /home/zesch/wiki_data/ar_historic/arwiki-20080224-pages-meta-history.xml - /home/zesch/wiki_data/ar_historic/arwiki-20080224-categorylinks.sql - /home/zesch/wiki_data/ar_historic/arwiki-20080224-pagelinks.sql - /home/zesch/wiki_data/ar_historic/arwiki_test - false - diff --git a/dkpro-jwpl-timemachine/config_file_greek_one_snapshot.xml b/dkpro-jwpl-timemachine/config_file_greek_one_snapshot.xml deleted file mode 100644 index 510e88127..000000000 --- a/dkpro-jwpl-timemachine/config_file_greek_one_snapshot.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - This a configuration formular for the JWPL TimeMachine - greek - Κατηγορίες - Αποσαφήνιση - 20060101000000 - 20060102000000 - 1 - 
/home/zesch/wiki_data/elwiki/elwiki-20080205-pages-meta-history.xml.bz2 - /home/zesch/wiki_data/elwiki/elwiki-20080205-categorylinks.sql.gz - /home/zesch/wiki_data/elwiki/elwiki-20080205-pagelinks.sql.gz - /home/zesch/wiki_data/elwiki_test - false - diff --git a/dkpro-jwpl-timemachine/pom.xml b/dkpro-jwpl-timemachine/pom.xml index 72f8eb7df..1ee0172bc 100644 --- a/dkpro-jwpl-timemachine/pom.xml +++ b/dkpro-jwpl-timemachine/pom.xml @@ -62,6 +62,26 @@ + + org.apache.maven.plugins + maven-source-plugin + + + + **/log4j2.xml + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + **/log4j2.xml + + + maven-assembly-plugin diff --git a/dkpro-jwpl-timemachine/src/main/assembly/assembly.xml b/dkpro-jwpl-timemachine/src/main/assembly/assembly.xml index 08f195a85..40250a7cb 100644 --- a/dkpro-jwpl-timemachine/src/main/assembly/assembly.xml +++ b/dkpro-jwpl-timemachine/src/main/assembly/assembly.xml @@ -39,4 +39,13 @@ metaInf-services + + + ${project.basedir}/src/main/resources + + log4j2.xml + + / + + \ No newline at end of file diff --git a/dkpro-jwpl-util/src/main/resources/templategen.properties.sample b/dkpro-jwpl-util/README.md similarity index 55% rename from dkpro-jwpl-util/src/main/resources/templategen.properties.sample rename to dkpro-jwpl-util/README.md index 2123659be..0e5a81572 100644 --- a/dkpro-jwpl-util/src/main/resources/templategen.properties.sample +++ b/dkpro-jwpl-util/README.md @@ -1,3 +1,16 @@ +# JWPL Util + +## Template Schema + +```sql +CREATE TABLE IF NOT EXISTS templateId_pageId (templateId INTEGER UNSIGNED NOT NULL,pageId INTEGER UNSIGNED NOT NULL, UNIQUE(templateId, pageId)) ENGINE = MYISAM; +CREATE TABLE IF NOT EXISTS templates (templateId INTEGER NOT NULL AUTO_INCREMENT,templateName TEXT NOT NULL,PRIMARY KEY(templateId)) ENGINE = MYISAM; +CREATE TABLE IF NOT EXISTS templateId_revisionId(templateId INTEGER UNSIGNED NOT NULL,revisionId INTEGER UNSIGNED NOT NULL, UNIQUE(templateId, revisionId)) ENGINE = MYISAM; +``` + +## Properties Sample + 
+``` #host=dbhost #db=revisiondb #user=username @@ -33,4 +46,5 @@ pages_black_list= revisions_white_list=official_schprooche revisions_white_prefix_list= revisions_black_prefix_list= -revisions_black_list= \ No newline at end of file +revisions_black_list= +``` \ No newline at end of file diff --git a/dkpro-jwpl-util/src/main/resources/create_tpltables_simplegenerator.sql b/dkpro-jwpl-util/src/main/resources/create_tpltables_simplegenerator.sql deleted file mode 100644 index 50f193065..000000000 --- a/dkpro-jwpl-util/src/main/resources/create_tpltables_simplegenerator.sql +++ /dev/null @@ -1,3 +0,0 @@ -CREATE TABLE IF NOT EXISTS templateId_pageId (templateId INTEGER UNSIGNED NOT NULL,pageId INTEGER UNSIGNED NOT NULL, UNIQUE(templateId, pageId)) ENGINE = MYISAM; -CREATE TABLE IF NOT EXISTS templates (templateId INTEGER NOT NULL AUTO_INCREMENT,templateName TEXT NOT NULL,PRIMARY KEY(templateId)) ENGINE = MYISAM; -CREATE TABLE IF NOT EXISTS templateId_revisionId(templateId INTEGER UNSIGNED NOT NULL,revisionId INTEGER UNSIGNED NOT NULL, UNIQUE(templateId, revisionId)) ENGINE = MYISAM; diff --git a/dkpro-jwpl-wikimachine/jwpl_tables.sql b/dkpro-jwpl-wikimachine/jwpl_tables.sql deleted file mode 100644 index 2135f53b6..000000000 --- a/dkpro-jwpl-wikimachine/jwpl_tables.sql +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Licensed to the Technische Universität Darmstadt under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The Technische Universität Darmstadt - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
- * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ --- MySQL dump 10.11 --- --- Host: localhost Database: jwpl_tables --- ------------------------------------------------------ --- Server version 5.0.37-community-nt - -/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; -/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; -/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; -/*!40101 SET NAMES utf8 */; -/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; -/*!40103 SET TIME_ZONE='+00:00' */; -/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; -/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; -/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; -/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; - --- --- Table structure for table `Category` --- - -DROP TABLE IF EXISTS `Category`; -CREATE TABLE `Category` ( - `id` bigint(20) NOT NULL auto_increment, - `pageId` int(11) default NULL, - `name` varchar(255) default NULL, - PRIMARY KEY (`id`), - UNIQUE KEY `pageId` (`pageId`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `Category` --- - -LOCK TABLES `Category` WRITE; -/*!40000 ALTER TABLE `Category` DISABLE KEYS */; -/*!40000 ALTER TABLE `Category` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `category_inlinks` --- - -DROP TABLE IF EXISTS `category_inlinks`; -CREATE TABLE `category_inlinks` ( - `id` bigint(20) NOT NULL, - `inLinks` int(11) default NULL, - KEY `FK3F433773E46A97CC` (`id`), - KEY `FK3F433773BB482769` (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping 
data for table `category_inlinks` --- - -LOCK TABLES `category_inlinks` WRITE; -/*!40000 ALTER TABLE `category_inlinks` DISABLE KEYS */; -/*!40000 ALTER TABLE `category_inlinks` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `category_outlinks` --- - -DROP TABLE IF EXISTS `category_outlinks`; -CREATE TABLE `category_outlinks` ( - `id` bigint(20) NOT NULL, - `outLinks` int(11) default NULL, - KEY `FK9885334CE46A97CC` (`id`), - KEY `FK9885334CBB482769` (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `category_outlinks` --- - -LOCK TABLES `category_outlinks` WRITE; -/*!40000 ALTER TABLE `category_outlinks` DISABLE KEYS */; -/*!40000 ALTER TABLE `category_outlinks` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `category_pages` --- - -DROP TABLE IF EXISTS `category_pages`; -CREATE TABLE `category_pages` ( - `id` bigint(20) NOT NULL, - `pages` int(11) default NULL, - KEY `FK71E8D943E46A97CC` (`id`), - KEY `FK71E8D943BB482769` (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `category_pages` --- - -LOCK TABLES `category_pages` WRITE; -/*!40000 ALTER TABLE `category_pages` DISABLE KEYS */; -/*!40000 ALTER TABLE `category_pages` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `MetaData` --- - -DROP TABLE IF EXISTS `MetaData`; -CREATE TABLE `MetaData` ( - `id` bigint(20) NOT NULL auto_increment, - `language` varchar(255) default NULL, - `disambiguationCategory` varchar(255) default NULL, - `mainCategory` varchar(255) default NULL, - `nrofPages` bigint(20) default NULL, - `nrofRedirects` bigint(20) default NULL, - `nrofDisambiguationPages` bigint(20) default NULL, - `nrofCategories` bigint(20) default NULL, - `version` varchar(255) default NULL, - PRIMARY KEY (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `MetaData` --- - -LOCK TABLES `MetaData` WRITE; -/*!40000 ALTER TABLE `MetaData` DISABLE KEYS */; -/*!40000 ALTER TABLE 
`MetaData` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `Page` --- - -DROP TABLE IF EXISTS `Page`; -CREATE TABLE `Page` ( - `id` bigint(20) NOT NULL auto_increment, - `pageId` int(11) default NULL, - `name` varchar(255) default NULL, - `text` longtext, - `isDisambiguation` bit(1) default NULL, - PRIMARY KEY (`id`), - UNIQUE KEY `pageId` (`pageId`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `Page` --- - -LOCK TABLES `Page` WRITE; -/*!40000 ALTER TABLE `Page` DISABLE KEYS */; -/*!40000 ALTER TABLE `Page` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `page_categories` --- - -DROP TABLE IF EXISTS `page_categories`; -CREATE TABLE `page_categories` ( - `id` bigint(20) NOT NULL, - `pages` int(11) default NULL, - KEY `FK72FB59CC1E350EDD` (`id`), - KEY `FK72FB59CC75DCF4FA` (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `page_categories` --- - -LOCK TABLES `page_categories` WRITE; -/*!40000 ALTER TABLE `page_categories` DISABLE KEYS */; -/*!40000 ALTER TABLE `page_categories` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `page_inlinks` --- - -DROP TABLE IF EXISTS `page_inlinks`; -CREATE TABLE `page_inlinks` ( - `id` bigint(20) NOT NULL, - `inLinks` int(11) default NULL, - KEY `FK91C2BC041E350EDD` (`id`), - KEY `FK91C2BC0475DCF4FA` (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `page_inlinks` --- - -LOCK TABLES `page_inlinks` WRITE; -/*!40000 ALTER TABLE `page_inlinks` DISABLE KEYS */; -/*!40000 ALTER TABLE `page_inlinks` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `page_outlinks` --- - -DROP TABLE IF EXISTS `page_outlinks`; -CREATE TABLE `page_outlinks` ( - `id` bigint(20) NOT NULL, - `outLinks` int(11) default NULL, - KEY `FK95F640DB1E350EDD` (`id`), - KEY `FK95F640DB75DCF4FA` (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `page_outlinks` --- - -LOCK TABLES 
`page_outlinks` WRITE; -/*!40000 ALTER TABLE `page_outlinks` DISABLE KEYS */; -/*!40000 ALTER TABLE `page_outlinks` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `page_redirects` --- - -DROP TABLE IF EXISTS `page_redirects`; -CREATE TABLE `page_redirects` ( - `id` bigint(20) NOT NULL, - `redirects` varchar(255) default NULL, - KEY `FK1484BA671E350EDD` (`id`), - KEY `FK1484BA6775DCF4FA` (`id`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `page_redirects` --- - -LOCK TABLES `page_redirects` WRITE; -/*!40000 ALTER TABLE `page_redirects` DISABLE KEYS */; -/*!40000 ALTER TABLE `page_redirects` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `PageMapLine` --- - -DROP TABLE IF EXISTS `PageMapLine`; -CREATE TABLE `PageMapLine` ( - `id` bigint(20) NOT NULL auto_increment, - `name` varchar(255) default NULL, - `pageID` int(11) default NULL, - `stem` varchar(255) default NULL, - `lemma` varchar(255) default NULL, - PRIMARY KEY (`id`), - KEY `name` (`name`) -) ENGINE=MyISAM DEFAULT CHARSET=utf8; - --- --- Dumping data for table `PageMapLine` --- - -LOCK TABLES `PageMapLine` WRITE; -/*!40000 ALTER TABLE `PageMapLine` DISABLE KEYS */; -/*!40000 ALTER TABLE `PageMapLine` ENABLE KEYS */; -UNLOCK TABLES; -/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; - -/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; -/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; -/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; -/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; -/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; -/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; -/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; - --- Dump completed on 2008-02-11 12:33:30