diff --git a/beans/pom.xml b/beans/pom.xml new file mode 100644 index 000000000..d0cbec27d --- /dev/null +++ b/beans/pom.xml @@ -0,0 +1,74 @@ + + + + fscrawler-parent + fr.pilato.elasticsearch.crawler + 2.5-SNAPSHOT + + 4.0.0 + + fscrawler-beans + FSCrawler Beans + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-help-plugin + + + org.apache.maven.plugins + maven-resources-plugin + + + org.codehaus.mojo + versions-maven-plugin + + + com.carrotsearch.randomizedtesting + junit4-maven-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + org.apache.maven.plugins + maven-enforcer-plugin + + + + + + + + fr.pilato.elasticsearch.crawler + fscrawler-framework + + + + + fr.pilato.elasticsearch.crawler + fscrawler-settings + + + + + fr.pilato.elasticsearch.crawler + fscrawler-test-framework + test + + + fr.pilato.elasticsearch.crawler + fscrawler-test-documents + test + + + + diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Attributes.java b/beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Attributes.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Attributes.java rename to beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Attributes.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Doc.java b/beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Doc.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Doc.java rename to beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Doc.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/DocParser.java b/beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/DocParser.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/DocParser.java rename to beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/DocParser.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/File.java b/beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/File.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/File.java rename to beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/File.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Meta.java b/beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Meta.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Meta.java rename to beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Meta.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Path.java b/beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Path.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Path.java rename to beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/Path.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/PathParser.java b/beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/PathParser.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/PathParser.java rename to beans/src/main/java/fr/pilato/elasticsearch/crawler/fs/meta/doc/PathParser.java diff --git a/core/pom.xml b/core/pom.xml index 17b7dfb38..f31cbd9a7 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -64,6 +64,12 @@ fscrawler-settings + + + fr.pilato.elasticsearch.crawler + fscrawler-tika + + fr.pilato.elasticsearch.crawler diff --git a/pom.xml b/pom.xml index e9e3fb015..b3c012c55 100644 --- a/pom.xml +++ b/pom.xml @@ -16,6 +16,8 @@ integration-tests distribution cli + tika + beans FSCrawler https://github.com/dadoonet/fscrawler/ @@ -302,6 +304,21 @@ fscrawler-elasticsearch-client 2.5-SNAPSHOT + + fr.pilato.elasticsearch.crawler + fscrawler-cli + 2.5-SNAPSHOT + + + fr.pilato.elasticsearch.crawler + fscrawler-beans + 2.5-SNAPSHOT + + + fr.pilato.elasticsearch.crawler + fscrawler-tika + 2.5-SNAPSHOT + org.elasticsearch.client diff --git a/tika/pom.xml b/tika/pom.xml new file mode 100644 index 000000000..8d898a1fd --- /dev/null +++ b/tika/pom.xml @@ -0,0 +1,122 @@ + + + + fscrawler-parent + fr.pilato.elasticsearch.crawler + 2.5-SNAPSHOT + + 4.0.0 + + fscrawler-tika + FSCrawler Tika + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-help-plugin + + + org.apache.maven.plugins + maven-resources-plugin + + + org.codehaus.mojo + versions-maven-plugin + + + com.carrotsearch.randomizedtesting + junit4-maven-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + org.apache.maven.plugins + maven-enforcer-plugin + + + + + + + + fr.pilato.elasticsearch.crawler + fscrawler-framework + + + + + fr.pilato.elasticsearch.crawler + fscrawler-beans + + + + + fr.pilato.elasticsearch.crawler + fscrawler-settings + + + + org.apache.tika + tika-parsers + + + + org.xerial + sqlite-jdbc + + + + + com.levigo.jbig2 + levigo-jbig2-imageio + true + + + com.github.jai-imageio + jai-imageio-core + true + + + com.github.jai-imageio + jai-imageio-jpeg2000 + true + + + + + org.apache.tika + tika-langdetect + + + + com.jcraft + jsch + + + + + fr.pilato.elasticsearch.crawler + fscrawler-test-framework + test + + + fr.pilato.elasticsearch.crawler + fscrawler-test-documents + test + + + + + diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParser.java b/tika/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParser.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParser.java rename to tika/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParser.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaInstance.java b/tika/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaInstance.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaInstance.java rename to tika/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaInstance.java diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/XmlDocParser.java b/tika/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/XmlDocParser.java similarity index 100% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/XmlDocParser.java rename to tika/src/main/java/fr/pilato/elasticsearch/crawler/fs/tika/XmlDocParser.java diff --git a/core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/DocParserTestCase.java b/tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/DocParserTestCase.java similarity index 95% rename from core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/DocParserTestCase.java rename to tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/DocParserTestCase.java index 7466cbb75..64ac38b85 100644 --- a/core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/DocParserTestCase.java +++ b/tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/DocParserTestCase.java @@ -17,7 +17,7 @@ * under the License. */ -package fr.pilato.elasticsearch.crawler.fs.test.unit.parser; +package fr.pilato.elasticsearch.crawler.fs.tika; import fr.pilato.elasticsearch.crawler.fs.test.framework.AbstractFSCrawlerTestCase; diff --git a/core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/TikaDocParserTest.java b/tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParserTest.java similarity index 99% rename from core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/TikaDocParserTest.java rename to tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParserTest.java index c75d55661..e0ef49b8a 100644 --- a/core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/TikaDocParserTest.java +++ b/tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/TikaDocParserTest.java @@ -17,13 +17,11 @@ * under the License. */ -package fr.pilato.elasticsearch.crawler.fs.test.unit.parser; +package fr.pilato.elasticsearch.crawler.fs.tika; import fr.pilato.elasticsearch.crawler.fs.meta.doc.Doc; import fr.pilato.elasticsearch.crawler.fs.meta.settings.Fs; import fr.pilato.elasticsearch.crawler.fs.meta.settings.FsSettings; -import fr.pilato.elasticsearch.crawler.fs.tika.TikaDocParser; -import fr.pilato.elasticsearch.crawler.fs.tika.TikaInstance; import org.apache.tika.parser.external.ExternalParser; import org.junit.Test; diff --git a/core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/XmlDocParserTest.java b/tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/XmlDocParserTest.java similarity index 91% rename from core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/XmlDocParserTest.java rename to tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/XmlDocParserTest.java index 3ba2eeedd..c08c726c1 100644 --- a/core/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/unit/parser/XmlDocParserTest.java +++ b/tika/src/test/java/fr/pilato/elasticsearch/crawler/fs/tika/XmlDocParserTest.java @@ -17,9 +17,8 @@ * under the License. */ -package fr.pilato.elasticsearch.crawler.fs.test.unit.parser; +package fr.pilato.elasticsearch.crawler.fs.tika; -import fr.pilato.elasticsearch.crawler.fs.tika.XmlDocParser; import org.junit.Test; import java.io.IOException;