From 1deb0a57558c43742884d6d30f7cb0fa5df2391b Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 14 Feb 2018 15:31:05 +0100 Subject: [PATCH] Create fscrawler-crawler-fs and fscrawler-crawler-ssh modules More and more modules. This commit will allow us to have a clear separation of concerns between the crawling part and the indexation part. Related to #502. --- core/pom.xml | 15 ++-- .../crawler/fs/FsCrawlerImpl.java | 8 +- crawler/crawler-abstract/pom.xml | 16 ++++ .../fs/crawler}/FileAbstractModel.java | 2 +- .../crawler/fs/crawler}/FileAbstractor.java | 8 +- crawler/crawler-fs/pom.xml | 22 ++++++ .../fs/crawler/fs}/FileAbstractorFile.java | 4 +- crawler/crawler-ssh/pom.xml | 28 +++++++ .../fs/crawler/ssh}/FileAbstractorSSH.java | 4 +- crawler/pom.xml | 76 +++++++++++++++++++ pom.xml | 21 +++++ 11 files changed, 188 insertions(+), 16 deletions(-) create mode 100644 crawler/crawler-abstract/pom.xml rename {core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor => crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler}/FileAbstractModel.java (97%) rename {core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor => crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler}/FileAbstractor.java (86%) create mode 100644 crawler/crawler-fs/pom.xml rename {core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor => crawler/crawler-fs/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/fs}/FileAbstractorFile.java (94%) create mode 100644 crawler/crawler-ssh/pom.xml rename {core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor => crawler/crawler-ssh/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/ssh}/FileAbstractorSSH.java (96%) create mode 100644 crawler/pom.xml diff --git a/core/pom.xml b/core/pom.xml index f31cbd9a7..cfe194e65 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -70,6 +70,16 @@ fscrawler-tika + + + fr.pilato.elasticsearch.crawler + fscrawler-crawler-fs + + + fr.pilato.elasticsearch.crawler + fscrawler-crawler-ssh + + fr.pilato.elasticsearch.crawler @@ -120,11 +130,6 @@ org.apache.tika tika-langdetect - - - com.jcraft - jsch - diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/FsCrawlerImpl.java b/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/FsCrawlerImpl.java index 5fd5eb556..285d0a5bc 100644 --- a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/FsCrawlerImpl.java +++ b/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/FsCrawlerImpl.java @@ -24,10 +24,10 @@ import fr.pilato.elasticsearch.crawler.fs.beans.DocParser; import fr.pilato.elasticsearch.crawler.fs.beans.PathParser; import fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClientManager; -import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractModel; -import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractor; -import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractorFile; -import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractorSSH; +import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractModel; +import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor; +import fr.pilato.elasticsearch.crawler.fs.crawler.fs.FileAbstractorFile; +import fr.pilato.elasticsearch.crawler.fs.crawler.ssh.FileAbstractorSSH; import fr.pilato.elasticsearch.crawler.fs.framework.TimeValue; import fr.pilato.elasticsearch.crawler.fs.meta.job.FsJob; import fr.pilato.elasticsearch.crawler.fs.meta.job.FsJobFileHandler; diff --git a/crawler/crawler-abstract/pom.xml b/crawler/crawler-abstract/pom.xml new file mode 100644 index 000000000..c2be9cde8 --- /dev/null +++ b/crawler/crawler-abstract/pom.xml @@ -0,0 +1,16 @@ + + + + fscrawler-crawler + fr.pilato.elasticsearch.crawler + 2.5-SNAPSHOT + + 4.0.0 + + fscrawler-crawler-abstract + FSCrawler Abstract Crawler + + + diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractModel.java b/crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/FileAbstractModel.java similarity index 97% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractModel.java rename to crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/FileAbstractModel.java index cfa4a615f..fbe0b605e 100644 --- a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractModel.java +++ b/crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/FileAbstractModel.java @@ -36,7 +36,7 @@ * under the License. */ -package fr.pilato.elasticsearch.crawler.fs.fileabstractor; +package fr.pilato.elasticsearch.crawler.fs.crawler; import java.time.LocalDateTime; diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractor.java b/crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/FileAbstractor.java similarity index 86% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractor.java rename to crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/FileAbstractor.java index 9b21b86cf..b43bba126 100644 --- a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractor.java +++ b/crawler/crawler-abstract/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/FileAbstractor.java @@ -17,7 +17,7 @@ * under the License. */ -package fr.pilato.elasticsearch.crawler.fs.fileabstractor; +package fr.pilato.elasticsearch.crawler.fs.crawler; import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings; import org.apache.logging.log4j.LogManager; @@ -27,9 +27,9 @@ import java.util.Collection; public abstract class FileAbstractor { - static final Logger logger = LogManager.getLogger(FileAbstractor.class); + protected static final Logger logger = LogManager.getLogger(FileAbstractor.class); - final FsSettings fsSettings; + protected final FsSettings fsSettings; public abstract FileAbstractModel toFileAbstractModel(String path, T file); @@ -43,7 +43,7 @@ public abstract class FileAbstractor { public abstract void close() throws Exception; - FileAbstractor(FsSettings fsSettings) { + protected FileAbstractor(FsSettings fsSettings) { this.fsSettings = fsSettings; } } diff --git a/crawler/crawler-fs/pom.xml b/crawler/crawler-fs/pom.xml new file mode 100644 index 000000000..a9ac39c0d --- /dev/null +++ b/crawler/crawler-fs/pom.xml @@ -0,0 +1,22 @@ + + + + fscrawler-crawler + fr.pilato.elasticsearch.crawler + 2.5-SNAPSHOT + + 4.0.0 + + fscrawler-crawler-fs + FSCrawler Crawlers: FS + + + + fr.pilato.elasticsearch.crawler + fscrawler-crawler-abstract + + + + diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractorFile.java b/crawler/crawler-fs/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/fs/FileAbstractorFile.java similarity index 94% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractorFile.java rename to crawler/crawler-fs/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/fs/FileAbstractorFile.java index 0fae04338..3a24361b6 100644 --- a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractorFile.java +++ b/crawler/crawler-fs/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/fs/FileAbstractorFile.java @@ -17,8 +17,10 @@ * under the License. */ -package fr.pilato.elasticsearch.crawler.fs.fileabstractor; +package fr.pilato.elasticsearch.crawler.fs.crawler.fs; +import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractModel; +import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor; import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings; import java.io.File; diff --git a/crawler/crawler-ssh/pom.xml b/crawler/crawler-ssh/pom.xml new file mode 100644 index 000000000..6bc21097f --- /dev/null +++ b/crawler/crawler-ssh/pom.xml @@ -0,0 +1,28 @@ + + + + fscrawler-crawler + fr.pilato.elasticsearch.crawler + 2.5-SNAPSHOT + + 4.0.0 + + fscrawler-crawler-ssh + FSCrawler Crawlers: SSH + + + + fr.pilato.elasticsearch.crawler + fscrawler-crawler-abstract + + + + + com.jcraft + jsch + + + + diff --git a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractorSSH.java b/crawler/crawler-ssh/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/ssh/FileAbstractorSSH.java similarity index 96% rename from core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractorSSH.java rename to crawler/crawler-ssh/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/ssh/FileAbstractorSSH.java index 058b44bc5..e25171cd6 100644 --- a/core/src/main/java/fr/pilato/elasticsearch/crawler/fs/fileabstractor/FileAbstractorSSH.java +++ b/crawler/crawler-ssh/src/main/java/fr/pilato/elasticsearch/crawler/fs/crawler/ssh/FileAbstractorSSH.java @@ -17,12 +17,14 @@ * under the License. */ -package fr.pilato.elasticsearch.crawler.fs.fileabstractor; +package fr.pilato.elasticsearch.crawler.fs.crawler.ssh; import com.jcraft.jsch.Channel; import com.jcraft.jsch.ChannelSftp; import com.jcraft.jsch.JSch; import com.jcraft.jsch.Session; +import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractModel; +import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor; import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings; import fr.pilato.elasticsearch.crawler.fs.settings.Server; diff --git a/crawler/pom.xml b/crawler/pom.xml new file mode 100644 index 000000000..4f7da1ad8 --- /dev/null +++ b/crawler/pom.xml @@ -0,0 +1,76 @@ + + + + fscrawler-parent + fr.pilato.elasticsearch.crawler + 2.5-SNAPSHOT + + 4.0.0 + + fscrawler-crawler + pom + FSCrawler Crawlers + + + crawler-abstract + crawler-fs + crawler-ssh + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-help-plugin + + + org.apache.maven.plugins + maven-resources-plugin + + + org.codehaus.mojo + versions-maven-plugin + + + com.carrotsearch.randomizedtesting + junit4-maven-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + org.apache.maven.plugins + maven-enforcer-plugin + + + + + + + + fr.pilato.elasticsearch.crawler + fscrawler-framework + + + + + fr.pilato.elasticsearch.crawler + fscrawler-settings + + + + + fr.pilato.elasticsearch.crawler + fscrawler-test-framework + test + + + + diff --git a/pom.xml b/pom.xml index b3c012c55..b205c335d 100644 --- a/pom.xml +++ b/pom.xml @@ -18,6 +18,7 @@ cli tika beans + crawler FSCrawler https://github.com/dadoonet/fscrawler/ @@ -314,6 +315,26 @@ fscrawler-beans 2.5-SNAPSHOT + + fr.pilato.elasticsearch.crawler + fscrawler-crawler + 2.5-SNAPSHOT + + + fr.pilato.elasticsearch.crawler + fscrawler-crawler-abstract + 2.5-SNAPSHOT + + + fr.pilato.elasticsearch.crawler + fscrawler-crawler-fs + 2.5-SNAPSHOT + + + fr.pilato.elasticsearch.crawler + fscrawler-crawler-ssh + 2.5-SNAPSHOT + fr.pilato.elasticsearch.crawler fscrawler-tika