Skip to content

Commit

Permalink
Create fscrawler-crawler-fs and fscrawler-crawler-ssh modules
Browse files Browse the repository at this point in the history
More and more modules.
This commit gives us a clear separation of concerns between the crawling part and the indexing part.

Related to #502.
  • Loading branch information
dadoonet committed Feb 14, 2018
1 parent 18ec6b3 commit 1deb0a5
Show file tree
Hide file tree
Showing 11 changed files with 188 additions and 16 deletions.
15 changes: 10 additions & 5 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@
<artifactId>fscrawler-tika</artifactId>
</dependency>

<!-- Our crawlers -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler-fs</artifactId>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler-ssh</artifactId>
</dependency>

<!-- Our Elasticsearch Client -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
Expand Down Expand Up @@ -120,11 +130,6 @@
<groupId>org.apache.tika</groupId>
<artifactId>tika-langdetect</artifactId>
</dependency>
<!--Dependency for parsing remote ssh directory [http://www.jcraft.com/jsch/]-->
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</dependency>

<!-- For REST Server -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
import fr.pilato.elasticsearch.crawler.fs.beans.DocParser;
import fr.pilato.elasticsearch.crawler.fs.beans.PathParser;
import fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClientManager;
import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractModel;
import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractor;
import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractorFile;
import fr.pilato.elasticsearch.crawler.fs.fileabstractor.FileAbstractorSSH;
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractModel;
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor;
import fr.pilato.elasticsearch.crawler.fs.crawler.fs.FileAbstractorFile;
import fr.pilato.elasticsearch.crawler.fs.crawler.ssh.FileAbstractorSSH;
import fr.pilato.elasticsearch.crawler.fs.framework.TimeValue;
import fr.pilato.elasticsearch.crawler.fs.meta.job.FsJob;
import fr.pilato.elasticsearch.crawler.fs.meta.job.FsJobFileHandler;
Expand Down
16 changes: 16 additions & 0 deletions crawler/crawler-abstract/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>fscrawler-crawler</artifactId>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<version>2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>fscrawler-crawler-abstract</artifactId>
<name>FSCrawler Abstract Crawler</name>


</project>
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* under the License.
*/

package fr.pilato.elasticsearch.crawler.fs.fileabstractor;
package fr.pilato.elasticsearch.crawler.fs.crawler;


import java.time.LocalDateTime;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package fr.pilato.elasticsearch.crawler.fs.fileabstractor;
package fr.pilato.elasticsearch.crawler.fs.crawler;

import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings;
import org.apache.logging.log4j.LogManager;
Expand All @@ -27,9 +27,9 @@
import java.util.Collection;

public abstract class FileAbstractor<T> {
static final Logger logger = LogManager.getLogger(FileAbstractor.class);
protected static final Logger logger = LogManager.getLogger(FileAbstractor.class);

final FsSettings fsSettings;
protected final FsSettings fsSettings;

public abstract FileAbstractModel toFileAbstractModel(String path, T file);

Expand All @@ -43,7 +43,7 @@ public abstract class FileAbstractor<T> {

public abstract void close() throws Exception;

FileAbstractor(FsSettings fsSettings) {
protected FileAbstractor(FsSettings fsSettings) {
this.fsSettings = fsSettings;
}
}
22 changes: 22 additions & 0 deletions crawler/crawler-fs/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>fscrawler-crawler</artifactId>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<version>2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>fscrawler-crawler-fs</artifactId>
<name>FSCrawler Crawlers: FS</name>

<dependencies>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler-abstract</artifactId>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
* under the License.
*/

package fr.pilato.elasticsearch.crawler.fs.fileabstractor;
package fr.pilato.elasticsearch.crawler.fs.crawler.fs;

import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractModel;
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor;
import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings;

import java.io.File;
Expand Down
28 changes: 28 additions & 0 deletions crawler/crawler-ssh/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>fscrawler-crawler</artifactId>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<version>2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>fscrawler-crawler-ssh</artifactId>
<name>FSCrawler Crawlers: SSH</name>

<dependencies>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler-abstract</artifactId>
</dependency>

<!--Dependency for parsing remote ssh directory [http://www.jcraft.com/jsch/]-->
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
* under the License.
*/

package fr.pilato.elasticsearch.crawler.fs.fileabstractor;
package fr.pilato.elasticsearch.crawler.fs.crawler.ssh;

import com.jcraft.jsch.Channel;
import com.jcraft.jsch.ChannelSftp;
import com.jcraft.jsch.JSch;
import com.jcraft.jsch.Session;
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractModel;
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor;
import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings;
import fr.pilato.elasticsearch.crawler.fs.settings.Server;

Expand Down
76 changes: 76 additions & 0 deletions crawler/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>fscrawler-parent</artifactId>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<version>2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>fscrawler-crawler</artifactId>
<packaging>pom</packaging>
<name>FSCrawler Crawlers</name>

<modules>
<module>crawler-abstract</module>
<module>crawler-fs</module>
<module>crawler-ssh</module>
</modules>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-help-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>versions-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>junit4-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
</plugin>
</plugins>
</build>

<dependencies>
<!-- Our framework -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-framework</artifactId>
</dependency>

<!-- Our Settings -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-settings</artifactId>
</dependency>

<!-- Test dependencies -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-test-framework</artifactId>
<scope>test</scope>
</dependency>

</dependencies>
</project>
21 changes: 21 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<module>cli</module>
<module>tika</module>
<module>beans</module>
<module>crawler</module>
</modules>
<name>FSCrawler</name>
<url>https://github.com/dadoonet/fscrawler/</url>
Expand Down Expand Up @@ -314,6 +315,26 @@
<artifactId>fscrawler-beans</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler-abstract</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler-fs</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-crawler-ssh</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-tika</artifactId>
Expand Down

0 comments on commit 1deb0a5

Please sign in to comment.