Skip to content

Commit

Permalink
Create fscrawler-tika and fscrawler-beans modules
Browse files Browse the repository at this point in the history
Related to #502.
  • Loading branch information
dadoonet committed Feb 13, 2018
1 parent 635ddb8 commit 9996b73
Show file tree
Hide file tree
Showing 17 changed files with 222 additions and 6 deletions.
74 changes: 74 additions & 0 deletions beans/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven descriptor for the fscrawler-beans module ("FSCrawler Beans").
The module inherits from fscrawler-parent and declares no plugin or dependency versions of its own.
NOTE(review): those versions are presumably supplied by the parent's pluginManagement and
dependencyManagement sections; confirm against the root pom.xml.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>fscrawler-parent</artifactId>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<version>2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>fscrawler-beans</artifactId>
<name>FSCrawler Beans</name>

<!-- Build plugins enabled for this module; each entry only names the plugin, with no version or configuration given here. -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-help-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>versions-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>junit4-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
</plugin>
</plugins>
</build>

<dependencies>
<!-- Our framework -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-framework</artifactId>
</dependency>

<!-- Our Settings -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-settings</artifactId>
</dependency>

<!-- Test dependencies (both declared with test scope) -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-test-framework</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-test-documents</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

</project>
6 changes: 6 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@
<artifactId>fscrawler-settings</artifactId>
</dependency>

<!-- Tika Extraction -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-tika</artifactId>
</dependency>

<!-- Our Elasticsearch Client -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
Expand Down
17 changes: 17 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
<module>integration-tests</module>
<module>distribution</module>
<module>cli</module>
<module>tika</module>
<module>beans</module>
</modules>
<name>FSCrawler</name>
<url>https://github.com/dadoonet/fscrawler/</url>
Expand Down Expand Up @@ -302,6 +304,21 @@
<artifactId>fscrawler-elasticsearch-client</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-cli</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-beans</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-tika</artifactId>
<version>2.5-SNAPSHOT</version>
</dependency>

<dependency>
<groupId>org.elasticsearch.client</groupId>
Expand Down
122 changes: 122 additions & 0 deletions tika/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven descriptor for the fscrawler-tika module ("FSCrawler Tika").
The module inherits from fscrawler-parent and declares no plugin or dependency versions of its own.
NOTE(review): those versions are presumably supplied by the parent's pluginManagement and
dependencyManagement sections; confirm against the root pom.xml.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>fscrawler-parent</artifactId>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<version>2.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>fscrawler-tika</artifactId>
<name>FSCrawler Tika</name>

<!-- Build plugins enabled for this module; each entry only names the plugin, with no version or configuration given here. -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-help-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>versions-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>junit4-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
</plugin>
</plugins>
</build>

<dependencies>
<!-- Our framework -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-framework</artifactId>
</dependency>

<!-- Our Beans -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-beans</artifactId>
</dependency>

<!-- Our Settings -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-settings</artifactId>
</dependency>

<!-- Apache Tika parsers -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
</dependency>
<!-- Add some optional dependencies -->
<!-- NOTE(review): sqlite-jdbc carries no <optional> flag here, unlike the imaging libraries below; confirm that is intended. -->
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
</dependency>

<!--
For legal reasons (incompatible license), these dependencies are marked as optional, so users who want
to use them have to provide them manually in the lib dir.
-->
<dependency>
<groupId>com.levigo.jbig2</groupId>
<artifactId>levigo-jbig2-imageio</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-core</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<optional>true</optional>
</dependency>

<!-- For language detection -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-langdetect</artifactId>
</dependency>
<!-- Dependency for parsing remote SSH directories [http://www.jcraft.com/jsch/] -->
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</dependency>

<!-- Test dependencies (both declared with test scope) -->
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-test-framework</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>fr.pilato.elasticsearch.crawler</groupId>
<artifactId>fscrawler-test-documents</artifactId>
<scope>test</scope>
</dependency>

</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package fr.pilato.elasticsearch.crawler.fs.test.unit.parser;
package fr.pilato.elasticsearch.crawler.fs.tika;

import fr.pilato.elasticsearch.crawler.fs.test.framework.AbstractFSCrawlerTestCase;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,11 @@
* under the License.
*/

package fr.pilato.elasticsearch.crawler.fs.test.unit.parser;
package fr.pilato.elasticsearch.crawler.fs.tika;

import fr.pilato.elasticsearch.crawler.fs.meta.doc.Doc;
import fr.pilato.elasticsearch.crawler.fs.meta.settings.Fs;
import fr.pilato.elasticsearch.crawler.fs.meta.settings.FsSettings;
import fr.pilato.elasticsearch.crawler.fs.tika.TikaDocParser;
import fr.pilato.elasticsearch.crawler.fs.tika.TikaInstance;
import org.apache.tika.parser.external.ExternalParser;
import org.junit.Test;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
* under the License.
*/

package fr.pilato.elasticsearch.crawler.fs.test.unit.parser;
package fr.pilato.elasticsearch.crawler.fs.tika;

import fr.pilato.elasticsearch.crawler.fs.tika.XmlDocParser;
import org.junit.Test;

import java.io.IOException;
Expand Down

0 comments on commit 9996b73

Please sign in to comment.