forked from DigitalPebble/behemoth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
behemoth
executable file
·76 lines (68 loc) · 1.98 KB
/
behemoth
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash
#
# Shortcuts for behemoth commands.
#
# Usage: behemoth COMMAND [ARGS...]
#
# Maps COMMAND to a driver class and to the module whose job jar is used,
# then replaces this process with:
#   hadoop jar <job jar> <class> ARGS...
#
# Assumes that mvn clean install has been called successfully, i.e. that
# <script dir>/<module>/target/behemoth-<module>-1.1-SNAPSHOT-job.jar exists.
#
# Exit status:
#   1    no COMMAND given (usage is printed)
#   255  unknown COMMAND, or hadoop not found on the PATH
#   else whatever the exec'd hadoop process returns

# if no args specified, show usage
if [[ $# -eq 0 ]]; then
  printf '%s\n' \
    "Usage: behemoth COMMAND" \
    "where COMMAND is one of:" \
    " reader" \
    " exporter" \
    " filter" \
    " importer" \
    " gate" \
    " tika" \
    " uima" \
    " mahout" \
    " solr" \
    " language-id" \
    "Most commands print help when invoked w/o parameters."
  exit 1
fi

# get arguments: the first one selects the command, the rest are passed through
command_name=$1
shift

# figure out which class to run and which module's job jar holds it
case "$command_name" in
  reader)
    class=com.digitalpebble.behemoth.util.CorpusReader
    module=core
    ;;
  filter)
    class=com.digitalpebble.behemoth.util.CorpusFilter
    module=core
    ;;
  importer)
    class=com.digitalpebble.behemoth.util.CorpusGenerator
    module=core
    ;;
  exporter)
    class=com.digitalpebble.behemoth.util.ContentExtractor
    module=core
    ;;
  gate)
    class=com.digitalpebble.behemoth.gate.GATEDriver
    module=gate
    ;;
  tika)
    class=com.digitalpebble.behemoth.tika.TikaDriver
    module=tika
    ;;
  uima)
    class=com.digitalpebble.behemoth.uima.UIMADriver
    module=uima
    ;;
  mahout)
    class=com.digitalpebble.behemoth.mahout.SparseVectorsFromBehemoth
    module=mahout
    ;;
  solr)
    class=com.digitalpebble.behemoth.solr.SOLRIndexerJob
    module=solr
    ;;
  language-id)
    class=com.digitalpebble.behemoth.languageidentification.LanguageIdDriver
    module=language-id
    ;;
  *)
    echo "unknown command" >&2
    # 255 is the status bash reported for the former out-of-range `exit -1`
    exit 255
    ;;
esac

# The job jar lives next to this script; quote every expansion so an install
# path containing spaces or glob characters stays a single argument.
script_dir=$(dirname -- "$0")
job_jar="${script_dir}/${module}/target/behemoth-${module}-1.1-SNAPSHOT-job.jar"

# check that hadoop can be found on the path
# (`command -v` is a builtin; no dependency on `which` or on parsing its output)
if ! command -v hadoop >/dev/null 2>&1; then
  echo "Can't find Hadoop executable. Add HADOOP_HOME/bin to the path" >&2
  exit 255
fi

# run it: exec replaces this shell, so hadoop's exit status becomes ours
exec hadoop jar "$job_jar" "$class" "$@"