forked from DigitalPebble/behemoth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
behemoth-site.xml
99 lines (82 loc) · 2.47 KB
/
behemoth-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- overriding Hadoop values -->
<!-- JVM options handed to Hadoop child task processes; -Xmx1024m sets
     the maximum heap size of each task JVM to 1024 MB. -->
<property>
  <name>mapred.child.java.opts</name>
  <value>-Xmx1024m</value>
</property>
<!-- Turns on compression of the intermediate map outputs. -->
<property>
  <name>mapred.compress.map.output</name>
  <value>true</value>
</property>
<!-- Turns on compression of the final job outputs. -->
<property>
  <name>mapred.output.compress</name>
  <value>true</value>
</property>
<!-- Compression granularity for job outputs written as SequenceFiles;
     Hadoop documents NONE, RECORD and BLOCK as the accepted values. -->
<property>
  <name>mapred.output.compression.type</name>
  <value>BLOCK</value>
</property>
<!-- Redirects the per-job user history away from the job output path. -->
<property>
  <name>hadoop.job.history.user.location</name>
  <value>${hadoop.log.dir}/history/user</value>
  <description>
    Hadoop 0.17.x comes with a default setting to create user logs
    inside the output path of the job. This breaks some Hadoop classes,
    which expect the output to contain only part-XXXXX files. This
    setting changes the output to a subdirectory of the regular log
    directory.
  </description>
</property>
<!-- Properties for GATE -->
<!-- NOTE(review): the value is shipped empty; presumably an empty name
     selects GATE's default (unnamed) annotation set. Confirm against the
     GATE processor code. -->
<property>
  <name>gate.annotationset.input</name>
  <value></value>
  <description>
    Name of the GATE annotation set onto which the annotations held in
    the Behemoth format are mapped before processing
  </description>
</property>
<!-- Annotation set read back when converting GATE results to Behemoth.
     NOTE(review): shipped empty; presumably that means GATE's default
     (unnamed) annotation set. Confirm against the GATE processor code. -->
<property>
  <name>gate.annotationset.output</name>
  <value></value>
  <description>
    AnnotationSet to consider when serializing to the
    behemoth format
  </description>
</property>
<!-- Only the Token annotation type is kept with the value shipped here. -->
<property>
  <name>gate.annotations.filter</name>
  <value>Token</value>
  <description>
    Annotation types to consider when serializing to the Behemoth
    format, separated by commas
  </description>
</property>
<!-- Entries are written as Type.feature, as in the shipped value
     Token.string (the "string" feature of Token annotations). -->
<property>
  <name>gate.features.filter</name>
  <value>Token.string</value>
  <description>
    If specified, only the features listed for an annotation type will
    be kept. Each entry has the form Type.feature
  </description>
</property>
<!-- Shipped as false, so annotations already present in a document are
     kept. The description previously read "if specified", which
     contradicted this explicitly specified false value. -->
<property>
  <name>gate.emptyannotationset</name>
  <value>false</value>
  <description>
    If set to true, all the annotations in the Behemoth document will
    be deleted before processing with GATE
  </description>
</property>
<!-- Properties for UIMA -->
<!-- Comma-separated list of fully qualified UIMA annotation type names;
     the shipped value keeps token and sentence annotations. -->
<property>
  <name>uima.annotations.filter</name>
  <value>org.apache.uima.TokenAnnotation,org.apache.uima.SentenceAnnotation</value>
  <description>
    Annotation types to consider when serializing to the Behemoth
    format, separated by commas
  </description>
</property>
<!-- Entries pair an annotation type with one of its features, joined by
     a colon, as in the shipped value (the posTag feature of
     TokenAnnotation). -->
<property>
  <name>uima.features.filter</name>
  <value>org.apache.uima.TokenAnnotation:posTag</value>
  <description>
    Features to consider when serializing to the Behemoth format,
    written as annotationType:featureName and separated by commas
  </description>
</property>
</configuration>