-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbuild-morfologik-lt.sh
executable file
·48 lines (33 loc) · 1.76 KB
/
build-morfologik-lt.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/bin/bash
#
# Build the LanguageTool morfologik dictionaries for Spanish (es-ES).
#
# Must be started from the directory that contains morfologik-lt/ and
# resultats/. For every dictionary named in the loop below the script:
#   1. rewrites the source word list through sptotabs.pl,
#   2. builds the tagger dictionary and dumps it back to text,
#   3. builds the synthesis dictionary and dumps it back to text,
#   4. copies the .dict, .info and _tags.txt artifacts into target_dir.
#
# Requires perl, java and the LanguageTool tools jar at
# ~/target-lt/languagetool.jar.

# Stop at the first failing step so that stale or partially built
# dictionaries are never copied into the resource directory.
set -euo pipefail

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Every relative path below is resolved from morfologik-lt; running on
# after a failed cd would read and delete files in the wrong directory.
cd morfologik-lt || die "cannot cd to morfologik-lt"

# LanguageTool jar
#jarfile=~/github/languagetool/languagetool-tools/target/languagetool-tools-3.5-SNAPSHOT-jar-with-dependencies.jar
# Left unquoted on purpose: the tilde is only expanded when unquoted.
jarfile=~/target-lt/languagetool.jar
target_dir=../resultats/java-lt/src/main/resources/org/languagetool/resource/es

[[ -f "$jarfile" ]] || die "LanguageTool jar not found: $jarfile"
# Without this check, cp would create a regular file called "es" when the
# directory is missing instead of reporting an error.
[[ -d "$target_dir" ]] || die "target directory not found: $target_dir"

# Stage the input in a private scratch directory rather than under a
# predictable name in /tmp; the directory is removed on every exit path.
work_dir=$(mktemp -d) || die "mktemp failed"
cleanup() { rm -rf -- "${work_dir:?}"; }
trap cleanup EXIT

# source dictionaries
# spanish
cp -- ../resultats/lt/diccionari.txt "${work_dir}/es-ES.txt"

for targetdict in es-ES; do
  # replace whitespaces with tabs
  perl sptotabs.pl <"${work_dir}/${targetdict}.txt" >"${targetdict}_tabs.txt"

  # create tagger dictionary with morfologik tools
  java -cp "$jarfile" org.languagetool.tools.POSDictionaryBuilder \
    -i "${targetdict}_tabs.txt" \
    -info "${targetdict}.info" \
    -freq es_wordlist.xml \
    -o "${targetdict}.dict"

  # dump the tagger dictionary
  java -cp "$jarfile" org.languagetool.tools.DictionaryExporter \
    -i "${targetdict}.dict" \
    -info "${targetdict}.info" \
    -o "${targetdict}_lt.txt"

  # create synthesis dictionary with morfologik tools
  java -cp "$jarfile" org.languagetool.tools.SynthDictionaryBuilder \
    -i "${targetdict}_tabs.txt" \
    -info "${targetdict}_synth.info" \
    -o "${targetdict}_synth.dict"
  # Give the tag list that appears next to the synthesis dictionary the
  # name under which it is published below.
  mv -- "${targetdict}_synth.dict_tags.txt" "${targetdict}_tags.txt"

  # dump synthesis dictionary
  java -cp "$jarfile" org.languagetool.tools.DictionaryExporter \
    -i "${targetdict}_synth.dict" \
    -o "${targetdict}_synth_lt.txt" \
    -info "${targetdict}_synth.info"

  rm -- "${targetdict}_tabs.txt"

  # convert ${targetdict}_tags.txt to a DOS (CRLF) file -- currently disabled
  #sed 's/$'"/`echo \\\r`/" ${targetdict}_tags.txt > ${targetdict}_tags_dos.txt
  #rm ${targetdict}_tags.txt
  #mv ${targetdict}_tags_dos.txt ${targetdict}_tags.txt

  # publish the build artifacts into the LanguageTool resource directory
  cp -- \
    "${targetdict}_tags.txt" \
    "${targetdict}.dict" \
    "${targetdict}_synth.dict" \
    "${targetdict}.info" \
    "${targetdict}_synth.info" \
    "${target_dir}/"
done