-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTaskfile
44 lines (36 loc) · 1.45 KB
/
Taskfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
library 'common'
// Pipeline: build the `wiki` image, then for each language download the
// Wikipedia dump, extract it with WikiExtractor, and publish the result to the
// `gliacloud-nlp` S3 bucket under rawdata/<lang>/<version>/lastest/wiki.
// `ecr` and `s3` are not defined in this file — presumably provided by the
// shared library loaded via `library 'common'`; verify against that library.
node('w2v') {
    checkout scm

    stage('download image') {
        ecr.pull_image('parser', 'v0.4.5')
        sh "docker build . -t wiki"
    }

    withDockerContainer(image: 'wiki', args: '-u root:root') {
        sh 'pip install awscli'

        // Declared with `def` so they stay local to this closure instead of
        // leaking into the script binding.
        def version
        def date
        def langs = ["zh", "ja", "en"]

        stage('download requirement') {
            // `version` is read from the `version` file in the workspace;
            // `date` is today's date as reported inside the container.
            version = sh(returnStdout: true, script: "cat version").trim()
            date = sh(returnStdout: true, script: "date +%Y-%m-%d").trim()
            println(version)
            println(date)
        }

        for (String lang : langs) {
            stage(lang) {
                stage("download ${lang}") {
                    sh "mkdir -p ${lang}"
                    sh "python WikiDumper.py ${lang}"
                }

                stage("prepare ${lang}") {
                    sh "cd ${lang} && ls -lh"
                    // For every *.bz2 in the language directory, strip the
                    // extension and run WikiExtractor with that name as the
                    // output directory. `-I {}` replaces the deprecated `-i`.
                    sh "cd ${lang} && ls *.bz2 | xargs -I {} -t basename {} .bz2 | xargs -I {} -t ../WikiExtractor.py -b 50m --processes=32 {}.bz2 -o {} --lang ${lang}"
                }

                stage("remove ${lang}") {
                    // Drop the compressed dumps so only extracted output is uploaded.
                    sh "rm ${lang}/*.bz2"
                }

                stage("upload s3 ${lang}") {
                    // NOTE(review): "lastest" is misspelled but is part of the
                    // live S3 key layout; do not change it without migrating
                    // consumers of that path.
                    try {
                        // Move the previous snapshot to a dated prefix before
                        // overwriting it. Best-effort: a failure here must not
                        // abort the upload, but the cause is logged so that
                        // real S3 errors are visible in the build log.
                        s3.rename("gliacloud-nlp", "rawdata/${lang}/${version}/lastest/wiki", "rawdata/${lang}/${version}/${date}/wiki")
                    } catch (Exception ex) {
                        println("Catching the exception: ${ex}")
                    }
                    s3.upload_folder("gliacloud-nlp", "${lang}", "rawdata/${lang}/${version}/lastest/wiki")
                    sh "rm -rf ${lang}"
                }
            }
        }
    }
}