# DockerfileWorker (forked from gettyimages/docker-spark)
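#
# Example usage (the image tag and network name below are illustrative, not
# part of the original project):
#   docker build -f DockerfileWorker -t spark-worker .
#   docker run --rm --network spark-net spark-worker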
FROM frolvlad/alpine-oraclejdk8:slim
LABEL maintainer="Saulo Ricci <[email protected]>"
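# PYTHON
# Install Python 3 and pip so PySpark jobs can run on this worker.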
RUN apk add --no-cache curl bash python3 python3-dev \
 && python3 -m ensurepip \
 && rm -r /usr/lib/python*/ensurepip \
 && pip3 install --upgrade pip setuptools \
 && if [ ! -e /usr/bin/pip ]; then ln -s pip3 /usr/bin/pip; fi \
 && rm -r /root/.cache
# Expose the python3 tools under their unversioned names. The previous layer
# already provides an up-to-date pip linked to /usr/bin/pip, and this image has
# no Python 2, so the old easy_install/pip2.7 steps are dropped; POSIX [ ] is
# used because RUN executes under /bin/sh, not bash.
RUN if [ ! -e /usr/bin/python ]; then ln -sf /usr/bin/python3 /usr/bin/python; fi \
 && if [ ! -e /usr/bin/python-config ]; then ln -sf /usr/bin/python3-config /usr/bin/python-config; fi \
 && if [ ! -e /usr/bin/easy_install ]; then ln -sf /usr/bin/easy_install-3 /usr/bin/easy_install; fi
# HADOOP
ENV HADOOP_VERSION 2.7.3
ENV HADOOP_HOME /usr/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop
ENV PATH $PATH:$HADOOP_HOME/bin
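# Periodically clean up finished applications' work dirs after 24 h (86400 s).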
ENV SPARK_WORKER_OPTS "-Dspark.worker.cleanup.enabled=true -Dspark.worker.cleanup.appDataTtl=86400"
RUN curl -sL --retry 3 \
"http://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" \
| gunzip \
| tar -x -C /usr/ \
&& rm -rf $HADOOP_HOME/share/doc \
&& chown -R root:root $HADOOP_HOME
# SPARK
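# This is the "without-hadoop" Spark build, so SPARK_DIST_CLASSPATH below must
# supply the Hadoop jars installed above.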
ENV SPARK_VERSION 2.1.1
ENV SPARK_PACKAGE spark-${SPARK_VERSION}-bin-without-hadoop
ENV SPARK_HOME /usr/spark-${SPARK_VERSION}
ENV SPARK_DIST_CLASSPATH="$HADOOP_HOME/etc/hadoop/*:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/common/*:$HADOOP_HOME/share/hadoop/hdfs/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/yarn/lib/*:$HADOOP_HOME/share/hadoop/yarn/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/tools/lib/*:$SPARK_HOME/jars/*"
ENV PATH $PATH:${SPARK_HOME}/bin
# The old d3kbcqa49mib13.cloudfront.net mirror no longer exists; fetch Spark
# from the Apache release archive instead.
RUN curl -sL --retry 3 \
  "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PACKAGE}.tgz" \
  | gunzip \
  | tar -x -C /usr/ \
&& mv /usr/$SPARK_PACKAGE $SPARK_HOME \
&& chown -R root:root $SPARK_HOME
WORKDIR $SPARK_HOME
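# Files fetched by ADD from a URL land with 0600 permissions, so each jar is
# chmod'd to 644 to be readable by non-root Spark processes.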
ADD https://jdbc.postgresql.org/download/postgresql-42.0.0.jar jars/postgresql-42.0.0.jar
RUN chmod 644 jars/postgresql-42.0.0.jar
# central.maven.org and dl.bintray.com no longer serve artifacts; the same
# artifact paths are assumed to resolve under repo1.maven.org and
# repos.spark-packages.org.
ADD https://repo1.maven.org/maven2/com/microsoft/azure/azure-storage/3.1.0/azure-storage-3.1.0.jar jars/azure-storage-3.1.0.jar
RUN chmod 644 jars/azure-storage-3.1.0.jar
ADD https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-azure/2.7.2/hadoop-azure-2.7.2.jar jars/hadoop-azure-2.7.2.jar
RUN chmod 644 jars/hadoop-azure-2.7.2.jar
ADD https://repos.spark-packages.org/graphframes/graphframes/0.5.0-spark2.1-s_2.11/graphframes-0.5.0-spark2.1-s_2.11.jar jars/graphframes-0.5.0-spark2.1-s_2.11.jar
RUN chmod 644 jars/graphframes-0.5.0-spark2.1-s_2.11.jar
ADD https://repo1.maven.org/maven2/com/typesafe/scala-logging/scala-logging-slf4j_2.11/2.1.2/scala-logging-slf4j_2.11-2.1.2.jar jars/scala-logging-slf4j_2.11-2.1.2.jar
RUN chmod 644 jars/scala-logging-slf4j_2.11-2.1.2.jar
ADD https://repo1.maven.org/maven2/com/typesafe/scala-logging/scala-logging-api_2.11/2.1.2/scala-logging-api_2.11-2.1.2.jar jars/scala-logging-api_2.11-2.1.2.jar
RUN chmod 644 jars/scala-logging-api_2.11-2.1.2.jar
ADD https://repo1.maven.org/maven2/net/sansa-stack/sansa-rdf-spark-core_2.11/0.2.0/sansa-rdf-spark-core_2.11-0.2.0.jar jars/sansa-rdf-spark-core_2.11-0.2.0.jar
RUN chmod 644 jars/sansa-rdf-spark-core_2.11-0.2.0.jar
ADD https://repo1.maven.org/maven2/net/sansa-stack/sansa-rdf-kryo-jena_2.11/0.2.0/sansa-rdf-kryo-jena_2.11-0.2.0.jar jars/sansa-rdf-kryo-jena_2.11-0.2.0.jar
RUN chmod 644 jars/sansa-rdf-kryo-jena_2.11-0.2.0.jar
ADD https://repo1.maven.org/maven2/net/sansa-stack/sansa-rdf-spark-utils_2.11/0.2.0/sansa-rdf-spark-utils_2.11-0.2.0.jar jars/sansa-rdf-spark-utils_2.11-0.2.0.jar
RUN chmod 644 jars/sansa-rdf-spark-utils_2.11-0.2.0.jar
ADD https://repo1.maven.org/maven2/net/sansa-stack/sansa-rdf-partition-core_2.11/0.2.0/sansa-rdf-partition-core_2.11-0.2.0.jar jars/sansa-rdf-partition-core_2.11-0.2.0.jar
RUN chmod 644 jars/sansa-rdf-partition-core_2.11-0.2.0.jar
ADD https://repo1.maven.org/maven2/org/apache/jena/jena-core/3.5.0/jena-core-3.5.0.jar jars/jena-core-3.5.0.jar
RUN chmod 644 jars/jena-core-3.5.0.jar
ADD https://repo1.maven.org/maven2/org/apache/jena/jena-arq/3.5.0/jena-arq-3.5.0.jar jars/jena-arq-3.5.0.jar
RUN chmod 644 jars/jena-arq-3.5.0.jar
ADD https://repo1.maven.org/maven2/org/apache/jena/jena-base/3.5.0/jena-base-3.5.0.jar jars/jena-base-3.5.0.jar
RUN chmod 644 jars/jena-base-3.5.0.jar
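# Worker-specific Spark defaults, copied from the build context.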
COPY conf/worker/spark-defaults.conf conf/spark-defaults.conf
# This image is the worker, so launch the Worker class rather than the Master.
# The worker needs the master's URL as its first argument; spark://master:7077
# assumes the master container is reachable under the hostname "master" on the
# default port, so override it to match your deployment.
CMD ["bin/spark-class", "org.apache.spark.deploy.worker.Worker", "spark://master:7077"]