-
Notifications
You must be signed in to change notification settings - Fork 1
/
spark-entrypoint.sh
executable file
·98 lines (84 loc) · 2.68 KB
/
spark-entrypoint.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env bash
#
# Container entrypoint for Spark on Kubernetes: establishes a passwd entry
# for anonymous UIDs, assembles classpaths and executor JVM options, selects
# the PySpark interpreter, then execs the driver, executor, or a pass-through
# spark-submit command.
#
# -e: abort on any command failure; -x: echo commands to the terminal output
set -ex
# Start the SSH daemon (dropbear) so the container accepts SSH connections
/etc/init.d/dropbear start
# Check whether there is a passwd entry for the container UID.
myuid=$(id -u)
mygid=$(id -g)
# getent exits non-zero when the UID has no passwd entry (anonymous uid in
# K8s); temporarily drop -e so that case falls through to the creation logic.
set +e
uidentry=$(getent passwd "$myuid")
set -e
# If there is no passwd entry for the container UID, attempt to create one
# so tools that resolve the current user (e.g. JVM, ssh) keep working.
if [ -z "$uidentry" ]; then
  if [ -w /etc/passwd ]; then
    echo "$myuid:x:$myuid:$mygid:${SPARK_USER_NAME:-anonymous uid}:$SPARK_HOME:/bin/false" >> /etc/passwd
  else
    # Diagnostics go to stderr so they do not pollute stdout consumers.
    echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" >&2
  fi
fi
# Base classpath: every jar shipped with the Spark distribution.
SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"

# Gather executor JVM options from SPARK_JAVA_OPT_<N> environment variables,
# ordered numerically by <N>, into the SPARK_EXECUTOR_JAVA_OPTS array.
# The sed strips the leading "NAME=" so only the option value remains.
collect_executor_java_opts() {
  # Process substitution replaces the original fixed /tmp/java_opts.txt
  # scratch file, which had a predictable name on a shared /tmp (collision /
  # symlink risk) and was never cleaned up.
  readarray -t SPARK_EXECUTOR_JAVA_OPTS < <(
    env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g'
  )
}
collect_executor_java_opts

# Operator-supplied extra jars/dirs are appended after the distribution jars.
if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
fi
# Pick the Python interpreter for PySpark based on the requested major
# version; the selections are exported so spark-submit and the executors
# inherit the same interpreter.
case "$PYSPARK_MAJOR_PYTHON_VERSION" in
  2)
    pyv="$(python -V 2>&1)"
    export PYTHON_VERSION="${pyv:7}"   # drop the 7-char "Python " prefix
    export PYSPARK_PYTHON="python"
    export PYSPARK_DRIVER_PYTHON="python"
    ;;
  3)
    pyv3="$(python3 -V 2>&1)"
    export PYTHON_VERSION="${pyv3:7}"  # drop the 7-char "Python " prefix
    export PYSPARK_PYTHON="python3"
    export PYSPARK_DRIVER_PYTHON="python3"
    ;;
esac
# If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so
# Hadoop jars are available to the executor.
# It does not set SPARK_DIST_CLASSPATH if already set, to avoid overriding
# customizations of this value from elsewhere e.g. Docker/K8s.
# Also prepends HADOOP_CONF_DIR to the classpath whenever that variable is
# set (even to the empty string — the +x expansion tests "set", not "non-empty").
setup_hadoop_classpath() {
  if [ -n "${HADOOP_HOME}" ] && [ -z "${SPARK_DIST_CLASSPATH}" ]; then
    # Split assignment from export: `export VAR=$(cmd)` returns export's
    # status, so a failing `hadoop classpath` would be silently masked
    # under set -e. Quoting the path also survives spaces in HADOOP_HOME.
    SPARK_DIST_CLASSPATH="$("${HADOOP_HOME}/bin/hadoop" classpath)"
    export SPARK_DIST_CLASSPATH
  fi
  if [ -n "${HADOOP_CONF_DIR+x}" ]; then
    SPARK_CLASSPATH="$HADOOP_CONF_DIR:$SPARK_CLASSPATH"
  fi
}
setup_hadoop_classpath
# Dispatch on the first argument: assemble the command line for a Spark
# driver, a Spark executor, or fall back to plain spark-submit pass-through.
case "$1" in
  driver)
    shift
    CMD=("$SPARK_HOME/bin/spark-submit"
         --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
         --deploy-mode client
         "$@")
    ;;
  executor)
    shift
    CMD=("/usr/bin/java"
         "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
         -Xms"$SPARK_EXECUTOR_MEMORY"
         -Xmx"$SPARK_EXECUTOR_MEMORY"
         -cp "$SPARK_CLASSPATH:$SPARK_DIST_CLASSPATH"
         org.apache.spark.executor.CoarseGrainedExecutorBackend
         --driver-url "$SPARK_DRIVER_URL"
         --executor-id "$SPARK_EXECUTOR_ID"
         --cores "$SPARK_EXECUTOR_CORES"
         --app-id "$SPARK_APPLICATION_ID"
         --hostname "$SPARK_EXECUTOR_POD_IP")
    ;;
  *)
    # Unrecognized subcommand: hand all arguments to spark-submit unchanged.
    echo "Non-spark-on-k8s command provided, proceeding in pass-through mode..."
    CMD=("$SPARK_HOME/bin/spark-submit" "$@")
    ;;
esac

# Replace this shell with the assembled command so signals from the
# container runtime reach the Spark process directly.
exec "${CMD[@]}"