Skip to content

Commit

Permalink
exec: mpi-benchmark: add proper rootless execution
Browse files Browse the repository at this point in the history
  • Loading branch information
kpouget committed Feb 7, 2022
1 parent 98b4e66 commit a9c28f8
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 31 deletions.
19 changes: 6 additions & 13 deletions exec/mpi-benchmark/dependencies.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
---
name: cluster_is_prepared
spec:
requirements:
- operator_is_prepared
- has_mpi_operator

- in_mpi_namespace
- has_mpi_base_image
- has_mpi_osu_image
---
name: operator_is_prepared
spec:
requirements:
- has_mpijobs
- has_volcano
---
name: has_mpijobs
name: has_mpi_operator
spec:
tests:
- name: has_mpi_crd
Expand Down Expand Up @@ -50,15 +45,13 @@ spec:
tests:
- name: in_mpi_namespace
type: shell
spec: 'test "$(oc project -q)" == mpi-benchmark'
spec: 'test "$(oc project -q)" == mpi-benchmarking'
install:
- name: goto_mpi_namespace
type: shell
spec: |
oc new-project mpi-benchmark 2>/dev/null
oc project -q mpi-benchmark
oc adm policy add-scc-to-user privileged -z default -n mpi-benchmark
oc adm policy add-scc-to-user anyuid -z default
oc new-project mpi-benchmarking 2>/dev/null
oc project -q mpi-benchmarking
---
name: has_mpi_imagestream
spec:
Expand Down
3 changes: 2 additions & 1 deletion exec/mpi-benchmark/mpijob_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,5 @@ spec:
securityContext:
privileged: true
nodeSelector:
node.kubernetes.io/instance-type: {{ .Machine }}
node.kubernetes.io/instance-type: {{ .Machine }}

16 changes: 12 additions & 4 deletions exec/mpi-benchmark/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,24 @@ echo "Done, collecting artifacts in $ARTIFACT_DIR ..."

oc get "$mpijob_name" -oyaml > "$ARTIFACT_DIR/mpijob.status.yaml"

# if Pod logs are queried with the label selector, only the last lines are
launcher_pod_name=$(oc get pod -ltraining.kubeflow.org/job-name=$name,training.kubeflow.org/job-role=launcher -oname)
oc logs "$launcher_pod_name" > "$ARTIFACT_DIR/mpijob.launcher.log"

oc get pods -ltraining.kubeflow.org/job-name=$name -oyaml > "$ARTIFACT_DIR/mpijob.pods.yaml"

for pod in $(oc get pods -ltraining.kubeflow.org/job-name=$name,training.kubeflow.org/job-role=worker -oname); do
oc logs $pod > "$ARTIFACT_DIR/mpijob.$(echo "$pod" | sed "s|pod/${name}-||").log"
done

oc get nodes -oyaml > "$ARTIFACT_DIR/nodes.yaml"

# if Pod logs are queried with the label selector, only the last lines are returned, so resolve the launcher Pod name first and fetch its logs by name
launcher_pod_name=$(oc get pod -ltraining.kubeflow.org/job-name=$name,training.kubeflow.org/job-role=launcher -oname)

if [[ -z "$launcher_pod_name" ]]; then
echo "ERROR: the launcher Pod disappeared ..."
exit 1
fi

oc logs "$launcher_pod_name" > "$ARTIFACT_DIR/mpijob.launcher.log"

echo

cat "$ARTIFACT_DIR/mpijob.launcher.log"
Expand Down
1 change: 0 additions & 1 deletion exec/mpi-benchmark/setup/001_imagestream.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@
apiVersion: image.openshift.io/v1
kind: ImageStream
metadata:
namespace: mpi-benchmark
name: mpi-bench
42 changes: 34 additions & 8 deletions exec/mpi-benchmark/setup/002_base_image.buildconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ apiVersion: build.openshift.io/v1
kind: BuildConfig
metadata:
name: mpi-base-image
namespace: mpi-benchmark
spec:
output:
to:
Expand All @@ -13,20 +12,47 @@ spec:
dockerfile: |2
FROM registry.access.redhat.com/ubi8/ubi
ENV USER_NAME=mpi \
USER=mpi \
HOME=/home/mpi
WORKDIR ${HOME}
RUN dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm \
&& dnf -y install --quiet \
sudo pkg-config vim make gdb \
curl wget git gcc-c++ \
openssh-server openssh-clients \
gcc-c++ openmpi-devel openmpi \
vim openssh-server openssh-clients openmpi \
\
&& ln -s /usr/lib64/openmpi/bin/orted /usr/bin/orted \
&& ssh-keygen -A \
&& (echo "Host *"; echo " StrictHostKeyChecking no") >> /etc/ssh/ssh_config.d/no_StrictHostKeyChecking.conf \
&& echo "StrictModes no" >> /etc/ssh/sshd_config \
\
&& (echo "Host *"; echo " StrictHostKeyChecking no") >> /etc/ssh/ssh_config.d/StrictHostKeyChecking.conf \
&& (echo "Host *"; echo " IdentityFile /home/mpi/.ssh/id_rsa") >> /etc/ssh/ssh_config.d/IdentityFile.conf \
&& (echo "Host *"; echo " Port 2222") >> /etc/ssh/ssh_config.d/Port_2222.conf \
&& (echo "Host *"; echo " UserKnownHostsFile /dev/null") >> /etc/ssh/ssh_config.d/UserKnownHostsFile.conf \
\
&& mkdir -p ${HOME}/custom_ssh \
&& ssh-keygen -f ${HOME}/custom_ssh/ssh_host_rsa_key -N '' -t rsa \
&& ssh-keygen -f ${HOME}/custom_ssh/ssh_host_dsa_key -N '' -t dsa \
\
&& echo -e > ${HOME}/custom_ssh/sshd_config \
"Port 2222 \n\
HostKey ${HOME}/custom_ssh/ssh_host_rsa_key \n\
HostKey ${HOME}/custom_ssh/ssh_host_dsa_key \n\
AuthorizedKeysFile .ssh/authorized_keys \n\
ChallengeResponseAuthentication no \n\
UsePAM no \n\
Subsystem sftp /usr/lib/ssh/sftp-server \n\
PidFile ${HOME}/custom_ssh/sshd.pid \n\
StrictModes no \n\
" \
&& rm /sbin/nologin && ln -s /usr/bin/bash /sbin/nologin \
\
&& touch /var/log/lastlog \
&& chgrp utmp /var/log/lastlog \
&& chmod 664 /var/log/lastlog
RUN chgrp -R 0 "${HOME}" \
&& chmod -R g=u "${HOME}"
ENV PATH="${PATH}:/usr/lib64/openmpi/bin/"
type: Dockerfile
Expand Down
10 changes: 6 additions & 4 deletions exec/mpi-benchmark/setup/003_osu-bench.buildconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ apiVersion: build.openshift.io/v1
kind: BuildConfig
metadata:
name: mpi-osu-image
namespace: mpi-benchmark
spec:
output:
to:
Expand All @@ -11,17 +10,20 @@ spec:
source:
dockerfile: |2
# OSU micro-benchmarks image, layered on the MPI base image.
FROM mpi-bench:base

# Build-time toolchain needed to fetch and compile the benchmarks.
RUN dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm \
 && dnf -y install --quiet make wget gcc-c++ openmpi-devel

# Download, build and install the benchmarks under /opt/osu-micro-benchmarks,
# working from ${HOME} rather than /root. The cleanup passes the source tree
# and the archive as two separate arguments, and names the archive with the
# same .tgz suffix that wget downloaded, so both are actually removed.
RUN cd "${HOME}" \
 && OSU_VERSION=5.8 \
 && wget --quiet https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-${OSU_VERSION}.tgz \
 && tar xvf osu-micro-benchmarks-${OSU_VERSION}.tgz \
 && cd osu-micro-benchmarks-${OSU_VERSION} \
 && ./configure --prefix=/opt/osu-micro-benchmarks CC=/usr/lib64/openmpi/bin/mpicc LIBS="-L/usr/lib64/openmpi/lib -lmpi -lpthread" \
 && make \
 && make install \
 && cd "${HOME}" \
 && rm -rf "${HOME}/osu-micro-benchmarks-${OSU_VERSION}" "${HOME}/osu-micro-benchmarks-${OSU_VERSION}.tgz"
type: Dockerfile
strategy:
Expand Down
73 changes: 73 additions & 0 deletions exec/mpi-benchmark/setup/test_mpijob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Smoke-test MPIJob: one launcher runs `mpirun -np 2` over two worker Pods,
# each process printing "Hello World from <hostname>".
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  # Object metadata takes `labels` (plural); a singular `label` key is not
  # part of the metadata schema, so the label would never be applied.
  labels:
    profile: rootless
  name: hello-world
spec:
  sshAuthMountPath: /home/mpi/.ssh
  cleanPodPolicy: Running
  slotsPerWorker: 1
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          initContainers:
            # Fails (and is retried) while /etc/mpi/hostfile is empty, then
            # loops until every host listed in it answers over SSH.
            - name: wait-hostfilename
              image: image-registry.openshift-image-registry.svc:5000/mpi-benchmarking/mpi-bench:base
              command:
                - bash
                - -cx
                - "[[ $(cat /etc/mpi/hostfile | wc -l) != 0 ]] && (date; echo 'Hostfile is ready'; cat /etc/mpi/hostfile) || (date; echo 'Hostfile not ready ...'; sleep 10; exit 1) && while read host; do while ! ssh $host echo $host ; do date; echo \"Pod $host is not up ...\"; sleep 10; done; date; echo \"Pod $host is ready\"; done <<< \"$(cat /etc/mpi/hostfile)\""
              volumeMounts:
                - mountPath: /etc/mpi
                  name: mpi-job-config
                - mountPath: /home/mpi/.ssh
                  name: ssh-auth
          containers:
            - name: mpi-launcher
              command:
                - mpirun
                - --allow-run-as-root
                - -np
                - "2"
                - -bind-to
                - none
                - -map-by
                - slot
                - -mca
                - pml
                - ob1
                - -mca
                - btl
                - ^openib
                - bash
                - -c
                - echo Hello World from $(cat /proc/sys/kernel/hostname)
              image: image-registry.openshift-image-registry.svc:5000/mpi-benchmarking/mpi-bench:base
              imagePullPolicy: Always
    Worker:
      replicas: 2
      template:
        metadata:
          labels:
            app: mpi-app
        spec:
          # Spread the workers across nodes: at most a skew of 1 per hostname
          # among Pods labelled app=mpi-app, otherwise do not schedule.
          topologySpreadConstraints:
            - maxSkew: 1
              topologyKey: kubernetes.io/hostname
              whenUnsatisfiable: DoNotSchedule
              labelSelector:
                matchLabels:
                  app: mpi-app
          containers:
            - name: mpi-worker
              image: image-registry.openshift-image-registry.svc:5000/mpi-benchmarking/mpi-bench:base
              imagePullPolicy: Always
              # sshd stays in the foreground (-D), logs to stderr (-e) and
              # reads the configuration file under /home/mpi/custom_ssh.
              command:
                - /usr/sbin/sshd
                - -De
                - -f
                - /home/mpi/custom_ssh/sshd_config

0 comments on commit a9c28f8

Please sign in to comment.