forked from lambdal/deeplearning-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
36 lines (26 loc) · 1.87 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
FROM nvcr.io/nvidia/pytorch:22.10-py3
RUN apt-get install git
RUN mkdir /scripts
WORKDIR /workspace
# The purpose is to set the directory structure in the right way to be able to run these tests.
# There is a better structure for the directories but that will require changing the script
# which is a high-bandwidth task
RUN mkdir benchmark && \
git clone -b lambda/benchmark https://github.com/LambdaLabsML/DeepLearningExamples.git && \
git clone https://github.com/pierre818181/deeplearning-benchmark.git && \
cp -r DeepLearningExamples/PyTorch/* benchmark && \
cp -r deeplearning-benchmark/pytorch/scripts/* /scripts && \
cp -r deeplearning-benchmark/pytorch/scripts/* .
RUN pip install -r requirements.txt
# Test it with this. The bert finetuning model is a lot smaller than the ones for ncf and object detection
# RUN ./run_prepare.sh bert
# RUN ./run_prepare.sh object_detection
# Initialize datasets for the three models we will run
# RUN ./run_prepare.sh bert && ./run_prepare.sh ncf && ./run_prepare.sh object_detection
# TODO: test without this
# RUN cp -r /data /workspace/data
CMD ["python3", "compile_results_pytorch.py"]
# run a blocking call, get inside the container with exec and then test stuff out
# CMD ["python3", "-m", "http.server"]
# sudo docker run -e "GPU_TYPE=8x24GB" -e "MACHINE_ID=ghnlbwbe44923" -e "API_KEY=7699e9fe969179fcce8f3c04e91c195f9a4e0971644ea391af37d7fcacac2d448c9a71525117d8ef44b20edd2deba1abd318150cbeb9a146683102bdf6db354a" -e "ENV=dev" -e "BENCHMARK_CONFIG=8x24GB" -e "PRECISION=fp32" -e "TIMEOUT=1500" --shm-size=3000g --gpus all -it host /bin/bash
# GPU_TYPE="8x24GB" MACHINE_ID="ghnlbwbe44923" API_KEY="7699e9fe969179fcce8f3c04e91c195f9a4e0971644ea391af37d7fcacac2d448c9a71525117d8ef44b20edd2deba1abd318150cbeb9a146683102bdf6db354a" ENV="dev" BENCHMARK_CONFIG="8x24GB" PRECISION="fp32" TIMEOUT=1500 python3 compile_results_pytorch.py