-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
specify output file with a parameter
undo tf-launcher
- Loading branch information
1 parent
79b698a
commit 3eb4b1a
Showing
27 changed files
with
745 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# Copyright 2018 The Kubeflow Authors | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Launcher image for Kubeflow TFJobs: bundles Python, the Google Cloud SDK
# (with kubectl), ksonnet and the tf-operator Python client, then runs
# /ml/launch_tf_job.py as the entrypoint.
FROM ubuntu:16.04

# `update` and `install` share one layer so a cached, stale package index is
# never reused; the index is removed in the same layer to keep the image small.
RUN apt-get update -y && \
    apt-get install --no-install-recommends -y -q \
        ca-certificates \
        git \
        python-dev \
        python-setuptools \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*

RUN easy_install pip

RUN pip install --no-cache-dir pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.7.0 \
    kubernetes google-api-python-client retrying

# Google Cloud SDK with gsutil and kubectl, installed under /tools.
RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \
    unzip -qq google-cloud-sdk.zip -d /tools && \
    rm google-cloud-sdk.zip && \
    /tools/google-cloud-sdk/install.sh --usage-reporting=false \
        --path-update=false --bash-completion=false \
        --disable-installation-options && \
    /tools/google-cloud-sdk/bin/gcloud -q components update \
        gcloud core gsutil && \
    /tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \
    /tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \
    touch /tools/google-cloud-sdk/lib/third_party/google.py

# ksonnet CLI, pinned to v0.9.0.
RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.9.0/ks_0.9.0_linux_amd64.tar.gz && \
    tar -xzf ks_0.9.0_linux_amd64.tar.gz && \
    mkdir -p /tools/ks/bin && \
    cp ./ks_0.9.0_linux_amd64/ks /tools/ks/bin && \
    rm ks_0.9.0_linux_amd64.tar.gz && \
    rm -r ks_0.9.0_linux_amd64

# tf-operator sources, pinned to v0.3.0; the archive is deleted in the same
# layer so it does not remain in the image.
RUN wget -nv https://github.com/kubeflow/tf-operator/archive/v0.3.0.zip && \
    unzip -qq v0.3.0.zip && \
    rm v0.3.0.zip && \
    mv tf-operator-0.3.0 /tf-operator

# The base image defines no PYTHONPATH, so it is set outright instead of
# appending to an undefined variable (which left a leading empty entry).
ENV PYTHONPATH=/tf-operator

ENV PATH=$PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin

# Declared as late as possible: the trainer image name changes from build to
# build, and an earlier ARG would invalidate the cache of every layer above.
ARG TRAINER_IMAGE_NAME
ENV TRAINER_IMAGE_NAME=$TRAINER_IMAGE_NAME

# Plain local copy of the prepared build context; COPY is the explicit form.
COPY build /ml

RUN mkdir /usr/licenses && \
    /ml/license.sh /ml/third_party_licenses.csv /usr/licenses

ENTRYPOINT ["python", "/ml/launch_tf_job.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#!/bin/bash -e | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
# Builds the trainer image (via ../dnntrainer/build_image.sh) and then the
# launcher image from the Dockerfile in the current directory, and pushes the
# launcher image.
#
# Options:
#   -p  project name (defaults to the active gcloud project)
#   -t  tag name (defaults to a date/git-derived tag)
#   -i  full launcher image name; when given, -p and -t are not needed
#
# The script expects to be run from its own directory (it reads ./src and
# ../../license.sh relative to the current working directory).

# Fail fast even when the script is started as `bash build_image.sh`, in which
# case the `-e` on the shebang line is not applied.
set -e

usage() {
  echo "Usage: cmd [-p] project [-t] tag [-i] image"
}

while getopts ":hp:t:i:" opt; do
  case "${opt}" in
    h) echo "-p: project name"
       echo "-t: tag name"
       echo "-i: image name. If provided, project name and tag name are not necessary"
       exit 0
      ;;
    p) PROJECT_ID="${OPTARG}"
      ;;
    t) TAG_NAME="${OPTARG}"
      ;;
    i) LAUNCHER_IMAGE_NAME="${OPTARG}"
      ;;
    # The leading ':' in the optstring puts getopts in silent mode, so a
    # missing option argument is reported here rather than by getopts itself.
    :) echo "Option -${OPTARG} requires an argument" >&2
       usage >&2
       exit 1
      ;;
    \?) usage >&2
       exit 1
      ;;
  esac
done

LOCAL_LAUNCHER_IMAGE_NAME=ml-pipeline-kubeflow-tf
LOCAL_TRAINER_IMAGE_NAME=ml-pipeline-kubeflow-tf-trainer

if [ -z "${LAUNCHER_IMAGE_NAME}" ]; then
  # Project and tag are only used when no explicit image name was given, so
  # gcloud/git are only consulted in that case.
  if [ -z "${PROJECT_ID}" ]; then
    PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)")
  fi
  if [ -z "${TAG_NAME}" ]; then
    TAG_NAME=$(date +v%Y%m%d)-$(git describe --tags --always --dirty)-$(git diff | shasum -a256 | cut -c -6)
  fi
  TRAINER_IMAGE_NAME="gcr.io/${PROJECT_ID}/${LOCAL_TRAINER_IMAGE_NAME}:${TAG_NAME}"
else
  # Construct the trainer image name as "launcher_image_name"-trainer:"launcher_image_tag".
  # Only a colon after the last '/' separates a tag; an earlier colon belongs
  # to a registry port (e.g. localhost:5000/image).
  image_basename="${LAUNCHER_IMAGE_NAME##*/}"
  if [[ "${image_basename}" == *:* ]]; then
    tag="${LAUNCHER_IMAGE_NAME##*:}"
    TRAINER_IMAGE_NAME="${LAUNCHER_IMAGE_NAME%:*}-trainer:${tag}"
  else
    TRAINER_IMAGE_NAME="${LAUNCHER_IMAGE_NAME}-trainer"
  fi
fi

# Stage the docker build context. The absolute path plus EXIT trap guarantee
# the staging directory is removed even when a later step fails.
build_dir="$(pwd)/build"
trap 'rm -rf "${build_dir}"' EXIT

mkdir -p "${build_dir}"
rsync -arvp ./src/ "${build_dir}/"

cp ../../license.sh "${build_dir}"
cp ../../third_party_licenses.csv "${build_dir}"

# Build the trainer image
bash_dir=$(dirname "$0")
bash_dir_abs=$(realpath "${bash_dir}")
parent_dir=$(dirname "${bash_dir_abs}")
trainer_dir="${parent_dir}/dnntrainer"
# Run in a subshell so the working directory of this script is left untouched.
(
  cd "${trainer_dir}"
  if [ -z "${LAUNCHER_IMAGE_NAME}" ]; then
    ./build_image.sh -p "${PROJECT_ID}" -t "${TAG_NAME}"
  else
    ./build_image.sh -i "${TRAINER_IMAGE_NAME}"
  fi
)

# Build, tag and push the launcher image.
docker build -t "${LOCAL_LAUNCHER_IMAGE_NAME}" --build-arg TRAINER_IMAGE_NAME="${TRAINER_IMAGE_NAME}" .
if [ -z "${LAUNCHER_IMAGE_NAME}" ]; then
  docker tag "${LOCAL_LAUNCHER_IMAGE_NAME}" "gcr.io/${PROJECT_ID}/${LOCAL_LAUNCHER_IMAGE_NAME}:${TAG_NAME}"
  docker push "gcr.io/${PROJECT_ID}/${LOCAL_LAUNCHER_IMAGE_NAME}:${TAG_NAME}"
else
  docker tag "${LOCAL_LAUNCHER_IMAGE_NAME}" "${LAUNCHER_IMAGE_NAME}"
  docker push "${LAUNCHER_IMAGE_NAME}"
fi
31 changes: 31 additions & 0 deletions
31
components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from kfp import dsl | ||
|
||
def kubeflow_tfjob_launcher_op(container_image, command, number_of_workers: int, number_of_parameter_servers: int, tfjob_timeout_minutes: int, output_dir=None, step_name='TFJob-launcher'):
    """Create a pipeline step that runs the Kubeflow TFJob launcher image.

    Args:
        container_image: Value passed to the launcher's ``--container-image`` flag.
        command: List of arguments appended after the ``--`` separator.
        number_of_workers: Value passed to ``--workers``.
        number_of_parameter_servers: Value passed to ``--pss``.
        tfjob_timeout_minutes: Value passed to ``--tfjob-timeout-minutes``.
        output_dir: Value passed to ``--output-dir``. When ``None`` the flag is
            omitted entirely.
        step_name: Name given to the resulting ``dsl.ContainerOp``.

    Returns:
        A ``dsl.ContainerOp`` whose ``train`` file output is read from
        ``/output.txt``.
    """
    arguments = [
        '--workers', number_of_workers,
        '--pss', number_of_parameter_servers,
        '--tfjob-timeout-minutes', tfjob_timeout_minutes,
        '--container-image', container_image,
    ]
    # Do not forward the default None as an argument value: it would presumably
    # reach the launcher as the literal string "None" -- TODO confirm against
    # the kfp argument serialization and launch_tf_job.py.
    if output_dir is not None:
        arguments = arguments + ['--output-dir', output_dir]
    arguments = arguments + [
        '--ui-metadata-type', 'tensorboard',
        '--',
    ]
    return dsl.ContainerOp(
        name = step_name,
        image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf:7775692adf28d6f79098e76e839986c9ee55dd61',
        arguments = arguments + command,
        file_outputs = {'train': '/output.txt'}
    )
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from .kubeflow_tfjob_launcher_op import kubeflow_tfjob_launcher_op |
Oops, something went wrong.