Skip to content

Commit

Permalink
Ray AWS launcher (#518)
Browse files Browse the repository at this point in the history
  • Loading branch information
jieru-hu committed Nov 16, 2020
1 parent aeaed16 commit 97e186b
Show file tree
Hide file tree
Showing 33 changed files with 1,899 additions and 42 deletions.
69 changes: 32 additions & 37 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ parameters:
default: ""
cache_key_version:
type: string
default: v0
default: v1

commands:
macos:
Expand Down Expand Up @@ -37,16 +37,18 @@ commands:
brew update
brew upgrade git
brew install fish
brew install zsh
brew cleanup
- run:
name: Preparing environment - Hydra
command: |
conda create -n hydra python=<< parameters.py_version >> -yq
conda run -n hydra pip install nox
conda run -n hydra pip install nox --progress-bar off
- save_cache:
key: -<< pipeline.parameters.cache_key_version >>-macos-sys-{{ .Branch }}-<< parameters.py_version >>
paths:
- ~/miniconda3
- /usr/local/Homebrew
- ~/Library/Caches/Homebrew


linux:
Expand All @@ -57,11 +59,20 @@ commands:
steps:
- checkout
- run:
name: Preparing environment
name: Preparing environment - Conda
command: |
curl -o Miniconda3-py38_4.8.3-Linux-x86_64.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_4.8.3-Linux-x86_64.sh
bash ./Miniconda3-py38_4.8.3-Linux-x86_64.sh -b
- run:
name: Preparing environment - Other dependency
command: |
sudo apt-get update
sudo apt-get install -y expect fish openjdk-11-jre
sudo pip install nox
sudo apt-get install -y expect fish zsh openjdk-11-jre rsync
- run:
name: Preparing environment - Hydra
command: |
~/miniconda3/bin/conda init bash
~/miniconda3/bin/conda create -n hydra python=<< parameters.py_version >> -yq
win:
Expand All @@ -84,8 +95,7 @@ commands:
command: |
conda create -n hydra python=<< parameters.py_version >> pywin32 -qy
conda activate hydra
pip install nox
pip install dataclasses
pip install nox dataclasses --progress-bar off
- save_cache:
key: -<< pipeline.parameters.cache_key_version >>-win-sys-{{ .Branch }}-<< parameters.py_version >>
paths:
Expand All @@ -98,7 +108,7 @@ jobs:
py_version:
type: string
macos:
xcode: "10.0.0"
xcode: "12.0.0"
steps:
- macos:
py_version: << parameters.py_version >>
Expand All @@ -108,29 +118,24 @@ jobs:
command: |
export NOX_PYTHON_VERSIONS=<< parameters.py_version >>
conda activate hydra
pip install nox
pip install dataclasses
pip install nox dataclasses --progress-bar off
nox -s lint test_tools test_core test_jupyter_notebooks -ts
test_linux:
parameters:
py_version:
type: string
docker:
- image: circleci/python:<< parameters.py_version >>
- image: cimg/base:stable-18.04
steps:
- linux:
py_version: << parameters.py_version >>
- run:
name: Testing Hydra
command: |
export PATH="$HOME/miniconda3/envs/hydra/bin:$PATH"
export NOX_PYTHON_VERSIONS=<< parameters.py_version >>
pip install nox
pip install dataclasses
pip install nox dataclasses --progress-bar off
nox -s lint test_tools test_core test_jupyter_notebooks -ts
test_win:
parameters:
py_version:
Expand All @@ -148,7 +153,6 @@ jobs:
conda activate hydra
nox -s lint test_tools test_core test_jupyter_notebooks -ts
exit $LASTEXITCODE
trigger_plugin_piplines:
docker:
- image: circleci/python:3.8
Expand All @@ -159,16 +163,14 @@ jobs:
command: |
python tools/ci/circleci_pipeline.py
echo "Done kicking off plugin tests."
test_plugin_macos:
parameters:
py_version:
type: string
test_plugin:
type: string
macos:
xcode: "10.0.0"
xcode: "12.0.0"
steps:
- macos:
py_version: << parameters.py_version >>
Expand All @@ -179,32 +181,27 @@ jobs:
export NOX_PYTHON_VERSIONS=<< parameters.py_version >>
export PLUGINS=<< parameters.test_plugin >>
conda activate hydra
pip install nox
pip install dataclasses
pip install nox dataclasses --progress-bar off
nox -s lint_plugins test_plugins -ts
test_plugin_linux:
parameters:
py_version:
type: string
test_plugin:
type: string
docker:
- image: circleci/python:<< parameters.py_version >>
- image: cimg/base:stable-18.04
steps:
- linux:
py_version: << parameters.py_version >>
- run:
name: << parameters.test_plugin >>
command: |
export NOX_PYTHON_VERSIONS=<< parameters.py_version >>
export PLUGINS=<< parameters.test_plugin >>
pip install nox
pip install dataclasses
nox -s lint_plugins test_plugins -ts
export PATH="$HOME/miniconda3/envs/hydra/bin:$PATH"
export NOX_PYTHON_VERSIONS=<< parameters.py_version >>
export PLUGINS=<< parameters.test_plugin >>
pip install nox dataclasses --progress-bar off
nox -s lint_plugins test_plugins -ts
test_plugin_win:
parameters:
py_version:
Expand All @@ -225,15 +222,13 @@ jobs:
conda activate hydra
nox -s lint_plugins test_plugins -ts
exit $LASTEXITCODE
# Misc
coverage:
docker:
- image: circleci/python:3.6
steps:
- checkout
- run: sudo pip install nox
- run: sudo pip install nox --progress-bar off
- run: nox -s coverage

workflows:
Expand Down
1 change: 0 additions & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,3 @@ skip=
,hydra/grammar/gen
,tools/configen/example/gen
,tools/configen/tests/test_modules/expected

3 changes: 3 additions & 0 deletions plugins/hydra_ray_launcher/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
global-exclude *.pyc
global-exclude __pycache__
recursive-include hydra_plugins/* *.yaml
4 changes: 4 additions & 0 deletions plugins/hydra_ray_launcher/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Hydra Ray Launcher
Provides a [`Ray`](https://docs.ray.io/en/latest/)-based Hydra Launcher that supports execution on AWS.

See the [website](https://hydra.cc/docs/next/plugins/ray_launcher) for more information.
1 change: 1 addition & 0 deletions plugins/hydra_ray_launcher/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
6 changes: 6 additions & 0 deletions plugins/hydra_ray_launcher/example/conf/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Primary config of the Ray launcher example; train.py loads it via
# config_path="conf", config_name="config".
defaults:
# Pick the `ray_local` option from the hydra/launcher config group.
- hydra/launcher: ray_local


# Read by train.py as cfg.random_seed and handed to MyModel.
random_seed: 0
# Read by train.py as cfg.checkpoint_path; MyModel.save() creates this
# directory (and any missing parents) before writing the checkpoint file.
checkpoint_path: checkpoint
24 changes: 24 additions & 0 deletions plugins/hydra_ray_launcher/example/conf/extra_configs/aws.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# @package _global_

hydra:
launcher:
sync_up:
# source_dir is relative in this case, assuming you are running from
# <project_root>/hydra/plugins/hydra_ray_launcher/example
# An absolute path is also supported.
source_dir: "."
# target_dir is left as null here.
# As a result, the files are synced to a temp dir on the remote cluster,
# and that temp dir is cleaned up after the jobs are done.
# Leaving target_dir as null is recommended when syncing code/artifacts to
# the remote cluster, so that you don't need to configure $PYTHONPATH there.
include: ["model", "*.py"]
# No need to sync up config files.
exclude: ["*"]
sync_down:
include: ["*.pt", "*/"]
# No need to sync down config files.
exclude: ["*"]
ray_cluster_cfg:
provider:
cache_stopped_nodes: true
19 changes: 19 additions & 0 deletions plugins/hydra_ray_launcher/example/model/my_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from datetime import datetime
from pathlib import Path

log = logging.getLogger(__name__)


class MyModel:
    """Toy model for the launcher example: keeps a seed and writes a checkpoint file."""

    def __init__(self, random_seed: int):
        """Remember ``random_seed`` and log that the model was initialised."""
        self.random_seed = random_seed
        log.info("Init my model")

    def save(self, checkpoint_path: str) -> None:
        """Write ``checkpoint_<random_seed>.pt`` inside ``checkpoint_path``.

        The directory is created (with any missing parents) when it does not
        exist yet. The file content is the text ``checkpoint@`` followed by
        the value of ``datetime.now()``.
        """
        target_dir = Path(checkpoint_path)
        target_dir.mkdir(parents=True, exist_ok=True)
        log.info(f"Created dir for checkpoints. dir={target_dir}")
        checkpoint_file = target_dir / f"checkpoint_{self.random_seed}.pt"
        checkpoint_file.write_text(f"checkpoint@{datetime.now()}")
20 changes: 20 additions & 0 deletions plugins/hydra_ray_launcher/example/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging

import hydra
from model.my_model import MyModel
from omegaconf import DictConfig

log = logging.getLogger(__name__)


@hydra.main(config_path="conf", config_name="config")
def main(cfg: DictConfig) -> None:
    """Example entry point: build ``MyModel`` from ``cfg`` and save one checkpoint.

    Reads ``cfg.random_seed`` and ``cfg.checkpoint_path``.
    """
    log.info("Start training...")
    # checkpoint_path is passed through unchanged, so a relative value ends up
    # under the current working dir.
    MyModel(cfg.random_seed).save(cfg.checkpoint_path)


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from hydra.core.config_search_path import ConfigSearchPath
from hydra.plugins.search_path_plugin import SearchPathPlugin


class RayLauncherSearchPathPlugin(SearchPathPlugin):
    """Search-path plugin that registers this plugin's packaged configs."""

    def manipulate_search_path(self, search_path: ConfigSearchPath) -> None:
        """Append the plugin's config package to the end of ``search_path``."""
        provider = "hydra-ray-launcher"
        conf_location = "pkg://hydra_plugins.hydra_ray_launcher.conf"
        # Appended last, after the entries already present in the search path.
        search_path.append(provider, conf_location)
Loading

0 comments on commit 97e186b

Please sign in to comment.