Skip to content

Commit

Permalink
chore: Improve core v2 init API [MD-441] (#9560)
Browse files Browse the repository at this point in the history
  • Loading branch information
gt2345 authored Jul 15, 2024
1 parent 0494cdf commit 02da2a2
Show file tree
Hide file tree
Showing 18 changed files with 140 additions and 92 deletions.
7 changes: 7 additions & 0 deletions docs/release-notes/detached-mode-config-improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
:orphan:

**Deprecations**

- Detached mode: The ``defaults`` and ``unmanaged`` parameters of the ``init`` function for
  unmanaged experiments have been deprecated and will be removed in a future version. Please use
  ``config`` instead.
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ context:
dist.init_process_group("gloo")
distributed = core_v2.DistributedContext.from_torch_distributed()
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="unmanaged-3-torch-distributed",
),
distributed=distributed,
Expand Down
4 changes: 1 addition & 3 deletions docs/tutorials/detached-mode/save-load-checkpoints.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,9 @@ metrics to:
def main():
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="unmanaged-2-checkpoints",
checkpoint_storage="/path/to/checkpoint",
),
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id="my-existing-experiment",
external_trial_id="my-existing-trial",
),
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/detached-mode/simple-metrics-reporting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ function:
def main():
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="detached_mode_example",
),
)
Expand Down
2 changes: 1 addition & 1 deletion e2e_tests/tests/fixtures/core_api/pytorch_profiler_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

def main() -> None:
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="pytorch-profiler-sync-test",
),
)
Expand Down
8 changes: 3 additions & 5 deletions e2e_tests/tests/fixtures/unmanaged/checkpoint_distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@ def main():
logging.getLogger("determined").setLevel(logging.INFO)
distributed = core_v2.DistributedContext.from_torch_distributed()
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name=name,
checkpoint_storage="/tmp/determined-cp",
),
distributed=distributed,
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id=name,
external_trial_id=name,
checkpoint_storage="/tmp/determined-cp",
),
distributed=distributed,
)

# Use framework-native dtrain utilities, as normal.
Expand Down
6 changes: 2 additions & 4 deletions e2e_tests/tests/fixtures/unmanaged/checkpointing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ def main():
assert "DET_TEST_EXTERNAL_EXP_ID" in os.environ
name = os.environ["DET_TEST_EXTERNAL_EXP_ID"]
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name=name,
checkpoint_storage="/tmp/determined-cp",
),
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id=name,
external_trial_id=name,
checkpoint_storage="/tmp/determined-cp",
),
)

Expand Down
6 changes: 2 additions & 4 deletions e2e_tests/tests/fixtures/unmanaged/error_termination.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ def main():

checkpoint_storage = tempfile.mkdtemp()
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name=name,
),
checkpoint_storage=checkpoint_storage,
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id=name,
external_trial_id=name,
checkpoint_storage=checkpoint_storage,
),
)

Expand Down
10 changes: 3 additions & 7 deletions examples/features/unmanaged/1_singleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,13 @@
def main():
core_v2.init(
# For managed experiments, will be overridden by the yaml config.
# Future: merge this and yaml configs field-by-field at runtime.
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="unmanaged-1-singleton",
# labels=["some", "set", "of", "labels"],
# description="some description",
# workspace="...",
# project="...",
),
# `UnmanagedConfig` values will not get merged, and will only be used in the unmanaged mode.
# unmanaged=core_v2.UnmanagedConfig(
# workspace="...",
# project="...",
# )
)

for i in range(100):
Expand Down
4 changes: 1 addition & 3 deletions examples/features/unmanaged/2_checkpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@

def main():
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="unmanaged-2-checkpoints",
# We allow configuring the local checkpoint storage directory.
# checkpoint_storage="/tmp/determined-cp",
),
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id="test-unmanaged-2-checkpoints",
external_trial_id="test-unmanaged-2-checkpoints",
# e.g., requeued jobs on slurm:
Expand Down
2 changes: 1 addition & 1 deletion examples/features/unmanaged/3_torch_distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def main():
logging.getLogger("determined").setLevel(logging.INFO)
distributed = core_v2.DistributedContext.from_torch_distributed()
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="unmanaged-3-torch-distributed",
),
distributed=distributed,
Expand Down
4 changes: 1 addition & 3 deletions examples/features/unmanaged/advanced/checkpoints_advanced.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,14 @@ def __getitem__(self, index: int) -> Tuple:

def main():
core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="unmanaged-checkpoints-advanced",
hparams={
"lr": 1e-5,
"max_epochs": 10,
},
labels=["some", "set", "of", "labels"],
description="torch identity example",
),
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id="unmanaged-checkpoints-advanced",
external_trial_id="unmanaged-checkpoints-advanced",
),
Expand Down
2 changes: 1 addition & 1 deletion examples/features/unmanaged/advanced/context_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

def main():
with core_v2.init_context(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name="unmanaged-context-manager",
),
) as core_context:
Expand Down
4 changes: 1 addition & 3 deletions examples/features/unmanaged/ray/ray_hp_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def objective(config):
print(f"experiment name: {experiment_name} trial name: {trial_name}")

core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name=experiment_name,
hparams={
"hp": config["hp"],
Expand All @@ -29,8 +29,6 @@ def objective(config):
"metric": "loss",
"smaller_is_better": True,
},
),
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id=experiment_name,
external_trial_id=trial_name,
),
Expand Down
4 changes: 1 addition & 3 deletions examples/features/unmanaged/ray/ray_hp_search_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def setup(self, config):
print(f"experiment name: {experiment_name} trial name: {trial_name}")

core_v2.init(
defaults=core_v2.DefaultConfig(
config=core_v2.Config(
name=experiment_name,
hparams={
"hp": config["hp"],
Expand All @@ -29,8 +29,6 @@ def setup(self, config):
"metric": "loss",
"smaller_is_better": True,
},
),
unmanaged=core_v2.UnmanagedConfig(
external_experiment_id=experiment_name,
external_trial_id=trial_name,
),
Expand Down
1 change: 1 addition & 0 deletions harness/determined/experimental/core_v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Import these directly to make core_v2 a complete package.
from determined.core import DistributedContext, PreemptMode, TensorboardMode
from determined.experimental.core_v2._core_v2 import (
Config,
DefaultConfig,
UnmanagedConfig,
init_context,
Expand Down
Loading

0 comments on commit 02da2a2

Please sign in to comment.