Skip to content

Commit

Permalink
model and dataset logging for raytune using CMF (#212)
Browse files Browse the repository at this point in the history
  • Loading branch information
rishabhsharma22 authored Oct 14, 2024
1 parent 883fc3e commit e559799
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 4 deletions.
10 changes: 9 additions & 1 deletion cmflib/cmf_ray_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
class CmfRayLogger(Callback):
#id_count = 1

def __init__(self, pipeline_name, file_path, pipeline_stage):
def __init__(self, pipeline_name, file_path, pipeline_stage, data_dir = None):
"""
pipeline_name: The name of the CMF Pipelibe
file_path: The path to metadata file
Expand All @@ -16,6 +16,7 @@ def __init__(self, pipeline_name, file_path, pipeline_stage):
self.pipeline_stage = pipeline_stage
self.cmf_obj = {}
self.cmf_run = {}
self.data_dir = data_dir

def on_trial_start(self, iteration, trials, trial, **info):
trial_id = trial.trial_id
Expand All @@ -28,6 +29,8 @@ def on_trial_start(self, iteration, trials, trial, **info):
custom_properties = {'Configuration': trial_config})
#self.execution_id[trial_id] = CmfRayLogger.id_count
#CmfRayLogger.id_count+=1
if self.data_dir:
_ = self.cmf_obj[trial_id].log_dataset(url = str(self.data_dir), event = "input")

def on_trial_result(self, iteration, trials, trial, result, **info):
trial_id = trial.trial_id
Expand Down Expand Up @@ -58,6 +61,11 @@ def on_trial_complete(self, iteration, trials, trial, **info):
_ = self.cmf_obj[trial_id].log_execution_metrics(metrics_name = f"Trial_{trial_id}_Result",
custom_properties = {'Result': trial_result})

if 'model_path' in trial_result:
_ = self.cmf_obj[trial_id].log_model(path = trial_result['model_path'],
event = 'input',
model_name = f"{trial_id}_model")

def on_trial_error(self, iteration, trials, trial, **info):
trial_id = trial.trial_id
trial_config = trial.config
Expand Down
29 changes: 26 additions & 3 deletions docs/api/public/cmf_ray_logger.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ Create an instance of CmfRayLogger by providing the following parameters:
* pipeline_name: A string representing the name of the CMF pipeline.
* file_path: The file path to the metadata file associated with the CMF pipeline.
* pipeline_stage: The name of the current stage of the CMF pipeline.
* data_dir (optional): A directory path where trial data should be logged. If the path is within the CMF directory, it should be relative. If it is outside, it must be an absolute path. Default value is `None`.

Example of instantiation:
```python
logger = cmf_ray_logger.CmfRayLogger(pipeline_name, file_path, pipeline_stage)
logger = cmf_ray_logger.CmfRayLogger(pipeline_name, file_path, pipeline_stage, data_dir)
```
Here, the `data_dir` argument is used to log the dataset at the start of each trial. Ensure that this path is relative if within the CMF directory and absolute if external to the CMF directory.

## Integration with Ray Tune

Expand All @@ -50,8 +52,22 @@ tune.run(
)
```

## Model Logging
`CmfRayLogger` can now log the model during trials. To enable this, the `train.report` method must include a special key: `"model_path"`. The value of `"model_path"` should be a relative path pointing to the saved model within the CMF directory.

Important: Ensure that the `"model_path"` is relative, as the DVC wrapper expects all paths nested within the CMF directory to be relative.
```Python
train.report({
"accuracy": 0.95,
"loss": 0.05,
"model_path": "models/example_model.pth"
})
```



## Output
During each trial, `CmfRayLogger` will automatically create a CMF object with attributes set as `pipeline_name`, `pipeline_stage`, and the CMF execution as `trial_id`. It captures the trial's output and logs it under the metric key `'Output'`.
During each trial, `CmfRayLogger` will automatically create a CMF object with attributes set as `pipeline_name`, `pipeline_stage`, and the CMF execution as `trial_id`. It captures the trial's output and logs it under the metric key `'Output'`. Additionally, it logs the dataset at the start of each trial (if `data_dir` is specified) and logs the model based on the `"model_path"` key passed to `train.report`.

## Example
Here is a complete example of how to use `CmfRayLogger` with Ray Tune:
Expand All @@ -61,7 +77,7 @@ from cmf import cmf_ray_logger
from ray import tune

# Initialize the logger
logger = cmf_ray_logger.CmfRayLogger("ExamplePipeline", "/path/to/metadata.json", "Stage1")
logger = cmf_ray_logger.CmfRayLogger("ExamplePipeline", "/path/to/metadata.json", "Stage1", "path/to/data_dir")

# Configuration for tuning
config = {
Expand All @@ -74,4 +90,11 @@ tune.run(
config=config,
callbacks=[logger]
)

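# The call below belongs inside your trainable function (not after tune.run);
# include "model_path" in the reported results so the model is logged.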
train.report({
"accuracy": 0.95,
"loss": 0.05,
"model_path": "path/to/models/example_model.pth"
})
```

0 comments on commit e559799

Please sign in to comment.