From 122cc26ea415f8c1f60d089725730e98704798a2 Mon Sep 17 00:00:00 2001 From: Tony Kao Date: Mon, 2 Dec 2024 12:02:00 -0800 Subject: [PATCH] torchx - profile scheduler validate call (#972) Summary: track the torchx scheduler validate() call. This captures the elapsed time and whether validation failed. There is no behavior impact to torchx users, as this change only adds logging for monitoring purposes. Verified that the scuba log has the validate record: https://fburl.com/scuba/pytorch_elastic_tsm_log/jza7f33n Differential Revision: D66166216 --- torchx/runner/api.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/torchx/runner/api.py b/torchx/runner/api.py index 4a03a8aad..172f86de5 100644 --- a/torchx/runner/api.py +++ b/torchx/runner/api.py @@ -404,10 +404,11 @@ def dryrun( role.env[tracker_config_env_var_name(name)] = config cfg = cfg or dict() + runcfg = json.dumps(cfg) if cfg else None with log_event( "dryrun", scheduler, - runcfg=json.dumps(cfg) if cfg else None, + runcfg=runcfg, workspace=workspace, ): sched = self._scheduler(scheduler) @@ -433,7 +434,13 @@ def dryrun( " Either a patch was built or no changes to workspace was detected." ) - sched._validate(app, scheduler) + with log_event( + "validate", + scheduler, + runcfg=runcfg, + workspace=workspace, + ): + sched._validate(app, scheduler) dryrun_info = sched.submit_dryrun(app, resolved_cfg) dryrun_info._scheduler = scheduler return dryrun_info