From 122cc26ea415f8c1f60d089725730e98704798a2 Mon Sep 17 00:00:00 2001
From: Tony Kao <tonykao@meta.com>
Date: Mon, 2 Dec 2024 12:02:00 -0800
Subject: [PATCH] torchx - profile scheduler validate call (#972)

Summary:

Track the torchx scheduler validate() call. This captures the elapsed time and whether validation failed. There is no behavior impact for torchx users, as this change only adds logging for monitoring purposes.

Verified that the scuba log contains the validate record:
https://fburl.com/scuba/pytorch_elastic_tsm_log/jza7f33n

Differential Revision: D66166216
---
 torchx/runner/api.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/torchx/runner/api.py b/torchx/runner/api.py
index 4a03a8aad..172f86de5 100644
--- a/torchx/runner/api.py
+++ b/torchx/runner/api.py
@@ -404,10 +404,11 @@ def dryrun(
                     role.env[tracker_config_env_var_name(name)] = config
 
         cfg = cfg or dict()
+        runcfg = json.dumps(cfg) if cfg else None
         with log_event(
             "dryrun",
             scheduler,
-            runcfg=json.dumps(cfg) if cfg else None,
+            runcfg=runcfg,
             workspace=workspace,
         ):
             sched = self._scheduler(scheduler)
@@ -433,7 +434,13 @@ def dryrun(
                         " Either a patch was built or no changes to workspace was detected."
                     )
 
-            sched._validate(app, scheduler)
+            with log_event(
+                "validate",
+                scheduler,
+                runcfg=runcfg,
+                workspace=workspace,
+            ):
+                sched._validate(app, scheduler)
             dryrun_info = sched.submit_dryrun(app, resolved_cfg)
             dryrun_info._scheduler = scheduler
             return dryrun_info