nemo-ux-state: handle None in state_dict.keys; disable auto-grad when transforming ckpt

Signed-off-by: Alexandros Koumparoulis <[email protected]>
akoumpa committed Jul 26, 2024
1 parent 917f715 commit 209a241
Showing 1 changed file with 12 additions and 8 deletions.
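
For context, a minimal sketch (illustrative only, not code from state.py; copy_weights and the Linear modules below are made up) of why autograd is disabled while transforming a checkpoint: in-place copies into leaf parameters that require grad are only permitted with grad disabled, and recent PyTorch versions accept torch.no_grad used directly as a decorator, which is the form this commit uses.

import torch
from torch import nn

@torch.no_grad  # recent PyTorch; older versions need the @torch.no_grad() form
def copy_weights(src: nn.Linear, dst: nn.Linear) -> None:
    # With autograd disabled, in-place copies into leaf parameters are legal
    # and no computation graph is recorded for the copied tensors.
    dst.weight.copy_(src.weight)
    dst.bias.copy_(src.bias)

src, dst = nn.Linear(4, 4), nn.Linear(4, 4)
copy_weights(src, dst)
assert torch.equal(src.weight, dst.weight)
# Without the decorator, dst.weight.copy_(...) raises
# "a leaf Variable that requires grad is being used in an in-place operation".
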
nemo/lightning/io/state.py (12 additions, 8 deletions)

@@ -5,6 +5,7 @@

 import numpy as np
 from torch import nn
+import torch
 
 SourceModuleT = TypeVar("SourceModuleT", bound=nn.Module)
 TargetModuleT = TypeVar("TargetModuleT", bound=nn.Module)
@@ -18,12 +19,12 @@ class TransformCTX:
     target: nn.Module
     target_state: dict
 
-
+@torch.no_grad
 def apply_transforms(
     source: nn.Module,
     target: TargetModuleT,
     mapping: Dict[str, str],
-    transforms: Optional[List[Callable[[TransformCTX], TransformCTX]]] = None,
+    transforms: Optional[List[Callable[[TransformCTX], TransformCTX]]] = [],
 ) -> TargetModuleT:
     """
     Applies a series of transformations to adapt the state dictionary of a source module to
@@ -101,9 +102,8 @@ def scale_weights(ctx):
     for key, val in mapping.items():
         ctx = StateDictTransform(key, val)(ctx)
 
-    if transforms:
-        for transform in transforms:
-            ctx = transform(ctx)
+    for transform in transforms:
+        ctx = transform(ctx)
 
     _params: Dict[str, nn.Parameter] = {}
     for name, param in _target.named_parameters():
@@ -144,9 +144,9 @@ def scale_weights(ctx):

         _module.register_buffer(_key, val)
 
-    keys = [name for name in list(target_state.keys()) if not name.endswith("_extra_state")]
+    keys = list(filter(lambda x: x is not None and not x.endswith("_extra_state"), target_state.keys()))
     if len(keys) != 0:
-        raise RuntimeError(f"Additional keys: {target_state.keys()} in checkpoint but not in model.")
+        raise RuntimeError(f"Additional keys: {keys} in checkpoint but not in model.")
 
     # TODO: Is this correct?
     # for key in target.state_dict():
@@ -165,7 +165,7 @@ def scale_weights(ctx):


 def _default_transform(inp):
-    return inp.float()
+    return inp
 
 
 class StateDictTransform(Generic[F]):
@@ -325,6 +325,8 @@ def _match_keys(keys: List[str], pattern: str) -> np.ndarray:
     wildcard_matches = [[] for _ in range(pattern.count("*"))]
 
     for key in keys:
+        if key is None:
+            continue
         match = regex_pattern.match(key)
         if match:
             for i, group in enumerate(match.groups()):
@@ -343,6 +345,8 @@

     # Populate the array with the keys, now that we have the correct shape and ordering
     for key in keys:
+        if key is None:
+            continue
         match = regex_pattern.match(key)
         if match:
             # Convert match groups to indices based on their position in wildcard_matches
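
As a small illustration of the None handling above (hypothetical key names, not from an actual checkpoint): both the leftover-key check and _match_keys now simply skip None entries instead of calling .endswith or regex matching on them.

# Hypothetical key list; in apply_transforms the keys come from target_state.keys().
keys = ["decoder.weight", None, "decoder._extra_state"]

# Same filter as the new leftover-key check: skip None and "_extra_state" entries,
# so only genuinely unmapped keys trigger the RuntimeError.
leftover = list(filter(lambda x: x is not None and not x.endswith("_extra_state"), keys))
print(leftover)  # ['decoder.weight']
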