Add OptimType.NONE in SplitTBE (defuse bwd and optim)

Summary: This diff is the **backend** part. It introduces `OptimType.NONE`. Unlike other `OptimType`s, `OptimType.NONE` does not perform the optimizer step during SplitTBE's backward pass. With `OptimType.NONE`, SplitTBE deduplicates output gradients in the backward pass and generates a sparse gradient tensor (PyTorch's `sparse_coo_tensor`) for the device's weight (FQN: `weights_dev`). Currently, `OptimType.NONE` only supports the case where the embedding dimensions of all embedding tables are identical. Differential Revision: D44392172 fbshipit-source-id: 7e6df6857bc9d4dc1666aef855f21572c4fd35fc
pytorch · Jun 10, 2023 · bee6400 · bee6400
1 parent bf491a0
commit bee6400
Show file tree

Hide file tree

Showing 8 changed files with 379 additions and 181 deletions.
diff --git a/fbgemm_gpu/codegen/embedding_backward_code_generator.py b/fbgemm_gpu/codegen/embedding_backward_code_generator.py
@@ -1641,6 +1641,23 @@ def backward_dense() -> None:
     )
 
 
+def none_optimizer() -> None:
+    generate(
+        optimizer="none",
+        dense=False,
+        args=make_args(
+            [
+                (INT, "total_hash_size"),
+                (INT, "total_unique_indices"),
+            ]
+        ),
+        # Generate only GPU code
+        has_cpu_support=False,
+        has_gpu_support=True,
+        has_vbe_support=False,
+    )
+
+
 def gen__init__py() -> None:
     template = env.get_template("__init__.template")
     src_py = template.render()
@@ -1670,6 +1687,8 @@ def emb_codegen(
     rowwise_adagrad_with_counter()
     rowwise_weighted_adagrad()
     sgd()
+    none_optimizer()
+
     gen__init__py()