Support MTIA device type in FBGEMM TBE training (#1994)

Summary: Pull Request resolved: #1994 For the MTIA device type, we have to rely on the CPU fallback, so we let FBGEMM choose the CPU path when the device type is MTIA. Reviewed By: jackm321 Differential Revision: D48809630 fbshipit-source-id: 15bec60be6efe4c8b1ad4f9d46da39ce58e36a40
pytorch · Sep 7, 2023 · d3fe199 · d3fe199
1 parent 9d6ba13
commit d3fe199
Showing 1 changed file with 9 additions and 2 deletions.
diff --git a/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_training.py b/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_training.py
@@ -54,6 +54,7 @@ class DoesNotHavePrefix(Exception):
 class ComputeDevice(enum.IntEnum):
     CPU = 0
     CUDA = 1
+    MTIA = 2
 
 
 class WeightDecayMode(enum.IntEnum):
@@ -366,7 +367,13 @@ def __init__(  # noqa C901
         assert all(
             cd == compute_devices[0] for cd in compute_devices
         ), "Heterogenous compute_devices are NOT supported!"
-        self.use_cpu: bool = all(cd == ComputeDevice.CPU for cd in compute_devices)
+        # Split TBE has different function schemas for CUDA and CPU.
+        # For MTIA device type, it uses the CPU one.
+        self.use_cpu: bool = (
+            compute_devices[0] == ComputeDevice.CPU
+            or compute_devices[0] == ComputeDevice.MTIA
+        )
+
         assert not self.use_cpu or all(
             loc == EmbeddingLocation.HOST for loc in locations
         ), "ComputeDevice.CPU is only for EmbeddingLocation.HOST!"
@@ -998,7 +1005,7 @@ def forward(  # noqa: C901
             placements=self.momentum2_placements,
         )
         # Ensure iter is always on CPU so the increment doesn't synchronize.
-        if self.iter.is_cuda:
+        if not self.iter.is_cpu:
             self.iter = self.iter.cpu()
         self.iter[0] += 1