Register fake CPU dispatch for new_unified_tensor (#2099)

Summary: Pull Request resolved: #2099 - Register fake CPU dispatch for new_unified_tensor Reviewed By: sryap, spcyppt Differential Revision: D50710925 fbshipit-source-id: 374675364e53a3a5ada2884263b189b97f4453f5
pytorch · Oct 28, 2023 · 4651326 · 4651326
1 parent 049f2a9
commit 4651326
Show file tree

Hide file tree

Showing 9 changed files with 112 additions and 29 deletions.
diff --git a/fbgemm_gpu/CMakeLists.txt b/fbgemm_gpu/CMakeLists.txt
@@ -586,7 +586,8 @@ if(NOT FBGEMM_CPU_ONLY)
     codegen/embedding_forward_quantized_host.cpp
     codegen/embedding_backward_dense_host.cpp
     codegen/embedding_bounds_check_host.cpp
-    src/cumem_utils_host.cpp
+    src/memory_utils/memory_utils.cpp
+    src/memory_utils/memory_utils_ops.cpp
     src/layout_transform_ops_gpu.cpp
     src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_gpu.cpp
     src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_gpu.cpp
@@ -628,7 +629,8 @@ if(NOT FBGEMM_CPU_ONLY)
   set(fbgemm_gpu_sources_static_gpu
       codegen/embedding_bounds_check.cu
       codegen/embedding_forward_quantized_split_lookup.cu
-      src/cumem_utils.cu
+      src/memory_utils/memory_utils.cu
+      src/memory_utils/memory_utils_ops.cu
       src/embedding_inplace_update.cu
       src/histogram_binning_calibration_ops.cu
       src/input_combine.cu

diff --git a/fbgemm_gpu/docs/Doxyfile.in b/fbgemm_gpu/docs/Doxyfile.in
@@ -924,7 +924,7 @@ INPUT                  = "../include/fbgemm_gpu" \
                          "../src/split_embeddings_cache/split_embeddings_cache_ops.cpp" \
                          "../src/jagged_tensor_ops.cu" \
                          "../src/jagged_tensor_ops_cpu.cpp" \
-                         "../src/cumem_utils.h" \
+                         "../include/fbgemm_gpu/cumem_utils.h" \
                          "../include/fbgemm_gpu/input_combine.h" \
                          "../src/layout_transform_ops.cu" \
                          "../src/layout_transform_ops_cpu.cpp" \

diff --git a/fbgemm_gpu/src/cumem_utils.h → fbgemm_gpu/include/fbgemm_gpu/cumem_utils.h b/fbgemm_gpu/src/cumem_utils.h → fbgemm_gpu/include/fbgemm_gpu/cumem_utils.h
@@ -6,12 +6,13 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#pragma once
+
 #include <ATen/ATen.h>
-#include "fbgemm_gpu/enum_utils.h"
 
 namespace fbgemm_gpu {
 
-using namespace at;
+using Tensor = at::Tensor;
 
 ///@defgroup cumem-utils CUDA Memorty Operators
 ///
@@ -86,6 +87,4 @@ void uvm_mem_advice_dont_fork(const Tensor& t);
 /// The copy uses single threaded memcpy
 Tensor uvm_to_cpu_clone(const Tensor& t);
 
-FBGEMM_GPU_ENUM_CREATE_TAG(uvm)
-
 } // namespace fbgemm_gpu
diff --git a/fbgemm_gpu/src/memory_utils/common.cuh b/fbgemm_gpu/src/memory_utils/common.cuh
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/Exceptions.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <cstring>
+
+#include "common.h"
+#include "fbgemm_gpu/cumem_utils.h"
+#include "fbgemm_gpu/enum_utils.h"
+
+namespace fbgemm_gpu {
+
+FBGEMM_GPU_ENUM_CREATE_TAG(uvm)
+
+} // namespace fbgemm_gpu
diff --git a/fbgemm_gpu/src/memory_utils/common.h b/fbgemm_gpu/src/memory_utils/common.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <ATen/ATen.h>
+
+using Tensor = at::Tensor;
+
+namespace fbgemm_gpu {
+
+Tensor new_unified_tensor_cpu(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool is_host_mapped);
+
+} // namespace fbgemm_gpu
diff --git a/fbgemm_gpu/src/memory_utils/memory_utils.cpp b/fbgemm_gpu/src/memory_utils/memory_utils.cpp
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "common.h"
+
+using Tensor = at::Tensor;
+
+namespace fbgemm_gpu {
+
+Tensor new_unified_tensor_cpu(
+    const Tensor& self,
+    const std::vector<std::int64_t>& sizes,
+    bool is_host_mapped) {
+  return at::empty({0}, self.options());
+}
+
+} // namespace fbgemm_gpu
diff --git a/fbgemm_gpu/src/cumem_utils.cu → fbgemm_gpu/src/memory_utils/memory_utils.cu b/fbgemm_gpu/src/cumem_utils.cu → fbgemm_gpu/src/memory_utils/memory_utils.cu
@@ -6,19 +6,9 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <ATen/ATen.h>
-#include <ATen/cuda/Exceptions.h>
-#include <c10/cuda/CUDAGuard.h>
+#include "common.cuh"
 
-#include <sys/mman.h>
-#include <unistd.h>
-#include <cstring>
-
-#include "cumem_utils.h"
-#include "fbgemm_gpu/enum_utils.h"
-#include "fbgemm_gpu/fbgemm_cuda_utils.cuh"
-
-using Tensor = at::Tensor;
+using namespace at;
 
 namespace fbgemm_gpu {
 
@@ -33,6 +23,7 @@ namespace fbgemm_gpu {
 // and set the correct device in the thread before calling cudaFree[Host]
 
 namespace {
+
 struct CUDAHostMappedContext {
   void* ptr_;
   int cuda_device_;

diff --git a/fbgemm_gpu/src/cumem_utils_host.cpp → ...gpu/src/memory_utils/memory_utils_ops.cpp b/fbgemm_gpu/src/cumem_utils_host.cpp → ...gpu/src/memory_utils/memory_utils_ops.cpp
@@ -6,13 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <ATen/ATen.h>
 #include <torch/library.h>
-#include "fbgemm_gpu/enum_utils.h"
+#include "common.cuh"
 #include "fbgemm_gpu/sparse_ops_utils.h"
 
-#include "cumem_utils.h"
-
 using Tensor = at::Tensor;
 
 namespace fbgemm_gpu {
@@ -24,17 +21,11 @@ TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
       "uvm_to_device(Tensor self, Tensor prototype) -> Tensor",
       TORCH_FN(uvm_to_device));
   m.def("uvm_to_cpu(Tensor t) -> Tensor");
-  DISPATCH_TO_CUDA("uvm_to_cpu", uvm_to_cpu);
   m.def("new_managed_tensor(Tensor self, int[] sizes) -> Tensor");
-  DISPATCH_TO_CUDA("new_managed_tensor", new_managed_tensor);
-  DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
   m.def("new_host_mapped_tensor(Tensor self, int[] sizes) -> Tensor");
-  DISPATCH_TO_CUDA("new_host_mapped_tensor", new_host_mapped_tensor);
   m.def(
       "new_unified_tensor(Tensor self, int[] sizes, bool is_host_mapped) -> Tensor");
-  DISPATCH_TO_CUDA("new_unified_tensor", new_unified_tensor);
   m.def("new_vanilla_managed_tensor(Tensor self, int[] sizes) -> Tensor");
-  DISPATCH_TO_CUDA("new_vanilla_managed_tensor", new_vanilla_managed_tensor);
   m.def(
       "cuda_mem_advise(Tensor t, int advice) -> ()",
       TORCH_FN(uvm_cuda_mem_advise));
@@ -49,4 +40,9 @@ TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
   m.def(FBGEMM_GPU_ENUM_OP(uvm, fbgemm_gpu_uvm_enum_query));
 }
 
+TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
+  DISPATCH_TO_CPU("new_unified_tensor", new_unified_tensor_cpu);
+  DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
+}
+
 } // namespace fbgemm_gpu
diff --git a/fbgemm_gpu/src/memory_utils/memory_utils_ops.cu b/fbgemm_gpu/src/memory_utils/memory_utils_ops.cu
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <torch/library.h>
+#include "common.cuh"
+#include "fbgemm_gpu/ops_utils.h"
+#include "fbgemm_gpu/sparse_ops_utils.h"
+
+namespace fbgemm_gpu {
+
+TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
+  DISPATCH_TO_CUDA("uvm_to_cpu", uvm_to_cpu);
+  DISPATCH_TO_CUDA("new_managed_tensor", new_managed_tensor);
+  DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
+  DISPATCH_TO_CUDA("new_host_mapped_tensor", new_host_mapped_tensor);
+  DISPATCH_TO_CUDA("new_unified_tensor", new_unified_tensor);
+  DISPATCH_TO_CUDA("new_vanilla_managed_tensor", new_vanilla_managed_tensor);
+}
+
+} // namespace fbgemm_gpu