Skip to content

Commit

Permalink
Register fake CPU dispatch for new_unified_tensor (#2099)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #2099

- Register fake CPU dispatch for new_unified_tensor

Reviewed By: sryap, spcyppt

Differential Revision: D50710925

fbshipit-source-id: 374675364e53a3a5ada2884263b189b97f4453f5
  • Loading branch information
q10 authored and facebook-github-bot committed Oct 28, 2023
1 parent 049f2a9 commit 4651326
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 29 deletions.
6 changes: 4 additions & 2 deletions fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,8 @@ if(NOT FBGEMM_CPU_ONLY)
codegen/embedding_forward_quantized_host.cpp
codegen/embedding_backward_dense_host.cpp
codegen/embedding_bounds_check_host.cpp
src/cumem_utils_host.cpp
src/memory_utils/memory_utils.cpp
src/memory_utils/memory_utils_ops.cpp
src/layout_transform_ops_gpu.cpp
src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_gpu.cpp
src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_gpu.cpp
Expand Down Expand Up @@ -628,7 +629,8 @@ if(NOT FBGEMM_CPU_ONLY)
set(fbgemm_gpu_sources_static_gpu
codegen/embedding_bounds_check.cu
codegen/embedding_forward_quantized_split_lookup.cu
src/cumem_utils.cu
src/memory_utils/memory_utils.cu
src/memory_utils/memory_utils_ops.cu
src/embedding_inplace_update.cu
src/histogram_binning_calibration_ops.cu
src/input_combine.cu
Expand Down
2 changes: 1 addition & 1 deletion fbgemm_gpu/docs/Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,7 @@ INPUT = "../include/fbgemm_gpu" \
"../src/split_embeddings_cache/split_embeddings_cache_ops.cpp" \
"../src/jagged_tensor_ops.cu" \
"../src/jagged_tensor_ops_cpu.cpp" \
"../src/cumem_utils.h" \
"../include/fbgemm_gpu/cumem_utils.h" \
"../include/fbgemm_gpu/input_combine.h" \
"../src/layout_transform_ops.cu" \
"../src/layout_transform_ops_cpu.cpp" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <ATen/ATen.h>
#include "fbgemm_gpu/enum_utils.h"

namespace fbgemm_gpu {

using namespace at;
using Tensor = at::Tensor;

///@defgroup cumem-utils CUDA Memorty Operators
///
Expand Down Expand Up @@ -86,6 +87,4 @@ void uvm_mem_advice_dont_fork(const Tensor& t);
/// The copy uses single threaded memcpy
Tensor uvm_to_cpu_clone(const Tensor& t);

FBGEMM_GPU_ENUM_CREATE_TAG(uvm)

} // namespace fbgemm_gpu
26 changes: 26 additions & 0 deletions fbgemm_gpu/src/memory_utils/common.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <c10/cuda/CUDAGuard.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstring>

#include "common.h"
#include "fbgemm_gpu/cumem_utils.h"
#include "fbgemm_gpu/enum_utils.h"

namespace fbgemm_gpu {

FBGEMM_GPU_ENUM_CREATE_TAG(uvm)

} // namespace fbgemm_gpu
22 changes: 22 additions & 0 deletions fbgemm_gpu/src/memory_utils/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <ATen/ATen.h>

using Tensor = at::Tensor;

namespace fbgemm_gpu {

Tensor new_unified_tensor_cpu(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool is_host_mapped);

} // namespace fbgemm_gpu
22 changes: 22 additions & 0 deletions fbgemm_gpu/src/memory_utils/memory_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "common.h"

using Tensor = at::Tensor;

namespace fbgemm_gpu {

Tensor new_unified_tensor_cpu(
const Tensor& self,
const std::vector<std::int64_t>& sizes,
bool is_host_mapped) {
return at::empty({0}, self.options());
}

} // namespace fbgemm_gpu
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,9 @@
* LICENSE file in the root directory of this source tree.
*/

#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <c10/cuda/CUDAGuard.h>
#include "common.cuh"

#include <sys/mman.h>
#include <unistd.h>
#include <cstring>

#include "cumem_utils.h"
#include "fbgemm_gpu/enum_utils.h"
#include "fbgemm_gpu/fbgemm_cuda_utils.cuh"

using Tensor = at::Tensor;
using namespace at;

namespace fbgemm_gpu {

Expand All @@ -33,6 +23,7 @@ namespace fbgemm_gpu {
// and set the correct device in the thread before calling cudaFree[Host]

namespace {

struct CUDAHostMappedContext {
void* ptr_;
int cuda_device_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,10 @@
* LICENSE file in the root directory of this source tree.
*/

#include <ATen/ATen.h>
#include <torch/library.h>
#include "fbgemm_gpu/enum_utils.h"
#include "common.cuh"
#include "fbgemm_gpu/sparse_ops_utils.h"

#include "cumem_utils.h"

using Tensor = at::Tensor;

namespace fbgemm_gpu {
Expand All @@ -24,17 +21,11 @@ TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
"uvm_to_device(Tensor self, Tensor prototype) -> Tensor",
TORCH_FN(uvm_to_device));
m.def("uvm_to_cpu(Tensor t) -> Tensor");
DISPATCH_TO_CUDA("uvm_to_cpu", uvm_to_cpu);
m.def("new_managed_tensor(Tensor self, int[] sizes) -> Tensor");
DISPATCH_TO_CUDA("new_managed_tensor", new_managed_tensor);
DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
m.def("new_host_mapped_tensor(Tensor self, int[] sizes) -> Tensor");
DISPATCH_TO_CUDA("new_host_mapped_tensor", new_host_mapped_tensor);
m.def(
"new_unified_tensor(Tensor self, int[] sizes, bool is_host_mapped) -> Tensor");
DISPATCH_TO_CUDA("new_unified_tensor", new_unified_tensor);
m.def("new_vanilla_managed_tensor(Tensor self, int[] sizes) -> Tensor");
DISPATCH_TO_CUDA("new_vanilla_managed_tensor", new_vanilla_managed_tensor);
m.def(
"cuda_mem_advise(Tensor t, int advice) -> ()",
TORCH_FN(uvm_cuda_mem_advise));
Expand All @@ -49,4 +40,9 @@ TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(FBGEMM_GPU_ENUM_OP(uvm, fbgemm_gpu_uvm_enum_query));
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CPU("new_unified_tensor", new_unified_tensor_cpu);
DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
}

} // namespace fbgemm_gpu
25 changes: 25 additions & 0 deletions fbgemm_gpu/src/memory_utils/memory_utils_ops.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <torch/library.h>
#include "common.cuh"
#include "fbgemm_gpu/ops_utils.h"
#include "fbgemm_gpu/sparse_ops_utils.h"

namespace fbgemm_gpu {

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA("uvm_to_cpu", uvm_to_cpu);
DISPATCH_TO_CUDA("new_managed_tensor", new_managed_tensor);
DISPATCH_TO_META("new_managed_tensor", new_managed_tensor_meta);
DISPATCH_TO_CUDA("new_host_mapped_tensor", new_host_mapped_tensor);
DISPATCH_TO_CUDA("new_unified_tensor", new_unified_tensor);
DISPATCH_TO_CUDA("new_vanilla_managed_tensor", new_vanilla_managed_tensor);
}

} // namespace fbgemm_gpu

0 comments on commit 4651326

Please sign in to comment.