From 77cbf7caad747a589d3b517a91688128120b02a0 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Nov 2020 18:45:30 +0000 Subject: [PATCH] Use header files: - Create header files for kernel implementation and remove definitions from vision_*.h files. - Eliminate unnecessary headers and ensure all cpp include their headers. - Move internal implementations in detail namespaces. --- torchvision/csrc/cpu/deform_conv2d_cpu.cpp | 26 ++++----- torchvision/csrc/cpu/deform_conv2d_cpu.h | 39 +++++++++++++ torchvision/csrc/cpu/vision_cpu.h | 35 +---------- torchvision/csrc/cuda/deform_conv2d_cuda.cu | 22 +++---- torchvision/csrc/cuda/deform_conv2d_cuda.h | 39 +++++++++++++ torchvision/csrc/cuda/vision_cuda.h | 35 +---------- torchvision/csrc/deform_conv2d.cpp | 21 +++---- torchvision/csrc/deform_conv2d.h | 64 +++++++++++++++++++++ 8 files changed, 170 insertions(+), 111 deletions(-) create mode 100644 torchvision/csrc/cpu/deform_conv2d_cpu.h create mode 100644 torchvision/csrc/cuda/deform_conv2d_cuda.h create mode 100644 torchvision/csrc/deform_conv2d.h diff --git a/torchvision/csrc/cpu/deform_conv2d_cpu.cpp b/torchvision/csrc/cpu/deform_conv2d_cpu.cpp index 10a161303f6..9e904d72166 100644 --- a/torchvision/csrc/cpu/deform_conv2d_cpu.cpp +++ b/torchvision/csrc/cpu/deform_conv2d_cpu.cpp @@ -66,15 +66,9 @@ // modified from // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp -#include -#include -#include +#include "deform_conv2d_cpu.h" -#include -#include -#include - -namespace { +namespace detail { const int kMaxParallelImgs = 32; @@ -851,7 +845,7 @@ at::Tensor backward_gradient_parameters( return grad_weight; } -} // namespace +} // namespace detail at::Tensor deform_conv2d_forward_cpu( const at::Tensor& input_param, @@ -885,8 +879,8 @@ at::Tensor deform_conv2d_forward_cpu( int in_h = input.size(2); int in_w = input.size(3); - int n_parallel_imgs = - get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); + int n_parallel_imgs = detail::get_greatest_divisor_below_bound( + batch_sz, detail::kMaxParallelImgs); // Unpack shapes and args int out_channels = weight.size(0); @@ -1015,7 +1009,7 @@ at::Tensor deform_conv2d_forward_cpu( {n_in_channels * weight_h * weight_w, n_parallel_imgs * out_h * out_w}, input.options()); for (int b = 0; b < batch_sz / n_parallel_imgs; b++) { - deformable_im2col( + detail::deformable_im2col( input[b], offset[b], mask[b], @@ -1086,10 +1080,10 @@ deform_conv2d_backward_cpu( at::Tensor bias = bias_param.contiguous(); const int batch_sz = input.size(0); - const int n_parallel_imgs = - get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); + const int n_parallel_imgs = detail::get_greatest_divisor_below_bound( + batch_sz, detail::kMaxParallelImgs); - auto grad_input_and_offset_and_mask = backward_gradient_inputs( + auto grad_input_and_offset_and_mask = detail::backward_gradient_inputs( input, weight, offset, @@ -1110,7 +1104,7 @@ deform_conv2d_backward_cpu( auto grad_offset = std::get<1>(grad_input_and_offset_and_mask); auto grad_mask = std::get<2>(grad_input_and_offset_and_mask); - auto grad_weight = backward_gradient_parameters( + auto grad_weight = detail::backward_gradient_parameters( input, weight, offset, diff --git a/torchvision/csrc/cpu/deform_conv2d_cpu.h b/torchvision/csrc/cpu/deform_conv2d_cpu.h new file mode 100644 index 00000000000..2eb9375ca1a --- /dev/null +++ b/torchvision/csrc/cpu/deform_conv2d_cpu.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor deform_conv2d_forward_cpu( + const at::Tensor& input_param, + const at::Tensor& weight_param, + const at::Tensor& offset_param, + const at::Tensor& mask_param, + const at::Tensor& bias_param, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dil_h, + int64_t dil_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); + +VISION_API std:: + tuple + deform_conv2d_backward_cpu( + const at::Tensor& grad_out_param, + const at::Tensor& input_param, + const at::Tensor& weight_param, + const at::Tensor& offset_param, + const at::Tensor& mask_param, + const at::Tensor& bias_param, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dil_h, + int64_t dil_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index 26a4221d7c7..6f85d9c0256 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -2,40 +2,7 @@ #include #include "../macros.h" -VISION_API at::Tensor deform_conv2d_forward_cpu( - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dil_h, - int64_t dil_w, - int64_t n_weight_grps, - int64_t n_offset_grps, - bool use_mask); - -VISION_API std:: - tuple - deform_conv2d_backward_cpu( - const at::Tensor& grad_out_param, - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dil_h, - int64_t dil_w, - int64_t n_weight_grps, - int64_t n_offset_grps, - bool use_mask); +// TODO: Delete this file once all the methods are gone VISION_API at::Tensor nms_cpu( const at::Tensor& dets, diff --git a/torchvision/csrc/cuda/deform_conv2d_cuda.cu b/torchvision/csrc/cuda/deform_conv2d_cuda.cu index 03596eb97f7..47aaf785cb4 100644 --- a/torchvision/csrc/cuda/deform_conv2d_cuda.cu +++ b/torchvision/csrc/cuda/deform_conv2d_cuda.cu @@ -67,18 +67,14 @@ // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp #include -#include #include #include #include +#include "deform_conv2d_cuda.h" #include "cuda_helpers.h" -#include -#include -#include - -namespace { +namespace detail { const int kMaxParallelImgs = 32; @@ -898,7 +894,7 @@ at::Tensor backward_gradient_parameters( return grad_weight; } -} // namespace +} // namespace detail at::Tensor deform_conv2d_forward_cuda( const at::Tensor& input_param, @@ -935,7 +931,7 @@ at::Tensor deform_conv2d_forward_cuda( int in_w = input.size(3); int n_parallel_imgs = - get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); + detail::get_greatest_divisor_below_bound(batch_sz, detail::kMaxParallelImgs); int out_channels = weight.size(0); int weight_h = weight.size(2); @@ -1063,7 +1059,7 @@ at::Tensor deform_conv2d_forward_cuda( {in_channels * weight_h * weight_w, n_parallel_imgs * out_h * out_w}, input.options()); for (int b = 0; b < batch_sz / n_parallel_imgs; b++) { - deformable_im2col( + detail::deformable_im2col( input[b], offset[b], mask[b], @@ -1134,10 +1130,10 @@ deform_conv2d_backward_cuda( at::Tensor bias = bias_param.contiguous(); const int batch_sz = input.size(0); - const int n_parallel_imgs = - get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); + const int n_parallel_imgs = detail::get_greatest_divisor_below_bound( + batch_sz, detail::kMaxParallelImgs); - auto grad_input_and_offset_and_mask = backward_gradient_inputs( + auto grad_input_and_offset_and_mask = detail::backward_gradient_inputs( input, weight, offset, @@ -1158,7 +1154,7 @@ deform_conv2d_backward_cuda( auto grad_offset = std::get<1>(grad_input_and_offset_and_mask); auto grad_mask = std::get<2>(grad_input_and_offset_and_mask); - auto grad_weight = backward_gradient_parameters( + auto grad_weight = detail::backward_gradient_parameters( input, weight, offset, diff --git a/torchvision/csrc/cuda/deform_conv2d_cuda.h b/torchvision/csrc/cuda/deform_conv2d_cuda.h new file mode 100644 index 00000000000..c9afe68849d --- /dev/null +++ b/torchvision/csrc/cuda/deform_conv2d_cuda.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor deform_conv2d_forward_cuda( + const at::Tensor& input_param, + const at::Tensor& weight_param, + const at::Tensor& offset_param, + const at::Tensor& mask_param, + const at::Tensor& bias_param, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dil_h, + int64_t dil_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); + +VISION_API std:: + tuple + deform_conv2d_backward_cuda( + const at::Tensor& grad_out_param, + const at::Tensor& input_param, + const at::Tensor& weight_param, + const at::Tensor& offset_param, + const at::Tensor& mask_param, + const at::Tensor& bias_param, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dil_h, + int64_t dil_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index 1119d331cac..834973c5327 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -2,40 +2,7 @@ #include #include "../macros.h" -VISION_API at::Tensor deform_conv2d_forward_cuda( - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dil_h, - int64_t dil_w, - int64_t n_weight_grps, - int64_t n_offset_grps, - bool use_mask); - -VISION_API std:: - tuple - deform_conv2d_backward_cuda( - const at::Tensor& grad_out_param, - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dil_h, - int64_t dil_w, - int64_t n_weight_grps, - int64_t n_offset_grps, - bool use_mask); +// TODO: Delete this file once all the methods are gone VISION_API at::Tensor nms_cuda( const at::Tensor& dets, diff --git a/torchvision/csrc/deform_conv2d.cpp b/torchvision/csrc/deform_conv2d.cpp index e032f65b493..f1cbb2a5d90 100644 --- a/torchvision/csrc/deform_conv2d.cpp +++ b/torchvision/csrc/deform_conv2d.cpp @@ -1,17 +1,10 @@ #pragma once -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP +#include "deform_conv2d.h" +#include #include "autocast.h" -#include "hip/vision_cuda.h" -#endif -namespace { +namespace detail { at::Tensor deform_conv2d( const at::Tensor& input, @@ -261,7 +254,7 @@ class DeformConv2dBackwardFunction } }; -} // namespace +} // namespace detail #if defined(WITH_CUDA) || defined(WITH_HIP) at::Tensor deform_conv2d_autocast( @@ -280,7 +273,7 @@ at::Tensor deform_conv2d_autocast( int64_t offset_groups, bool use_mask) { c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); - return deform_conv2d( + return detail::deform_conv2d( at::autocast::cached_cast(at::kFloat, input), at::autocast::cached_cast(at::kFloat, weight), at::autocast::cached_cast(at::kFloat, offset), @@ -314,7 +307,7 @@ at::Tensor deform_conv2d_autograd( int64_t groups, int64_t offset_groups, bool use_mask) { - return DeformConv2dFunction::apply( + return detail::DeformConv2dFunction::apply( input, weight, offset, @@ -348,7 +341,7 @@ deform_conv2d_backward_autograd( int64_t groups, int64_t offset_groups, bool use_mask) { - auto result = DeformConv2dBackwardFunction::apply( + auto result = detail::DeformConv2dBackwardFunction::apply( grad, input, weight, diff --git a/torchvision/csrc/deform_conv2d.h b/torchvision/csrc/deform_conv2d.h new file mode 100644 index 00000000000..e6250a1847b --- /dev/null +++ b/torchvision/csrc/deform_conv2d.h @@ -0,0 +1,64 @@ +#pragma once + +#include "cpu/deform_conv2d_cpu.h" + +#ifdef WITH_CUDA +#include "cuda/deform_conv2d_cuda.h" +#endif +#ifdef WITH_HIP +#include "hip/deform_conv2d_cuda.h" +#endif + +// Autocast Registration +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor deform_conv2d_autocast( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask); +#endif + +// Autograd Registration +at::Tensor deform_conv2d_autograd( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask); + +std::tuple +deform_conv2d_backward_autograd( + const at::Tensor& grad, + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask);