From 8b8da5a09803fbce7164afd213e7910e7b8507aa Mon Sep 17 00:00:00 2001 From: cliffburdick Date: Thu, 18 Jul 2024 14:56:17 -0700 Subject: [PATCH] Workaround for constexpr bug inside lambda in CUDA 11.8 --- include/matx/transforms/conv.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/matx/transforms/conv.h b/include/matx/transforms/conv.h index c13d522d..8c2c9c87 100644 --- a/include/matx/transforms/conv.h +++ b/include/matx/transforms/conv.h @@ -61,7 +61,24 @@ inline void matxFFTConv1DInternal(OutputType &o, const InType &i, std::fill(std::begin(slice_start), std::end(slice_start), 0); std::fill(std::begin(slice_end), std::end(slice_end), matxEnd); + +#if (CUDART_VERSION <= 11080) + matx::tensor_t, InType::Rank()> s1; + matx::tensor_t, InType::Rank()> s2; + matx::tensor_t, InType::Rank()> sifft; + + if constexpr (is_cuda_executor_v) { + make_tensor(s1, in_shape_padded, MATX_ASYNC_DEVICE_MEMORY, exec.getStream()); + make_tensor(s2, in_shape_padded, MATX_ASYNC_DEVICE_MEMORY, exec.getStream()); + make_tensor(sifft, in_shape_padded, MATX_ASYNC_DEVICE_MEMORY, exec.getStream()); + } + else { + make_tensor(s1, in_shape_padded, MATX_HOST_MALLOC_MEMORY); + make_tensor(s2, in_shape_padded, MATX_HOST_MALLOC_MEMORY); + make_tensor(sifft, in_shape_padded, MATX_HOST_MALLOC_MEMORY); + } +#else auto allocate_tensor = [&](auto shape) { if constexpr (is_cuda_executor_v) { return make_tensor>(shape, MATX_ASYNC_DEVICE_MEMORY, exec.getStream()); @@ -73,6 +90,7 @@ inline void matxFFTConv1DInternal(OutputType &o, const InType &i, auto s1 = allocate_tensor(in_shape_padded); auto s2 = allocate_tensor(in_shape_padded); auto sifft = allocate_tensor(in_shape_padded); +#endif if constexpr (! is_complex_v) { slice_end[InType::Rank() - 1] = padded_size/2 + 1;