From 8b8da5a09803fbce7164afd213e7910e7b8507aa Mon Sep 17 00:00:00 2001
From: cliffburdick <cburdick@nvidia.com>
Date: Thu, 18 Jul 2024 14:56:17 -0700
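Subject: [PATCH] Workaround for constexpr bug inside lambda in CUDA 11.8

When CUDART_VERSION <= 11080, matxFFTConv1DInternal now declares the
s1, s2 and sifft tensors up front and fills them in with make_tensor()
under a function-level `if constexpr (is_cuda_executor_v<Executor>)`,
instead of going through the allocate_tensor lambda that contains the
`if constexpr`. Newer toolkits keep the existing lambda-based path.
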
---
 include/matx/transforms/conv.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
diff --git a/include/matx/transforms/conv.h b/include/matx/transforms/conv.h
index c13d522d..8c2c9c87 100644
--- a/include/matx/transforms/conv.h
+++ b/include/matx/transforms/conv.h
@@ -61,7 +61,24 @@ inline void matxFFTConv1DInternal(OutputType &o, const InType &i,
 
   std::fill(std::begin(slice_start), std::end(slice_start), 0);
   std::fill(std::begin(slice_end), std::end(slice_end), matxEnd);
+
   
+#if (CUDART_VERSION <= 11080)
+  matx::tensor_t<complex_from_scalar_t<typename InType::scalar_type>, InType::Rank()> s1;
+  matx::tensor_t<complex_from_scalar_t<typename InType::scalar_type>, InType::Rank()> s2;
+  matx::tensor_t<complex_from_scalar_t<typename InType::scalar_type>, InType::Rank()> sifft;
+
+  if constexpr (is_cuda_executor_v<Executor>) {
+    make_tensor(s1, in_shape_padded, MATX_ASYNC_DEVICE_MEMORY, exec.getStream());
+    make_tensor(s2, in_shape_padded, MATX_ASYNC_DEVICE_MEMORY, exec.getStream());
+    make_tensor(sifft, in_shape_padded, MATX_ASYNC_DEVICE_MEMORY, exec.getStream());
+  }
+  else {
+    make_tensor(s1, in_shape_padded, MATX_HOST_MALLOC_MEMORY);
+    make_tensor(s2, in_shape_padded, MATX_HOST_MALLOC_MEMORY);
+    make_tensor(sifft, in_shape_padded, MATX_HOST_MALLOC_MEMORY);
+  }
+#else
   auto allocate_tensor = [&](auto shape) {
     if constexpr (is_cuda_executor_v<Executor>) {
       return make_tensor<complex_from_scalar_t<typename InType::scalar_type>>(shape, MATX_ASYNC_DEVICE_MEMORY, exec.getStream());
@@ -73,6 +90,7 @@ inline void matxFFTConv1DInternal(OutputType &o, const InType &i,
   auto s1 = allocate_tensor(in_shape_padded);
   auto s2 = allocate_tensor(in_shape_padded);
   auto sifft = allocate_tensor(in_shape_padded);
+#endif  
 
   if constexpr (! is_complex_v<typename InType::scalar_type>) {
     slice_end[InType::Rank() - 1] = padded_size/2 + 1;