ROCm · junliume · Feb 16, 2023 · Feb 6, 2023 · Feb 6, 2023 · Feb 8, 2023
@@ -53,7 +53,7 @@ __kernel void Col2Im3d(global _FLOAT* col,
                        const int height,
                        const int width,
                        global _FLOAT* im,
-                       const int im_offset)
+                       const unsigned long im_offset)
 {
     global _FLOAT* im_off = im + im_offset;
     int gid               = (int)get_global_id(0);
@@ -84,7 +84,12 @@ __kernel void Col2Im3d(global _FLOAT* col,
                       : (im_w - (dilation_w * (wei_w - 1) + 1)) / stride_w + 1;
     int end_w   = min(col_w, im_w / stride_w + 1);
 
+#if MIOPEN_USE_64BIT_INDEX
+    long ch_offset = (long)im_ch * col_d * col_w * col_h * wei_d * wei_w * wei_h;
+#else
     int ch_offset = im_ch * col_d * col_w * col_h * wei_d * wei_w * wei_h;
+#endif
+
     col += ch_offset;
 
     _FLOAT_ACCUM tmp = (_FLOAT_ACCUM)0;
@@ -103,8 +108,15 @@ __kernel void Col2Im3d(global _FLOAT* col,
                     int y = (im_h - cy * stride_h) / dilation_h;
                     int x = (im_w - cx * stride_w) / dilation_w;
 
+#if MIOPEN_USE_64BIT_INDEX
+                    long col_off =
+                        ((((((long)z * wei_h) + y) * wei_w + x) * col_d + cz) * col_h + cy) *
+                            col_w +
+                        cx;
+#else
                     int col_off =
                         (((((z * wei_h) + y) * wei_w + x) * col_d + cz) * col_h + cy) * col_w + cx;
+#endif
 
                     tmp += CVT_FLOAT2ACCUM(col[col_off]);
                 }

@@ -509,7 +509,7 @@ float Col2Im3dGPU(const Handle& handle,
                   const int in_h,
                   const int in_w,
                   Data_t im,
-                  int im_offset,
+                  std::size_t im_offset,
                   miopenDataType_t type)
 {
     std::string program_name = "MIOpenCol2Im3d.cl";
@@ -565,8 +565,15 @@ float Col2Im3dGPU(const Handle& handle,
     }
     else
     {
+        std::size_t index_size = static_cast<size_t>(in_c) * out_d * out_h * out_w * wei_d * wei_w *
+                                 wei_h * sizeof(ConstData_t);
+
+        const bool use_64_bit_index = index_size > 0xffffffffULL;
+
         std::string params = GetDataTypeKernelParams(type);
 
+        params += use_64_bit_index ? " -DMIOPEN_USE_64BIT_INDEX=1" : " -DMIOPEN_USE_64BIT_INDEX=0";
+
         const std::vector<size_t> vld{256, 1, 1};
         size_t global_threads = static_cast<size_t>(in_c) * in_d * in_h * in_w;
         const std::vector<size_t> vgd{global_threads, 1, 1};