Make the new functions work with cpu-as-device. Add skipifs

Signed-off-by: Engin Kayraklioglu <[email protected]>
chapel-lang · e-kayrakli · Nov 6, 2023 · Sep 19, 2023 · Sep 22, 2023 · Sep 22, 2023
commit e85a00ceb4f994d255e6c1d34d83f92ed46d21c4
diff --git a/modules/standard/GPU.chpl b/modules/standard/GPU.chpl
@@ -398,8 +398,29 @@ module GPU
   config param gpuDebugReduce = false;
 
   private inline proc doGpuReduce(param op: string, const ref A: [] ?t) {
-    if CHPL_GPU == "amd" then
+    if op != "sum" && op != "min" && op != "max" &&
+       op != "minloc" && op != "maxloc" {
+
+      compilerError("Unexpected reduction kind in doGpuReduce: ", op);
+    }
+
+
+    if CHPL_GPU == "amd" {
       compilerError("gpu*Reduce functions are not supported on AMD GPUs");
+    }
+    else if CHPL_GPU == "cpu" {
+      select op {
+        when "sum" do return + reduce A;
+        when "min" do return min reduce A;
+        when "max" do return max reduce A;
+        when "minloc" do return minloc reduce zip (A.domain, A);
+        when "maxloc" do return maxloc reduce zip (A.domain, A);
+      }
+    }
+    else {
+      compilerAssert(CHPL_GPU=="nvidia");
+    }
+
 
     proc chplTypeToCTypeName(type t) param {
       select t {

diff --git a/runtime/src/gpu/cpu/gpu-cpu.c b/runtime/src/gpu/cpu/gpu-cpu.c
@@ -157,4 +157,33 @@ bool chpl_gpu_impl_stream_ready(void* stream) {
 void chpl_gpu_impl_stream_synchronize(void* stream) {
 }
 
+// Defines a stub chpl_gpu_impl_<chpl_kind>_reduce_<data_type>() for the
+// reductions that return only a value (sum, min, max). The stub does not
+// use any of its arguments; it just calls chpl_internal_error(), because
+// (per the message) cpu-as-device mode is expected to handle reductions in
+// the module code, so this function should never be reached.
+// `impl_kind` is not referenced by this expansion.
+// NOTE(review): presumably GPU_IMPL_REDUCE instantiates this macro once per
+// supported data type -- confirm against its definition.
+#define DEF_ONE_REDUCE_RET_VAL(impl_kind, chpl_kind, data_type) \
+void chpl_gpu_impl_##chpl_kind##_reduce_##data_type(data_type* data, int n,\
+                                                    data_type* val,\
+                                                    void* stream) {\
+  chpl_internal_error("This function shouldn't have been called. "\
+                      "cpu-as-device mode should handle reductions in "\
+                      "the module code\n");\
+}
+
+GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL, Sum, sum)
+GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL, Min, min)
+GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL, Max, max)
+
+#undef DEF_ONE_REDUCE_RET_VAL
+
+// Defines a stub chpl_gpu_impl_<chpl_kind>_reduce_<data_type>() for the
+// reductions that return a value and an index (minloc, maxloc); the
+// signature carries an extra `int* idx` parameter compared to the
+// value-only variant. The stub does not use any of its arguments; it just
+// calls chpl_internal_error(), because (per the message) cpu-as-device mode
+// is expected to handle reductions in the module code.
+// `impl_kind` is not referenced by this expansion; it is named to match
+// DEF_ONE_REDUCE_RET_VAL (there is no CUB in the cpu implementation).
+// NOTE(review): presumably GPU_IMPL_REDUCE instantiates this macro once per
+// supported data type -- confirm against its definition.
+#define DEF_ONE_REDUCE_RET_VAL_IDX(impl_kind, chpl_kind, data_type) \
+void chpl_gpu_impl_##chpl_kind##_reduce_##data_type(data_type* data, int n,\
+                                                    data_type* val, int* idx,\
+                                                    void* stream) {\
+  chpl_internal_error("This function shouldn't have been called. "\
+                      "cpu-as-device mode should handle reductions in "\
+                      "the module code\n");\
+}
+
+GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL_IDX, ArgMin, minloc)
+GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL_IDX, ArgMax, maxloc)
+
+#undef DEF_ONE_REDUCE_RET_VAL_IDX
+
 #endif // HAS_GPU_LOCALE
diff --git a/test/gpu/native/reduction/SKIPIF b/test/gpu/native/reduction/SKIPIF
@@ -0,0 +1,2 @@
+# Reductions are not yet supported on AMD GPUs.
+CHPL_GPU==amd
diff --git a/test/gpu/native/reduction/largeArrays.skipif b/test/gpu/native/reduction/largeArrays.skipif
@@ -0,0 +1,4 @@
+# This test exercises a code path that is never run in cpu-as-device mode.
+# Moreover, the large size makes the test take too long to finish in that
+# mode, so it is skipped.
+CHPL_GPU==cpu
diff --git a/test/gpu/native/reduction/largeArraysMinMax.skipif b/test/gpu/native/reduction/largeArraysMinMax.skipif
@@ -0,0 +1,4 @@
+# This test exercises a code path that is never run in cpu-as-device mode.
+# Moreover, the large size makes the test take too long to finish in that
+# mode, so it is skipped.
+CHPL_GPU==cpu
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Reductions are not supported with AMD gpus yet.
		CHPL_GPU==amd