Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial support for whole-array reduction on NVIDIA GPUs #23689

Merged
merged 42 commits into from
Nov 6, 2023
Merged
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
096fdba
Snapshot
e-kayrakli Sep 19, 2023
9020bf5
Get things to link in a hacky way
e-kayrakli Sep 22, 2023
df97abb
Get the initial example working
e-kayrakli Sep 22, 2023
01bcf84
Cleanup
e-kayrakli Sep 22, 2023
38dc53f
Drop additional makefile logic
e-kayrakli Oct 7, 2023
3417301
Finish implementing basic reductions
e-kayrakli Oct 7, 2023
17118b3
Add test
e-kayrakli Oct 7, 2023
b5e689b
Add minloc and maxloc reduces
e-kayrakli Oct 9, 2023
f036931
Refactor basic reduce runtime interface, too
e-kayrakli Oct 9, 2023
41e30a9
Move the actual reduction functions to the GPU module
e-kayrakli Oct 9, 2023
8d0e3e0
Simplify runtime macros
e-kayrakli Oct 9, 2023
c56df6b
Start separating impl implementations
e-kayrakli Oct 13, 2023
7aa3642
Separate runtime interface from the implementation interface
e-kayrakli Oct 16, 2023
cf1e526
Runtime cleanup
e-kayrakli Oct 16, 2023
1489028
Add a common header
e-kayrakli Oct 16, 2023
60f921c
Blind implementation of reductions on AMD
e-kayrakli Oct 17, 2023
ab9b475
Add a compilerError in the module code
e-kayrakli Oct 17, 2023
5ab3e5e
Test/limit usage on AMD
e-kayrakli Oct 18, 2023
0819e31
Fix an issue, add perf test
e-kayrakli Oct 18, 2023
3a78ab2
Initial attempt to do multi-chunk reduction
e-kayrakli Oct 25, 2023
19ad754
Add a new test and fix an issue exposed by it
e-kayrakli Oct 25, 2023
06be1a8
Add new test
e-kayrakli Oct 25, 2023
d012a9b
Expand test to minloc,maxloc. Fix a bug
e-kayrakli Oct 26, 2023
e85a00c
Make the new functions work with cpu-as-device. Add skipifs
e-kayrakli Oct 26, 2023
186ad54
Add the missing good file
e-kayrakli Oct 26, 2023
4b6953f
Remove a trailing whitespace
e-kayrakli Oct 26, 2023
aca38c4
Revert some of the AMD changes
e-kayrakli Oct 26, 2023
257a6ae
Add the missing execopts
e-kayrakli Oct 26, 2023
9c49498
Remove an include
e-kayrakli Oct 26, 2023
1627385
Relocate tests into a new noAmd directory
e-kayrakli Nov 3, 2023
ab8a348
Add a user facing error message for unknown types and a test to lock …
e-kayrakli Nov 3, 2023
7725662
Add more fall-through otherwises
e-kayrakli Nov 3, 2023
6da77ee
Start adding documentation
e-kayrakli Nov 3, 2023
889ef73
Add one more fallthrough, unify error messages
e-kayrakli Nov 3, 2023
377f97d
Free runtime memory that we were leaking before
e-kayrakli Nov 3, 2023
1675b9e
A big refactor to reduce code duplication significantly
e-kayrakli Nov 3, 2023
dd10162
Fix a bug for non-zero-based arrays, add test
e-kayrakli Nov 3, 2023
7f007b7
Remove trailing whitespaces
e-kayrakli Nov 3, 2023
727f443
More clarifications in doc
e-kayrakli Nov 3, 2023
79ea359
Add a missing space
e-kayrakli Nov 3, 2023
cc39ef7
Add missing commas in AMD runtime
e-kayrakli Nov 6, 2023
00ee967
Move skipif to the parent directory
e-kayrakli Nov 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Make the new functions work with cpu-as-device. Add skipifs
Signed-off-by: Engin Kayraklioglu <[email protected]>
e-kayrakli committed Oct 26, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit e85a00ceb4f994d255e6c1d34d83f92ed46d21c4
23 changes: 22 additions & 1 deletion modules/standard/GPU.chpl
Original file line number Diff line number Diff line change
@@ -398,8 +398,29 @@ module GPU
config param gpuDebugReduce = false;

private inline proc doGpuReduce(param op: string, const ref A: [] ?t) {
if CHPL_GPU == "amd" then
if op != "sum" && op != "min" && op != "max" &&
op != "minloc" && op != "maxloc" {

compilerError("Unexpected reduction kind in doGpuReduce: ", op);
}


if CHPL_GPU == "amd" {
compilerError("gpu*Reduce functions are not supported on AMD GPUs");
}
else if CHPL_GPU == "cpu" {
select op {
when "sum" do return + reduce A;
when "min" do return min reduce A;
when "max" do return max reduce A;
when "minloc" do return minloc reduce zip (A.domain, A);
when "maxloc" do return maxloc reduce zip (A.domain, A);
e-kayrakli marked this conversation as resolved.
Show resolved Hide resolved
}
}
else {
compilerAssert(CHPL_GPU=="nvidia");
}


proc chplTypeToCTypeName(type t) param {
e-kayrakli marked this conversation as resolved.
Show resolved Hide resolved
select t {
29 changes: 29 additions & 0 deletions runtime/src/gpu/cpu/gpu-cpu.c
Original file line number Diff line number Diff line change
@@ -157,4 +157,33 @@ bool chpl_gpu_impl_stream_ready(void* stream) {
void chpl_gpu_impl_stream_synchronize(void* stream) {
}

/*
 * Stub generator for the value-only reductions (sum, min, max).
 *
 * Each expansion defines chpl_gpu_impl_<chpl_kind>_reduce_<data_type>() with
 * the (data, n, val, stream) signature. The body only raises an internal
 * error: per the message below, cpu-as-device mode is expected to perform
 * reductions in the module code, so reaching one of these stubs is a bug.
 *
 * impl_kind is accepted but not used by this backend.
 * NOTE(review): GPU_IMPL_REDUCE is defined elsewhere; presumably it expands
 * the given macro once per supported data type -- confirm against its header.
 */
#define DEF_ONE_REDUCE_RET_VAL(impl_kind, chpl_kind, data_type)               \
  void chpl_gpu_impl_##chpl_kind##_reduce_##data_type(data_type* data,        \
                                                      int n,                  \
                                                      data_type* val,         \
                                                      void* stream) {         \
    chpl_internal_error(                                                      \
      "This function shouldn't have been called. "                            \
      "cpu-as-device mode should handle reductions in the module code\n");    \
  }

GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL, Sum, sum)
GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL, Min, min)
GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL, Max, max)

/* The generator is only needed for the instantiations above. */
#undef DEF_ONE_REDUCE_RET_VAL

/*
 * Stub generator for the index-returning reductions (minloc, maxloc).
 *
 * Each expansion defines chpl_gpu_impl_<chpl_kind>_reduce_<data_type>() with
 * the (data, n, val, idx, stream) signature. The body only raises an internal
 * error: per the message below, cpu-as-device mode is expected to perform
 * reductions in the module code, so reaching one of these stubs is a bug.
 *
 * The first parameter is named impl_kind to match DEF_ONE_REDUCE_RET_VAL in
 * this file; it is accepted but not used by this backend.
 * NOTE(review): GPU_IMPL_REDUCE is defined elsewhere; presumably it expands
 * the given macro once per supported data type -- confirm against its header.
 */
#define DEF_ONE_REDUCE_RET_VAL_IDX(impl_kind, chpl_kind, data_type) \
void chpl_gpu_impl_##chpl_kind##_reduce_##data_type(data_type* data, int n,\
                                                    data_type* val, int* idx,\
                                                    void* stream) {\
  chpl_internal_error("This function shouldn't have been called. "\
                      "cpu-as-device mode should handle reductions in "\
                      "the module code\n");\
}

GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL_IDX, ArgMin, minloc)
GPU_IMPL_REDUCE(DEF_ONE_REDUCE_RET_VAL_IDX, ArgMax, maxloc)

/* The generator is only needed for the instantiations above. */
#undef DEF_ONE_REDUCE_RET_VAL_IDX

#endif // HAS_GPU_LOCALE
2 changes: 2 additions & 0 deletions test/gpu/native/reduction/SKIPIF
e-kayrakli marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Reductions are not supported with AMD GPUs yet.
CHPL_GPU==amd
4 changes: 4 additions & 0 deletions test/gpu/native/reduction/largeArrays.skipif
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# This test exercises a code path that is never taken in cpu-as-device mode.
# Moreover, the large array size makes the test take too long to finish in
# that mode, so it is skipped.
CHPL_GPU==cpu
4 changes: 4 additions & 0 deletions test/gpu/native/reduction/largeArraysMinMax.skipif
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# This test exercises a code path that is never taken in cpu-as-device mode.
# Moreover, the large array size makes the test take too long to finish in
# that mode, so it is skipped.
CHPL_GPU==cpu