Skip to content

Commit

Permalink
gpu: ocl: rely on get_sub_group_size() for post_ops
Browse files Browse the repository at this point in the history
Avoids the possibility that the SUB_GROUP_SIZE define differs from the
sub-group size the kernel is actually compiled with.
  • Loading branch information
rjoursler authored and vpirogov committed Apr 5, 2023
1 parent 44355a6 commit a7c8cbc
Showing 1 changed file with 3 additions and 7 deletions.
10 changes: 3 additions & 7 deletions src/gpu/ocl/ocl_post_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@
#ifndef GPU_OCL_OCL_POST_OPS_H
#define GPU_OCL_OCL_POST_OPS_H

-#ifndef SUB_GROUP_SIZE
-#define SUB_GROUP_SIZE get_sub_group_size()
-#endif

#if WITH_POST_OP

#if !WITH_ELTWISE
Expand Down Expand Up @@ -183,11 +179,11 @@ float fwd_Xnary(unsigned kind, unsigned algorithm, float x, float y,
} \
}

-#define X_NELEMS(x) ({ x / SUB_GROUP_SIZE; })
+#define X_NELEMS(x) ({ x / get_sub_group_size(); })

#define CONDITIONAL_FILL( \
idx, blocked_coord, nelem, src_ptr, dst_ptr, data_type) \
-if (blocked_coord / SUB_GROUP_SIZE == nelem) \
+if (blocked_coord / get_sub_group_size() == nelem) \
FILL_WITH_BLOCK_READ(idx, src_ptr, dst_ptr, nelem, data_type);

#define FILL_BIN_ARG_TRY_BLOCK(idx, dest_ptr, dest_size, x0, x0_s, x1, x1_s, \
Expand Down Expand Up @@ -269,7 +265,7 @@ float fwd_Xnary(unsigned kind, unsigned algorithm, float x, float y,
REPLICATE_DATA(bin_arg_ptr, bin_arg_size, x0_s, X_NELEMS(x1_s), \
x2_s, x3_s, x4_s, x5_s); \
} else { \
-const unsigned x1_jump = is_burst ? SUB_GROUP_SIZE : 1; \
+const unsigned x1_jump = is_burst ? get_sub_group_size() : 1; \
const unsigned x1_size = x1_s / x1_jump; \
FILL_BIN_ARG_SERIAL(idx, bin_arg_ptr, x0, x0_s, (x1 + x1_incr), \
x1_s, x1_jump, x2, x2_s, x3, x3_s, x4, x4_s, x5, x5_s); \
Expand Down

0 comments on commit a7c8cbc

Please sign in to comment.