Skip to content

Commit

Permalink
Revert ovxlib topk kernel modification (#694)
Browse files Browse the repository at this point in the history
Internal ovxlib commit: b12b1f138e66c78e0fb4032e5399a68a7280a801
(revert for software compatibility).

Type:  Bug Fix

Signed-off-by: Feiyue Chen <[email protected]>
  • Loading branch information
chenfeiyue-cfy authored Apr 26, 2024
1 parent 3b80968 commit e1c2f0a
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 782 deletions.
2 changes: 2 additions & 0 deletions src/tim/vx/internal/include/vsi_nn_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ typedef struct _vsi_nn_hw_config_t
{
char target_name[VSI_NN_MAX_TARGET_NAME];
vsi_nn_hw_evis_t evis;
#if VX_HARDWARE_CAPS_PARAMS_EXT_SUPPORT
uint32_t subGroupSize;
#endif
uint32_t use_40bits_va;
uint32_t support_stream_processor;
uint32_t sp_exec_count;
Expand Down
2 changes: 1 addition & 1 deletion src/tim/vx/internal/include/vsi_nn_version.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ extern "C"{

#define VSI_NN_VERSION_MAJOR 1
#define VSI_NN_VERSION_MINOR 2
#define VSI_NN_VERSION_PATCH 6
#define VSI_NN_VERSION_PATCH 5
#define VSI_NN_VERSION \
(VSI_NN_VERSION_MAJOR * 10000 + VSI_NN_VERSION_MINOR * 100 + VSI_NN_VERSION_PATCH)

Expand Down
44 changes: 12 additions & 32 deletions src/tim/vx/internal/src/kernel/cl/topk_cl.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,20 @@
#include "vsi_nn_tensor_util.h"
#include "utils/vsi_nn_util.h"
#include "kernel/vsi_nn_kernel.h"
#include "libnnext/vx_lib_nnext.h"

__BEGIN_DECLS

#define _TOPK_KERNEL_SOURCE "topk"
#define STR(a) #a
// Add kernel hashtable here
#define TOPK_HASH_KEY( IN_DTYPE, OUT_DTYPE, STAGES, SECTION ) \
( ( IN_DTYPE ) | ( OUT_DTYPE << 8 ) | (STAGES << 16) | (SECTION << 26))
#define TOPK_HASH_KEY( IN_DTYPE, OUT_DTYPE, STAGES ) \
( ( IN_DTYPE ) | ( OUT_DTYPE << 8 ) | (STAGES << 16) )
#define PACK_KERNEL_MAP( IN_DTYPE, OUT_DTYPE, STAGES ) \
{ TOPK_HASH_KEY( IN_DTYPE, OUT_DTYPE, STAGES, 0 ), \
{ TOPK_HASH_KEY( IN_DTYPE, OUT_DTYPE, STAGES ), \
CVIVANTE_NAMESPACE("cl.topk_stage"STR(STAGES)"_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
_TOPK_KERNEL_SOURCE }

#define PACK_MERGE_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
{ TOPK_HASH_KEY( IN_DTYPE, OUT_DTYPE, 0, 1 ), \
CVIVANTE_NAMESPACE("cl.topk_stage_"STR(IN_DTYPE)"to"STR(OUT_DTYPE)"_I32"), \
"topk2" }

#define TOPK_ODD_EVEN_SORT_HASH_KEY( IN_DTYPE, OUT_DTYPE ) \
( ( IN_DTYPE ) | ( OUT_DTYPE << 8 ) )
#define PACK_ODD_EVEN_SORT_KERNEL_MAP( IN_DTYPE, OUT_DTYPE ) \
Expand Down Expand Up @@ -115,9 +111,6 @@ static const _kernel_map_type _topk_kernel_map[] =
PACK_KERNEL_MAP( F32, I32, 4 ),
PACK_KERNEL_MAP( F32, I32, 5 ),
PACK_KERNEL_MAP( F32, I32, 6 ),

PACK_MERGE_KERNEL_MAP(U32, U32),
PACK_MERGE_KERNEL_MAP(I32, I32),
};

static const _kernel_map_type _topk_odd_even_sort_kernel_map[] =
Expand Down Expand Up @@ -261,8 +254,7 @@ static vsi_status _query_kernel
vsi_nn_kernel_t * kernel,
vsi_nn_tensor_t * const * const inputs,
vsi_nn_tensor_t * const * const outputs,
int32_t num_stages,
vsi_bool is_bitnoic_segment
int32_t num_stages
)
{
vsi_status status = VSI_FAILURE;
Expand All @@ -280,39 +272,37 @@ static vsi_status _query_kernel
in_dtype = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
out_dtype = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

num_stages = is_bitnoic_segment ? 0 : num_stages;

switch (_PACK_SELECT_KEY(in_dtype, out_dtype))
{
case _PACK_SELECT_KEY(F32, F32):
case _PACK_SELECT_KEY(F16, F16):
key = TOPK_HASH_KEY( F32, F32, num_stages, is_bitnoic_segment );
key = TOPK_HASH_KEY( F32, F32, num_stages );
break;
case _PACK_SELECT_KEY(U32, U32):
case _PACK_SELECT_KEY(U16, U16):
case _PACK_SELECT_KEY(U8, U8):
key = TOPK_HASH_KEY( U32, U32, num_stages, is_bitnoic_segment );
key = TOPK_HASH_KEY( U32, U32, num_stages );
break;
case _PACK_SELECT_KEY(I32, I32):
case _PACK_SELECT_KEY(I16, I16):
case _PACK_SELECT_KEY(I8, I8):
key = TOPK_HASH_KEY( I32, I32, num_stages, is_bitnoic_segment );
key = TOPK_HASH_KEY( I32, I32, num_stages );
break;
case _PACK_SELECT_KEY(F32, U32):
case _PACK_SELECT_KEY(F16, U32):
case _PACK_SELECT_KEY(F32, U16):
case _PACK_SELECT_KEY(F16, U16):
case _PACK_SELECT_KEY(F32, U8):
case _PACK_SELECT_KEY(F16, U8):
key = TOPK_HASH_KEY( F32, U32, num_stages, is_bitnoic_segment );
key = TOPK_HASH_KEY( F32, U32, num_stages );
break;
case _PACK_SELECT_KEY(F32, I32):
case _PACK_SELECT_KEY(F16, I32):
case _PACK_SELECT_KEY(F32, I16):
case _PACK_SELECT_KEY(F16, I16):
case _PACK_SELECT_KEY(F32, I8):
case _PACK_SELECT_KEY(F16, I8):
key = TOPK_HASH_KEY( F32, I32, num_stages, is_bitnoic_segment );
key = TOPK_HASH_KEY( F32, I32, num_stages );
break;
default:
break;
Expand Down Expand Up @@ -450,12 +440,7 @@ static vsi_nn_kernel_node_t _setup
int32_t top_k = vsi_nn_kernel_param_get_int32(params, "top_k");
int32_t num_stages = (int32_t)vsi_nn_max(ceil(log10(block_size / 2.0f) / log10(2.0f)), 0);
vsi_bool is_odd_even_sort = FALSE;
vsi_bool is_bitnoic_segment = FALSE;
size_t param_num = _TOPK_PARAM_NUM;
int32_t max_stages = 7 + (int32_t)log2(graph->ctx->config.subGroupSize >> 2);
vsi_nn_kernel_dtype_e type0 = vsi_nn_kernel_map_dtype( inputs[0]->attr.dtype.vx_type );
vsi_nn_kernel_dtype_e type1 = vsi_nn_kernel_map_dtype( outputs[0]->attr.dtype.vx_type );

float inputScale = vsi_nn_get_tensor_scale(inputs[0]);
float inputTail = (float)vsi_nn_get_tensor_zero_point(inputs[0]);
float outputScale = vsi_nn_get_tensor_scale(outputs[0]);
Expand Down Expand Up @@ -486,14 +471,9 @@ static vsi_nn_kernel_node_t _setup
rs_tensors[0] = vsi_nn_reshape_tensor( graph,
inputs[0], shape[0], 2 );

is_bitnoic_segment = (num_stages >= 9) && (top_k <= 512 && max_stages > 9) &&
type0 == type1 && (type0 == U8 || type0 == I8 || type0 == I16 || type0 == U16 || type0 == I32 || type0 == U32);
num_stages = is_bitnoic_segment ? 9 : num_stages;
max_stages = is_bitnoic_segment ? max_stages : 7;

if (num_stages < max_stages || is_bitnoic_segment)
if (num_stages < 7)
{
status = _query_kernel( kernel, inputs, outputs, num_stages, is_bitnoic_segment );
status = _query_kernel( kernel, inputs, outputs, num_stages );

rs_tensors[1] = vsi_nn_reshape_tensor( graph,
outputs[0], shape[1], 2 );
Expand Down
10 changes: 5 additions & 5 deletions src/tim/vx/internal/src/libnnext/ops/cl/topk.cl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ __kernel __attribute__((reqd_work_group_size(LOCAL_SIZE0, 1, 1))) void topk_stag
float left_elem = local_data[left_id]; \
float right_elem = local_data[right_id]; \
\
if ((left_elem < right_elem || (left_elem == right_elem && left_idx < right_idx)) ^ signo) \
if ((left_elem < right_elem) ^ signo) \
{ \
local_data[left_id] = right_elem; \
local_data[right_id] = left_elem; \
Expand Down Expand Up @@ -139,7 +139,7 @@ __kernel __attribute__((reqd_work_group_size(LOCAL_SIZE0, 1, 1))) void topk_stag
uint left_elem = local_data[left_id]; \
uint right_elem = local_data[right_id]; \
\
if ((left_elem < right_elem || (left_elem == right_elem && left_idx < right_idx)) ^ signo) \
if ((left_elem < right_elem) ^ signo) \
{ \
local_data[left_id] = right_elem; \
local_data[right_id] = left_elem; \
Expand Down Expand Up @@ -227,7 +227,7 @@ __kernel __attribute__((reqd_work_group_size(LOCAL_SIZE0, 1, 1))) void topk_stag
int left_elem = local_data[left_id]; \
int right_elem = local_data[right_id]; \
\
if ((left_elem < right_elem || (left_elem == right_elem && left_idx < right_idx)) ^ signo) \
if ((left_elem < right_elem) ^ signo) \
{ \
local_data[left_id] = right_elem; \
local_data[right_id] = left_elem; \
Expand Down Expand Up @@ -315,7 +315,7 @@ __kernel __attribute__((reqd_work_group_size(LOCAL_SIZE0, 1, 1))) void topk_stag
float left_elem = local_data[left_id]; \
float right_elem = local_data[right_id]; \
\
if ((left_elem < right_elem || (left_elem == right_elem && left_idx < right_idx)) ^ signo) \
if ((left_elem < right_elem) ^ signo) \
{ \
local_data[left_id] = right_elem; \
local_data[right_id] = left_elem; \
Expand Down Expand Up @@ -403,7 +403,7 @@ __kernel __attribute__((reqd_work_group_size(LOCAL_SIZE0, 1, 1))) void topk_stag
float left_elem = local_data[left_id]; \
float right_elem = local_data[right_id]; \
\
if ((left_elem < right_elem || (left_elem == right_elem && left_idx < right_idx)) ^ signo) \
if ((left_elem < right_elem) ^ signo) \
{ \
local_data[left_id] = right_elem; \
local_data[right_id] = left_elem; \
Expand Down
Loading

0 comments on commit e1c2f0a

Please sign in to comment.