Skip to content

Commit

Permalink
Add support for SHF ptx instruction (#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
cesar-avalos3 authored Aug 9, 2024
1 parent e1afc53 commit 081da0a
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 1 deletion.
32 changes: 32 additions & 0 deletions src/cuda-sim/instructions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5441,6 +5441,38 @@ void shfl_impl(const ptx_instruction *pI, core_t *core, warp_inst_t inst) {
}
}

// Implements the PTX `shf` (funnel shift) instruction.
//
// The second source operand (b) and the first source operand (a) are treated
// as one 64-bit value {b, a}, with b in the upper 32 bits. That value is
// shifted by an amount derived from the third source operand (c):
//   * clamp mode: n = min(c, 32)
//   * otherwise (wrap): n = c & 0x1f
// In left mode the upper 32 bits of ({b, a} << n) are written to dst;
// otherwise the lower 32 bits of ({b, a} >> n) are written.
//
// pI     - decoded instruction; supplies operands, type, clamp/left modes.
// thread - thread context used to read the sources and write the result.
void shf_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
  ptx_reg_t a, b, c, d;
  const operand_info &dst = pI->dst();
  const operand_info &src1 = pI->src1();
  const operand_info &src2 = pI->src2();
  const operand_info &src3 = pI->src3();

  // Only b32 is allowed
  unsigned i_type = pI->get_type();
  a = thread->get_operand_value(src1, dst, i_type, thread, 1);
  b = thread->get_operand_value(src2, dst, i_type, thread, 1);
  c = thread->get_operand_value(src3, dst, i_type, thread, 1);

  if (i_type != B32_TYPE)
    printf("Only the b32 data_type is allowed per the ISA\n");

  // Derive the shift amount; n always ends up in the range [0, 32].
  unsigned n;
  if (pI->clamp_mode())
    n = (c.u32 < 32) ? c.u32 : 32;
  else
    n = c.u32 & 0x1f;

  // Work on the 64-bit concatenation instead of two 32-bit shifts: shifting a
  // 32-bit value by 32 (which the split form does for n == 0 and n == 32) is
  // undefined behaviour in C++ and yields wrong results on common hardware.
  const unsigned long long concat =
      (static_cast<unsigned long long>(b.u32) << 32) |
      static_cast<unsigned long long>(a.u32);

  if (pI->left_mode())
    d.u32 = static_cast<unsigned>(((concat << n) >> 32) & 0xffffffffULL);
  else
    d.u32 = static_cast<unsigned>((concat >> n) & 0xffffffffULL);

  thread->set_operand_value(dst, d, i_type, thread, pI);
}

void shl_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
ptx_reg_t a, b, d;
const operand_info &dst = pI->dst();
Expand Down
1 change: 1 addition & 0 deletions src/cuda-sim/opcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ OP_DEF(SELP_OP,selp_impl,"selp",1,1)
OP_DEF(SETP_OP,setp_impl,"setp",1,1)
OP_DEF(SET_OP,set_impl,"set",1,1)
OP_W_DEF(SHFL_OP,shfl_impl,"shfl",1,10)
OP_DEF(SHF_OP,shf_impl,"shf",1,1)
OP_DEF(SHL_OP,shl_impl,"shl",1,1)
OP_DEF(SHR_OP,shr_impl,"shr",1,1)
OP_DEF(SIN_OP,sin_impl,"sin",1,4)
Expand Down
8 changes: 7 additions & 1 deletion src/cuda-sim/ptx.l
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ selp TC; yylval->int_value = SELP_OP; return OPCODE;
setp TC; yylval->int_value = SETP_OP; return OPCODE;
set TC; yylval->int_value = SET_OP; return OPCODE;
shfl TC; yylval->int_value = SHFL_OP; return OPCODE;
shf TC; yylval->int_value = SHF_OP; return OPCODE;
shl TC; yylval->int_value = SHL_OP; return OPCODE;
shr TC; yylval->int_value = SHR_OP; return OPCODE;
sin TC; yylval->int_value = SIN_OP; return OPCODE;
Expand Down Expand Up @@ -317,6 +318,9 @@ breakaddr TC; yylval->int_value = BREAKADDR_OP; return OPCODE;

\.sat TC; return SAT_OPTION;

\.l TC; return LEFT_OPTION;
\.r TC; return RIGHT_OPTION;

\.eq TC; return EQ_OPTION;
\.ne TC; return NE_OPTION;
\.lt TC; return LT_OPTION;
Expand Down Expand Up @@ -354,6 +358,8 @@ breakaddr TC; yylval->int_value = BREAKADDR_OP; return OPCODE;
\.arrive TC; return ARRIVE_OPTION;
\.red TC; return RED_OPTION;

\.clamp TC; return CLAMP_OPTION;
\.wrap TC; return WRAP_OPTION;

\.approx TC; return APPROX_OPTION;
\.full TC; return FULL_OPTION;
Expand Down Expand Up @@ -488,4 +494,4 @@ int ptx_error( yyscan_t yyscanner, ptx_recognizer* recognizer, const char *s )
fflush(stdout);
//exit(1);
return 0;
}
}
8 changes: 8 additions & 0 deletions src/cuda-sim/ptx.y
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ class ptx_recognizer;
%token PRMT_RC16_MODE;
%token PRMT_ECL_MODE;
%token PRMT_ECR_MODE;
%token WRAP_OPTION;
%token CLAMP_OPTION;
%token LEFT_OPTION;
%token RIGHT_OPTION;

%type <int_value> function_decl_header
%type <ptr_value> function_decl
Expand Down Expand Up @@ -507,6 +511,10 @@ option: type_spec
| DOWN_OPTION { recognizer->add_option(DOWN_OPTION); }
| BFLY_OPTION { recognizer->add_option(BFLY_OPTION); }
| IDX_OPTION { recognizer->add_option(IDX_OPTION); }
| WRAP_OPTION { recognizer->add_option(WRAP_OPTION); }
| CLAMP_OPTION { recognizer->add_option(CLAMP_OPTION); }
| LEFT_OPTION { recognizer->add_option(LEFT_OPTION); }
| RIGHT_OPTION { recognizer->add_option(RIGHT_OPTION); }
;

atomic_operation_spec: ATOMIC_AND { recognizer->add_option(ATOMIC_AND); }
Expand Down
14 changes: 14 additions & 0 deletions src/cuda-sim/ptx_ir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,8 @@ ptx_instruction::ptx_instruction(
m_rounding_mode = RN_OPTION;
m_compare_op = -1;
m_saturation_mode = 0;
m_clamp_mode = 0;
m_left_mode = 0;
m_geom_spec = 0;
m_vector_spec = 0;
m_atomic_spec = 0;
Expand Down Expand Up @@ -1293,6 +1295,18 @@ ptx_instruction::ptx_instruction(
case SAT_OPTION:
m_saturation_mode = 1;
break;
case WRAP_OPTION:
m_clamp_mode = 0;
break;
case CLAMP_OPTION:
m_clamp_mode = 1;
break;
case LEFT_OPTION:
m_left_mode = 1;
break;
case RIGHT_OPTION:
m_left_mode = 0;
break;
case RNI_OPTION:
case RZI_OPTION:
case RMI_OPTION:
Expand Down
4 changes: 4 additions & 0 deletions src/cuda-sim/ptx_ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,8 @@ class ptx_instruction : public warp_inst_t {
unsigned cache_option() const { return m_cache_option; }
unsigned rounding_mode() const { return m_rounding_mode; }
unsigned saturation_mode() const { return m_saturation_mode; }
unsigned clamp_mode() const {return m_clamp_mode;}
unsigned left_mode() const { return m_left_mode; }
unsigned dimension() const { return m_geom_spec; }
unsigned barrier_op() const { return m_barrier_op; }
unsigned shfl_op() const { return m_shfl_op; }
Expand Down Expand Up @@ -1159,6 +1161,8 @@ class ptx_instruction : public warp_inst_t {
unsigned m_rounding_mode;
unsigned m_compare_op;
unsigned m_saturation_mode;
unsigned m_clamp_mode;
unsigned m_left_mode;
unsigned m_barrier_op;
unsigned m_shfl_op;
unsigned m_prmt_op;
Expand Down

0 comments on commit 081da0a

Please sign in to comment.