Skip to content

Commit

Permalink
Fix formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
lukamac committed Dec 26, 2024
1 parent 4ff9f1c commit 06e0384
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 92 deletions.
12 changes: 8 additions & 4 deletions inc/pulp_nnx_neureka_v2.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@

/* PULP-NNX interface */

/* Set-up / tear-down entry points of the PULP-NNX interface.
 * NOTE(review): only the declarations are visible here; exact semantics of
 * `conf` should be confirmed against the implementation. */
void neureka_v2_nnx_init(const neureka_v2_dev_t *dev,
                         neureka_v2_siracusa_conf_t *conf);
void neureka_v2_nnx_term(const neureka_v2_dev_t *dev);

/** neureka_v2_nnx_dispatch_check
Expand All @@ -46,16 +47,19 @@ void neureka_v2_nnx_dispatch_wait(const neureka_v2_dev_t *dev);
* Fails with return code 1 if the task cannot be dispatched. Otherwise returns
* 0.
*/
int neureka_v2_nnx_dispatch(const neureka_v2_dev_t *dev,
                            neureka_v2_task_t *task);

/** neureka_v2_nnx_resolve_check
*
* Check whether the task has been resolved.
*/
int neureka_v2_nnx_resolve_check(const neureka_v2_dev_t *dev,
                                 neureka_v2_task_t *task);

/** neureka_v2_nnx_resolve_wait
*
* Block until you can resolve the task.
*/
void neureka_v2_nnx_resolve_wait(const neureka_v2_dev_t *dev,
                                 neureka_v2_task_t *task);
4 changes: 2 additions & 2 deletions neureka_v2/bsp/neureka_v2_siracusa_bsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_BASE_ADDR (0x00200000)
#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_OFFS 0x18
/* Sum of the cluster-control base address and the HWPE offset. */
#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_ADDR                             \
  (NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_BASE_ADDR +                                \
   NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_OFFS)
#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_PRIO 0x100
#define NEUREKA_V2_SIRACUSA_CLUSTER_CTRL_HWPE_MASK_HCI_MAXSTALL 0xff
Expand Down
10 changes: 6 additions & 4 deletions neureka_v2/gvsoc/neureka_v2_gvsoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@ typedef enum neureka_v2_gvsoc_log_level_e {
NEUREKA_V2_GVSOC_LOG_LEVEL_ALL = 3
} neureka_v2_gvsoc_log_level_e;

/** neureka_v2_gvsoc_log_activate
 *
 * Writes `log_level` to the NEUREKA_V2_REG_GVSOC_LOG_LEVEL register and
 * `format` to the NEUREKA_V2_REG_GVSOC_LOG_FORMAT register of the device's
 * HWPE block, in that order.
 */
static void
neureka_v2_gvsoc_log_activate(const neureka_v2_dev_t *dev,
                              neureka_v2_gvsoc_log_level_e log_level,
                              neureka_v2_gvsoc_log_format_e format) {
  hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_V2_REG_GVSOC_LOG_LEVEL,
                      log_level);
  hwpe_task_reg_write(&dev->hwpe_dev, NEUREKA_V2_REG_GVSOC_LOG_FORMAT, format);
}

Expand Down
97 changes: 51 additions & 46 deletions neureka_v2/hal/neureka_v2_task.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
#include "pulp_nnx_util.h"

uint32_t neureka_v2_get_tile_padding(uint32_t padding, uint32_t i_height,
uint32_t i_width, uint32_t n_height,
uint32_t n_width) {
uint32_t i_width, uint32_t n_height,
uint32_t n_width) {
uint32_t tile_padding = padding;
if (i_height > 0) {
tile_padding &= ~(0xf << 28);
Expand All @@ -46,12 +46,13 @@ void neureka_v2_task_init(neureka_v2_task_t *task) {
}

void neureka_v2_task_set_op_to_conv(neureka_v2_task_t *task,
const uint8_t kernel_shape,
const uint8_t depthwise) {
const uint8_t kernel_shape,
const uint8_t depthwise) {
task->depthwise = depthwise;
task->kernel_shape = kernel_shape;
task->subtile_output_channel = depthwise ? NEUREKA_V2_SUBTILE_INPUT_CHANNEL_3x3
: NEUREKA_V2_SUBTILE_OUTPUT_CHANNEL;
task->subtile_output_channel = depthwise
? NEUREKA_V2_SUBTILE_INPUT_CHANNEL_3x3
: NEUREKA_V2_SUBTILE_OUTPUT_CHANNEL;
task->subtile_input_channel = kernel_shape == 3
? NEUREKA_V2_SUBTILE_INPUT_CHANNEL_3x3
: NEUREKA_V2_SUBTILE_INPUT_CHANNEL_1x1;
Expand All @@ -65,8 +66,8 @@ void neureka_v2_task_set_op_to_conv(neureka_v2_task_t *task,
}

void neureka_v2_task_set_bits(neureka_v2_task_t *task, const uint8_t input_bits,
const uint8_t output_bits,
const uint8_t weight_bits) {
const uint8_t output_bits,
const uint8_t weight_bits) {
neureka_v2_quant_mode_e quantMode;
if (output_bits == 8) {
quantMode = quantMode8Bit;
Expand All @@ -80,8 +81,9 @@ void neureka_v2_task_set_bits(neureka_v2_task_t *task, const uint8_t input_bits,
task->data.cfg.conf0 |= quantMode | (weight_bits - 1);
}

void neureka_v2_task_set_norm_quant(neureka_v2_task_t *task, neureka_v2_quant_t quant,
neureka_v2_norm_t norm) {
void neureka_v2_task_set_norm_quant(neureka_v2_task_t *task,
neureka_v2_quant_t quant,
neureka_v2_norm_t norm) {
task->data.cfg.conf0 &=
~(NEUREKA_V2_MASK_QUANT_FUNCTION | NEUREKA_V2_MASK_SHIFT_AMOUNT |
NEUREKA_V2_MASK_NORM_MODE | NEUREKA_V2_MASK_FLAG_NORM_BIAS |
Expand All @@ -92,7 +94,8 @@ void neureka_v2_task_set_norm_quant(neureka_v2_task_t *task, neureka_v2_quant_t
norm.flag_shift << NEUREKA_V2_SHIFT_FLAG_NORM_SHIFT;
}

/* Store `weight_offset` in the task's `weight_offset_factor` configuration
 * field. */
void neureka_v2_task_set_weight_offset(neureka_v2_task_t *task,
                                       const int32_t weight_offset) {
  task->data.cfg.weight_offset_factor = weight_offset;
}

Expand Down Expand Up @@ -128,8 +131,8 @@ void neureka_v2_task_set_infeat_prefetch(neureka_v2_task_t *task) {
task->data.cfg.conf0 |= NEUREKA_V2_FLAG_INFEAT_PREFETCH;
}

/* Select where the weights come from: clear the weight-source bits of conf0,
 * then OR in `weight_source` (expected to already be positioned inside
 * NEUREKA_V2_MASK_FLAG_WEIGHT_SOURCE). */
void neureka_v2_task_set_weight_source(
    neureka_v2_task_t *task, neureka_v2_weight_source_e weight_source) {
  task->data.cfg.conf0 &= ~NEUREKA_V2_MASK_FLAG_WEIGHT_SOURCE;
  task->data.cfg.conf0 |= weight_source;
}
Expand All @@ -141,50 +144,54 @@ void neureka_v2_task_set_weight_source(neureka_v2_task_t *task,
* Necessary for input pointer when it's padded.
*/
/* Move `ptr` back by (padding_top * width + padding_left) elements, each
 * `width_stride` bytes apart. All arithmetic is unsigned 32-bit, so the
 * result wraps modulo 2^32 if the offset exceeds `ptr`. */
uint32_t neureka_v2_pad_addr(uint32_t ptr, const uint32_t width,
                             const uint32_t width_stride,
                             const uint8_t padding_top,
                             const uint8_t padding_left) {
  /* Number of elements skipped: full padded rows plus the left padding. */
  const uint32_t skipped_elements =
      (uint32_t)padding_top * width + (uint32_t)padding_left;
  return ptr - skipped_elements * width_stride;
}

/* Fill in the input, output and weight addresses of a convolution task.
 *
 * - The input address is moved back by the top/left padding via
 *   neureka_v2_pad_addr().
 * - The output address is stored unchanged.
 * - The weights address is stored unchanged when the task's weight source is
 *   NEUREKA_V2_FLAG_WEIGHT_SOURCE_WMEM; otherwise 0x10000000 is subtracted
 *   from it first.
 *
 * Call this after the weight source has been configured, since the weight
 * address adjustment depends on the weight-source bits of conf0.
 */
void neureka_v2_task_set_addr_conv(neureka_v2_task_t *task, uint32_t input_addr,
                                   uint32_t w_in, uint32_t w_in_stride,
                                   uint8_t padding_top, uint8_t padding_left,
                                   uint32_t output_addr,
                                   uint32_t weights_addr) {
  task->data.infeat_addr = neureka_v2_pad_addr(input_addr, w_in, w_in_stride,
                                               padding_top, padding_left);
  task->data.outfeat_addr = output_addr;

  /* NOTE(review): the WMEM case previously carried a disabled
   * `weights_addr -= 0x10400000` adjustment; WMEM addresses are passed
   * through as-is. 0x10000000 is presumably a memory base address --
   * confirm against the memory map. */
  if ((task->data.cfg.conf0 & NEUREKA_V2_MASK_FLAG_WEIGHT_SOURCE) !=
      NEUREKA_V2_FLAG_WEIGHT_SOURCE_WMEM) {
    weights_addr -= 0x10000000;
  }
  task->data.weights_addr = weights_addr;
}

/* Record the scale, shift and bias addresses in the task's `scale_addr`,
 * `scale_shift_addr` and `scale_bias_addr` fields. The values are stored
 * unchanged. */
void neureka_v2_task_set_addr_norm_quant(neureka_v2_task_t *task,
                                         uint32_t scale_addr,
                                         uint32_t shift_addr,
                                         uint32_t bias_addr) {
  task->data.scale_addr = scale_addr;
  task->data.scale_shift_addr = shift_addr;
  task->data.scale_bias_addr = bias_addr;
}

void neureka_v2_task_set_strides(neureka_v2_task_t *task, const uint32_t k_in,
const uint32_t h_in_stride,
const uint32_t w_in_stride,
const uint32_t h_out_stride,
const uint32_t w_out_stride) {
const uint32_t h_in_stride,
const uint32_t w_in_stride,
const uint32_t h_out_stride,
const uint32_t w_out_stride) {
const uint32_t num_k_in =
nnx_calculate_number_of_tiles(k_in, task->subtile_input_channel);

const neureka_v2_stride_t input_stride = {
.d0 = w_in_stride, .d1 = h_in_stride, .d2 = 0};
task->data.cfg.input_stride = input_stride;

const neureka_v2_stride_t output_stride = {.d0 = NEUREKA_V2_OUTPUT_BANDWIDTH_BYTES,
.d1 = w_out_stride,
.d2 = h_out_stride};
const neureka_v2_stride_t output_stride = {
.d0 = NEUREKA_V2_OUTPUT_BANDWIDTH_BYTES,
.d1 = w_out_stride,
.d2 = h_out_stride};
task->data.cfg.output_stride = output_stride;

task->data.cfg.weights_stride.d0 = NEUREKA_V2_WEIGHT_BANDWIDTH_BYTES;
Expand All @@ -201,10 +208,10 @@ void neureka_v2_task_set_strides(neureka_v2_task_t *task, const uint32_t k_in,
}

void neureka_v2_task_set_counters(neureka_v2_task_t *task, const uint32_t k_in,
const uint32_t h_out, const uint32_t w_out,
const uint32_t k_out,
const uint8_t padding_bottom,
const uint8_t padding_right) {
const uint32_t h_out, const uint32_t w_out,
const uint32_t k_out,
const uint8_t padding_bottom,
const uint8_t padding_right) {
const uint16_t num_Ko =
nnx_calculate_number_of_tiles(k_out, task->subtile_output_channel);
const uint16_t num_Ki =
Expand All @@ -222,10 +229,8 @@ void neureka_v2_task_set_counters(neureka_v2_task_t *task, const uint32_t k_in,
nnx_calculate_last_tile_size(h_out, NEUREKA_V2_SUBTILE_OUTPUT_HEIGHT);
const uint16_t rem_Wo =
nnx_calculate_last_tile_size(w_out, NEUREKA_V2_SUBTILE_OUTPUT_WIDTH);
const uint16_t rem_Hi =
(task->kernel_shape == 1 ? rem_Ho : rem_Ho + 2);
const uint16_t rem_Wi =
(task->kernel_shape == 1 ? rem_Wo : rem_Wo + 2);
const uint16_t rem_Hi = (task->kernel_shape == 1 ? rem_Ho : rem_Ho + 2);
const uint16_t rem_Wi = (task->kernel_shape == 1 ? rem_Wo : rem_Wo + 2);

const neureka_v2_subtile_t subtile = {
.number = {.KoKi = nnx_concat_half(num_Ko, num_Ki),
Expand All @@ -237,16 +242,16 @@ void neureka_v2_task_set_counters(neureka_v2_task_t *task, const uint32_t k_in,
}

/* Pack the padding configuration into the task's `padding` word:
 *
 *   bits 31:28  top    (low 4 bits)
 *   bits 27:24  right  (low 4 bits)
 *   bits 23:20  bottom (low 4 bits)
 *   bits 19:16  left   (low 4 bits)
 *   bits  7:0   value
 *
 * Each field is widened to uint32_t before shifting: the uint8_t arguments
 * promote to signed int, and shifting a set bit into the sign bit of an int
 * (e.g. top >= 8 shifted by 28) is undefined behaviour.
 */
void neureka_v2_task_set_padding(neureka_v2_task_t *task, const uint8_t top,
                                 const uint8_t bottom, const uint8_t left,
                                 const uint8_t right, const uint8_t value) {
  task->data.cfg.padding =
      ((uint32_t)(top & 0xf) << 28) | ((uint32_t)(right & 0xf) << 24) |
      ((uint32_t)(bottom & 0xf) << 20) | ((uint32_t)(left & 0xf) << 16) |
      (uint32_t)(value & 0xff);
}

/* Pack the four filter masks into the task's `filter_mask` word:
 *
 *   bits 31:24  top
 *   bits 23:16  right
 *   bits 15:8   bottom
 *   bits  7:0   left
 *
 * Each field is widened to uint32_t before shifting: the uint8_t arguments
 * promote to signed int, and `top << 24` with top >= 0x80 would shift into
 * the sign bit, which is undefined behaviour.
 */
void neureka_v2_task_set_mask_filter(neureka_v2_task_t *task, const uint8_t top,
                                     const uint8_t bottom, const uint8_t left,
                                     const uint8_t right) {
  task->data.cfg.filter_mask =
      ((uint32_t)top << 24) | ((uint32_t)right << 16) |
      ((uint32_t)bottom << 8) | ((uint32_t)left << 0);
}
Expand All @@ -258,10 +263,10 @@ void neureka_v2_task_set_dims(
const uint32_t h_out_stride, const uint32_t w_out_stride,
const uint8_t padding_top, const uint8_t padding_bottom,
const uint8_t padding_left, const uint8_t padding_right) {
neureka_v2_task_set_strides(task, k_in, h_in_stride, w_in_stride, h_out_stride,
w_out_stride);
neureka_v2_task_set_strides(task, k_in, h_in_stride, w_in_stride,
h_out_stride, w_out_stride);
neureka_v2_task_set_counters(task, k_in, h_out, w_out, k_out, padding_bottom,
padding_right);
padding_right);
neureka_v2_task_set_padding(task, padding_top, padding_bottom, padding_left,
padding_right, 0);
padding_right, 0);
}
66 changes: 35 additions & 31 deletions neureka_v2/hal/neureka_v2_task.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,58 +118,62 @@ typedef struct neureka_v2_task_t {

void neureka_v2_task_init(neureka_v2_task_t *task);
void neureka_v2_task_set_op_to_conv(neureka_v2_task_t *task,
                                    const uint8_t kernel_shape,
                                    const uint8_t depthwise);
void neureka_v2_task_set_bits(neureka_v2_task_t *task, const uint8_t input_bits,
                              const uint8_t output_bits,
                              const uint8_t weight_bits);
void neureka_v2_task_set_norm_quant(neureka_v2_task_t *task,
                                    neureka_v2_quant_t quant,
                                    neureka_v2_norm_t norm);
void neureka_v2_task_set_weight_offset(neureka_v2_task_t *task,
                                       const int32_t weight_offset);
void neureka_v2_task_set_activation_signed(neureka_v2_task_t *task);
void neureka_v2_task_set_activation_unsigned(neureka_v2_task_t *task);
void neureka_v2_task_set_outfeat_signed(neureka_v2_task_t *task);
void neureka_v2_task_set_outfeat_unsigned(neureka_v2_task_t *task);
void neureka_v2_task_set_streamin_signed(neureka_v2_task_t *task);
void neureka_v2_task_set_streamin_unsigned(neureka_v2_task_t *task);
void neureka_v2_task_set_streamin(neureka_v2_task_t *task);
void neureka_v2_task_set_weight_source(
    neureka_v2_task_t *task, neureka_v2_weight_source_e weight_source);
uint32_t neureka_v2_get_tile_padding(uint32_t padding, uint32_t i_height,
                                     uint32_t i_width, uint32_t n_height,
                                     uint32_t n_width);
uint32_t neureka_v2_pad_addr(uint32_t ptr, const uint32_t width,
                             const uint32_t width_stride,
                             const uint8_t padding_top,
                             const uint8_t padding_left);
void neureka_v2_task_set_addr_conv(neureka_v2_task_t *task, uint32_t input_addr,
                                   uint32_t w_in, uint32_t w_in_stride,
                                   uint8_t padding_top, uint8_t padding_left,
                                   uint32_t output_addr, uint32_t weights_addr);
void neureka_v2_task_set_addr_norm_quant(neureka_v2_task_t *task,
                                         uint32_t scale_addr,
                                         uint32_t shift_addr,
                                         uint32_t bias_addr);
/** neureka_v2_task_set_strides
 *
 * All the strides variables are strides between elements alongside that
 * dimension and expressed in bytes. There is no stride variable for the channel
 * dimension because the N-EUREKA requires the channels to be contiguous.
 */
void neureka_v2_task_set_strides(neureka_v2_task_t *task, const uint32_t k_in,
                                 const uint32_t h_in_stride,
                                 const uint32_t w_in_stride,
                                 const uint32_t h_out_stride,
                                 const uint32_t w_out_stride);
void neureka_v2_task_set_counters(neureka_v2_task_t *task, const uint32_t k_in,
                                  const uint32_t h_out, const uint32_t w_out,
                                  const uint32_t k_out,
                                  const uint8_t padding_bottom,
                                  const uint8_t padding_right);
void neureka_v2_task_set_padding(neureka_v2_task_t *task, const uint8_t top,
                                 const uint8_t bottom, const uint8_t left,
                                 const uint8_t right, const uint8_t value);
void neureka_v2_task_set_mask_filter(neureka_v2_task_t *task, const uint8_t top,
                                     const uint8_t bottom, const uint8_t left,
                                     const uint8_t right);
/** neureka_v2_task_set_dims
*
* All the strides variables are strides between elements alongside that
Expand Down
Loading

0 comments on commit 06e0384

Please sign in to comment.