Add HIFI3 defines alongside HIFI4 uses
TFLM has historically used the HIFI4 define for both HiFi3/3z and HiFi4
cores. That define was recently split so that HiFi3/3z uses a separate
HIFI3 define, allowing the different NDSP libraries to be included. This
commit updates the PR to use the new convention within the xtensa
kernels that were changed.
rascani committed Dec 5, 2023
1 parent 3274767 commit 0f210d0
Showing 4 changed files with 27 additions and 27 deletions.
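
The convention at a glance, as an illustrative sketch rather than code
taken from this commit; it assumes the HIFI3, HIFI4, and HIFI5 macros
are set by the platform build files for the corresponding Cadence core:

    // Sketch of the updated guard convention. The macros HIFI3, HIFI4,
    // and HIFI5 are assumed to come from the platform makefiles.
    #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
    // Optimized path: dispatch to the core-specific Cadence NDSP kernels.
    #else
    // Portable path: fall back to the reference C++ implementation.
    #endif

    // With the split, a HiFi3/3z-only branch no longer has to hide
    // behind HIFI4 and can be expressed directly:
    #if defined(HIFI3)
    // HiFi3/3z-specific handling, e.g. pulling in its own NDSP library.
    #endif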
14 changes: 7 additions & 7 deletions tensorflow/lite/micro/kernels/xtensa/lstm_eval.cc
@@ -105,14 +105,14 @@ TfLiteStatus LstmTensors::ValidateTensorStatus(TfLiteContext* context) const {

namespace lstm_internal {

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
#endif

void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int16_t* output) {
-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
@@ -137,7 +137,7 @@ void AddElementWise(const float* input_1, const float* input_2, int n_batch,
}
}

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(const RuntimeShape& data_shape, int16_t* data) {
reference_integer_ops::Logistic(
0 /*data->input_multiplier*/, 0 /*data->input_left_shift */,
@@ -225,7 +225,7 @@ void FullyConnected(const FullyConnectedParams& params,
params, input_shape, input_data, filter_shape, filter_data, bias_shape,
bias_data, output_shape, output_data);
}
-#else // #if !(defined(HIFI5) || defined(HIFI4))
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(int16_t* data, int32_t data_size) {
WORD32 err;
err = xa_nn_vec_sigmoid_sym16s_sym16s(data, data, 0, 0, data_size);
@@ -341,7 +341,7 @@ void FullyConnected(const FullyConnectedParams& params, const float* input_data,
params, input_shape, input_data, filter_shape, filter_data, bias_shape,
bias_data, output_shape, output_data);
}
-#endif // #if !(defined(HIFI5) || defined(HIFI4))
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

void Clipping(const int v_size, const CellStateInfo& cell_state_info,
int16_t* vector) {
@@ -360,7 +360,7 @@ void Clipping(const int v_size, const CellStateInfo& cell_state_info,
}
}

-#if defined(HIFI5) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
void UpdateLstmCell(const LstmStepManager& step_info,
TfLiteEvalTensor* cell_state,
// Gate outputs
@@ -428,7 +428,7 @@ void UpdateLstmCell(const LstmStepManager& step_info,
step_info.CellStateOffset());
}
}
-#endif // #if defined(HIFI5) || defined(HIFI4)
+#endif // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

// Increment the data offset so the single time step invocation call can access
// the corresponding input/output tensor data at the time step
22 changes: 11 additions & 11 deletions tensorflow/lite/micro/kernels/xtensa/lstm_eval.h
@@ -158,7 +158,7 @@ LSTMBuffers<CellType> CreateLSTMBuffers(TfLiteContext* context,
// namespace to expose them for testing
namespace lstm_internal {

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(const RuntimeShape& data_shape, int16_t* data);

void Sigmoid(const RuntimeShape& data_shape, float* data);
@@ -200,7 +200,7 @@ void FullyConnected(const FullyConnectedParams& params,
const RuntimeShape& filter_shape, const float* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data);
-#else // #if !(defined(HIFI5) || defined(HIFI4))
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(int16_t* data, int32_t data_size);

void Sigmoid(float* data, int32_t data_size);
@@ -236,7 +236,7 @@ void FullyConnected(const FullyConnectedParams& params, const float* input_data,
const float* filter_data, const float* bias_data,
float* output_data, const int num_batches,
const int output_depth, const int accum_depth);
-#endif // #if !(defined(HIFI5) || defined(HIFI4))
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int16_t* output);
@@ -272,7 +272,7 @@ class LstmStepManager {
int OutputOffset() const { return output_offset_; }
int HiddenStateOffset() const { return hidden_state_offset_; }
int CellStateOffset() const { return cell_state_offset_; }
-#if defined(HIFI5) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
int time_major() const { return size_info_.time_major; }

int batch_size() const { return size_info_.batch_size; }
@@ -298,7 +298,7 @@ class LstmStepManager {
// Implements the following formula:
// gate = activate(FC(input) + FC(recurrent))
// Activation is sigmoid except for the "cell" gate (configurable, usually tanh)
-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void CalculateLstmGate(
@@ -406,7 +406,7 @@ void UpdateLstmCell(const LstmStepManager& step_info,
step_info.CellStateOffset());
}
}
-#else // #if !defined(HIFI5) || defined(HIFI4)
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void CalculateLstmGate(
@@ -496,7 +496,7 @@ void UpdateLstmCell(const LstmStepManager& step_info,
const ArithmeticParams& forget_cell_mul_params,
const ArithmeticParams& input_mul_params,
const CellStateInfo& cell_state_info, float* buffer);
-#endif // #if defined(HIFI5) || defined(HIFI4)
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

// Update the hidden state of the LSTM kernel using the following formula:
// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, * means
@@ -521,7 +521,7 @@ void UpdateLstmHidden(const LstmStepManager& step_info,
tflite::micro::GetTensorData<CellType>(cell_state) +
step_info.CellStateOffset();
// Tanh(cell_state)
-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
Tanh(cell_state_scale_power, cell_state_shape, cell_state_data,
cell_state_shape, buffer);
// Update the hidden state
@@ -539,7 +539,7 @@ void UpdateLstmHidden(const LstmStepManager& step_info,
#endif
}

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
@@ -651,7 +651,7 @@ void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
step_info.HiddenStateOffset(),
step_info.StateShape().FlatSize() * sizeof(ActivationType));
}
-#else // #if !(defined(HIFI5) || defined(HIFI4))
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
@@ -782,7 +782,7 @@ void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
step_info.HiddenStateOffset(),
step_info.StateShape().FlatSize() * sizeof(ActivationType));
}
-#endif // #if !(defined(HIFI5) || defined(HIFI4))
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

} // namespace lstm_internal

6 changes: 3 additions & 3 deletions tensorflow/lite/micro/kernels/xtensa/lstm_eval_hifi.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#include <xtensa/tie/xt_hifi2.h>

@@ -498,7 +498,7 @@ void xa_nn_elm_mul_16x16_asym8s(int8_t* output, const int16_t* input_1,
AE_S8_0_IP(data_c, (ae_int8*)output, 1);
}
}
-#elif defined(HIFI4)
+#elif defined(HIFI3) || defined(HIFI4)
#if TFLITE_SINGLE_ROUNDING
#define MPY_BY_QUANT_MULT_X2_OUT32(out, inp, multiplier, l_shift, r_shift) \
{ \
@@ -1018,4 +1018,4 @@ void xa_nn_elm_mul_16x16_asym8s(int8_t* output, const int16_t* input_1,

} // namespace tflite

-#endif // defined(HIFI4) || defined(HIFI5)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
12 changes: 6 additions & 6 deletions tensorflow/lite/micro/kernels/xtensa/transpose_conv.cc
@@ -183,7 +183,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Quantized kernels use an int32 scratch buffer.
if (input->type == kTfLiteInt8) {
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const int stride_width = params->stride_width;
const int stride_height = params->stride_height;

@@ -200,7 +200,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context, scratch_buffer_size,
&(data->scratch_buffer_index)) == kTfLiteOk);
-#else // #if defined(HIFI4) || defined(HIFI5)
+#else // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context,
GetTensorShape(output).FlatSize() * sizeof(int32_t),
@@ -211,7 +211,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Quantized 16x8 kernels use an int64 scratch buffer.
if (input->type == kTfLiteInt16) {
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const int stride_width = params->stride_width;
const int stride_height = params->stride_height;

@@ -228,12 +228,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context, scratch_buffer_size,
&(data->scratch_buffer_index)) == kTfLiteOk);
-#else // #if defined(HIFI4) || defined(HIFI5)
+#else // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context,
GetTensorShape(output).FlatSize() * sizeof(std::int64_t),
&(data->scratch_buffer_index)) == kTfLiteOk);
-#endif // #if defined(HIFI4) || defined(HIFI5)
+#endif // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
}

// All per-channel quantized tensors need valid zero point and scale arrays.
@@ -320,7 +320,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt8: {
int32_t* scratch_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_buffer_index));
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
if (bias->type == kTfLiteInt32) {
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& filter_shape =
