Add HIFI3 defines alongside HIFI4 uses
TFLM has historically used the HIFI4 define for both HiFi3/3z and HiFi4
cores. That define was recently split so that HiFi3/3z uses a separate
HIFI3 define, allowing the different NDSP libraries to be included. This
commit updates the PR to use the new convention within the xtensa
kernels that were changed.
rascani committed Dec 5, 2023
1 parent 3274767 commit 0f210d0
Showing 4 changed files with 27 additions and 27 deletions.
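
The convention at a glance, as an illustrative sketch rather than code
taken from this commit; it assumes the HIFI3, HIFI4, and HIFI5 macros
are set by the platform build files for the corresponding Cadence core:

    // Sketch of the updated guard convention. The macros HIFI3, HIFI4,
    // and HIFI5 are assumed to come from the platform makefiles.
    #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
    // Optimized path: dispatch to the core-specific Cadence NDSP kernels.
    #else
    // Portable path: fall back to the reference C++ implementation.
    #endif

    // With the split, a HiFi3/3z-only branch no longer has to hide
    // behind HIFI4 and can be expressed directly:
    #if defined(HIFI3)
    // HiFi3/3z-specific handling, e.g. pulling in its own NDSP library.
    #endif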
14 changes: 7 additions & 7 deletions tensorflow/lite/micro/kernels/xtensa/lstm_eval.cc
@@ -105,14 +105,14 @@ TfLiteStatus LstmTensors::ValidateTensorStatus(TfLiteContext* context) const {

namespace lstm_internal {

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
const int32_t kInt16Max = std::numeric_limits<int16_t>::max();
const int32_t kInt16Min = std::numeric_limits<int16_t>::min();
#endif

void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int16_t* output) {
-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
for (int batch = 0; batch < n_batch; ++batch) {
for (int i = 0; i < n_input; ++i) {
const int index = batch * n_input + i;
@@ -137,7 +137,7 @@ void AddElementWise(const float* input_1, const float* input_2, int n_batch,
}
}

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(const RuntimeShape& data_shape, int16_t* data) {
reference_integer_ops::Logistic(
0 /*data->input_multiplier*/, 0 /*data->input_left_shift */,
@@ -225,7 +225,7 @@ void FullyConnected(const FullyConnectedParams& params,
params, input_shape, input_data, filter_shape, filter_data, bias_shape,
bias_data, output_shape, output_data);
}
-#else // #if !(defined(HIFI5) || defined(HIFI4))
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(int16_t* data, int32_t data_size) {
WORD32 err;
err = xa_nn_vec_sigmoid_sym16s_sym16s(data, data, 0, 0, data_size);
@@ -341,7 +341,7 @@ void FullyConnected(const FullyConnectedParams& params, const float* input_data,
params, input_shape, input_data, filter_shape, filter_data, bias_shape,
bias_data, output_shape, output_data);
}
-#endif // #if !(defined(HIFI5) || defined(HIFI4))
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

void Clipping(const int v_size, const CellStateInfo& cell_state_info,
int16_t* vector) {
@@ -360,7 +360,7 @@ void Clipping(const int v_size, const CellStateInfo& cell_state_info,
}
}

-#if defined(HIFI5) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
void UpdateLstmCell(const LstmStepManager& step_info,
TfLiteEvalTensor* cell_state,
// Gate outputs
@@ -428,7 +428,7 @@ void UpdateLstmCell(const LstmStepManager& step_info,
step_info.CellStateOffset());
}
}
-#endif // #if defined(HIFI5) || defined(HIFI4)
+#endif // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

// Increment the data offset so the single time step invocation call can access
// the corresponding input/output tensor data at the time step
22 changes: 11 additions & 11 deletions tensorflow/lite/micro/kernels/xtensa/lstm_eval.h
@@ -158,7 +158,7 @@ LSTMBuffers<CellType> CreateLSTMBuffers(TfLiteContext* context,
// namespace to expose them for testing
namespace lstm_internal {

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(const RuntimeShape& data_shape, int16_t* data);

void Sigmoid(const RuntimeShape& data_shape, float* data);
@@ -200,7 +200,7 @@ void FullyConnected(const FullyConnectedParams& params,
const RuntimeShape& filter_shape, const float* filter_data,
const RuntimeShape& bias_shape, const float* bias_data,
const RuntimeShape& output_shape, float* output_data);
-#else // #if !(defined(HIFI5) || defined(HIFI4))
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
void Sigmoid(int16_t* data, int32_t data_size);

void Sigmoid(float* data, int32_t data_size);
@@ -236,7 +236,7 @@ void FullyConnected(const FullyConnectedParams& params, const float* input_data,
const float* filter_data, const float* bias_data,
float* output_data, const int num_batches,
const int output_depth, const int accum_depth);
-#endif // #if !(defined(HIFI5) || defined(HIFI4))
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

void AddElementWise(const int16_t* input_1, const int16_t* input_2, int n_batch,
int n_input, int16_t* output);
@@ -272,7 +272,7 @@ class LstmStepManager {
int OutputOffset() const { return output_offset_; }
int HiddenStateOffset() const { return hidden_state_offset_; }
int CellStateOffset() const { return cell_state_offset_; }
-#if defined(HIFI5) || defined(HIFI4)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
int time_major() const { return size_info_.time_major; }

int batch_size() const { return size_info_.batch_size; }
@@ -298,7 +298,7 @@ class LstmStepManager {
// Implements the following formula:
// gate = activate(FC(input) + FC(recurrent))
// Activation is sigmoid except for the "cell" gate (configurable, usually tanh)
-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void CalculateLstmGate(
@@ -406,7 +406,7 @@ void UpdateLstmCell(const LstmStepManager& step_info,
step_info.CellStateOffset());
}
}
-#else // #if !defined(HIFI5) || defined(HIFI4)
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void CalculateLstmGate(
@@ -496,7 +496,7 @@ void UpdateLstmCell(const LstmStepManager& step_info,
const ArithmeticParams& forget_cell_mul_params,
const ArithmeticParams& input_mul_params,
const CellStateInfo& cell_state_info, float* buffer);
-#endif // #if defined(HIFI5) || defined(HIFI4)
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

// Update the hidden state of the LSTM kernel using the following formula:
// updated_hidden_state = Tanh(updated_cell_state) * output_gate_output, * means
@@ -521,7 +521,7 @@ void UpdateLstmHidden(const LstmStepManager& step_info,
tflite::micro::GetTensorData<CellType>(cell_state) +
step_info.CellStateOffset();
// Tanh(cell_state)
-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
Tanh(cell_state_scale_power, cell_state_shape, cell_state_data,
cell_state_shape, buffer);
// Update the hidden state
@@ -539,7 +539,7 @@ void UpdateLstmHidden(const LstmStepManager& step_info,
#endif
}

-#if !(defined(HIFI5) || defined(HIFI4))
+#if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
@@ -651,7 +651,7 @@ void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
step_info.HiddenStateOffset(),
step_info.StateShape().FlatSize() * sizeof(ActivationType));
}
-#else // #if !(defined(HIFI5) || defined(HIFI4))
+#else // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))
template <typename ActivationType, typename WeightType, typename CellType,
typename BiasType>
void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
@@ -782,7 +782,7 @@ void LstmStep(const LstmStepManager& step_info, const OpDataLSTM& op_data,
step_info.HiddenStateOffset(),
step_info.StateShape().FlatSize() * sizeof(ActivationType));
}
-#endif // #if !(defined(HIFI5) || defined(HIFI4))
+#endif // #if !(defined(HIFI3) || defined(HIFI4) || defined(HIFI5))

} // namespace lstm_internal

6 changes: 3 additions & 3 deletions tensorflow/lite/micro/kernels/xtensa/lstm_eval_hifi.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#include <xtensa/tie/xt_hifi2.h>

@@ -498,7 +498,7 @@ void xa_nn_elm_mul_16x16_asym8s(int8_t* output, const int16_t* input_1,
AE_S8_0_IP(data_c, (ae_int8*)output, 1);
}
}
-#elif defined(HIFI4)
+#elif defined(HIFI3) || defined(HIFI4)
#if TFLITE_SINGLE_ROUNDING
#define MPY_BY_QUANT_MULT_X2_OUT32(out, inp, multiplier, l_shift, r_shift) \
{ \
@@ -1018,4 +1018,4 @@ void xa_nn_elm_mul_16x16_asym8s(int8_t* output, const int16_t* input_1,

} // namespace tflite

-#endif // defined(HIFI4) || defined(HIFI5)
+#endif // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
12 changes: 6 additions & 6 deletions tensorflow/lite/micro/kernels/xtensa/transpose_conv.cc
@@ -183,7 +183,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Quantized kernels use an int32 scratch buffer.
if (input->type == kTfLiteInt8) {
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const int stride_width = params->stride_width;
const int stride_height = params->stride_height;

@@ -200,7 +200,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context, scratch_buffer_size,
&(data->scratch_buffer_index)) == kTfLiteOk);
-#else // #if defined(HIFI4) || defined(HIFI5)
+#else // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context,
GetTensorShape(output).FlatSize() * sizeof(int32_t),
@@ -211,7 +211,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
// Quantized 16x8 kernels use an int64 scratch buffer.
if (input->type == kTfLiteInt16) {
TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr);
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
const int stride_width = params->stride_width;
const int stride_height = params->stride_height;

@@ -228,12 +228,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context, scratch_buffer_size,
&(data->scratch_buffer_index)) == kTfLiteOk);
-#else // #if defined(HIFI4) || defined(HIFI5)
+#else // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TFLITE_DCHECK(context->RequestScratchBufferInArena(
context,
GetTensorShape(output).FlatSize() * sizeof(std::int64_t),
&(data->scratch_buffer_index)) == kTfLiteOk);
-#endif // #if defined(HIFI4) || defined(HIFI5)
+#endif // #if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
}

// All per-channel quantized tensors need valid zero point and scale arrays.
@@ -320,7 +320,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt8: {
int32_t* scratch_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, data.scratch_buffer_index));
-#if defined(HIFI4) || defined(HIFI5)
+#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
if (bias->type == kTfLiteInt32) {
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
const RuntimeShape& filter_shape =
