Add int8 conv kernel variant for xtensa (#169)
* Add int8 conv kernel variant for Fusion F1.

Person detection benchmark binary size before:
   text	   data	    bss	    dec
 138288	 337488	 141752	 617528

After:
   text	   data	    bss	    dec
 100040	 336160	 141752	 577952
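(Net: the text section shrinks by 138288 - 100040 = 38248 bytes, about 37 KB, and the total footprint (dec) drops by 617528 - 577952 = 39576 bytes.)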

* Re-enable conv2d variant in person detection benchmark.

* Address comments.

- Change Register_CONV2D_INT8 to Register_CONV_2D_INT8REF.
- Remove int8-specific prepare.
- Rename conv_int8.cc to conv_int8_reference.cc.
njeffrie authored Jun 16, 2021
1 parent 6c41d56 commit 36c507e
Showing 10 changed files with 132 additions and 26 deletions.
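The heart of the change is swapping the catch-all AllOpsResolver for a MicroMutableOpResolver that registers only the six ops the person-detection model needs, with the conv slot filled by the int8 reference variant. A minimal self-contained sketch of that registration pattern (RegisterPersonDetectionOps is an illustrative name; the registrations mirror the benchmark diff below):

#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// The template argument is the number of AddXxx() registrations made.
void RegisterPersonDetectionOps(tflite::MicroMutableOpResolver<6>& resolver) {
  resolver.AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
  // Int8-only reference conv: much smaller text section than the optimized
  // kernel on Xtensa; the name aliases the generic kernel elsewhere.
  resolver.AddConv2D(tflite::Register_CONV_2D_INT8REF());
  resolver.AddDepthwiseConv2D();
  resolver.AddSoftmax();
  resolver.AddAveragePool2D();
  resolver.AddReshape();
}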
21 changes: 15 additions & 6 deletions tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc
@@ -14,14 +14,16 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/micro/all_ops_resolver.h"
 #include "tensorflow/lite/micro/benchmarks/micro_benchmark.h"
 #include "tensorflow/lite/micro/examples/person_detection/model_settings.h"
 #include "tensorflow/lite/micro/examples/person_detection/no_person_image_data.h"
 #include "tensorflow/lite/micro/examples/person_detection/person_detect_model_data.h"
 #include "tensorflow/lite/micro/examples/person_detection/person_image_data.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
+#include "tensorflow/lite/micro/kernels/fully_connected.h"
 #include "tensorflow/lite/micro/micro_error_reporter.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
+#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
 #include "tensorflow/lite/micro/micro_utils.h"
 #include "tensorflow/lite/micro/system_setup.h"
 #include "tensorflow/lite/schema/schema_generated.h"
@@ -34,7 +36,7 @@ limitations under the License.
 
 namespace tflite {
 
-using PersonDetectionOpResolver = tflite::AllOpsResolver;
+using PersonDetectionOpResolver = MicroMutableOpResolver<6>;
 using PersonDetectionBenchmarkRunner = MicroBenchmarkRunner<int8_t>;
 
 // Create an area of memory to use for input, output, and intermediate arrays.
@@ -52,10 +54,17 @@ PersonDetectionBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
   // We allocate PersonDetectionOpResolver from a global buffer
   // because the object's lifetime must exceed that of the
   // PersonDetectionBenchmarkRunner object.
-  return new (benchmark_runner_buffer) PersonDetectionBenchmarkRunner(
-      g_person_detect_model_data,
-      new (op_resolver_buffer) PersonDetectionOpResolver(), tensor_arena,
-      kTensorArenaSize, profiler);
+  PersonDetectionOpResolver* op_resolver =
+      new (op_resolver_buffer) PersonDetectionOpResolver();
+  op_resolver->AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
+  op_resolver->AddConv2D(tflite::Register_CONV_2D_INT8REF());
+  op_resolver->AddDepthwiseConv2D();
+  op_resolver->AddSoftmax();
+  op_resolver->AddAveragePool2D();
+  op_resolver->AddReshape();
+  return new (benchmark_runner_buffer)
+      PersonDetectionBenchmarkRunner(g_person_detect_model_data, op_resolver,
+                                     tensor_arena, kTensorArenaSize, profiler);
 }
 
 void PersonDetectionNIerations(const int8_t* input, int iterations,
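CreateBenchmarkRunner builds both objects with placement new into static buffers so they outlive the function without any heap allocation. A self-contained sketch of the pattern (Widget and widget_buffer are illustrative stand-ins, not benchmark code):

#include <cstdint>
#include <new>

struct Widget {
  int value = 0;
};

// Static storage with program lifetime; alignas keeps the buffer suitably
// aligned for the object constructed into it.
alignas(Widget) static uint8_t widget_buffer[sizeof(Widget)];

Widget* CreateWidget() {
  // Placement new constructs the object inside the existing buffer: no heap
  // allocation, and the object outlives the creating function.
  return new (widget_buffer) Widget();
}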
15 changes: 15 additions & 0 deletions tensorflow/lite/micro/kernels/conv.h
@@ -72,6 +72,21 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
 
 TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
 
+// This is the most generic TfLiteRegistration. The actual supported types may
+// still be target dependent. The only requirement is that every implementation
+// (reference or optimized) must define this function.
+TfLiteRegistration Register_CONV_2D();
+
+#if defined(XTENSA)
+// Returns a TfLiteRegistration struct for a kernel variant that supports
+// only int8 inputs and outputs.
+TfLiteRegistration Register_CONV_2D_INT8REF();
+#else
+inline TfLiteRegistration Register_CONV_2D_INT8REF() {
+  return Register_CONV_2D();
+}
+#endif
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
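The #if defined(XTENSA) block above is a compile-time fallback: portable call sites can always name the int8 variant, and on non-Xtensa targets it simply aliases the generic kernel. A reduced, self-contained model of the pattern (MY_TARGET and Register_MY_OP are hypothetical names, not part of the real API):

struct Registration { int id; };

Registration Register_MY_OP() { return {0}; }  // generic kernel

#if defined(MY_TARGET)
// The target's own translation unit supplies a specialized definition.
Registration Register_MY_OP_INT8();
#else
// Every other target: the specialized name aliases the generic kernel,
// so callers never need target #ifdefs of their own.
inline Registration Register_MY_OP_INT8() { return Register_MY_OP(); }
#endif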
6 changes: 3 additions & 3 deletions tensorflow/lite/micro/kernels/conv_test.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
@@ -89,4 +89,4 @@ TfLiteStatus TestConvQuantizedPerChannel(
 }  // namespace testing
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
1 change: 0 additions & 1 deletion tensorflow/lite/micro/kernels/micro_ops.h
@@ -34,7 +34,6 @@ namespace tflite {
 TfLiteRegistration Register_ADD_N();
 TfLiteRegistration Register_BATCH_TO_SPACE_ND();
 TfLiteRegistration Register_CAST();
-TfLiteRegistration Register_CONV_2D();
 TfLiteRegistration Register_CUMSUM();
 TfLiteRegistration Register_DEPTH_TO_SPACE();
 TfLiteRegistration Register_DEPTHWISE_CONV_2D();
13 changes: 1 addition & 12 deletions tensorflow/lite/micro/kernels/xtensa/conv.cc
@@ -106,18 +106,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 #elif defined(FUSION_F1) || defined(HIFI5)
       ConvEvalHifi(context, node, params, op_data, input, filter, bias, output);
 #else
-      reference_integer_ops::ConvPerChannel(
-          ConvParamsQuantized(params, op_data.reference_op_data),
-          op_data.reference_op_data.per_channel_output_multiplier,
-          op_data.reference_op_data.per_channel_output_shift,
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<int8_t>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<int8_t>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int8_t>(output));
+      return ConvReferenceEvalInt8(context, node);
 #endif
       break;
     }
5 changes: 5 additions & 0 deletions tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc
@@ -42,6 +42,11 @@ TfLiteStatus ConvPrepareHifi(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
 
+  if (input->type == kTfLiteInt8 && output->type == kTfLiteInt8 &&
+      filter->type == kTfLiteInt8) {
+    return ConvReferencePrepareInt8(context, node);
+  }
+
   const RuntimeShape& input_shape = GetTensorShape(input);
   const RuntimeShape& filter_shape = GetTensorShape(filter);
   const RuntimeShape& output_shape = GetTensorShape(output);
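The early return above routes all-int8 nodes to the reference prepare, keeping prepare and eval paired and skipping the HiFi-specific setup for nodes the reference kernel will execute. A reduced, self-contained sketch of the type guard (IsAllInt8 is an illustrative helper, not TFLM API):

#include "tensorflow/lite/c/common.h"

bool IsAllInt8(const TfLiteTensor& input, const TfLiteTensor& filter,
               const TfLiteTensor& output) {
  return input.type == kTfLiteInt8 && filter.type == kTfLiteInt8 &&
         output.type == kTfLiteInt8;
}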
83 changes: 83 additions & 0 deletions tensorflow/lite/micro/kernels/xtensa/conv_int8_reference.cc
@@ -0,0 +1,83 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+
+namespace tflite {
+namespace {
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
+}
+
+}  // namespace.
+
+TfLiteStatus ConvReferenceEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  const auto& params =
+      *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
+  const auto& op_data = *(reinterpret_cast<OpDataConv*>(node->user_data));
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kConvInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
+          : nullptr;
+
+  reference_integer_ops::ConvPerChannel(
+      ConvParamsQuantized(params, op_data),
+      op_data.per_channel_output_multiplier, op_data.per_channel_output_shift,
+      tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<int8_t>(input),
+      tflite::micro::GetTensorShape(filter),
+      tflite::micro::GetTensorData<int8_t>(filter),
+      tflite::micro::GetTensorShape(bias),
+      tflite::micro::GetTensorData<int32_t>(bias),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<int8_t>(output));
+  return kTfLiteOk;
+}
+
+// TODO(b/189981943): This variant can be used for a smaller binary
+// since the optimized conv implementation currently adds a lot to
+// the binary size (~30KB to text section).
+TfLiteRegistration Register_CONV_2D_INT8REF() {
+  return {/*init=*/Init,
+          /*free=*/nullptr,
+          /*prepare=*/ConvPrepare,
+          /*invoke=*/ConvReferenceEvalInt8,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
+}
+
+}  // namespace tflite
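For orientation, a hedged sketch of the lifecycle the framework drives through a TfLiteRegistration like the one above (RunNode is a simplification for illustration, not TFLM code):

#include "tensorflow/lite/c/common.h"

TfLiteStatus RunNode(const TfLiteRegistration& reg, TfLiteContext* context,
                     TfLiteNode* node) {
  if (reg.init != nullptr) {
    // init: allocate per-node persistent state (here, sizeof(OpDataConv)).
    node->user_data = reg.init(context, /*buffer=*/nullptr, /*length=*/0);
  }
  if (reg.prepare != nullptr) {
    // prepare: the shared ConvPrepare validates shapes and computes the
    // quantization parameters once, before any inference runs.
    TfLiteStatus status = reg.prepare(context, node);
    if (status != kTfLiteOk) return status;
  }
  // invoke: ConvReferenceEvalInt8 executes on every inference call.
  return reg.invoke(context, node);
}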
3 changes: 3 additions & 0 deletions tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h
@@ -59,6 +59,9 @@ TfLiteStatus ConvEvalHifi(TfLiteContext* context, TfLiteNode* node,
                           const TfLiteEvalTensor* filter,
                           const TfLiteEvalTensor* bias,
                           TfLiteEvalTensor* output);
+TfLiteStatus ConvReferencePrepareInt8(TfLiteContext* context, TfLiteNode* node);
+
+TfLiteStatus ConvReferenceEvalInt8(TfLiteContext* context, TfLiteNode* node);
 #endif
 
 }  // namespace tflite
6 changes: 4 additions & 2 deletions tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/compatibility.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
 #include "tensorflow/lite/micro/kernels/ethosu.h"
 #include "tensorflow/lite/micro/kernels/fully_connected.h"
 #include "tensorflow/lite/micro/kernels/micro_ops.h"
@@ -169,8 +170,9 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseConcatenation);
   }
 
-  TfLiteStatus AddConv2D() {
-    return AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), ParseConv2D);
+  TfLiteStatus AddConv2D(
+      const TfLiteRegistration& registration = Register_CONV_2D()) {
+    return AddBuiltin(BuiltinOperator_CONV_2D, registration, ParseConv2D);
   }
 
   TfLiteStatus AddCos() {
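Usage sketch for the defaulted parameter: existing call sites compile unchanged, while size-sensitive builds can inject a variant registration (RegisterConv and prefer_small_binary below are illustrative, not TFLM API):

#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void RegisterConv(tflite::MicroMutableOpResolver<1>& resolver,
                  bool prefer_small_binary) {
  if (prefer_small_binary) {
    // Int8-only reference conv; on non-Xtensa targets this name aliases
    // the generic kernel, so the call stays portable.
    resolver.AddConv2D(tflite::Register_CONV_2D_INT8REF());
  } else {
    resolver.AddConv2D();  // default argument: Register_CONV_2D()
  }
}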
5 changes: 3 additions & 2 deletions tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
@@ -1,9 +1,10 @@
 # Explicitly add kernel sources specific to the Xtensa optimized
 # implementations.
 MICROLITE_CC_KERNEL_SRCS += \
-  tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc \
   tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc \
-  tensorflow/lite/micro/kernels/xtensa/conv_hifimini.cc
+  tensorflow/lite/micro/kernels/xtensa/conv_hifimini.cc \
+  tensorflow/lite/micro/kernels/xtensa/conv_int8_reference.cc \
+  tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc
 
 ifeq ($(TARGET_ARCH), $(findstring $(TARGET_ARCH), "hifi5"))
 