Add int8 conv kernel variant for xtensa (#169)
* Add int8 conv kernel variant for Fusion F1.

Person detection benchmark binary size before:
   text	   data	    bss	    dec
 138288	 337488	 141752	 617528

After:
   text	   data	    bss	    dec
 100040	 336160	 141752	 577952
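(Net: the text section shrinks by 138288 - 100040 = 38248 bytes, about 37 KB, and the total footprint (dec) drops by 617528 - 577952 = 39576 bytes.)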

* Re-enable conv2d variant in person detection benchmark.

* Address comments.

- Change Register_CONV2D_INT8 to Register_CONV_2D_INT8REF.
- Remove int8-specific prepare.
- Rename conv_int8.cc to conv_int8_reference.cc.
njeffrie authored Jun 16, 2021
1 parent 6c41d56 commit 36c507e
Showing 10 changed files with 132 additions and 26 deletions.
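The heart of the change is swapping the catch-all AllOpsResolver for a MicroMutableOpResolver that registers only the six ops the person-detection model needs, with the conv slot filled by the int8 reference variant. A minimal self-contained sketch of that registration pattern (RegisterPersonDetectionOps is an illustrative name; the registrations mirror the benchmark diff below):

#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// The template argument is the number of AddXxx() registrations made.
void RegisterPersonDetectionOps(tflite::MicroMutableOpResolver<6>& resolver) {
  resolver.AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
  // Int8-only reference conv: much smaller text section than the optimized
  // kernel on Xtensa; the name aliases the generic kernel elsewhere.
  resolver.AddConv2D(tflite::Register_CONV_2D_INT8REF());
  resolver.AddDepthwiseConv2D();
  resolver.AddSoftmax();
  resolver.AddAveragePool2D();
  resolver.AddReshape();
}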
21 changes: 15 additions & 6 deletions tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc
@@ -14,14 +14,16 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/micro/all_ops_resolver.h"
 #include "tensorflow/lite/micro/benchmarks/micro_benchmark.h"
 #include "tensorflow/lite/micro/examples/person_detection/model_settings.h"
 #include "tensorflow/lite/micro/examples/person_detection/no_person_image_data.h"
 #include "tensorflow/lite/micro/examples/person_detection/person_detect_model_data.h"
 #include "tensorflow/lite/micro/examples/person_detection/person_image_data.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
+#include "tensorflow/lite/micro/kernels/fully_connected.h"
 #include "tensorflow/lite/micro/micro_error_reporter.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
+#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
 #include "tensorflow/lite/micro/micro_utils.h"
 #include "tensorflow/lite/micro/system_setup.h"
 #include "tensorflow/lite/schema/schema_generated.h"
@@ -34,7 +36,7 @@ limitations under the License.
 
 namespace tflite {
 
-using PersonDetectionOpResolver = tflite::AllOpsResolver;
+using PersonDetectionOpResolver = MicroMutableOpResolver<6>;
 using PersonDetectionBenchmarkRunner = MicroBenchmarkRunner<int8_t>;
 
 // Create an area of memory to use for input, output, and intermediate arrays.
@@ -52,10 +54,17 @@ PersonDetectionBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
   // We allocate PersonDetectionOpResolver from a global buffer
   // because the object's lifetime must exceed that of the
   // PersonDetectionBenchmarkRunner object.
-  return new (benchmark_runner_buffer) PersonDetectionBenchmarkRunner(
-      g_person_detect_model_data,
-      new (op_resolver_buffer) PersonDetectionOpResolver(), tensor_arena,
-      kTensorArenaSize, profiler);
+  PersonDetectionOpResolver* op_resolver =
+      new (op_resolver_buffer) PersonDetectionOpResolver();
+  op_resolver->AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
+  op_resolver->AddConv2D(tflite::Register_CONV_2D_INT8REF());
+  op_resolver->AddDepthwiseConv2D();
+  op_resolver->AddSoftmax();
+  op_resolver->AddAveragePool2D();
+  op_resolver->AddReshape();
+  return new (benchmark_runner_buffer)
+      PersonDetectionBenchmarkRunner(g_person_detect_model_data, op_resolver,
+                                     tensor_arena, kTensorArenaSize, profiler);
 }
 
 void PersonDetectionNIerations(const int8_t* input, int iterations,
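CreateBenchmarkRunner builds both objects with placement new into static buffers so they outlive the function without any heap allocation. A self-contained sketch of the pattern (Widget and widget_buffer are illustrative stand-ins, not benchmark code):

#include <cstdint>
#include <new>

struct Widget {
  int value = 0;
};

// Static storage with program lifetime; alignas keeps the buffer suitably
// aligned for the object constructed into it.
alignas(Widget) static uint8_t widget_buffer[sizeof(Widget)];

Widget* CreateWidget() {
  // Placement new constructs the object inside the existing buffer: no heap
  // allocation, and the object outlives the creating function.
  return new (widget_buffer) Widget();
}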
15 changes: 15 additions & 0 deletions tensorflow/lite/micro/kernels/conv.h
@@ -72,6 +72,21 @@ TfLiteStatus CalculateOpDataConv(TfLiteContext* context, TfLiteNode* node,
 
 TfLiteStatus ConvPrepare(TfLiteContext* context, TfLiteNode* node);
 
+// This is the most generic TfLiteRegistration. The actual supported types may
+// still be target dependent. The only requirement is that every implementation
+// (reference or optimized) must define this function.
+TfLiteRegistration Register_CONV_2D();
+
+#if defined(XTENSA)
+// Returns a TfLiteRegistration struct for a kernel variant that supports
+// only int8 inputs and outputs.
+TfLiteRegistration Register_CONV_2D_INT8REF();
+#else
+inline TfLiteRegistration Register_CONV_2D_INT8REF() {
+  return Register_CONV_2D();
+}
+#endif
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
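The #if defined(XTENSA) block above is a compile-time fallback: portable call sites can always name the int8 variant, and on non-Xtensa targets it simply aliases the generic kernel. A reduced, self-contained model of the pattern (MY_TARGET and Register_MY_OP are hypothetical names, not part of the real API):

struct Registration { int id; };

Registration Register_MY_OP() { return {0}; }  // generic kernel

#if defined(MY_TARGET)
// The target's own translation unit supplies a specialized definition.
Registration Register_MY_OP_INT8();
#else
// Every other target: the specialized name aliases the generic kernel,
// so callers never need target #ifdefs of their own.
inline Registration Register_MY_OP_INT8() { return Register_MY_OP(); }
#endif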
6 changes: 3 additions & 3 deletions tensorflow/lite/micro/kernels/conv_test.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
-#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
 
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
@@ -89,4 +89,4 @@ TfLiteStatus TestConvQuantizedPerChannel(
 }  // namespace testing
 }  // namespace tflite
 
-#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_H_
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CONV_TEST_H_
1 change: 0 additions & 1 deletion tensorflow/lite/micro/kernels/micro_ops.h
@@ -34,7 +34,6 @@ namespace tflite {
 TfLiteRegistration Register_ADD_N();
 TfLiteRegistration Register_BATCH_TO_SPACE_ND();
 TfLiteRegistration Register_CAST();
-TfLiteRegistration Register_CONV_2D();
 TfLiteRegistration Register_CUMSUM();
 TfLiteRegistration Register_DEPTH_TO_SPACE();
 TfLiteRegistration Register_DEPTHWISE_CONV_2D();
13 changes: 1 addition & 12 deletions tensorflow/lite/micro/kernels/xtensa/conv.cc
@@ -106,18 +106,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 #elif defined(FUSION_F1) || defined(HIFI5)
       ConvEvalHifi(context, node, params, op_data, input, filter, bias, output);
 #else
-      reference_integer_ops::ConvPerChannel(
-          ConvParamsQuantized(params, op_data.reference_op_data),
-          op_data.reference_op_data.per_channel_output_multiplier,
-          op_data.reference_op_data.per_channel_output_shift,
-          tflite::micro::GetTensorShape(input),
-          tflite::micro::GetTensorData<int8_t>(input),
-          tflite::micro::GetTensorShape(filter),
-          tflite::micro::GetTensorData<int8_t>(filter),
-          tflite::micro::GetTensorShape(bias),
-          tflite::micro::GetTensorData<int32_t>(bias),
-          tflite::micro::GetTensorShape(output),
-          tflite::micro::GetTensorData<int8_t>(output));
+      return ConvReferenceEvalInt8(context, node);
 #endif
       break;
     }
5 changes: 5 additions & 0 deletions tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc
@@ -42,6 +42,11 @@ TfLiteStatus ConvPrepareHifi(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* filter = GetInput(context, node, kConvWeightsTensor);
   TF_LITE_ENSURE(context, filter != nullptr);
 
+  if (input->type == kTfLiteInt8 && output->type == kTfLiteInt8 &&
+      filter->type == kTfLiteInt8) {
+    return ConvReferencePrepareInt8(context, node);
+  }
+
   const RuntimeShape& input_shape = GetTensorShape(input);
   const RuntimeShape& filter_shape = GetTensorShape(filter);
   const RuntimeShape& output_shape = GetTensorShape(output);
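The early return above routes all-int8 nodes to the reference prepare, keeping prepare and eval paired and skipping the HiFi-specific setup for nodes the reference kernel will execute. A reduced, self-contained sketch of the type guard (IsAllInt8 is an illustrative helper, not TFLM API):

#include "tensorflow/lite/c/common.h"

bool IsAllInt8(const TfLiteTensor& input, const TfLiteTensor& filter,
               const TfLiteTensor& output) {
  return input.type == kTfLiteInt8 && filter.type == kTfLiteInt8 &&
         output.type == kTfLiteInt8;
}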
83 changes: 83 additions & 0 deletions tensorflow/lite/micro/kernels/xtensa/conv_int8_reference.cc
@@ -0,0 +1,83 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+
+namespace tflite {
+namespace {
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
+}
+
+}  // namespace.
+
+TfLiteStatus ConvReferenceEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  TFLITE_DCHECK(node->builtin_data != nullptr);
+  const auto& params =
+      *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
+  const auto& op_data = *(reinterpret_cast<OpDataConv*>(node->user_data));
+
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kConvInputTensor);
+  const TfLiteEvalTensor* filter =
+      tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
+  const TfLiteEvalTensor* bias =
+      (NumInputs(node) == 3)
+          ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
+          : nullptr;
+
+  reference_integer_ops::ConvPerChannel(
+      ConvParamsQuantized(params, op_data),
+      op_data.per_channel_output_multiplier, op_data.per_channel_output_shift,
+      tflite::micro::GetTensorShape(input),
+      tflite::micro::GetTensorData<int8_t>(input),
+      tflite::micro::GetTensorShape(filter),
+      tflite::micro::GetTensorData<int8_t>(filter),
+      tflite::micro::GetTensorShape(bias),
+      tflite::micro::GetTensorData<int32_t>(bias),
+      tflite::micro::GetTensorShape(output),
+      tflite::micro::GetTensorData<int8_t>(output));
+  return kTfLiteOk;
+}
+
+// TODO(b/189981943): This variant can be used for a smaller binary
+// since the optimized conv implementation currently adds a lot to
+// the binary size (~30KB to text section).
+TfLiteRegistration Register_CONV_2D_INT8REF() {
+  return {/*init=*/Init,
+          /*free=*/nullptr,
+          /*prepare=*/ConvPrepare,
+          /*invoke=*/ConvReferenceEvalInt8,
+          /*profiling_string=*/nullptr,
+          /*builtin_code=*/0,
+          /*custom_name=*/nullptr,
+          /*version=*/0};
+}
+
+}  // namespace tflite
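For orientation, a hedged sketch of the lifecycle the framework drives through a TfLiteRegistration like the one above (RunNode is a simplification for illustration, not TFLM code):

#include "tensorflow/lite/c/common.h"

TfLiteStatus RunNode(const TfLiteRegistration& reg, TfLiteContext* context,
                     TfLiteNode* node) {
  if (reg.init != nullptr) {
    // init: allocate per-node persistent state (here, sizeof(OpDataConv)).
    node->user_data = reg.init(context, /*buffer=*/nullptr, /*length=*/0);
  }
  if (reg.prepare != nullptr) {
    // prepare: the shared ConvPrepare validates shapes and computes the
    // quantization parameters once, before any inference runs.
    TfLiteStatus status = reg.prepare(context, node);
    if (status != kTfLiteOk) return status;
  }
  // invoke: ConvReferenceEvalInt8 executes on every inference call.
  return reg.invoke(context, node);
}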
3 changes: 3 additions & 0 deletions tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h
@@ -59,6 +59,9 @@ TfLiteStatus ConvEvalHifi(TfLiteContext* context, TfLiteNode* node,
                           const TfLiteEvalTensor* filter,
                           const TfLiteEvalTensor* bias,
                           TfLiteEvalTensor* output);
+TfLiteStatus ConvReferencePrepareInt8(TfLiteContext* context, TfLiteNode* node);
+
+TfLiteStatus ConvReferenceEvalInt8(TfLiteContext* context, TfLiteNode* node);
 #endif
 
 }  // namespace tflite
6 changes: 4 additions & 2 deletions tensorflow/lite/micro/micro_mutable_op_resolver.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/compatibility.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
 #include "tensorflow/lite/micro/kernels/ethosu.h"
 #include "tensorflow/lite/micro/kernels/fully_connected.h"
 #include "tensorflow/lite/micro/kernels/micro_ops.h"
@@ -169,8 +170,9 @@ class MicroMutableOpResolver : public MicroOpResolver {
                       ParseConcatenation);
   }
 
-  TfLiteStatus AddConv2D() {
-    return AddBuiltin(BuiltinOperator_CONV_2D, Register_CONV_2D(), ParseConv2D);
+  TfLiteStatus AddConv2D(
+      const TfLiteRegistration& registration = Register_CONV_2D()) {
+    return AddBuiltin(BuiltinOperator_CONV_2D, registration, ParseConv2D);
   }
 
   TfLiteStatus AddCos() {
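Usage sketch for the defaulted parameter: existing call sites compile unchanged, while size-sensitive builds can inject a variant registration (RegisterConv and prefer_small_binary below are illustrative, not TFLM API):

#include "tensorflow/lite/micro/kernels/conv.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

void RegisterConv(tflite::MicroMutableOpResolver<1>& resolver,
                  bool prefer_small_binary) {
  if (prefer_small_binary) {
    // Int8-only reference conv; on non-Xtensa targets this name aliases
    // the generic kernel, so the call stays portable.
    resolver.AddConv2D(tflite::Register_CONV_2D_INT8REF());
  } else {
    resolver.AddConv2D();  // default argument: Register_CONV_2D()
  }
}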
5 changes: 3 additions & 2 deletions tensorflow/lite/micro/tools/make/ext_libs/xtensa.inc
@@ -1,9 +1,10 @@
 # Explicitly add kernel sources specific to the Xtensa optimized
 # implementations.
 MICROLITE_CC_KERNEL_SRCS += \
-  tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc \
   tensorflow/lite/micro/kernels/xtensa/conv_hifi.cc \
-  tensorflow/lite/micro/kernels/xtensa/conv_hifimini.cc
+  tensorflow/lite/micro/kernels/xtensa/conv_hifimini.cc \
+  tensorflow/lite/micro/kernels/xtensa/conv_int8_reference.cc \
+  tensorflow/lite/micro/kernels/xtensa/softmax_int8_int16.cc
 
 ifeq ($(TARGET_ARCH), $(findstring $(TARGET_ARCH), "hifi5"))
 