Commit
Merge pull request microsoft#5 from chenfeiyue-cfy/vsinpu
Added Quantized Conv2d/GroupConv2d && refine ep code
sunshinemyson authored Feb 7, 2024
2 parents 676f999 + 7f6ed3b commit d584c4c
Showing 6 changed files with 264 additions and 9 deletions.
256 changes: 256 additions & 0 deletions onnxruntime/core/providers/vsinpu/builders/impl/qlinearconv_op_builder.h
@@ -0,0 +1,256 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/framework/tensorprotoutils.h"
#include <variant>
namespace onnxruntime {
namespace vsi {
namespace npu {
class QLinearConvOpBuilder : public BaseOpBuilder {
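// Input indices follow the ONNX QLinearConv input order:
// x, x_scale, x_zero_point, w, w_scale, w_zero_point, y_scale, y_zero_point,
// and the optional int32 bias B.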
enum {
INPUT_TENSOR = 0,
INPUT_TENSOR_SCALE = 1,
INPUT_TENSOR_ZP = 2,
WEIGHT_TENSOR = 3,
WEIGHT_TENSOR_SCALE = 4,
WEIGHT_TENSOR_ZP = 5,
OUTPUT_TENSOR_SCALE = 6,
OUTPUT_TENSOR_ZP = 7,
BIAS_TENSOR = 8,
};

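// Copies every element of a quantization-parameter tensor (e.g. a per-channel
// scale or zero-point tensor) into a host-side vector.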
template <typename T>
std::vector<T> getParamAsVector(std::shared_ptr<tim::vx::Tensor> qt_params) {
std::vector<T> values(qt_params->GetSpec().GetElementNum());
qt_params->CopyDataFromTensor(values.data());
return values;
}

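// Reads a single-element quantization-parameter tensor (a per-tensor scale or
// zero point) as a scalar.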
template <typename T>
T getParamAsScalar(std::shared_ptr<tim::vx::Tensor> qt_params) {
T val;
qt_params->CopyDataFromTensor(&val);
return val;
}

bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
const Node* node) const override {
auto input_defs = node->InputDefs();
auto input_shape = vsi::npu::util::GetTensorShape(*input_defs[INPUT_TENSOR]);
auto w_scale_shape = vsi::npu::util::GetTensorShape(*input_defs[WEIGHT_TENSOR_SCALE]);
if (input_shape.NumDimensions() != 4) {
LOGS_DEFAULT(ERROR) << "Not support conv3d&& conv1d yet.";
return false;
}

if (!graph_viewer.IsInitializedTensor(input_defs[INPUT_TENSOR_SCALE]->Name()) || !graph_viewer.IsInitializedTensor(input_defs[WEIGHT_TENSOR]->Name())) {
LOGS_DEFAULT(ERROR) << "Not support quantization definitions or weights that are not constant yet.";
return false;
}

if (w_scale_shape.Size() != 1 && *input_defs[WEIGHT_TENSOR]->Type() == "tensor(int8)") {
const ONNX_NAMESPACE::TensorProto* tensor_proto =
graph_viewer.GetConstantInitializer(input_defs[WEIGHT_TENSOR_ZP]->Name(), true);
std::vector<int8_t> w_zp(1);
auto status = onnxruntime::utils::UnpackTensor(
*tensor_proto,
tensor_proto->has_raw_data() ? tensor_proto->raw_data().data() : nullptr,
tensor_proto->has_raw_data() ? tensor_proto->raw_data().size() : 0,
w_zp.data(), w_zp.size());
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << "Failed to get data from weight zp tensor.";
return false;
}
if (w_zp[0] != 0) {
LOGS_DEFAULT(ERROR) << "Asymmetric perchannel quantization with datatype int8 is not supported.";
return false;
}
}
return true;
}
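// ONNX passes quantization parameters as separate graph inputs, whereas TIM-VX
// attaches them to tensor specs. This method therefore rebuilds the input,
// weight, bias, and output tensors with explicit tim::vx::Quantization objects
// before creating the convolution op.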
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const Node* node) override {
LOGS_DEFAULT(VERBOSE) << "Creating QLinearConv Op.";
auto x_scale = getParamAsScalar<float>(inputs[INPUT_TENSOR_SCALE]);
auto y_scale = getParamAsScalar<float>(inputs[OUTPUT_TENSOR_SCALE]);
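// Zero points may be int8 or uint8 depending on the tensor's data type, so
// hold them in a variant and widen to int32 (as TIM-VX expects) further below.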
std::variant<int8_t, uint8_t> x_zp, y_zp;
if (inputs[INPUT_TENSOR_ZP]->GetDataType() == tim::vx::DataType::INT8) {
x_zp = getParamAsScalar<int8_t>(inputs[INPUT_TENSOR_ZP]);
} else {
x_zp = getParamAsScalar<uint8_t>(inputs[INPUT_TENSOR_ZP]);
}
if (inputs[OUTPUT_TENSOR_ZP]->GetDataType() == tim::vx::DataType::INT8) {
y_zp = getParamAsScalar<int8_t>(inputs[OUTPUT_TENSOR_ZP]);
} else {
y_zp = getParamAsScalar<uint8_t>(inputs[OUTPUT_TENSOR_ZP]);
}

// W may be quantized per channel, in which case w_scale is a 1-D tensor.
bool is_pcq = inputs[WEIGHT_TENSOR_SCALE]->GetSpec().GetElementNum() != 1;
tim::vx::Quantization WeightQuant;
tim::vx::Quantization BiasQuant;
std::vector<int32_t> biasdata(inputs.size() == 9 ? inputs[BIAS_TENSOR]->GetSpec().GetElementNum() : 1);
if (is_pcq) {
auto w_scale = getParamAsVector<float>(inputs[WEIGHT_TENSOR_SCALE]);
std::variant<std::vector<int8_t>, std::vector<uint8_t>> w_zp;
if (inputs[WEIGHT_TENSOR]->GetDataType() == tim::vx::DataType::INT8) {
w_zp = getParamAsVector<int8_t>(inputs[WEIGHT_TENSOR_ZP]);
} else {
w_zp = getParamAsVector<uint8_t>(inputs[WEIGHT_TENSOR_ZP]);
}
int32_t value = std::visit([](auto& vec) {
return static_cast<int32_t>(vec[0]);
}, w_zp);
std::vector<int32_t> timvx_w_zp(w_scale.size(), value);
WeightQuant.SetType(timvx_w_zp[0] != 0 ? tim::vx::QuantType::ASYMMETRIC_PER_CHANNEL
                                       : tim::vx::QuantType::SYMMETRIC_PER_CHANNEL);
WeightQuant.SetChannelDim(3);
WeightQuant.SetScales(w_scale);
WeightQuant.SetZeroPoints(timvx_w_zp);
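// Per the ONNX QLinearConv spec, the int32 bias is quantized with
// scale = x_scale * w_scale (per output channel here) and a zero point of 0.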
if (inputs.size() == 9) {
for (auto& val : w_scale) {
val = val * x_scale;
}
BiasQuant.SetType(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL);
BiasQuant.SetChannelDim(0);
BiasQuant.SetScales(w_scale);
BiasQuant.SetZeroPoints({0});
}
} else {
auto w_scale = getParamAsScalar<float>(inputs[WEIGHT_TENSOR_SCALE]);
std::variant<int8_t, uint8_t> w_zp;
if (inputs[WEIGHT_TENSOR]->GetDataType() == tim::vx::DataType::INT8) {
w_zp = getParamAsScalar<int8_t>(inputs[WEIGHT_TENSOR_ZP]);
} else {
w_zp = getParamAsScalar<uint8_t>(inputs[WEIGHT_TENSOR_ZP]);
}
int32_t timvx_w_zp = std::visit([](auto arg) -> int32_t { return static_cast<int32_t>(arg); }, w_zp);
WeightQuant.SetType(tim::vx::QuantType::ASYMMETRIC);
WeightQuant.SetScales({w_scale});
WeightQuant.SetZeroPoints({timvx_w_zp});
if (inputs.size() == 9) {
BiasQuant.SetType(tim::vx::QuantType::ASYMMETRIC);
BiasQuant.SetScales({x_scale * w_scale});
BiasQuant.SetZeroPoints({0});
}
}
int32_t timvx_x_zp = std::visit([](auto arg) -> int32_t { return static_cast<int32_t>(arg); }, x_zp);
int32_t timvx_y_zp = std::visit([](auto arg) -> int32_t { return static_cast<int32_t>(arg); }, y_zp);
tim::vx::Quantization InputQuant(tim::vx::QuantType::ASYMMETRIC, x_scale, timvx_x_zp);
tim::vx::Quantization OutputQuant(tim::vx::QuantType::ASYMMETRIC, y_scale, timvx_y_zp);
tim::vx::TensorSpec InputSpec(inputs[INPUT_TENSOR]->GetSpec());
InputSpec.SetQuantization(InputQuant);
tim::vx::TensorSpec WeightSpec(inputs[WEIGHT_TENSOR]->GetSpec());
WeightSpec.SetQuantization(WeightQuant);
tim::vx::TensorSpec OutputSpec(outputs[0]->GetSpec());
OutputSpec.SetQuantization(OutputQuant);
auto input_tensor = graph_ep->GetGraph()->CreateTensor(InputSpec);
auto weight_tensor = graph_ep->GetGraph()->CreateTensor(WeightSpec);
auto output_tensor = graph_ep->GetGraph()->CreateTensor(OutputSpec);
std::vector<uint8_t> weight_data(inputs[WEIGHT_TENSOR]->GetSpec().GetElementNum());
inputs[WEIGHT_TENSOR]->CopyDataFromTensor(weight_data.data());
weight_tensor->CopyDataToTensor(weight_data.data());

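// ONNX orders spatial attributes as (H, W) while TIM-VX expects (W, H), hence
// the swapped stride/dilation/pad indices below; layouts are likewise mapped
// NCHW -> WHCN for activations and OIHW -> WHIcOc for weights.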
NodeAttrHelper helper(*node);
auto padtype = helper.Get("auto_pad", std::string("NOTSET"));
auto group = helper.Get("group", static_cast<uint32_t>(1));
std::vector<uint32_t> default_vec = {1, 1, 1, 1};
auto stride = helper.Get("strides", default_vec);
auto dilation = helper.Get("dilations", default_vec);
std::shared_ptr<tim::vx::Operation> op;
if (padtype != "NOTSET") {  // auto_pad is in effect; the explicit "pads" attribute is ignored
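// A group count equal to the output-channel count is treated as depthwise and
// lowered to Conv2d with a channel multiplier; only true grouped convolutions
// go through GroupedConv2d.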
if (group != 1 && group != weight_tensor->GetShape()[3]) {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv2d>(
vsi::npu::util::GetPadType(padtype),
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, group,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);

} else {
int32_t multiplier = group == 1 ? 0 : weight_tensor->GetShape()[3] / input_tensor->GetShape()[2];
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv2d>(
vsi::npu::util::GetPadType(padtype),
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, multiplier,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
} else {
std::vector<uint32_t> default_pads(4, 0);
auto pads = helper.Get("pads", default_pads);
if (group != 1 && group != weight_tensor->GetShape()[3]) {
op = graph_ep->GetGraph()
->CreateOperation<tim::vx::ops::GroupedConv2d>(
std::array<uint32_t, 4>{pads[1], pads[3], pads[0], pads[2]},
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, group,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);

} else {
int32_t multiplier = group == 1 ? 0 : weight_tensor->GetShape()[3] / input_tensor->GetShape()[2];
op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Conv2d>(
std::array<uint32_t, 4>{pads[1], pads[3],
pads[0], pads[2]},
std::array<uint32_t, 2>{stride[1], stride[0]},
std::array<uint32_t, 2>{dilation[1], dilation[0]}, multiplier,
tim::vx::DataLayout::WHCN, tim::vx::DataLayout::WHIcOc);
}
}

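// The optional bias (input 8) is re-created with the bias quantization derived
// above before it is bound to the op.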
if (inputs.size() == 9) {
tim::vx::TensorSpec BiasSpec(inputs[BIAS_TENSOR]->GetSpec());
BiasSpec.SetQuantization(BiasQuant);
inputs[BIAS_TENSOR]->CopyDataFromTensor(biasdata.data());
auto bias_tensor = graph_ep->GetGraph()->CreateTensor(BiasSpec, biasdata.data());
op->BindInput(input_tensor).BindInput(weight_tensor).BindInput(bias_tensor).BindOutput(output_tensor);
} else {
op->BindInput(input_tensor).BindInput(weight_tensor).BindOutput(output_tensor);
}

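// Swap the requantized tensors into the EP's graph input/output records so
// later bindings reference the specs created above.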
for (auto& IO : graph_ep->GetGraphInputs()) {
if (IO->tensor.get() == inputs[0].get()) {
IO->tensor = input_tensor;
}
}
for (auto& IO : graph_ep->GetGraphOutputs()) {
if (IO->tensor.get() == outputs[0].get()) {
IO->tensor = output_tensor;
}
}
outputs[0] = output_tensor;
return true;
}
};
}  // namespace npu
}  // namespace vsi
} // namespace onnxruntime
onnxruntime/core/providers/vsinpu/builders/op_builder_factory.h
@@ -28,6 +28,7 @@
#include "impl/elementwise_op_builder.h"
#include "impl/gemm_op_builder.h"
#include "impl/pool_op_builder.h"
#include "impl/qlinearconv_op_builder.h"
namespace onnxruntime {
namespace vsi {
namespace npu {
@@ -60,6 +61,7 @@ static const std::map<std::string, createIOpBuildItemFunc> reg = {
REGISTER_OP_BUILDER("HardSigmoid", HardSigmoidOpBuilder),
REGISTER_OP_BUILDER("HardSwish", HardSwishOpBuilder),
REGISTER_OP_BUILDER("GlobalAveragePool", GlobalAveragePoolOpBuilder),
REGISTER_OP_BUILDER("QLinearConv", QLinearConvOpBuilder)

#undef REGISTER_OP_BUILDER
};
onnxruntime/core/providers/vsinpu/vsinpu_execution_provider.cc
@@ -22,7 +22,6 @@
*
*****************************************************************************/
#include "core/framework/compute_capability.h"
#include "core/graph/graph_utils.h"
#include "vsinpu_execution_provider.h"
#include "vsinpu_ep_graph.h"
#include "builders/op_builder_factory.h"
@@ -344,16 +343,14 @@ Status VSINPUExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>& fused_nodes_and_graphs,
for (const auto& fused_node_graph : fused_nodes_and_graphs) {
const GraphViewer& graph_viewer = fused_node_graph.filtered_graph;
NodeComputeInfo compute_info;

std::shared_ptr<vsi::npu::GraphEP> graph_ep = std::make_shared<vsi::npu::GraphEP>();

for (auto tensor : graph_viewer.GetInputsIncludingInitializers()) {
LOGS_DEFAULT(VERBOSE) << "subgraph input init:" << vsi::npu::util::PrintNode(*tensor) << "#"
<< graph_viewer.IsConstantInitializer(tensor->Name(), true) << "#"
- << graph_utils::IsInitializer(graph_viewer.GetGraph(), tensor->Name(), true);
+ << graph_viewer.IsInitializedTensor(tensor->Name());
auto input = std::make_shared<vsi::npu::GraphIOInfo>();
input->name = tensor->Name();
- if (graph_utils::IsInitializer(graph_viewer.GetGraph(), tensor->Name(), true)) {
+ if (graph_viewer.IsInitializedTensor(tensor->Name())) {
input->is_initializer = true;
} else {
input->is_initializer = false;
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/vsinpu/vsinpu_util.cc
@@ -79,7 +79,7 @@ tim::vx::ShapeType OnnxShapeToTIMVXShape(const onnxruntime::TensorShape ts) {
if (ts.NumDimensions() == 0) {
timvx_shape.push_back(1);
} else {
- for (int i = 0; i < ts.NumDimensions(); i++) {
+ for (size_t i = 0; i < ts.NumDimensions(); i++) {
timvx_shape[i] = ts.GetDims()[i];
}
}
@@ -246,7 +246,7 @@ bool CheckMainInputType(const Node* node, std::string& reason) {

bool CheckZeroDim(const NodeArg* node_arg) {
auto shape = node_arg->Shape();
- if (shape == nullptr || shape->dim_size() == 0) {
+ if (shape == nullptr) {
return false;
}
for (int i = 0; i < shape->dim_size(); i++) {
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/checkers.cc
@@ -106,7 +106,7 @@ struct TensorCheck<uint8_t> {
// For any other EPs, we still expect an exact match for the results
// TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513
if ((provider_type == kNnapiExecutionProvider || provider_type == kDmlExecutionProvider ||
- provider_type == kXnnpackExecutionProvider) &&
+ provider_type == kXnnpackExecutionProvider || provider_type == kVSINPUExecutionProvider) &&
(has_abs_err || has_rel_err)) {
double threshold = has_abs_err ? *(params.absolute_error)
: 0.0;
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/cpu/nn/qlinearconv_op_test.cc
@@ -498,7 +498,7 @@ class QLinearConvOpTester {
// NOTE, for now the tolerance will only apply if the NNAPI is actually used,
// if for any reason the execution falls back to CPU, we still expect an exact match
// See, 'void Check<uint8_t>(...' in onnxruntime/test/providers/provider_test_utils.cc
-#if defined(USE_NNAPI) || defined(USE_DML)
+#if defined(USE_NNAPI) || defined(USE_DML) || defined(USE_VSINPU)
// TODO: Verify if DML can possibly have a ROUNDING_MODE parameter and conform to the other EPs #41968513
abs_error = 1.0f;
#endif
