Merge pull request microsoft#9 from chenfeiyue-cfy/vsinpu
Refine qlinearconv and add missing quantize ops
sunshinemyson authored Mar 12, 2024
2 parents cfc143b + 3fc109f commit c9bf8d5
Showing 6 changed files with 414 additions and 3 deletions.
onnxruntime/core/providers/vsinpu/builders/impl/dequantize_op_builder.cc
@@ -0,0 +1,147 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "core/providers/vsinpu/builders/impl/dequantize_op_builder.h"
#include <algorithm>

namespace onnxruntime {
namespace vsi {
namespace npu {
enum {
  input_tensor = 0,
  scale_tensor = 1,
  zero_point_tensor = 2
};

bool DequantizeLinearOpBuilder::IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
                                              const Node* node) const {
  auto input_defs = node->InputDefs();
  auto scale_shape = vsi::npu::util::GetTensorShape(*input_defs[scale_tensor]);
  NodeAttrHelper helper(*node);
  if (helper.HasAttr("block_size") && helper.Get("block_size", 0) != 0) {
    LOGS_DEFAULT(WARNING) << "Block quantization is not supported.";
    return false;
  }
  if (!graph_viewer.IsInitializedTensor(input_defs[scale_tensor]->Name()) ||
      (input_defs.size() == 3 && !graph_viewer.IsInitializedTensor(input_defs[zero_point_tensor]->Name()))) {
    LOGS_DEFAULT(WARNING) << "Only constant scale / zero point inputs are supported.";
    return false;
  }
  if (scale_shape.Size() != 1) {
    LOGS_DEFAULT(WARNING) << "Per-channel quantized input is not supported in the DequantizeLinear op.";
    return false;
  }
  return true;
}

template <typename T1, typename T2>
struct DequantizeLinearOpBuilder::DequantizeImpl {
  DequantizeImpl(vsi::npu::GraphEP* graph_ep, std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
                 std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs) {
    T1 scale;
    inputs[scale_tensor]->CopyDataFromTensor(&scale);
    T2 zero_point = 0;
    if (inputs.size() == 3) {
      inputs[zero_point_tensor]->CopyDataFromTensor(&zero_point);
    }

    // Re-create the graph input with the asymmetric quantization parameters
    // attached, so a plain DataConvert op can realize the dequantization.
    tim::vx::Quantization quant(tim::vx::QuantType::ASYMMETRIC, static_cast<float>(scale),
                                static_cast<int32_t>(zero_point));
    tim::vx::TensorSpec InSpec(inputs[0]->GetSpec());
    InSpec.SetQuantization(quant);
    auto real_input = graph_ep->GetGraph()->CreateTensor(InSpec);
    for (auto& IO : graph_ep->GetGraphInputs()) {
      if (IO->tensor.get() == inputs[0].get()) {
        IO->tensor = real_input;
      }
    }
    inputs[0] = real_input;

    auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::DataConvert>();
    (*op).SetRoundingPolicy(tim::vx::OverflowPolicy::SATURATE, tim::vx::RoundingPolicy::TO_ZERO);
    (*op).BindInput(real_input).BindOutputs(outputs);
    graph_ep->GetOps().push_back(std::move(op));
  }
};

bool DequantizeLinearOpBuilder::HandleBuildOp(vsi::npu::GraphEP* graph_ep,
                                              std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
                                              std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
                                              const Node* node) {
  LOGS_DEFAULT(INFO) << "Creating Dequantize Op.";
  NodeAttrHelper helper(*node);
  // Dispatch on (scale type, quantized input type).
  switch (inputs[scale_tensor]->GetDataType()) {
    case tim::vx::DataType::FLOAT32:
      switch (inputs[input_tensor]->GetDataType()) {
        case tim::vx::DataType::INT8:
          DequantizeImpl<float, int8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT8:
          DequantizeImpl<float, uint8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::INT16:
          DequantizeImpl<float, int16_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT16:
          DequantizeImpl<float, uint16_t>(graph_ep, inputs, outputs);
          break;
      }
      break;
    case tim::vx::DataType::FLOAT16:
      switch (inputs[input_tensor]->GetDataType()) {
        case tim::vx::DataType::INT8:
          DequantizeImpl<Ort::Float16_t, int8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT8:
          DequantizeImpl<Ort::Float16_t, uint8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::INT16:
          DequantizeImpl<Ort::Float16_t, int16_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT16:
          DequantizeImpl<Ort::Float16_t, uint16_t>(graph_ep, inputs, outputs);
          break;
      }
      break;
    case tim::vx::DataType::INT32:
      switch (inputs[input_tensor]->GetDataType()) {
        case tim::vx::DataType::INT8:
          DequantizeImpl<int32_t, int8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT8:
          DequantizeImpl<int32_t, uint8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::INT16:
          DequantizeImpl<int32_t, int16_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT16:
          DequantizeImpl<int32_t, uint16_t>(graph_ep, inputs, outputs);
          break;
      }
      break;
  }
  return true;
}

} // namespace npu

} // namespace vsi
} // namespace onnxruntime
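
For reference, per-tensor DequantizeLinear computes y = (x - zero_point) * scale elementwise. The builder above never performs that arithmetic itself: it re-tags the graph input with the quantization parameters and lets TIM-VX's DataConvert realize the conversion. A minimal scalar sketch of the equivalent math (a hypothetical helper, not part of this commit):

#include <cstdint>

// Hypothetical reference for per-tensor DequantizeLinear:
// y = (x - zero_point) * scale, applied elementwise.
template <typename QuantT>
float DequantizeScalar(QuantT x, float scale, int32_t zero_point) {
  return static_cast<float>(static_cast<int32_t>(x) - zero_point) * scale;
}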
onnxruntime/core/providers/vsinpu/builders/impl/dequantize_op_builder.h
@@ -0,0 +1,46 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"
#include "core/providers/common.h"
#include "core/providers/shared/utils/utils.h"

namespace onnxruntime {
namespace vsi {
namespace npu {
class DequantizeLinearOpBuilder : public BaseOpBuilder {
  bool IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
                     const Node* node) const override;
  bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
                     std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
                     std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
                     const Node* node) override;

 private:
  template <typename T1, typename T2>
  struct DequantizeImpl;
};
} // namespace npu

} // namespace vsi
} // namespace onnxruntime
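
Note that DequantizeImpl is only declared in this header and defined in the .cc above; that is valid C++ because the template is instantiated solely inside that translation unit. The same pattern in miniature (illustrative names only):

// widget.h -- the nested template is declared, not defined, in the header.
struct Widget {
  template <typename T>
  struct Impl;
};

// widget.cc -- the definition and every instantiation stay in one translation unit.
template <typename T>
struct Widget::Impl {
  explicit Impl(T value) : value_(value) {}
  T value_;
};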
onnxruntime/core/providers/vsinpu/builders/impl/qlinearconv_op_builder.h
@@ -73,7 +73,8 @@ class QLinearConvOpBuilder : public BaseOpBuilder {
     if (w_scale_shape.Size() != 1 && *input_defs[WEIGHT_TENSOR]->Type() == "tensor(int8)") {
       const ONNX_NAMESPACE::TensorProto* tensor_proto =
           graph_viewer.GetConstantInitializer(input_defs[WEIGHT_TENSOR_ZP]->Name(), true);
-      std::vector<int8_t> w_zp(1);
+      std::vector<int8_t> w_zp(tensor_proto->dims_size() == 0 ? 1 : tensor_proto->dims()[0]);
+
       auto status = onnxruntime::utils::UnpackTensor(
           *tensor_proto,
           tensor_proto->has_raw_data() ? tensor_proto->raw_data().data() : nullptr,
@@ -83,8 +84,8 @@ class QLinearConvOpBuilder : public BaseOpBuilder {
         LOGS_DEFAULT(ERROR) << "Failed to get data from weight zp tensor.";
         return false;
       }
-      if (w_zp[0] != 0) {
-        LOGS_DEFAULT(ERROR) << "Asymmetric perchannel quantization with datatype int8 is not supported.";
+      if (std::any_of(w_zp.begin(), w_zp.end(), [](int i) { return i != 0; })) {
+        LOGS_DEFAULT(ERROR) << "Asymmetric per-channel quantization only allows uint8, or int8 with all-zero zero points.";
         return false;
       }
     }
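
The revised check sizes w_zp from the zero-point initializer's dims, so per-channel zero points are read out in full, and int8 weights are accepted only when every zero point is zero (i.e. symmetric per-channel quantization). A standalone sketch of the same predicate (hypothetical helper, not part of this commit):

#include <algorithm>
#include <cstdint>
#include <vector>

// True when the weights are symmetrically quantized, i.e. every
// per-channel zero point is zero.
bool IsSymmetricPerChannel(const std::vector<int8_t>& w_zp) {
  return std::all_of(w_zp.begin(), w_zp.end(), [](int8_t v) { return v == 0; });
}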
onnxruntime/core/providers/vsinpu/builders/impl/quantize_op_builder.cc
@@ -0,0 +1,149 @@
/****************************************************************************
*
* Copyright (c) 2024 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "core/providers/vsinpu/builders/impl/quantize_op_builder.h"
#include <algorithm>

namespace onnxruntime {
namespace vsi {
namespace npu {
enum {
  input_tensor = 0,
  scale_tensor = 1,
  zero_point_tensor = 2
};

bool QuantizeLinearOpBuilder::IsOpSupported(const onnxruntime::GraphViewer& graph_viewer,
                                            const Node* node) const {
  auto input_defs = node->InputDefs();
  auto scale_shape = vsi::npu::util::GetTensorShape(*input_defs[scale_tensor]);
  NodeAttrHelper helper(*node);
  if (helper.HasAttr("block_size") && helper.Get("block_size", 0) != 0) {
    LOGS_DEFAULT(WARNING) << "Block quantization is not supported.";
    return false;
  }
  if (!graph_viewer.IsInitializedTensor(input_defs[scale_tensor]->Name()) ||
      (input_defs.size() == 3 && !graph_viewer.IsInitializedTensor(input_defs[zero_point_tensor]->Name()))) {
    LOGS_DEFAULT(WARNING) << "Only constant scale / zero point inputs are supported.";
    return false;
  }

  if (scale_shape.Size() != 1) {
    LOGS_DEFAULT(ERROR) << "Per-channel quantized output is not supported in the QuantizeLinear op.";
    return false;
  }
  return true;
}

template <typename T1, typename T2>
struct QuantizeLinearOpBuilder::QuantizeImpl {
  QuantizeImpl(vsi::npu::GraphEP* graph_ep, std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
               std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs) {
    T1 scale;
    inputs[scale_tensor]->CopyDataFromTensor(&scale);
    T2 zero_point = 0;
    if (inputs.size() == 3) {
      inputs[zero_point_tensor]->CopyDataFromTensor(&zero_point);
    }

    // Re-create the graph output with the asymmetric quantization parameters
    // attached, so a plain DataConvert op can realize the quantization.
    tim::vx::Quantization quant(tim::vx::QuantType::ASYMMETRIC, static_cast<float>(scale),
                                static_cast<int32_t>(zero_point));
    tim::vx::TensorSpec OutSpec(outputs[0]->GetSpec());
    OutSpec.SetQuantization(quant);
    auto real_output = graph_ep->GetGraph()->CreateTensor(OutSpec);
    for (auto& IO : graph_ep->GetGraphOutputs()) {
      if (IO->tensor.get() == outputs[0].get()) {
        IO->tensor = real_output;
      }
    }
    outputs[0] = real_output;

    auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::DataConvert>();
    (*op).SetRoundingPolicy(tim::vx::OverflowPolicy::SATURATE, tim::vx::RoundingPolicy::RTNE);
    (*op).BindInput(inputs[0]).BindOutput(real_output);
    graph_ep->GetOps().push_back(std::move(op));
  }
};

bool QuantizeLinearOpBuilder::HandleBuildOp(vsi::npu::GraphEP* graph_ep,
                                            std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
                                            std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
                                            const Node* node) {
  LOGS_DEFAULT(INFO) << "Creating Quantize Op.";
  NodeAttrHelper helper(*node);
  // Dispatch on (scale type, quantized output type).
  switch (inputs[scale_tensor]->GetDataType()) {
    case tim::vx::DataType::FLOAT32:
      switch (outputs[0]->GetDataType()) {
        case tim::vx::DataType::INT8:
          QuantizeImpl<float, int8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT8:
          QuantizeImpl<float, uint8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::INT16:
          QuantizeImpl<float, int16_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT16:
          QuantizeImpl<float, uint16_t>(graph_ep, inputs, outputs);
          break;
      }
      break;
    case tim::vx::DataType::FLOAT16:
      switch (outputs[0]->GetDataType()) {
        case tim::vx::DataType::INT8:
          QuantizeImpl<Ort::Float16_t, int8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT8:
          QuantizeImpl<Ort::Float16_t, uint8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::INT16:
          QuantizeImpl<Ort::Float16_t, int16_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT16:
          QuantizeImpl<Ort::Float16_t, uint16_t>(graph_ep, inputs, outputs);
          break;
      }
      break;
    case tim::vx::DataType::INT32:
      switch (outputs[0]->GetDataType()) {
        case tim::vx::DataType::INT8:
          QuantizeImpl<int32_t, int8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT8:
          QuantizeImpl<int32_t, uint8_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::INT16:
          QuantizeImpl<int32_t, int16_t>(graph_ep, inputs, outputs);
          break;
        case tim::vx::DataType::UINT16:
          QuantizeImpl<int32_t, uint16_t>(graph_ep, inputs, outputs);
          break;
      }
      break;
  }

  return true;
}

} // namespace npu

} // namespace vsi
} // namespace onnxruntime
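
For reference, per-tensor QuantizeLinear computes y = saturate(round(x / scale) + zero_point); the builder above delegates this to DataConvert with the SATURATE overflow and RTNE (round-to-nearest-even) rounding policies. A minimal scalar sketch of the equivalent math (a hypothetical helper, not part of this commit):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// Hypothetical reference for per-tensor QuantizeLinear with round-to-nearest-even
// and saturation, mirroring the SATURATE / RTNE policies chosen above.
template <typename QuantT>
QuantT QuantizeScalar(float x, float scale, int32_t zero_point) {
  // std::nearbyint rounds half-way cases to even under the default FE_TONEAREST mode.
  int32_t q = static_cast<int32_t>(std::nearbyint(x / scale)) + zero_point;
  q = std::clamp(q, static_cast<int32_t>(std::numeric_limits<QuantT>::min()),
                 static_cast<int32_t>(std::numeric_limits<QuantT>::max()));
  return static_cast<QuantT>(q);
}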