Simplify UnpackInitializerData API #8736

Merged 11 commits on Aug 18, 2021
105 changes: 53 additions & 52 deletions onnxruntime/core/framework/tensorprotoutils.cc
@@ -155,23 +155,23 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot
// This function does not unpack string_data of an initializer tensor
static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
const ORTCHAR_T* tensor_proto_dir,
std::unique_ptr<unsigned char[]>& unpacked_tensor,
SafeInt<size_t>& tensor_byte_size) {
std::vector<uint8_t>& unpacked_tensor) {
Contributor:
is it possible to use std::byte?

Contributor Author:
Unfortunately, it seems CUDA does not have std::byte support yet, so this reverts back to using uint8_t.
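
For context, a hedged sketch of how a call site changes with this simplification (the UnpackInitializerData calls match the diff; the surrounding variables are illustrative only):

// Before (old API): the caller managed a raw buffer plus a separate size out-parameter.
std::unique_ptr<unsigned char[]> old_data;
size_t old_byte_size = 0;
ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, old_data, old_byte_size));

// After (new API): a single std::vector<uint8_t> carries both the bytes and their count.
std::vector<uint8_t> new_data;
ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, new_data));
// new_data.data() and new_data.size() replace old_data.get() and old_byte_size at use sites.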

std::basic_string<ORTCHAR_T> external_file_path;
onnxruntime::FileOffsetType file_offset;
SafeInt<size_t> tensor_byte_size;
ORT_RETURN_IF_ERROR(GetExternalDataInfo(
tensor_proto,
tensor_proto_dir,
external_file_path,
file_offset,
tensor_byte_size));

unpacked_tensor.reset(new unsigned char[*&tensor_byte_size]);
unpacked_tensor.resize(tensor_byte_size);
ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer(
Member @yuslepukhin (Aug 17, 2021), commenting on:
ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer(

Should we check for zero len? #Resolved

Contributor Author:
It seems a zero length is fine here; the POSIX version does the same:

if (length == 0)
return Status::OK();

external_file_path.c_str(),
file_offset,
tensor_byte_size,
gsl::make_span(reinterpret_cast<char*>(unpacked_tensor.get()), tensor_byte_size)));
gsl::make_span(reinterpret_cast<char*>(unpacked_tensor.data()), tensor_byte_size)));

return Status::OK();
}
@@ -185,13 +185,11 @@ static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto
size_t expected_num_elements, size_t element_size,
/*out*/ unsigned char* p_data) {
ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data");

std::unique_ptr<unsigned char[]> unpacked_tensor;
SafeInt<size_t> tensor_byte_size = 0;
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor, tensor_byte_size));
std::vector<uint8_t> unpacked_tensor;
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor));

// ReadLittleEndian checks src and dst buffers are the same size
auto src_span = gsl::make_span(unpacked_tensor.get(), tensor_byte_size);
auto src_span = gsl::make_span(unpacked_tensor.data(), unpacked_tensor.size());
auto dst_span = gsl::make_span(p_data, expected_num_elements * element_size);

return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span);
@@ -590,12 +588,12 @@ static Status GetFileContent(
break;

/**
* @brief Convert tensor_proto to tensor format and store it to pre-allocated tensor
* @param env
* @param model_path
* @brief Convert tensor_proto to tensor format and store it to pre-allocated tensor
* @param env
* @param model_path
* @param tensor_proto tensor data in protobuf format
* @param tensorp pre-allocated tensor object, where we store the data
* @return
* @return
*/
Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
const ONNX_NAMESPACE::TensorProto& tensor_proto,
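
The doc comment above describes TensorProtoToTensor; here is a hedged usage sketch (the shape helper and allocator names are assumptions about surrounding ORT code, not part of this diff):

// Deserialize an initializer into a pre-allocated Tensor (illustrative only).
TensorShape shape = utils::GetTensorShapeFromTensorProto(tensor_proto);   // assumed helper
Tensor dst(DataTypeImpl::GetType<float>(), shape, cpu_allocator);         // cpu_allocator: assumed AllocatorPtr
ORT_RETURN_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), model_path, tensor_proto, dst));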
@@ -763,11 +761,10 @@ ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto

ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std::string& tensor_proto_name) {
// Given we are using the raw_data field in the protobuf, this will work only for little-endian format.
ORT_IF_CONSTEXPR (endian::native != endian::little) {
ORT_IF_CONSTEXPR(endian::native != endian::little) {
ORT_THROW("Big endian not supported");
}


// Set name, dimensions, type, and data of the TensorProto.
ONNX_NAMESPACE::TensorProto tensor_proto;

@@ -951,10 +948,9 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT

if (type != TensorProto_DataType_STRING) {
// need to read in sparse data first as it could be in a type specific field, in raw data, or in external data
size_t sparse_bytes = 0;
std::unique_ptr<uint8_t[]> sparse_data_storage;
ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage, sparse_bytes));
void* sparse_data = sparse_data_storage.get();
std::vector<uint8_t> sparse_data_storage;
ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage));
void* sparse_data = sparse_data_storage.data();
size_t element_size = 0;
// We want to this list to match the one used below in DenseTensorToSparseTensorProto()
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(type);
@@ -1019,7 +1015,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
" BUG! Report to onnxruntime team. element_size of: ",
element_size, " is not supported.", " type: ", type);
}
}

ORT_RETURN_IF_ERROR(status);
}
@@ -1106,33 +1102,33 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
n_dense_elements *= dim;
}

size_t tensor_bytes_size = 0;
std::unique_ptr<uint8_t[]> dense_raw_data;
ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data, tensor_bytes_size));
std::vector<uint8_t> dense_raw_data;
ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data));
size_t element_size = 0;
// We want this type list to match the one above in SparseTensorProtoToDenseTensorProto
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(data_type);
ORT_RETURN_IF_ERROR(
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));

void* dense_data = dense_raw_data.data();
switch (element_size) {
case 1: {
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint8_t>, CopyElement<uint8_t>, values, indices);
break;
}
case 2: {
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint16_t>, CopyElement<uint16_t>, values, indices);
break;
}
case 4: {
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint32_t>, CopyElement<uint32_t>, values, indices);
break;
}
case 8: {
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint64_t>, CopyElement<uint64_t>, values, indices);
break;
}
@@ -1159,39 +1155,37 @@ template common::Status GetSizeInBytesFromTensorProto<kAllocAlignment>(const ONN
size_t* out);
template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out);

#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \
size_t element_count = 0; \
if (initializer.has_raw_data()) { \
tensor_byte_size = initializer.raw_data().size(); \
element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \
} else { \
element_count = initializer.DATA_SIZE(); \
tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \
} \
tensor_byte_size_out = tensor_byte_size; \
unpacked_tensor.reset(new unsigned char[tensor_byte_size_out]); \
return onnxruntime::utils::UnpackTensor( \
initializer, \
initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \
initializer.has_raw_data() ? initializer.raw_data().size() : 0, \
reinterpret_cast<ELEMENT_TYPE*>(unpacked_tensor.get()), element_count); \
break; \
#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \
SafeInt<size_t> tensor_byte_size; \
size_t element_count = 0; \
if (initializer.has_raw_data()) { \
tensor_byte_size = initializer.raw_data().size(); \
element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \
} else { \
element_count = initializer.DATA_SIZE(); \
tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \
} \
unpacked_tensor.resize(tensor_byte_size); \
return onnxruntime::utils::UnpackTensor( \
initializer, \
initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \
initializer.has_raw_data() ? initializer.raw_data().size() : 0, \
reinterpret_cast<ELEMENT_TYPE*>(unpacked_tensor.data()), element_count); \
break; \
}
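
For illustration, assuming the FLOAT case is instantiated as CASE_UNPACK(FLOAT, float, float_data_size), the macro above expands roughly to:

case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT: {
  SafeInt<size_t> tensor_byte_size;
  size_t element_count = 0;
  if (initializer.has_raw_data()) {
    tensor_byte_size = initializer.raw_data().size();
    element_count = tensor_byte_size / sizeof(float);
  } else {
    element_count = initializer.float_data_size();
    tensor_byte_size = element_count * sizeof(float);
  }
  unpacked_tensor.resize(tensor_byte_size);
  return onnxruntime::utils::UnpackTensor(
      initializer,
      initializer.has_raw_data() ? initializer.raw_data().data() : nullptr,
      initializer.has_raw_data() ? initializer.raw_data().size() : 0,
      reinterpret_cast<float*>(unpacked_tensor.data()), element_count);
}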

Status UnpackInitializerData(const onnx::TensorProto& initializer,
const Path& model_path,
std::unique_ptr<unsigned char[]>& unpacked_tensor,
size_t& tensor_byte_size_out) {
SafeInt<size_t> tensor_byte_size;

std::vector<uint8_t>& unpacked_tensor) {
// TODO: if std::vector does not use a custom allocator, the default std::allocator will
// allocate the memory aligned to std::max_align_t; need to look into allocating
// force-aligned memory (aligned to 16 bytes or larger) for unpacked_tensor
if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) {
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(
initializer,
model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(),
unpacked_tensor,
tensor_byte_size));
tensor_byte_size_out = tensor_byte_size;
unpacked_tensor));
return Status::OK();
}

@@ -1217,5 +1211,12 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer,
}
#undef CASE_UNPACK

Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
std::vector<uint8_t>& unpacked_tensor) {
ORT_RETURN_IF(initializer.data_location() == TensorProto_DataLocation_EXTERNAL,
"The given initializer contains external data");
return UnpackInitializerData(initializer, Path(), unpacked_tensor);
}

} // namespace utils
} // namespace onnxruntime
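
Regarding the TODO in UnpackInitializerData about forced alignment: a hypothetical sketch, not part of this PR, of a minimal C++17 aligned allocator that std::vector<uint8_t> could use if stronger alignment turns out to be needed (the 64-byte value is an assumption):

#include <cstddef>
#include <limits>
#include <new>
#include <vector>

// Minimal allocator that over-aligns every allocation to Alignment bytes.
template <typename T, std::size_t Alignment = 64>
struct AlignedAllocator {
  using value_type = T;
  AlignedAllocator() noexcept = default;
  template <typename U>
  AlignedAllocator(const AlignedAllocator<U, Alignment>&) noexcept {}

  T* allocate(std::size_t n) {
    if (n > std::numeric_limits<std::size_t>::max() / sizeof(T)) throw std::bad_alloc();
    return static_cast<T*>(::operator new(n * sizeof(T), std::align_val_t(Alignment)));
  }
  void deallocate(T* p, std::size_t) noexcept {
    ::operator delete(p, std::align_val_t(Alignment));
  }
};

template <typename T, typename U, std::size_t A>
bool operator==(const AlignedAllocator<T, A>&, const AlignedAllocator<U, A>&) noexcept { return true; }
template <typename T, typename U, std::size_t A>
bool operator!=(const AlignedAllocator<T, A>&, const AlignedAllocator<U, A>&) noexcept { return false; }

// Possible use (would require changing the API's vector type accordingly):
// std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> unpacked_tensor;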
24 changes: 16 additions & 8 deletions onnxruntime/core/framework/tensorprotoutils.h
@@ -47,11 +47,11 @@ common::Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_prot
const ONNX_NAMESPACE::TensorProto& input, const MemBuffer& m, OrtValue& value);
/**
* @brief Deserialize a TensorProto into a preallocated empty Tensor
* @param env
* @param model_path
* @param env
* @param model_path
* @param tensor_proto source data
* @param tensorp destination empty tensor
* @return
* @return
*/
common::Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
const ONNX_NAMESPACE::TensorProto& tensor_proto,
@@ -304,15 +304,23 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model
* Unpack the data from an initializer tensor
* Please note, this function does not unpack string_data of an initializer tensor
* @param initializer given initializer tensor
* @param initializer_dir model_path to construct external data dir path. When this is empty, current dir is used.
* @param unpacked_tensor the data from the initializer in byte form
* @param tensor_byte_size the byte size of the unpacked_tensor
* @param model_path model_path to construct external data dir path. When this is empty, current dir is used.
* @param unpacked_tensor the vector holds data from the initializer in byte form
* @returns Status::OK() if data is unpacked successfully
*/
common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
const Path& model_path,
std::unique_ptr<unsigned char[]>& unpacked_tensor,
size_t& tensor_byte_size) ORT_MUST_USE_RESULT;
std::vector<uint8_t>& unpacked_tensor);

/**
* Unpack the data from an internal initializer tensor, will return error when the given initializer
* contains external data
* Please note, this function does not unpack string_data of an initializer tensor
* @param initializer given initializer tensor
* @param unpacked_tensor the vector holds data from the initializer in byte form
* @returns Status::OK() if data is unpacked successfully
*/
common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
std::vector<uint8_t>& unpacked_tensor);
} // namespace utils
} // namespace onnxruntime
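
A hedged usage sketch of the two overloads declared above (initializer, model_path, and the surrounding error handling are illustrative):

// Overload with a model path: can resolve initializers whose data lives in an external file.
std::vector<uint8_t> bytes;
ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, bytes));

// Path-free overload: only for in-memory initializers; returns an error status when
// initializer.data_location() == TensorProto_DataLocation_EXTERNAL.
std::vector<uint8_t> inline_bytes;
ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, inline_bytes));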
11 changes: 5 additions & 6 deletions onnxruntime/core/graph/graph.cc
@@ -2364,7 +2364,7 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) {
node.since_version_ = node.op_->since_version();

if (node.op_->Deprecated()) {
node.op_ = nullptr;
node.op_ = nullptr;
}
}

@@ -2451,7 +2451,7 @@ void Graph::InitFunctionBodyForNode(Node& node) {
function_container_.emplace_back(std::move(func_ptr));
node.SetFunctionBody(*function_container_.back());
}
ORT_CATCH(const std::exception& ) {
ORT_CATCH(const std::exception&) {
// Return without using this function op's expansion. No need to fail just yet.
// If ORT has a specialized kernel for this op then execution will proceed
return;
@@ -3144,10 +3144,9 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
// Dense tensors larger than the threshold are added to the external file.
TensorProto* output_proto = result.add_initializer();

size_t tensor_bytes_size = 0;
std::unique_ptr<uint8_t[]> raw_data;
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), raw_data, tensor_bytes_size));

std::vector<uint8_t> raw_data;
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), raw_data));
size_t tensor_bytes_size = raw_data.size();
if (tensor_bytes_size < initializer_size_threshold) {
*output_proto = initializer;
continue;
7 changes: 3 additions & 4 deletions onnxruntime/core/graph/graph_flatbuffers_utils.cc
@@ -44,11 +44,10 @@ Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder,
std::copy(initializer.string_data().cbegin(), initializer.string_data().cend(), string_data_vec.begin());
string_data = builder.CreateVectorOfStrings(string_data_vec);
} else {
std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size = 0;
std::vector<uint8_t> unpacked_tensor;
ORT_RETURN_IF_ERROR(
onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor, tensor_byte_size));
raw_data = builder.CreateVector(unpacked_tensor.get(), tensor_byte_size);
onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor));
raw_data = builder.CreateVector(unpacked_tensor.data(), unpacked_tensor.size());
}

fbs::TensorBuilder tb(builder);
25 changes: 11 additions & 14 deletions onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -281,21 +281,17 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
return false;
}

std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size;
auto status = onnxruntime::utils::UnpackInitializerData(
zero_tensor,
node.ModelPath(),
unpacked_tensor, tensor_byte_size);
std::vector<uint8_t> unpacked_tensor;
auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node.ModelPath(), unpacked_tensor);
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name
<< ", error msg: " << status.ErrorMessage();
return false;
}

// Verify all onnx weight zero point(s) are 0(s)
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.get());
for (size_t i = 0; i < tensor_byte_size; i++) {
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.data());
for (size_t i = 0; i < unpacked_tensor.size(); i++) {
if (zero_points[i] != 0) {
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, "
<< "zero_points[" << i << "] has value: " << zero_points[i];
@@ -315,14 +311,15 @@ float GetQuantizationScale(const InitializedTensorSet& initializers, const Node&

common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers,
const Node& node, size_t idx, int32_t& zero_point) {
std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size;
const auto& zero_point_tensor = *initializers.at(node.InputDefs()[idx]->Name());
std::vector<uint8_t> unpacked_tensor;
const auto& name = node.InputDefs()[idx]->Name();
const auto& zero_point_tensor = *initializers.at(name);
ORT_RETURN_IF_ERROR(
onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(),
unpacked_tensor, tensor_byte_size));
onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(), unpacked_tensor));

ORT_RETURN_IF(unpacked_tensor.empty(), "The initializer [", name, "] is empty");
// Onnx quantization uses uint8 [int8 not yet supported], need to cast to int32_t used by NNAPI
zero_point = static_cast<int32_t>(unpacked_tensor.get()[0]);
zero_point = static_cast<int32_t>(unpacked_tensor[0]);
Member @yuslepukhin (Aug 17, 2021), commenting on:
unpacked_tensor[0]

Need to check that the data is not empty. Perhaps this was the reason for the crashes? #Resolved

Contributor Author:
Yes, we should check the length of the buffer, but this is not the reason for the crashes

return Status::OK();
}
