From 39596db68238b14d68ee41b99d2af1cafe32ea6d Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Thu, 12 Aug 2021 21:01:57 -0700 Subject: [PATCH 01/10] Move UnpackInitializerData to use vector --- .../core/framework/tensorprotoutils.cc | 102 +++++++++--------- onnxruntime/core/framework/tensorprotoutils.h | 22 ++-- onnxruntime/core/graph/graph.cc | 11 +- .../core/graph/graph_flatbuffers_utils.cc | 7 +- .../nnapi/nnapi_builtin/builders/helper.cc | 20 ++-- .../nnapi_builtin/builders/model_builder.cc | 12 +-- .../nnapi_builtin/builders/op_builder.cc | 24 ++--- .../save_model_with_external_initializers.cc | 14 +-- 8 files changed, 103 insertions(+), 109 deletions(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index dfa95836c399a..352fe90d8f8bc 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -155,10 +155,10 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot // This function does not unpack string_data of an initializer tensor static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto, const ORTCHAR_T* tensor_proto_dir, - std::unique_ptr& unpacked_tensor, - SafeInt& tensor_byte_size) { + std::vector& unpacked_tensor) { std::basic_string external_file_path; onnxruntime::FileOffsetType file_offset; + SafeInt tensor_byte_size; ORT_RETURN_IF_ERROR(GetExternalDataInfo( tensor_proto, tensor_proto_dir, @@ -166,12 +166,12 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso file_offset, tensor_byte_size)); - unpacked_tensor.reset(new unsigned char[*&tensor_byte_size]); + unpacked_tensor.resize(*&tensor_byte_size); ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer( external_file_path.c_str(), file_offset, tensor_byte_size, - gsl::make_span(reinterpret_cast(unpacked_tensor.get()), tensor_byte_size))); + gsl::make_span(reinterpret_cast(unpacked_tensor.data()), tensor_byte_size))); return Status::OK(); } @@ -185,13 +185,11 @@ static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto size_t expected_num_elements, size_t element_size, /*out*/ unsigned char* p_data) { ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); - - std::unique_ptr unpacked_tensor; - SafeInt tensor_byte_size = 0; - ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor, tensor_byte_size)); + std::vector unpacked_tensor; + ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor)); // ReadLittleEndian checks src and dst buffers are the same size - auto src_span = gsl::make_span(unpacked_tensor.get(), tensor_byte_size); + auto src_span = gsl::make_span(unpacked_tensor.data(), unpacked_tensor.size()); auto dst_span = gsl::make_span(p_data, expected_num_elements * element_size); return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span); @@ -590,12 +588,12 @@ static Status GetFileContent( break; /** - * @brief Convert tensor_proto to tensor format and store it to pre-allocated tensor - * @param env - * @param model_path + * @brief Convert tensor_proto to tensor format and store it to pre-allocated tensor + * @param env + * @param model_path * @param tensor_proto tensor data in protobuf format * @param tensorp pre-allocated tensor object, where we store the data - * @return + * @return */ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path, const ONNX_NAMESPACE::TensorProto& 
tensor_proto, @@ -763,11 +761,10 @@ ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std::string& tensor_proto_name) { // Given we are using the raw_data field in the protobuf, this will work only for little-endian format. - ORT_IF_CONSTEXPR (endian::native != endian::little) { + ORT_IF_CONSTEXPR(endian::native != endian::little) { ORT_THROW("Big endian not supported"); } - // Set name, dimensions, type, and data of the TensorProto. ONNX_NAMESPACE::TensorProto tensor_proto; @@ -951,10 +948,9 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT if (type != TensorProto_DataType_STRING) { // need to read in sparse data first as it could be in a type specific field, in raw data, or in external data - size_t sparse_bytes = 0; - std::unique_ptr sparse_data_storage; - ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage, sparse_bytes)); - void* sparse_data = sparse_data_storage.get(); + std::vector sparse_data_storage; + ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage)); + void* sparse_data = sparse_data_storage.data(); size_t element_size = 0; // We want to this list to match the one used below in DenseTensorToSparseTensorProto() MLTypeCallDispatcherFromTypeList type_disp(type); @@ -1019,7 +1015,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, " BUG! Report to onnxruntime team. element_size of: ", element_size, " is not supported.", " type: ", type); - } + } ORT_RETURN_IF_ERROR(status); } @@ -1106,33 +1102,33 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& n_dense_elements *= dim; } - size_t tensor_bytes_size = 0; - std::unique_ptr dense_raw_data; - ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data, tensor_bytes_size)); + std::vector dense_raw_data; + ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data)); size_t element_size = 0; // We want this type list to match the one above in SparseTensorProtoToDenseTensorProto MLTypeCallDispatcherFromTypeList type_disp(data_type); ORT_RETURN_IF_ERROR( (type_disp.InvokeRetWithUnsupportedPolicy(element_size))); + void* dense_data = dense_raw_data.data(); switch (element_size) { case 1: { - SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, indices); break; } case 2: { - SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, indices); break; } case 4: { - SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, indices); break; } case 8: { - SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, indices); break; } @@ -1159,42 +1155,37 @@ template common::Status GetSizeInBytesFromTensorProto(const ONN size_t* out); template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out); -#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ - case 
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ - size_t element_count = 0; \ - if (initializer.has_raw_data()) { \ - tensor_byte_size = initializer.raw_data().size(); \ - element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ - } else { \ - element_count = initializer.DATA_SIZE(); \ - tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ - } \ - tensor_byte_size_out = tensor_byte_size; \ - unpacked_tensor.reset(new unsigned char[tensor_byte_size_out]); \ - return onnxruntime::utils::UnpackTensor( \ - initializer, \ - initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ - initializer.has_raw_data() ? initializer.raw_data().size() : 0, \ - reinterpret_cast(unpacked_tensor.get()), element_count); \ - break; \ +#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ + size_t element_count = 0; \ + if (initializer.has_raw_data()) { \ + tensor_byte_size = initializer.raw_data().size(); \ + element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ + } else { \ + element_count = initializer.DATA_SIZE(); \ + tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ + } \ + unpacked_tensor.resize(tensor_byte_size); \ + return onnxruntime::utils::UnpackTensor( \ + initializer, \ + initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ + initializer.has_raw_data() ? initializer.raw_data().size() : 0, \ + reinterpret_cast(unpacked_tensor.data()), element_count); \ + break; \ } Status UnpackInitializerData(const onnx::TensorProto& initializer, const Path& model_path, - std::unique_ptr& unpacked_tensor, - size_t& tensor_byte_size_out) { - SafeInt tensor_byte_size; - + std::vector& unpacked_tensor) { if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( initializer, model_path.IsEmpty() ? 
nullptr : model_path.ParentPath().ToPathString().c_str(), - unpacked_tensor, - tensor_byte_size)); - tensor_byte_size_out = tensor_byte_size; + unpacked_tensor)); return Status::OK(); } + SafeInt tensor_byte_size; switch (initializer.data_type()) { CASE_UNPACK(FLOAT, float, float_data_size); CASE_UNPACK(DOUBLE, double, double_data_size); @@ -1217,5 +1208,12 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer, } #undef CASE_UNPACK +Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, + std::vector& unpacked_tensor) { + ORT_RETURN_IF(initializer.data_location() == TensorProto_DataLocation_EXTERNAL, + "The given initializer contains external data"); + return UnpackInitializerData(initializer, Path(), unpacked_tensor); +} + } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index df616ff2822ec..b68d648d2f3c5 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -47,11 +47,11 @@ common::Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_prot const ONNX_NAMESPACE::TensorProto& input, const MemBuffer& m, OrtValue& value); /** * @brief Deserialize a TensorProto into a preallocated empty Tensor - * @param env - * @param model_path + * @param env + * @param model_path * @param tensor_proto source data * @param tensorp destination empty tensor - * @return + * @return */ common::Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, @@ -304,15 +304,23 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model * Unpack the data from an initializer tensor * Please note, this function does not unpack string_data of an initializer tensor * @param initializer given initializer tensor - * @param initializer_dir model_path to construct external data dir path. When this is empty, current dir is used. + * @param model_path model_path to construct external data dir path. When this is empty, current dir is used. 
* @param unpacked_tensor the data from the initializer in byte form - * @param tensor_byte_size the byte size of the unpacked_tensor * @returns Status::OK() if data is unpacked successfully */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, const Path& model_path, - std::unique_ptr& unpacked_tensor, - size_t& tensor_byte_size) ORT_MUST_USE_RESULT; + std::vector& unpacked_tensor); +/** + * Unpack the data from an internal initializer tensor; an error is returned if the given initializer + * contains external data + * Please note, this function does not unpack string_data of an initializer tensor + * @param initializer given initializer tensor + * @param unpacked_tensor the data from the initializer in byte form + * @returns Status::OK() if data is unpacked successfully + */ +common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, + std::vector& unpacked_tensor); } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 77174cde50c8d..2993a59e01567 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -2361,7 +2361,7 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) { node.since_version_ = node.op_->since_version(); if (node.op_->Deprecated()) { - node.op_ = nullptr; + node.op_ = nullptr; } } @@ -2448,7 +2448,7 @@ void Graph::InitFunctionBodyForNode(Node& node) { function_container_.emplace_back(std::move(func_ptr)); node.SetFunctionBody(*function_container_.back()); } - ORT_CATCH(const std::exception& ) { + ORT_CATCH(const std::exception&) { // Return without using this function op's expansion. No need to fail just yet. // If ORT has a specialized kernel for this op then execution will proceed return; @@ -3141,10 +3141,9 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std // Dense tensors larger than the threshold are added to the external file.
TensorProto* output_proto = result.add_initializer(); - size_t tensor_bytes_size = 0; - std::unique_ptr raw_data; - ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), raw_data, tensor_bytes_size)); - + std::vector raw_data; + ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), raw_data)); + size_t tensor_bytes_size = raw_data.size(); if (tensor_bytes_size < initializer_size_threshold) { *output_proto = initializer; continue; diff --git a/onnxruntime/core/graph/graph_flatbuffers_utils.cc b/onnxruntime/core/graph/graph_flatbuffers_utils.cc index f0dd3a1012cbc..6812582fa5a9d 100644 --- a/onnxruntime/core/graph/graph_flatbuffers_utils.cc +++ b/onnxruntime/core/graph/graph_flatbuffers_utils.cc @@ -44,11 +44,10 @@ Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder, std::copy(initializer.string_data().cbegin(), initializer.string_data().cend(), string_data_vec.begin()); string_data = builder.CreateVectorOfStrings(string_data_vec); } else { - std::unique_ptr unpacked_tensor; - size_t tensor_byte_size = 0; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor, tensor_byte_size)); - raw_data = builder.CreateVector(unpacked_tensor.get(), tensor_byte_size); + onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor)); + raw_data = builder.CreateVector(unpacked_tensor.data(), unpacked_tensor.size()); } fbs::TensorBuilder tb(builder); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index c15241ebc6334..9c34ef78d8b68 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -281,12 +281,8 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co return false; } - std::unique_ptr unpacked_tensor; - size_t tensor_byte_size; - auto status = onnxruntime::utils::UnpackInitializerData( - zero_tensor, - node.ModelPath(), - unpacked_tensor, tensor_byte_size); + std::vector unpacked_tensor; + auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node.ModelPath(), unpacked_tensor); if (!status.IsOK()) { LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name << ", error msg: " << status.ErrorMessage(); @@ -294,8 +290,8 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co } // Verify all onnx weight zero point(s) are 0(s) - const int8_t* zero_points = reinterpret_cast(unpacked_tensor.get()); - for (size_t i = 0; i < tensor_byte_size; i++) { + const int8_t* zero_points = reinterpret_cast(unpacked_tensor.data()); + for (size_t i = 0; i < unpacked_tensor.size(); i++) { if (zero_points[i] != 0) { LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, " << "zero_points[" << i << "] has value: " << zero_points[i]; @@ -315,14 +311,12 @@ float GetQuantizationScale(const InitializedTensorSet& initializers, const Node& common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) { - std::unique_ptr unpacked_tensor; - size_t tensor_byte_size; + std::vector unpacked_tensor; const auto& zero_point_tensor = *initializers.at(node.InputDefs()[idx]->Name()); ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(), - 
unpacked_tensor, tensor_byte_size)); + onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(), unpacked_tensor)); // Onnx quantization uses uint8 [int8 not yet supported], need to cast to int32_t used by NNAPI - zero_point = static_cast(unpacked_tensor.get()[0]); + zero_point = static_cast(unpacked_tensor[0]); return Status::OK(); } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 812a037713ffc..11fb49411f9c7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -281,20 +281,18 @@ Status ModelBuilder::RegisterInitializers() { std::tie(index, size, padded_size) = initializers[i++]; const uint8_t* src = nullptr; // uint8_t data need unpack, need a holder for free memory after copy - std::unique_ptr unpacked_tensor; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: src = reinterpret_cast(GetTensorFloatData(tensor)); break; case ONNX_NAMESPACE::TensorProto_DataType_UINT8: - size_t tensor_byte_size; ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, graph_viewer_.ModelPath(), - unpacked_tensor, tensor_byte_size)); - ORT_RETURN_IF_NOT(size == tensor_byte_size, - "initializer tensor: ", tensor.name(), "'s size: ", tensor_byte_size, + onnxruntime::utils::UnpackInitializerData(tensor, graph_viewer_.ModelPath(), unpacked_tensor)); + ORT_RETURN_IF_NOT(size == unpacked_tensor.size(), + "initializer tensor: ", tensor.name(), "'s size: ", unpacked_tensor.size(), " should match the calculated size: ", size); - src = unpacked_tensor.get(); + src = unpacked_tensor.data(); break; // default: // We should not get anything else here since we already checked in the 1st pass diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 9e640f56b39a2..4424db590e263 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -297,8 +297,7 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder, // TODO support other data types const uint8_t* src = nullptr; - std::unique_ptr unpacked_tensor; - size_t tensor_byte_size; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: @@ -308,8 +307,8 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder, case ONNX_NAMESPACE::TensorProto_DataType_INT8: { ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), - unpacked_tensor, tensor_byte_size)); - src = unpacked_tensor.get(); + unpacked_tensor)); + src = unpacked_tensor.data(); break; } default: @@ -389,8 +388,7 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder, // TODO support other data types const uint8_t* src = nullptr; - std::unique_ptr unpacked_tensor; - size_t tensor_byte_size; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: src = reinterpret_cast(GetTensorFloatData(tensor)); @@ -399,8 +397,8 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder, case ONNX_NAMESPACE::TensorProto_DataType_INT8: { ORT_RETURN_IF_ERROR( 
onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), - unpacked_tensor, tensor_byte_size)); - src = unpacked_tensor.get(); + unpacked_tensor)); + src = unpacked_tensor.data(); break; } default: @@ -2615,18 +2613,18 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const const auto& initializers(model_builder.GetInitializerTensors()); const auto& tensor = *initializers.at(input_name); - std::unique_ptr unpacked_tensor; - size_t tensor_byte_size; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), - unpacked_tensor, tensor_byte_size)); + unpacked_tensor)); + size_t tensor_byte_size = unpacked_tensor.size(); const auto data_type = tensor.data_type(); if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { - const int64_t* tensor_data = reinterpret_cast(unpacked_tensor.get()); + const int64_t* tensor_data = reinterpret_cast(unpacked_tensor.data()); size_t size = tensor_byte_size / sizeof(int64_t); data.insert(data.end(), tensor_data, tensor_data + size); } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) { - const int32_t* tensor_data = reinterpret_cast(unpacked_tensor.get()); + const int32_t* tensor_data = reinterpret_cast(unpacked_tensor.data()); size_t size = tensor_byte_size / sizeof(int32_t); data.insert(data.end(), tensor_data, tensor_data + size); } else { diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc index 6ac48646d5504..b1cb65a82b129 100644 --- a/onnxruntime/test/framework/save_model_with_external_initializers.cc +++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc @@ -46,13 +46,13 @@ void LoadSaveAndCompareModel(const std::string& input_onnx, const ONNX_NAMESPACE::TensorProto* tensor_proto = i.second; const ONNX_NAMESPACE::TensorProto* from_external_tensor_proto = initializers_from_external[kInitName]; - size_t tensor_proto_size = 0; - std::unique_ptr tensor_proto_data; - ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, Path(), tensor_proto_data, tensor_proto_size)); + std::vector tensor_proto_data; + ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, Path(), tensor_proto_data)); + size_t tensor_proto_size = tensor_proto_data.size(); - size_t from_external_tensor_proto_size = 0; - std::unique_ptr from_external_tensor_proto_data; - ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, Path(), from_external_tensor_proto_data, from_external_tensor_proto_size)); + std::vector from_external_tensor_proto_data; + ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, Path(), from_external_tensor_proto_data)); + size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size(); if (from_external_tensor_proto_size < initializer_size_threshold) { // 'Small' tensors should be embedded in the onnx file. @@ -63,7 +63,7 @@ void LoadSaveAndCompareModel(const std::string& input_onnx, } ASSERT_EQ(tensor_proto_size, from_external_tensor_proto_size); - EXPECT_EQ(memcmp(tensor_proto_data.get(), from_external_tensor_proto_data.get(), tensor_proto_size), 0); + EXPECT_EQ(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size), 0); } // Cleanup. 
ASSERT_EQ(std::remove(output_onnx.c_str()), 0); From 11c35d2b1699a526b1dd67bf6272d9298ce5c2d3 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 13 Aug 2021 11:39:16 -0700 Subject: [PATCH 02/10] minor update --- onnxruntime/core/framework/tensorprotoutils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 352fe90d8f8bc..d2a0dfb7203d7 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -166,7 +166,7 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso file_offset, tensor_byte_size)); - unpacked_tensor.resize(*&tensor_byte_size); + unpacked_tensor.resize(tensor_byte_size); ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer( external_file_path.c_str(), file_offset, From e8d4db241c4c1b3be5f9565f493b22df048e2e8d Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 13 Aug 2021 13:50:27 -0700 Subject: [PATCH 03/10] minor update --- onnxruntime/core/framework/tensorprotoutils.cc | 2 +- onnxruntime/core/framework/tensorprotoutils.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index d2a0dfb7203d7..3352592fb519c 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -1157,6 +1157,7 @@ template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::T #define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ + SafeInt tensor_byte_size; \ size_t element_count = 0; \ if (initializer.has_raw_data()) { \ tensor_byte_size = initializer.raw_data().size(); \ @@ -1185,7 +1186,6 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer, return Status::OK(); } - SafeInt tensor_byte_size; switch (initializer.data_type()) { CASE_UNPACK(FLOAT, float, float_data_size); CASE_UNPACK(DOUBLE, double, double_data_size); diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index b68d648d2f3c5..659222449b59e 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -305,7 +305,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model * Please note, this function does not unpack string_data of an initializer tensor * @param initializer given initializer tensor * @param model_path model_path to construct external data dir path. When this is empty, current dir is used. 
- * @param unpacked_tensor the data from the initializer in byte form + * @param unpacked_tensor the vector that holds the data from the initializer in byte form * @returns Status::OK() if data is unpacked successfully */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, @@ -317,7 +317,7 @@ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initiali * contains external data * Please note, this function does not unpack string_data of an initializer tensor * @param initializer given initializer tensor - * @param unpacked_tensor the data from the initializer in byte form + * @param unpacked_tensor the vector that holds the data from the initializer in byte form * @returns Status::OK() if data is unpacked successfully */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, From ec38b9f48b7467834c93a4c607f6f7762490fb94 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 13 Aug 2021 14:14:48 -0700 Subject: [PATCH 04/10] Update GetClipMinMax --- .../core/providers/shared/utils/utils.cc | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/shared/utils/utils.cc b/onnxruntime/core/providers/shared/utils/utils.cc index 3fd2b9bae0cc9..81c053bb4b545 100644 --- a/onnxruntime/core/providers/shared/utils/utils.cc +++ b/onnxruntime/core/providers/shared/utils/utils.cc @@ -5,9 +5,9 @@ #include "utils.h" #include +#include #include - -#include "core/providers/common.h" +#include namespace onnxruntime { @@ -66,7 +66,13 @@ bool GetClipMinMax(const InitializedTensorSet& initializers, const Node& node, LOGS(logger, VERBOSE) << "Input min of Clip must be known"; return false; } - min = GetTensorFloatData(*initializers.at(min_name))[0]; + std::vector unpacked_tensor; + auto status = onnxruntime::utils::UnpackInitializerData(*initializers.at(min_name), unpacked_tensor); + if (!status.IsOK()) { + LOGS(logger, ERROR) << "Error while unpacking min tensor: " << status.ErrorMessage(); + return false; + } + min = reinterpret_cast(unpacked_tensor.data())[0]; } if (node.InputDefs().size() > 2) { // we have input max const auto& max_name = node.InputDefs()[2]->Name(); if (!Contains(initializers, max_name)) { LOGS(logger, VERBOSE) << "Input max of Clip must be known"; return false; } - max = GetTensorFloatData(*initializers.at(max_name))[0]; + std::vector unpacked_tensor; + auto status = onnxruntime::utils::UnpackInitializerData(*initializers.at(max_name), unpacked_tensor); + if (!status.IsOK()) { + LOGS(logger, ERROR) << "Error while unpacking max tensor: " << status.ErrorMessage(); + return false; + } + max = reinterpret_cast(unpacked_tensor.data())[0]; } } From 764a656ebac6610cdf1f25e63770330c3aedece6 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 13 Aug 2021 17:37:51 -0700 Subject: [PATCH 05/10] Change uint8_t -> std::byte --- .../core/framework/tensorprotoutils.cc | 15 ++++++++------- onnxruntime/core/framework/tensorprotoutils.h | 4 ++-- onnxruntime/core/graph/graph.cc | 7 ++++--- .../core/graph/graph_flatbuffers_utils.cc | 4 ++-- .../nnapi/nnapi_builtin/builders/helper.cc | 6 +++--- .../nnapi_builtin/builders/model_builder.cc | 6 +++--- .../nnapi_builtin/builders/op_builder.cc | 19 ++++++++----------- .../core/providers/shared/utils/utils.cc | 4 ++-- .../save_model_with_external_initializers.cc | 4 ++-- 9 files changed, 34 insertions(+), 35 deletions(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index
3352592fb519c..4a02986e2cf1d 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -155,7 +155,7 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot // This function does not unpack string_data of an initializer tensor static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto, const ORTCHAR_T* tensor_proto_dir, - std::vector& unpacked_tensor) { + std::vector& unpacked_tensor) { std::basic_string external_file_path; onnxruntime::FileOffsetType file_offset; SafeInt tensor_byte_size; @@ -185,11 +185,12 @@ static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto size_t expected_num_elements, size_t element_size, /*out*/ unsigned char* p_data) { ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); - std::vector unpacked_tensor; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor)); // ReadLittleEndian checks src and dst buffers are the same size - auto src_span = gsl::make_span(unpacked_tensor.data(), unpacked_tensor.size()); + auto src_span = gsl::make_span(reinterpret_cast(unpacked_tensor.data()), + unpacked_tensor.size()); auto dst_span = gsl::make_span(p_data, expected_num_elements * element_size); return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span); @@ -948,7 +949,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT if (type != TensorProto_DataType_STRING) { // need to read in sparse data first as it could be in a type specific field, in raw data, or in external data - std::vector sparse_data_storage; + std::vector sparse_data_storage; ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage)); void* sparse_data = sparse_data_storage.data(); size_t element_size = 0; @@ -1102,7 +1103,7 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& n_dense_elements *= dim; } - std::vector dense_raw_data; + std::vector dense_raw_data; ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data)); size_t element_size = 0; // We want this type list to match the one above in SparseTensorProtoToDenseTensorProto @@ -1177,7 +1178,7 @@ template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::T Status UnpackInitializerData(const onnx::TensorProto& initializer, const Path& model_path, - std::vector& unpacked_tensor) { + std::vector& unpacked_tensor) { if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( initializer, @@ -1209,7 +1210,7 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer, #undef CASE_UNPACK Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, - std::vector& unpacked_tensor) { + std::vector& unpacked_tensor) { ORT_RETURN_IF(initializer.data_location() == TensorProto_DataLocation_EXTERNAL, "The given initializer contains external data"); return UnpackInitializerData(initializer, Path(), unpacked_tensor); diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index 659222449b59e..b632235d2c70d 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -310,7 +310,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model */ common::Status UnpackInitializerData(const 
ONNX_NAMESPACE::TensorProto& initializer, const Path& model_path, - std::vector& unpacked_tensor); + std::vector& unpacked_tensor); /** * Unpack the data from an internal initializer tensor, will return error when the given initializer @@ -321,6 +321,6 @@ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initiali * @returns Status::OK() if data is unpacked successfully */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, - std::vector& unpacked_tensor); + std::vector& unpacked_tensor); } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 2993a59e01567..f996478626fcc 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3141,14 +3141,15 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std // Dense tensors larger than the threshold are added to the external file. TensorProto* output_proto = result.add_initializer(); - std::vector raw_data; - ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), raw_data)); - size_t tensor_bytes_size = raw_data.size(); + std::vector unpacked_tensor; + ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), unpacked_tensor)); + size_t tensor_bytes_size = unpacked_tensor.size(); if (tensor_bytes_size < initializer_size_threshold) { *output_proto = initializer; continue; } + const auto* raw_data = reinterpret_cast(unpacked_tensor.data()); for (size_t index = 0; index != tensor_bytes_size; ++index) { external_stream << raw_data[index]; } diff --git a/onnxruntime/core/graph/graph_flatbuffers_utils.cc b/onnxruntime/core/graph/graph_flatbuffers_utils.cc index 6812582fa5a9d..00b21f6ca4fe5 100644 --- a/onnxruntime/core/graph/graph_flatbuffers_utils.cc +++ b/onnxruntime/core/graph/graph_flatbuffers_utils.cc @@ -44,10 +44,10 @@ Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder, std::copy(initializer.string_data().cbegin(), initializer.string_data().cend(), string_data_vec.begin()); string_data = builder.CreateVectorOfStrings(string_data_vec); } else { - std::vector unpacked_tensor; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor)); - raw_data = builder.CreateVector(unpacked_tensor.data(), unpacked_tensor.size()); + raw_data = builder.CreateVector(reinterpret_cast(unpacked_tensor.data()), unpacked_tensor.size()); } fbs::TensorBuilder tb(builder); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 9c34ef78d8b68..4f38ca6e9ef98 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -281,7 +281,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co return false; } - std::vector unpacked_tensor; + std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node.ModelPath(), unpacked_tensor); if (!status.IsOK()) { LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name @@ -311,12 +311,12 @@ float GetQuantizationScale(const InitializedTensorSet& initializers, const Node& common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) { - std::vector unpacked_tensor; + 
std::vector unpacked_tensor; const auto& zero_point_tensor = *initializers.at(node.InputDefs()[idx]->Name()); ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(), unpacked_tensor)); // Onnx quantization uses uint8 [int8 not yet supported], need to cast to int32_t used by NNAPI - zero_point = static_cast(unpacked_tensor[0]); + zero_point = static_cast(reinterpret_cast(unpacked_tensor.data())[0]); return Status::OK(); } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index 11fb49411f9c7..cee896879a154 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -281,18 +281,18 @@ Status ModelBuilder::RegisterInitializers() { std::tie(index, size, padded_size) = initializers[i++]; const uint8_t* src = nullptr; // uint8_t data need unpack, need a holder for free memory after copy - std::vector unpacked_tensor; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: src = reinterpret_cast(GetTensorFloatData(tensor)); break; case ONNX_NAMESPACE::TensorProto_DataType_UINT8: ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, graph_viewer_.ModelPath(), unpacked_tensor)); + onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); ORT_RETURN_IF_NOT(size == unpacked_tensor.size(), "initializer tensor: ", tensor.name(), "'s size: ", unpacked_tensor.size(), " should match the calculated size: ", size); - src = unpacked_tensor.data(); + src = reinterpret_cast(unpacked_tensor.data()); break; // default: // We should not get anything else here since we already checked in the 1st pass diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 4424db590e263..9d78e536854f3 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -297,7 +297,7 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder, // TODO support other data types const uint8_t* src = nullptr; - std::vector unpacked_tensor; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: @@ -306,9 +306,8 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder, case ONNX_NAMESPACE::TensorProto_DataType_UINT8: case ONNX_NAMESPACE::TensorProto_DataType_INT8: { ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), - unpacked_tensor)); - src = unpacked_tensor.data(); + onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); + src = reinterpret_cast(unpacked_tensor.data()); break; } default: @@ -388,7 +387,7 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder, // TODO support other data types const uint8_t* src = nullptr; - std::vector unpacked_tensor; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: src = reinterpret_cast(GetTensorFloatData(tensor)); @@ -396,9 +395,8 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder, case ONNX_NAMESPACE::TensorProto_DataType_UINT8: case ONNX_NAMESPACE::TensorProto_DataType_INT8: { ORT_RETURN_IF_ERROR( - 
onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), - unpacked_tensor)); + onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); + src = reinterpret_cast(unpacked_tensor.data()); break; } default: @@ -2613,10 +2611,9 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const const auto& initializers(model_builder.GetInitializerTensors()); const auto& tensor = *initializers.at(input_name); - std::vector unpacked_tensor; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), - unpacked_tensor)); + onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); size_t tensor_byte_size = unpacked_tensor.size(); const auto data_type = tensor.data_type(); if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { diff --git a/onnxruntime/core/providers/shared/utils/utils.cc b/onnxruntime/core/providers/shared/utils/utils.cc index 81c053bb4b545..6c9c2742b694a 100644 --- a/onnxruntime/core/providers/shared/utils/utils.cc +++ b/onnxruntime/core/providers/shared/utils/utils.cc @@ -66,7 +66,7 @@ bool GetClipMinMax(const InitializedTensorSet& initializers, const Node& node, LOGS(logger, VERBOSE) << "Input min of Clip must be known"; return false; } - std::vector unpacked_tensor; + std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(*initializers.at(min_name), unpacked_tensor); if (!status.IsOK()) { LOGS(logger, ERROR) << "Error while unpacking min tensor: " << status.ErrorMessage(); @@ -81,7 +81,7 @@ bool GetClipMinMax(const InitializedTensorSet& initializers, const Node& node, LOGS(logger, VERBOSE) << "Input max of Clip must be known"; return false; } - std::vector unpacked_tensor; + std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(*initializers.at(max_name), unpacked_tensor); if (!status.IsOK()) { LOGS(logger, ERROR) << "Error while unpacking max tensor: " << status.ErrorMessage(); diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc index b1cb65a82b129..9b273c8b260c4 100644 --- a/onnxruntime/test/framework/save_model_with_external_initializers.cc +++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc @@ -46,11 +46,11 @@ void LoadSaveAndCompareModel(const std::string& input_onnx, const ONNX_NAMESPACE::TensorProto* tensor_proto = i.second; const ONNX_NAMESPACE::TensorProto* from_external_tensor_proto = initializers_from_external[kInitName]; - std::vector tensor_proto_data; + std::vector tensor_proto_data; ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, Path(), tensor_proto_data)); size_t tensor_proto_size = tensor_proto_data.size(); - std::vector from_external_tensor_proto_data; + std::vector from_external_tensor_proto_data; ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, Path(), from_external_tensor_proto_data)); size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size(); From 1ffa284ac54fd605c0651954ea4fb2cab0464526 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Fri, 13 Aug 2021 20:45:41 -0700 Subject: [PATCH 06/10] fix build break --- onnxruntime/core/framework/tensorprotoutils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index
b632235d2c70d..3185593eef883 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include From 66cee2dc051a873eac4113a1e565bb0057a7585c Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 16 Aug 2021 15:01:47 -0700 Subject: [PATCH 07/10] Revert "fix build break" This reverts commit 1ffa284ac54fd605c0651954ea4fb2cab0464526. --- onnxruntime/core/framework/tensorprotoutils.h | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index 3185593eef883..b632235d2c70d 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -3,7 +3,6 @@ #pragma once -#include #include #include From 756ea090175ef9efa082486eff3d0bbd92f35d35 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 16 Aug 2021 15:02:29 -0700 Subject: [PATCH 08/10] Revert "Change uint8_t -> std::byte" This reverts commit 764a656ebac6610cdf1f25e63770330c3aedece6. --- .../core/framework/tensorprotoutils.cc | 15 +++++++-------- onnxruntime/core/framework/tensorprotoutils.h | 4 ++-- onnxruntime/core/graph/graph.cc | 7 +++---- .../core/graph/graph_flatbuffers_utils.cc | 4 ++-- .../nnapi/nnapi_builtin/builders/helper.cc | 6 +++--- .../nnapi_builtin/builders/model_builder.cc | 6 +++--- .../nnapi_builtin/builders/op_builder.cc | 19 +++++++++++-------- .../core/providers/shared/utils/utils.cc | 4 ++-- .../save_model_with_external_initializers.cc | 4 ++-- 9 files changed, 35 insertions(+), 34 deletions(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 4a02986e2cf1d..3352592fb519c 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -155,7 +155,7 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot // This function does not unpack string_data of an initializer tensor static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto, const ORTCHAR_T* tensor_proto_dir, - std::vector& unpacked_tensor) { + std::vector& unpacked_tensor) { std::basic_string external_file_path; onnxruntime::FileOffsetType file_offset; SafeInt tensor_byte_size; @@ -185,12 +185,11 @@ static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto size_t expected_num_elements, size_t element_size, /*out*/ unsigned char* p_data) { ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); - std::vector unpacked_tensor; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor)); // ReadLittleEndian checks src and dst buffers are the same size - auto src_span = gsl::make_span(reinterpret_cast(unpacked_tensor.data()), - unpacked_tensor.size()); + auto src_span = gsl::make_span(unpacked_tensor.data(), unpacked_tensor.size()); auto dst_span = gsl::make_span(p_data, expected_num_elements * element_size); return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span); @@ -949,7 +948,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT if (type != TensorProto_DataType_STRING) { // need to read in sparse data first as it could be in a type specific field, in raw data, or in external data - std::vector sparse_data_storage; + std::vector sparse_data_storage; ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, 
sparse_data_storage)); void* sparse_data = sparse_data_storage.data(); size_t element_size = 0; @@ -1103,7 +1102,7 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& n_dense_elements *= dim; } - std::vector dense_raw_data; + std::vector dense_raw_data; ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data)); size_t element_size = 0; // We want this type list to match the one above in SparseTensorProtoToDenseTensorProto @@ -1178,7 +1177,7 @@ template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::T Status UnpackInitializerData(const onnx::TensorProto& initializer, const Path& model_path, - std::vector& unpacked_tensor) { + std::vector& unpacked_tensor) { if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( initializer, @@ -1210,7 +1209,7 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer, #undef CASE_UNPACK Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, - std::vector& unpacked_tensor) { + std::vector& unpacked_tensor) { ORT_RETURN_IF(initializer.data_location() == TensorProto_DataLocation_EXTERNAL, "The given initializer contains external data"); return UnpackInitializerData(initializer, Path(), unpacked_tensor); diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index b632235d2c70d..659222449b59e 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -310,7 +310,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, const Path& model_path, - std::vector& unpacked_tensor); + std::vector& unpacked_tensor); /** * Unpack the data from an internal initializer tensor, will return error when the given initializer @@ -321,6 +321,6 @@ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initiali * @returns Status::OK() if data is unpacked successfully */ common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, - std::vector& unpacked_tensor); + std::vector& unpacked_tensor); } // namespace utils } // namespace onnxruntime diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index f996478626fcc..2993a59e01567 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3141,15 +3141,14 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std // Dense tensors larger than the threshold are added to the external file. 
TensorProto* output_proto = result.add_initializer(); - std::vector unpacked_tensor; - ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), unpacked_tensor)); - size_t tensor_bytes_size = unpacked_tensor.size(); + std::vector raw_data; + ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, Path(), raw_data)); + size_t tensor_bytes_size = raw_data.size(); if (tensor_bytes_size < initializer_size_threshold) { *output_proto = initializer; continue; } - const auto* raw_data = reinterpret_cast(unpacked_tensor.data()); for (size_t index = 0; index != tensor_bytes_size; ++index) { external_stream << raw_data[index]; } diff --git a/onnxruntime/core/graph/graph_flatbuffers_utils.cc b/onnxruntime/core/graph/graph_flatbuffers_utils.cc index 00b21f6ca4fe5..6812582fa5a9d 100644 --- a/onnxruntime/core/graph/graph_flatbuffers_utils.cc +++ b/onnxruntime/core/graph/graph_flatbuffers_utils.cc @@ -44,10 +44,10 @@ Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder, std::copy(initializer.string_data().cbegin(), initializer.string_data().cend(), string_data_vec.begin()); string_data = builder.CreateVectorOfStrings(string_data_vec); } else { - std::vector unpacked_tensor; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor)); - raw_data = builder.CreateVector(reinterpret_cast(unpacked_tensor.data()), unpacked_tensor.size()); + raw_data = builder.CreateVector(unpacked_tensor.data(), unpacked_tensor.size()); } fbs::TensorBuilder tb(builder); diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 4f38ca6e9ef98..9c34ef78d8b68 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -281,7 +281,7 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co return false; } - std::vector unpacked_tensor; + std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node.ModelPath(), unpacked_tensor); if (!status.IsOK()) { LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name @@ -311,12 +311,12 @@ float GetQuantizationScale(const InitializedTensorSet& initializers, const Node& common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) { - std::vector unpacked_tensor; + std::vector unpacked_tensor; const auto& zero_point_tensor = *initializers.at(node.InputDefs()[idx]->Name()); ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(), unpacked_tensor)); // Onnx quantization uses uint8 [int8 not yet supported], need to cast to int32_t used by NNAPI - zero_point = static_cast(reinterpret_cast(unpacked_tensor.data())[0]); + zero_point = static_cast(unpacked_tensor[0]); return Status::OK(); } diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc index cee896879a154..11fb49411f9c7 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc @@ -281,18 +281,18 @@ Status ModelBuilder::RegisterInitializers() { std::tie(index, size, padded_size) = initializers[i++]; const uint8_t* src = 
nullptr; // uint8_t data need unpack, need a holder for free memory after copy - std::vector unpacked_tensor; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: src = reinterpret_cast(GetTensorFloatData(tensor)); break; case ONNX_NAMESPACE::TensorProto_DataType_UINT8: ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); + onnxruntime::utils::UnpackInitializerData(tensor, graph_viewer_.ModelPath(), unpacked_tensor)); ORT_RETURN_IF_NOT(size == unpacked_tensor.size(), "initializer tensor: ", tensor.name(), "'s size: ", unpacked_tensor.size(), " should match the calculated size: ", size); - src = reinterpret_cast(unpacked_tensor.data()); + src = unpacked_tensor.data(); break; // default: // We should not get anything else here since we already checked in the 1st pass diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc index 9d78e536854f3..4424db590e263 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc @@ -297,7 +297,7 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder, // TODO support other data types const uint8_t* src = nullptr; - std::vector unpacked_tensor; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: @@ -306,8 +306,9 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder, case ONNX_NAMESPACE::TensorProto_DataType_UINT8: case ONNX_NAMESPACE::TensorProto_DataType_INT8: { ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); - src = reinterpret_cast(unpacked_tensor.data()); + onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), + unpacked_tensor)); + src = unpacked_tensor.data(); break; } default: @@ -387,7 +388,7 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder, // TODO support other data types const uint8_t* src = nullptr; - std::vector unpacked_tensor; + std::vector unpacked_tensor; switch (tensor.data_type()) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: src = reinterpret_cast(GetTensorFloatData(tensor)); @@ -395,8 +396,9 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder, case ONNX_NAMESPACE::TensorProto_DataType_UINT8: case ONNX_NAMESPACE::TensorProto_DataType_INT8: { ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); - src = reinterpret_cast(unpacked_tensor.data()); + onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), + unpacked_tensor)); + src = unpacked_tensor.data(); break; } default: @@ -2611,9 +2613,10 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const const auto& initializers(model_builder.GetInitializerTensors()); const auto& tensor = *initializers.at(input_name); - std::vector unpacked_tensor; + std::vector unpacked_tensor; ORT_RETURN_IF_ERROR( - onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor)); + onnxruntime::utils::UnpackInitializerData(tensor, model_builder.GetGraphViewer().ModelPath(), + unpacked_tensor)); size_t tensor_byte_size = unpacked_tensor.size(); const auto data_type = tensor.data_type(); if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) { diff --git 
a/onnxruntime/core/providers/shared/utils/utils.cc b/onnxruntime/core/providers/shared/utils/utils.cc index 6c9c2742b694a..81c053bb4b545 100644 --- a/onnxruntime/core/providers/shared/utils/utils.cc +++ b/onnxruntime/core/providers/shared/utils/utils.cc @@ -66,7 +66,7 @@ bool GetClipMinMax(const InitializedTensorSet& initializers, const Node& node, LOGS(logger, VERBOSE) << "Input min of Clip must be known"; return false; } - std::vector unpacked_tensor; + std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(*initializers.at(min_name), unpacked_tensor); if (!status.IsOK()) { LOGS(logger, ERROR) << "Error while unpacking min tensor: " << status.ErrorMessage(); @@ -81,7 +81,7 @@ bool GetClipMinMax(const InitializedTensorSet& initializers, const Node& node, LOGS(logger, VERBOSE) << "Input max of Clip must be known"; return false; } - std::vector unpacked_tensor; + std::vector unpacked_tensor; auto status = onnxruntime::utils::UnpackInitializerData(*initializers.at(max_name), unpacked_tensor); if (!status.IsOK()) { LOGS(logger, ERROR) << "Error while unpacking max tensor: " << status.ErrorMessage(); diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc index 9b273c8b260c4..b1cb65a82b129 100644 --- a/onnxruntime/test/framework/save_model_with_external_initializers.cc +++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc @@ -46,11 +46,11 @@ void LoadSaveAndCompareModel(const std::string& input_onnx, const ONNX_NAMESPACE::TensorProto* tensor_proto = i.second; const ONNX_NAMESPACE::TensorProto* from_external_tensor_proto = initializers_from_external[kInitName]; - std::vector tensor_proto_data; + std::vector tensor_proto_data; ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, Path(), tensor_proto_data)); size_t tensor_proto_size = tensor_proto_data.size(); - std::vector from_external_tensor_proto_data; + std::vector from_external_tensor_proto_data; ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, Path(), from_external_tensor_proto_data)); size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size(); From 2b0769dd42c273857796ad85690344b49f5d6973 Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 16 Aug 2021 15:08:54 -0700 Subject: [PATCH 09/10] Add todo notes for extra vector alignment --- onnxruntime/core/framework/tensorprotoutils.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 3352592fb519c..5534f5f34760d 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -1178,6 +1178,9 @@ template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::T Status UnpackInitializerData(const onnx::TensorProto& initializer, const Path& model_path, std::vector& unpacked_tensor) { + // TODO: if std::vector does not use a custom allocator, the default std::allocator will + // allocate the memory aligned to std::max_align_t; we need to look into allocating + // force-aligned memory (aligned to 16 bytes or larger) for unpacked_tensor if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( initializer, From 210cd1ddd38c0f8f3380bc1604b5b8615f4cb51f Mon Sep 17 00:00:00 2001 From: Guoyu Wang Date: Mon, 16 Aug 2021 22:47:16 -0700 Subject: [PATCH 10/10] add check for result size ---
.../core/providers/nnapi/nnapi_builtin/builders/helper.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 9c34ef78d8b68..1111be32bcb19 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -312,9 +312,12 @@ float GetQuantizationScale(const InitializedTensorSet& initializers, const Node& common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers, const Node& node, size_t idx, int32_t& zero_point) { std::vector unpacked_tensor; - const auto& zero_point_tensor = *initializers.at(node.InputDefs()[idx]->Name()); + const auto& name = node.InputDefs()[idx]->Name(); + const auto& zero_point_tensor = *initializers.at(name); ORT_RETURN_IF_ERROR( onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(), unpacked_tensor)); + + ORT_RETURN_IF(unpacked_tensor.empty(), "The initializer [", name, "] is empty"); // Onnx quantization uses uint8 [int8 not yet supported], need to cast to int32_t used by NNAPI zero_point = static_cast(unpacked_tensor[0]); return Status::OK();
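
For reference, a minimal usage sketch of the two UnpackInitializerData overloads this series ends up with. The ReadFirstFloat helper and the float element type are illustrative assumptions, not part of the patches; the calls themselves mirror the call sites updated above.

#include <vector>

#include "core/framework/tensorprotoutils.h"

namespace onnxruntime {

// Sketch only: unpack an initializer into a byte buffer and read its first float element.
common::Status ReadFirstFloat(const ONNX_NAMESPACE::TensorProto& initializer,
                              const Path& model_path, float& first_value) {
  std::vector<uint8_t> unpacked_tensor;
  // The three-argument overload resolves external data relative to model_path.
  // The two-argument overload, UnpackInitializerData(initializer, unpacked_tensor),
  // instead returns an error if the initializer holds external data.
  ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, unpacked_tensor));
  // The byte size now travels with the buffer, so there is no separate
  // tensor_byte_size out-parameter to keep in sync with the allocation.
  ORT_RETURN_IF(unpacked_tensor.size() < sizeof(float), "Initializer is too small");
  first_value = reinterpret_cast<const float*>(unpacked_tensor.data())[0];
  return common::Status::OK();
}

}  // namespace onnxruntime

Returning the bytes in a std::vector ties the size to the allocation, which is what lets the series drop the tensor_byte_size out-parameter previously threaded through every call site.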