Skip to content

Commit

Permalink
Merge branch 'ep_context_with_external_ini' of https://github.com/mic…
Browse files Browse the repository at this point in the history
…rosoft/onnxruntime into ep_context_with_external_ini
  • Loading branch information
HectorSVC committed Jan 27, 2025
2 parents 403ded3 + fc75131 commit 8d1d650
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ static const char* const kOrtSessionOptionShareEpContexts = "ep.share_ep_context
static const char* const kOrtSessionOptionsEpContextModelExternalInitializersFileName =
"ep.context_model_external_initializers_file_name";

// For nodes that fall back to CPU, use this config to control the minimum size of the initializer
// For nodes that fall back to CPU, use this config to control the minimum size of the initializer
// when externalizing it during serialization for EP context model
static const char* const kOrtSessionOptionsEpContextModelExternalInitializersMinSizeInBytes =
"ep.context_model_external_initializers_min_size_in_bytes";
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/python/tools/symbolic_shape_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
"GemmFastGelu": self._infer_GemmFastGelu,
"GemmFloat8": self._infer_GemmFloat8,
"GroupNorm": self._infer_GroupNorm,
"GroupNormalization": self._infer_GroupNorm,
"GroupQueryAttention": self._infer_GroupQueryAttention,
"LayerNormalization": self._infer_LayerNormalization,
"LongformerAttention": self._infer_LongformerAttention,
Expand Down Expand Up @@ -474,6 +475,7 @@ def _onnx_infer_single_node(self, node):
"PythonOp",
"MultiHeadAttention",
"GroupNorm",
"GroupNormalization",
"GroupQueryAttention",
"SparseAttention",
"SkipGroupNorm",
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/test/providers/qnn/qnn_ep_context_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ static const std::string& GetNodeAttr(const Node& node, const std::string& attr_
static GetTestModelFn BuildGraphWithQAndNonQ(bool single_ep_node = true) {
return [single_ep_node](ModelTestBuilder& builder) {
// Create non-quantized FusedMatMul node1
std::vector<float> data(200*200, 1.0f);
std::vector<float> data(200 * 200, 1.0f);
NodeArg* input1 = MakeTestInput(builder, TestInputDef<float>({200, 200}, false, data));
NodeArg* add1_ini_input2 = MakeTestInput(builder, TestInputDef<float>({200, 200}, true, data));

Expand Down Expand Up @@ -220,7 +220,7 @@ void EpCtxCpuNodeWithExternalIniFileTestBody(bool expect_external_ini_file) {
if (expect_external_ini_file) {
// Set the threshold to a small size so FusedMatMul node with weights float[200, 200] will dump to external data file
so.AddConfigEntry(kOrtSessionOptionsEpContextModelExternalInitializersMinSizeInBytes, "1024");
} // otherwise it will use the default value of 1024000, so the initializer stays in the ONNX file and no external data file is generated
} // otherwise it will use the default value of 1024000, so the initializer stays in the ONNX file and no external data file is generated

Ort::Session session(*ort_env, ToPathString(model_with_ext).c_str(), so);

Expand Down

0 comments on commit 8d1d650

Please sign in to comment.