Ort openvino 4.3 cli (#14341)
### Description
Introduce a cache_dir CLI option for graph serialisation.
Replace the existing use_compiled_network and blob_dump_path CLI options
for OpenVINO with a single command-line option, "cache_dir", which
specifies the path used for blob dump/load, improving the developer
experience.

### Motivation and Context
We previously had two separate options for setting the cache directory,
which was unnecessary.
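For illustration, a minimal sketch of configuring the new option from C++. The device type, cache path, and model file are placeholder assumptions for this example, not part of this commit; `SessionOptions::AppendExecutionProvider_OpenVINO` is the existing C++ wrapper for appending the OpenVINO EP.

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env;
  Ort::SessionOptions session_options;

  OrtOpenVINOProviderOptions options;   // the C++ default constructor zero-initializes all fields
  options.device_type = "CPU_FP32";     // placeholder device for this sketch
  options.cache_dir = "/tmp/ov_cache";  // single knob replacing use_compiled_network + blob_dump_path

  session_options.AppendExecutionProvider_OpenVINO(options);
  Ort::Session session(env, "model.onnx", session_options);  // "model.onnx" is a placeholder
  return 0;
}
```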

Co-authored-by: Preetha <[email protected]>
sfatimar and preetha-intel authored Jan 23, 2023
1 parent c252a7f commit 77b455b
Showing 12 changed files with 46 additions and 118 deletions.
7 changes: 4 additions & 3 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -554,7 +554,9 @@ typedef struct OrtMIGraphXProviderOptions {
*/
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
-  OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{}, context{}, enable_opencl_throttling{}, enable_dynamic_shapes{} {}
+  OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{},
+                                 num_of_threads{}, cache_dir{},
+                                 context{}, enable_opencl_throttling{}, enable_dynamic_shapes{} {}
#endif
/** \brief Device type string
*
@@ -564,8 +566,7 @@ typedef struct OrtOpenVINOProviderOptions {
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
-  unsigned char use_compiled_network; ///< 0 = disabled, nonzero = enabled
-  const char* blob_dump_path; // path is set to empty by default
+  const char* cache_dir; // path is set to empty by default
void* context;
unsigned char enable_opencl_throttling; ///< 0 = disabled, nonzero = enabled
unsigned char enable_dynamic_shapes; ///< 0 = disabled, nonzero = enabled
36 changes: 1 addition & 35 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -15,9 +15,6 @@

#if defined (OV_API_20)
using Exception = ov::Exception;
-#elif defined (OPENVINO_2021_4)
-using Exception = InferenceEngine::Exception;
-using WaitMode = InferenceEngine::InferRequest::WaitMode;
#else
using Exception = InferenceEngine::details::InferenceEngineException;
using WaitMode = InferenceEngine::IInferRequest::WaitMode;
@@ -45,45 +42,14 @@ bool IsCILogEnabled() {
return false;
}

-bool UseCompiledNetwork() {
-  const std::string env_name = onnxruntime::GetEnvironmentVar("OV_USE_COMPILED_NETWORK");
-  if (!env_name.empty()) {
-    return true;
-  }
-  return false;
-}
-
-std::string GetCurrentWorkingDir() {
-  std::string curr_dir;
-  ORT_UNUSED_PARAMETER(curr_dir);
-  char buff[FILENAME_MAX];
-  curr_dir = GetCurrentDir(buff, FILENAME_MAX);
-  std::string current_working_dir(buff);
-  return current_working_dir;
-}
-
-bool IsDirExists(const std::string& pathname) {
-  struct stat info;
-  if(stat(pathname.c_str(), &info) != 0) {
-    LOGS_DEFAULT(INFO) << log_tag << "cannot access pathname: " << pathname;
-    return false;
-  } else if(info.st_mode & S_IFDIR) {
-    LOGS_DEFAULT(INFO) << log_tag << "pathname exists: " << pathname;
-    return true;
-  } else {
-    LOGS_DEFAULT(INFO) << log_tag << "pathname: " << pathname << ": doesn't contain the directory 'ov_compiled_blobs' ";
-  }
-  return false;
-}
-
struct static_cast_int64 {
template <typename T1> // T1 models type statically convertible to T
int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }
};

std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
-              const SubGraphContext& subgraph_context,
+              const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
if(IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
10 changes: 1 addition & 9 deletions onnxruntime/core/providers/openvino/backend_utils.h
@@ -30,14 +30,6 @@ bool IsDebugEnabled();
// Internal diagnostic function.
bool IsCILogEnabled();

-bool UseCompiledNetwork();
-
-// std::string GetCurrentWorkingDir();
-
-// bool IsDirExists(const std::string& pathname);
-
-// void CreateDirectory(const std::string& ov_compiled_blobs_dir);
-
int GetFirstAvailableDevice(GlobalContext& global_context);

void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor);
@@ -68,7 +60,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx);

std::shared_ptr<OVNetwork>
-CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
+CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
10 changes: 2 additions & 8 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -134,22 +134,16 @@ void BasicBackend::PopulateConfigValue(OVConfig& config, ov::AnyMap& device_conf
}

void BasicBackend::EnableCaching() {
-  if (global_context_.use_compiled_network == true && global_context_.is_wholly_supported_graph) {
+  if (!global_context_.cache_dir.empty() && global_context_.is_wholly_supported_graph) {
#if defined (OPENVINO_2022_3)
#if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
_putenv_s("OV_GPU_CACHE_MODEL", "1");
#else
setenv("OV_GPU_CACHE_MODEL", "1", 1);
#endif
#endif
-    std::string cache_dir_path;
-    if (global_context_.blob_dump_path.empty()) {
-      cache_dir_path = "ov_compiled_blobs";
-    } else {
-      cache_dir_path = global_context_.blob_dump_path;
-    }
     LOGS_DEFAULT(INFO) << log_tag << "Enables Caching";
-    global_context_.ie_core.SetCache(cache_dir_path);
+    global_context_.ie_core.SetCache(global_context_.cache_dir);
}
}
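Taken together, the post-change caching path reduces to the sketch below (class context assumed from the hunk above; the OPENVINO_2022_3 environment-variable block is elided for brevity):

```cpp
void BasicBackend::EnableCaching() {
  // Caching is keyed off a single user-supplied directory: enable the OpenVINO
  // cache only when cache_dir is set and the whole graph runs on OpenVINO.
  // The implicit "ov_compiled_blobs" fallback directory is gone.
  if (!global_context_.cache_dir.empty() && global_context_.is_wholly_supported_graph) {
    LOGS_DEFAULT(INFO) << log_tag << "Enables Caching";
    global_context_.ie_core.SetCache(global_context_.cache_dir);
  }
}
```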

3 changes: 1 addition & 2 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -13,14 +13,13 @@ struct GlobalContext {
OVCore ie_core;
bool is_wholly_supported_graph = false;
bool enable_vpu_fast_compile = false;
-  bool use_compiled_network = false;
bool enable_opencl_throttling = false;
bool enable_dynamic_shapes = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
std::string device_id;
-  std::string blob_dump_path;
+  std::string cache_dir;
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
std::string onnx_model_name;
onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -13,14 +13,13 @@ namespace onnxruntime {

OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kOpenVINOExecutionProvider} {

InitProviderOrtApi();

openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
-  openvino_ep::BackendManager::GetGlobalContext().use_compiled_network = info.use_compiled_network_;
-  openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
+  openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
@@ -33,7 +32,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
}
//to check if target device is available
//using ie_core capability GetAvailableDevices to fetch list of devices plugged in
-  if (info.use_compiled_network_ == false) {
+  if (info.cache_dir_.empty()) {
bool device_found = false;
bool device_id_found = false;
auto available_devices = openvino_ep::BackendManager::GetGlobalContext().ie_core.GetAvailableDevices();
onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -56,19 +56,18 @@ struct OpenVINOExecutionProviderInfo {
bool enable_vpu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
-  bool use_compiled_network_;
-  std::string blob_dump_path_;
+  std::string cache_dir_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;

explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
-                                         size_t num_of_threads, bool use_compiled_network,
-                                         std::string blob_dump_path, void* context, bool enable_opencl_throttling,
-                                         bool enable_dynamic_shapes)
+                                         size_t num_of_threads, std::string cache_dir,
+                                         void* context, bool enable_opencl_throttling,
+                                         bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads),
-      use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path), context_(context),
-      enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      cache_dir_(cache_dir), context_(context),
+      enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
@@ -158,7 +157,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0, false, "", NULL, false, false);
OpenVINOExecutionProviderInfo("", false, "", 0, "", NULL, false, false);
}
};

19 changes: 9 additions & 10 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -10,14 +10,14 @@ namespace onnxruntime {
struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
const char* device_id, size_t num_of_threads,
-                          bool use_compiled_network, const char* blob_dump_path, void* context,
+                          const char* cache_dir, void* context,
bool enable_opencl_throttling, bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads),
-      use_compiled_network_(use_compiled_network), context_(context),
-      enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      context_(context), enable_opencl_throttling_(enable_opencl_throttling),
+      enable_dynamic_shapes_(enable_dynamic_shapes) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
-    blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
+    cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
}
~OpenVINOProviderFactory() override {
}
@@ -29,26 +29,25 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
bool enable_vpu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
-  bool use_compiled_network_;
-  std::string blob_dump_path_;
+  std::string cache_dir_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;
};

std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
-                                     use_compiled_network_, blob_dump_path_, context_, enable_opencl_throttling_,
+                                     cache_dir_, context_, enable_opencl_throttling_,
enable_dynamic_shapes_);
return std::make_unique<OpenVINOExecutionProvider>(info);
}

std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads,
-    bool use_compiled_network, const char* blob_dump_path, void * context, bool enable_opencl_throttling,
+    const char* cache_dir, void * context, bool enable_opencl_throttling,
bool enable_dynamic_shapes) {
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile,
-    device_id, num_of_threads, use_compiled_network, blob_dump_path, context, enable_opencl_throttling,
+    device_id, num_of_threads, cache_dir, context, enable_opencl_throttling,
enable_dynamic_shapes);
}

@@ -69,7 +68,7 @@ struct OpenVINO_Provider : Provider {
auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile,
params.device_id, params.num_of_threads,
-                                                     params.use_compiled_network, params.blob_dump_path,
+                                                     params.cache_dir,
params.context, params.enable_opencl_throttling,
params.enable_dynamic_shapes);
}
21 changes: 6 additions & 15 deletions onnxruntime/python/onnxruntime_pybind_state.cc
@@ -565,7 +565,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
#ifdef USE_OPENVINO
OrtOpenVINOProviderOptions params;
params.device_type = openvino_device_type.c_str();
-  std::string blob_dump_path;
+  std::string cache_dir;

auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
@@ -582,16 +582,7 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
}

} else if (option.first == "use_compiled_network") {
if (option.second == "True") {
params.use_compiled_network = true;
} else if (option.second == "False") {
params.use_compiled_network = false;
} else {
ORT_THROW("Invalid value passed for use_compiled_network: ", option.second);
}

} else if (option.first == "enable_opencl_throttling") {
} else if (option.first == "enable_opencl_throttling") {
if (option.second == "True") {
params.enable_opencl_throttling = true;
} else if (option.second == "False") {
Expand All @@ -611,9 +602,9 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
params.device_id = option.second.c_str();
} else if (option.first == "num_of_threads") {
params.num_of_threads = std::stoi(option.second);
} else if (option.first == "blob_dump_path") {
blob_dump_path = option.second;
params.blob_dump_path = blob_dump_path.c_str();
} else if (option.first == "cache_dir") {
cache_dir = option.second;
params.cache_dir = cache_dir.c_str();
} else if (option.first == "context") {
params.context = (void*)(option.second.c_str());
} else {
@@ -1333,7 +1324,7 @@ Applies to session load, initialization, etc. Default is 0.)pbdoc")
ORT_THROW("External initializers are not supported in this build.");
#endif
});

py::class_<RunOptions>(m, "RunOptions", R"pbdoc(Configuration information for a single Run.)pbdoc")
.def(py::init())
.def_readwrite("log_severity_level", &RunOptions::run_log_severity_level,
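One detail worth noting in the cache_dir handling above (an observation, not part of the diff): OrtOpenVINOProviderOptions holds a raw const char*, so the parsed value is first copied into a std::string whose lifetime covers the use of params. Taking c_str() of a temporary instead would leave a dangling pointer:

```cpp
OrtOpenVINOProviderOptions params;
std::string cache_dir = "/tmp/ov_cache";  // hypothetical value; must outlive `params`
params.cache_dir = cache_dir.c_str();     // OK: the backing string stays alive
// params.cache_dir = std::string("/tmp/x").c_str();  // BAD: temporary destroyed at ';'
```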
5 changes: 2 additions & 3 deletions onnxruntime/test/perftest/command_args_parser.cc
@@ -61,11 +61,10 @@ namespace perftest {
"\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
"\t [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
"\t [OpenVINO only] [use_compiled_network]: Can be enabled to directly import pre-compiled blobs(VPU) or cl_cache files(iGPU) if exists else dump one. This feature is supported on MyriadX(VPU) hardware device target. Starting from OpenVINO 2021.4 version, this feature also works with iGPU with cl_cache.\n"
"\t [OpenVINO only] [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs or cl_cache files for the use_compiled_network(Model caching) feature. This overrides the default path.\n"
"\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
"\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true use_compiled_network|true blob_dump_path|\"<path>\"\"\n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
"\t [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n"
"\t [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n"
"\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
