// Patch overview (dev_api headers): adds an inline IAsyncInferRequest::setSecondTaskExecutor() that
// stores its argument in the new m_second_request_executor member, and adds
// ICompiledModel::m_second_task_executor (initialized to nullptr) together with the
// get_second_task_executor() declaration.
// NOTE(review): the trailing comment on m_second_request_executor repeats the one on
// m_request_executor, and setSecondTaskExecutor is camelCase while the accessors declared for
// ICompiledModel are snake_case - confirm the intended wording and naming.
// NOTE(review): template arguments look stripped in this text (e.g. "std::shared_ptr task_executor",
// "std::vector>&") - presumably an extraction artifact; verify against the real patch.
diff --git a/src/inference/dev_api/openvino/runtime/iasync_infer_request.hpp b/src/inference/dev_api/openvino/runtime/iasync_infer_request.hpp index 14c6fa2d657c88..4873c18211631a 100644 --- a/src/inference/dev_api/openvino/runtime/iasync_infer_request.hpp +++ b/src/inference/dev_api/openvino/runtime/iasync_infer_request.hpp @@ -154,6 +154,10 @@ class OPENVINO_RUNTIME_API IAsyncInferRequest : public IInferRequest { */ const std::vector>& get_outputs() const override; + void setSecondTaskExecutor(std::shared_ptr task_executor) { + m_second_request_executor = task_executor; + } + protected: using Stage = std::pair, ov::threading::Task>; /** @@ -271,6 +275,7 @@ class OPENVINO_RUNTIME_API IAsyncInferRequest : public IInferRequest { std::shared_ptr m_sync_request; std::shared_ptr m_request_executor; //!< Used to run inference CPU tasks. + std::shared_ptr m_second_request_executor; //!< Used to run inference CPU tasks. std::shared_ptr m_callback_executor; //!< Used to run post inference callback in asynchronous pipline std::shared_ptr diff --git a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp index 01f7b556da909f..be25b853cc5281 100644 --- a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp +++ b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp @@ -149,6 +149,7 @@ class OPENVINO_RUNTIME_API ICompiledModel : public std::enable_shared_from_this< ov::SoPtr m_context; std::shared_ptr m_task_executor = nullptr; //!< Holds a task executor + std::shared_ptr m_second_task_executor = nullptr; std::shared_ptr m_callback_executor = nullptr; //!< Holds a callback executor friend ov::CoreImpl; @@ -182,8 +183,10 @@ class OPENVINO_RUNTIME_API ICompiledModel : public std::enable_shared_from_this< */ const std::shared_ptr& get_plugin() const; const std::shared_ptr get_task_executor() const; + const std::shared_ptr get_second_task_executor() const; const std::shared_ptr get_callback_executor() 
const; void set_task_executor(const std::shared_ptr task_executor); + void set_second_task_executor(const std::shared_ptr task_executor); void set_callback_executor(const std::shared_ptr callback_executor); static void set_model_shared_object(ov::Model& model, const std::shared_ptr& shared_object); diff --git a/src/inference/src/dev/iasync_infer_request.cpp b/src/inference/src/dev/iasync_infer_request.cpp index 9e914a7c38b80b..80137eee109f1f 100644 --- a/src/inference/src/dev/iasync_infer_request.cpp +++ b/src/inference/src/dev/iasync_infer_request.cpp @@ -107,6 +107,18 @@ void ov::IAsyncInferRequest::infer_thread_unsafe() { } void ov::IAsyncInferRequest::start_async_thread_unsafe() { + // test code + static int g_num = 0; + if (g_num < 100) { + m_pipeline = {{m_second_request_executor, [this] { + m_sync_request->infer(); + }}}; + } else { + m_pipeline = {{m_request_executor, [this] { + m_sync_request->infer(); + }}}; + } + g_num++; run_first_stage(m_pipeline.begin(), m_pipeline.end(), m_callback_executor); } diff --git a/src/inference/src/dev/icompiled_model.cpp b/src/inference/src/dev/icompiled_model.cpp index b1cbedac1632ab..bba5d5fc0a4c06 100644 --- a/src/inference/src/dev/icompiled_model.cpp +++ b/src/inference/src/dev/icompiled_model.cpp @@ -126,6 +126,9 @@ const std::shared_ptr& ov::ICompiledModel::get_plugin() const const std::shared_ptr ov::ICompiledModel::get_task_executor() const { return m_task_executor; } +const std::shared_ptr ov::ICompiledModel::get_second_task_executor() const { + return m_second_task_executor; +} const std::shared_ptr ov::ICompiledModel::get_callback_executor() const { return m_callback_executor; } @@ -133,6 +136,9 @@ const std::shared_ptr ov::ICompiledModel::get_call void ov::ICompiledModel::set_task_executor(const std::shared_ptr task_executor) { m_task_executor = task_executor; } +void ov::ICompiledModel::set_second_task_executor(const std::shared_ptr task_executor) { + m_second_task_executor = task_executor; +} void 
// Patch overview (intel_cpu compiled_model.cpp): when m_cfg.second_executor_config is set, the
// constructor obtains m_second_task_executor from get_idle_cpu_streams_executor() using
// m_cfg.secondStreamExecutorConfig and registers it via set_second_task_executor();
// create_infer_request() then passes get_second_task_executor() to setSecondTaskExecutor().
// NOTE(review): the executor is created inside a nested scope but is registered and handed to
// requests based on the config flag alone - confirm it cannot still be null on those paths.
ov::ICompiledModel::set_callback_executor(const std::shared_ptr callback_executor) { m_callback_executor = callback_executor; diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index bbee5d937be5d5..47769cc6dcda1f 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -71,6 +71,10 @@ CompiledModel::CompiledModel(const std::shared_ptr& model, true} : m_cfg.streamExecutorConfig; m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_confg); + if (m_cfg.second_executor_config) { + m_second_task_executor = + m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(m_cfg.secondStreamExecutorConfig); + } } if (0 != m_cfg.streamExecutorConfig.get_streams()) { m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor( @@ -79,8 +83,12 @@ CompiledModel::CompiledModel(const std::shared_ptr& model, m_callback_executor = m_task_executor; } - if (m_task_executor) + if (m_task_executor) { set_task_executor(m_task_executor); + if (m_cfg.second_executor_config) { + set_second_task_executor(m_second_task_executor); + } + } if (m_callback_executor) set_callback_executor(m_callback_executor); @@ -193,6 +201,9 @@ std::shared_ptr CompiledModel::create_infer_request() co std::make_shared(std::static_pointer_cast(internal_request), get_task_executor(), get_callback_executor()); + if (m_cfg.second_executor_config) { + async_infer_request->setSecondTaskExecutor(get_second_task_executor()); + } if (m_has_sub_compiled_models) { std::vector> requests; for (auto model : m_sub_compiled_models) { diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h index faedf1ae5a744c..a5e1357fc06dbc 100644 --- a/src/plugins/intel_cpu/src/compiled_model.h +++ b/src/plugins/intel_cpu/src/compiled_model.h @@ -58,6 +58,7 @@ class CompiledModel : public ov::ICompiledModel { const 
std::shared_ptr m_model; const std::shared_ptr m_plugin; std::shared_ptr m_task_executor = nullptr; //!< Holds a task executor + std::shared_ptr m_second_task_executor = nullptr; //!< Holds a task executor std::shared_ptr m_callback_executor = nullptr; //!< Holds a callback executor // Generic synchronization primitive on CompiledModel level. diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 5f4bb25ede350e..5430db1bd2c59a 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -65,6 +65,8 @@ struct Config { size_t rtCacheCapacity = 0ul; #endif ov::threading::IStreamsExecutor::Config streamExecutorConfig; + ov::threading::IStreamsExecutor::Config secondStreamExecutorConfig; + bool second_executor_config = false; int streams = 1; bool streamsChanged = false; int threads = 0; diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 0ed64d49ea68dd..4b6d9481e246e8 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -703,6 +703,16 @@ std::vector> generate_stream_info(const int streams, false, cpu_pinning, streams_info_table}; + if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 || config.streams == 1) { + config.second_executor_config = true; + std::cout << " secondStreamExecutorConfig: threads--- " << proc_type_table[0][ALL_PROC] << "\n"; + config.secondStreamExecutorConfig = IStreamsExecutor::Config{"CPUSecondStreamsExecutor", + config.streams, + proc_type_table[0][ALL_PROC], + ov::hint::SchedulingCoreType::ANY_CORE, + false, + false}; + } return proc_type_table; } diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 5c88772eeedabc..956816898e6eee 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -141,6 +141,7 @@ Plugin::~Plugin() { 
executor_manager()->clear("CPU"); executor_manager()->clear("CPUStreamsExecutor"); executor_manager()->clear("CPUMainStreamExecutor"); + executor_manager()->clear("CPUSecondStreamsExecutor"); executor_manager()->clear("CPUCallbackExecutor"); }