Merge branch 'master' into almilosz/move-py-files
almilosz authored Dec 11, 2024
2 parents ed8b897 + 859958f commit 62e2ec5
Showing 12 changed files with 94 additions and 25 deletions.
12 changes: 5 additions & 7 deletions .github/workflows/send_workflows_to_opentelemetry.yml
@@ -5,11 +5,7 @@ on:
workflows:
- Android ARM64 with vcpkg
- Android x64
- Documentation
- Cleanup PIP caches
- Code snippets
- Code Style
- Code coverage
- Cleanup caches
- Coverity (Ubuntu 20.04, Python 3.11)
- Debian 10 ARM
- Fedora 29 (RHEL 8.4), Python 3.9
@@ -19,10 +15,12 @@ on:
- Linux ARM64 (Ubuntu 20.04, Python 3.11)
- Linux Static CC (Ubuntu 22.04, Python 3.11, Clang)
- Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10)
- Linux (Ubuntu 22.04, Python 3.11, Intel DPC++ Compiler)
- Linux CPU Plugin Snippets with LIBXSMM (Ubuntu 20.04)
- Linux Sanitizers (Ubuntu 20.04, Python 3.9)
- macOS (Python 3.11)
- macOS ARM64 (Python 3.11)
- MO
- Python API Checks
- Manylinux 2014
- Webassembly
- Windows (VS 2019, Python 3.11, Release)
- Windows (VS 2019, Python 3.11, Debug)
29 changes: 29 additions & 0 deletions docs/RELEASE.MD
@@ -0,0 +1,29 @@
# OpenVINO Release Management
The process described below reflects the approach to managing OpenVINO releases.

## Release Milestones
- Planning
- Execution (development of new features)
- Stabilization (Feature Freeze, Code Freeze milestones)
- Validation
- Distribution

### Planning
This phase takes 2-4 weeks. It involves scoping and prioritizing the backlog, analyzing the scoped items, and collecting commitments from developers against the timelines specified by the release manager.

### Execution (development of new features)
- [OpenVINO Contributing Guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md)
- [Code Contribution Guide](https://docs.openvino.ai/2024/about-openvino/contributing/code-contribution-guide.html)
- [OpenVINO First Good Issue](https://github.com/openvinotoolkit/openvino/issues/17502)

### Stabilization (Feature Freeze, Code Freeze milestones)
- **Feature Freeze**: This milestone ensures that no new features are added to the software after a certain point. This allows the development team to focus on stabilizing and refining the existing features, fixing bugs, and improving performance without the risk of introducing new issues.
- **Code Freeze**: This milestone marks the point where no new code changes are allowed except for critical bug fixes. This helps in ensuring that the final product is stable and reliable, as it minimizes the risk of last-minute changes that could introduce new bugs or instability.

### Release Validation
- Validation is a continuous process executed on a regular basis, with a cadence that depends on the testing type: nightly, bi-weekly, or weekly.
- After Code Freeze, the testing team can perform final regression testing to ensure that recent changes have not introduced new bugs and that the software meets the required quality standards.

### Distribution
- OpenVINO is distributed in several build types: regular releases, Long-Term Support (LTS) releases, pre-releases, and nightly builds. Read more here: [OpenVINO Release Policy](https://docs.openvino.ai/2024/about-openvino/release-notes-openvino/release-policy.html)
- Several distribution channels are supported. Explore the options here: [OpenVINO Download](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html)
39 changes: 32 additions & 7 deletions src/core/include/openvino/pass/pattern/matcher.hpp
@@ -62,20 +62,45 @@ class OPENVINO_API Matcher {
// Avoid implicit string construction from nullptr.
Matcher(const std::shared_ptr<Node> pattern_node, std::nullptr_t name) = delete;

-    Matcher() = default;
-    Matcher(Output<Node>& pattern_node) : m_pattern_node{pattern_node} {}
-
-    Matcher(Output<Node>& pattern_node, const std::string& name) : m_pattern_node(pattern_node), m_name{name} {}
+    Matcher()
+        : m_match_root{},
+          m_pattern_node{},
+          m_pattern_map{},
+          m_pattern_value_maps{},
+          m_matched_list{},
+          m_name{""},
+          m_strict_mode{false} {}
+    Matcher(Output<Node>& pattern_node)
+        : m_match_root{},
+          m_pattern_node{pattern_node},
+          m_pattern_map{},
+          m_pattern_value_maps{},
+          m_matched_list{},
+          m_name{""},
+          m_strict_mode{false} {}
+
+    Matcher(Output<Node>& pattern_node, const std::string& name)
+        : m_match_root{},
+          m_pattern_node{pattern_node},
+          m_pattern_map{},
+          m_pattern_value_maps{},
+          m_matched_list{},
+          m_name{name},
+          m_strict_mode{false} {}

     /// \brief Constructs a Matcher object
     ///
     /// \param pattern_node is a pattern sub graph that will be matched against input graphs
     /// \param name is a string which is used for logging and disabling a matcher
     /// \param strict_mode forces a matcher to consider shapes and ET of nodes
     Matcher(const Output<Node>& pattern_node, const std::string& name, bool strict_mode)
-        : m_pattern_node(pattern_node),
-          m_name(name),
-          m_strict_mode(strict_mode) {}
+        : m_match_root{},
+          m_pattern_node{pattern_node},
+          m_pattern_map{},
+          m_pattern_value_maps{},
+          m_matched_list{},
+          m_name{name},
+          m_strict_mode{strict_mode} {}

// Some matches should start on a node rather than an output. These three constructors
// are transition until we work out the right way to do that.
@@ -15,7 +15,7 @@ namespace cldnn {
struct resample : public primitive_base<resample> {
CLDNN_DECLARE_PRIMITIVE(resample)

-    resample() : primitive_base("", {}) {}
+    resample() : primitive_base("", {}), scales_port(0) {}

using InterpolateOp = ov::op::util::InterpolateBase;

@@ -140,7 +140,7 @@ class ExecutionConfig {

// Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call
// So this method should be called after setting all user properties, but before apply_user_properties() call.
-    void apply_rt_info(const ov::RTMap& rt_info);
+    void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info);

std::string to_string() const;

@@ -171,6 +171,10 @@ void prepare_primitive_fusing::fuse_swiglu(program &p) {
// Apply only for high performant GPU
if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128)
return;
+
+    if (p.get_engine().get_device_info().supports_immad)
+        return;
+
// TODO: to support other glu types && other weight data types
auto itr = p.get_processing_order().begin();
std::map<primitive_id, std::vector<std::pair<primitive_id, size_t>>> fusing_history;
@@ -121,7 +121,7 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {

private:
int _zero_point_mask;
-    dnnl::memory::data_type _wzp_data_type;
+    dnnl::memory::data_type _wzp_data_type = dnnl::memory::data_type::undef;

protected:
std::unique_ptr<primitive_impl> clone() const override {
@@ -846,9 +846,11 @@ void FullyConnected_bf_tiled::GetUpdateDispatchDataFunc(KernelData& kd) const {
// quantized input is char type
kd.internalBufferSizes.push_back(input_size);
// half type of de_quan_scale and activation sum for each quantized group
+            OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero.");
kd.internalBufferSizes.push_back((input_size / quantize_grp_size) * 2 * 2);
}

+        OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero.");
kd.kernels[0].params.workGroups.global = {std::max((input_size / quantize_grp_size), (size_t)1), 1, 1};
kd.kernels[0].params.workGroups.local = {16, 1, 1};
}
@@ -983,6 +985,7 @@ KernelsData FullyConnected_bf_tiled::GetMultiKernelsData(const Params &params,
const auto& fc_params = static_cast<const fully_connected_params&>(params);

size_t quantize_grp_size = get_dynamic_quantize_group_size(fc_params);
+    OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero.");

bool bProperInput = fc_params.inputs[0].GetLayout() == dl;
if (!bProperInput && !fc_params.inputs[0].PitchesDifferFromLogicalDims()) {
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -190,7 +190,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
ExecutionConfig config = m_configs_map.at(device_id);
config.set_user_property(orig_config);
if (model->has_rt_info("runtime_options"))
-        config.apply_rt_info(model->get_rt_info<ov::AnyMap>("runtime_options"));
+        config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_user_properties(context->get_engine().get_device_info());

set_cache_info(model, config);
@@ -281,7 +281,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
ExecutionConfig config = m_configs_map.at(device_id);
config.set_user_property(orig_config);
if (model->has_rt_info("runtime_options"))
-        config.apply_rt_info(model->get_rt_info<ov::AnyMap>("runtime_options"));
+        config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_user_properties(ctx->get_engine().get_device_info());

ProgramBuilder prog(ctx->get_engine(), config);
@@ -63,7 +63,7 @@ KVCacheFusionMatcher::KVCacheFusionMatcher() {
return false;

// TODO: Support conversion internally
-        if (concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
+        if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0))
return false;

auto variable = past_node->get_variable();
Expand Down
8 changes: 5 additions & 3 deletions src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -262,10 +262,12 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
user_properties.clear();
}

-void ExecutionConfig::apply_rt_info(const ov::RTMap& rt_info) {
-    apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
+void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) {
+    if (!info.supports_immad) {
+        apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
+        apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
+    }
     apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
-    apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
}

std::string ExecutionConfig::to_string() const {
12 changes: 10 additions & 2 deletions src/plugins/intel_gpu/tests/functional/behavior/properties.cpp
@@ -2,7 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
//

+#include <algorithm>
#include "openvino/runtime/properties.hpp"
+#include "openvino/runtime/intel_gpu/properties.hpp"
#include "base/ov_behavior_test_utils.hpp"
#include "openvino/runtime/core.hpp"
#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
@@ -43,11 +45,17 @@ TEST_F(TestPropertiesGPU, RTInfoPropertiesWithDefault) {
model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());

OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU));
OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision));
OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size));
ASSERT_EQ(size.as<uint64_t>(), 0);

// GPU with systolic does not support some of rt_info
auto capabilities = core.get_property(ov::test::utils::DEVICE_GPU, ov::device::capabilities);
if (find(capabilities.cbegin(), capabilities.cend(), ov::intel_gpu::capability::HW_MATMUL) != capabilities.cend())
return;

OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision));
OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor));
ASSERT_EQ(type.as<ov::element::Type>(), ov::element::f16);
ASSERT_EQ(size.as<uint64_t>(), 0);
ASSERT_EQ(scale.as<float>(), 8.0f);
}

