Add transformation pipeline to PrePostProcessing (openvinotoolkit#28852)
### Details:
After switching from ModelOptimizer to OVC, the order in which PrePostProcessing and MOCTransformations are applied has changed:

MO path : [fw model conversion -> PrePostProcessing -> MOC] -> nncf
OVC path: [fw model conversion -> MOC] -> PrePostProcessing -> nncf

Since nncf is now applied to a model that is not fully optimized, extra FQ ops might appear, which can affect both accuracy and performance. For example, the Mul -> Conv fusion is not applied because of an extra FQ:
<img width="165" alt="{C6E93F2C-2CE3-4596-8D7F-ED7BD8013603}"
src="https://github.com/user-attachments/assets/3cbe6e07-9c07-4002-8b4c-9fb5bc662421"
/>

PrePostProcessing is not part of OVC, so we have to insert additional transformation calls inside PrePostProcessing.
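
To illustrate the user-visible effect, here is a minimal sketch (not code from this PR; it assumes the standard C++ preprocessing API): after this change, `PrePostProcessor::build()` also runs the transformation pipeline, so the Divide inserted by a `scale()` step reaches the caller already folded into a Multiply, which is exactly what the updated Python tests below assert.

```cpp
#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/core/preprocess/pre_post_process.hpp>

// Sketch: attach a scale preprocessing step to an already-converted model.
// scale(2.0f) inserts "input / 2"; after this commit, build() runs the
// transformation pipeline, so the model handed to nncf (or any other
// consumer) contains a Multiply instead of a raw Divide.
std::shared_ptr<ov::Model> add_scale_preprocessing(const std::shared_ptr<ov::Model>& model) {
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().preprocess().scale(2.0f);
    return ppp.build();
}
```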

### Tickets:
 - *CVS-160786*
 - CVS-161724

---------

Co-authored-by: Andrei Kochin <[email protected]>
Co-authored-by: Andrii Staikov <[email protected]>
3 people authored Feb 20, 2025
1 parent 74126de commit 5d00273
Showing 8 changed files with 270 additions and 86 deletions.
20 changes: 11 additions & 9 deletions src/bindings/python/tests/test_graph/test_preprocess.py
@@ -72,7 +72,8 @@ def test_graph_preprocess_scale_vector():
     assert list(model.get_output_shape(0)) == [2, 2]
     assert model.get_output_element_type(0) == Type.f32
     assert "Constant" in model_operators
-    assert "Divide" in model_operators
+    # Div will be converted to Mul in the transformations
+    assert "Multiply" in model_operators


 def test_graph_preprocess_mean_scale_convert():
@@ -95,12 +96,13 @@ def custom_preprocess(output: Output):
     model = ppp.build()

     model_operators = [op.get_name().split("_")[0] for op in model.get_ops()]
+    # Div will be converted to Mul in the transformations
     expected_ops = [
         "Parameter",
         "Convert",
         "Constant",
         "Subtract",
-        "Divide",
+        "Multiply",
         "Result",
         "Abs",
     ]
@@ -137,12 +139,13 @@ def custom_preprocess(output: Output):
     model = ppp.build()

     model_operators = [op.get_name().split("_")[0] for op in model.get_ops()]
+    # Div will be converted to Mul in the transformations
     expected_ops = [
         "Parameter",
         "Convert",
         "Constant",
         "Subtract",
-        "Divide",
+        "Multiply",
         "Result",
         "Abs",
     ]
@@ -404,7 +407,7 @@ def test_graph_preprocess_steps(algorithm, color_format1, color_format2, is_fail
         "Gather",
         "Interpolate",
     ]
-    assert len(model_operators) == 15
+    assert len(model_operators) == 12
     assert model.get_output_size() == 1
     assert list(model.get_output_shape(0)) == [1, 3, 3, 3]
     assert model.get_output_element_type(0) == Type.f32
Expand Down Expand Up @@ -456,10 +459,9 @@ def test_graph_preprocess_postprocess_layout():
"Constant",
"Result",
"Gather",
"Range",
"Transpose",
]
assert len(model_operators) == 14
assert len(model_operators) == 11
assert model.get_output_size() == 1
assert list(model.get_output_shape(0)) == [1, 1, 3, 3]
assert model.get_output_element_type(0) == Type.f32
@@ -486,9 +488,8 @@ def test_graph_preprocess_reverse_channels():
         "Constant",
         "Result",
         "Gather",
-        "Range",
     ]
-    assert len(model_operators) == 10
+    assert len(model_operators) == 7
     assert model.get_output_size() == 1
     assert list(model.get_output_shape(0)) == [1, 2, 2, 2]
     assert model.get_output_element_type(0) == Type.f32
Expand Down Expand Up @@ -628,6 +629,7 @@ def custom_preprocess(output: Output):
model = ppp.build()

model_operators = [op.get_name().split("_")[0] for op in model.get_ops()]
# Div will be converted to Mul in the transformations
expected_ops = [
"Parameter",
"Constant",
Expand All @@ -636,7 +638,7 @@ def custom_preprocess(output: Output):
"Convert",
"Abs",
"Add",
"Divide",
"Multiply",
]
assert len(model_operators) == 13
assert model.get_output_size() == 1
Expand Down
@@ -14,6 +14,8 @@ TRANSFORMATIONS_API void mark_as_dequantization_node(const std::shared_ptr<Node>

 TRANSFORMATIONS_API bool is_dequantization_node(const std::shared_ptr<const Node>& node);

+TRANSFORMATIONS_API void unmark_dequantization_node(const std::shared_ptr<Node>& node);
+
 /**
  * @ingroup ov_runtime_attr_api
  * @brief DequantizationNode class represents runtime info attribute that marks operation
@@ -131,8 +131,15 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr<ov::Model>
     using namespace ov::pass;
     REGISTER_PASS(manager, InitNodeInfo)
     if (m_low_precision_enabled) {
-        manager.register_pass<ov::pass::MarkDequantization>(
-            element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4});
+        manager.register_pass<ov::pass::MarkDequantization>(element::TypeVector{ov::element::i8,
+                                                                                ov::element::u8,
+                                                                                ov::element::i4,
+                                                                                ov::element::u4,
+                                                                                ov::element::nf4,
+                                                                                ov::element::f4e2m1,
+                                                                                ov::element::f8e4m3,
+                                                                                ov::element::f8e5m2,
+                                                                                ov::element::f8e8m0});
     }
     if (!m_use_shapes) {
         manager.register_pass<ov::pass::DisableShapeOfConstantFolding>();
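
For reference, a hedged sketch of invoking the same MOC pipeline standalone through a pass manager, mirroring what PrePostProcessing now triggers internally. The constructor arguments are an assumption inferred from the `m_use_shapes` and `m_low_precision_enabled` members used in the hunk above.

```cpp
#include <memory>

#include <openvino/core/model.hpp>
#include <openvino/pass/manager.hpp>

#include "transformations/common_optimizations/moc_transformations.hpp"

// Sketch: run MOCTransformations on a model directly.
// Assumed constructor: MOCTransformations(use_shapes, low_precision_enabled),
// matching the member names referenced in run_on_model() above.
void run_moc(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::MOCTransformations>(/*use_shapes=*/true,
                                                        /*low_precision_enabled=*/true);
    manager.run_passes(model);
}
```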
@@ -9,6 +9,10 @@ void ov::mark_as_dequantization_node(const std::shared_ptr<Node>& node) {
     rt_info[DequantizationNode::get_type_info_static()] = DequantizationNode();
 }

+void ov::unmark_dequantization_node(const std::shared_ptr<Node>& node) {
+    node->get_rt_info().erase(DequantizationNode::get_type_info_static());
+}
+
 bool ov::is_dequantization_node(const std::shared_ptr<const Node>& node) {
     const auto& rt_info = node->get_rt_info();
     return rt_info.find(DequantizationNode::get_type_info_static()) != rt_info.end();
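
The new `unmark_dequantization_node` helper completes the existing mark/query pair. A small sketch of the round trip (the include path is an assumption; the three functions come from the hunks above):

```cpp
#include <cassert>
#include <memory>

#include <openvino/core/node.hpp>

#include "transformations/rt_info/dequantization_node.hpp"  // assumed include path

// Sketch: set, query, and clear the DequantizationNode runtime-info flag.
void toggle_dequantization_flag(const std::shared_ptr<ov::Node>& node) {
    ov::mark_as_dequantization_node(node);
    assert(ov::is_dequantization_node(node));

    ov::unmark_dequantization_node(node);  // new in this commit
    assert(!ov::is_dequantization_node(node));
}
```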