diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py
index 3fe1ba465dfd1f..a1c6aecc45d421 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py
@@ -77,7 +77,8 @@ def patched_forward_sym(self, *args, **kwargs):
         unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width)
 
     # all zp is 8 for symmetrical, will repack to i4 in pt fe transformation
-    unpacked_weights = unpacked_weights.to(dtype) * self.scales
+    unpacked_weights = (unpacked_weights.to(torch.int8) - torch.tensor(8, dtype=torch.int8))
+    unpacked_weights = unpacked_weights.to(dtype) * self.scales
     unpacked_weights = unpacked_weights.view(-1, self.width)
 
     out = x @ unpacked_weights
diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
index 675a293269002b..5130424d0c60ed 100644
--- a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
+++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp
@@ -7,6 +7,7 @@
 #include "openvino/core/rt_info.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/reshape.hpp"
+#include "openvino/op/subtract.hpp"
 #include "openvino/op/transpose.hpp"
 #include "openvino/pass/pattern/matcher.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
@@ -53,6 +54,7 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) {
         auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr();
         auto transpose = pattern_to_output[m_transpose].get_node_shared_ptr();
         auto reshape2 = pattern_to_output[m_reshape2].get_node_shared_ptr();
+        auto pattern_root = reshape2;
 
         if (constant->get_element_type() != element::u4)
             return false;
@@ -76,9 +78,26 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) {
         auto get_number = get_u4;
         auto constant_dtype = element::u4;
+        NodeVector copy_from{std::move(constant), std::move(reshape1), std::move(transpose), reshape2};
         if (is_symmetrical) {
             get_number = get_i4;
             constant_dtype = element::i4;
+            // find pattern Convert(W, i8) -> Subtract(8)
+            auto reshape_targets = reshape2->output(0).get_target_inputs();
+            if (reshape_targets.size() != 1)
+                return false;
+            auto convert = reshape_targets.begin()->get_node()->shared_from_this();
+            if (!std::dynamic_pointer_cast<v0::Convert>(convert))
+                return false;
+            auto convert_targets = convert->output(0).get_target_inputs();
+            if (convert_targets.size() != 1)
+                return false;
+            auto subtract = convert_targets.begin()->get_node()->shared_from_this();
+            if (!std::dynamic_pointer_cast<v1::Subtract>(subtract))
+                return false;
+            pattern_root = subtract;
+            copy_from.push_back(std::move(convert));
+            copy_from.push_back(subtract);
         }
 
         auto new_const = std::make_shared<v0::Constant>(constant_dtype, destination_shape);
         auto dst = const_cast<uint8_t*>(  // const_cast?
@@ -96,8 +115,8 @@ U4BlockRepack::U4BlockRepack(bool is_symmetrical) {
             }
         }
 
-        copy_runtime_info({std::move(constant), std::move(reshape1), std::move(transpose), reshape2}, new_const);
-        replace_node(reshape2, new_const);
+        copy_runtime_info(copy_from, new_const);
+        replace_node(pattern_root, new_const);
 
         return true;
     });
diff --git a/tests/model_hub_tests/pytorch/test_llm.py b/tests/model_hub_tests/pytorch/test_llm.py
index 9acf8e2100c520..e444f93db9d7ec 100644
--- a/tests/model_hub_tests/pytorch/test_llm.py
+++ b/tests/model_hub_tests/pytorch/test_llm.py
@@ -128,7 +128,7 @@ def load_model(self, name, type):
             example["past_key_values"] = pkv
             example["attention_mask"] = torch.cat(
                 [example["attention_mask"], am], -1)
-            if atype not in ["opt", "falcon", "mbart_gptq", "mpt"]:
+            if atype not in ["opt", "falcon", "mbart", "mpt"]:
                 ids = torch.cumsum(example["attention_mask"] != 0, dim=1) - 1
                 example["position_ids"] = ids[:, -
                                               example["input_ids"].shape[1]:]
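Side note (not part of the patch): in the symmetric GPTQ path, the 4-bit weights are stored as unsigned nibbles with an implicit zero point of 8, so subtracting 8 maps the u4 range [0, 15] onto the signed i4 range [-8, 7]; the `U4BlockRepack` pass then recognizes the `Convert(W, i8) -> Subtract(8)` tail and folds the whole chain into a single i4 constant. Below is a minimal standalone sketch of the arithmetic the new `patched_forward_sym` lines perform; the tensor shapes and variable names are hypothetical, chosen only for illustration.

```python
import torch

# Hypothetical shapes for illustration; gptq.py derives these from the model.
groups, group_size, width = 2, 4, 8
dtype = torch.float32

# u4 nibbles already unpacked into uint8, as after the GPTQ unpacking step.
unpacked_weights = torch.randint(0, 16, (groups, group_size, width), dtype=torch.uint8)
scales = torch.rand(groups, 1, width)

# Subtract the implicit zero point of 8: u4 [0, 15] -> i4 [-8, 7].
# This mirrors the "+" lines in patched_forward_sym above.
signed = unpacked_weights.to(torch.int8) - torch.tensor(8, dtype=torch.int8)
dequantized = signed.to(dtype) * scales

assert int(signed.min()) >= -8 and int(signed.max()) <= 7
```

Because the repacked i4 constant already encodes the shift, the matched `Convert`/`Subtract` nodes become redundant once the transformation runs, which is why the C++ change replaces `pattern_root` (the `Subtract` when present) rather than `reshape2`, and extends the `copy_runtime_info` source list accordingly.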