diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
index 83d2a10dc4f2f9..a11ceef8b0f2dd 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp
@@ -204,14 +204,16 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {
             auto& a_zp = arg.activations_zero_points();
             auto a_zp_dtype = a_zp.get_output_layout().data_type;
 
-            if (!data_type_traits::is_i8_u8(a_zp_dtype)) {
+            if (!data_type_traits::is_i8_u8(a_zp_dtype) && a_zp_dtype != data_types::i32) {
                 throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution");
             }
 
             if (a_zp_dtype == data_types::i8) {
                 set_activation_zero_points_attr<ov::element_type_traits<data_types::i8>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
-            } else { // if (a_zp_dtype == data_types::u8)
+            } else if (a_zp_dtype == data_types::u8) {
                 set_activation_zero_points_attr<ov::element_type_traits<data_types::u8>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
+            } else if (a_zp_dtype == data_types::i32) {
+                set_activation_zero_points_attr<ov::element_type_traits<data_types::i32>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
             }
         }
 
diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
index 19ea02c7c66d28..a8aa43671ed048 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
@@ -30,6 +30,7 @@ cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory) {
 
 template cldnn::memory::ptr convert_zp_data_to_s32<int8_t>(const memory::ptr zp_memory);
 template cldnn::memory::ptr convert_zp_data_to_s32<uint8_t>(const memory::ptr zp_memory);
+template cldnn::memory::ptr convert_zp_data_to_s32<int32_t>(const memory::ptr zp_memory);
 
 cldnn::format default_fmt_for_dims(size_t dims, bool is_grouped) {
     switch (dims) {
@@ -489,6 +490,7 @@ bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) {
 
 template bool is_per_tensor<int8_t>(cldnn::data_node& node, int32_t& zp_val);
 template bool is_per_tensor<uint8_t>(cldnn::data_node& node, int32_t& zp_val);
+template bool is_per_tensor<int32_t>(cldnn::data_node& node, int32_t& zp_val);
 
 static std::string get_external_order(const std::vector<size_t>& order, bool is_weights, bool is_grouped) {
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp
index 4155ac0b420e66..4f9c31064e9026 100644
--- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp
@@ -10134,6 +10134,113 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_weights_zp) {
     }
 }
 
+TEST(convolution_gpu_onednn, support_activation_zero_points_for_i32) {
+    auto& engine = get_test_engine();
+    if (!engine.get_device_info().supports_immad)
+        return;
+
+    auto in_layout = layout { ov::PartialShape::dynamic(4), data_types::u8, format::bfyx };
+    auto input = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 2, 5, 4 } });
+    auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 3, 2, 3, 3 } });
+    auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
+    auto a_zp = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 3, 1, 1 } });
+    auto w_zp = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 1, 1, 1 } });
+
+    set_values<uint8_t>(input, { 1, 2, 3, 4, 5,
+                                 2, 2, 3, 4, 6,
+                                 3, 3, 3, 5, 1,
+                                 1, 1, 1, 1, 1,
+
+                                 1, 2, 3, 4, 5,
+                                 2, 2, 3, 4, 6,
+                                 3, 3, 3, 5, 1,
+                                 1, 1, 1, 1, 1 });
+
+    set_values<int8_t>(weights, { 1, 2, -1,
+                                  -2, 1, 2,
+                                  9, 7, -1,
+
+                                  9, 0, -4,
+                                  -1, 3, 2,
+                                  0, 2, 5,
+
+                                  1, 2, -1,
+                                  -2, 1, 2,
+                                  9, 7, -1,
+
+                                  9, 0, -4,
+                                  -1, 3, 2,
+                                  0, 2, 5,
+
+                                  1, 2, -1,
+                                  -2, 1, 2,
+                                  9, 7, -1,
+
+                                  9, 0, -4,
+                                  -1, 3, 2,
+                                  0, 2, 5 });
+    set_values<int32_t>(a_zp, { 2, 5, 5 });
+    set_values<uint8_t>(w_zp, { 2 });
+    set_values(biases, { 1.0f, -8.0f, -8.0f });
+
+    VVVF<float> output_vec = {
+        {
+            { 2.0f, -5.0f, -20.0f },
+            { 12.0f, 26.0f, -10.0f }
+        },
+        {
+            { -7.0f, -14.0f, -29.0f },
+            { 3.0f, 17.0f, -19.0f }
+        },
+        {
+            { -7.0f, -14.0f, -29.0f },
+            { 3.0f, 17.0f, -19.0f }
+        } };
+
+    topology topology(
+        input_layout("input", in_layout),
+        data("weights", weights),
+        data("biases", biases),
+        data("a_zp", a_zp),
+        data("w_zp", w_zp),
+        convolution("conv", input_info("input"), "weights", "biases", "w_zp", "a_zp", "", 1,
+                    { 2, 2 }, { 1, 1 }, { 0, 0 }, { 1, 2 }, false, data_types::f32),
+        reorder("out", input_info("conv"), format::bfyx, data_types::f32));
+
+    ExecutionConfig config = get_test_default_config(engine);
+    ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "", impl_types::onednn };
+    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } }));
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+
+    network network(engine, topology, config);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    ASSERT_EQ(outputs.begin()->first, "out");
+
+    auto output_memory = outputs.at("out").get_memory();
+    cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
+
+    auto output_layout = output_memory->get_layout();
+    int y_size = output_layout.spatial(1);
+    int x_size = output_layout.spatial(0);
+    int f_size = output_layout.feature();
+    int b_size = output_layout.batch();
+    ASSERT_EQ(output_layout.format, format::bfyx);
+    ASSERT_EQ(y_size, 2);
+    ASSERT_EQ(x_size, 3);
+    ASSERT_EQ(f_size, 3);
+    ASSERT_EQ(b_size, 1);
+    for (int f = 0; f < f_size; f++)
+        for (int y = 0; y < y_size; ++y) {
+            for (int x = 0; x < x_size; ++x) {
+                ASSERT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 1e-5f) <<
+                    " x=" << x << " y=" << y << " f=" << f;
+            }
+        }
+}
+
 TEST(convolution_gpu_onednn, has_proper_synchronization) {
     auto& engine = get_test_engine();
     if (!engine.get_device_info().supports_immad)
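
For context on the two helpers extended in utils.cpp: `convert_zp_data_to_s32` widens a zero-point buffer to the s32 values oneDNN consumes (a pass-through for the new i32 case), and `is_per_tensor` detects when every channel shares a single zero point, so a scalar can be set with mask 0 instead of a per-channel array. Below is a minimal standalone sketch of that logic; the names `zp_to_s32` and `zp_is_per_tensor` are hypothetical stand-ins, not the cldnn implementations:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for convert_zp_data_to_s32<T>: widen i8/u8/i32
// zero-point data to the int32 representation oneDNN consumes.
template <typename T>
std::vector<int32_t> zp_to_s32(const std::vector<T>& zp) {
    return std::vector<int32_t>(zp.begin(), zp.end());
}

// Hypothetical stand-in for is_per_tensor<T>: true when every channel
// shares one zero point, which allows passing a scalar with mask == 0.
template <typename T>
bool zp_is_per_tensor(const std::vector<T>& zp, int32_t& zp_val) {
    if (zp.empty())
        return false;
    zp_val = static_cast<int32_t>(zp.front());
    return std::all_of(zp.begin(), zp.end(),
                       [&](T v) { return static_cast<int32_t>(v) == zp_val; });
}

int main() {
    // The patch adds int32_t instantiations next to the existing i8/u8 ones;
    // for i32 input the widening step is already in the target representation.
    std::vector<int32_t> a_zp = {2, 5, 5};
    int32_t zp_val = 0;
    auto s32 = zp_to_s32<int32_t>(a_zp);
    std::printf("per-tensor: %d (first value %d)\n",
                zp_is_per_tensor<int32_t>(a_zp, zp_val), s32[0]);
    return 0;
}
```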
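The new test exercises a u8 x i8 convolution with i32 activation zero points. The arithmetic it validates is acc[oc] = sum over ic, ky, kx of (src - a_zp[ic]) * (wei - w_zp), plus bias[oc], with taps that fall in the padded region contributing nothing, since padding is neutral once the zero-point shift is applied. The sketch below recomputes the expected output_vec in plain C++, assuming the zero points apply per input channel (only the first two of the three values in the test's a_zp buffer participate, as the input has two channels); it is a reference for the math, not cldnn code:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // Shapes from the test: src 1x2x4x5 (u8), wei 3x2x3x3 (i8), stride 2,
    // dilation 1, pad_begin {0, 0}, pad_end {1, 2}  ->  dst 1x3x2x3.
    const int IC = 2, OC = 3, IH = 4, IW = 5, KH = 3, KW = 3;
    const int STRIDE = 2, OH = 2, OW = 3;
    const uint8_t src[IC][IH][IW] = {
        {{1,2,3,4,5}, {2,2,3,4,6}, {3,3,3,5,1}, {1,1,1,1,1}},
        {{1,2,3,4,5}, {2,2,3,4,6}, {3,3,3,5,1}, {1,1,1,1,1}}};
    const int8_t wei[OC][IC][KH][KW] = {  // identical for each output channel
        {{{1,2,-1}, {-2,1,2}, {9,7,-1}}, {{9,0,-4}, {-1,3,2}, {0,2,5}}},
        {{{1,2,-1}, {-2,1,2}, {9,7,-1}}, {{9,0,-4}, {-1,3,2}, {0,2,5}}},
        {{{1,2,-1}, {-2,1,2}, {9,7,-1}}, {{9,0,-4}, {-1,3,2}, {0,2,5}}}};
    const int32_t a_zp[IC] = {2, 5};  // assumed per-input-channel activation zps
    const int32_t w_zp = 2;           // per-tensor weights zero point
    const float bias[OC] = {1.0f, -8.0f, -8.0f};

    for (int oc = 0; oc < OC; ++oc)
        for (int oy = 0; oy < OH; ++oy)
            for (int ox = 0; ox < OW; ++ox) {
                int32_t acc = 0;
                for (int ic = 0; ic < IC; ++ic)
                    for (int ky = 0; ky < KH; ++ky)
                        for (int kx = 0; kx < KW; ++kx) {
                            const int iy = oy * STRIDE + ky;
                            const int ix = ox * STRIDE + kx;
                            if (iy >= IH || ix >= IW)
                                continue;  // padded taps vanish after the zp shift
                            acc += (int32_t(src[ic][iy][ix]) - a_zp[ic]) *
                                   (int32_t(wei[oc][ic][ky][kx]) - w_zp);
                        }
                // Prints 2.0 -5.0 -20.0 / 12.0 26.0 -10.0 for oc 0,
                // matching output_vec in the test above.
                std::printf("%6.1f%c", acc + bias[oc], ox + 1 == OW ? '\n' : ' ');
            }
    return 0;
}
```

As a spot check, output (0, 0) of channel 0 accumulates 7 over input channel 0 and -6 over input channel 1 (after subtracting zero points 2 and 5 and the weight zero point 2), so the total of 1 plus the 1.0 bias reproduces the expected 2.0; channels 1 and 2 share the same weights and differ only through their -8.0 bias, giving -7.0.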