[GPU] Support convolution onednn activation zero points for i32 #27261

Merged
@@ -204,14 +204,16 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {
         auto& a_zp = arg.activations_zero_points();
         auto a_zp_dtype = a_zp.get_output_layout().data_type;

-        if (!data_type_traits::is_i8_u8(a_zp_dtype)) {
+        if (!data_type_traits::is_i8_u8(a_zp_dtype) && a_zp_dtype != data_types::i32) {
             throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution");
         }

         if (a_zp_dtype == data_types::i8) {
             set_activation_zero_points_attr<ov::element_type_traits<data_types::i8>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
-        } else { // if (a_zp_dtype == data_types::u8)
+        } else if (a_zp_dtype == data_types::u8) {
             set_activation_zero_points_attr<ov::element_type_traits<data_types::u8>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
+        } else if (a_zp_dtype == data_types::i32) {
+            set_activation_zero_points_attr<ov::element_type_traits<data_types::i32>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
         }
     }

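The dispatch above maps the runtime element type onto a compile-time C++ type through ov::element_type_traits, so the helper is instantiated with the width that actually matches the zero-point buffer; the new branch simply extends that chain to i32. A minimal, self-contained sketch of the enum-to-type dispatch pattern (the enum, traits, and set_zp below are toy stand-ins, not the OpenVINO definitions):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Toy stand-ins for cldnn::data_types and ov::element_type_traits.
    enum class data_types { i8, u8, i32 };

    template <data_types DT> struct element_type_traits;
    template <> struct element_type_traits<data_types::i8>  { using value_type = int8_t; };
    template <> struct element_type_traits<data_types::u8>  { using value_type = uint8_t; };
    template <> struct element_type_traits<data_types::i32> { using value_type = int32_t; };

    // The helper only ever sees the concrete type picked by the branch.
    template <typename T>
    void set_zp(const void* raw, std::size_t n) {
        const T* zp = static_cast<const T*>(raw);
        for (std::size_t i = 0; i < n; ++i)
            std::cout << static_cast<int32_t>(zp[i]) << ' ';  // widen for printing
        std::cout << '\n';
    }

    void dispatch(data_types dt, const void* raw, std::size_t n) {
        if (dt == data_types::i8) {
            set_zp<element_type_traits<data_types::i8>::value_type>(raw, n);
        } else if (dt == data_types::u8) {
            set_zp<element_type_traits<data_types::u8>::value_type>(raw, n);
        } else if (dt == data_types::i32) {
            set_zp<element_type_traits<data_types::i32>::value_type>(raw, n);
        }
    }

    int main() {
        const int32_t zps[] = { 2, 5, 5 };    // the a_zp values from the new test
        dispatch(data_types::i32, zps, 3);    // prints: 2 5 5
    }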
src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp (2 additions, 0 deletions)
@@ -30,6 +30,7 @@ cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory) {

 template cldnn::memory::ptr convert_zp_data_to_s32<int8_t>(const memory::ptr zp_memory);
 template cldnn::memory::ptr convert_zp_data_to_s32<uint8_t>(const memory::ptr zp_memory);
+template cldnn::memory::ptr convert_zp_data_to_s32<int32_t>(const memory::ptr zp_memory);

 cldnn::format default_fmt_for_dims(size_t dims, bool is_grouped) {
     switch (dims) {
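As its name suggests, convert_zp_data_to_s32 widens a zero-point buffer of element type T into the int32_t form used downstream, which is why a third explicit instantiation is all the i32 path needs here. A sketch of that conversion over plain vectors (a hypothetical stand-in for the cldnn::memory plumbing):

    #include <cstdint>
    #include <vector>

    // Widen zero points stored as T to int32_t; the real helper does the
    // same through cldnn::memory buffers rather than std::vector.
    template <typename T>
    std::vector<int32_t> convert_zp_to_s32(const std::vector<T>& zp) {
        return std::vector<int32_t>(zp.begin(), zp.end());
    }

    // e.g. convert_zp_to_s32<uint8_t>({ 2 }) yields { 2 } as int32_t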
@@ -489,6 +490,7 @@ bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) {

 template bool is_per_tensor<int8_t>(cldnn::data_node& node, int32_t& zp_val);
 template bool is_per_tensor<uint8_t>(cldnn::data_node& node, int32_t& zp_val);
+template bool is_per_tensor<int32_t>(cldnn::data_node& node, int32_t& zp_val);


 static std::string get_external_order(const std::vector<size_t>& order, bool is_weights, bool is_grouped) {
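Both templates are defined in utils.cpp rather than a header, so every element type used by callers needs an explicit instantiation in this translation unit; omitting the int32_t line would surface as a link error, not a compile error. A self-contained illustration of the pattern, with is_per_tensor's presumed behavior (all zero points equal, scalar returned through zp_val) sketched over plain vectors:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Presumed behavior of is_per_tensor: true when every zero point holds
    // the same value, so the caller can register a single per-tensor zero
    // point (mask 0) instead of a per-channel one.
    template <typename T>
    bool is_per_tensor(const std::vector<T>& zp, int32_t& zp_val) {
        if (zp.empty()) return false;
        zp_val = static_cast<int32_t>(zp.front());
        return std::all_of(zp.begin(), zp.end(),
                           [&](T v) { return static_cast<int32_t>(v) == zp_val; });
    }

    // Explicit instantiations, mirroring the PR: the definition lives in a
    // .cpp file, so each supported T must be instantiated there by hand.
    template bool is_per_tensor<int8_t>(const std::vector<int8_t>&, int32_t&);
    template bool is_per_tensor<uint8_t>(const std::vector<uint8_t>&, int32_t&);
    template bool is_per_tensor<int32_t>(const std::vector<int32_t>&, int32_t&);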
Expand Down
src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp (107 additions, 0 deletions)
@@ -10134,6 +10134,113 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_weights_zp) {
}
}

TEST(convolution_gpu_onednn, support_activation_zero_points_for_i32) {
auto& engine = get_test_engine();
if (!engine.get_device_info().supports_immad)
return;

auto in_layout = layout { ov::PartialShape::dynamic(4), data_types::u8, format::bfyx };
auto input = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 2, 5, 4 } });
auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 3, 2, 3, 3 } });
auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
auto a_zp = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 3, 1, 1 } });
auto w_zp = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 1, 1, 1 } });

set_values<uint8_t>(input, { 1, 2, 3, 4, 5,
2, 2, 3, 4, 6,
3, 3, 3, 5, 1,
1, 1, 1, 1, 1,

1, 2, 3, 4, 5,
2, 2, 3, 4, 6,
3, 3, 3, 5, 1,
1, 1, 1, 1, 1 });

set_values<int8_t>(weights, { 1, 2, -1,
-2, 1, 2,
9, 7, -1,

9, 0, -4,
-1, 3, 2,
0, 2, 5,

1, 2, -1,
-2, 1, 2,
9, 7, -1,

9, 0, -4,
-1, 3, 2,
0, 2, 5,

1, 2, -1,
-2, 1, 2,
9, 7, -1,

9, 0, -4,
-1, 3, 2,
0, 2, 5 });
set_values<int32_t>(a_zp, { 2, 5, 5 });
set_values<uint8_t>(w_zp, { 2 });
set_values(biases, { 1.0f, -8.0f, -8.0f });

VVVF<float> output_vec = {
{
{ 2.0f, -5.0f, -20.0f },
{ 12.0f, 26.0f, -10.0f }
},
{
{ -7.0f, -14.0f, -29.0f },
{ 3.0f, 17.0f, -19.0f }
},
{
{ -7.0f, -14.0f, -29.0f },
{ 3.0f, 17.0f, -19.0f }
} };

topology topology(
input_layout("input", in_layout),
data("weights", weights),
data("biases", biases),
data("a_zp", a_zp),
data("w_zp", w_zp),
convolution("conv", input_info("input"), "weights", "biases", "w_zp", "a_zp", "", 1,
{ 2, 2 }, { 1, 1 }, { 0, 0 }, { 1, 2 }, false, data_types::f32),
reorder("out", input_info("conv"), format::bfyx, data_types::f32));

ExecutionConfig config = get_test_default_config(engine);
ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "", impl_types::onednn };
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }}));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));

network network(engine, topology, config);
network.set_input_data("input", input);

auto outputs = network.execute();
ASSERT_EQ(outputs.begin()->first, "out");

auto output_memory = outputs.at("out").get_memory();
cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());

auto output_layout = output_memory->get_layout();
int y_size = output_layout.spatial(1);
int x_size = output_layout.spatial(0);
int f_size = output_layout.feature();
int b_size = output_layout.batch();
ASSERT_EQ(output_layout.format, format::bfyx);
ASSERT_EQ(y_size, 2);
ASSERT_EQ(x_size, 3);
ASSERT_EQ(f_size, 3);
ASSERT_EQ(b_size, 1);
for (int f = 0; f < f_size; f++)
for (int y = 0; y < y_size; ++y) {
for (int x = 0; x < x_size; ++x) {
ASSERT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 1e-5f) <<
" x="<< x << " y=" << y << " f=" << f;
}
}
}

TEST(convolution_gpu_onednn, has_proper_synchronization) {
auto& engine = get_test_engine();
if (!engine.get_device_info().supports_immad)
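For reference, the expected output_vec values in the new test follow the standard oneDNN zero-point arithmetic: each accumulation subtracts the per-channel activation zero point from the u8 input and the scalar weight zero point from the i8 weight before multiplying, then adds the f32 bias. A generic sketch of that reference computation (plain loops, no padding or dilation handling; not part of the test file):

    #include <cstdint>
    #include <vector>

    // One output element of a quantized convolution with activation and
    // weight zero points:
    //   acc = sum_{ic,ky,kx} (src - a_zp[ic]) * (wei - w_zp);  dst = acc + bias
    float quantized_conv_ref(const std::vector<uint8_t>& src,   // [IC][IH][IW]
                             const std::vector<int8_t>& wei,    // [IC][KH][KW], one output channel
                             const std::vector<int32_t>& a_zp,  // per input channel
                             int32_t w_zp, float bias,
                             int IC, int IH, int IW, int KH, int KW,
                             int oy, int ox, int stride_y, int stride_x) {
        int32_t acc = 0;
        for (int ic = 0; ic < IC; ++ic)
            for (int ky = 0; ky < KH; ++ky)
                for (int kx = 0; kx < KW; ++kx) {
                    const int iy = oy * stride_y + ky;
                    const int ix = ox * stride_x + kx;
                    const int32_t s = src[(ic * IH + iy) * IW + ix];
                    const int32_t w = wei[(ic * KH + ky) * KW + kx];
                    acc += (s - a_zp[ic]) * (w - w_zp);
                }
        return static_cast<float>(acc) + bias;
    }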