Skip to content

Commit

Permalink
[GPU] Support convolution onednn activation zero points for i32
Browse files Browse the repository at this point in the history
  • Loading branch information
davidsnam-intel committed Oct 26, 2024
1 parent f413806 commit 3c14480
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,16 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {
auto& a_zp = arg.activations_zero_points();
auto a_zp_dtype = a_zp.get_output_layout().data_type;

if (!data_type_traits::is_i8_u8(a_zp_dtype)) {
if (!data_type_traits::is_i8_u8(a_zp_dtype) && a_zp_dtype != data_types::i32) {
throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution");
}

if (a_zp_dtype == data_types::i8) {
set_activation_zero_points_attr<ov::element_type_traits<data_types::i8>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
} else { // if (a_zp_dtype == data_types::u8)
} else if (a_zp_dtype == data_types::u8) {
set_activation_zero_points_attr<ov::element_type_traits<data_types::u8>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
} else if (a_zp_dtype == data_types::i32) {
set_activation_zero_points_attr<ov::element_type_traits<data_types::i32>::value_type>(attrs, a_zp.as<data>(), zero_point_mask);
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ cldnn::memory::ptr convert_zp_data_to_s32(const memory::ptr zp_memory) {

template cldnn::memory::ptr convert_zp_data_to_s32<int8_t>(const memory::ptr zp_memory);
template cldnn::memory::ptr convert_zp_data_to_s32<uint8_t>(const memory::ptr zp_memory);
template cldnn::memory::ptr convert_zp_data_to_s32<int32_t>(const memory::ptr zp_memory);

cldnn::format default_fmt_for_dims(size_t dims, bool is_grouped) {
switch (dims) {
Expand Down Expand Up @@ -489,6 +490,7 @@ bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) {

template bool is_per_tensor<int8_t>(cldnn::data_node& node, int32_t& zp_val);
template bool is_per_tensor<uint8_t>(cldnn::data_node& node, int32_t& zp_val);
template bool is_per_tensor<int32_t>(cldnn::data_node& node, int32_t& zp_val);


static std::string get_external_order(const std::vector<size_t>& order, bool is_weights, bool is_grouped) {
Expand Down
107 changes: 107 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10134,6 +10134,113 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_weights_zp) {
}
}

// Verifies that the oneDNN convolution impl accepts i32 activation zero points
// (previously only i8/u8 were allowed): runs a quantized u8-input / i8-weights
// convolution with a per-channel i32 activation zero-point tensor and checks
// the result against a precomputed reference.
TEST(convolution_gpu_onednn, support_activation_zero_points_for_i32) {
    auto& engine = get_test_engine();
    // The oneDNN path is only exercised on devices with immad support; skip otherwise.
    if (!engine.get_device_info().supports_immad)
        return;

    // Dynamic input layout so the dynamic-shape (new shape infer) path is used.
    auto in_layout = layout { ov::PartialShape::dynamic(4), data_types::u8, format::bfyx };
    auto input = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 2, 5, 4 } });
    auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 3, 2, 3, 3 } });
    auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } });
    // Activation zero points deliberately allocated as i32 — the data type under test.
    auto a_zp = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 3, 1, 1 } });
    auto w_zp = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 1, 1, 1 } });

    // Input: 1 batch, 2 feature maps of 5x4 (u8).
    set_values<uint8_t>(input, { 1, 2, 3, 4, 5,
                                 2, 2, 3, 4, 6,
                                 3, 3, 3, 5, 1,
                                 1, 1, 1, 1, 1,

                                 1, 2, 3, 4, 5,
                                 2, 2, 3, 4, 6,
                                 3, 3, 3, 5, 1,
                                 1, 1, 1, 1, 1 });

    // Weights: 3 output features x 2 input features x 3x3 kernels (i8).
    set_values<int8_t>(weights, { 1, 2, -1,
                                  -2, 1, 2,
                                  9, 7, -1,

                                  9, 0, -4,
                                  -1, 3, 2,
                                  0, 2, 5,

                                  1, 2, -1,
                                  -2, 1, 2,
                                  9, 7, -1,

                                  9, 0, -4,
                                  -1, 3, 2,
                                  0, 2, 5,

                                  1, 2, -1,
                                  -2, 1, 2,
                                  9, 7, -1,

                                  9, 0, -4,
                                  -1, 3, 2,
                                  0, 2, 5 });
    // Per-output-channel activation zero points (i32) and a scalar weights zero point (u8).
    set_values<int32_t>(a_zp, { 2, 5, 5 });
    set_values<uint8_t>(w_zp, { 2 });
    set_values(biases, { 1.0f, -8.0f, -8.0f });

    // Precomputed reference output, indexed as [feature][y][x] for b=0.
    VVVF<float> output_vec = {
        {
            { 2.0f, -5.0f, -20.0f },
            { 12.0f, 26.0f, -10.0f }
        },
        {
            { -7.0f, -14.0f, -29.0f },
            { 3.0f, 17.0f, -19.0f }
        },
        {
            { -7.0f, -14.0f, -29.0f },
            { 3.0f, 17.0f, -19.0f }
        } };

    // Convolution with stride {2,2}, dilation {1,1}, asymmetric padding {0,0}/{1,2},
    // both weights and activation zero points attached, f32 output.
    topology topology(
        input_layout("input", in_layout),
        data("weights", weights),
        data("biases", biases),
        data("a_zp", a_zp),
        data("w_zp", w_zp),
        convolution("conv", input_info("input"), "weights", "biases", "w_zp", "a_zp", "", 1,
                    { 2, 2 }, { 1, 1 }, { 0, 0 }, { 1, 2 }, false, data_types::f32),
        reorder("out", input_info("conv"), format::bfyx, data_types::f32));

    ExecutionConfig config = get_test_default_config(engine);
    // Force the oneDNN implementation so the i32 zero-point code path is the one tested.
    ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "", impl_types::onednn };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }}));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    config.set_property(ov::intel_gpu::optimize_data(true));

    network network(engine, topology, config);
    network.set_input_data("input", input);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.begin()->first, "out");

    auto output_memory = outputs.at("out").get_memory();
    cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());

    // Expected output shape: b=1, f=3, y=2, x=3 in bfyx.
    auto output_layout = output_memory->get_layout();
    int y_size = output_layout.spatial(1);
    int x_size = output_layout.spatial(0);
    int f_size = output_layout.feature();
    int b_size = output_layout.batch();
    ASSERT_EQ(output_layout.format, format::bfyx);
    ASSERT_EQ(y_size, 2);
    ASSERT_EQ(x_size, 3);
    ASSERT_EQ(f_size, 3);
    ASSERT_EQ(b_size, 1);
    // Element-wise comparison against the reference (batch is 1, so the flat
    // bfyx offset reduces to f * y_size * x_size + y * x_size + x).
    for (int f = 0; f < f_size; f++)
        for (int y = 0; y < y_size; ++y) {
            for (int x = 0; x < x_size; ++x) {
                ASSERT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 1e-5f) <<
                " x="<< x << " y=" << y << " f=" << f;
            }
        }
}

TEST(convolution_gpu_onednn, has_proper_synchronization) {
auto& engine = get_test_engine();
if (!engine.get_device_info().supports_immad)
Expand Down

0 comments on commit 3c14480

Please sign in to comment.