Skip to content

Commit

Permalink
[GPU] Support different input and output data type in convolution ref (
Browse files Browse the repository at this point in the history
…openvinotoolkit#26501)

### Details:
- For an fp32 model, some convolutions may not be compressed to fp16,
depending on the transformation policy, yet such a convolution may still
have a fused node that operates in fp16. In that case the convolution
node's input data type is fp32 while its output data type is fp16, and
the convolution kernel needs to support this mixed-precision case.

### Tickets:
 - 147689
  • Loading branch information
davidsnam-intel authored Sep 19, 2024
1 parent 4ed5ed0 commit 327f8e2
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ bool ConvolutionKernel_Ref::Validate(const Params& params) const {

// int8/uint8 inputs or outputs (quantization case) require the additional checks below.
if (input_type == output_type && input_type != Datatype::UINT8 && input_type != Datatype::INT8)
if (input_type != Datatype::UINT8 && input_type != Datatype::INT8 &&
output_type != Datatype::UINT8 && output_type != Datatype::INT8)
return true;

// (u)int8 input + fp weights
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <intel_gpu/primitives/crop.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/reshape.hpp>
#include <intel_gpu/primitives/permute.hpp>

#include <algorithm>
#include <array>
Expand Down Expand Up @@ -1639,6 +1640,47 @@ TEST(convolution_f32_fw_gpu, basic_convolution) {
}
}

TEST(convolution_f32_fw_gpu, input_f32_output_f16_dynamic_ref_kernel) {
    // Checks that the reference convolution kernel accepts an f32 input while
    // producing an f16 output: the fused eltwise requests f16 output, so the
    // conv node ends up with input fp32 / output fp16. Uses a dynamic input
    // layout so the shape-agnostic path is exercised as well.
    auto& engine = get_test_engine();

    auto dyn_layout   = layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
    auto input_mem    = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } });
    auto weights_mem  = engine.allocate_memory({ data_types::i8,  format::bfyx, { 1, 1, 3, 2 } });
    auto biases_mem   = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
    auto eltwise_mem  = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } });

    set_values(input_mem, {
        1.0f, 2.0f, 3.0f, 4.0f,
        5.0f, 2.0f, 2.0f, 3.0f,
        4.0f, 6.0f, 3.0f, 3.0f,
        3.0f, 5.0f, 1.0f, 1.0f,
        1.0f, 1.0f, 1.0f, 1.0f });

    topology topo(
        input_layout("input", dyn_layout),
        data("weights", weights_mem),
        data("biases", biases_mem),
        data("eltwise_data", eltwise_mem),
        convolution("conv", input_info("input"), "weights", "biases", 1, {2, 1}, {1, 1}, {0, 0}, {0, 0}, false),
        // data_types::f16 here forces the fused output precision to differ from the f32 input.
        eltwise("eltwise", { input_info("conv"), input_info("eltwise_data") }, eltwise_mode::prod, data_types::f16),
        permute("permute", input_info("eltwise"), {0, 1, 2, 3}));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    // Force the reference OCL implementation so this test pins the ref kernel,
    // not whichever optimized kernel the heuristic would otherwise select.
    ov::intel_gpu::ImplementationDesc ref_impl = { format::bfyx, "convolution_gpu_ref", impl_types::ocl };
    config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", ref_impl } }));

    network net(engine, topo, config);
    net.set_input_data("input", input_mem);

    auto results = net.execute();

    ASSERT_EQ(results.size(), size_t(1));
    // The f16 eltwise is expected to be fused into the convolution node.
    ASSERT_FALSE(has_node(*net.get_program(), "eltwise"));
    ASSERT_EQ(results.at("permute").get_layout().data_type, data_types::f16);
}

TEST(convolution_f32_fw_gpu, convolution_big_size_weights) {
auto& engine = get_test_engine();

Expand Down

0 comments on commit 327f8e2

Please sign in to comment.