Skip to content

Commit

Permalink
[Layer][GPU][TF FE] test_segment_sum failed with get_shape was called… (
Browse files Browse the repository at this point in the history
openvinotoolkit#28778)

### Details:
- *Added dynamic shape support in GPU inference pipeline for SegmentSum
ops*
- *Updated tensorflow_tests/test_tf_SegmentSum.py to remove GPU skip*
- *Ensured correct handling of dynamic shapes during GPU inference for
SegmentSum. This update resolves the GPU test failure and ensures proper
support for dynamic shapes in SegmentSum ops.*
- *Added support for passing `custom_eps` through `kwargs` to allow
dynamic epsilon values for comparison in tests.
This improves flexibility in handling precision variations (FP32,
FP16).*
- *The following three tickets are interrelated.*

### Tickets:
- *CVS-105896*
- *CVS-152352*
- *CVS-156362*

Co-authored-by: Roman Kazantsev <[email protected]>
  • Loading branch information
pravin25 and rkazants authored Feb 17, 2025
1 parent 0b5b439 commit daf33e8
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ attach_embedding_bag_impl::attach_embedding_bag_impl() {
implementation_map<embedding_bag>::add(impl_types::ocl, typed_primitive_impl_ocl<embedding_bag>::create<embedding_bag_impl>, {
std::make_tuple(data_types::f32, format::bfyx),
std::make_tuple(data_types::f16, format::bfyx),
std::make_tuple(data_types::i32, format::bfyx),
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ KERNEL(embedding_bag_ref)(
const __global INPUT1_TYPE* indices,
const __global INPUT2_TYPE* segment_ids,
#ifdef INPUT3_TYPE
const __global INPUT3_TYPE* weights,
const __global INPUT3_TYPE* segments_sum,
#endif
#ifdef INPUT4_TYPE
const __global INPUT4_TYPE* weights,
#endif
__global OUTPUT_TYPE* output)
{
Expand All @@ -114,7 +117,7 @@ KERNEL(embedding_bag_ref)(
uint index = indices[INPUT1_OFFSET + i];
uint emb_index = INPUT0_GET_INDEX(index, emb_dim1, emb_dim2, emb_dim3);
OUTPUT_TYPE val = emb_table[emb_index];
#ifdef INPUT3_TYPE
#ifdef INPUT4_TYPE
{
uint weight_index = INPUT3_OFFSET + i;
val *= weights[weight_index];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ ParamsKey EmbeddingBagKernelRef::GetSupportedKey() const {
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableInputDataType(Datatype::UINT32);

k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT32);

k.EnableAllInputLayout();
k.EnableAllOutputLayout();
Expand Down
11 changes: 6 additions & 5 deletions src/plugins/intel_gpu/src/plugin/ops/embedding_bag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,9 @@ static void CreateEmbeddingSegmentsSumOp(ProgramBuilder& p, const std::shared_pt
auto inputs = p.GetInputInfo(op);
std::string layerName = layer_type_name_ID(op);

inputs.erase(inputs.begin() + 3); // Remove "num_segments"

int32_t defaultIndex = -1;
// port of default_index is 4 by default, but we removed "num_segments" above, so now it's equal to 3
if (inputs.size() > 3) {
if (inputs.size() > 4) {
auto index_node = ov::as_type_ptr<ov::op::v0::Constant>(op->get_input_node_shared_ptr(4));
OPENVINO_ASSERT(index_node != nullptr, "[GPU] Unsupported parameter nodes type in ", op->get_friendly_name(), " (", op->get_type_name(), ")");

Expand All @@ -117,7 +115,7 @@ static void CreateEmbeddingSegmentsSumOp(ProgramBuilder& p, const std::shared_pt
OPENVINO_THROW("Unsupported parameter size in ", op->get_friendly_name(), " (", op->get_type_name(), ")");

defaultIndex = static_cast<int32_t>(val);
inputs.erase(inputs.begin() + 3); // Remove "default_index"
inputs.erase(inputs.begin() + 4); // Remove "default_index"
}

std::vector<cldnn::input_info> reordered_inputs;
Expand All @@ -141,10 +139,13 @@ static void CreateEmbeddingSegmentsSumOp(ProgramBuilder& p, const std::shared_pt
}
}

auto p_shape = op->get_output_partial_shape(0);
auto output_shape = p_shape.is_static() ? tensor_from_dims(p_shape.to_shape()) : cldnn::tensor();

auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reordered_inputs,
cldnn::embedding_bag::segments_sum,
tensor_from_dims(op->get_output_shape(0)),
output_shape,
defaultIndex);

p.add_primitive(*op, embeddingBagPrim);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) {
auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } });
auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segments_num = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } });
auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } });
tensor output_shape = {3, 2, 1, 1};

Expand All @@ -789,6 +790,8 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) {
set_values<int32_t>(segment_ids, {
0, 0, 2, 2
});
set_values<int32_t>(segments_num, { 4 });

set_values(per_sample_weights, {
ov::float16(0.5f), ov::float16(0.5f), ov::float16(0.5f), ov::float16(0.5f)
});
Expand All @@ -798,16 +801,18 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) {
topology.add(input_layout("Input0", emb_table->get_layout()));
topology.add(input_layout("Input1", indices->get_layout()));
topology.add(input_layout("Input2", segment_ids->get_layout()));
topology.add(data("Input3", per_sample_weights));
topology.add(input_layout("Input3", segments_num->get_layout()));
topology.add(data("Input4", per_sample_weights));
topology.add(
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0)
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3"), input_info("Input4") }, type, output_shape, 0)
);

network network(engine, topology, get_test_default_config(engine));

network.set_input_data("Input0", emb_table);
network.set_input_data("Input1", indices);
network.set_input_data("Input2", segment_ids);
network.set_input_data("Input3", segments_num);

auto outputs = network.execute();

Expand Down Expand Up @@ -838,6 +843,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) {
auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } });
auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segments_num = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } });
auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } });
tensor output_shape = {3, 2, 1, 1};

Expand All @@ -854,6 +860,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) {
set_values<int32_t>(segment_ids, {
1, 1, 2, 2
});
set_values<int32_t>(segments_num, { 4 });
set_values(per_sample_weights, {
ov::float16(0.5f), ov::float16(0.5f), ov::float16(0.5f), ov::float16(0.5f)
});
Expand All @@ -863,16 +870,18 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) {
topology.add(input_layout("Input0", emb_table->get_layout()));
topology.add(input_layout("Input1", indices->get_layout()));
topology.add(input_layout("Input2", segment_ids->get_layout()));
topology.add(data("Input3", per_sample_weights));
topology.add(input_layout("Input3", segments_num->get_layout()));
topology.add(data("Input4", per_sample_weights));
topology.add(
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2)
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3"), input_info("Input4") }, type, output_shape, 2)
);

network network(engine, topology, get_test_default_config(engine));

network.set_input_data("Input0", emb_table);
network.set_input_data("Input1", indices);
network.set_input_data("Input2", segment_ids);
network.set_input_data("Input3", segments_num);

auto outputs = network.execute();

Expand Down Expand Up @@ -903,6 +912,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) {
auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } });
auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segments_num = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } });
auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } });
tensor output_shape = {3, 2, 1, 1};

Expand All @@ -919,6 +929,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) {
set_values<int32_t>(segment_ids, {
0, 0, 1, 1
});
set_values<int32_t>(segments_num, { 4 });
set_values(per_sample_weights, {
ov::float16(0.5f), ov::float16(0.5f), ov::float16(0.5f), ov::float16(0.5f)
});
Expand All @@ -928,16 +939,18 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) {
topology.add(input_layout("Input0", emb_table->get_layout()));
topology.add(input_layout("Input1", indices->get_layout()));
topology.add(input_layout("Input2", segment_ids->get_layout()));
topology.add(data("Input3", per_sample_weights));
topology.add(input_layout("Input3", segments_num->get_layout()));
topology.add(data("Input4", per_sample_weights));
topology.add(
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2)
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3"), input_info("Input4") }, type, output_shape, 2)
);

network network(engine, topology, get_test_default_config(engine));

network.set_input_data("Input0", emb_table);
network.set_input_data("Input1", indices);
network.set_input_data("Input2", segment_ids);
network.set_input_data("Input3", segments_num);

auto outputs = network.execute();

Expand Down Expand Up @@ -966,6 +979,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) {
auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } });
auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segments_num = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } });
tensor output_shape = {3, 2, 1, 1};

set_values(emb_table, {
Expand All @@ -981,21 +995,24 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) {
set_values<int32_t>(segment_ids, {
0, 0, 2, 2
});
set_values<int32_t>(segments_num, { 4 });

auto type = embedding_bag::segments_sum;
topology topology;
topology.add(input_layout("Input0", emb_table->get_layout()));
topology.add(input_layout("Input1", indices->get_layout()));
topology.add(input_layout("Input2", segment_ids->get_layout()));
topology.add(input_layout("Input3", segments_num->get_layout()));
topology.add(
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape)
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape)
);

network network(engine, topology, get_test_default_config(engine));

network.set_input_data("Input0", emb_table);
network.set_input_data("Input1", indices);
network.set_input_data("Input2", segment_ids);
network.set_input_data("Input3", segments_num);

auto outputs = network.execute();

Expand Down Expand Up @@ -1026,6 +1043,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) {
auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 3, 2 } });
auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
auto segments_num = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } });
auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } });
tensor output_shape = {3, 2, 3, 2};

Expand Down Expand Up @@ -1091,6 +1109,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) {
set_values<int32_t>(segment_ids, {
0, 0, 2, 2
});
set_values<int32_t>(segments_num, { 4 });
set_values(per_sample_weights, {
ov::float16(0.5f), ov::float16(0.5f),
ov::float16(0.5f), ov::float16(0.5f)
Expand All @@ -1101,16 +1120,18 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) {
topology.add(input_layout("Input0", emb_table->get_layout()));
topology.add(input_layout("Input1", indices->get_layout()));
topology.add(input_layout("Input2", segment_ids->get_layout()));
topology.add(data("Input3", per_sample_weights));
topology.add(input_layout("Input3", segments_num->get_layout()));
topology.add(data("Input4", per_sample_weights));
topology.add(
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0)
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3"), input_info("Input4") }, type, output_shape, 0)
);

network network(engine, topology, get_test_default_config(engine));

network.set_input_data("Input0", emb_table);
network.set_input_data("Input1", indices);
network.set_input_data("Input2", segment_ids);
network.set_input_data("Input3", segments_num);

auto outputs = network.execute();

Expand Down
8 changes: 1 addition & 7 deletions tests/layer_tests/tensorflow_tests/test_tf_SegmentSum.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ def test_segment_sum_basic(self, params, ie_device, precision, ir_version, temp_
use_legacy_frontend):
if use_legacy_frontend:
pytest.skip("SegmentSum operation is not supported via legacy frontend.")
if ie_device == 'GPU':
pytest.skip("GPU error: to_shape was called on a dynamic shape")
self._test(*self.create_segment_sum_net(**params),
ie_device, precision, ir_version, temp_dir=temp_dir,
use_legacy_frontend=use_legacy_frontend)
Expand All @@ -68,8 +66,6 @@ def test_segment_sum_different_types(self, params, ie_device, precision, ir_vers
use_legacy_frontend):
if use_legacy_frontend:
pytest.skip("SegmentSum operation is not supported via legacy frontend.")
if ie_device == 'GPU':
pytest.skip("GPU error: to_shape was called on a dynamic shape")
self._test(*self.create_segment_sum_net(**params),
ie_device, precision, ir_version, temp_dir=temp_dir,
use_legacy_frontend=use_legacy_frontend)
Expand Down Expand Up @@ -122,8 +118,6 @@ def test_complex_segment_sum(self, params, ie_device, precision, ir_version, tem
use_legacy_frontend):
if use_legacy_frontend:
pytest.skip("SegmentSum operation is not supported via legacy frontend.")
if ie_device == 'GPU':
pytest.skip("GPU error: to_shape was called on a dynamic shape")
self._test(*self.create_segment_sum_net(**params),
ie_device, precision, ir_version, temp_dir=temp_dir,
use_legacy_frontend=use_legacy_frontend)
use_legacy_frontend=use_legacy_frontend)
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,12 @@ def test_sparse_segment_mean(self, data_type, indices_type, segment_indices_type
shape, indices_shape, segments_num,
ie_device, precision, ir_version, temp_dir,
use_legacy_frontend):
kwargs = {}
if ie_device == 'GPU':
pytest.skip("GPU error: to_shape was called on a dynamic shape, ticket: 152352")
kwargs = {
'custom_eps': 1e-2,
}
self._test(*self.create_sparse_segment_mean(data_type, indices_type, segment_indices_type,
shape, indices_shape, segments_num),
ie_device, precision, ir_version, temp_dir=temp_dir,
use_legacy_frontend=use_legacy_frontend)
use_legacy_frontend=use_legacy_frontend, **kwargs)
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ def create_unsorted_segment_sum_net(self, data_shape, segment_ids_shape, num_seg
def test_unsorted_segment_sum_basic(self, params, data_type, segment_ids_type, num_segments_type, ie_device,
precision, ir_version, temp_dir,
use_legacy_frontend):
if ie_device == 'GPU':
pytest.skip("156362: No layout format available for embeddingsegmentssum:UnsortedSegmentSum on GPU")
self._test(*self.create_unsorted_segment_sum_net(**params,
data_type=data_type, segment_ids_type=segment_ids_type,
num_segments_type=num_segments_type),
Expand Down

0 comments on commit daf33e8

Please sign in to comment.