diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp index 21c6efa5c42a63..7a86caf13288a5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp @@ -213,6 +213,7 @@ struct lstm_seq : public primitive_base { size_t hash() const override { size_t seed = primitive::hash(); + seed = hash_combine(seed, out1_prim_id); seed = hash_combine(seed, clip); seed = hash_combine(seed, input_forget); seed = hash_range(seed, activations.begin(), activations.end()); @@ -280,6 +281,8 @@ struct lstm_seq : public primitive_base { protected: std::vector get_dependencies() const override { std::vector ret; + //ret.push_back(input[input.size()-2].pid); + //ret.push_back(input[input.size()-1].pid); return ret; } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_seq.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_seq.cpp index d82b87009ae94a..5e60cc3ca10d95 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_seq.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_seq.cpp @@ -25,7 +25,13 @@ struct lstm_seq_impl : typed_primitive_impl_ocl { protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { - kernel_arguments_data args = parent::get_arguments(instance); + kernel_arguments_data args;// = parent::get_arguments(instance); + for (size_t i = 0; i < instance.inputs_memory_count()-2; i++) { + args.inputs.push_back(instance.input_memory_ptr(i)); + } + for (size_t i = 0; i < instance.outputs_memory_count(); i++) { + args.outputs.push_back(instance.output_memory_ptr(i)); + } args.outputs.push_back(instance.dep_memory_ptr(instance.desc()->input_size() - 2)); args.outputs.push_back(instance.dep_memory_ptr(instance.desc()->input_size() - 1)); return args; diff --git a/src/plugins/intel_gpu/src/graph/lstm_seq.cpp b/src/plugins/intel_gpu/src/graph/lstm_seq.cpp index 6d59e71e3806ca..d089156bc17e29 100644 --- a/src/plugins/intel_gpu/src/graph/lstm_seq.cpp +++ b/src/plugins/intel_gpu/src/graph/lstm_seq.cpp @@ -21,9 +21,11 @@ std::vector lstm_seq_inst::calc_output_layouts(lstm_seq_node const& node auto input_pshape_x = input_layout_x.get_partial_shape(); auto input_layout_hidden = impl_param.get_input_layout(1); auto input_pshape_hidden = input_layout_hidden.get_partial_shape(); + /* if (impl_param.desc->output_data_types.size() > 0) { OPENVINO_ASSERT(static_cast(impl_param.desc->output_data_types[0]) == false, "Output data type forcing is not supported for lstm_seq_node!"); } + */ if (input_pshape_x.is_static()) { OPENVINO_ASSERT(input_pshape_x.rank().get_length() == 4, "input_layout rank should be 4 on static shape."); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_ref.cl index 8537d5ff44dc26..442f01711460ef 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_ref.cl @@ -14,6 +14,7 @@ KERNEL(concatenation_gpu_ref)(__global INPUT0_TYPE* input, #endif ) { + printf("oncat 1 \n"); const uint d1 = (uint)get_global_id(0); // Y const uint d2 = (uint)get_global_id(1); // F #ifdef CHECK_FEATURES diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_simple_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_simple_ref.cl index 85d622e7e9d695..0c3ed1f1dd384c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_simple_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/concatenation_gpu_simple_ref.cl @@ -48,7 +48,7 @@ KERNEL (concatenation_gpu_ref)( uint output_offset = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR out_b, out_f, out_w, out_z, out_y, out_x); INPUT0_TYPE result = input[input_offset]; - + printf("result is %f for input_offset %d from %d %d %d %d %d %d\n", result, input_offset, b, f, w, z, y, x); #if HAS_FUSED_OPS FUSED_OPS; output[output_offset] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT); diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/lstm_seq_gpu_bfyx_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/lstm_seq_gpu_bfyx_ref.cl index 16ab26adcd85a7..00d37ed474e99b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/lstm_seq_gpu_bfyx_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/lstm_seq_gpu_bfyx_ref.cl @@ -31,7 +31,7 @@ KERNEL(lstm_seq)( } //printf("DIRECTION %d \n", DIRECTION); const int real_seq_length = sequence_lengths[INPUT3_GET_INDEX_SAFE(b, 0, 0, 0)]; - for(int i=0;i