Commit af4f209 ("wip")
michal-miotk committed Aug 2, 2024
1 parent 12626fc
Showing 11 changed files with 60 additions and 23 deletions.
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/primitives/lstm.hpp
@@ -213,6 +213,7 @@ struct lstm_seq : public primitive_base<lstm_seq> {
 
     size_t hash() const override {
         size_t seed = primitive::hash();
+        seed = hash_combine(seed, out1_prim_id);
         seed = hash_combine(seed, clip);
         seed = hash_combine(seed, input_forget);
         seed = hash_range(seed, activations.begin(), activations.end());
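
Note: the added hash_combine(seed, out1_prim_id) folds the new output-primitive id into lstm_seq's hash, presumably so that cached kernels and impls are keyed on it too. A minimal sketch of the boost-style mixer such helpers conventionally wrap (an assumption; the exact OpenVINO helper may differ):

#include <cstddef>
#include <functional>
#include <string>

// Boost-style hash mixer; assumed shape of the hash_combine used above.
template <typename T>
inline size_t hash_combine(size_t seed, const T& v) {
    return seed ^ (std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}

// Mirrors the diff: fold each field into the running seed.
size_t hash_lstm_seq_fields(const std::string& out1_prim_id, float clip) {
    size_t seed = 0;
    seed = hash_combine(seed, out1_prim_id);
    seed = hash_combine(seed, clip);
    return seed;
}
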
@@ -280,6 +281,8 @@ struct lstm_seq : public primitive_base<lstm_seq> {
 protected:
     std::vector<input_info> get_dependencies() const override {
         std::vector<input_info> ret;
+        //ret.push_back(input[input.size()-2].pid);
+        //ret.push_back(input[input.size()-1].pid);
         return ret;
     }
 };
8 changes: 7 additions & 1 deletion src/plugins/intel_gpu/src/graph/impls/ocl/lstm_seq.cpp
@@ -25,7 +25,13 @@ struct lstm_seq_impl : typed_primitive_impl_ocl<lstm_seq> {
 
 protected:
     kernel_arguments_data get_arguments(const typed_primitive_inst<lstm_seq>& instance) const override {
-        kernel_arguments_data args = parent::get_arguments(instance);
+        kernel_arguments_data args;// = parent::get_arguments(instance);
+        for (size_t i = 0; i < instance.inputs_memory_count()-2; i++) {
+            args.inputs.push_back(instance.input_memory_ptr(i));
+        }
+        for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
+            args.outputs.push_back(instance.output_memory_ptr(i));
+        }
         args.outputs.push_back(instance.dep_memory_ptr(instance.desc()->input_size() - 2));
         args.outputs.push_back(instance.dep_memory_ptr(instance.desc()->input_size() - 1));
         return args;
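Note: together with the commented-out get_dependencies() entries in lstm.hpp above, this hunk rewires argument collection by hand: the last two inputs of the primitive are really the mutable_data buffers the plugin allocates for outputs 1 and 2, so they are skipped as kernel inputs and re-bound as kernel outputs. A rough host-side model of that split, with hypothetical stand-in types (not the cldnn API):

#include <cstddef>
#include <vector>

// Hypothetical stand-ins for cldnn memory handles.
struct memory_ptr { int id; };

struct kernel_arguments_data {
    std::vector<memory_ptr> inputs;
    std::vector<memory_ptr> outputs;
};

// deps: all dependencies in order, real inputs first, then the two
// shared mutable_data buffers appended by the plugin.
kernel_arguments_data split_args(const std::vector<memory_ptr>& deps,
                                 const std::vector<memory_ptr>& outs) {
    kernel_arguments_data args;
    // Everything except the trailing two deps feeds the kernel as input.
    for (size_t i = 0; i + 2 < deps.size(); i++)
        args.inputs.push_back(deps[i]);
    // Regular outputs first...
    for (const auto& o : outs)
        args.outputs.push_back(o);
    // ...then the two mutable buffers, written in place by the kernel.
    args.outputs.push_back(deps[deps.size() - 2]);
    args.outputs.push_back(deps[deps.size() - 1]);
    return args;
}
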
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/graph/lstm_seq.cpp
@@ -21,9 +21,11 @@ std::vector<layout> lstm_seq_inst::calc_output_layouts(lstm_seq_node const& node
     auto input_pshape_x = input_layout_x.get_partial_shape();
     auto input_layout_hidden = impl_param.get_input_layout(1);
     auto input_pshape_hidden = input_layout_hidden.get_partial_shape();
+    /*
     if (impl_param.desc->output_data_types.size() > 0) {
         OPENVINO_ASSERT(static_cast<bool>(impl_param.desc->output_data_types[0]) == false, "Output data type forcing is not supported for lstm_seq_node!");
     }
+    */
     if (input_pshape_x.is_static()) {
         OPENVINO_ASSERT(input_pshape_x.rank().get_length() == 4, "input_layout rank should be 4 on static shape.");
     }
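Note: the assert is fenced off with /* */ rather than deleted; CreateLSTMSequenceOp in rnn.cpp below now forces output data types on the lstm_seq primitive (prim.output_data_types = get_output_data_types(...)), which this check would reject. Presumably it returns or disappears once the output wiring settles.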
(file path not captured; the hunk below touches the concatenation_gpu_ref OpenCL kernel)
@@ -14,6 +14,7 @@ KERNEL(concatenation_gpu_ref)(__global INPUT0_TYPE* input,
 #endif
 )
 {
+    printf("oncat 1 \n");
     const uint d1 = (uint)get_global_id(0); // Y
     const uint d2 = (uint)get_global_id(1); // F
 #ifdef CHECK_FEATURES
(file path not captured; a second concatenation_gpu_ref kernel variant)
@@ -48,7 +48,7 @@ KERNEL (concatenation_gpu_ref)(
     uint output_offset = FUNC_CALL(get_output_index)(OPTIONAL_SHAPE_INFO_TENSOR out_b, out_f, out_w, out_z, out_y, out_x);
 
     INPUT0_TYPE result = input[input_offset];
-
+    printf("result is %f for input_offset %d from %d %d %d %d %d %d\n", result, input_offset, b, f, w, z, y, x);
 #if HAS_FUSED_OPS
     FUSED_OPS;
     output[output_offset] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT);
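Note: both concatenation hunks are throwaway printf instrumentation ("oncat 1" looks like a truncated "concat 1"). OpenCL C permits printf inside kernels, which lets every work-item trace the value and coordinates it reads, at the cost of heavy log volume and serialized execution; the second printf replaces a blank line and changes no logic.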
(file path not captured; the hunks below are in the lstm_seq OpenCL kernel)
@@ -31,7 +31,7 @@ KERNEL(lstm_seq)(
     }
     //printf("DIRECTION %d \n", DIRECTION);
     const int real_seq_length = sequence_lengths[INPUT3_GET_INDEX_SAFE(b, 0, 0, 0)];
-    for(int i=0;i<real_seq_length;i++){
+    for(int i=0;i<MAX_SEQ_LENGTH;i++){
         for(int k=0;k<gate_num;k++){
             hidden_result[k] = 0;
             input_result[k] = 0;
@@ -43,15 +43,23 @@
             }else{
                 int prev_idx = i-1;
                 if(DIRECTION == 1){ //reverse
-                    prev_idx = real_seq_length - i ;
+                    if(i<real_seq_length){
+                        prev_idx = real_seq_length - i;
+                    } else {
+                        prev_idx = MAX_SEQ_LENGTH - i;
+                    }
                 }
                 hidden_result[k] += hidden_history[OUTPUT_GET_INDEX_SAFE(b, 0, prev_idx, j)]*R[INPUT5_GET_INDEX_SAFE(0, hidden_idx+weight_offsets[k], j, 0)];
             }
         }
 
         for(int j=0;j<INPUT_SIZE;j++) {
             if(DIRECTION == 1){ //reverse
-                input_result[k] += x[INPUT0_GET_INDEX_SAFE(b, real_seq_length-1-i, j, 0)]*W[INPUT4_GET_INDEX_SAFE(0, hidden_idx+weight_offsets[k], j, 0)];
+                if(i<real_seq_length){
+                    input_result[k] += x[INPUT0_GET_INDEX_SAFE(b, real_seq_length-1-i, j, 0)]*W[INPUT4_GET_INDEX_SAFE(0, hidden_idx+weight_offsets[k], j, 0)];
+                } else {
+                    input_result[k] += x[INPUT0_GET_INDEX_SAFE(b, MAX_SEQ_LENGTH-1-i, j, 0)]*W[INPUT4_GET_INDEX_SAFE(0, hidden_idx+weight_offsets[k], j, 0)];
+                }
             } else {
                 input_result[k] += x[INPUT0_GET_INDEX_SAFE(b, i, j, 0)]*W[INPUT4_GET_INDEX_SAFE(0, hidden_idx+weight_offsets[k], j, 0)];
             }
@@ -81,18 +89,21 @@
         }
         int cur_history_idx = i;
         if(DIRECTION == 1){ //reverse
-            cur_history_idx = real_seq_length - 1 - i ;
+            if(i<real_seq_length){
+                cur_history_idx = real_seq_length - 1 - i;
+            } else {
+                cur_history_idx = MAX_SEQ_LENGTH - 1 - i;
+            }
         }
         hidden_state[OUTPUT1_GET_INDEX_SAFE(b, 0, hidden_idx, 0)] = (OUTPUT_TYPE)(gate_output[3]*ACTIVATION_H(cell_state[OUTPUT2_GET_INDEX_SAFE(b, 0, hidden_idx, 0)], ACTIVATION_PARAMS_H));
         barrier(CLK_LOCAL_MEM_FENCE);
         hidden_history[OUTPUT_GET_INDEX_SAFE(b, 0, cur_history_idx, hidden_idx)] = hidden_state[OUTPUT1_GET_INDEX_SAFE(b, 0, hidden_idx, 0)];
         barrier(CLK_LOCAL_MEM_FENCE);
     }
 
     //printf("R is %p B is %p ; hidden history %p cell state %p batch %d\n", &R[0], &B[0], &hidden_history[0], &cell_state[0], b);
     for(int i=0;i<real_seq_length;i++){
         //hidden_history[OUTPUT_GET_INDEX_SAFE(b, 0, i, hidden_idx)] = i;
-        printf("DIR %d result is %f for hididx %d b %d\n", DIRECTION, hidden_history[OUTPUT_GET_INDEX_SAFE(b, 0, i, hidden_idx)], hidden_idx, b);
-        printf("DIR %d hidden state is %f for hid idx %d b %d \n", DIRECTION, hidden_state[OUTPUT1_GET_INDEX_SAFE(b, 0, hidden_idx, 0)], hidden_idx, b);
+        printf("DIR %d result is %f for hididx %d b %d for i %d\n", DIRECTION, hidden_history[OUTPUT_GET_INDEX_SAFE(b, 0, i, hidden_idx)], hidden_idx, b, i);
     }
     printf("DIR %d cell_state is %f hidden_state is %f for hididx %d b %d\n", DIRECTION, cell_state[OUTPUT1_GET_INDEX_SAFE(b, 0, hidden_idx, 0)], hidden_state[OUTPUT1_GET_INDEX_SAFE(b, 0, hidden_idx, 0)], hidden_idx, b);
 }
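
Note: the substantive kernel change is that the time loop now runs to the compile-time MAX_SEQ_LENGTH rather than the per-batch real_seq_length, likely so every work-item executes the same trip count and reaches the same barrier() calls, with the reverse-direction index math gaining a fallback branch for the padded steps past real_seq_length. A host-side C++ model of that index selection (names mirror the kernel; the padded-tail behaviour is my reading of the diff):

#include <cassert>

// Mirrors the kernel's reverse-direction (DIRECTION == 1) index math.
// Within the real sequence, indices count down from the end of the real
// data; past it, they count down from the padded maximum instead.
struct ReverseIndices {
    int prev_idx;        // timestep whose hidden state feeds this step
    int x_idx;           // input element consumed at this step
    int cur_history_idx; // slot written in the hidden-history output
};

ReverseIndices reverse_indices(int i, int real_seq_length, int max_seq_length) {
    assert(real_seq_length <= max_seq_length);
    ReverseIndices r;
    if (i < real_seq_length) {
        r.prev_idx        = real_seq_length - i;
        r.x_idx           = real_seq_length - 1 - i;
        r.cur_history_idx = real_seq_length - 1 - i;
    } else {  // padded tail of the fixed-length loop
        r.prev_idx        = max_seq_length - i;
        r.x_idx           = max_seq_length - 1 - i;
        r.cur_history_idx = max_seq_length - 1 - i;
    }
    return r;
}
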
(file path not captured; concatenation kernel selector)
@@ -19,6 +19,7 @@ concatenation_kernel_selector::concatenation_kernel_selector() {
 }
 
 KernelsData concatenation_kernel_selector::GetBestKernels(const Params& params) const {
-    return GetNaiveBestKernel(params, KernelType::CONCATENATION);
+    auto a = GetNaiveBestKernel(params, KernelType::CONCATENATION);
+    return a;
 }
 }  // namespace kernel_selector
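Note: splitting the return into a named local is behavior-neutral; it reads like a debugging convenience, a spot to set a breakpoint and inspect which kernel was selected for CONCATENATION.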
16 changes: 9 additions & 7 deletions src/plugins/intel_gpu/src/plugin/ops/rnn.cpp
@@ -249,24 +249,26 @@ static void CreateLSTMSequenceOp(ProgramBuilder& p, const std::shared_ptr<ov::op
                                            cldnn::format::bfyx,
                                            tensor_from_dims(op->get_output_shape(1)));
 
-    cldnn::memory::ptr shared_memory1 = p.get_engine().allocate_memory(out12Layout);
+    std::vector<cldnn::memory::ptr> shared_memories;
+    shared_memories.push_back(p.get_engine().allocate_memory(out12Layout, false));
     const cldnn::primitive_id mutable_id_1 = layerName + "_md_write1";
-    const cldnn::mutable_data mutable_prim_1{mutable_id_1, shared_memory1};
+    const cldnn::mutable_data mutable_prim_1{mutable_id_1, shared_memories.front()};
     p.add_primitive(*op, mutable_prim_1);
 
-
-    cldnn::memory::ptr shared_memory2 = p.get_engine().allocate_memory(out12Layout);
+    std::cout << "layout is " << out12Layout << std::endl;
+    shared_memories.push_back(p.get_engine().allocate_memory(out12Layout, false));
     const cldnn::primitive_id mutable_id_2 = layerName + "_md_write2";
-    const cldnn::mutable_data mutable_prim_2{mutable_id_2, shared_memory2};
+    const cldnn::mutable_data mutable_prim_2{mutable_id_2, shared_memories.back()};
     p.add_primitive(*op, mutable_prim_2);
     int direction = op->get_direction() == ov::op::RecurrentSequenceDirection::REVERSE ? 1 : 0;
     cldnn::lstm_seq prim(lstm_seq_id + ".out0", inputs[0], inputs[1], \
                          inputs[2], inputs[3], inputs[4], inputs[5], cldnn::input_info(bias), mutable_id_1, mutable_id_2, \
                          "", clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo, direction);
     prim.output_data_types = get_output_data_types(op, {{ov::element::f32, ov::element::f16}});
+    //prim.out1_prim_id = f_id;
     p.add_primitive(*op, prim);
-    p.add_primitive(*op, cldnn::mutable_data(lstm_seq_id + ".out1", {cldnn::input_info(lstm_seq_id + ".out0")}, shared_memory1));
-    p.add_primitive(*op, cldnn::mutable_data(lstm_seq_id + ".out2", {cldnn::input_info(lstm_seq_id + ".out0")}, shared_memory2));
+    p.add_primitive(*op, cldnn::mutable_data(lstm_seq_id + ".out1", {cldnn::input_info(lstm_seq_id + ".out0")}, shared_memories.front()));
+    p.add_primitive(*op, cldnn::mutable_data(lstm_seq_id + ".out2", {cldnn::input_info(lstm_seq_id + ".out0")}, shared_memories.back()));
 }
 
 REGISTER_FACTORY_IMPL(v4, LSTMCell);
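Note: LSTMSequence has three results, which the plugin maps onto the single-output legacy path by binding outputs 1 and 2 (final hidden and cell state) to pre-allocated buffers through pairs of mutable_data primitives: one registers each buffer before lstm_seq so it arrives as a trailing dependency, and a second re-exposes the same buffer afterwards as ".out1"/".out2". The new allocate_memory(out12Layout, false) presumably skips zeroing the buffer, and the std::cout line is temporary debug output. A toy model of the double-binding pattern (hypothetical stand-in types, not the cldnn API):

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Toy stand-ins, just to show the wiring.
struct Buffer { std::vector<float> data; };
using BufferPtr = std::shared_ptr<Buffer>;

struct Graph {
    // primitive id -> buffer that primitive reads/writes in place
    std::map<std::string, BufferPtr> mutable_bindings;
    std::vector<std::string> order;

    void add_mutable(const std::string& id, BufferPtr buf) {
        mutable_bindings[id] = buf;
        order.push_back(id);
    }
};

int main() {
    Graph g;
    const std::string layer = "lstm_seq1";

    // One shared buffer per extra output, allocated up front.
    std::vector<BufferPtr> shared(2);
    for (auto& b : shared) b = std::make_shared<Buffer>();

    // Pass 1: register buffers so the kernel sees them as dependencies.
    g.add_mutable(layer + "_md_write1", shared[0]);
    g.add_mutable(layer + "_md_write2", shared[1]);

    // ... lstm_seq itself would be added here, consuming both ids ...

    // Pass 2: re-expose the same buffers as the sequence's extra outputs.
    g.add_mutable(layer + ".out1", shared[0]);
    g.add_mutable(layer + ".out2", shared[1]);

    std::cout << g.order.size() << " primitives registered\n";  // 4
}
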
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -6,6 +6,7 @@
 #include "openvino/op/split.hpp"
 #include "openvino/op/variadic_split.hpp"
 #include "openvino/op/lstm_cell.hpp"
+#include "openvino/op/lstm_sequence.hpp"
 #include "openvino/op/loop.hpp"
 
 #include "intel_gpu/plugin/common_utils.hpp"
@@ -266,6 +267,7 @@ std::vector<cldnn::input_info> ProgramBuilder::GetInputInfo(const std::shared_pt
     bool is_legacy_multiple_outputs = !allow_new_shape_infer
                                       || ov::is_type<ov::op::v1::Split>(prevOp)
                                       || ov::is_type<ov::op::v1::VariadicSplit>(prevOp)
+                                      || ov::is_type<ov::op::v5::LSTMSequence>(prevOp)
                                       || ov::is_type<ov::op::v4::LSTMCell>(prevOp);
     if (prevOp->get_output_size() > 1 && is_legacy_multiple_outputs) {
         prevName += ".out" + std::to_string(op->get_input_source_output(i).get_index());
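Note: adding v5::LSTMSequence to the legacy multiple-output list makes GetInputInfo address its results as "<name>.out<index>", which is exactly how the primitives are named in CreateLSTMSequenceOp above (".out0", ".out1", ".out2").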
(file path not captured; LSTM sequence test parameters)
@@ -15,12 +15,12 @@ std::vector<ov::test::utils::SequenceTestsMode> mode{ov::test::utils::SequenceTe
                                                      ov::test::utils::SequenceTestsMode::PURE_SEQ_RAND_SEQ_LEN_PARAM,
                                                      ov::test::utils::SequenceTestsMode::PURE_SEQ};
 // output values increase rapidly without clip, so use only seq_lengths = 2
-std::vector<size_t> seq_lengths_zero_clip{2};
-std::vector<size_t> seq_lengths_clip_non_zero{20};
-std::vector<size_t> batch{10};
-std::vector<size_t> hidden_size{1, 10};
+std::vector<size_t> seq_lengths_zero_clip{1};
+std::vector<size_t> seq_lengths_clip_non_zero{10};
+std::vector<size_t> batch{1};
+std::vector<size_t> hidden_size{1};
 std::vector<size_t> hidden_size_smoke{1};
-std::vector<size_t> input_size{10};
+std::vector<size_t> input_size{1};
 std::vector<std::vector<std::string>> activations = {{"relu", "sigmoid", "tanh"}, {"sigmoid", "tanh", "tanh"},
                                                      {"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"},
                                                      {"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}};
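Note: the parameter space is collapsed to a near-minimal configuration (sequence length 1, batch 1, hidden and input size 1), the usual move when bisecting a kernel bug with the smallest shapes that still exercise the lstm_seq path. The comment above seq_lengths still refers to the old value of 2.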
(file path not captured; test-utils accuracy Error class)
@@ -393,12 +393,16 @@ class Error {
     };
 
     std::vector<IncorrectValue> incorrect_values_abs;
+    std::vector<IncorrectValue> correct_values_abs;
     double abs_threshold, rel_threshold, mvn_threshold, topk_threshold, mvn_results, topk_results;
     size_t tensor_size;
 
     void emplace_back(double in_actual_value, double in_expected_value, double in_threshold, size_t in_coordinate) {
         incorrect_values_abs.push_back(IncorrectValue(in_actual_value, in_expected_value, in_threshold, in_coordinate));
     }
+    void emplace_back_good(double in_actual_value, double in_expected_value, double in_threshold, size_t in_coordinate) {
+        correct_values_abs.push_back(IncorrectValue(in_actual_value, in_expected_value, in_threshold, in_coordinate));
+    }
 
 public:
     Error(const double in_abs_threshold,
@@ -419,6 +423,7 @@ class Error {
         const auto threshold = calculate_threshold(abs_threshold, rel_threshold, expected);
         mvn_results += equal(threshold, 0.f) ? diff : (diff / threshold);
         if (less_or_equal(diff, threshold)) {
+            emplace_back_good(actual, expected, threshold, coordinate);
             return true;
         }
         emplace_back(actual, expected, threshold, coordinate);
@@ -441,6 +446,10 @@ class Error {
                       << " Diff: " << std::fabs(val.expected_value - val.actual_value)
                       << " calculated_abs_threshold: " << val.threshold << " abs_threshold: " << abs_threshold
                       << " rel_threshold: " << rel_threshold << "\n";
+        }
+        for (auto val : correct_values_abs) {
+            std::cout << "\nOK: " << val.expected_value << " Actual: " << val.actual_value
+                      << " Coordinate: " << val.coordinate << "\n";
 #ifdef NDEBUG
             break;
 #endif
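Note: in the same debugging spirit as the kernel printfs, the comparison utility now records values that pass the threshold check (correct_values_abs, reusing the IncorrectValue record) and dumps them alongside the failures, so expected-vs-actual pairs can be eyeballed even for passing elements; under NDEBUG the loop still breaks after the first entry.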
