[IE CLDNN] Mixed precision scale support #1848

Merged
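
Summary: adds an optional output data type to the clDNN scale primitive so ScaleShift can emit a different precision than its inputs. The CLDNN plugin now forwards the IR layer's output precision into the primitive, calc_output_layout honors the forced type, the fused-scale JIT codegen accumulates in the widest floating-point type involved rather than the output type, and new fusing plus standalone scale tests cover fp32/fp16 mixes.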
3 changes: 2 additions & 1 deletion inference-engine/src/cldnn_engine/cldnn_program.cpp
@@ -1354,7 +1354,8 @@ void Program::CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngi
scaleShiftLayerName,
inputPrimitives[0],
scalePrimID,
biasPrimID);
biasPrimID,
cldnn::optional_data_type{DataTypeFromPrecision(layer->outData[0]->getPrecision())});

topology.add(scaleShiftPrim);
AddPrimitiveToProfiler(scaleShiftLayerName, layer);
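On the plugin side this is pure plumbing: the ScaleShift layer's output precision from the IR is translated into a clDNN data type and handed to the primitive. A minimal sketch of the mapping this call relies on; the real DataTypeFromPrecision helper already exists in the plugin, so the body, header paths, and error handling below are assumptions:

#include <stdexcept>
#include <ie_precision.hpp>   // InferenceEngine::Precision (path assumed)
#include <api/layout.hpp>     // cldnn::data_types (path assumed)

// Assumed behavior of the plugin's DataTypeFromPrecision helper:
cldnn::data_types data_type_from_precision(const InferenceEngine::Precision& p) {
    if (p == InferenceEngine::Precision::FP32) return cldnn::data_types::f32;
    if (p == InferenceEngine::Precision::FP16) return cldnn::data_types::f16;
    if (p == InferenceEngine::Precision::I8)   return cldnn::data_types::i8;
    if (p == InferenceEngine::Precision::U8)   return cldnn::data_types::u8;
    throw std::runtime_error("unsupported precision");  // fallback assumed
}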
8 changes: 5 additions & 3 deletions inference-engine/thirdparty/clDNN/api/scale.hpp
@@ -1,5 +1,5 @@
/*
// Copyright (c) 2016 Intel Corporation
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -51,8 +51,9 @@ struct scale : public primitive_base<scale> {
const primitive_id& input,
const primitive_id& scale_input, // should be bfyx or yxfb, where each dimension can be 1, if all dimensions
// are 1 then this is scalar
const optional_data_type& output_dt = {},
const padding& output_padding = padding())
: primitive_base(id, {input, scale_input}, output_padding), bias("") {}
: primitive_base(id, {input, scale_input}, output_padding, output_dt), bias("") {}

/// @brief Constructs scale primitive with optional adding bias.
/// @param id This primitive id.
@@ -64,8 +65,9 @@ struct scale : public primitive_base<scale> {
const primitive_id& scale_input, // should be bfyx or yxfb, where each dimension can be 1, if all dimensions
// are 1 then this is scalar
const primitive_id& bias, // should be same size as scale_input
const optional_data_type& output_dt = {},
const padding& output_padding = padding())
: primitive_base(id, {input, scale_input}, output_padding), bias(bias) {}
: primitive_base(id, {input, scale_input}, output_padding, output_dt), bias(bias) {}

/// @brief Primitive id containing bias data.
primitive_id bias;
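The new output_dt parameter slots in before output_padding in both overloads and defaults to an empty optional, so the output type is only forced when a caller asks for it. A hedged usage sketch (primitive ids are made up; compare the tests further down):

#include <api/scale.hpp>
#include <api/topology.hpp>

void add_scales(cldnn::topology& topology) {
    using namespace cldnn;
    // No bias, output forced to fp16 regardless of the input precisions:
    topology.add(scale("scale_fp16", "conv", "scale_data",
                       optional_data_type{data_types::f16}));
    // With bias, output type left to the default inference rules:
    topology.add(scale("scale_bias", "conv", "scale_data", "bias_data"));
}

One side effect: callers that passed output_padding positionally must now supply an explicit {} for output_dt first, which is the small churn visible in the padding test at the bottom of this diff.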
@@ -1146,11 +1146,43 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati

switch (desc.GetType()) {
case KernelType::SCALE: {
op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " +
in_vars_converted[0] + " * " + ConvertToOutputType(in_var, vec_size) + ";";
auto get_acc_t = [&]() -> Datatype {
std::vector<Datatype> tensor_types = {desc.output_tensor.GetDType()};
for (auto& in : desc.tensors) {
tensor_types.push_back(in.GetDType());
}

std::vector<Datatype> types_prioritized = { Datatype::F32, Datatype::F16 };

for (auto& type : types_prioritized) {
if (std::any_of(tensor_types.begin(), tensor_types.end(), [=](const Datatype& t) -> bool { return t == type; })) {
return type;
}
}

return Datatype::F32;
};

auto get_input = [&](size_t index) -> std::string {
auto in_name = index == 0 ? in_var : GetInputVarName(index - 1, is_shuffled, shuffle_var);
auto tensor_type = index == 0 ? in_type : desc.tensors[index - 1].GetDType();
auto acc_t = get_acc_t();

if (tensor_type != acc_t)
return ConvertToType(in_name, acc_t, vec_size);
else
return in_name;
};

auto tmp_var = out_var + "_tmp";
if (desc.tensors.size() > 1) {
op_decls += "\\\n\t" + out_var + " += " + in_vars_converted[1] + ";";
op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = "
+ get_input(0) + " * " + get_input(1) + " + " + get_input(2) + ";";
} else {
op_decls += "\\\n\t" + GetType(get_acc_t(), vec_size) + " " + tmp_var + " = "
+ get_input(0) + " * " + get_input(1) + ";";
}
op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(tmp_var, vec_size) + ";";
break;
}
case KernelType::ELTWISE: {
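The codegen change is the heart of the PR. Previously the fused scale converted its operands straight to the output type and multiplied there, which throws away precision when, say, fp32 tensors are fused into an fp16-output kernel. Now an accumulation type is picked first, the multiply (and optional bias add) runs in that type in a _tmp variable, and only the final assignment converts to the output type. A standalone restatement of the selection rule, assuming only the float types matter for priority:

#include <algorithm>
#include <initializer_list>
#include <vector>

enum class Datatype { F32, F16, INT8, UINT8 };  // reduced enum for illustration

// Prefer fp32 if any participating tensor (output included) is fp32, then
// fp16; anything else (e.g. a pure integer chain) also accumulates in fp32.
Datatype get_acc_type(const std::vector<Datatype>& tensor_types) {
    for (Datatype type : {Datatype::F32, Datatype::F16}) {
        if (std::any_of(tensor_types.begin(), tensor_types.end(),
                        [=](Datatype t) { return t == type; }))
            return type;
    }
    return Datatype::F32;
}

For fp32 inputs with a forced fp16 output, the emitted OpenCL now reads roughly as: float out_tmp = in * scale + shift; half out = convert_half(out_tmp); (illustrative, not verbatim codegen output).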
8 changes: 5 additions & 3 deletions inference-engine/thirdparty/clDNN/src/scale.cpp
@@ -1,5 +1,5 @@
/*
// Copyright (c) 2016-2019 Intel Corporation
// Copyright (c) 2016-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -27,8 +27,7 @@ primitive_type_id scale::type_id() {
}

layout scale_inst::calc_output_layout(scale_node const& node) {
assert(static_cast<bool>(node.get_primitive()->output_data_type) == false &&
"Output data type forcing is not supported for scale_node!");
auto desc = node.get_primitive();
auto result = node.input().get_non_padded_output_layout();

auto scale_sizes = node.scale_in().get_non_padded_output_layout().size;
@@ -47,6 +46,9 @@ layout scale_inst::calc_output_layout(scale_node const& node) {
node.scale_in().get_non_padded_output_layout().data_type == data_types::f16))
result.data_type = node.scale_in().get_non_padded_output_layout().data_type;

if (desc->output_data_type)
result.data_type = *desc->output_data_type;

if (scale_x_size != 1) {
CLDNN_ERROR_NOT_EQUAL(node.id(), "Scale x size", scale_x_size, "input x size", input_x_size, "");
}
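With the assert removed, output type forcing now participates in layout calculation, and it deliberately takes precedence over the older fp32/fp16 promotion rule a few lines above it. A minimal sketch of the resulting precedence, assuming the legacy condition simplifies to "fp32 input with fp16 scale input" (the full condition is partly outside this hunk):

#include <optional>

enum class dt { f16, f32 };  // reduced for illustration

dt resolve_output_type(dt input_dt, dt scale_dt, std::optional<dt> forced_dt) {
    dt result = input_dt;
    if (input_dt == dt::f32 && scale_dt == dt::f16)
        result = scale_dt;    // legacy promotion rule (condition simplified)
    if (forced_dt)
        result = *forced_dt;  // new: explicit output_data_type wins
    return result;
}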
@@ -1296,6 +1296,21 @@ TEST_P(conv_int8_scale, basic) {
execute(p);
}

TEST_P(conv_int8_scale, fp16_scale_out) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f/p.kernel.count())),
convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
scale("scale", "conv_prim", "scale_data", optional_data_type{data_types::f16}),
reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
);

tolerance = 1e-5f;
execute(p);
}
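
This variant reuses the conv_int8_scale graph but forces the fused scale to emit fp16 before the final fp32 reorder, so it exercises the new optional_data_type argument along the in-kernel fusion path rather than the standalone scale kernel.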

INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_int8_scale,
::testing::ValuesIn(std::vector<bc_test_params>{
bc_test_params{CASE_CONV_U8S8_1, 2, 3},
@@ -24,12 +24,154 @@
#include <api/engine.hpp>
#include "test_utils/test_utils.h"
#include "api/reorder.hpp"
#include "api/data.hpp"

#include <iostream>

using namespace cldnn;
using namespace tests;

TEST(scale_gpu, basic_in2x3x2x2_mixed_types_in_fp32_out_fp16) {
const auto& engine = get_test_engine();

auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
auto shift_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });

std::vector<float> input_vec = { 1.0f, 0.0f, 5.0f, 1.5f, 2.0f, 0.0f,
6.0f, 5.0f, -10.0f, -11.0f, -12.0f, -13.0f,

3.0f, 0.5f, 7.0f, 12.0f, 4.0f, -0.5f,
8.0f, 8.0f, -14.0f, -15.0f, -16.0f, -17.0f };
set_values(input, input_vec);
set_values(scale_input, { 2.0f, -1.0f });
set_values(shift_input, { -5.0f, 10.0f });

std::vector<float> result_vec = { -3.0f, -5.0f, 5.0f, -2.0f, -1.0f, -5.0f,
4.0f, 5.0f, 20.0f, 21.0f, 22.0f, 23.0f,

1.0f, -4.0f, 9.0f, 19.0f , 3.0f, -6.0f,
2.0f, 2.0f, 24.0f, 25.0f, 26.0f, 27.0f };

topology topology;
topology.add(input_layout("input", input.get_layout()));
topology.add(data("scale_input", scale_input));
topology.add(data("shift_input", shift_input));
topology.add(scale("scale", "input", "scale_input", "shift_input", optional_data_type{data_types::f16}));
topology.add(reorder("reorder", "scale", format::bfyx, data_types::f32));

build_options bo;
bo.set_option(build_option::optimize_data(true));
network network(engine, topology, bo);

network.set_input_data("input", input);

auto outputs = network.execute();

auto output = outputs.at("reorder").get_memory();
auto output_ptr = output.pointer<float>();

ASSERT_EQ(result_vec.size(), output.count());

for (unsigned int i = 0; i < result_vec.size(); ++i) {
EXPECT_NEAR(output_ptr[i], result_vec[i], 1e-05F);
}
}
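
Sanity check on the expected values: each output element is input * scale_f + shift_f for its feature, e.g. the first feature-0 element is 1.0 * 2.0 + (-5.0) = -3.0 and the first feature-1 element is 6.0 * (-1.0) + 10.0 = 4.0.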

TEST(scale_gpu, basic_in2x3x2x2_mixed_types_in_fp16_out_fp32) {
const auto& engine = get_test_engine();

auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 3, 2 } });
auto scale_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } });
auto shift_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } });

std::vector<half_t> input_vec = { half_t(1.0f), half_t(0.0f), half_t(5.0f), half_t(1.5f), half_t(2.0f), half_t(0.0f),
half_t(6.0f), half_t(5.0f), half_t(-10.0f), half_t(-11.0f), half_t(-12.0f), half_t(-13.0f),

half_t(3.0f), half_t(0.5f), half_t( 7.0f), half_t(12.0f), half_t(4.0f), half_t(-0.5f),
half_t(8.0f), half_t(8.0f), half_t(-14.0f), half_t(-15.0f), half_t(-16.0f), half_t(-17.0f) };
set_values(input, input_vec);
set_values(scale_input, { half_t(2.0f), half_t(-1.0f) });
set_values(shift_input, { half_t(-5.0f), half_t(10.0f) });

std::vector<float> result_vec = { -3.0f, -5.0f, 5.0f, -2.0f, -1.0f, -5.0f,
4.0f, 5.0f, 20.0f, 21.0f, 22.0f, 23.0f,

1.0f, -4.0f, 9.0f, 19.0f , 3.0f, -6.0f,
2.0f, 2.0f, 24.0f, 25.0f, 26.0f, 27.0f };

topology topology;
topology.add(input_layout("input", input.get_layout()));
topology.add(data("scale_input", scale_input));
topology.add(data("shift_input", shift_input));
topology.add(scale("scale", "input", "scale_input", "shift_input", optional_data_type{data_types::f32}));
topology.add(reorder("reorder", "scale", format::bfyx, data_types::f32));

build_options bo;
bo.set_option(build_option::optimize_data(true));
network network(engine, topology, bo);

network.set_input_data("input", input);

auto outputs = network.execute();

auto output = outputs.at("reorder").get_memory();
auto output_ptr = output.pointer<float>();

ASSERT_EQ(result_vec.size(), output.count());

for (unsigned int i = 0; i < result_vec.size(); ++i) {
EXPECT_NEAR(output_ptr[i], result_vec[i], 1e-05F);
}
}

TEST(scale_gpu, basic_in2x3x2x2_mixed_types_in_fp32_scale_fp16_out_fp16) {
const auto& engine = get_test_engine();

auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
auto scale_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } });
auto shift_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } });

std::vector<float> input_vec = { 1.0f, 0.0f, 5.0f, 1.5f, 2.0f, 0.0f,
6.0f, 5.0f, -10.0f, -11.0f, -12.0f, -13.0f,

3.0f, 0.5f, 7.0f, 12.0f, 4.0f, -0.5f,
8.0f, 8.0f, -14.0f, -15.0f, -16.0f, -17.0f };
set_values(input, input_vec);
set_values(scale_input, { half_t(2.0f), half_t(-1.0f) });
set_values(shift_input, { half_t(-5.0f), half_t(10.0f) });

std::vector<float> result_vec = { -3.0f, -5.0f, 5.0f, -2.0f, -1.0f, -5.0f,
4.0f, 5.0f, 20.0f, 21.0f, 22.0f, 23.0f,

1.0f, -4.0f, 9.0f, 19.0f , 3.0f, -6.0f,
2.0f, 2.0f, 24.0f, 25.0f, 26.0f, 27.0f };

topology topology;
topology.add(input_layout("input", input.get_layout()));
topology.add(data("scale_input", scale_input));
topology.add(data("shift_input", shift_input));
topology.add(scale("scale", "input", "scale_input", "shift_input", optional_data_type{data_types::f16}));
topology.add(reorder("reorder", "scale", format::bfyx, data_types::f32));

build_options bo;
bo.set_option(build_option::optimize_data(true));
network network(engine, topology, bo);

network.set_input_data("input", input);

auto outputs = network.execute();

auto output = outputs.at("reorder").get_memory();
auto output_ptr = output.pointer<float>();

ASSERT_EQ(result_vec.size(), output.count());

for (unsigned int i = 0; i < result_vec.size(); ++i) {
EXPECT_NEAR(output_ptr[i], result_vec[i], 1e-05F);
}
}
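
All three mixed-type tests share one set of expected values by design: every input, scale, shift, and result above is exactly representable in fp16, so routing any of the tensors through half precision loses nothing and the fp32 comparison can keep the tight 1e-05 tolerance.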

TEST(scale_gpu, basic_in2x3x2x2_scale_same_size) {
// Scale : 2x3x2x2
// Input : 2x3x2x2
@@ -1155,7 +1297,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_yxfb_bfyx_same_size_padding) {
topology.add(input_layout("input", input.get_layout()));
topology.add(reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })));
topology.add(input_layout("scale_input", scale_input.get_layout()));
topology.add(scale("scale", "reorder", "scale_input", padding( { 0, 0, 2, 2 }, 0 )));
topology.add(scale("scale", "reorder", "scale_input", {}, padding( { 0, 0, 2, 2 }, 0 )));

std::vector<float> input_vec = { 1.f, 2.f, 3.f, 4.f };
set_values(input, input_vec);