Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Graph serialization for GPU #2 #13986

Merged
merged 33 commits into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
47bda85
moved serialization include path
e-ddykim Nov 14, 2022
da2428c
quiet onednn-gpu patching
e-ddykim Nov 14, 2022
6c02916
save and load kernels in _impls
e-ddykim Nov 14, 2022
e2b54ad
changed to use OPENVINO_ASSERT
e-ddykim Nov 14, 2022
edfe913
fix errata
e-ddykim Nov 14, 2022
5e91531
updated to follow OpenVINO naming convention
e-ddykim Nov 14, 2022
1b99643
updated error messages
e-ddykim Nov 14, 2022
2b29abb
binary buffer by vector<uint8_t>
e-ddykim Nov 15, 2022
c941190
partial_shape serialization
e-ddykim Nov 15, 2022
7fe8edb
removed object_type
e-ddykim Nov 15, 2022
d489a15
added a new storage class for primitive_type_string and id
e-ddykim Nov 17, 2022
fee6fbf
updated to throw an exception when _node is null in build_deps().
e-ddykim Nov 17, 2022
a9bb84c
removed redundant memory_pool clearing
e-ddykim Nov 17, 2022
c190318
added a new net_id creator
e-ddykim Nov 17, 2022
d7d878f
newline at eof
e-ddykim Nov 17, 2022
8de74ae
updated CLDNN with GPU
e-ddykim Nov 17, 2022
094ae97
added cache blob descriptions
e-ddykim Nov 17, 2022
dd37c11
updated output allocation logic in serialization
e-ddykim Nov 19, 2022
5e36324
added ov::device::architecture in supported properties
e-ddykim Nov 19, 2022
4f805dc
overrode save and load in data_inst and mutable_data_inst
e-ddykim Nov 19, 2022
fbca09f
removed save and load functions in mutable_data
e-ddykim Nov 20, 2022
b170557
baseline for serialization unit tests
e-ddykim Nov 20, 2022
43f71e9
added serialization unit tests
e-ddykim Nov 20, 2022
8a4999f
added serialization unit tests
e-ddykim Nov 21, 2022
30a0ea1
updated not to execute build_deps when deserialized
e-ddykim Nov 21, 2022
916123f
make_data without namespace
e-ddykim Nov 21, 2022
bcea37c
updated to use default layout c-tor
e-ddykim Nov 21, 2022
2797292
updated get_unique_net_id()
e-ddykim Nov 21, 2022
1fc6660
updated get_type_id() to a pure virtual method
e-ddykim Nov 21, 2022
23d6dfd
updated ov::caching_properties
e-ddykim Nov 21, 2022
183a1d8
added [GPU] tags
e-ddykim Nov 21, 2022
a0dfa37
updated network c-tor
e-ddykim Nov 21, 2022
5d3ab0d
updated unit tests
e-ddykim Nov 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#13801 (comment)
Moved the include path. Thank you.

#include "intel_gpu/runtime/compounds.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
#include "serialization/binary_buffer.hpp"

#include <map>
#include <vector>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ class BinaryOutputBuffer : public OutputBuffer<BinaryOutputBuffer> {

void write(void const * data, std::streamsize size) {
auto const written_size = stream.rdbuf()->sputn(reinterpret_cast<const char*>(data), size);
if (written_size != size) {
throw std::runtime_error("Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
}
OPENVINO_ASSERT(written_size == size,
"[GPU] Failed to write " + std::to_string(size) + " bytes to stream! Wrote " + std::to_string(written_size));
}

void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
Expand All @@ -38,9 +37,8 @@ class BinaryInputBuffer : public InputBuffer<BinaryInputBuffer> {

void read(void* const data, std::streamsize size) {
auto const read_size = stream.rdbuf()->sgetn(reinterpret_cast<char*>(data), size);
if (read_size != size) {
throw std::runtime_error("Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}
OPENVINO_ASSERT(read_size == size,
"[GPU] Failed to read " + std::to_string(size) + " bytes from stream! Read " + std::to_string(read_size));
}

void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
Expand Down Expand Up @@ -85,9 +83,14 @@ class Serializer<BinaryInputBuffer, Data<T>> {

} // namespace cldnn

#define BIND_BINARY_BUFFER_WITH_TYPE(cls_name, obj_type) \
#define ASSIGN_TYPE_NAME(cls_name) \
namespace cldnn { \
const object_type cls_name::type = obj_type; \
const std::string cls_name::type = #cls_name; \
}

#define BIND_BINARY_BUFFER_WITH_TYPE(cls_name) \
namespace cldnn { \
const std::string cls_name::type = #cls_name; \
BIND_TO_BUFFER(BinaryOutputBuffer, cls_name) \
BIND_TO_BUFFER(BinaryInputBuffer, cls_name) \
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@
#include <functional>
#include "buffer.hpp"
#include "static_instance.hpp"
#include "object_types.hpp"

#define DECLARE_OBJECT_TYPE_SERIALIZATION \
static const object_type type; \
object_type get_type() const override { return type; }
static const std::string type; \
std::string get_type() const override { return type; }

#define BIND_TO_BUFFER(buffer, type) \
template <> \
Expand All @@ -25,26 +24,19 @@
const instance_creator<buffer, type>& bind_creator<buffer, type>::creator = \
static_instance<instance_creator<buffer, type>>::get_instance().instantiate();

// It's a defect, and was fixed in C++14
// https://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2148
struct enum_class_hash {
template <typename T>
std::size_t operator()(T t) const { return static_cast<std::size_t>(t); }
};

namespace cldnn {

template <typename BufferType>
struct saver_storage {
using save_function = std::function<void(BufferType&, const void*)>;
using value_type = typename std::unordered_map<object_type, save_function, enum_class_hash>::value_type;
using value_type = typename std::unordered_map<std::string, save_function>::value_type;

static saver_storage<BufferType>& instance() {
static saver_storage<BufferType> instance;
return instance;
}

const save_function& get_save_function(const object_type& type) const {
const save_function& get_save_function(const std::string& type) const {
return map.at(type);
}

Expand All @@ -57,7 +49,7 @@ struct saver_storage {
saver_storage(const saver_storage&) = delete;
void operator=(const saver_storage&) = delete;

std::unordered_map<object_type, save_function, enum_class_hash> map;
std::unordered_map<std::string, save_function> map;
};

template <typename T>
Expand All @@ -67,14 +59,14 @@ struct void_deleter {

template <typename BufferType, typename FuncT>
struct loader_storage {
using value_type = typename std::unordered_map<object_type, FuncT, enum_class_hash>::value_type;
using value_type = typename std::unordered_map<std::string, FuncT>::value_type;

static loader_storage& instance() {
static loader_storage instance;
return instance;
}

const FuncT& get_load_function(const object_type& type) {
const FuncT& get_load_function(const std::string& type) {
return map.at(type);
}

Expand All @@ -87,7 +79,7 @@ struct loader_storage {
loader_storage(const loader_storage&) = delete;
void operator=(const loader_storage&) = delete;

std::unordered_map<object_type, FuncT, enum_class_hash> map;
std::unordered_map<std::string, FuncT> map;
};

template <typename BufferType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,33 @@
#include "intel_gpu/runtime/layout.hpp"

namespace cldnn {
template <typename BufferType>
class Serializer<BufferType, ov::PartialShape, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
    // Serializes a partial shape as a dimension count followed by each
    // dimension's [min, max] interval bounds, matching the InputBuffer
    // counterpart's load() format.
    static void save(BufferType& buffer, const ov::PartialShape& partial_shape) {
        const std::vector<ov::Dimension> dims(partial_shape);
        buffer << dims.size();
        for (const auto& dim : dims) {
            const auto& interval = dim.get_interval();
            buffer << interval.get_min_val();
            buffer << interval.get_max_val();
        }
    }
};

template <typename BufferType>
class Serializer<BufferType, ov::PartialShape, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
    // Deserializes a partial shape written by the matching save():
    // a dimension count followed by each dimension's [min, max] bounds.
    static void load(BufferType& buffer, ov::PartialShape& partial_shape) {
        size_t num_dimensions;
        buffer >> num_dimensions;
        // Reset the output before appending so that loading into a reused
        // (non-empty) shape does not leave stale leading dimensions behind.
        partial_shape = ov::PartialShape();
        for (size_t i = 0; i < num_dimensions; i++) {
            ov::Dimension::value_type min_val, max_val;
            buffer >> min_val >> max_val;
            partial_shape.push_back(ov::Dimension(min_val, max_val));
        }
    }
};

template <typename BufferType>
class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
Expand All @@ -21,15 +48,7 @@ class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base
buffer << _layout.data_padding.filling_value();
buffer << _layout.data_padding.lower_size().sizes();
buffer << _layout.data_padding.upper_size().sizes();

std::vector<cldnn::tensor::value_type> _sizes = _layout.get_tensor().sizes(_layout.format);
// Temp WA for bs_x_bsv16
if (_layout.format == cldnn::format::bs_x_bsv16) {
std::vector<cldnn::tensor::value_type> _tmp_sizes = _layout.get_tensor().sizes();
_sizes[0] = _tmp_sizes[0];
_sizes[1] = _tmp_sizes[1];
}
buffer << _sizes;
buffer << _layout.get_partial_shape();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#13801 (comment)
Updated to serialize partial_shape instead of tensor. Thank you.

}
};

Expand All @@ -50,15 +69,9 @@ class Serializer<BufferType, cldnn::layout, typename std::enable_if<std::is_base
_layout.data_padding = cldnn::padding(_lower_size, _upper_size, _filling_value);
}

std::vector<cldnn::tensor::value_type> _sizes;
buffer >> _sizes;

// Temp WA for bs_x_bsv16
if (_layout.format == cldnn::format::bs_x_bsv16) {
_layout.set_tensor(tensor(_sizes));
} else {
_layout.set_tensor(tensor(_layout.format, _sizes));
}
ov::PartialShape partial_shape;
buffer >> partial_shape;
_layout.set_partial_shape(partial_shape);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "buffer.hpp"
#include "bind.hpp"
#include "helpers.hpp"
#include "object_types.hpp"

namespace cldnn {

Expand All @@ -21,7 +20,7 @@ class Serializer<BufferType, std::unique_ptr<T>, typename std::enable_if<std::is
public:
static void save(BufferType& buffer, const std::unique_ptr<T>& ptr) {
const auto& type = ptr->get_type();
buffer << cldnn::make_data(&type, sizeof(object_type));
buffer << type;
const auto save_func = saver_storage<BufferType>::instance().get_save_function(type);
save_func(buffer, ptr.get());
}
Expand All @@ -31,17 +30,17 @@ template <typename BufferType, typename T>
class Serializer<BufferType, std::unique_ptr<T>, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void load(BufferType& buffer, std::unique_ptr<T>& ptr, engine& engine) {
object_type type;
buffer >> cldnn::make_data(&type, sizeof(object_type));
std::string type;
buffer >> type;
const auto load_func = dif<BufferType>::instance().get_load_function(type);
std::unique_ptr<void, void_deleter<void>> result;
load_func(buffer, result, engine);
ptr.reset(static_cast<T*>(result.release()));
}

static void load(BufferType& buffer, std::unique_ptr<T>& ptr) {
object_type type;
buffer >> cldnn::make_data(&type, sizeof(object_type));
std::string type;
buffer >> type;
const auto load_func = def<BufferType>::instance().get_load_function(type);
std::unique_ptr<void, void_deleter<void>> result;
load_func(buffer, result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
CompiledModel(std::istream& networkModel, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);

void Export(std::ostream& networkModel) override;
bool isSerializable();
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
Expand All @@ -47,6 +46,9 @@ class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault
Config m_config;
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;

private:
bool is_serializable();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#13801 (comment)
I changed its visibility and name regarding to the coding style. Thank you.

};

} // namespace intel_gpu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,30 @@ struct primitive_info {
CLDNN_DEFINE_TYPE_ID(PType) \
CLDNN_DEFINE_TYPE_STRING(PType)

// Defines PType::type_id() backed by a function-local singleton, and
// registers the type-name-string -> primitive_type_id mapping in
// prim_map_storage so deserialization can recover a type id by name.
// NOTE(review): the registration bool has external linkage and a leading
// underscore in its name; confirm no other TU extern-references it before
// tightening its linkage or renaming. (Comments are kept outside the macro:
// a // comment inside a \-continued line would swallow the continuation.)
#define GPU_DEFINE_PRIMITIVE_TYPE_ID(PType) \
primitive_type_id PType::type_id() { \
static primitive_type_base<PType> instance; \
return &instance; \
} \
bool _##PType##_added_ = prim_map_storage::instance().set_type_id(#PType, PType::type_id());

// Process-wide registry mapping a primitive's type name (e.g. "activation")
// to its primitive_type_id. Populated at static-init time by
// GPU_DEFINE_PRIMITIVE_TYPE_ID expansions and queried during graph
// deserialization to turn serialized type strings back into type ids.
struct prim_map_storage {
    // Meyers-singleton accessor; thread-safe initialization per C++11.
    static prim_map_storage& instance() {
        static prim_map_storage instance;
        return instance;
    }

    // Looks up a type id by its string name. Throws std::out_of_range if the
    // type was never registered. (Dropped the meaningless top-level `const`
    // on the by-value return: primitive_type_id is a pointer typedef, so the
    // qualifier had no effect and only triggered -Wignored-qualifiers.)
    cldnn::primitive_type_id get_type_id(const std::string& type_string) const {
        return map.at(type_string);
    }

    // Registers a name -> id mapping; returns false if the name was already
    // present (insert does not overwrite an existing entry).
    bool set_type_id(const std::string& type_string, const cldnn::primitive_type_id type_id) {
        return map.insert({type_string, type_id}).second;
    }

private:
    std::unordered_map<std::string, cldnn::primitive_type_id> map;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can make this map a static field of primitive_type and insert type_id in primitive_type_base c-tor?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the point to insert type_id is moved to c-tor, primitives that have not yet been created will not exist in the map. Then, when deserializing, the type_id cannot be obtained by type name, which causes a problem.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

primitive_type objects are static, so all primitives are supposed to be initialized on app startup, aren't they?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After adding a ctor in primitive_type_base, I checked the execution step by debugger.
In my test, the first call point was not at app startup, as shown below.
image

};
/// @}
/// @}
} // namespace cldnn
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/activation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#include <vector>

namespace cldnn {
primitive_type_id activation::type_id() {
static primitive_type_base<activation> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(activation)

layout activation_inst::calc_output_layout(activation_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/adaptive_pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
#include <string>

namespace cldnn {
primitive_type_id adaptive_pooling::type_id() {
static primitive_type_base<adaptive_pooling> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(adaptive_pooling)

layout adaptive_pooling_inst::calc_output_layout(const adaptive_pooling_node& node, kernel_impl_params const& impl_param) {
const auto data_layout = impl_param.get_input_layout();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/arg_max_min.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@
#include "topk_shape_inference.hpp"

namespace cldnn {
primitive_type_id arg_max_min::type_id() {
static primitive_type_base<arg_max_min> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(arg_max_min)

layout arg_max_min_inst::calc_output_layout(arg_max_min_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<arg_max_min>();
Expand Down
6 changes: 1 addition & 5 deletions src/plugins/intel_gpu/src/graph/assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
#include <data_inst.h>

namespace cldnn {

primitive_type_id assign::type_id() {
static primitive_type_base<assign> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(assign)

assign_inst::typed_primitive_inst(network& network, const assign_node& node) :
parent{network, node, false},
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/average_unpooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
#include <string>

namespace cldnn {
primitive_type_id average_unpooling::type_id() {
static primitive_type_base<average_unpooling> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(average_unpooling)

layout average_unpooling_inst::calc_output_layout(average_unpooling_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/batch_to_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <vector>

namespace cldnn {
primitive_type_id cldnn::batch_to_space::type_id() {
static primitive_type_base<batch_to_space> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(batch_to_space)

layout batch_to_space_inst::calc_output_layout(batch_to_space_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<batch_to_space>();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/binary_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <string>

namespace cldnn {
primitive_type_id binary_convolution::type_id() {
static primitive_type_base<binary_convolution> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(binary_convolution)

layout binary_convolution_inst::calc_output_layout(binary_convolution_node const& node, kernel_impl_params const& impl_param) {
auto desc = impl_param.typed_desc<binary_convolution>();
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_gpu/src/graph/border.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
#include <algorithm>

namespace cldnn {
primitive_type_id border::type_id() {
static primitive_type_base<border> instance;
return &instance;
}
GPU_DEFINE_PRIMITIVE_TYPE_ID(border)

layout border_inst::calc_output_layout(border_node const& node, kernel_impl_params const& impl_param) {
assert(static_cast<bool>(impl_param.desc->output_data_type) == false &&
Expand Down
Loading