From 11161e9af7196848b15fcc9081e8d6005f1f512f Mon Sep 17 00:00:00 2001
From: Nikolay Shchegolev
Date: Thu, 11 Mar 2021 09:16:20 +0300
Subject: [PATCH] [CPU] Remove BatchNorm node. (#22)

---
 .../src/mkldnn_plugin/CMakeLists.txt          |   1 -
 .../src/mkldnn_plugin/mkldnn_descriptor.cpp   |  12 -
 .../src/mkldnn_plugin/mkldnn_descriptor.h     |   3 -
 .../src/mkldnn_plugin/mkldnn_exec_network.cpp |   1 -
 .../mkldnn_plugin/mkldnn_graph_optimizer.cpp  |  37 ---
 .../src/mkldnn_plugin/mkldnn_node.cpp         |   2 -
 .../src/mkldnn_plugin/mkldnn_node.h           |   3 -
 .../nodes/mkldnn_batchnorm_node.cpp           | 289 ------------------
 .../nodes/mkldnn_batchnorm_node.h             |  43 ---
 9 files changed, 391 deletions(-)
 delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
 delete mode 100644 inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h

diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index 29f24e82cbab3a..f8286bbc67eec0 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -14,7 +14,6 @@ endif()
 
 ## TODO
 set(LAYERS
-#    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_batchnorm_node.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_bin_conv_node.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_concat_node.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_conv_node.cpp
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp
index 10ed3d432ebd83..99002688e90db5 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp
@@ -23,18 +23,6 @@ size_t MKLDNNDescriptor::outputNumbers() const {
     return 1;
 }
 
-MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::batch_normalization_forward::desc> desc) {
-    this->desc.reset(new DescFwdImpl<mkldnn::batch_normalization_forward::desc>(desc));
-}
-
-MKLDNNDescriptor::operator std::shared_ptr<mkldnn::batch_normalization_forward::desc>() {
-    auto typeDesc = std::dynamic_pointer_cast<DescFwdImpl<mkldnn::batch_normalization_forward::desc>>(desc);
-    if (typeDesc == nullptr) {
-        IE_THROW() << "Cannot cast descriptor!";
-    }
-    return typeDesc->getPtr();
-}
-
 MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc) {
     this->desc.reset(new DescFwdImpl<mkldnn::convolution_forward::desc>(desc));
 }
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h
index e73b505790a494..5025e1a025d6a5 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h
@@ -10,9 +10,6 @@
 class MKLDNNDescriptor {
 public:
-    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::batch_normalization_forward::desc> desc);
-    operator std::shared_ptr<mkldnn::batch_normalization_forward::desc>();
-
     explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc);
     operator std::shared_ptr<mkldnn::convolution_forward::desc>();
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
index e6090c5cd15f65..521e455b0e4c93 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
@@ -301,7 +301,6 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &ne
 //            type != Concatenation &&
 //            type != Eltwise &&
 //            type != Crop &&
-//            type != BatchNormalization &&
 //            type != Copy) {
 //            check_result = false;
 //        }
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
index 9d6183991f2870..b97a77af322c59 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -97,9 +97,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
     FuseBinaryConvolutionAndQuantize(graph);
     graph.RemoveDroppedNodes();
 
-    FuseBatchNormWithScale(graph);
-    graph.RemoveDroppedNodes();
-
     FuseConvolutionSumAndConvolutionSumActivation(graph);
     graph.RemoveDroppedNodes();
 
@@ -697,40 +694,6 @@ void MKLDNNGraphOptimizer::MergeTwoEqualScaleShifts(MKLDNNGraph& graph) {
 //    }
 }
 
-void MKLDNNGraphOptimizer::FuseBatchNormWithScale(MKLDNNGraph &graph) {
-//    auto &graphNodes = graph.GetNodes();
-//
-//    for (int i = 0; i < graphNodes.size(); i++) {
-//        const auto& bn = graphNodes[i];
-//        if (bn->getType() == BatchNormalization) {
-//            const auto& outputNodesMap = graph.GetOutputNodesMap();
-//            const std::string node_name = bn->getName();
-//            // Check that the node is not output node
-//            if (std::find_if(outputNodesMap.begin(), outputNodesMap.end(),
-//                             [&node_name](const MKLDNNNodePtr& x) {
-//                                 return x->getName() == node_name;}) == outputNodesMap.end()) {
-//                if (bn->getChildEdges().size() == 1) {
-//                    auto child = bn->getChildEdgeAt(0)->getChild();
-//                    if (child->type == Eltwise && child->getCnnLayer()->type == "ScaleShift") {
-//                        bn->fuseWith(child);
-//
-//                        auto parentEdges = child->parentEdges;
-//                        for (auto &parentEdge : parentEdges) {
-//                            auto p_edge = parentEdge.lock();
-//                            if (p_edge->getParent()->getType() == BatchNormalization)
-//                                continue;
-//
-//                            removeEdge(graph, p_edge);
-//                        }
-//
-//                        graph.DropNode(child);
-//                    }
-//                }
-//            }
-//        }
-//    }
-}
-
 void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) {
 //    auto& graphNodes = graph.GetNodes();
 //
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
index 8701ad831fb983..b0975089bcab55 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
@@ -13,7 +13,6 @@
 #include
 #include
-#include <nodes/mkldnn_batchnorm_node.h>
 #include
 #include
 #include
@@ -148,7 +147,6 @@ static const InferenceEngine::details::caseless_unordered_map
 //        { "SimplerNMS", SimplerNMS },
 //        { "ROIAlign", ROIAlign },
 //        { "ROIPooling", ROIPooling },
-//        { "BatchNormalization", BatchNormalization },
 //        { "Flatten", Flatten },
         { "Pad", Pad },
         { "Transpose", Transpose },
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
index a36cb2abb7e968..e34fbc99270409 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -59,7 +59,6 @@ enum Type {
     SimplerNMS,
     ROIAlign,
     ROIPooling,
-    BatchNormalization,
     Flatten,
     Pad,
     Transpose,
@@ -201,8 +200,6 @@ static std::string NameFromType(Type type) {
         return "ROIAlign";
     case ROIPooling:
         return "ROIPooling";
-    case BatchNormalization:
-        return "BatchNormalization";
     case Flatten:
         return "Flatten";
     case Pad:
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
deleted file mode 100644
index 8aff5ca6a0b5c7..00000000000000
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "mkldnn_batchnorm_node.h"
-#include <mkldnn_extension_utils.h>
-#include "common/cpu_memcpy.h"
-
-using namespace mkldnn;
-using namespace MKLDNNPlugin;
-using namespace InferenceEngine;
-
-MKLDNNBatchNormalizationNode::MKLDNNBatchNormalizationNode(const std::shared_ptr<ngraph::Node>& op,
-                                                           const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
-        : MKLDNNNode(op, eng, cache) {
-    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
-        return GetVarianceDesc(primitive_desc_it);
-    });
-    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
-        return GetMeanDesc(primitive_desc_it);
-    });
-
-    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
-        if (!fusedWithScale())
-            return MKLDNNMemoryDesc();
-        return GetScaleShiftWeightsDesc(primitive_desc_it);
-    });
-}
-
-bool MKLDNNBatchNormalizationNode::fusedWithScale() const {
-    return false;
-    IE_THROW() << "Not implemented";
-    // TODO [NM]: reimplement w/o using CNNLayer
-//    return fusedWith.size() == 1 && fusedWith[0]->getType() == Eltwise
-//            && fusedWith[0]->getCnnLayer()->type == "ScaleShift";
-}
-
-void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
-//    if (!descs.empty())
-//        return;
-//    auto * bnLayer = dynamic_cast<BatchNormalizationLayer *>(getCnnLayer().get());
-//    if (bnLayer == nullptr)
-//        IE_THROW() << "Cannot convert batch normalization layer.";
-//    if (bnLayer->_weights == nullptr || bnLayer->_biases == nullptr) {
-//        IE_THROW() << "Weights/biases are empty for layer: " << bnLayer->name
-//                   << " used in MKLDNN node: " << getName() << "\n"
-//                   << "Use the second argument of InferenceEngine::Core::ReadNetwork"
-//                   << " to load them from .bin part of the IR";
-//    }
-//
-//    if (getParentEdges().size() != 1)
-//        IE_THROW() << "Incorrect number of input edges for layer " << getName();
-//    if (!getChildEdges().size())
-//        IE_THROW() << "Incorrect number of output edges for layer " << getName();
-//
-//    eps = bnLayer->epsilon;
-//
-//    size_t variancesSize = MKLDNNDims(bnLayer->_weights->getTensorDesc().getDims()).size();
-//    size_t meansSize = MKLDNNDims(bnLayer->_biases->getTensorDesc().getDims()).size();
-//
-//    if (variancesSize != meansSize && variancesSize != 1)
-//        IE_THROW() << "Incorrect weights and biases sizes!";
-//
-//    internalBlobs.push_back(createInternalBlob(bnLayer->_weights->getTensorDesc().getDims(), true));
-//    internalBlobs.push_back(createInternalBlob(bnLayer->_biases->getTensorDesc().getDims(), false));
-//
-//    auto parentOutDims = getParentEdgeAt(0)->getDims();
-//
-//    if (fusedWith.size() > 1)
-//        IE_THROW() << "BatchNorm fusion is possible with only one layer!";
-//
-//    for (const auto &node : fusedWith) {
-//        auto * scshLayer = dynamic_cast<ScaleShiftLayer *>(node->getCnnLayer().get());
-//        if (scshLayer == nullptr)
-//            IE_THROW() << "Cannot cast to the ScaleShift layer to fuse with BatchNorm.";
-//
-//        size_t C = static_cast<size_t>(getChildEdgeAt(0)->getDims()[1]);
-//        SizeVector mkldnn_weights = {2, C};
-//        TensorDesc desc(scshLayer->_weights->getTensorDesc().getPrecision(), mkldnn_weights, InferenceEngine::NC);
-//        InferenceEngine::TBlob<float>::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
-//        internalBlob->allocate();
-//        float * data = internalBlob->buffer();
-//        if (data == nullptr)
-//            IE_THROW() << "Cannot get memory!";
-//
-//        InferenceEngine::Blob::Ptr blb = scshLayer->_weights;
-//        if (blb == nullptr)
-//            IE_THROW() << "Cannot get weights blob for node " << getName() << ".";
-//
-//        size_t weightsByteSize = blb->byteSize();
-//        cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
-//        data += blb->size();
-//        blb = scshLayer->_biases;
-//
-//        if (blb == nullptr) {
-//            memset(data, 0, weightsByteSize);
-//        } else {
-//            if (weightsByteSize != blb->byteSize())
-//                IE_THROW() << "ScaleShift has incorrect weights!";
-//            cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize);
-//        }
-//        internalBlobs.push_back(internalBlob);
-//    }
-//
-//    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
-//    if (precision != InferenceEngine::Precision::FP32)
-//        precision = InferenceEngine::Precision::FP32;
-//    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
-//
-//    for (auto format : getAvailableFormatsForDims(parentOutDims)) {
-//        MKLDNNMemoryDesc in_candidate(parentOutDims, inputDataType, format);
-//        createDescriptor({in_candidate}, {});
-//    }
-}
-
-static MKLDNNMemoryDesc get_bn_mdesc_by_index(const mkldnn::primitive_desc_iterator &primitive_desc, int idx) {
-    mkldnn_batch_normalization_desc_t *p;
-    error::wrap_c_api(mkldnn_primitive_desc_query(
-            primitive_desc.get(), mkldnn::convert_to_c(mkldnn::query::batch_normalization_d), 0, &p),
-                      "could not get a batch-normalization descriptor");
-    auto bndesc =
-            (p->flags & mkldnn::convert_to_c(mkldnn::normalization_flags::use_global_stats)) ?
-            primitive_desc.src_desc(idx) : primitive_desc.dst_desc(idx);
-
-    return MKLDNNMemoryDesc {bndesc};
-}
-
-MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const mkldnn::primitive_desc &primitive_desc) const {
-    // TODO: rewrite with using stat_desc
-    return get_bn_mdesc_by_index(primitive_desc, 2);
-}
-
-MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const mkldnn::primitive_desc &primitive_desc) const {
-    return get_bn_mdesc_by_index(primitive_desc, 1);
-}
-
-MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const mkldnn::primitive_desc &primitive_desc) const {
-    return MKLDNNMemoryDesc(primitive_desc.weights_desc(0));
-}
-
-bool MKLDNNBatchNormalizationNode::created() const {
-    return getType() == BatchNormalization;
-}
-
-void MKLDNNBatchNormalizationNode::createPrimitive() {
-    if (prim)
-        return;
-
-    auto prim_desc = createPrimitiveDescriptor<batch_normalization_forward::primitive_desc, batch_normalization_forward::desc>();
-    prim.reset(new batch_normalization_forward(prim_desc));
-
-    auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-    auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-
-    const auto &mean = internalBlobMemory[1]->GetPrimitive();
-    const auto &var = internalBlobMemory[0]->GetPrimitive();
-
-    if (convert_to_c(flag) & dnnl_use_scaleshift) {
-        const auto &sclshft = internalBlobMemory[2]->GetPrimitive();
-        primArgs = {{DNNL_ARG_SRC, src},
-                    {DNNL_ARG_MEAN, mean},
-                    {DNNL_ARG_VARIANCE, var},
-                    {DNNL_ARG_SCALE_SHIFT, sclshft},
-                    {DNNL_ARG_DST, dst}};
-    } else {
-        primArgs = {{DNNL_ARG_SRC, src},
-                    {DNNL_ARG_MEAN, mean},
-                    {DNNL_ARG_VARIANCE, var},
-                    {DNNL_ARG_DST, dst}};
-    }
-}
-
-void MKLDNNBatchNormalizationNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
-                                                    const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
-    MKLDNNMemoryDesc inDesc(inputDesc[0]);
-    if (inDesc.getDims().ndims() == 2) {
-        // Make it 4D
-        MKLDNNDims dims = inDesc.getDims();
-        dims.push_back(1);  // H
-        dims.push_back(1);  // W
-        auto format = memory::format_tag::nchw;
-        inDesc = MKLDNNMemoryDesc(dims, inDesc.getDataType(), format);
-    }
-
-    flag = normalization_flags::use_global_stats;
-    if (fusedWithScale())
-        flag |= normalization_flags::use_scale_shift;
-
-    MKLDNNDescriptor desc(std::shared_ptr<mkldnn::batch_normalization_forward::desc>(
-            new mkldnn::batch_normalization_forward::desc(prop_kind::forward_scoring, inDesc, eps,
-                                                          flag)));
-    descs.push_back(desc);
-}
-
-void MKLDNNBatchNormalizationNode::initOptimalPrimitiveDescriptor() {
-    auto selected_pd = getSelectedPrimitiveDescriptor();
-    if (selected_pd == nullptr)
-        IE_THROW() << "Preferable primitive descriptor is not set.";
-    auto config = selected_pd->getConfig();
-    if (isInitConfig(config))
-        return;
-
-    if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || (!isUninitTensorDesc(config.inConfs[0].desc) &&
-            !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc))
-        IE_THROW() << "Layer " << getName() << " has incorrect selected config!";
-
-    if (!isUninitTensorDesc(config.inConfs[0].desc)) {
-        config.outConfs[0].desc = config.inConfs[0].desc;
-    } else if (!isUninitTensorDesc(config.outConfs[0].desc)) {
-        config.inConfs[0].desc = config.outConfs[0].desc;
-    } else {
-        config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0);
-    }
-
-    initDescriptor(config);
-}
-
-void MKLDNNBatchNormalizationNode::initSupportedPrimitiveDescriptors() {
-    if (!supportedPrimitiveDescriptors.empty())
-        return;
-
-    // BN primitive doesn't support strides
-    for (auto& desc : descs) {
-        primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine());
-        while (static_cast<bool>(itpd)) {
-            InferenceEngine::LayerConfig config;
-            config.dynBatchSupport = true;
-            for (size_t i = 0; i < desc.inputNumbers(); i++) {
-                InferenceEngine::DataConfig dataConfig;
-                dataConfig.inPlace = -1;
-                dataConfig.constant = false;
-                dataConfig.desc = getSrcMemDesc(itpd, i);
-                config.inConfs.push_back(dataConfig);
-            }
-
-            for (size_t i = 0; i < desc.outputNumbers(); i++) {
-                InferenceEngine::DataConfig dataConfig;
-                dataConfig.inPlace = canBeInPlace() ? 0 : -1;
-                dataConfig.constant = false;
-                dataConfig.desc = getDstMemDesc(itpd, i);
-                config.outConfs.push_back(dataConfig);
-            }
-            impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
-
-            supportedPrimitiveDescriptors.emplace_back(config, impl_type);
-            if (!itpd.next_impl())
-                break;
-        }
-    }
-}
-
-MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it,
-                                                             size_t idx) {
-    TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx));
-
-    if (getParentEdgeAt(0)->getDims().ndims() == 2 && desc.getLayout() == InferenceEngine::Layout::NCHW) {
-        desc.reshape(getParentEdgeAt(idx)->getDims().ToSizeVector(), InferenceEngine::Layout::NC);
-        return MKLDNNMemoryDesc(desc);
-    }
-    if (desc.getLayout() == InferenceEngine::Layout::ANY)
-        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
-                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
-                                                            desc.getLayout()));
-    else
-        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
-                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
-                                                            desc.getBlockingDesc()));
-}
-
-MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it,
-                                                             size_t idx) {
-    TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx));
-
-    if (getParentEdgeAt(0)->getDims().ndims() == 2 && desc.getLayout() == InferenceEngine::Layout::NCHW) {
-        desc.reshape(getParentEdgeAt(idx)->getDims().ToSizeVector(), InferenceEngine::Layout::NC);
-        return MKLDNNMemoryDesc(desc);
-    }
-    if (desc.getLayout() == InferenceEngine::Layout::ANY)
-        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
-                                                            getChildEdgeAt(idx)->getDims().ToSizeVector(),
-                                                            desc.getLayout()));
-    else
-        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
-                                                            getChildEdgeAt(idx)->getDims().ToSizeVector(),
-                                                            desc.getBlockingDesc()));
-}
-
-REG_MKLDNN_PRIM_FOR(MKLDNNBatchNormalizationNode, BatchNormalization);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h
deleted file mode 100644
index ed64cb06fd77a9..00000000000000
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ie_common.h>
-#include <mkldnn_node.h>
-#include <string>
-#include <memory>
-#include <vector>
-
-namespace MKLDNNPlugin {
-
-class MKLDNNBatchNormalizationNode : public MKLDNNNode {
-public:
-    MKLDNNBatchNormalizationNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng,
-                                 MKLDNNWeightsSharing::Ptr &cache);
-
-    ~MKLDNNBatchNormalizationNode() override = default;
-    void initSupportedPrimitiveDescriptors() override;
-    void initOptimalPrimitiveDescriptor() override;
-    void getSupportedDescriptors() override;
-    void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
-                          const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
-    void createPrimitive() override;
-    bool created() const override;
-
-    bool fusedWithScale() const;
-
-    MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
-    MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
-
-private:
-    mkldnn::normalization_flags flag = mkldnn::normalization_flags::none;
-    float eps = 0.0f;
-    MKLDNNMemoryDesc GetVarianceDesc(const mkldnn::primitive_desc& primitive_desc) const;
-    MKLDNNMemoryDesc GetMeanDesc(const mkldnn::primitive_desc& primitive_desc) const;
-    MKLDNNMemoryDesc GetScaleShiftWeightsDesc(const mkldnn::primitive_desc& primitive_desc) const;
-};
-
-}  // namespace MKLDNNPlugin
-