From 97b88733fdc9a017c3e22627c311d933d9bc1f1b Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Thu, 6 Jul 2023 19:33:44 +0200
Subject: [PATCH] Apply review comments

---
 src/plugins/intel_cpu/src/cpu_memory.cpp   |  15 +-
 src/plugins/intel_cpu/src/edge.cpp         |  71 +++---
 src/plugins/intel_cpu/src/graph.cpp        | 229 +++++++++---------
 src/plugins/intel_cpu/src/graph.h          |   1 +
 src/plugins/intel_cpu/src/node.cpp         |  26 +-
 src/plugins/intel_cpu/src/nodes/concat.cpp |  79 +++---
 src/plugins/intel_cpu/src/nodes/gather.cpp |  49 ++--
 src/plugins/intel_cpu/src/nodes/split.cpp  |  66 ++---
 .../intel_cpu/src/partitioned_mem_mgr.h    |  13 +-
 9 files changed, 276 insertions(+), 273 deletions(-)

diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp
index 1acad38240bc55..6c34123a65b046 100644
--- a/src/plugins/intel_cpu/src/cpu_memory.cpp
+++ b/src/plugins/intel_cpu/src/cpu_memory.cpp
@@ -93,13 +93,14 @@ void Memory::create(MemoryDescPtr desc, const void* data, bool pads_zeroing) {
     m_padsZeroing = pads_zeroing;
     dnnlMemHandle.resetDnnlPrim();
 
-    if (m_pMemDesc->isDefined()) {
-        auto memSize = m_pMemDesc->getCurrentMemSize();
-        if (nullptr != data) {
-            m_mgrHandle->setExtBuff(const_cast<void*>(data), memSize);
-        } else {
-            m_mgrHandle->resize(memSize);
-        }
+    if (!m_pMemDesc->isDefined()) {
+        return;
+    }
+    auto memSize = m_pMemDesc->getCurrentMemSize();
+    if (nullptr != data) {
+        m_mgrHandle->setExtBuff(const_cast<void*>(data), memSize);
+    } else {
+        m_mgrHandle->resize(memSize);
     }
 }
 
diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp
index 4f36dccd89ca01..b18af69ec03ce6 100644
--- a/src/plugins/intel_cpu/src/edge.cpp
+++ b/src/plugins/intel_cpu/src/edge.cpp
@@ -105,7 +105,7 @@ bool Edge::enforceReorder() {
     for (auto& p_edge_peer : portChildEdges) {
         if (p_edge_peer.get() == this)
             continue;
-        if (p_edge_peer->inPlace(LOOK_DOWN)) { //p_edge_peer->getChild()->getType() != Type::Reorder &&
+        if (p_edge_peer->inPlace(LOOK_DOWN)) {
            return true;
        }
    }
@@ -472,22 +472,6 @@ void Edge::init() {
        }
        sharedMemFrom(edgePtr);
    }
-//
-    // auto port = getInputNum();
-    // if (port < 0)
-    //     return;
-    // auto edges_at_same_port = getParent()->getChildEdgesAtPort(static_cast<size_t>(port));
-    // for (auto edge : edges_at_same_port) {
-    //     if (edge->getStatus() != Status::NeedAllocation && edge->getStatus() != Status::Uninitialized) {
-    //         if (edge->getSharedEdge() != edgePtr)
-    //             IE_THROW() << "Unsupported behavior. Cannot mark edge "
-    //                        << getParent()->getChildEdgeAt(0)->getParent()->getName() << "->"
-    //                        << getParent()->getChildEdgeAt(0)->getChild()->getName() << " as not allocated!";
-    //     } else {
-    //         if (edge != edgePtr)
-    //             edge->sharedMemFrom(edgePtr);
-    //     }
-    // }
 }
 
 /**
@@ -550,42 +534,45 @@ bool Edge::inPlace(LOOK look) const {
 
 NodePtr Edge::modifiedInPlace() const {
     auto childNode = getChild();
-    if (childNode && childNode->isInPlace()) {
-        // check if the children nodes are able to modify the memory
-        auto childPort = getOutputNum();
-        auto inPlaceInputPort = childNode->inPlaceInputPort(childPort);
-        if (inPlaceInputPort >= 0) {
+    if (!childNode || !childNode->isInPlace()) {
+        return nullptr;
+    }
+    // check if the children nodes are able to modify the memory
+    auto childPort = getOutputNum();
+    auto inPlaceInputPort = childNode->inPlaceInputPort(childPort);
+    if (inPlaceInputPort >= 0) {
+        if (childNode->isExecutable()) {
+            // Node can modify the memory
+            return childNode;
+        }
+        for (auto&& edge : childNode->getChildEdgesAtPort(inPlaceInputPort)) {
+            // continue searching
+            if (auto result = edge->modifiedInPlace()) {
+                return result;
+            }
+        }
+    }
+    // check backward dependency
+    if (auto childSPD = childNode->getSelectedPrimitiveDescriptor()) {
+        auto& outConfs = childSPD->getConfig().outConfs;
+        for (size_t i = 0; i < outConfs.size(); ++i) {
+            const auto& conf = outConfs[i];
+            if (childPort < 0 || conf.inPlace() != childPort) {
+                continue;
+            }
             if (childNode->isExecutable()) {
                 // Node can modify the memory
                 return childNode;
             }
-            for (auto&& edge : childNode->getChildEdgesAtPort(inPlaceInputPort)) {
+            for (auto&& edge : childNode->getChildEdgesAtPort(i)) {
                 // continue searching
                 if (auto result = edge->modifiedInPlace()) {
                     return result;
                 }
             }
         }
-        // check backward dependency
-        if (auto childSPD = childNode->getSelectedPrimitiveDescriptor()) {
-            auto& outConfs = childSPD->getConfig().outConfs;
-            for (size_t i = 0; i < outConfs.size(); ++i) {
-                const auto& conf = outConfs[i];
-                if (childPort >= 0 && conf.inPlace() == childPort) {
-                    if (childNode->isExecutable()) {
-                        // Node can modify the memory
-                        return childNode;
-                    }
-                    for (auto&& edge : childNode->getChildEdgesAtPort(i)) {
-                        // continue searching
-                        if (auto result = edge->modifiedInPlace()) {
-                            return result;
-                        }
-                    }
-                }
-            }
-        }
     }
+    // nothing has been found
     return nullptr;
 }
 
diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index 96e3deb58bbeef..185f3384c9c758 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -363,30 +363,7 @@ void Graph::InitGraph() {
     optimizer.ApplyImplSpecificGraphOptimizations(*this);
     SortTopologically();
 
-    bool haveDynNodes = false;
-    for (size_t i = 0; i < graphNodes.size(); ++i) {
-        const auto& node = graphNodes[i];
-        if (node->isDynamicNode()) {
-            haveDynNodes = true;
-            if (node->outputShapeDataDependency() ||
-                // WA: for convolution plus sum(broadcast). Due to the fact that a convolution with sum use the same memory for second sum term and the output
-                // tensors (inPlace) resizing the output tensor, may lead to reallocation of this second term memory and possible data lost. The reallocation
-                // may happen when the second term shape is broadcasted to the output tensor shape. To avoid the data loss, we have a special processing for
-                // such cases inside the convolution node, but it works properly only when dynamic shapes inference, preparation and execution a called
-                // for this node sequentially.
- (node->getType() == Type::Convolution && node->isInPlace())) { - syncNodesInds.insert({node.get(), i}); - } - } - } - - // In case of dynamic shapes, tensors may be resized due to the shapes variations. - // If the input tensor is included to memory reuse, it means that its memory manager is shared with other tensors in the graph, which in turn may cause data - // loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations - // we disable io mem reuse for the case of dynamic shapes. - if (haveDynNodes) { - this->reuse_io_tensors = false; - } + const bool hasDynNodes = ProcessDynNodes(); Allocate(); @@ -400,7 +377,7 @@ void Graph::InitGraph() { ExtractExecutableNodes(); - status = haveDynNodes ? Status::ReadyDynamic : Status::ReadyStatic; + status = hasDynNodes ? Status::ReadyDynamic : Status::ReadyStatic; } void Graph::InitNodes() { @@ -731,16 +708,16 @@ void Graph::AllocateWithReuse() { auto &cluster = edge_clusters[i]; bool erase = false; for (auto &edge : cluster) { - if (edge->getStatus() == Edge::Status::NeedAllocation - && edge->getParent()->isConstant()) { - if (edge->getParent()->getType() == Type::Input) { - auto constNode = std::static_pointer_cast(edge->getParent()); - edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); - } else { - edge->externalAllocate(context->getWeightsCache()); - } - erase = true; + if (edge->getStatus() != Edge::Status::NeedAllocation || !edge->getParent()->isConstant()) { + continue; + } + if (edge->getParent()->getType() == Type::Input) { + auto constNode = std::static_pointer_cast(edge->getParent()); + edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); + } else { + edge->externalAllocate(context->getWeightsCache()); } + erase = true; } if (erase) { @@ -901,29 +878,30 @@ void Graph::AllocateWithReuse() { // Resolve all other edges with status NotAllocated and in-place for (auto& cluster : edge_clusters) { for (auto& edge : cluster) { - if (edge->getStatus() == Edge::Status::NotAllocated) { - std::vector edges_to_process; - edges_to_process.push_back(edge); - for (auto next_edge = edge->getSharedEdge(std::nothrow); - next_edge; - next_edge = next_edge->getSharedEdge(std::nothrow)) { - edges_to_process.push_back(next_edge); - } - std::for_each(edges_to_process.rbegin(), edges_to_process.rend(), [](const EdgePtr& edge){ - if (edge->getStatus() == Edge::Status::NotAllocated) { - if (edge->inPlace(Edge::LOOK_DOWN)) { - edge->getChild()->resolveInPlaceEdges(Edge::LOOK_DOWN); - } else if (edge->inPlace(Edge::LOOK_UP)) { - edge->getParent()->resolveInPlaceEdges(Edge::LOOK_UP); - } else { - auto sharedEdge = edge->getSharedEdge(); - auto sharedEdgeParent = sharedEdge->getParent(); - edge->allocate(sharedEdge->getMemoryPtr()->getMemoryMngr()); - DEBUG_LOG(*edge, " sharedEdge with ", *sharedEdge); - } - } - }); + if (edge->getStatus() != Edge::Status::NotAllocated) { + continue; } + std::vector edges_to_process; + edges_to_process.push_back(edge); + for (auto next_edge = edge->getSharedEdge(std::nothrow); + next_edge; + next_edge = next_edge->getSharedEdge(std::nothrow)) { + edges_to_process.push_back(next_edge); + } + std::for_each(edges_to_process.rbegin(), edges_to_process.rend(), [](const EdgePtr& edge) { + if (edge->getStatus() == Edge::Status::NotAllocated) { + if (edge->inPlace(Edge::LOOK_DOWN)) { + edge->getChild()->resolveInPlaceEdges(Edge::LOOK_DOWN); + } else if (edge->inPlace(Edge::LOOK_UP)) { + 
edge->getParent()->resolveInPlaceEdges(Edge::LOOK_UP); + } else { + auto sharedEdge = edge->getSharedEdge(); + auto sharedEdgeParent = sharedEdge->getParent(); + edge->allocate(sharedEdge->getMemoryPtr()->getMemoryMngr()); + DEBUG_LOG(*edge, " sharedEdge with ", *sharedEdge); + } + } + }); } } } @@ -946,6 +924,36 @@ void Graph::Allocate() { for (auto& edge : graphEdges) edge->validate(); } +bool Graph::ProcessDynNodes() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ProcessDynNodes"); + + bool result = false; + for (size_t i = 0; i < graphNodes.size(); ++i) { + const auto& node = graphNodes[i]; + if (node->isDynamicNode()) { + result = true; + if (node->outputShapeDataDependency() || + // WA: for convolution plus sum(broadcast). Due to the fact that a convolution with sum use the same memory for second sum term and the output + // tensors (inPlace) resizing the output tensor, may lead to reallocation of this second term memory and possible data lost. The reallocation + // may happen when the second term shape is broadcasted to the output tensor shape. To avoid the data loss, we have a special processing for + // such cases inside the convolution node, but it works properly only when dynamic shapes inference, preparation and execution a called + // for this node sequentially. + (node->getType() == Type::Convolution && node->isInPlace())) { + syncNodesInds.insert({node.get(), i}); + } + } + } + + // In case of dynamic shapes, tensors may be resized due to the shapes variations. + // If the input tensor is included to memory reuse, it means that its memory manager is shared with other tensors in the graph, which in turn may cause data + // loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations + // we disable io mem reuse for the case of dynamic shapes. + if (result) { + this->reuse_io_tensors = false; + } + return result; +} + void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) { if (!IsReady()) IE_THROW()<< "Wrong state. 
Topology not ready."; @@ -1797,66 +1805,67 @@ void Graph::resolveInPlaceDirection(const NodePtr& node) const { if (auto pEdge = wEdge.lock()) { auto inpPort = pEdge->getOutputNum(); auto inPlaceInpPort = node->inPlaceInputPort(inpPort); - if (inPlaceInpPort >= 0 && inPlaceDirection(node, PortType::INPUT, inpPort) == InplaceDirectionType::CYCLIC) { - // inPlace memory cyclic dependency detected, need to resolve - // let's check the parent node first - auto pParent = pEdge->getParent(); - auto parentInPlaceDirection = inPlaceDirection(pParent, PortType::OUTPUT, pEdge->getInputNum()); - if (parentInPlaceDirection == InplaceDirectionType::UP) { + if (inPlaceInpPort < 0 || inPlaceDirection(node, PortType::INPUT, inpPort) != InplaceDirectionType::CYCLIC) { + continue; + } + // inPlace memory cyclic dependency detected, need to resolve + // let's check the parent node first + auto pParent = pEdge->getParent(); + auto parentInPlaceDirection = inPlaceDirection(pParent, PortType::OUTPUT, pEdge->getInputNum()); + if (parentInPlaceDirection == InplaceDirectionType::UP) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.inConfs[inpPort].inPlace(-1); + node->initDescriptor(config); + } else if (parentInPlaceDirection == InplaceDirectionType::DOWN) { + //search if siblings already have downstream direction + auto downstreamPeers = [&] { + for (auto& peerEdge : pParent->getChildEdgesAtPort(pEdge->getInputNum())) { + auto peerNode = peerEdge->getChild(); + if (peerNode == node) continue; + if (inPlaceDirection(peerNode, PortType::INPUT, peerEdge->getOutputNum()) == InplaceDirectionType::DOWN) { + return true; + } + } + return false; + }(); + if (downstreamPeers) { + // when there is an downstream peer we have to resolve upstream inplace for the node + // to avoid inplace conflict auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); config.inConfs[inpPort].inPlace(-1); node->initDescriptor(config); - } else if (parentInPlaceDirection == InplaceDirectionType::DOWN) { - //search if siblings already have downstream direction - auto downstreamPeers = [&] { - for (auto& peerEdge : pParent->getChildEdgesAtPort(pEdge->getInputNum())) { - auto peerNode = peerEdge->getChild(); - if (peerNode == node) continue; - if (inPlaceDirection(peerNode, PortType::INPUT, peerEdge->getOutputNum()) == InplaceDirectionType::DOWN) { - return true; - } - } - return false; - }(); - if (downstreamPeers) { - // when there is an downstream peer we have to resolve upstream inplace for the node - // to avoid inplace conflict - auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); - config.inConfs[inpPort].inPlace(-1); - node->initDescriptor(config); - } else { - auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); - config.outConfs[inPlaceInpPort].inPlace(-1); - node->initDescriptor(config); - } } else { - // the parent node does not use inPlace memory, let's check children - std::function searchNonCyclicDirection; - searchNonCyclicDirection = [&](const NodePtr& node, int portIdx) -> InplaceDirectionType { - auto& childEdges = node->getChildEdgesAtPort(portIdx); - for (auto& edge : childEdges) { - auto pChild = edge->getChild(); - auto result = inPlaceDirection(pChild, PortType::INPUT, edge->getOutputNum()); - if (InplaceDirectionType::UP == result || InplaceDirectionType::DOWN == result) { - return result; - } else if (InplaceDirectionType::CYCLIC == result) { - return searchNonCyclicDirection(pChild, pChild->inPlaceInputPort(edge->getOutputNum())); - } + auto config = 
node->getSelectedPrimitiveDescriptor()->getConfig(); + config.outConfs[inPlaceInpPort].inPlace(-1); + node->initDescriptor(config); + } + } else { + // the parent node does not use inPlace memory, let's check children + std::function searchNonCyclicDirection; + searchNonCyclicDirection = [&](const NodePtr& node, int portIdx) -> InplaceDirectionType { + auto& childEdges = node->getChildEdgesAtPort(portIdx); + for (auto& edge : childEdges) { + auto pChild = edge->getChild(); + auto result = inPlaceDirection(pChild, PortType::INPUT, edge->getOutputNum()); + if (InplaceDirectionType::UP == result || InplaceDirectionType::DOWN == result) { + return result; + } else if (InplaceDirectionType::CYCLIC == result) { + return searchNonCyclicDirection(pChild, pChild->inPlaceInputPort(edge->getOutputNum())); } - return InplaceDirectionType::NONE; - }; - auto result = searchNonCyclicDirection(node, inPlaceInpPort); - if (one_of(result, InplaceDirectionType::UP, InplaceDirectionType::NONE)) { - auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); - config.inConfs[inpPort].inPlace(-1); - node->initDescriptor(config); - } else if (InplaceDirectionType::DOWN == result) { - auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); - config.outConfs[inPlaceInpPort].inPlace(-1); - node->initDescriptor(config); - } else { - IE_THROW() << "A node without an inPlace memory cyclic dependency has not been found"; } + return InplaceDirectionType::NONE; + }; + auto result = searchNonCyclicDirection(node, inPlaceInpPort); + if (one_of(result, InplaceDirectionType::UP, InplaceDirectionType::NONE)) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.inConfs[inpPort].inPlace(-1); + node->initDescriptor(config); + } else if (InplaceDirectionType::DOWN == result) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.outConfs[inPlaceInpPort].inPlace(-1); + node->initDescriptor(config); + } else { + IE_THROW() << "A node without an inPlace memory cyclic dependency has not been found"; } } } diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index a3d2725dcec1b8..f2b9cae7ecda47 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -229,6 +229,7 @@ class Graph { void ResolveInplaceDirections(); void InitOptimalPrimitiveDescriptors(); void InitEdges(); + bool ProcessDynNodes(); void Allocate(); void AllocateWithReuse(); void ExtractExecutableNodes(); diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index b4b9415aafd45c..cdd343c126277c 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -377,7 +377,7 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { auto inplaceOutIndx = selected_pd->getConfig().inConfs[i].inPlace(); - if (inplaceOutIndx < 0) //parentEdge->getStatus() != Edge::Status::NotAllocated || + if (inplaceOutIndx < 0) continue; auto parentEdge = getParentEdgeAt(i); @@ -398,7 +398,7 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { auto inplaceInpIndx = selected_pd->getConfig().outConfs[i].inPlace(); - if (inplaceInpIndx < 0) //childEdge->getStatus() != Edge::Status::NotAllocated || + if (inplaceInpIndx < 0) continue; auto baseMemMngr = 
getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); @@ -1473,13 +1473,12 @@ bool Node::isInputTensorAtPortEmpty(size_t port) const { if (inputShapes[port].hasZeroDims()) { return true; - } else { - auto edge = getParentEdgesAtPort(port)[0]; - if (one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::Validated)) { - auto&& mem = edge->getMemory(); - if (mem.isAllocated()) { - return mem.getShape().hasZeroDims(); - } + } + auto edge = getParentEdgesAtPort(port)[0]; + if (one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::Validated)) { + auto&& mem = edge->getMemory(); + if (mem.isAllocated()) { + return mem.getShape().hasZeroDims(); } } return false; @@ -1491,11 +1490,10 @@ bool Node::isOutputTensorAtPortEmpty(size_t port) const { } if (outputShapes[port].isStatic()) { return outputShapes[port].hasZeroDims(); - } else { - auto& mem = getChildEdgesAtPort(port)[0]->getMemory(); - if (mem.isAllocated()) { - return mem.getShape().hasZeroDims(); - } + } + auto&& mem = getChildEdgesAtPort(port)[0]->getMemory(); + if (mem.isAllocated()) { + return mem.getShape().hasZeroDims(); } return false; } diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index fd207a7409c2a7..633f40cea00fa5 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -626,47 +626,48 @@ void Concat::execRef() { } void Concat::resolveInPlaceEdges(Edge::LOOK look) { - if ((look & Edge::LOOK_DOWN) && isInPlace()) { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto& config = selected_pd->getConfig(); - size_t numberOfInputs = config.inConfs.size(); - size_t inplaceOutIndx = selected_pd->getConfig().inConfs[0].inPlace(); - auto baseDim = outputShapes.front().getDims()[axis]; - IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << " Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; - - auto& edges = getChildEdgesAtPort(inplaceOutIndx); - auto itr = std::find_if(edges.begin(), edges.end(), [](const EdgePtr& edge) { return edge->getStatus() == Edge::Status::Allocated; }); - IE_ASSERT(itr != edges.end()) << " Could not find allocated child edge for concat node: " << getName(); - - auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); - IE_ASSERT(baseMemMngr != nullptr) << " NULL base memory manager in concat node: " << getName(); - - ptrdiff_t offset = 0; - for (size_t i = 0; i < numberOfInputs; ++i) { - auto partDim = inputShapes[i].getDims()[axis]; - IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << " Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; - - auto parentEdge = getParentEdgeAt(i); - - IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); - - auto memDesc = selected_pd->getConfig().inConfs[i].getMemDesc(); - MemoryPtr newMem; - if (partDim != 0) { - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - newMem = std::make_shared(getEngine(), memDesc, memMngr); - } else { - // empty tensor, no need to reference a part, default memory is enough - newMem = std::make_shared(getEngine(), memDesc); - } + if (!(look & Edge::LOOK_DOWN) || !isInPlace()) { + Node::resolveInPlaceEdges(look); + return; + } + + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == 
nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto& config = selected_pd->getConfig(); + size_t numberOfInputs = config.inConfs.size(); + size_t inplaceOutIndx = selected_pd->getConfig().inConfs[0].inPlace(); + auto baseDim = outputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << " Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; + + auto& edges = getChildEdgesAtPort(inplaceOutIndx); + auto itr = std::find_if(edges.begin(), edges.end(), [](const EdgePtr& edge) { return edge->getStatus() == Edge::Status::Allocated; }); + IE_ASSERT(itr != edges.end()) << " Could not find allocated child edge for concat node: " << getName(); - parentEdge->reuse(newMem); - offset += partDim; + auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); + IE_ASSERT(baseMemMngr != nullptr) << " NULL base memory manager in concat node: " << getName(); + + ptrdiff_t offset = 0; + for (size_t i = 0; i < numberOfInputs; ++i) { + auto partDim = inputShapes[i].getDims()[axis]; + IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << " Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; + + auto parentEdge = getParentEdgeAt(i); + + IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); + + auto memDesc = selected_pd->getConfig().inConfs[i].getMemDesc(); + MemoryPtr newMem; + if (partDim != 0) { + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); + newMem = std::make_shared(getEngine(), memDesc, memMngr); + } else { + // empty tensor, no need to reference a part, default memory is enough + newMem = std::make_shared(getEngine(), memDesc); } - } else { - Node::resolveInPlaceEdges(look); + + parentEdge->reuse(newMem); + offset += partDim; } } diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index c68371c8ea34a5..06314ca17c6f5e 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -588,31 +588,32 @@ bool Gather::isExecutable() const { } void Gather::resolveInPlaceEdges(Edge::LOOK look) { - if ((look & Edge::LOOK_UP) && isInPlace()) { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - constexpr size_t outputPort = 0; - - auto& config = selected_pd->getConfig(); - size_t inplaceInpIndx = selected_pd->getConfig().outConfs[outputPort].inPlace(); - auto baseDim = inputShapes.front().getDims()[axis]; - IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Gather node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; - auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); - auto index = constIndices.at(0); - ptrdiff_t offset = index < 0 ? 
baseDim + index : index; - const auto& childEdges = getChildEdgesAtPort(outputPort); - for (auto& childEdge : childEdges) { - IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected edge status in node: " << - getName() << " with type " << getTypeStr(); - - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); - auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memMngr); - - childEdge->reuse(newMem); - } - } else { + if (!(look & Edge::LOOK_UP) || !isInPlace()) { Node::resolveInPlaceEdges(look); + return; + } + + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + constexpr size_t outputPort = 0; + + auto& config = selected_pd->getConfig(); + size_t inplaceInpIndx = selected_pd->getConfig().outConfs[outputPort].inPlace(); + auto baseDim = inputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Gather node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; + auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); + auto index = constIndices.at(0); + ptrdiff_t offset = index < 0 ? baseDim + index : index; + const auto& childEdges = getChildEdgesAtPort(outputPort); + for (auto& childEdge : childEdges) { + IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected edge status in node: " << + getName() << " with type " << getTypeStr(); + + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); + auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memMngr); + + childEdge->reuse(newMem); } } diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index 8f8a02099edb26..710abbfffba80f 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -524,41 +524,41 @@ void Split::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vect } void Split::resolveInPlaceEdges(Edge::LOOK look) { - if ((look & Edge::LOOK_UP) && isInPlace()) { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto& config = selected_pd->getConfig(); - size_t numberOfOutputs = config.outConfs.size(); - size_t inplaceInpIndx = selected_pd->getConfig().outConfs[0].inPlace(); - auto baseDim = inputShapes.front().getDims()[axis]; - IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << " Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension"; - auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); - ptrdiff_t offset = 0; - for (size_t i = 0; i < numberOfOutputs; ++i) { - auto partDim = outputShapes[i].getDims()[axis]; - IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << " Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension"; - const auto& childEdges = getChildEdgesAtPort(i); - for (auto& childEdge : childEdges) { - IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected edge status in node: " << - getName() << " with type " << getTypeStr(); - - auto memDesc = selected_pd->getConfig().outConfs[i].getMemDesc(); - MemoryPtr newMem; - if (partDim != 0) { - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - newMem = std::make_shared(getEngine(), memDesc, memMngr); 
-                } else {
-                    // empty tensor, no need to reference a part, default memory is enough
-                    newMem = std::make_shared<Memory>(getEngine(), memDesc);
-                }
-
-                childEdge->reuse(newMem);
+    if (!(look & Edge::LOOK_UP) || !isInPlace()) {
+        Node::resolveInPlaceEdges(look);
+        return;
+    }
+    auto selected_pd = getSelectedPrimitiveDescriptor();
+    if (selected_pd == nullptr)
+        IE_THROW() << "Preferable primitive descriptor is not set.";
+    auto& config = selected_pd->getConfig();
+    size_t numberOfOutputs = config.outConfs.size();
+    size_t inplaceInpIndx = selected_pd->getConfig().outConfs[0].inPlace();
+    auto baseDim = inputShapes.front().getDims()[axis];
+    IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << " Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension";
+    auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr();
+    ptrdiff_t offset = 0;
+    for (size_t i = 0; i < numberOfOutputs; ++i) {
+        auto partDim = outputShapes[i].getDims()[axis];
+        IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << " Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension";
+        const auto& childEdges = getChildEdgesAtPort(i);
+        for (auto& childEdge : childEdges) {
+            IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected edge status in node: " <<
+                getName() << " with type " << getTypeStr();
+
+            auto memDesc = selected_pd->getConfig().outConfs[i].getMemDesc();
+            MemoryPtr newMem;
+            if (partDim != 0) {
+                auto memMngr = std::make_shared<PartitionedMemoryMngr>(baseMemMngr, baseDim, offset, partDim);
+                newMem = std::make_shared<Memory>(getEngine(), memDesc, memMngr);
+            } else {
+                // empty tensor, no need to reference a part, default memory is enough
+                newMem = std::make_shared<Memory>(getEngine(), memDesc);
             }
-            offset += partDim;
+
+            childEdge->reuse(newMem);
         }
-    } else {
-        Node::resolveInPlaceEdges(look);
+        offset += partDim;
     }
 }
diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h
index 94f5f9288d27f8..f2ca5e45a1746f 100644
--- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h
+++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h
@@ -9,6 +9,11 @@
 namespace ov {
 namespace intel_cpu {
 
+/**
+ * This is a memory manager that represents a view on a partition inside a continuous memory block controlled by
+ * another memory manager.
+ *
+ */
 class PartitionedMemoryMngr : public IMemoryMngrObserver {
 public:
     PartitionedMemoryMngr(MemoryMngrPtr pMngr, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1)
@@ -25,10 +30,10 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver {
 
 private:
     MemoryMngrPtr m_pMngr;
-    size_t m_total_blocks = 1; // size of the parent memory in blocks
-    ptrdiff_t m_offset_blocks = 0; // offset from the base pointer in blocks
-    size_t m_size_blocks = 1; // size of the partition in blocks
-    size_t m_size = 0; // self size in bytes
+    size_t m_total_blocks = 1; // size of the parent memory in abstract blocks
+    ptrdiff_t m_offset_blocks = 0; // offset from the beginning of the external memory in abstract blocks
+    size_t m_size_blocks = 1; // size of the viewed partition in abstract blocks
+    size_t m_size = 0; // size of the viewed partition in bytes
 };
 
 } // namespace intel_cpu
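
As a rough sketch of how the PartitionedMemoryMngr introduced above is meant to be wired up (based only on the constructor signature and the Split/Concat/Gather changes in this patch; the surrounding node state such as baseMemMngr, memDesc, childEdge, getEngine() and the dimension values is assumed, not defined here):

    // One output of a split viewed as a slice of the parent's buffer.
    // baseDim - parent size along the split axis (total blocks)
    // offset  - blocks already assigned to the previous outputs
    // partDim - blocks belonging to this output
    auto memMngr = std::make_shared<PartitionedMemoryMngr>(baseMemMngr, baseDim, offset, partDim);
    auto newMem = std::make_shared<Memory>(getEngine(), memDesc, memMngr);
    childEdge->reuse(newMem); // the child edge now aliases a partition of the parent's memory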