Skip to content

Commit

Permalink
Merge branch 'at/extend-llm-infer-request-to-support-vlm' of https://github.com/TolyaTalamanov/openvino into at/extend-llm-infer-request-to-support-vlm
Browse files Browse the repository at this point in the history
  • Loading branch information
TolyaTalamanov committed Feb 21, 2025
2 parents 4ed45be + 2c421c5 commit 6b7c517
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
22 changes: 11 additions & 11 deletions src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class TransposeValueTensors : public ov::pass::MatcherPass {
auto matched_matmul = std::static_pointer_cast<ov::op::v0::MatMul>(node_matmul);

auto param_shape = matched_param->get_partial_shape();
OPENVINO_ASSERT(param_shape.size() == 4u);
NPUW_ASSERT(param_shape.size() == 4u);
// NB: Transpose the Parameter that corresponds to the V-tensor; this
// speeds up its multiplication with the attention scores
std::swap(param_shape[2], param_shape[3]);
Expand Down Expand Up @@ -150,7 +150,7 @@ class TransposeValueTensors_llama3 : public TransposeValueTensors {
auto matched_reshape = std::static_pointer_cast<ov::op::v1::Reshape>(matched_node_reshape);

auto shape_broadcast = matched_broadcast->get_output_shape(0);
OPENVINO_ASSERT(shape_broadcast.size() == 5u);
NPUW_ASSERT(shape_broadcast.size() == 5u);
std::swap(shape_broadcast[3], shape_broadcast[4]);

LOG_DEBUG("shape_broadcast for: " << matched_broadcast->get_friendly_name()
Expand All @@ -162,7 +162,7 @@ class TransposeValueTensors_llama3 : public TransposeValueTensors {
matched_broadcast->input(1).replace_source_output(broadcast_axes_node);

auto shape_reshape = matched_reshape->get_output_shape(0);
OPENVINO_ASSERT(shape_reshape.size() == 4u);
NPUW_ASSERT(shape_reshape.size() == 4u);
std::swap(shape_reshape[2], shape_reshape[3]);

LOG_DEBUG("shape_reshape for: " << matched_reshape->get_friendly_name() << ", shape=" << shape_reshape);
Expand Down Expand Up @@ -373,8 +373,8 @@ void reshape_to_static(std::shared_ptr<ov::Model> model,
new_shape = ov::PartialShape({1, input_size});
} else if (input_name.find("inputs_embeds") != std::string::npos) {
// NB: VLM case: the model accepts inputs_embeds[BATCH, SEQ_LEN, EMB_SIZE]
OPENVINO_ASSERT(input.get_partial_shape().size() == 3u);
OPENVINO_ASSERT(input.get_partial_shape()[2].is_static());
NPUW_ASSERT(input.get_partial_shape().size() == 3u);
NPUW_ASSERT(input.get_partial_shape()[2].is_static());
new_shape = ov::PartialShape({1, input_size, input.get_partial_shape()[2]});
} else if (input_name.find("attention_mask") != std::string::npos) {
new_shape = ov::PartialShape({1, kvcache_size});
Expand Down Expand Up @@ -633,14 +633,14 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m

m_kvcache_compiled = std::dynamic_pointer_cast<ov::npuw::CompiledModel>(
ov::npuw::ICompiledModel::create(kvcache_model, plugin, generate_config));
OPENVINO_ASSERT(m_kvcache_compiled,
"Can't create ov::npuw::CompiledModel for passed kvcache "
"model and its config, please check passed config.");
NPUW_ASSERT(m_kvcache_compiled &&
"Can't create ov::npuw::CompiledModel for passed kvcache "
"model and its config, please check passed config.");
m_prefill_compiled = std::dynamic_pointer_cast<ov::npuw::CompiledModel>(
ov::npuw::ICompiledModel::create(prefill_model, plugin, prefill_config));
OPENVINO_ASSERT(m_prefill_compiled,
"Can't create ov::npuw::CompiledModel for passed prefill "
"model and its config, please check passed config.");
NPUW_ASSERT(m_prefill_compiled &&
"Can't create ov::npuw::CompiledModel for passed prefill "
"model and its config, please check passed config.");

implement_properties();
LOG_DEBUG("Done");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void copy_columns_by_row_chunks(ov::SoPtr<ov::ITensor> src, ov::SoPtr<ov::ITenso
std::optional<ov::Output<const ov::Node>> find_port_by_name(const std::vector<ov::Output<const ov::Node>>& ports,
const std::string& name) {
auto it = std::find_if(ports.begin(), ports.end(), [&](const auto& port) {
return port.get_any_name() == name;
return port.get_names().count(name) != 0;
});
if (it == ports.end()) {
return std::nullopt;
Expand Down

0 comments on commit 6b7c517

Please sign in to comment.