diff --git a/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.cpp b/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.cpp
index c29448e513c594..54c2c71d466741 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.cpp
@@ -45,6 +45,30 @@ void jit_generator::postamble() {
     ret();
 }
 
+// Emits code that places the full 64-bit `value` into `rd`, using `rd` as the only register.
+// NOTE(review): assumes `li` accepts a signed 32-bit immediate and sign-extends it - confirm against
+// Xbyak_riscv. Under that assumption a value above INT32_MAX needs more than one instruction: the upper
+// half is loaded first and the lower half is shifted in as three non-negative chunks (11 + 11 + 10 bits).
+void jit_generator::lqw(const Reg& rd, size_t value) {
+    // Fast path: the value is representable as a non-negative 32-bit signed immediate.
+    if (value <= 0x7FFFFFFFULL) {
+        li(rd, static_cast<int>(value));
+        return;
+    }
+
+    const uint32_t upper_32bits = static_cast<uint32_t>(value >> 32);
+    const uint32_t lower_32bits = static_cast<uint32_t>(value & 0xFFFFFFFF);
+
+    // The three shifts below add up to 32 bits, so any sign-extension bits left by `li` are shifted out.
+    li(rd, static_cast<int>(upper_32bits));
+    slli(rd, rd, 11);
+    addi(rd, rd, static_cast<int>((lower_32bits >> 21) & 0x7FF));  // lower bits [31:21]
+    slli(rd, rd, 11);
+    addi(rd, rd, static_cast<int>((lower_32bits >> 10) & 0x7FF));  // lower bits [20:10]
+    slli(rd, rd, 10);
+    addi(rd, rd, static_cast<int>(lower_32bits & 0x3FF));          // lower bits [9:0]
+}
+
 Xbyak_riscv::LMUL jit_generator::float2lmul(const float lmul) const {
     if (lmul == 0.125f) return LMUL::mf8;
     if (lmul == 0.25f) return LMUL::mf4;
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.hpp b/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.hpp
index 8795149ccd04eb..4e6fae877c2b6f 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.hpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_generator.hpp
@@ -96,6 +96,9 @@ class jit_generator : public Xbyak_riscv::CodeGenerator {
     static constexpr Xbyak_riscv::Reg abi_param_regs[] = {Xbyak_riscv::a0, Xbyak_riscv::a1, Xbyak_riscv::a2, Xbyak_riscv::a3,
                                                           Xbyak_riscv::a4, Xbyak_riscv::a5, Xbyak_riscv::a6, Xbyak_riscv::a7};
 
+    // Emits code loading a full-width size_t constant into the GPR `rd` (all 64 bits are preserved)
+    void lqw(const Xbyak_riscv::Reg& rd, size_t value);
+
 protected:
     virtual void generate() = 0;
 
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_uni_eltwise_generic.cpp
index c471e84dfaa370..15e38d3dd953ed 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_uni_eltwise_generic.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/riscv64/jit_uni_eltwise_generic.cpp
@@ -60,8 +60,7 @@ void 
jit_uni_eltwise_generic::generate() { auto init_ptrs_with_offsets = [&](Reg reg, const std::vector& offsets) { for (int j = 0; j < offset_count; j++) { if (jep_.dims[j] != 1 && offsets[j] != 0) { - // what's about 64bit? - li(reg_tmp_0, static_cast(offsets[j])); + lqw(reg_tmp_0, static_cast(offsets[j])); ld(reg_tmp_1, reg_indexes, static_cast(j * sizeof(size_t))); mul(reg_tmp_0, reg_tmp_0, reg_tmp_1); add(reg, reg, reg_tmp_0); @@ -77,7 +76,7 @@ void jit_uni_eltwise_generic::generate() { ld(dst_gpr(), reg_const_params, GET_OFF(dst_ptr)); init_ptrs_with_offsets(dst_gpr(), jep.dst_offsets); - li(reg_work_amount, static_cast(jep.work_amount)); + lqw(reg_work_amount, static_cast(jep.work_amount)); } // TODO: Support any LMUL values @@ -155,7 +154,7 @@ void jit_uni_eltwise_generic::generate() { } } - li(reg_loop_step, min_src_size); + lqw(reg_loop_step, min_src_size); L(inner_loop_begin); { // to get correct `reg_vlen` in loop - in tail loop `rg_vlen` might be updated @@ -179,12 +178,12 @@ void jit_uni_eltwise_generic::generate() { const auto reg_tmp = reg_loop_step; for (size_t i = 0; i < jep.inputs_number; i++) { if (jep.src_size[i] == jep.dst_size) { - li(reg_tmp, jep.src_prc[i].size() * min_src_size); + lqw(reg_tmp, jep.src_prc[i].size() * min_src_size); add(src_gpr(i), src_gpr(i), reg_tmp); } } - li(reg_loop_step, min_src_size); + lqw(reg_loop_step, min_src_size); sub(reg_work_amount, reg_work_amount, reg_loop_step); j_(loop_begin); }