diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 79b1384e8d24e3..b7f6d1ded8c9b1 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1129,6 +1129,7 @@ void CodeGen::genCodeForMul(GenTreeOp* treeNode) } #ifdef FEATURE_SIMD + //------------------------------------------------------------------------ // genSIMDSplitReturn: Generates code for returning a fixed-size SIMD type that lives // in a single register, but is returned in multiple registers. @@ -1148,34 +1149,62 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) regNumber reg0 = retTypeDesc->GetABIReturnReg(0); regNumber reg1 = retTypeDesc->GetABIReturnReg(1); + assert((reg0 != REG_NA) && (reg1 != REG_NA) && (opReg != REG_NA)); + + const bool srcIsFloatReg = genIsValidFloatReg(opReg); + const bool dstIsFloatReg = genIsValidFloatReg(reg0); + assert(srcIsFloatReg); + +#ifdef TARGET_AMD64 + assert(src->TypeIs(TYP_SIMD16)); + assert(srcIsFloatReg == dstIsFloatReg); if (opReg != reg0 && opReg != reg1) { // Operand reg is different from return regs. // Copy opReg to reg0 and let it to be handled by one of the // two cases below. - inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE); + inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); opReg = reg0; } if (opReg == reg0) { assert(opReg != reg1); - - // reg0 - already has required 8-byte in bit position [63:0]. // reg1 = opReg. - // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0]. - inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE); + inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg1, opReg, TYP_SIMD16); } else { assert(opReg == reg1); // reg0 = opReg. - // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0]. - inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE); + + inst_RV_RV(ins_Copy(opReg, TYP_SIMD16), reg0, opReg, TYP_SIMD16); } + // reg0 - already has required 8-byte in bit position [63:0]. + // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0]. inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01); + +#else // TARGET_X86 + assert(src->TypeIs(TYP_SIMD8)); + assert(srcIsFloatReg != dstIsFloatReg); + assert((reg0 == REG_EAX) && (reg1 == REG_EDX)); + // reg0 = opReg[31:0] + inst_RV_RV(ins_Copy(opReg, TYP_INT), reg0, opReg, TYP_INT); + // reg1 = opRef[61:32] + if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1); + } + else + { + int8_t shuffleMask = 1; // we only need [61:32]->[31:0], the rest is not read. + inst_RV_TT_IV(INS_pshufd, EA_8BYTE, opReg, src, shuffleMask); + inst_RV_RV(ins_Copy(opReg, TYP_INT), reg1, opReg, TYP_INT); + } +#endif // TARGET_X86 } + #endif // FEATURE_SIMD #if defined(TARGET_X86) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index b6ca4dd7030a3e..88e13c4839e216 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -8821,7 +8821,7 @@ void emitter::emitDispIns( } else if (ins == INS_mov_xmm2i) { - printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE)); + printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); } else if (ins == INS_pmovmskb) {