diff --git a/src/compiler/ia32/code-generator-ia32.cc b/src/compiler/ia32/code-generator-ia32.cc index 113786c4663..45cf138348f 100644 --- a/src/compiler/ia32/code-generator-ia32.cc +++ b/src/compiler/ia32/code-generator-ia32.cc @@ -283,13 +283,29 @@ class OutOfLineTruncateDoubleToI FINAL : public OutOfLineCode { } while (false) -#define ASSEMBLE_FLOAT32x4_BINOP(asm_instr) \ - do { \ - if (instr->InputAt(1)->IsFloat32x4Register()) { \ - __ asm_instr(i.InputFloat32x4Register(0), i.InputFloat32x4Register(1));\ - } else { \ - __ asm_instr(i.InputFloat32x4Register(0), i.InputOperand(1)); \ - } \ +#define ASSEMBLE_SIMD_BINOP(asm_instr, type) \ + do { \ + if (instr->InputAt(1)->Is##type##Register()) { \ + __ asm_instr(i.Input##type##Register(0), i.Input##type##Register(1)); \ + } else { \ + __ asm_instr(i.Input##type##Register(0), i.InputOperand(1)); \ + } \ + } while (0) + + +#define ASSEMBLE_SIMD_CMP_BINOP(op1, op2, type) \ + do { \ + auto result = i.OutputInt32x4Register(); \ + auto left = i.Input##type##Register(0); \ + auto right = i.Input##type##Register(1); \ + if (result.is(left)) { \ + __ op1(result, right); \ + } else if (result.is(right)) { \ + __ op2(result, left); \ + } else { \ + __ movaps(result, left); \ + __ op1(result, right); \ + } \ } while (0) @@ -302,6 +318,107 @@ static uint8_t ComputeShuffleSelect(uint32_t x, uint32_t y, uint32_t z, } +static void Emit32x4Shuffle(MacroAssembler* masm, XMMRegister lhs, + XMMRegister rhs, int32_t x, int32_t y, int32_t z, + int32_t w) { + XMMRegister temp = xmm0; + uint32_t num_lanes_from_lhs = (x < 4) + (y < 4) + (z < 4) + (w < 4); + if (num_lanes_from_lhs == 4) { + uint8_t select = ComputeShuffleSelect(x, y, z, w); + masm->shufps(lhs, lhs, select); + return; + } else if (num_lanes_from_lhs == 0) { + x -= 4; + y -= 4; + z -= 4; + w -= 4; + uint8_t select = ComputeShuffleSelect(x, y, z, w); + masm->movaps(lhs, rhs); + masm->shufps(lhs, lhs, select); + return; + } else if (num_lanes_from_lhs == 3) { + uint8_t 
first_select = 0xFF; + uint8_t second_select = 0xFF; + if (x < 4 && y < 4) { + if (w >= 4) { + w -= 4; + first_select = ComputeShuffleSelect(w, w, z, z); + second_select = ComputeShuffleSelect(x, y, 2, 0); + } else { + DCHECK(z >= 4); + z -= 4; + first_select = ComputeShuffleSelect(z, z, w, w); + second_select = ComputeShuffleSelect(x, y, 0, 2); + } + masm->movaps(temp, rhs); + masm->shufps(temp, lhs, first_select); + masm->shufps(lhs, temp, second_select); + return; + } + + DCHECK(z < 4 && w < 4); + if (y >= 4) { + y -= 4; + first_select = ComputeShuffleSelect(y, y, x, x); + second_select = ComputeShuffleSelect(2, 0, z, w); + } else { + DCHECK(x >= 4); + x -= 4; + first_select = ComputeShuffleSelect(x, x, y, y); + second_select = ComputeShuffleSelect(0, 2, z, w); + } + masm->movaps(temp, rhs); + masm->shufps(temp, lhs, first_select); + masm->shufps(temp, lhs, second_select); + masm->movaps(lhs, temp); + return; + } else if (num_lanes_from_lhs == 2) { + if (x < 4 && y < 4) { + uint8_t select = ComputeShuffleSelect(x, y, z % 4, w % 4); + masm->shufps(lhs, rhs, select); + return; + } else if (z < 4 && w < 4) { + uint8_t select = ComputeShuffleSelect(x % 4, y % 4, z, w); + masm->movaps(temp, rhs); + masm->shufps(temp, lhs, select); + masm->movaps(lhs, temp); + return; + } + + // In two shufps, for the most generic case: + uint8_t first_select[4], second_select[4]; + uint32_t i = 0, j = 2, k = 0; + +#define COMPUTE_SELECT(lane) \ + if (lane >= 4) { \ + first_select[j] = lane % 4; \ + second_select[k++] = j++; \ + } else { \ + first_select[i] = lane; \ + second_select[k++] = i++; \ + } + + COMPUTE_SELECT(x) + COMPUTE_SELECT(y) + COMPUTE_SELECT(z) + COMPUTE_SELECT(w) +#undef COMPUTE_SELECT + + DCHECK(i == 2 && j == 4 && k == 4); + + int8_t select; + select = ComputeShuffleSelect(first_select[0], first_select[1], + first_select[2], first_select[3]); + masm->shufps(lhs, rhs, select); + select = ComputeShuffleSelect(second_select[0], second_select[1], + second_select[2], 
second_select[3]); + masm->shufps(lhs, lhs, select); + } + + return; +} + + // Assembles an instruction after register allocation, producing machine code. void CodeGenerator::AssembleArchInstruction(Instruction* instr) { IA32OperandConverter i(this, instr); @@ -719,22 +836,22 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { ASSEMBLE_CHECKED_STORE_FLOAT(movsd); break; case kFloat32x4Add: - ASSEMBLE_FLOAT32x4_BINOP(addps); + ASSEMBLE_SIMD_BINOP(addps, Float32x4); break; case kFloat32x4Sub: - ASSEMBLE_FLOAT32x4_BINOP(subps); + ASSEMBLE_SIMD_BINOP(subps, Float32x4); break; case kFloat32x4Mul: - ASSEMBLE_FLOAT32x4_BINOP(mulps); + ASSEMBLE_SIMD_BINOP(mulps, Float32x4); break; case kFloat32x4Div: - ASSEMBLE_FLOAT32x4_BINOP(divps); + ASSEMBLE_SIMD_BINOP(divps, Float32x4); break; case kFloat32x4Min: - ASSEMBLE_FLOAT32x4_BINOP(minps); + ASSEMBLE_SIMD_BINOP(minps, Float32x4); break; case kFloat32x4Max: - ASSEMBLE_FLOAT32x4_BINOP(maxps); + ASSEMBLE_SIMD_BINOP(maxps, Float32x4); break; case kFloat32x4Constructor: __ sub(esp, Immediate(kFloat32x4Size)); @@ -827,8 +944,290 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { __ shufps(value_reg, value_reg, s); break; } - case kLoadFloat32x4: { - auto result = i.OutputFloat32x4Register(); + case kFloat32x4Equal: + ASSEMBLE_SIMD_CMP_BINOP(cmpeqps, cmpeqps, Float32x4); + break; + case kFloat32x4NotEqual: + ASSEMBLE_SIMD_CMP_BINOP(cmpneqps, cmpneqps, Float32x4); + break; + case kFloat32x4GreaterThan: + ASSEMBLE_SIMD_CMP_BINOP(cmpnleps, cmpltps, Float32x4); + break; + case kFloat32x4GreaterThanOrEqual: + ASSEMBLE_SIMD_CMP_BINOP(cmpnltps, cmpleps, Float32x4); + break; + case kFloat32x4LessThan: + ASSEMBLE_SIMD_CMP_BINOP(cmpltps, cmpnleps, Float32x4); + break; + case kFloat32x4LessThanOrEqual: + ASSEMBLE_SIMD_CMP_BINOP(cmpleps, cmpnltps, Float32x4); + break; + case kFloat32x4Select: + case kInt32x4Select: { + auto mask = i.InputSIMD128Register(0); + auto left = i.InputSIMD128Register(1); + auto right 
= i.InputSIMD128Register(2); + auto result = i.OutputSIMD128Register(); + __ movaps(xmm0, mask); + __ notps(xmm0); + __ andps(xmm0, right); + if (!result.is(mask)) { + if (result.is(left)) { + __ andps(result, mask); + __ orps(result, xmm0); + } else { + __ movaps(result, mask); + __ andps(result, left); + __ orps(result, xmm0); + } + } else { + __ andps(result, left); + __ orps(result, xmm0); + } + break; + } + case kFloat32x4Shuffle: + case kInt32x4Shuffle: { + DCHECK(i.OutputSIMD128Register().is(i.InputSIMD128Register(0))); + auto lhs = i.InputSIMD128Register(0); + auto rhs = i.InputSIMD128Register(1); + auto x = i.InputInt32(2); + auto y = i.InputInt32(3); + auto z = i.InputInt32(4); + auto w = i.InputInt32(5); + Emit32x4Shuffle(masm(), lhs, rhs, x, y, z, w); + break; + } + // For Int32x4 operation. + case kInt32x4And: + ASSEMBLE_SIMD_BINOP(andps, Int32x4); + break; + case kInt32x4Or: + ASSEMBLE_SIMD_BINOP(orps, Int32x4); + break; + case kInt32x4Xor: + ASSEMBLE_SIMD_BINOP(xorps, Int32x4); + break; + case kInt32x4Sub: + ASSEMBLE_SIMD_BINOP(psubd, Int32x4); + break; + case kInt32x4Add: + ASSEMBLE_SIMD_BINOP(paddd, Int32x4); + break; + case kInt32x4Mul: { + DCHECK(i.InputInt32x4Register(0).is(i.OutputInt32x4Register())); + XMMRegister left_reg = i.InputInt32x4Register(0); + XMMRegister right_reg = i.InputInt32x4Register(1); + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ pmulld(left_reg, right_reg); + } else { + // The algorithm is from + // http://stackoverflow.com/questions/10500766/sse-multiplication-of-4-32-bit-integers + XMMRegister xmm_scratch = xmm0; + __ movaps(xmm_scratch, left_reg); + __ pmuludq(left_reg, right_reg); + __ psrldq(xmm_scratch, 4); + __ psrldq(right_reg, 4); + __ pmuludq(xmm_scratch, right_reg); + __ pshufd(left_reg, left_reg, 8); + __ pshufd(xmm_scratch, xmm_scratch, 8); + __ punpackldq(left_reg, xmm_scratch); + } + break; + } + case kInt32x4Constructor: + __ sub(esp, Immediate(kInt32x4Size)); + __ 
mov(Operand(esp, 0 * kIntSize), i.InputRegister(0)); + __ mov(Operand(esp, 1 * kIntSize), i.InputRegister(1)); + __ mov(Operand(esp, 2 * kIntSize), i.InputRegister(2)); + __ mov(Operand(esp, 3 * kIntSize), i.InputRegister(3)); + __ movups(i.OutputInt32x4Register(), Operand(esp, 0 * kIntSize)); + __ add(esp, Immediate(kInt32x4Size)); + break; + case kInt32x4GetW: + select++; + case kInt32x4GetZ: + select++; + case kInt32x4GetY: + select++; + case kInt32x4GetX: { + Register dst = i.OutputRegister(); + XMMRegister input = i.InputInt32x4Register(0); + if (select == 0x0) { + __ movd(dst, input); + } else { + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ extractps(dst, input, select); + } else { + XMMRegister xmm_scratch = xmm0; + __ pshufd(xmm_scratch, input, select); + __ movd(dst, xmm_scratch); + } + } + break; + } + case kInt32x4Bool: { + __ sub(esp, Immediate(kInt32x4Size)); + __ mov(eax, i.InputRegister(0)); + __ neg(eax); + __ mov(Operand(esp, 0 * kIntSize), eax); + __ mov(eax, i.InputRegister(1)); + __ neg(eax); + __ mov(Operand(esp, 1 * kIntSize), eax); + __ mov(eax, i.InputRegister(2)); + __ neg(eax); + __ mov(Operand(esp, 2 * kIntSize), eax); + __ mov(eax, i.InputRegister(3)); + __ neg(eax); + __ mov(Operand(esp, 3 * kIntSize), eax); + __ movups(i.OutputInt32x4Register(), Operand(esp, 0 * kIntSize)); + __ add(esp, Immediate(kInt32x4Size)); + break; + } + case kInt32x4GetSignMask: { + XMMRegister input = i.InputInt32x4Register(0); + Register dst = i.OutputRegister(); + __ movmskps(dst, input); + break; + } + case kInt32x4GetFlagW: + select++; + case kInt32x4GetFlagZ: + select++; + case kInt32x4GetFlagY: + select++; + case kInt32x4GetFlagX: { + Label false_value, done; + Register dst = i.OutputRegister(); + XMMRegister input = i.InputInt32x4Register(0); + if (select == 0x0) { + __ movd(dst, input); + } else { + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ extractps(dst, input, 
select); + } else { + XMMRegister xmm_scratch = xmm0; + __ pshufd(xmm_scratch, input, select); + __ movd(dst, xmm_scratch); + } + } + + __ test(dst, dst); + __ j(zero, &false_value, Label::kNear); + __ LoadRoot(dst, Heap::kTrueValueRootIndex); + __ jmp(&done, Label::kNear); + __ bind(&false_value); + __ LoadRoot(dst, Heap::kFalseValueRootIndex); + __ bind(&done); + break; + } + case kInt32x4Not: { + XMMRegister input = i.InputInt32x4Register(0); + __ notps(input); + break; + } + case kInt32x4Neg: { + XMMRegister input = i.InputInt32x4Register(0); + __ pnegd(input); + break; + } + case kInt32x4Splat: { + Register input_reg = i.InputRegister(0); + XMMRegister result_reg = i.OutputInt32x4Register(); + __ movd(result_reg, input_reg); + __ shufps(result_reg, result_reg, 0x0); + break; + } + case kInt32x4Swizzle: { + uint8_t s = ComputeShuffleSelect(i.InputInt32(1), i.InputInt32(2), + i.InputInt32(3), i.InputInt32(4)); + XMMRegister value_reg = i.InputInt32x4Register(0); + __ pshufd(value_reg, value_reg, s); + break; + } + case kInt32x4ShiftLeft: { + if (HasImmediateInput(instr, 1)) { + uint8_t shift = static_cast<uint8_t>(i.InputInt32(1) & 0xFF); + __ pslld(i.InputInt32x4Register(0), shift); + } else { + DCHECK(instr->InputAt(1)->IsRegister()); + __ movd(xmm0, i.InputRegister(1)); + __ pslld(i.InputInt32x4Register(0), xmm0); + } + break; + } + case kInt32x4ShiftRight: { + if (HasImmediateInput(instr, 1)) { + uint8_t shift = static_cast<uint8_t>(i.InputInt32(1) & 0xFF); + __ psrld(i.InputInt32x4Register(0), shift); + } else { + DCHECK(instr->InputAt(1)->IsRegister()); + __ movd(xmm0, i.InputRegister(1)); + __ psrld(i.InputInt32x4Register(0), xmm0); + } + break; + } + case kInt32x4ShiftRightArithmetic: { + if (HasImmediateInput(instr, 1)) { + uint8_t shift = static_cast<uint8_t>(i.InputInt32(1) & 0xFF); + __ psrad(i.InputInt32x4Register(0), shift); + } else { + DCHECK(instr->InputAt(1)->IsRegister()); + __ movd(xmm0, i.InputRegister(1)); + __ psrad(i.InputInt32x4Register(0), xmm0); + } + break; 
+ } + case kFloat32x4BitsToInt32x4: + case kInt32x4BitsToFloat32x4: + if (!i.OutputSIMD128Register().is(i.InputSIMD128Register(0))) { + __ movaps(i.OutputSIMD128Register(), i.InputSIMD128Register(0)); + } + break; + case kInt32x4ToFloat32x4: + __ cvtdq2ps(i.OutputFloat32x4Register(), i.InputInt32x4Register(0)); + break; + case kFloat32x4ToInt32x4: + __ cvtps2dq(i.OutputInt32x4Register(), i.InputFloat32x4Register(0)); + break; + case kInt32x4Equal: + __ pcmpeqd(i.InputFloat32x4Register(0), i.InputFloat32x4Register(1)); + break; + case kInt32x4GreaterThan: + __ pcmpgtd(i.InputFloat32x4Register(0), i.InputFloat32x4Register(1)); + break; + case kInt32x4LessThan: + __ movaps(xmm0, i.InputFloat32x4Register(1)); + __ pcmpgtd(xmm0, i.InputFloat32x4Register(0)); + __ movaps(i.InputFloat32x4Register(0), xmm0); + break; + case kInt32x4WithW: + select++; + case kInt32x4WithZ: + select++; + case kInt32x4WithY: + select++; + case kInt32x4WithX: { + XMMRegister left = i.InputInt32x4Register(0); + Register right = i.InputRegister(1); + if (CpuFeatures::IsSupported(SSE4_1)) { + CpuFeatureScope scope(masm(), SSE4_1); + __ pinsrd(left, right, select); + } else { + __ sub(esp, Immediate(kInt32x4Size)); + __ movdqu(Operand(esp, 0), left); + __ mov(Operand(esp, select * kInt32Size), right); + __ movdqu(left, Operand(esp, 0)); + __ add(esp, Immediate(kInt32x4Size)); + } + break; + } + // Int32x4 Operation end. 
+ case kLoadSIMD128: { + auto result = i.OutputSIMD128Register(); auto base = i.InputRegister(0); auto disp = i.InputInt32(1); auto loaded_bytes = i.InputInt32(2); @@ -845,8 +1244,8 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { } break; } - case kCheckedLoadFloat32x4: { - auto result = i.OutputFloat32x4Register(); + case kCheckedLoadSIMD128: { + auto result = i.OutputSIMD128Register(); auto offset = i.InputRegister(0); auto base = i.InputRegister(2); auto disp = i.InputInt32(3); @@ -872,11 +1271,11 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { __ bind(ool->exit()); break; } - case kStoreFloat32x4: { + case kStoreSIMD128: { DCHECK(!instr->HasOutput()); auto base = i.InputRegister(0); auto disp = i.InputInt32(1); - auto val = i.InputFloat32x4Register(2); + auto val = i.InputSIMD128Register(2); auto stored_bytes = i.InputInt32(3); if (stored_bytes == 16) { __ movups(Operand(base, disp), val); @@ -891,10 +1290,10 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { } break; } - case kCheckedStoreFloat32x4: { + case kCheckedStoreSIMD128: { DCHECK(!instr->HasOutput()); auto offset = i.InputRegister(0); - auto val = i.InputFloat32x4Register(2); + auto val = i.InputSIMD128Register(2); auto base = i.InputRegister(3); auto disp = i.InputInt32(4); auto stored_bytes = i.InputInt32(5); @@ -919,6 +1318,76 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) { __ bind(&done); break; } + case kFloat64x2Add: + ASSEMBLE_SIMD_BINOP(addpd, Float64x2); + break; + case kFloat64x2Sub: + ASSEMBLE_SIMD_BINOP(subpd, Float64x2); + break; + case kFloat64x2Mul: + ASSEMBLE_SIMD_BINOP(mulpd, Float64x2); + break; + case kFloat64x2Div: + ASSEMBLE_SIMD_BINOP(divpd, Float64x2); + break; + case kFloat64x2Max: + ASSEMBLE_SIMD_BINOP(maxpd, Float64x2); + break; + case kFloat64x2Min: + ASSEMBLE_SIMD_BINOP(minpd, Float64x2); + break; + case kFloat64x2Constructor: + __ sub(esp, Immediate(kFloat64x2Size)); + __ movsd(Operand(esp, 0 
* kDoubleSize), i.InputDoubleRegister(0)); + __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1)); + __ movups(i.OutputFloat64x2Register(), Operand(esp, 0)); + __ add(esp, Immediate(kFloat64x2Size)); + break; + case kFloat64x2GetY: + select++; + case kFloat64x2GetX: { + XMMRegister dst = i.OutputDoubleRegister(); + XMMRegister input = i.InputFloat64x2Register(0); + if (!dst.is(input)) __ movaps(dst, input); + if (select != 0) __ shufpd(dst, input, select); + break; + } + case kFloat64x2GetSignMask: + __ movmskpd(i.OutputRegister(), i.InputFloat64x2Register(0)); + break; + case kFloat64x2Abs: + __ abspd(i.InputFloat64x2Register(0)); + break; + case kFloat64x2Neg: + __ negatepd(i.InputFloat64x2Register(0)); + break; + case kFloat64x2Sqrt: + __ sqrtpd(i.OutputFloat64x2Register(), i.InputFloat64x2Register(0)); + break; + case kFloat64x2Scale: { + XMMRegister scale = i.InputDoubleRegister(1); + __ shufpd(scale, scale, 0x0); + __ mulpd(i.InputFloat64x2Register(0), scale); + break; + } + case kFloat64x2WithY: + select++; + case kFloat64x2WithX: { + __ sub(esp, Immediate(kFloat64x2Size)); + __ movups(Operand(esp, 0), i.InputFloat64x2Register(0)); + __ movsd(Operand(esp, select * kDoubleSize), i.InputDoubleRegister(1)); + __ movups(i.InputFloat64x2Register(0), Operand(esp, 0)); + __ add(esp, Immediate(kFloat64x2Size)); + break; + } + case kFloat64x2Clamp: { + XMMRegister value_reg = i.InputFloat64x2Register(0); + XMMRegister lower_reg = i.InputFloat64x2Register(1); + XMMRegister upper_reg = i.InputFloat64x2Register(2); + __ minpd(value_reg, upper_reg); + __ maxpd(value_reg, lower_reg); + break; + } } } diff --git a/src/compiler/ia32/instruction-selector-ia32.cc b/src/compiler/ia32/instruction-selector-ia32.cc index 392ca007202..770ac6fa4e6 100644 --- a/src/compiler/ia32/instruction-selector-ia32.cc +++ b/src/compiler/ia32/instruction-selector-ia32.cc @@ -131,6 +131,7 @@ void InstructionSelector::VisitLoad(Node* node) { MachineType typ = 
TypeOf(OpParameter<LoadRepresentation>(node)); ArchOpcode opcode; + Node* loaded_bytes = NULL; // TODO(titzer): signed/unsigned small loads switch (rep) { case kRepFloat32: @@ -140,7 +141,10 @@ opcode = kIA32Movsd; break; case kRepFloat32x4: - opcode = kLoadFloat32x4; + case kRepInt32x4: + case kRepFloat64x2: + opcode = kLoadSIMD128; + loaded_bytes = node->InputAt(2); break; case kRepBit: // Fall through. case kRepWord8: @@ -157,21 +161,18 @@ UNREACHABLE(); return; } - IA32OperandGenerator g(this); InstructionOperand* outputs[1]; outputs[0] = g.DefineAsRegister(node); InstructionOperand* inputs[4]; - Node* loaded_bytes = NULL; - if (opcode == kLoadFloat32x4) { - loaded_bytes = node->InputAt(2); - } + + DCHECK(loaded_bytes == NULL || g.CanBeImmediate(loaded_bytes)); + size_t input_count = 0; AddressingMode mode = g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count); InstructionCode code = opcode | AddressingModeField::encode(mode); if (loaded_bytes != NULL) { - DCHECK(g.CanBeImmediate(loaded_bytes)); inputs[input_count++] = g.UseImmediate(loaded_bytes); } Emit(code, 1, outputs, input_count, inputs); @@ -200,6 +201,7 @@ void InstructionSelector::VisitStore(Node* node) { DCHECK_EQ(kNoWriteBarrier, store_rep.write_barrier_kind()); ArchOpcode opcode; + Node* stored_bytes = NULL; switch (rep) { case kRepFloat32: opcode = kIA32Movss; break; @@ -208,7 +210,10 @@ opcode = kIA32Movsd; break; case kRepFloat32x4: - opcode = kStoreFloat32x4; + case kRepInt32x4: + case kRepFloat64x2: + opcode = kStoreSIMD128; + stored_bytes = node->InputAt(3); break; case kRepBit: // Fall through. 
case kRepWord8: @@ -225,6 +230,7 @@ void InstructionSelector::VisitStore(Node* node) { UNREACHABLE(); return; } + DCHECK(stored_bytes == NULL || g.CanBeImmediate(stored_bytes)); InstructionOperand* val; if (g.CanBeImmediate(value)) { @@ -236,10 +242,6 @@ void InstructionSelector::VisitStore(Node* node) { } InstructionOperand* inputs[5]; - Node* stored_bytes = NULL; - if (opcode == kStoreFloat32x4) { - stored_bytes = node->InputAt(3); - } size_t input_count = 0; AddressingMode mode = g.GetEffectiveAddressMemoryOperand(node, inputs, &input_count); @@ -261,6 +263,7 @@ void InstructionSelector::VisitCheckedLoad(Node* node) { Node* const offset = node->InputAt(1); Node* const length = node->InputAt(2); ArchOpcode opcode; + Node* loaded_bytes = NULL; switch (rep) { case kRepWord8: opcode = typ == kTypeInt32 ? kCheckedLoadInt8 : kCheckedLoadUint8; @@ -278,18 +281,16 @@ void InstructionSelector::VisitCheckedLoad(Node* node) { opcode = kCheckedLoadFloat64; break; case kRepFloat32x4: - opcode = kCheckedLoadFloat32x4; + case kRepInt32x4: + case kRepFloat64x2: + opcode = kCheckedLoadSIMD128; + loaded_bytes = node->InputAt(3); break; default: UNREACHABLE(); return; } - // To support simd.js partial load. - Node* loaded_bytes = NULL; - if (opcode == kCheckedLoadFloat32x4) { - loaded_bytes = node->InputAt(3); - DCHECK(g.CanBeImmediate(loaded_bytes)); - } + DCHECK(loaded_bytes == NULL || g.CanBeImmediate(loaded_bytes)); InstructionOperand* offset_operand = g.UseRegister(offset); InstructionOperand* length_operand = g.CanBeImmediate(length) ? 
g.UseImmediate(length) : g.UseRegister(length); @@ -327,6 +328,7 @@ void InstructionSelector::VisitCheckedStore(Node* node) { Node* const length = node->InputAt(2); Node* const value = node->InputAt(3); ArchOpcode opcode; + Node* stored_bytes = NULL; switch (rep) { case kRepWord8: opcode = kCheckedStoreWord8; @@ -344,17 +346,17 @@ void InstructionSelector::VisitCheckedStore(Node* node) { opcode = kCheckedStoreFloat64; break; case kRepFloat32x4: - opcode = kCheckedStoreFloat32x4; + case kRepInt32x4: + case kRepFloat64x2: + opcode = kCheckedStoreSIMD128; + stored_bytes = node->InputAt(4); break; default: UNREACHABLE(); return; } - Node* stored_bytes = NULL; - if (opcode == kCheckedStoreFloat32x4) { - stored_bytes = node->InputAt(4); - DCHECK(g.CanBeImmediate(stored_bytes)); - } + DCHECK(stored_bytes == NULL || g.CanBeImmediate(stored_bytes)); + InstructionOperand* value_operand = g.CanBeImmediate(value) ? g.UseImmediate(value) @@ -1060,25 +1062,21 @@ void InstructionSelector::VisitFloat64LessThanOrEqual(Node* node) { } -#define BINARY_SIMD_OPERATION_LIST(V) \ - V(Float32x4Add) \ - V(Float32x4Sub) \ - V(Float32x4Mul) \ - V(Float32x4Div) \ - V(Float32x4Min) \ - V(Float32x4Max) +#define BINARY_SIMD_OPERATION_LIST1(V) \ + V(Float32x4Add) \ + V(Float32x4Sub) \ + V(Float32x4Mul) \ + V(Float32x4Div) -#define DECLARE_VISIT_BINARY_SIMD_OPERATION(type) \ - void InstructionSelector::Visit##type(Node* node) { \ - IA32OperandGenerator g(this); \ - InstructionOperand* output = IsSupported(AVX) ? 
g.DefineAsRegister(node) \ - : g.DefineSameAsFirst(node); \ - Emit(k##type, output, g.UseRegister(node->InputAt(0)), \ - g.Use(node->InputAt(1))); \ +#define DECLARE_VISIT_BINARY_SIMD_OPERATION1(type) \ + void InstructionSelector::Visit##type(Node* node) { \ + IA32OperandGenerator g(this); \ + Emit(k##type, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), \ + g.UseRegister(node->InputAt(1))); \ } -BINARY_SIMD_OPERATION_LIST(DECLARE_VISIT_BINARY_SIMD_OPERATION) +BINARY_SIMD_OPERATION_LIST1(DECLARE_VISIT_BINARY_SIMD_OPERATION1) void InstructionSelector::VisitFloat32x4Constructor(Node* node) { @@ -1089,13 +1087,63 @@ void InstructionSelector::VisitFloat32x4Constructor(Node* node) { } +// TODO(chunyang): current code generation for int32x4 requires register for +// both input parameters. We can optimize it later. +void InstructionSelector::VisitInt32x4Mul(Node* node) { + IA32OperandGenerator g(this); + Emit(kInt32x4Mul, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1))); +} + + +void InstructionSelector::VisitInt32x4Constructor(Node* node) { + IA32OperandGenerator g(this); + Emit(kInt32x4Constructor, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + g.UseRegister(node->InputAt(2)), g.UseRegister(node->InputAt(3))); +} + + +void InstructionSelector::VisitInt32x4Bool(Node* node) { + IA32OperandGenerator g(this); + InstructionOperand* temps[] = {g.TempRegister(eax)}; + Emit(kInt32x4Bool, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)), + g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)), + g.UseRegister(node->InputAt(3)), 1, temps); +} + + +void InstructionSelector::VisitFloat64x2Constructor(Node* node) { + IA32OperandGenerator g(this); + Emit(kFloat64x2Constructor, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); +} + + #define UNARY_SIMD_OPERATION_LIST1(V) \ V(Float32x4GetX) \ V(Float32x4GetY) \ 
V(Float32x4GetZ) \ V(Float32x4GetW) \ V(Float32x4GetSignMask) \ - V(Float32x4Splat) + V(Float32x4Splat) \ + V(Int32x4GetX) \ + V(Int32x4GetY) \ + V(Int32x4GetZ) \ + V(Int32x4GetW) \ + V(Int32x4GetFlagX) \ + V(Int32x4GetFlagY) \ + V(Int32x4GetFlagZ) \ + V(Int32x4GetFlagW) \ + V(Int32x4GetSignMask) \ + V(Int32x4Splat) \ + V(Int32x4BitsToFloat32x4) \ + V(Int32x4ToFloat32x4) \ + V(Float32x4BitsToInt32x4) \ + V(Float32x4ToInt32x4) \ + V(Float64x2GetX) \ + V(Float64x2GetY) \ + V(Float64x2GetSignMask) #define DECLARE_VISIT_UARY_SIMD_OPERATION1(opcode) \ void InstructionSelector::Visit##opcode(Node* node) { \ @@ -1113,8 +1161,14 @@ UNARY_SIMD_OPERATION_LIST1(DECLARE_VISIT_UARY_SIMD_OPERATION1) V(Float32x4Neg) \ V(Float32x4Reciprocal) \ V(Float32x4ReciprocalSqrt) \ - V(Float32x4Sqrt) - + V(Float32x4Sqrt) \ + V(Int32x4Neg) \ + V(Int32x4Not) \ + V(Float64x2Abs) \ + V(Float64x2Neg) \ + V(Float64x2Sqrt) + +// TODO(weiliang): free Sqrt SameAsFirst constraint. #define DECLARE_VISIT_UARY_SIMD_OPERATION2(opcode) \ void InstructionSelector::Visit##opcode(Node* node) { \ IA32OperandGenerator g(this); \ @@ -1126,58 +1180,155 @@ UNARY_SIMD_OPERATION_LIST1(DECLARE_VISIT_UARY_SIMD_OPERATION1) UNARY_SIMD_OPERATION_LIST2(DECLARE_VISIT_UARY_SIMD_OPERATION2) -void InstructionSelector::VisitFloat32x4Scale(Node* node) { +#define BINARY_SIMD_OPERATION_LIST2(V) \ + V(Float32x4Min) \ + V(Float32x4Max) \ + V(Int32x4Add) \ + V(Int32x4And) \ + V(Int32x4Sub) \ + V(Int32x4Or) \ + V(Int32x4Xor) \ + V(Int32x4Equal) \ + V(Int32x4GreaterThan) \ + V(Int32x4LessThan) \ + V(Float64x2Add) \ + V(Float64x2Sub) \ + V(Float64x2Mul) \ + V(Float64x2Div) \ + V(Float64x2Min) \ + V(Float64x2Max) \ + V(Float32x4Scale) \ + V(Float32x4WithX) \ + V(Float32x4WithY) \ + V(Float32x4WithZ) \ + V(Float32x4WithW) \ + V(Int32x4WithX) \ + V(Int32x4WithY) \ + V(Int32x4WithZ) \ + V(Int32x4WithW) \ + V(Float64x2Scale) \ + V(Float64x2WithX) \ + V(Float64x2WithY) + +#define DECLARE_VISIT_BINARY_SIMD_OPERATION2(type) \ + void 
InstructionSelector::Visit##type(Node* node) { \ + IA32OperandGenerator g(this); \ + Emit(k##type, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), \ + g.UseRegister(node->InputAt(1))); \ + } + + +BINARY_SIMD_OPERATION_LIST2(DECLARE_VISIT_BINARY_SIMD_OPERATION2) + + +#define BINARY_SIMD_OPERATION_LIST3(V) \ + V(Float32x4Equal) \ + V(Float32x4NotEqual) \ + V(Float32x4GreaterThan) \ + V(Float32x4GreaterThanOrEqual) \ + V(Float32x4LessThan) \ + V(Float32x4LessThanOrEqual) + +#define DECLARE_VISIT_BINARY_SIMD_OPERATION3(type) \ + void InstructionSelector::Visit##type(Node* node) { \ + IA32OperandGenerator g(this); \ + Emit(k##type, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), \ + g.UseRegister(node->InputAt(1))); \ + } + + +BINARY_SIMD_OPERATION_LIST3(DECLARE_VISIT_BINARY_SIMD_OPERATION3) + + +void InstructionSelector::VisitFloat32x4Clamp(Node* node) { IA32OperandGenerator g(this); - Emit(kFloat32x4Scale, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + Emit(kFloat32x4Clamp, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + g.UseRegister(node->InputAt(2))); } -void InstructionSelector::VisitFloat32x4WithX(Node* node) { +void InstructionSelector::VisitFloat64x2Clamp(Node* node) { IA32OperandGenerator g(this); - Emit(kFloat32x4WithX, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + Emit(kFloat64x2Clamp, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + g.UseRegister(node->InputAt(2))); } -void InstructionSelector::VisitFloat32x4WithY(Node* node) { +void InstructionSelector::VisitFloat32x4Select(Node* node) { IA32OperandGenerator g(this); - Emit(kFloat32x4WithY, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + Emit(kFloat32x4Select, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), 
g.UseRegister(node->InputAt(1)), + g.UseRegister(node->InputAt(2))); } -void InstructionSelector::VisitFloat32x4WithZ(Node* node) { +void InstructionSelector::VisitInt32x4Select(Node* node) { IA32OperandGenerator g(this); - Emit(kFloat32x4WithZ, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + Emit(kInt32x4Select, g.DefineAsRegister(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + g.UseRegister(node->InputAt(2))); } -void InstructionSelector::VisitFloat32x4WithW(Node* node) { +void InstructionSelector::VisitFloat32x4Swizzle(Node* node) { IA32OperandGenerator g(this); - Emit(kFloat32x4WithW, g.DefineSameAsFirst(node), - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); + Emit(kFloat32x4Swizzle, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)), + g.UseImmediate(node->InputAt(2)), g.UseImmediate(node->InputAt(3)), + g.UseImmediate(node->InputAt(4))); } -void InstructionSelector::VisitFloat32x4Clamp(Node* node) { +void InstructionSelector::VisitFloat32x4Shuffle(Node* node) { IA32OperandGenerator g(this); - Emit(kFloat32x4Clamp, g.DefineSameAsFirst(node), + Emit(kFloat32x4Shuffle, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), - g.UseRegister(node->InputAt(2))); + g.UseImmediate(node->InputAt(2)), g.UseImmediate(node->InputAt(3)), + g.UseImmediate(node->InputAt(4)), g.UseImmediate(node->InputAt(5))); } -void InstructionSelector::VisitFloat32x4Swizzle(Node* node) { +void InstructionSelector::VisitInt32x4Shuffle(Node* node) { IA32OperandGenerator g(this); - Emit(kFloat32x4Swizzle, g.DefineSameAsFirst(node), + Emit(kInt32x4Shuffle, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), + g.UseImmediate(node->InputAt(2)), g.UseImmediate(node->InputAt(3)), + g.UseImmediate(node->InputAt(4)), g.UseImmediate(node->InputAt(5))); +} + + +void 
InstructionSelector::VisitInt32x4Swizzle(Node* node) { + IA32OperandGenerator g(this); + Emit(kInt32x4Swizzle, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)), g.UseImmediate(node->InputAt(2)), g.UseImmediate(node->InputAt(3)), g.UseImmediate(node->InputAt(4))); } +void InstructionSelector::VisitInt32x4ShiftLeft(Node* node) { + IA32OperandGenerator g(this); + Emit(kInt32x4ShiftLeft, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1))); +} + + +void InstructionSelector::VisitInt32x4ShiftRight(Node* node) { + IA32OperandGenerator g(this); + Emit(kInt32x4ShiftRight, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1))); +} + + +void InstructionSelector::VisitInt32x4ShiftRightArithmetic(Node* node) { + IA32OperandGenerator g(this); + Emit(kInt32x4ShiftRightArithmetic, g.DefineSameAsFirst(node), + g.UseRegister(node->InputAt(0)), g.Use(node->InputAt(1))); +} + + // static MachineOperatorBuilder::Flags InstructionSelector::SupportedMachineOperatorFlags() { diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index 86c009e9348..76d105209f3 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -3226,7 +3226,7 @@ void SubStringStub::Generate(MacroAssembler* masm) { void ToFloat32x4Stub::Generate(MacroAssembler* masm) { - // The ToFloat32x4Obj stub takes one argument in rax. + // The ToFloat32x4Obj stub takes one argument in eax. Label non_float32x4; __ JumpIfSmi(eax, &non_float32x4, Label::kNear); __ CmpObjectType(eax, FLOAT32x4_TYPE, edi); @@ -3239,6 +3239,34 @@ void ToFloat32x4Stub::Generate(MacroAssembler* masm) { } +void ToInt32x4Stub::Generate(MacroAssembler* masm) { + // The ToInt32x4Obj stub takes one argument in eax. 
+ Label non_int32x4; + __ JumpIfSmi(eax, &non_int32x4, Label::kNear); + __ CmpObjectType(eax, INT32x4_TYPE, edi); + __ j(not_equal, &non_int32x4, Label::kNear); + __ Ret(); + + __ bind(&non_int32x4); + __ TailCallRuntime(Runtime::kAllocateInt32x4, 0, 1); + __ Ret(); +} + + +void ToFloat64x2Stub::Generate(MacroAssembler* masm) { + // The ToFloat64x2Obj stub takes one argument in eax. + Label non_float64x2; + __ JumpIfSmi(eax, &non_float64x2, Label::kNear); + __ CmpObjectType(eax, FLOAT64x2_TYPE, edi); + __ j(not_equal, &non_float64x2, Label::kNear); + __ Ret(); + + __ bind(&non_float64x2); + __ TailCallRuntime(Runtime::kAllocateFloat64x2, 0, 1); + __ Ret(); +} + + void ToNumberStub::Generate(MacroAssembler* masm) { // The ToNumber stub takes one argument in eax. Label not_smi; diff --git a/src/ia32/interface-descriptors-ia32.cc b/src/ia32/interface-descriptors-ia32.cc index 98c4795cb8c..1789b95597a 100644 --- a/src/ia32/interface-descriptors-ia32.cc +++ b/src/ia32/interface-descriptors-ia32.cc @@ -81,6 +81,18 @@ void ToFloat32x4Descriptor::Initialize(CallInterfaceDescriptorData* data) { } + +void ToInt32x4Descriptor::Initialize(CallInterfaceDescriptorData* data) { + Register registers[] = {esi, eax}; + data->Initialize(arraysize(registers), registers, NULL); +} + + +void ToFloat64x2Descriptor::Initialize(CallInterfaceDescriptorData* data) { + Register registers[] = {esi, eax}; + data->Initialize(arraysize(registers), registers, NULL); +} + + void NumberToStringDescriptor::Initialize(CallInterfaceDescriptorData* data) { Register registers[] = {esi, eax}; data->Initialize(arraysize(registers), registers, NULL); @@ -179,6 +191,23 @@ void AllocateFloat32x4Descriptor::Initialize( } + +void AllocateInt32x4Descriptor::Initialize(CallInterfaceDescriptorData* data) { + // register state + // esi -- context + Register registers[] = {esi}; + data->Initialize(arraysize(registers), registers, nullptr); +} + + +void AllocateFloat64x2Descriptor::Initialize( + 
CallInterfaceDescriptorData* data) { + // register state + // esi -- context + Register registers[] = {esi}; + data->Initialize(arraysize(registers), registers, nullptr); +} + + void ArrayConstructorConstantArgCountDescriptor::Initialize( CallInterfaceDescriptorData* data) { // register state