Skip to content

Commit

Permalink
[wasm-simd] Prototype f32x4 and f64x2 pmin and pmax
Browse files Browse the repository at this point in the history
This patch implements f32x4.pmin, f32x4.pmax, f64x2.pmin, and f64x2.pmax
for x64 and the interpreter.

Pseudo-min and Pseudo-max instructions were proposed in
WebAssembly/simd#122. These instructions
exactly match std::min and std::max in C++ STL, and thus have different
semantics from the existing min and max.

The instruction-selector for x64 switches the operands around, because
that allows defining the dst to be the same as the first operand (which is
really the second input node), allowing better codegen.

For example, b = f32x4.pmin(a, b) directly maps to vminps(b, b, a) or
minps(b, a), as long as we can define dst == b, and switching the
instruction operands around allows us to do that.

Bug: v8:10501
Change-Id: I06f983fc1764caf673e600ac91d9c0ac5166e17e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2186630
Commit-Queue: Zhi An Ng <[email protected]>
Reviewed-by: Tobias Tebbi <[email protected]>
Reviewed-by: Deepti Gandluri <[email protected]>
Cr-Commit-Position: refs/heads/master@{#67688}
  • Loading branch information
ngzhian authored and Commit Bot committed May 8, 2020
1 parent 6adf7e8 commit 9a68fa1
Show file tree
Hide file tree
Showing 13 changed files with 146 additions and 3 deletions.
16 changes: 16 additions & 0 deletions src/compiler/backend/instruction-selector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1884,6 +1884,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Qfma(node);
case IrOpcode::kF64x2Qfms:
return MarkAsSimd128(node), VisitF64x2Qfms(node);
case IrOpcode::kF64x2Pmin:
return MarkAsSimd128(node), VisitF64x2Pmin(node);
case IrOpcode::kF64x2Pmax:
return MarkAsSimd128(node), VisitF64x2Pmax(node);
case IrOpcode::kF32x4Splat:
return MarkAsSimd128(node), VisitF32x4Splat(node);
case IrOpcode::kF32x4ExtractLane:
Expand Down Expand Up @@ -1930,6 +1934,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Qfma(node);
case IrOpcode::kF32x4Qfms:
return MarkAsSimd128(node), VisitF32x4Qfms(node);
case IrOpcode::kF32x4Pmin:
return MarkAsSimd128(node), VisitF32x4Pmin(node);
case IrOpcode::kF32x4Pmax:
return MarkAsSimd128(node), VisitF32x4Pmax(node);
case IrOpcode::kI64x2Splat:
return MarkAsSimd128(node), VisitI64x2Splat(node);
case IrOpcode::kI64x2SplatI32Pair:
Expand Down Expand Up @@ -2653,6 +2661,14 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
// && !V8_TARGET_ARCH_X64

// TODO(v8:10501) Prototyping pmin and pmax instructions.
// The pseudo-min/pseudo-max selectors below are compiled for every target
// except x64. Each one is a placeholder that only invokes UNIMPLEMENTED(), so
// the VisitF32x4Pmin/Pmax and VisitF64x2Pmin/Pmax members are still defined
// on those targets.
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64

// Handles a FinishRegion node by delegating to EmitIdentity(node); no
// dedicated opcode is selected here.
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

void InstructionSelector::VisitParameter(Node* node) {
Expand Down
24 changes: 24 additions & 0 deletions src/compiler/backend/x64/code-generator-x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2607,6 +2607,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64F32x4Pmin: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Minps(dst, i.InputSimd128Register(1));
break;
}
case kX64F32x4Pmax: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Maxps(dst, i.InputSimd128Register(1));
break;
}
case kX64F64x2Pmin: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Minpd(dst, i.InputSimd128Register(1));
break;
}
case kX64F64x2Pmax: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
__ Maxpd(dst, i.InputSimd128Register(1));
break;
}
case kX64I64x2Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
Expand Down
4 changes: 4 additions & 0 deletions src/compiler/backend/x64/instruction-codes-x64.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ namespace compiler {
V(X64F64x2Le) \
V(X64F64x2Qfma) \
V(X64F64x2Qfms) \
V(X64F64x2Pmin) \
V(X64F64x2Pmax) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
Expand All @@ -195,6 +197,8 @@ namespace compiler {
V(X64F32x4Le) \
V(X64F32x4Qfma) \
V(X64F32x4Qfms) \
V(X64F32x4Pmin) \
V(X64F32x4Pmax) \
V(X64I64x2Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2ReplaceLane) \
Expand Down
4 changes: 4 additions & 0 deletions src/compiler/backend/x64/instruction-scheduler-x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Le:
case kX64F64x2Qfma:
case kX64F64x2Qfms:
case kX64F64x2Pmin:
case kX64F64x2Pmax:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
Expand All @@ -167,6 +169,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Le:
case kX64F32x4Qfma:
case kX64F32x4Qfms:
case kX64F32x4Pmin:
case kX64F32x4Pmax:
case kX64I64x2Splat:
case kX64I64x2ExtractLane:
case kX64I64x2ReplaceLane:
Expand Down
28 changes: 28 additions & 0 deletions src/compiler/backend/x64/instruction-selector-x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3380,6 +3380,34 @@ void InstructionSelector::VisitS8x16Swizzle(Node* node) {
arraysize(temps), temps);
}

namespace {

// Shared selector for the four pseudo-min / pseudo-max opcodes.
//
// The node's two inputs are deliberately emitted in reverse order: the
// instruction's first operand is the node's *second* input, and the output is
// constrained to alias that first operand. Because of how minps/minpd operate,
// this lets b = pmin(a, b) be emitted directly as "minps b, a".
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
                     ArchOpcode opcode) {
  X64OperandGenerator g(selector);
  Node* const first_input = node->InputAt(0);
  Node* const second_input = node->InputAt(1);
  selector->Emit(opcode, g.DefineSameAsFirst(node),
                 g.UseRegister(second_input), g.UseRegister(first_input));
}

}  // namespace

// x64 selector for f32x4.pmin: forwards to the shared VisitPminOrPmax helper
// with the kX64F32x4Pmin opcode.
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitPminOrPmax(this, node, kX64F32x4Pmin);
}

// x64 selector for f32x4.pmax: forwards to the shared VisitPminOrPmax helper
// with the kX64F32x4Pmax opcode.
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitPminOrPmax(this, node, kX64F32x4Pmax);
}

// x64 selector for f64x2.pmin: forwards to the shared VisitPminOrPmax helper
// with the kX64F64x2Pmin opcode.
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitPminOrPmax(this, node, kX64F64x2Pmin);
}

// x64 selector for f64x2.pmax: forwards to the shared VisitPminOrPmax helper
// with the kX64F64x2Pmax opcode.
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitPminOrPmax(this, node, kX64F64x2Pmax);
}

// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
Expand Down
4 changes: 4 additions & 0 deletions src/compiler/machine-operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F64x2Le, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Qfma, Operator::kNoProperties, 3, 0, 1) \
V(F64x2Qfms, Operator::kNoProperties, 3, 0, 1) \
V(F64x2Pmin, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Pmax, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(F32x4SConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
V(F32x4UConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
Expand All @@ -358,6 +360,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F32x4Le, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Qfma, Operator::kNoProperties, 3, 0, 1) \
V(F32x4Qfms, Operator::kNoProperties, 3, 0, 1) \
V(F32x4Pmin, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Pmax, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(I64x2SplatI32Pair, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Neg, Operator::kNoProperties, 1, 0, 1) \
Expand Down
4 changes: 4 additions & 0 deletions src/compiler/machine-operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F64x2Le();
const Operator* F64x2Qfma();
const Operator* F64x2Qfms();
const Operator* F64x2Pmin();
const Operator* F64x2Pmax();

const Operator* F32x4Splat();
const Operator* F32x4ExtractLane(int32_t);
Expand All @@ -598,6 +600,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4Le();
const Operator* F32x4Qfma();
const Operator* F32x4Qfms();
const Operator* F32x4Pmin();
const Operator* F32x4Pmax();

const Operator* I64x2Splat();
const Operator* I64x2SplatI32Pair();
Expand Down
4 changes: 4 additions & 0 deletions src/compiler/opcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,8 @@
V(F64x2Le) \
V(F64x2Qfma) \
V(F64x2Qfms) \
V(F64x2Pmin) \
V(F64x2Pmax) \
V(F32x4Splat) \
V(F32x4ExtractLane) \
V(F32x4ReplaceLane) \
Expand All @@ -788,6 +790,8 @@
V(F32x4Ge) \
V(F32x4Qfma) \
V(F32x4Qfms) \
V(F32x4Pmin) \
V(F32x4Pmax) \
V(I64x2Splat) \
V(I64x2SplatI32Pair) \
V(I64x2ExtractLane) \
Expand Down
12 changes: 12 additions & 0 deletions src/compiler/wasm-compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4128,6 +4128,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF64x2Qfms:
return graph()->NewNode(mcgraph()->machine()->F64x2Qfms(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprF64x2Pmin:
return graph()->NewNode(mcgraph()->machine()->F64x2Pmin(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Pmax:
return graph()->NewNode(mcgraph()->machine()->F64x2Pmax(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Splat:
return graph()->NewNode(mcgraph()->machine()->F32x4Splat(), inputs[0]);
case wasm::kExprF32x4SConvertI32x4:
Expand Down Expand Up @@ -4193,6 +4199,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF32x4Qfms:
return graph()->NewNode(mcgraph()->machine()->F32x4Qfms(), inputs[0],
inputs[1], inputs[2]);
case wasm::kExprF32x4Pmin:
return graph()->NewNode(mcgraph()->machine()->F32x4Pmin(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Pmax:
return graph()->NewNode(mcgraph()->machine()->F32x4Pmax(), inputs[0],
inputs[1]);
case wasm::kExprI64x2Splat:
return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
case wasm::kExprI64x2Neg:
Expand Down
4 changes: 4 additions & 0 deletions src/wasm/wasm-interpreter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2299,12 +2299,16 @@ class ThreadImpl {
BINOP_CASE(F64x2Div, f64x2, float2, 2, base::Divide(a, b))
BINOP_CASE(F64x2Min, f64x2, float2, 2, JSMin(a, b))
BINOP_CASE(F64x2Max, f64x2, float2, 2, JSMax(a, b))
BINOP_CASE(F64x2Pmin, f64x2, float2, 2, std::min(a, b))
BINOP_CASE(F64x2Pmax, f64x2, float2, 2, std::max(a, b))
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
BINOP_CASE(F32x4Div, f32x4, float4, 4, a / b)
BINOP_CASE(F32x4Min, f32x4, float4, 4, JSMin(a, b))
BINOP_CASE(F32x4Max, f32x4, float4, 4, JSMax(a, b))
BINOP_CASE(F32x4Pmin, f32x4, float4, 4, std::min(a, b))
BINOP_CASE(F32x4Pmax, f32x4, float4, 4, std::max(a, b))
BINOP_CASE(I64x2Add, i64x2, int2, 2, base::AddWithWraparound(a, b))
BINOP_CASE(I64x2Sub, i64x2, int2, 2, base::SubWithWraparound(a, b))
BINOP_CASE(I64x2Mul, i64x2, int2, 2, base::MulWithWraparound(a, b))
Expand Down
5 changes: 5 additions & 0 deletions src/wasm/wasm-opcodes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,11 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I16x8_OP(BitMask, "bitmask")
CASE_I32x4_OP(BitMask, "bitmask")

CASE_F32x4_OP(Pmin, "pmin")
CASE_F32x4_OP(Pmax, "pmax")
CASE_F64x2_OP(Pmin, "pmin")
CASE_F64x2_OP(Pmax, "pmax")

// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")
Expand Down
6 changes: 5 additions & 1 deletion src/wasm/wasm-opcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,11 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(I32x4AddHoriz, 0xfdb0, s_ss) \
V(F32x4AddHoriz, 0xfdb2, s_ss) \
V(F32x4RecipApprox, 0xfdb3, s_s) \
V(F32x4RecipSqrtApprox, 0xfdba, s_s)
V(F32x4RecipSqrtApprox, 0xfdba, s_s) \
V(F32x4Pmin, 0xfdda, s_ss) \
V(F32x4Pmax, 0xfddb, s_ss) \
V(F64x2Pmin, 0xfddc, s_ss) \
V(F64x2Pmax, 0xfddd, s_ss)

#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
V(I8x16ExtractLaneS, 0xfd15, _) \
Expand Down
34 changes: 32 additions & 2 deletions test/cctest/wasm/test-run-wasm-simd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,18 @@ T Div(T a, T b) {

// Reference minimum used to compute expected results in tests.
//
// Returns b only when b compares strictly less than a; otherwise returns a.
// This follows one of the possible implementations given in
// https://en.cppreference.com/w/cpp/algorithm/min so that it works the same
// way as std::min for floats (when given NaNs/Infs): every comparison that
// involves a NaN is false, so the first argument is returned in that case.
template <typename T>
T Minimum(T a, T b) {
  return (b < a) ? b : a;
}

// Reference maximum used to compute expected results in tests.
//
// Returns b only when a compares strictly less than b; otherwise returns a.
// This follows one of the possible implementations given in
// https://en.cppreference.com/w/cpp/algorithm/max so that it works the same
// way as std::max for floats (when given NaNs/Infs): every comparison that
// involves a NaN is false, so the first argument is returned in that case.
template <typename T>
T Maximum(T a, T b) {
  return (a < b) ? b : a;
}

template <typename T>
Expand Down Expand Up @@ -750,6 +756,18 @@ WASM_SIMD_TEST(F32x4Max) {
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax);
}

// f32x4.pmin / f32x4.pmax tests. They are only compiled for x64. Each test
// opens a FLAG_SCOPE for wasm_simd_post_mvp and then runs the generic F32x4
// binary-op test, passing Minimum / Maximum as the reference operation.
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
}

WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64

void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatCompareOp expected_op) {
WasmRunner<int32_t, float, float> r(execution_tier, lower_simd);
Expand Down Expand Up @@ -1340,6 +1358,18 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
}

// f64x2.pmin / f64x2.pmax tests. They are only compiled for x64. Each test
// opens a FLAG_SCOPE for wasm_simd_post_mvp and then runs the generic F64x2
// binary-op test, passing Minimum / Maximum as the reference operation.
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
}

WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
}
#endif // V8_TARGET_ARCH_X64

void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleCompareOp expected_op) {
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
Expand Down

0 comments on commit 9a68fa1

Please sign in to comment.