Skip to content

Commit

Permalink
[interpreter] Implement SIMD extended multiply instructions
Browse files Browse the repository at this point in the history
These were accepted into the proposal in WebAssembly#376.

There are 12 instructions in total:

- i16x8.extmul_{low,high}_i8x16_{s,u}
- i32x4.extmul_{low,high}_i16x8_{s,u}
- i64x2.extmul_{low,high}_i32x4_{s,u}

The implementation is straightforward: widen both operands (using existing
operations), then multiply using the wider shape.

Added a test generation script that reuses some logic in the generator
for arithmetic instructions. Since these instructions have different
src and dst shapes, I tweaked the base class to allow for having
different shapes.
  • Loading branch information
ngzhian committed Feb 2, 2021
1 parent 0cd0a20 commit 270d6c2
Show file tree
Hide file tree
Showing 15 changed files with 1,435 additions and 18 deletions.
12 changes: 12 additions & 0 deletions interpreter/binary/decode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,11 @@ let simd_prefix s =
| 0x97l -> i16x8_min_u
| 0x98l -> i16x8_max_s
| 0x99l -> i16x8_max_u
| 0x9al -> i16x8_extmul_low_i8x16_s
| 0x9bl -> i16x8_avgr_u
| 0x9dl -> i16x8_extmul_high_i8x16_s
| 0x9el -> i16x8_extmul_low_i8x16_u
| 0x9fl -> i16x8_extmul_high_i8x16_u
| 0xa0l -> i32x4_abs
| 0xa1l -> i32x4_neg
| 0xa3l -> i32x4_all_true
Expand All @@ -385,12 +389,20 @@ let simd_prefix s =
| 0xb8l -> i32x4_max_s
| 0xb9l -> i32x4_max_u
| 0xbal -> i32x4_dot_i16x8_s
| 0xbbl -> i32x4_extmul_low_i16x8_s
| 0xbdl -> i32x4_extmul_high_i16x8_s
| 0xbel -> i32x4_extmul_low_i16x8_u
| 0xbfl -> i32x4_extmul_high_i16x8_u
| 0xc1l -> i64x2_neg
| 0xcbl -> i64x2_shl
| 0xccl -> i64x2_shr_s
| 0xcdl -> i64x2_shr_u
| 0xcel -> i64x2_add
| 0xd1l -> i64x2_sub
| 0xd2l -> i64x2_extmul_low_i32x4_s
| 0xd3l -> i64x2_extmul_high_i32x4_s
| 0xd6l -> i64x2_extmul_low_i32x4_u
| 0xd7l -> i64x2_extmul_high_i32x4_u
| 0xd5l -> i64x2_mul
| 0xd8l -> f32x4_ceil
| 0xd9l -> f32x4_floor
Expand Down
12 changes: 12 additions & 0 deletions interpreter/binary/encode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,10 @@ let encode m =
| Binary (V128 V128Op.(I16x8 MaxS)) -> simd_op 0x98l
| Binary (V128 V128Op.(I16x8 MaxU)) -> simd_op 0x99l
| Binary (V128 V128Op.(I16x8 AvgrU)) -> simd_op 0x9bl
| Binary (V128 V128Op.(I16x8 ExtMulLowS)) -> simd_op 0x9al
| Binary (V128 V128Op.(I16x8 ExtMulHighS)) -> simd_op 0x9dl
| Binary (V128 V128Op.(I16x8 ExtMulLowU)) -> simd_op 0x9el
| Binary (V128 V128Op.(I16x8 ExtMulHighU)) -> simd_op 0x9fl
| Binary (V128 V128Op.(I32x4 Add)) -> simd_op 0xael
| Binary (V128 V128Op.(I32x4 Sub)) -> simd_op 0xb1l
| Binary (V128 V128Op.(I32x4 MinS)) -> simd_op 0xb6l
Expand All @@ -485,9 +489,17 @@ let encode m =
| Binary (V128 V128Op.(I32x4 LeU)) -> simd_op 0x3el
| Binary (V128 V128Op.(I32x4 GeS)) -> simd_op 0x3fl
| Binary (V128 V128Op.(I32x4 GeU)) -> simd_op 0x40l
| Binary (V128 V128Op.(I32x4 ExtMulLowS)) -> simd_op 0xbbl
| Binary (V128 V128Op.(I32x4 ExtMulHighS)) -> simd_op 0xbdl
| Binary (V128 V128Op.(I32x4 ExtMulLowU)) -> simd_op 0xbel
| Binary (V128 V128Op.(I32x4 ExtMulHighU)) -> simd_op 0xbfl
| Binary (V128 V128Op.(I64x2 Add)) -> simd_op 0xcel
| Binary (V128 V128Op.(I64x2 Sub)) -> simd_op 0xd1l
| Binary (V128 V128Op.(I64x2 Mul)) -> simd_op 0xd5l
| Binary (V128 V128Op.(I64x2 ExtMulLowS)) -> simd_op 0xd2l
| Binary (V128 V128Op.(I64x2 ExtMulHighS)) -> simd_op 0xd3l
| Binary (V128 V128Op.(I64x2 ExtMulLowU)) -> simd_op 0xd6l
| Binary (V128 V128Op.(I64x2 ExtMulHighU)) -> simd_op 0xd7l
| Binary (V128 V128Op.(F32x4 Eq)) -> simd_op 0x41l
| Binary (V128 V128Op.(F32x4 Ne)) -> simd_op 0x42l
| Binary (V128 V128Op.(F32x4 Lt)) -> simd_op 0x43l
Expand Down
12 changes: 12 additions & 0 deletions interpreter/exec/eval_simd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ module SimdOp (SXX : Simd.S) (Value : ValueType with type t = SXX.t) = struct
| I16x8 MaxS -> SXX.I16x8.max_s
| I16x8 MaxU -> SXX.I16x8.max_u
| I16x8 AvgrU -> SXX.I16x8.avgr_u
| I16x8 ExtMulLowS -> SXX.I16x8_convert.extmul_low_s
| I16x8 ExtMulHighS -> SXX.I16x8_convert.extmul_high_s
| I16x8 ExtMulLowU -> SXX.I16x8_convert.extmul_low_u
| I16x8 ExtMulHighU -> SXX.I16x8_convert.extmul_high_u
| I32x4 Add -> SXX.I32x4.add
| I32x4 Sub -> SXX.I32x4.sub
| I32x4 MinS -> SXX.I32x4.min_s
Expand All @@ -119,9 +123,17 @@ module SimdOp (SXX : Simd.S) (Value : ValueType with type t = SXX.t) = struct
| I32x4 GeS -> SXX.I32x4.ge_s
| I32x4 GeU -> SXX.I32x4.ge_u
| I32x4 DotI16x8S -> SXX.I32x4_convert.dot_i16x8_s
| I32x4 ExtMulLowS -> SXX.I32x4_convert.extmul_low_s
| I32x4 ExtMulHighS -> SXX.I32x4_convert.extmul_high_s
| I32x4 ExtMulLowU -> SXX.I32x4_convert.extmul_low_u
| I32x4 ExtMulHighU -> SXX.I32x4_convert.extmul_high_u
| I64x2 Add -> SXX.I64x2.add
| I64x2 Sub -> SXX.I64x2.sub
| I64x2 Mul -> SXX.I64x2.mul
| I64x2 ExtMulLowS -> SXX.I64x2_convert.extmul_low_s
| I64x2 ExtMulHighS -> SXX.I64x2_convert.extmul_high_s
| I64x2 ExtMulLowU -> SXX.I64x2_convert.extmul_low_u
| I64x2 ExtMulHighU -> SXX.I64x2_convert.extmul_high_u
| F32x4 Eq -> SXX.F32x4.eq
| F32x4 Ne -> SXX.F32x4.ne
| F32x4 Lt -> SXX.F32x4.lt
Expand Down
38 changes: 34 additions & 4 deletions interpreter/exec/simd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ sig
val widen_high_s : t -> t
val widen_low_u : t -> t
val widen_high_u : t -> t
val extmul_low_s : t -> t -> t
val extmul_high_s : t -> t -> t
val extmul_low_u : t -> t -> t
val extmul_high_u : t -> t -> t
end
module I32x4_convert : sig
val trunc_sat_f32x4_s : t -> t
Expand All @@ -186,10 +190,20 @@ sig
val widen_low_u : t -> t
val widen_high_u : t -> t
val dot_i16x8_s : t -> t -> t
val extmul_low_s : t -> t -> t
val extmul_high_s : t -> t -> t
val extmul_low_u : t -> t -> t
val extmul_high_u : t -> t -> t
end
(* Conversions that take i32x4 inputs and produce i64x2 results. *)
module I64x2_convert : sig
(* widen_*: extend two of the input's i32 lanes to i64 lanes.
   "low" uses the first two lanes and "high" the remaining lanes;
   "_s" keeps the sign extension, "_u" masks the result to the low 32 bits. *)
val widen_low_s : t -> t
val widen_high_s : t -> t
val widen_low_u : t -> t
val widen_high_u : t -> t
(* extmul_*: extended multiply. Both operands are widened with the matching
   widen_* function and the widened vectors are multiplied as i64x2. *)
val extmul_low_s : t -> t -> t
val extmul_high_s : t -> t -> t
val extmul_low_u : t -> t -> t
val extmul_high_u : t -> t -> t
end
module F32x4_convert : sig
val convert_i32x4_s : t -> t
Expand Down Expand Up @@ -417,6 +431,10 @@ struct
let widen_low_u = widen Lib.List.take 0xffl
let widen_high_u = widen Lib.List.drop 0xffl

let extmul_low_s x y = I16x8.mul (widen_low_s x) (widen_low_s y)
let extmul_high_s x y = I16x8.mul (widen_high_s x) (widen_high_s y)
let extmul_low_u x y = I16x8.mul (widen_low_u x) (widen_low_u y)
let extmul_high_u x y = I16x8.mul (widen_high_u x) (widen_high_u y)
end

module I32x4_convert = struct
Expand All @@ -441,16 +459,28 @@ struct
| [], [] -> []
| _, _ -> assert false
in Rep.of_i32x4 (dot xs ys)

let extmul_low_s x y = I32x4.mul (widen_low_s x) (widen_low_s y)
let extmul_high_s x y = I32x4.mul (widen_high_s x) (widen_high_s y)
let extmul_low_u x y = I32x4.mul (widen_low_u x) (widen_low_u y)
let extmul_high_u x y = I32x4.mul (widen_high_u x) (widen_high_u y)
end

module I64x2_convert = struct
let widen mask x =
let widen take_or_drop mask x =
Rep.of_i64x2
(List.map
(fun i32 -> Int64.(logand mask (of_int32 i32)))
(Lib.List.take 2 (Rep.to_i32x4 x)))
let widen_low_s = widen 0xffffffffffffffffL
let widen_low_u = widen 0xffffffffL
(take_or_drop 2 (Rep.to_i32x4 x)))
let widen_low_s = widen Lib.List.take 0xffffffffffffffffL
let widen_high_s = widen Lib.List.drop 0xffffffffffffffffL
let widen_low_u = widen Lib.List.take 0xffffffffL
let widen_high_u = widen Lib.List.drop 0xffffffffL

let extmul_low_s x y = I64x2.mul (widen_low_s x) (widen_low_s y)
let extmul_high_s x y = I64x2.mul (widen_high_s x) (widen_high_s y)
let extmul_low_u x y = I64x2.mul (widen_low_u x) (widen_low_u y)
let extmul_high_u x y = I64x2.mul (widen_high_u x) (widen_high_u y)
end

module F32x4_convert = struct
Expand Down
1 change: 1 addition & 0 deletions interpreter/syntax/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct
| Swizzle | Shuffle of int list | NarrowS | NarrowU
| AddSatS | AddSatU | SubSatS | SubSatU
| DotI16x8S
| ExtMulLowS | ExtMulHighS | ExtMulLowU | ExtMulHighU
type funop = Abs | Neg | Sqrt
| Ceil | Floor | Trunc | Nearest
| ConvertI32x4S | ConvertI32x4U
Expand Down
12 changes: 12 additions & 0 deletions interpreter/syntax/operators.ml
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,10 @@ let i16x8_min_u = Binary (V128 V128Op.(I16x8 MinU))
let i16x8_max_s = Binary (V128 V128Op.(I16x8 MaxS))
let i16x8_max_u = Binary (V128 V128Op.(I16x8 MaxU))
let i16x8_avgr_u = Binary (V128 V128Op.(I16x8 AvgrU))
(* Extended multiply instructions with i8x16 sources and an i16x8 result:
   binary V128 operators for the low/high halves, signed and unsigned. *)
let i16x8_extmul_low_i8x16_s = Binary (V128 V128Op.(I16x8 ExtMulLowS))
let i16x8_extmul_high_i8x16_s = Binary (V128 V128Op.(I16x8 ExtMulHighS))
let i16x8_extmul_low_i8x16_u = Binary (V128 V128Op.(I16x8 ExtMulLowU))
let i16x8_extmul_high_i8x16_u = Binary (V128 V128Op.(I16x8 ExtMulHighU))

let i32x4_splat = Convert (V128 V128Op.(I32x4 Splat))
let i32x4_extract_lane imm = SimdExtract (V128Op.I32x4 (ZX, imm))
Expand Down Expand Up @@ -375,6 +379,10 @@ let i32x4_mul = Binary (V128 V128Op.(I32x4 Mul))
let i32x4_trunc_sat_f32x4_s = Unary (V128 V128Op.(I32x4 TruncSatF32x4S))
let i32x4_trunc_sat_f32x4_u = Unary (V128 V128Op.(I32x4 TruncSatF32x4U))
let i32x4_dot_i16x8_s = Binary (V128 V128Op.(I32x4 DotI16x8S))
(* Extended multiply instructions with i16x8 sources and an i32x4 result:
   binary V128 operators for the low/high halves, signed and unsigned. *)
let i32x4_extmul_low_i16x8_s = Binary (V128 V128Op.(I32x4 ExtMulLowS))
let i32x4_extmul_high_i16x8_s = Binary (V128 V128Op.(I32x4 ExtMulHighS))
let i32x4_extmul_low_i16x8_u = Binary (V128 V128Op.(I32x4 ExtMulLowU))
let i32x4_extmul_high_i16x8_u = Binary (V128 V128Op.(I32x4 ExtMulHighU))

let i64x2_splat = Convert (V128 V128Op.(I64x2 Splat))
let i64x2_extract_lane imm = SimdExtract (V128Op.I64x2 (ZX, imm))
Expand All @@ -386,6 +394,10 @@ let i64x2_mul = Binary (V128 V128Op.(I64x2 Mul))
let i64x2_shl = SimdShift V128Op.(I64x2 Shl)
let i64x2_shr_s = SimdShift V128Op.(I64x2 ShrS)
let i64x2_shr_u = SimdShift V128Op.(I64x2 ShrU)
(* Extended multiply instructions with i32x4 sources and an i64x2 result:
   binary V128 operators for the low/high halves, signed and unsigned. *)
let i64x2_extmul_low_i32x4_s = Binary (V128 V128Op.(I64x2 ExtMulLowS))
let i64x2_extmul_high_i32x4_s = Binary (V128 V128Op.(I64x2 ExtMulHighS))
let i64x2_extmul_low_i32x4_u = Binary (V128 V128Op.(I64x2 ExtMulLowU))
let i64x2_extmul_high_i32x4_u = Binary (V128 V128Op.(I64x2 ExtMulHighU))

let f32x4_splat = Convert (V128 V128Op.(F32x4 Splat))
let f32x4_extract_lane imm = SimdExtract (V128Op.F32x4 (ZX, imm))
Expand Down
12 changes: 12 additions & 0 deletions interpreter/text/arrange.ml
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,10 @@ struct
| I16x8 MaxS -> "i16x8.max_s"
| I16x8 MaxU -> "i16x8.max_u"
| I16x8 AvgrU -> "i16x8.avgr_u"
| I16x8 ExtMulLowS -> "i16x8.extmul_low_i8x16_s"
| I16x8 ExtMulHighS -> "i16x8.extmul_high_i8x16_s"
| I16x8 ExtMulLowU -> "i16x8.extmul_low_i8x16_u"
| I16x8 ExtMulHighU -> "i16x8.extmul_high_i8x16_u"
| I32x4 Add -> "i32x4.add"
| I32x4 Sub -> "i32x4.sub"
| I32x4 Mul -> "i32x4.mul"
Expand All @@ -304,9 +308,17 @@ struct
| I32x4 MaxS -> "i32x4.max_s"
| I32x4 MaxU -> "i32x4.max_u"
| I32x4 DotI16x8S -> "i32x4.dot_i16x8_s"
| I32x4 ExtMulLowS -> "i32x4.extmul_low_i16x8_s"
| I32x4 ExtMulHighS -> "i32x4.extmul_high_i16x8_s"
| I32x4 ExtMulLowU -> "i32x4.extmul_low_i16x8_u"
| I32x4 ExtMulHighU -> "i32x4.extmul_high_i16x8_u"
| I64x2 Add -> "i64x2.add"
| I64x2 Sub -> "i64x2.sub"
| I64x2 Mul -> "i64x2.mul"
| I64x2 ExtMulLowS -> "i64x2.extmul_low_i32x4_s"
| I64x2 ExtMulHighS -> "i64x2.extmul_high_i32x4_s"
| I64x2 ExtMulLowU -> "i64x2.extmul_low_i32x4_u"
| I64x2 ExtMulHighU -> "i64x2.extmul_high_i32x4_u"
| F32x4 Eq -> "f32x4.eq"
| F32x4 Ne -> "f32x4.ne"
| F32x4 Lt -> "f32x4.lt"
Expand Down
13 changes: 13 additions & 0 deletions interpreter/text/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,19 @@ rule token = parse
| "i32x4.dot_i16x8_s"
{ BINARY i32x4_dot_i16x8_s }

| "i16x8.extmul_low_i8x16_"(sign as s)
{ BINARY (ext s i16x8_extmul_low_i8x16_s i16x8_extmul_low_i8x16_u) }
| "i16x8.extmul_high_i8x16_"(sign as s)
{ BINARY (ext s i16x8_extmul_high_i8x16_s i16x8_extmul_high_i8x16_u) }
| "i32x4.extmul_low_i16x8_"(sign as s)
{ BINARY (ext s i32x4_extmul_low_i16x8_s i32x4_extmul_low_i16x8_u) }
| "i32x4.extmul_high_i16x8_"(sign as s)
{ BINARY (ext s i32x4_extmul_high_i16x8_s i32x4_extmul_high_i16x8_u) }
| "i64x2.extmul_low_i32x4_"(sign as s)
{ BINARY (ext s i64x2_extmul_low_i32x4_s i64x2_extmul_low_i32x4_u) }
| "i64x2.extmul_high_i32x4_"(sign as s)
{ BINARY (ext s i64x2_extmul_high_i32x4_s i64x2_extmul_high_i32x4_u) }

| (simd_shape as s) { SIMD_SHAPE (simd_shape s) }

| name as s { VAR s }
Expand Down
1 change: 1 addition & 0 deletions test/core/simd/meta/gen_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
'simd_f64x2_pmin_pmax',
'simd_i32x4_dot_i16x8',
'simd_load_lane',
'simd_ext_mul',
)


Expand Down
21 changes: 17 additions & 4 deletions test/core/simd/meta/simd_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,27 @@ def __str__(self):
def lane(self):
return self.LANE_VALUE.get(self.LANE_TYPE)

@property
def dst_lane(self):
    """Lane descriptor of the result shape.

    This is simply ``self.lane``; it exists so callers can name the
    destination explicitly alongside ``src_lane``.
    """
    result_lane = self.lane
    return result_lane

@property
def src_lane(self):
    """Lane descriptor of the operand shape.

    Operations that widen their inputs (e.g. an extended multiply that
    reads i16x8 lanes and produces i32x4 lanes) declare SRC_LANE_TYPE;
    for those, the source lane is looked up in LANE_VALUE. Every other
    case falls back to ``self.lane``.
    """
    if not hasattr(self, 'SRC_LANE_TYPE'):
        return self.lane
    return self.LANE_VALUE.get(self.SRC_LANE_TYPE)

@property
def normal_unary_op_test_data(self):
lane = self.lane
lane = self.src_lane
return [0, 1, -1, lane.max - 1, lane.min + 1, lane.min, lane.max, lane.mask]

@property
def normal_binary_op_test_data(self):
lane = self.lane
lane = self.src_lane
return [
(0, 0),
(0, 1),
Expand Down Expand Up @@ -170,7 +183,7 @@ def get_case_data(self):
for data_group, v128_forms in self.bin_test_data:
for data in data_group:
case_data.append([op_name, [str(data[0]), str(data[1])],
str(o.binary_op(data[0], data[1], self.lane)),
str(o.binary_op(data[0], data[1], self.src_lane, self.dst_lane)),
v128_forms])
for data_group in self.full_bin_test_data:
for data in data_group.get(op_name):
Expand All @@ -183,7 +196,7 @@ def get_case_data(self):
for data_group, v128_forms in self.unary_test_data:
for data in data_group:
case_data.append([op_name, [str(data)],
str(o.unary_op(data, self.lane)),
str(o.unary_op(data, self.dst_lane)),
v128_forms])

return case_data
Expand Down
75 changes: 75 additions & 0 deletions test/core/simd/meta/simd_ext_mul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python3

""" Base class for generating extended multiply instructions. These
instructions take 2 inputs of the same (narrower) lane shape and multiply
corresponding lanes with extension (no overflow/wraparound), producing 1 output
of a (wider) shape. These instructions can choose to work on the low or high
halves of the inputs, and perform signed or unsigned multiply.
Subclasses need to define 3 attributes:
- LANE_TYPE (this is the output shape)
- SRC_LANE_TYPE (this is the input (narrower) shape)
- BINARY_OPS (list of operations)
"""

from simd_arithmetic import SimdArithmeticCase


class SimdExtMulCase(SimdArithmeticCase):
    """Shared test generator for the extended multiply instructions.

    Concrete subclasses supply LANE_TYPE (result shape), SRC_LANE_TYPE
    (operand shape) and BINARY_OPS (the instruction names to cover).
    """

    # Extended multiply has no unary variants.
    UNARY_OPS = ()

    @property
    def full_bin_test_data(self):
        # No additional full-vector binary test data for these ops.
        return list()

    def get_combine_cases(self):
        # Combination cases are not generated for these ops.
        return str()

    @property
    def bin_test_data(self):
        # Both operands use the source shape; the result uses the wider one.
        operand_and_result_forms = [
            self.SRC_LANE_TYPE,
            self.SRC_LANE_TYPE,
            self.LANE_TYPE,
        ]
        data_group = (self.normal_binary_op_test_data, operand_and_result_forms)
        return [data_group]

    @property
    def hex_binary_op_test_data(self):
        # No hexadecimal-literal test data for these ops.
        return list()

    def gen_test_cases(self):
        """Write the generated cases to ../simd_<wide>_extmul_<narrow>.wast."""
        name_template = '../simd_{wide}_extmul_{narrow}.wast'
        target_path = name_template.format(
            wide=self.LANE_TYPE, narrow=self.SRC_LANE_TYPE)
        # The file is opened before the cases are rendered, as in the
        # original, so the ordering of side effects is unchanged.
        with open(target_path, 'w') as out:
            out.write(self.get_all_cases())


class SimdI16x8ExtMulCase(SimdExtMulCase):
    """Extended multiply from i8x16 operands to an i16x8 result."""

    LANE_TYPE = 'i16x8'      # result shape
    SRC_LANE_TYPE = 'i8x16'  # operand shape
    BINARY_OPS = (
        'extmul_low_i8x16_s',
        'extmul_high_i8x16_s',
        'extmul_low_i8x16_u',
        'extmul_high_i8x16_u',
    )


class SimdI32x4ExtMulCase(SimdExtMulCase):
    """Extended multiply from i16x8 operands to an i32x4 result."""

    LANE_TYPE = 'i32x4'      # result shape
    SRC_LANE_TYPE = 'i16x8'  # operand shape
    BINARY_OPS = (
        'extmul_low_i16x8_s',
        'extmul_high_i16x8_s',
        'extmul_low_i16x8_u',
        'extmul_high_i16x8_u',
    )


class SimdI64x2ExtMulCase(SimdExtMulCase):
    """Extended multiply from i32x4 operands to an i64x2 result."""

    LANE_TYPE = 'i64x2'      # result shape
    SRC_LANE_TYPE = 'i32x4'  # operand shape
    BINARY_OPS = (
        'extmul_low_i32x4_s',
        'extmul_high_i32x4_s',
        'extmul_low_i32x4_u',
        'extmul_high_i32x4_u',
    )


def gen_test_cases():
    """Generate the .wast files for every extended multiply shape.

    Each case class is instantiated and run in turn, narrowest first:
    i16x8, then i32x4, then i64x2.
    """
    case_classes = (
        SimdI16x8ExtMulCase,
        SimdI32x4ExtMulCase,
        SimdI64x2ExtMulCase,
    )
    for case_cls in case_classes:
        case_cls().gen_test_cases()


if __name__ == '__main__':
    gen_test_cases()
Loading

0 comments on commit 270d6c2

Please sign in to comment.