From 437514e23878b4e7b0f457f7982bb724c0c569e6 Mon Sep 17 00:00:00 2001 From: Brendan Dahl Date: Tue, 6 Aug 2024 21:20:14 +0000 Subject: [PATCH] [FP16] Implement lane access instructions. Specified at https://github.com/WebAssembly/half-precision/blob/main/proposals/half-precision/Overview.md --- scripts/gen-s-parser.py | 3 ++ src/gen-s-parser.inc | 23 +++++++++ src/ir/child-typer.h | 2 + src/ir/cost.h | 1 + src/literal.h | 4 ++ src/passes/Print.cpp | 9 ++++ src/tools/fuzzing/fuzzing.cpp | 1 + src/wasm-binary.h | 3 ++ src/wasm-interpreter.h | 6 +++ src/wasm.h | 5 ++ src/wasm/literal.cpp | 19 ++++++++ src/wasm/wasm-binary.cpp | 14 ++++++ src/wasm/wasm-stack.cpp | 9 ++++ src/wasm/wasm-validator.cpp | 9 ++++ src/wasm/wasm.cpp | 2 + test/lit/basic/f16.wast | 89 +++++++++++++++++++++++++++++++++++ test/spec/f16.wast | 12 +++++ 17 files changed, 211 insertions(+) diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py index f3510964482..9519ad4eeae 100755 --- a/scripts/gen-s-parser.py +++ b/scripts/gen-s-parser.py @@ -298,6 +298,9 @@ ("i64x2.splat", "makeUnary(UnaryOp::SplatVecI64x2)"), ("i64x2.extract_lane", "makeSIMDExtract(SIMDExtractOp::ExtractLaneVecI64x2, 2)"), ("i64x2.replace_lane", "makeSIMDReplace(SIMDReplaceOp::ReplaceLaneVecI64x2, 2)"), + ("f16x8.splat", "makeUnary(UnaryOp::SplatVecF16x8)"), + ("f16x8.extract_lane", "makeSIMDExtract(SIMDExtractOp::ExtractLaneVecF16x8, 8)"), + ("f16x8.replace_lane", "makeSIMDReplace(SIMDReplaceOp::ReplaceLaneVecF16x8, 8)"), ("f32x4.splat", "makeUnary(UnaryOp::SplatVecF32x4)"), ("f32x4.extract_lane", "makeSIMDExtract(SIMDExtractOp::ExtractLaneVecF32x4, 4)"), ("f32x4.replace_lane", "makeSIMDReplace(SIMDReplaceOp::ReplaceLaneVecF32x4, 4)"), diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index c54a0de54b1..33cddcb26b7 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -307,6 +307,29 @@ switch (buf[0]) { } case 'f': { switch (buf[1]) { + case '1': { + switch (buf[6]) { + case 'e': + if (op == 
"f16x8.extract_lane"sv) { + CHECK_ERR(makeSIMDExtract(ctx, pos, annotations, SIMDExtractOp::ExtractLaneVecF16x8, 8)); + return Ok{}; + } + goto parse_error; + case 'r': + if (op == "f16x8.replace_lane"sv) { + CHECK_ERR(makeSIMDReplace(ctx, pos, annotations, SIMDReplaceOp::ReplaceLaneVecF16x8, 8)); + return Ok{}; + } + goto parse_error; + case 's': + if (op == "f16x8.splat"sv) { + CHECK_ERR(makeUnary(ctx, pos, annotations, UnaryOp::SplatVecF16x8)); + return Ok{}; + } + goto parse_error; + default: goto parse_error; + } + } case '3': { switch (buf[3]) { case '.': { diff --git a/src/ir/child-typer.h b/src/ir/child-typer.h index 5e2dc237dfc..17717a32c14 100644 --- a/src/ir/child-typer.h +++ b/src/ir/child-typer.h @@ -228,6 +228,7 @@ template struct ChildTyper : OverriddenVisitor { case ReplaceLaneVecI64x2: note(&curr->value, Type::i64); break; + case ReplaceLaneVecF16x8: case ReplaceLaneVecF32x4: note(&curr->value, Type::f32); break; @@ -337,6 +338,7 @@ template struct ChildTyper : OverriddenVisitor { case TruncSatUFloat32ToInt64: case ReinterpretFloat32: case PromoteFloat32: + case SplatVecF16x8: case SplatVecF32x4: note(&curr->value, Type::f32); break; diff --git a/src/ir/cost.h b/src/ir/cost.h index 38c74a2037c..06512d656de 100644 --- a/src/ir/cost.h +++ b/src/ir/cost.h @@ -184,6 +184,7 @@ struct CostAnalyzer : public OverriddenVisitor { case SplatVecI16x8: case SplatVecI32x4: case SplatVecI64x2: + case SplatVecF16x8: case SplatVecF32x4: case SplatVecF64x2: case NotVec128: diff --git a/src/literal.h b/src/literal.h index 190fe0eeced..63bbf6e7481 100644 --- a/src/literal.h +++ b/src/literal.h @@ -444,6 +444,7 @@ class Literal { std::array getLanesUI16x8() const; std::array getLanesI32x4() const; std::array getLanesI64x2() const; + std::array getLanesF16x8() const; std::array getLanesF32x4() const; std::array getLanesF64x2() const; @@ -463,6 +464,9 @@ class Literal { Literal splatI64x2() const; Literal extractLaneI64x2(uint8_t index) const; Literal 
replaceLaneI64x2(const Literal& other, uint8_t index) const; + Literal splatF16x8() const; + Literal extractLaneF16x8(uint8_t index) const; + Literal replaceLaneF16x8(const Literal& other, uint8_t index) const; Literal splatF32x4() const; Literal extractLaneF32x4(uint8_t index) const; Literal replaceLaneF32x4(const Literal& other, uint8_t index) const; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index aca43924d34..fd22f1b716f 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -703,6 +703,9 @@ struct PrintExpressionContents case ExtractLaneVecI64x2: o << "i64x2.extract_lane"; break; + case ExtractLaneVecF16x8: + o << "f16x8.extract_lane"; + break; case ExtractLaneVecF32x4: o << "f32x4.extract_lane"; break; @@ -728,6 +731,9 @@ struct PrintExpressionContents case ReplaceLaneVecI64x2: o << "i64x2.replace_lane"; break; + case ReplaceLaneVecF16x8: + o << "f16x8.replace_lane"; + break; case ReplaceLaneVecF32x4: o << "f32x4.replace_lane"; break; @@ -1137,6 +1143,9 @@ struct PrintExpressionContents case SplatVecI64x2: o << "i64x2.splat"; break; + case SplatVecF16x8: + o << "f16x8.splat"; + break; case SplatVecF32x4: o << "f32x4.splat"; break; diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 2aff7146e28..fd0dfb26c25 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -3579,6 +3579,7 @@ Expression* TranslateToFuzzReader::makeSIMDExtract(Type type) { break; case ExtractLaneSVecI16x8: case ExtractLaneUVecI16x8: + case ExtractLaneVecF16x8: index = upTo(8); break; case ExtractLaneVecI32x4: diff --git a/src/wasm-binary.h b/src/wasm-binary.h index 5fae1b64d7b..38bf5d475a4 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -1054,6 +1054,9 @@ enum ASTNodes { // half precision opcodes F32_F16LoadMem = 0x30, F32_F16StoreMem = 0x31, + F16x8Splat = 0x120, + F16x8ExtractLane = 0x121, + F16x8ReplaceLane = 0x122, // bulk memory opcodes diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h 
index 3e62d53354a..f59a005b645 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -479,6 +479,8 @@ class ExpressionRunner : public OverriddenVisitor { return value.splatI32x4(); case SplatVecI64x2: return value.splatI64x2(); + case SplatVecF16x8: + return value.splatF16x8(); case SplatVecF32x4: return value.splatF32x4(); case SplatVecF64x2: @@ -1070,6 +1072,8 @@ class ExpressionRunner : public OverriddenVisitor { return vec.extractLaneI32x4(curr->index); case ExtractLaneVecI64x2: return vec.extractLaneI64x2(curr->index); + case ExtractLaneVecF16x8: + return vec.extractLaneF16x8(curr->index); case ExtractLaneVecF32x4: return vec.extractLaneF32x4(curr->index); case ExtractLaneVecF64x2: @@ -1098,6 +1102,8 @@ class ExpressionRunner : public OverriddenVisitor { return vec.replaceLaneI32x4(value, curr->index); case ReplaceLaneVecI64x2: return vec.replaceLaneI64x2(value, curr->index); + case ReplaceLaneVecF16x8: + return vec.replaceLaneF16x8(value, curr->index); case ReplaceLaneVecF32x4: return vec.replaceLaneF32x4(value, curr->index); case ReplaceLaneVecF64x2: diff --git a/src/wasm.h b/src/wasm.h index 4a4ed561f91..56e94fef78b 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -229,6 +229,9 @@ enum UnaryOp { RelaxedTruncZeroSVecF64x2ToVecI32x4, RelaxedTruncZeroUVecF64x2ToVecI32x4, + // Half precision SIMD + SplatVecF16x8, + InvalidUnary }; @@ -490,6 +493,7 @@ enum SIMDExtractOp { ExtractLaneUVecI16x8, ExtractLaneVecI32x4, ExtractLaneVecI64x2, + ExtractLaneVecF16x8, ExtractLaneVecF32x4, ExtractLaneVecF64x2 }; @@ -499,6 +503,7 @@ enum SIMDReplaceOp { ReplaceLaneVecI16x8, ReplaceLaneVecI32x4, ReplaceLaneVecI64x2, + ReplaceLaneVecF16x8, ReplaceLaneVecF32x4, ReplaceLaneVecF64x2, }; diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index f2100ea712e..a532b92d002 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -20,6 +20,7 @@ #include #include "emscripten-optimizer/simple_ast.h" +#include "fp16.h" #include "ir/bits.h" #include "pretty_printing.h" 
#include "support/bits.h" @@ -1729,6 +1730,15 @@ LaneArray<4> Literal::getLanesI32x4() const { LaneArray<2> Literal::getLanesI64x2() const { return getLanes(*this); } +// Splits the vector into eight 16-bit lanes and converts each lane's raw bits +// with fp16_ieee_to_fp32_value, so the returned lanes hold converted values. +LaneArray<8> Literal::getLanesF16x8() const { + auto lanes = getLanesUI16x8(); + for (size_t i = 0; i < lanes.size(); ++i) { + lanes[i] = Literal(fp16_ieee_to_fp32_value(lanes[i].geti32())); + } + return lanes; +} LaneArray<4> Literal::getLanesF32x4() const { auto lanes = getLanesI32x4(); for (size_t i = 0; i < lanes.size(); ++i) { @@ -1766,6 +1776,12 @@ Literal Literal::splatI8x16() const { return splat(*this); } Literal Literal::splatI16x8() const { return splat(*this); } Literal Literal::splatI32x4() const { return splat(*this); } Literal Literal::splatI64x2() const { return splat(*this); } +// Converts this f32 value to f16 bits with fp16_ieee_from_fp32_value and +// splats the resulting 16-bit pattern. +Literal Literal::splatF16x8() const { + uint16_t f16 = fp16_ieee_from_fp32_value(getf32()); + return splat(Literal(f16)); +} Literal Literal::splatF32x4() const { return splat(*this); } Literal Literal::splatF64x2() const { return splat(*this); } @@ -1787,6 +1803,10 @@ Literal Literal::extractLaneI32x4(uint8_t index) const { Literal Literal::extractLaneI64x2(uint8_t index) const { return getLanesI64x2().at(index); } +// Returns lane `index` of getLanesF16x8(). +Literal Literal::extractLaneF16x8(uint8_t index) const { + return getLanesF16x8().at(index); +} Literal Literal::extractLaneF32x4(uint8_t index) const { return getLanesF32x4().at(index); } @@ -1815,6 +1835,13 @@ Literal Literal::replaceLaneI32x4(const Literal& other, uint8_t index) const { Literal Literal::replaceLaneI64x2(const Literal& other, uint8_t index) const { return replace<2, &Literal::getLanesI64x2>(*this, other, index); } +// Narrows `other` to f16 bits and stores them in lane `index`. The lanes are +// read with getLanesUI16x8 (raw 16-bit patterns) rather than getLanesF16x8 +// (converted floats) so every lane handed to replace() is in the same form. +Literal Literal::replaceLaneF16x8(const Literal& other, uint8_t index) const { + return replace<8, &Literal::getLanesUI16x8>( + *this, Literal(fp16_ieee_from_fp32_value(other.getf32())), index); +} Literal Literal::replaceLaneF32x4(const Literal& other, uint8_t index) const { return replace<4, &Literal::getLanesF32x4>(*this, other, index); } diff --git
a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index b9645ab8fec..5eaa8451510 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -6239,6 +6239,10 @@ bool WasmBinaryReader::maybeVisitSIMDUnary(Expression*& out, uint32_t code) { curr = allocator.alloc(); curr->op = SplatVecI64x2; break; + case BinaryConsts::F16x8Splat: + curr = allocator.alloc(); + curr->op = SplatVecF16x8; + break; case BinaryConsts::F32x4Splat: curr = allocator.alloc(); curr->op = SplatVecF32x4; @@ -6569,6 +6573,11 @@ bool WasmBinaryReader::maybeVisitSIMDExtract(Expression*& out, uint32_t code) { curr->op = ExtractLaneVecI64x2; curr->index = getLaneIndex(2); break; + case BinaryConsts::F16x8ExtractLane: + curr = allocator.alloc(); + curr->op = ExtractLaneVecF16x8; + curr->index = getLaneIndex(8); + break; case BinaryConsts::F32x4ExtractLane: curr = allocator.alloc(); curr->op = ExtractLaneVecF32x4; @@ -6611,6 +6620,11 @@ bool WasmBinaryReader::maybeVisitSIMDReplace(Expression*& out, uint32_t code) { curr->op = ReplaceLaneVecI64x2; curr->index = getLaneIndex(2); break; + case BinaryConsts::F16x8ReplaceLane: + curr = allocator.alloc(); + curr->op = ReplaceLaneVecF16x8; + curr->index = getLaneIndex(8); + break; case BinaryConsts::F32x4ReplaceLane: curr = allocator.alloc(); curr->op = ReplaceLaneVecF32x4; diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 35db3b32250..19b98769b56 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -590,6 +590,9 @@ void BinaryInstWriter::visitSIMDExtract(SIMDExtract* curr) { case ExtractLaneVecI64x2: o << U32LEB(BinaryConsts::I64x2ExtractLane); break; + case ExtractLaneVecF16x8: + o << U32LEB(BinaryConsts::F16x8ExtractLane); + break; case ExtractLaneVecF32x4: o << U32LEB(BinaryConsts::F32x4ExtractLane); break; @@ -615,6 +618,9 @@ void BinaryInstWriter::visitSIMDReplace(SIMDReplace* curr) { case ReplaceLaneVecI64x2: o << U32LEB(BinaryConsts::I64x2ReplaceLane); break; + case ReplaceLaneVecF16x8: + o << 
U32LEB(BinaryConsts::F16x8ReplaceLane); + break; case ReplaceLaneVecF32x4: o << U32LEB(BinaryConsts::F32x4ReplaceLane); break; @@ -1050,6 +1056,9 @@ void BinaryInstWriter::visitUnary(Unary* curr) { case SplatVecI64x2: o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::I64x2Splat); break; + case SplatVecF16x8: + o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F16x8Splat); + break; case SplatVecF32x4: o << int8_t(BinaryConsts::SIMDPrefix) << U32LEB(BinaryConsts::F32x4Splat); break; diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index b3291743225..6e59ce8d81b 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -1272,6 +1272,10 @@ void FunctionValidator::visitSIMDExtract(SIMDExtract* curr) { lane_t = Type::i64; lanes = 2; break; + case ExtractLaneVecF16x8: + lane_t = Type::f32; + lanes = 8; + break; case ExtractLaneVecF32x4: lane_t = Type::f32; lanes = 4; @@ -1318,6 +1322,10 @@ void FunctionValidator::visitSIMDReplace(SIMDReplace* curr) { lane_t = Type::i64; lanes = 2; break; + case ReplaceLaneVecF16x8: + lane_t = Type::f32; + lanes = 8; + break; case ReplaceLaneVecF32x4: lane_t = Type::f32; lanes = 4; @@ -2036,6 +2044,7 @@ void FunctionValidator::visitUnary(Unary* curr) { shouldBeEqual( curr->value->type, Type(Type::i64), curr, "expected i64 splat value"); break; + case SplatVecF16x8: case SplatVecF32x4: shouldBeEqual( curr->type, Type(Type::v128), curr, "expected splat to have v128 type"); diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index b17250e6c6c..ae70e4a2238 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -386,6 +386,7 @@ void SIMDExtract::finalize() { case ExtractLaneVecI64x2: type = Type::i64; break; + case ExtractLaneVecF16x8: case ExtractLaneVecF32x4: type = Type::f32; break; @@ -636,6 +637,7 @@ void Unary::finalize() { case SplatVecI16x8: case SplatVecI32x4: case SplatVecI64x2: + case SplatVecF16x8: case SplatVecF32x4: case SplatVecF64x2: case NotVec128: diff --git 
a/test/lit/basic/f16.wast b/test/lit/basic/f16.wast index c68b0306f96..1dceb807525 100644 --- a/test/lit/basic/f16.wast +++ b/test/lit/basic/f16.wast @@ -17,6 +17,12 @@ ;; CHECK-TEXT: (type $1 (func (param i32 f32))) + ;; CHECK-TEXT: (type $2 (func (param f32) (result v128))) + + ;; CHECK-TEXT: (type $3 (func (param v128) (result f32))) + + ;; CHECK-TEXT: (type $4 (func (param v128 f32) (result v128))) + ;; CHECK-TEXT: (memory $0 1 1) ;; CHECK-TEXT: (func $f32.load_f16 (type $0) (param $0 i32) (result f32) @@ -28,6 +34,12 @@ ;; CHECK-BIN: (type $1 (func (param i32 f32))) + ;; CHECK-BIN: (type $2 (func (param f32) (result v128))) + + ;; CHECK-BIN: (type $3 (func (param v128) (result f32))) + + ;; CHECK-BIN: (type $4 (func (param v128 f32) (result v128))) + ;; CHECK-BIN: (memory $0 1 1) ;; CHECK-BIN: (func $f32.load_f16 (type $0) (param $0 i32) (result f32) @@ -58,11 +70,69 @@ (local.get $1) ) ) + + ;; CHECK-TEXT: (func $f16x8.splat (type $2) (param $0 f32) (result v128) + ;; CHECK-TEXT-NEXT: (f16x8.splat + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-BIN: (func $f16x8.splat (type $2) (param $0 f32) (result v128) + ;; CHECK-BIN-NEXT: (f16x8.splat + ;; CHECK-BIN-NEXT: (local.get $0) + ;; CHECK-BIN-NEXT: ) + ;; CHECK-BIN-NEXT: ) + (func $f16x8.splat (param $0 f32) (result v128) + (f16x8.splat + (local.get $0) + ) + ) + + ;; CHECK-TEXT: (func $f16x8.extract_lane (type $3) (param $0 v128) (result f32) + ;; CHECK-TEXT-NEXT: (f16x8.extract_lane 0 + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-BIN: (func $f16x8.extract_lane (type $3) (param $0 v128) (result f32) + ;; CHECK-BIN-NEXT: (f16x8.extract_lane 0 + ;; CHECK-BIN-NEXT: (local.get $0) + ;; CHECK-BIN-NEXT: ) + ;; CHECK-BIN-NEXT: ) + (func $f16x8.extract_lane (param $0 v128) (result f32) + (f16x8.extract_lane 0 + (local.get $0) + ) + ) + + ;; CHECK-TEXT: (func $f16x8.replace_lane (type $4) (param $0 v128) (param $1 f32) 
(result v128) + ;; CHECK-TEXT-NEXT: (f16x8.replace_lane 0 + ;; CHECK-TEXT-NEXT: (local.get $0) + ;; CHECK-TEXT-NEXT: (local.get $1) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-TEXT-NEXT: ) + ;; CHECK-BIN: (func $f16x8.replace_lane (type $4) (param $0 v128) (param $1 f32) (result v128) + ;; CHECK-BIN-NEXT: (f16x8.replace_lane 0 + ;; CHECK-BIN-NEXT: (local.get $0) + ;; CHECK-BIN-NEXT: (local.get $1) + ;; CHECK-BIN-NEXT: ) + ;; CHECK-BIN-NEXT: ) + (func $f16x8.replace_lane (param $0 v128) (param $1 f32) (result v128) + (f16x8.replace_lane 0 + (local.get $0) + (local.get $1) + ) + ) + ) ;; CHECK-BIN-NODEBUG: (type $0 (func (param i32) (result f32))) ;; CHECK-BIN-NODEBUG: (type $1 (func (param i32 f32))) +;; CHECK-BIN-NODEBUG: (type $2 (func (param f32) (result v128))) + +;; CHECK-BIN-NODEBUG: (type $3 (func (param v128) (result f32))) + +;; CHECK-BIN-NODEBUG: (type $4 (func (param v128 f32) (result v128))) + ;; CHECK-BIN-NODEBUG: (memory $0 1 1) ;; CHECK-BIN-NODEBUG: (func $0 (type $0) (param $0 i32) (result f32) @@ -77,3 +147,22 @@ ;; CHECK-BIN-NODEBUG-NEXT: (local.get $1) ;; CHECK-BIN-NODEBUG-NEXT: ) ;; CHECK-BIN-NODEBUG-NEXT: ) + +;; CHECK-BIN-NODEBUG: (func $2 (type $2) (param $0 f32) (result v128) +;; CHECK-BIN-NODEBUG-NEXT: (f16x8.splat +;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) +;; CHECK-BIN-NODEBUG-NEXT: ) +;; CHECK-BIN-NODEBUG-NEXT: ) + +;; CHECK-BIN-NODEBUG: (func $3 (type $3) (param $0 v128) (result f32) +;; CHECK-BIN-NODEBUG-NEXT: (f16x8.extract_lane 0 +;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) +;; CHECK-BIN-NODEBUG-NEXT: ) +;; CHECK-BIN-NODEBUG-NEXT: ) + +;; CHECK-BIN-NODEBUG: (func $4 (type $4) (param $0 v128) (param $1 f32) (result v128) +;; CHECK-BIN-NODEBUG-NEXT: (f16x8.replace_lane 0 +;; CHECK-BIN-NODEBUG-NEXT: (local.get $0) +;; CHECK-BIN-NODEBUG-NEXT: (local.get $1) +;; CHECK-BIN-NODEBUG-NEXT: ) +;; CHECK-BIN-NODEBUG-NEXT: ) diff --git a/test/spec/f16.wast b/test/spec/f16.wast index 19bad175674..ef32b2ab545 100644 --- a/test/spec/f16.wast +++ 
b/test/spec/f16.wast @@ -6,6 +6,11 @@ (func (export "f32.load_f16") (result f32) (f32.load_f16 (i32.const 0))) (func (export "f32.store_f16") (f32.store_f16 (i32.const 0) (f32.const 100.5))) (func (export "i32.load16_u") (result i32) (i32.load16_u (i32.const 2))) + (func (export "f16x8.splat") (param $0 f32) (result v128) (f16x8.splat (local.get $0))) + (func (export "f16x8.extract_lane_first") (param $0 v128) (result f32) (f16x8.extract_lane 0 (local.get $0))) + (func (export "f16x8.extract_lane_last") (param $0 v128) (result f32) (f16x8.extract_lane 7 (local.get $0))) + (func (export "f16x8.replace_lane_first") (param $0 v128) (param $1 f32) (result v128) (f16x8.replace_lane 0 (local.get $0) (local.get $1))) + (func (export "f16x8.replace_lane_last") (param $0 v128) (param $1 f32) (result v128) (f16x8.replace_lane 7 (local.get $0) (local.get $1))) ) (assert_return (invoke "f32.load_f16") (f32.const 42.0)) @@ -13,3 +18,10 @@ (assert_return (invoke "f32.load_f16") (f32.const 100.5)) ;; Ensure that the above operations didn't write to memory they shouldn't have. (assert_return (invoke "i32.load16_u") (i32.const 0xDEAD)) + +;; lane accesses +(assert_return (invoke "f16x8.splat" (f32.const 100.5)) (v128.const i16x8 0x5648 0x5648 0x5648 0x5648 0x5648 0x5648 0x5648 0x5648)) +(assert_return (invoke "f16x8.extract_lane_first" (v128.const i16x8 0x5648 0 0 0 0 0 0 0)) (f32.const 100.5)) +(assert_return (invoke "f16x8.extract_lane_last" (v128.const i16x8 0 0 0 0 0 0 0 0xc500)) (f32.const -5)) +(assert_return (invoke "f16x8.replace_lane_first" (v128.const i64x2 0 0) (f32.const 100.5)) (v128.const i16x8 0x5648 0 0 0 0 0 0 0)) +(assert_return (invoke "f16x8.replace_lane_last" (v128.const i64x2 0 0) (f32.const 100.5)) (v128.const i16x8 0 0 0 0 0 0 0 0x5648))