Rewrite wasm-shell to use new wast parser (#6601)

Use the new wast parser to parse a full script up front, then traverse the parsed script data structure and execute the commands. wasm-shell had previously used the new wat parser for top-level modules, but it now uses the new parser for module assertions as well. Fix various bugs that this uncovered. After this change, wasm-shell supports all the assertions used in the upstream spec tests (although not the new kinds of assertions introduced by proposals). Uncomment various `assert_exhaustion` tests that we can now execute. Other kinds of assertions remain commented out in our tests: wasm-shell now supports `assert_unlinkable`, but the interpreter does not eagerly check for the existence of imports, so those tests do not pass. Tests that check for NaNs also remain commented out because they do not yet use the standard syntax that wasm-shell now supports for canonical and arithmetic NaN results, and our interpreter would not pass all of those tests even if they did use the standard syntax.
WebAssembly · May 18, 2024 · 921644c · 921644c
1 parent 369cddf
commit 921644c
Show file tree

Hide file tree

Showing 20 changed files with 462 additions and 367 deletions.
diff --git a/src/literal.h b/src/literal.h
@@ -347,6 +347,8 @@ class Literal {
   bool operator!=(const Literal& other) const;
 
   bool isNaN();
+  bool isCanonicalNaN();
+  bool isArithmeticNaN();
 
   static uint32_t NaNPayload(float f);
   static uint64_t NaNPayload(double f);

diff --git a/src/parser/contexts.h b/src/parser/contexts.h
@@ -1665,7 +1665,10 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> {
     return Ok{};
   }
 
-  Result<> addExport(Index, Name value, Name name, ExternalKind kind) {
+  Result<> addExport(Index pos, Name value, Name name, ExternalKind kind) {
+    if (wasm.getExportOrNull(name)) {
+      return in.err(pos, "duplicate export");
+    }
     wasm.addExport(builder.makeExport(name, value, kind));
     return Ok{};
   }

diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
@@ -23,6 +23,7 @@
 #include <variant>
 
 #include "lexer.h"
+#include "support/bits.h"
 #include "support/string.h"
 
 using namespace std::string_view_literals;
@@ -1005,6 +1006,9 @@ std::optional<uint32_t> Lexer::takeAlign() {
     }
     Lexer subLexer(result->span.substr(6));
     if (auto o = subLexer.takeU32()) {
+      if (Bits::popCount(*o) != 1) {
+        return std::nullopt;
+      }
       pos += result->span.size();
       advance();
       return o;

diff --git a/src/parser/parsers.h b/src/parser/parsers.h
@@ -30,7 +30,8 @@ template<typename Ctx> Result<typename Ctx::HeapTypeT> heaptype(Ctx&);
 template<typename Ctx> MaybeResult<typename Ctx::RefTypeT> reftype(Ctx&);
 template<typename Ctx> MaybeResult<typename Ctx::TypeT> tupletype(Ctx&);
 template<typename Ctx> Result<typename Ctx::TypeT> valtype(Ctx&);
-template<typename Ctx> MaybeResult<typename Ctx::ParamsT> params(Ctx&);
+template<typename Ctx>
+MaybeResult<typename Ctx::ParamsT> params(Ctx&, bool allowNames = true);
 template<typename Ctx> MaybeResult<typename Ctx::ResultsT> results(Ctx&);
 template<typename Ctx> MaybeResult<typename Ctx::SignatureT> functype(Ctx&);
 template<typename Ctx> Result<typename Ctx::FieldT> storagetype(Ctx&);
@@ -325,7 +326,8 @@ MaybeResult<typename Ctx::LabelIdxT> maybeLabelidx(Ctx&,
 template<typename Ctx>
 Result<typename Ctx::LabelIdxT> labelidx(Ctx&, bool inDelegate = false);
 template<typename Ctx> Result<typename Ctx::TagIdxT> tagidx(Ctx&);
-template<typename Ctx> Result<typename Ctx::TypeUseT> typeuse(Ctx&);
+template<typename Ctx>
+Result<typename Ctx::TypeUseT> typeuse(Ctx&, bool allowNames = true);
 MaybeResult<ImportNames> inlineImport(Lexer&);
 Result<std::vector<Name>> inlineExports(Lexer&);
 template<typename Ctx> Result<> strtype(Ctx&);
@@ -561,13 +563,18 @@ template<typename Ctx> Result<typename Ctx::TypeT> valtype(Ctx& ctx) {
 // param  ::= '(' 'param id? t:valtype ')' => [t]
 //          | '(' 'param t*:valtype* ')' => [t*]
 // params ::= param*
-template<typename Ctx> MaybeResult<typename Ctx::ParamsT> params(Ctx& ctx) {
+template<typename Ctx>
+MaybeResult<typename Ctx::ParamsT> params(Ctx& ctx, bool allowNames) {
   bool hasAny = false;
   auto res = ctx.makeParams();
   while (ctx.in.takeSExprStart("param"sv)) {
     hasAny = true;
+    auto pos = ctx.in.getPos();
     if (auto id = ctx.in.takeID()) {
       // Single named param
+      if (!allowNames) {
+        return ctx.in.err(pos, "unexpected named parameter");
+      }
       auto type = valtype(ctx);
       CHECK_ERR(type);
       if (!ctx.in.takeRParen()) {
@@ -1065,7 +1072,7 @@ template<typename Ctx> Result<typename Ctx::BlockTypeT> blocktype(Ctx& ctx) {
   // We either had no results or multiple results. Reset and parse again as a
   // type use.
   ctx.in = initialLexer;
-  auto use = typeuse(ctx);
+  auto use = typeuse(ctx, false);
   CHECK_ERR(use);
 
   auto type = ctx.getBlockTypeFromTypeUse(pos, *use);
@@ -1935,7 +1942,7 @@ Result<> makeCallIndirect(Ctx& ctx,
                           bool isReturn) {
   auto table = maybeTableidx(ctx);
   CHECK_ERR(table);
-  auto type = typeuse(ctx);
+  auto type = typeuse(ctx, false);
   CHECK_ERR(type);
   return ctx.makeCallIndirect(
     pos, annotations, table.getPtr(), *type, isReturn);
@@ -2669,7 +2676,8 @@ template<typename Ctx> Result<typename Ctx::TagIdxT> tagidx(Ctx& ctx) {
 //                 (if typedefs[x] = [t1*] -> [t2*])
 //           | ((t1,IDs):param)* (t2:result)*                          => x, IDs
 //                 (if x is minimum s.t. typedefs[x] = [t1*] -> [t2*])
-template<typename Ctx> Result<typename Ctx::TypeUseT> typeuse(Ctx& ctx) {
+template<typename Ctx>
+Result<typename Ctx::TypeUseT> typeuse(Ctx& ctx, bool allowNames) {
   auto pos = ctx.in.getPos();
   std::optional<typename Ctx::HeapTypeT> type;
   if (ctx.in.takeSExprStart("type"sv)) {
@@ -2683,7 +2691,7 @@ template<typename Ctx> Result<typename Ctx::TypeUseT> typeuse(Ctx& ctx) {
     type = *x;
   }
 
-  auto namedParams = params(ctx);
+  auto namedParams = params(ctx, allowNames);
   CHECK_ERR(namedParams);
 
   auto resultTypes = results(ctx);

diff --git a/src/parser/wast-parser.cpp b/src/parser/wast-parser.cpp
@@ -41,8 +41,7 @@ Result<Literals> consts(Lexer& in) {
 
 MaybeResult<Action> action(Lexer& in) {
   if (in.takeSExprStart("invoke"sv)) {
-    // TODO: Do we need to use this optional id?
-    in.takeID();
+    auto id = in.takeID();
     auto name = in.takeName();
     if (!name) {
       return in.err("expected export name");
@@ -52,20 +51,19 @@ MaybeResult<Action> action(Lexer& in) {
     if (!in.takeRParen()) {
       return in.err("expected end of invoke action");
     }
-    return InvokeAction{*name, *args};
+    return InvokeAction{id, *name, *args};
   }
 
   if (in.takeSExprStart("get"sv)) {
-    // TODO: Do we need to use this optional id?
-    in.takeID();
+    auto id = in.takeID();
     auto name = in.takeName();
     if (!name) {
       return in.err("expected export name");
     }
     if (!in.takeRParen()) {
       return in.err("expected end of get action");
     }
-    return GetAction{*name};
+    return GetAction{id, *name};
   }
 
   return {};
@@ -236,7 +234,7 @@ MaybeResult<AssertReturn> assertReturn(Lexer& in) {
 }
 
 // (assert_exception action)
-MaybeResult<AssertException> assertException(Lexer& in) {
+MaybeResult<AssertAction> assertException(Lexer& in) {
   if (!in.takeSExprStart("assert_exception"sv)) {
     return {};
   }
@@ -245,7 +243,7 @@ MaybeResult<AssertException> assertException(Lexer& in) {
   if (!in.takeRParen()) {
     return in.err("expected end of assert_exception");
   }
-  return AssertException{*a};
+  return AssertAction{ActionAssertionType::Exception, *a};
 }
 
 // (assert_exhaustion action msg)
@@ -266,7 +264,7 @@ MaybeResult<AssertAction> assertAction(Lexer& in) {
   if (!in.takeRParen()) {
     return in.err("expected end of assertion");
   }
-  return AssertAction{type, *a, *msg};
+  return AssertAction{type, *a};
 }
 
 // (assert_malformed module msg)
@@ -293,7 +291,7 @@ MaybeResult<AssertModule> assertModule(Lexer& in) {
   if (!in.takeRParen()) {
     return in.err("expected end of assertion");
   }
-  return AssertModule{type, *mod, *msg};
+  return AssertModule{type, *mod};
 }
 
 // (assert_trap action msg)
@@ -312,7 +310,7 @@ MaybeResult<Assertion> assertTrap(Lexer& in) {
     if (!in.takeRParen()) {
       return in.err("expected end of assertion");
     }
-    return Assertion{AssertAction{ActionAssertionType::Trap, *a, *msg}};
+    return Assertion{AssertAction{ActionAssertionType::Trap, *a}};
   }
   auto mod = wastModule(in);
   if (mod.getErr()) {
@@ -325,7 +323,7 @@ MaybeResult<Assertion> assertTrap(Lexer& in) {
   if (!in.takeRParen()) {
     return in.err("expected end of assertion");
   }
-  return Assertion{AssertModule{ModuleAssertionType::Trap, *mod, *msg}};
+  return Assertion{AssertModule{ModuleAssertionType::Trap, *mod}};
 }
 
 MaybeResult<Assertion> assertion(Lexer& in) {
@@ -391,24 +389,30 @@ Result<WASTCommand> command(Lexer& in) {
   return *mod;
 }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+
 Result<WASTScript> wast(Lexer& in) {
   WASTScript cmds;
   while (!in.empty()) {
+    size_t line = in.position().line;
     auto cmd = command(in);
     if (cmd.getErr() && cmds.empty()) {
       // The entire script might be a single module comprising a sequence of
       // module fields with a top-level `(module ...)`.
       auto wasm = std::make_shared<Module>();
       CHECK_ERR(parseModule(*wasm, in.buffer));
-      cmds.emplace_back(std::move(wasm));
+      cmds.push_back({WASTModule{std::move(wasm)}, line});
       return cmds;
     }
     CHECK_ERR(cmd);
-    cmds.emplace_back(std::move(*cmd));
+    cmds.push_back(ScriptEntry{std::move(*cmd), line});
   }
   return cmds;
 }
 
+#pragma GCC diagnostic pop
+
 } // anonymous namespace
 
 Result<WASTScript> parseScript(std::string_view in) {

diff --git a/src/parser/wat-parser.cpp b/src/parser/wat-parser.cpp
@@ -237,14 +237,6 @@ Result<> parseModule(Module& wasm, Lexer& lexer) {
   return doParseModule(wasm, lexer, true);
 }
 
-Result<Expression*> parseExpression(Module& wasm, Lexer& lexer) {
-  ParseDefsCtx ctx(lexer, wasm, {}, {}, {}, {}, {});
-  auto e = expr(ctx);
-  CHECK_ERR(e);
-  lexer = ctx.in;
-  return *e;
-}
-
 Result<Literal> parseConst(Lexer& lexer) {
   Module wasm;
   ParseDefsCtx ctx(lexer, wasm, {}, {}, {}, {}, {});

diff --git a/src/parser/wat-parser.h b/src/parser/wat-parser.h
@@ -34,14 +34,14 @@ Result<> parseModule(Module& wasm, Lexer& lexer);
 
 Result<Literal> parseConst(Lexer& lexer);
 
-Result<Expression*> parseExpression(Module& wasm, Lexer& lexer);
-
 struct InvokeAction {
+  std::optional<Name> base;
   Name name;
   Literals args;
 };
 
 struct GetAction {
+  std::optional<Name> base;
   Name name;
 };
 
@@ -68,19 +68,14 @@ using ExpectedResults = std::vector<ExpectedResult>;
 
 struct AssertReturn {
   Action action;
-  ExpectedResults results;
-};
-
-struct AssertException {
-  Action action;
+  ExpectedResults expected;
 };
 
-enum class ActionAssertionType { Trap, Exhaustion };
+enum class ActionAssertionType { Trap, Exhaustion, Exception };
 
 struct AssertAction {
   ActionAssertionType type;
   Action action;
-  std::string msg;
 };
 
 enum class QuotedModuleType { Text, Binary };
@@ -97,19 +92,22 @@ enum class ModuleAssertionType { Trap, Malformed, Invalid, Unlinkable };
 struct AssertModule {
   ModuleAssertionType type;
   WASTModule wasm;
-  std::string msg;
 };
 
-using Assertion =
-  std::variant<AssertReturn, AssertException, AssertAction, AssertModule>;
+using Assertion = std::variant<AssertReturn, AssertAction, AssertModule>;
 
 struct Register {
   Name name;
 };
 
 using WASTCommand = std::variant<WASTModule, Register, Action, Assertion>;
 
-using WASTScript = std::vector<WASTCommand>;
+struct ScriptEntry {
+  WASTCommand cmd;
+  size_t line;
+};
+
+using WASTScript = std::vector<ScriptEntry>;
 
 Result<WASTScript> parseScript(std::string_view in);