From 8b5f4d43f2277d82e765b171ec94d4f3c4c5279f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 21 Dec 2023 15:20:59 -0800 Subject: [PATCH 01/63] work --- src/passes/Precompute.cpp | 78 ++++++++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 4178342a2dc..a067d7b1663 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -27,16 +27,17 @@ // looked at. // -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "ir/iteration.h" +#include "ir/literal-utils.h" +#include "ir/local-graph.h" +#include "ir/manipulation.h" +#include "ir/properties.h" +#include "ir/utils.h" +#include "pass.h" +#include "support/unique_deferring_queue.h" +#include "wasm-builder.h" +#include "wasm-interpreter.h" +#include "wasm.h" namespace wasm { @@ -277,6 +278,7 @@ struct Precompute // try to evaluate this into a const Flow flow = precomputeExpression(curr); if (!canEmitConstantFor(flow.values)) { + tryToPartiallyPrecompute(curr); return; } if (flow.breaking()) { @@ -319,6 +321,62 @@ struct Precompute } } + // If we failed to precompute a constant, perhaps we can still precompute part + // of this expression. Specifically, consider this case: + // + // (A + // (select + // (B) + // (C) + // (condition) + // ) + // ) + // + // Perhaps we can compute A(B) and A(C). If so, we can emit a better select: + // + // (select + // (result of A-of-B) + // (result of A-of-C) + // (condition) + // ) + // ) + // + // Note that in general for code size we want to move operations *out* of + // selects and ifs (OptimizeInstructions does that), but here we are + // computing a constant to replace nonconstant code, and it is definitely + // worthwhile. + void tryToPartiallyPrecompute(Expression* curr) { + // TODO: only when optlevel = 3? measure speeds + + ChildIterator children(curr); + if (children.getNumChildren() != 1) { + return; + } + if (auto* select = children.getChild(0)->dynCast()) { // Copy the entire expression. Then in the copy, replace the select with // each of its arms and precompute those. - auto *copy = ExpressionManipulator::copy(curr, *getModule()); + auto* copy = ExpressionManipulator::copy(curr, *getModule()); ChildIterator copyChildren(copy); copyChildren.getChild(0) = select->ifTrue; auto ifTrue = precomputeExpression(copy); From 5106fe8100b4f8ae37678d6be788869c92dcf001 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 21 Dec 2023 15:58:45 -0800 Subject: [PATCH 06/63] fix --- src/passes/Precompute.cpp | 6 +++++ test/lit/passes/precompute-partial.wast | 30 +++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 255ef1566f2..2a466f2f5e2 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -350,6 +350,10 @@ struct Precompute void tryToPartiallyPrecompute(Expression* curr) { // TODO: only when optlevel = 3? measure speeds + if (curr->type == Type::unreachable) { + return; + } + ChildIterator children(curr); if (children.getNumChildren() != 1) { return; @@ -375,6 +379,8 @@ struct Precompute // We precomputed them both successfully! Apply them. select->ifTrue = ifTrue.getConstExpression(*getModule()); select->ifFalse = ifFalse.getConstExpression(*getModule()); + select->type = select->ifTrue->type; + assert(select->type == select->ifFalse->type); replaceCurrent(select); } } diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 434c53e4dcb..f2625b06e5e 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -56,7 +56,7 @@ (i32.const 2) ) - ;; CHECK: (func $simple-1 (type $1) (param $param i32) (result i32) + ;; CHECK: (func $simple-1 (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (i32.const 0) @@ -74,7 +74,7 @@ ) ) - ;; CHECK: (func $simple-2 (type $1) (param $param i32) (result i32) + ;; CHECK: (func $simple-2 (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (i32.const 0) @@ -91,7 +91,7 @@ ) ) - ;; CHECK: (func $simple-3 (type $1) (param $param i32) (result i32) + ;; CHECK: (func $simple-3 (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (i32.const 1) @@ -108,7 +108,7 @@ ) ) - ;; CHECK: (func $simple-4 (type $1) (param $param i32) (result i32) + ;; CHECK: (func $simple-4 (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (i32.const 1) @@ -124,4 +124,26 @@ ) ) ) + + ;; CHECK: (func $unreachable (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $unreachable (param $param i32) (result i32) + ;; We should ignore unreachable code like this. We would need to make sure + ;; to emit the proper type on the outside, and it's simpler to just defer + ;; this to DCE. + (i32.eqz + (select + (i32.const 0) + (i32.const 0) + (unreachable) + ) + ) + ) ) From e12d243747c0f64a95fa1094abc6c3958a49217a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 21 Dec 2023 16:02:59 -0800 Subject: [PATCH 07/63] test --- test/lit/passes/precompute-partial.wast | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index f2625b06e5e..7244cb1dc55 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -31,6 +31,9 @@ ;; We can apply the struct.get to the select arms: As the globals are all ;; immutable, we can read the function references from them, and emit a ;; select on those values, saving the struct.get operation entirely. + ;; + ;; Note that this test also checks updating the type of the select, which + ;; initially returned a struct, and afterwards returns a func. (struct.get $vtable 0 (select (global.get $A$vtable) From 55f9c7fa13936130241bf542f9d2e484f4c5dce6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 21 Dec 2023 16:42:30 -0800 Subject: [PATCH 08/63] fix --- src/passes/Precompute.cpp | 19 ++++++++++++++++ test/lit/passes/precompute-partial.wast | 30 +++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 2a466f2f5e2..f62a2c9dc39 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -350,10 +350,29 @@ struct Precompute void tryToPartiallyPrecompute(Expression* curr) { // TODO: only when optlevel = 3? measure speeds + // Avoid fixing up unreachable code here; leave it for DCE. if (curr->type == Type::unreachable) { return; } + // In normal precomputing we replace the entire expression with the result. + // Here we only precompute part of the expression, so we can have dangling + // references like this: + // + // (block $name + // (select + // .. + // .. + // (block + // (br_if $target + // + // That is, the condition can refer to a control flow structure, and the + // condition would remain in the output. Ignore all control flow for + // simplicity. + if (Properties::isControlFlowStructure(curr)) { + return; + } + ChildIterator children(curr); if (children.getNumChildren() != 1) { return; diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 7244cb1dc55..586e99e99ab 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -149,4 +149,34 @@ ) ) ) + + ;; CHECK: (func $control-flow (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (block $target (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (br_if $target + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $control-flow (param $param i32) (result i32) + ;; We ignore control flow structures to avoid issues with removing them but + ;; still having references to their name. + (block $target (result i32) + (select + (i32.const 0) + ;; Something nonconstant to avoid the entire testcase becoming trivial. + (local.get $param) + ;; If we precomputed the block into the select arms, this br_if + ;; would have nowhere to go. + (br_if $target + (i32.const 1) + (local.get $param) + ) + ) + ) + ) ) From 63a843f757503bcd648bd1fafc8fae8d2f3309eb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 2 Jan 2024 10:23:46 -0800 Subject: [PATCH 09/63] fix --- src/passes/Precompute.cpp | 24 ++++++------ test/lit/passes/precompute-partial.wast | 51 +++++++++++++++++++++---- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index f62a2c9dc39..6159f9006a7 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -324,7 +324,7 @@ struct Precompute } // If we failed to precompute a constant, perhaps we can still precompute part - // of this expression. Specifically, consider this case: + // of an expression. Specifically, consider this case: // // (A // (select @@ -337,16 +337,15 @@ struct Precompute // Perhaps we can compute A(B) and A(C). If so, we can emit a better select: // // (select - // (result of A-of-B) - // (result of A-of-C) + // (constant result of A(B)) + // (constant result of A(C)) // (condition) // ) // ) // // Note that in general for code size we want to move operations *out* of // selects and ifs (OptimizeInstructions does that), but here we are - // computing a constant to replace nonconstant code, and it is definitely - // worthwhile. + // computing two constant which replace four expressions, so it is worthwhile. void tryToPartiallyPrecompute(Expression* curr) { // TODO: only when optlevel = 3? measure speeds @@ -361,22 +360,26 @@ struct Precompute // // (block $name // (select - // .. - // .. - // (block + // (B) + // (C) + // (block ;; condition // (br_if $target // // That is, the condition can refer to a control flow structure, and the - // condition would remain in the output. Ignore all control flow for - // simplicity. + // condition remain in the output, so if we remove that structure - which + // can happen if it is A in the comment above - then we have a problem. + // Ignore all control flow for simplicity, as they don't control and useful + // computation we want to precompute away anyhow. if (Properties::isControlFlowStructure(curr)) { return; } + // We are lookin ChildIterator children(curr); if (children.getNumChildren() != 1) { return; } + if (auto* select = children.getChild(0)->dynCast(); + if (!select) { + return; + } - if (auto* select = children.getChild(0)->dynCast(); if (!select) { return; @@ -404,6 +413,7 @@ struct Precompute // Copy the entire expression. Then in the copy, replace the select with // each of its arms and precompute those. + // TODO: must we copy? Can we not replace twice or thrice? auto* copy = ExpressionManipulator::copy(curr, *getModule()); ChildIterator copyChildren(copy); copyChildren.getChild(0) = select->ifTrue; From 7efd92888b25f0e5cc60b46f2f7a7db3b321c595 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 11:16:01 -0800 Subject: [PATCH 18/63] wild --- src/passes/Precompute.cpp | 127 +++++++++++++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 8 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 08d5c532d89..e198417dfdd 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -215,6 +215,7 @@ struct Precompute void doWalkFunction(Function* func) { // Walk the function and precompute things. super::doWalkFunction(func); + partiallyPrecompute(func); if (!propagate) { return; } @@ -228,6 +229,7 @@ struct Precompute // another walk to apply them and perhaps other optimizations that are // unlocked. super::doWalkFunction(func); + partiallyPrecompute(func); } // Note that in principle even more cycles could find further work here, in // very rare cases. To avoid constructing a LocalGraph again just for that @@ -285,7 +287,7 @@ struct Precompute if (flow.breakTo == NONCONSTANT_FLOW) { // This cannot be turned into a constant, but perhaps we can partially // precompute it. - tryToPartiallyPrecompute(curr); + considerPartiallyPrecomputing(curr); return; } if (flow.breakTo == RETURN_FLOW) { @@ -348,13 +350,122 @@ struct Precompute // selects and ifs (OptimizeInstructions does that), but here we are // computing two constant which replace four expressions, so it is worthwhile. // - // XXX OR: use an ExpressionStackWalker if we see a select. Then as we go, keep - // the select in mind, and precompute two versions when it is a child of ours - // (enumerate all parents when we get to the select). Each version has - // (block (drop condition) value1or2). If we precompute both, replace this - // with that condition over those values. Ignore more than 1 nested select to - // avoid N^2 - void tryToPartiallyPrecompute(Expression* curr) { + // To do such partial precomputing, in the main pass we note selects that look + // promising. If we find any then we do a second pass later just for that (as + // doing so requires walking up the stack in a manner that we want to avoid in + // main pass, for overhead reasons; see below). + // + // Note that selects are all we really need here: Other passes would turn an + // if into a select if the arms are simple enough, and only in those cases + // (simple arms) do we have a chance at partially precomputing. (For example, + // if an arm is a constant then we can, but if it is a call then we can't.) + std::unordered_set partiallyPrecomputable; + + void considerPartiallyPrecomputing(Expression* curr) { + if (auto* select = curr->dynCast()) { + // This is another select in the middle somewhere; give up. + return; + } + } + + // This select looks promising, note it and its stack. + stackMap[curr] = expressionStack; + } + } + } stackFinder(*this); + stackFinder.walkFunction(func); + + // TODO determinism + + for (auto& [select, stack] : stackFinder.stackMap) { + // Each stack ends in the select. + assert(stack.back() == select); + + // Don't + } + + // We can only optimize concrete types, so that the select has something to // return. if (!curr->type.isConcrete()) { From 8c63b82417e18d3bfa229719561e7f1b5f65d208 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 11:27:29 -0800 Subject: [PATCH 19/63] more --- src/passes/Precompute.cpp | 83 ++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index e198417dfdd..54c47de1527 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -435,15 +435,16 @@ struct Precompute void visitSelect(Select* curr) { if (parent.partiallyPrecomputable.count(curr)) { - // This is a relevant select. Note the stack, but also check if it is - // nested in another select, which is a case that for simplicity we - // don't handle yet TODO - assert(expressionStack.back() == select); + if (expressionStack.size() == 1) { + // There is nothing above this select, so nothing we can precompute + // into it. + return; + } + + // Check if this is nested in another select, which is a case that for + // simplicity we don't handle yet TODO for (auto* item : expressionStack) { - if (item == select) { - continue; - } - if (item->is()) { // This is another select in the middle somewhere; give up. return; } @@ -459,38 +460,50 @@ struct Precompute // TODO determinism for (auto& [select, stack] : stackFinder.stackMap) { - // Each stack ends in the select. + // Each stack ends in the select itself, and contains more than the select + // itself. assert(stack.back() == select); + assert(stack.size() >= 2); + + // Go up through the parents, until we can't do any more work. + Index i = stack.size() - 2; + while (1) { + auto* parent = stack[i]; + if (!parent->type.isConcrete()) { + // The parent does not have a concrete type, so we can't move it + // into the select: the select needs a concrete type. (For example, + // if the parent is a drop or is unreachable, those are things we + // don't want to handle.) We stop here, as once we see one such + // parent we can't expect to make any more progress. + break; + } - // Don't - } + // We are precomputing the select arms, but leaving the condition as-is. + // If the condition breaks to the parent, then we can't move the parent + // into the select arms: + // + // (block $name ;; this must stack outside of the select + // (select + // (B) + // (C) + // (block ;; condition + // (br_if $target + // + // Ignore all control flow for simplicity, as they aren't interesting + // for us (other passes remove them when possibly anyhow), and assume + // we can't do anything else later. + if (Properties::isControlFlowStructure(parent)) { + break; + } + // Continue to the next parent, if there is one. + if (i == 0) { + break; + } + i--; + } - // We can only optimize concrete types, so that the select has something to - // return. - if (!curr->type.isConcrete()) { - return; - } - // In normal precomputing we replace the entire expression with the result. - // Here we only precompute part of the expression, so we can have dangling - // references like this: - // - // (block $name - // (select - // (B) - // (C) - // (block ;; condition - // (br_if $target - // - // That is, the condition can refer to a control flow structure, and the - // condition remain in the output, so if we remove that structure - which - // can happen if it is A in the comment above - then we have a problem. - // Ignore all control flow for simplicity, as they don't control and useful - // computation we want to precompute away anyhow. - if (Properties::isControlFlowStructure(curr)) { - return; - } // We are looking for a single child which is a select. ChildIterator children(curr); From 6a04aedd23e0ac10c1869dabefefc269082c00a1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 12:56:48 -0800 Subject: [PATCH 20/63] more --- src/passes/Precompute.cpp | 135 ++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 58 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 54c47de1527..90a9ed9d32c 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -425,6 +425,11 @@ struct Precompute // apply the outer struct.get we arrive at the outer struct.new, which is in // fact the global $C, and we succeed. void partiallyPrecompute(Function* func) { + if (partiallyPrecomputable.empty()) { + // Nothing to do. + return; + } + // Walk the function to find the parent stacks of the selects. struct StackFinder : public ExpressionStackWalker { Precompute& parent; @@ -459,16 +464,24 @@ struct Precompute // TODO determinism - for (auto& [select, stack] : stackFinder.stackMap) { + for (auto& [select_, stack] : stackFinder.stackMap) { + // We will be iterating on the select as we go (see below), so use a new + // variable. + auto* select = select_; + // Each stack ends in the select itself, and contains more than the select // itself. assert(stack.back() == select); assert(stack.size() >= 2); + // The select must have a parent. + Index selectIndex = stack.size() - 1; + assert(selectIndex >= 1); + // Go up through the parents, until we can't do any more work. - Index i = stack.size() - 2; + Index parentIndex = stack.size() - 2; while (1) { - auto* parent = stack[i]; + auto* parent = stack[parentIndex]; if (!parent->type.isConcrete()) { // The parent does not have a concrete type, so we can't move it // into the select: the select needs a concrete type. (For example, @@ -496,68 +509,48 @@ struct Precompute break; } + // This looks promising, so try to precompute here. What we do is + // precompute twice, once with the select replaced with the left arm, + // and once with the right. If both succeed then we can create a new + // select (with the same condition as before) whose arms are the + // precomputed values. + //auto* originalIfTrue = select->ifTrue; + //auto* originalIfFalse = select->ifFalse; + + // Find the pointer to the select in its immediate parent. + auto** pointerToSelect = getChildPointerInParent(stack, selectIndex, func); + *pointerToSelect = select->ifTrue; + auto ifTrue = precomputeExpression(copy); + // TODO: We could handle breaks here perhaps, and remove the isConcrete + // check in that case. + if (canEmitConstantFor(ifTrue.values) && ifTrue.values.isConcrete()) { + *pointerToSelect = select->ifFalse; + auto ifFalse = precomputeExpression(copy); + if (canEmitConstantFor(ifFalse.values) && + ifFalse.values.isConcrete()) { + // Wonderful, we can precompute here! The select can now contain the + // computed values in its arms. + select->ifTrue = ifTrue.getConstExpression(*getModule()); + select->ifFalse = ifFalse.getConstExpression(*getModule()); + select->finalize(); + + // And the parent of the select is replaced by the select. + auto** pointerToParent = getChildPointerInParent(stack, parentIndex, func); + *pointerToParent = select; + + // Update state for further iterations, as we may push this select + // even further in the parents. + selectIndex = parentIndex; + } + } + // Continue to the next parent, if there is one. if (i == 0) { break; } i--; } - - - - // We are looking for a single child which is a select. - ChildIterator children(curr); - if (children.getNumChildren() != 1) { - return; - } - // TODO: maybe only do this if the select arms have depth 1? They must be - // constant, or global, for us to have any hope..? avoids N^2 too - auto* select = children.getChild(0)->dynCast()) { // We only have a reasonable hope of success if the select arms are things - // like constants or global gets. - auto isValid = [](Expression* arm) { - // TODO: more? - return arm->is() || arm->is() || arm->is(); - }; - if (isValid(select->ifTrue) && isValid(select->ifFalse)) { + // like constants or global gets. At a first approximation, allow the set + // of things we allow in constant initializers (but we can probably allow + // more here TODO). + auto& wasm = *getModule(); + if (Properties::isValidConstantExpression(wasm, select->ifTrue) && + Properties::isValidConstantExpression(wasm, select->ifFalse)) { partiallyPrecomputable.insert(select); } } diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 56a093ce915..c353792c35e 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -100,17 +100,16 @@ ) ;; CHECK: (func $test-trap (type $1) (param $x i32) (result funcref) - ;; CHECK-NEXT: (struct.get $vtable 0 - ;; CHECK-NEXT: (select (result (ref null $vtable)) + ;; CHECK-NEXT: (block ;; (replaces something unreachable we can't emit) + ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.null none) - ;; CHECK-NEXT: (global.get $B$vtable) - ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test-trap (export "test-trap") (param $x i32) (result funcref) - ;; One arm has a null, which makes the struct.get trap, so there is nothing - ;; to optimize here. + ;; One arm has a null, which makes the struct.get trap, so we can turn this + ;; into an unreachable. (struct.get $vtable 0 (select (ref.null $vtable) From 4e6b174d7a4a1e73177ea76571ee31570912d4f5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 13:42:40 -0800 Subject: [PATCH 26/63] work --- test/lit/passes/precompute-partial.wast | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index c353792c35e..7c0735a20ed 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -99,6 +99,38 @@ ) ) + ;; CHECK: (func $test-expanded-twice-stop (type $6) (param $x i32) + ;; CHECK-NEXT: (call $send-i32 + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-expanded-twice-stop (export "test-expanded-twice-stop") (param $x i32) + ;; As $test-expanded-twice, but we stop after two expansions when we fail on + ;; the call. + (call $send-i32 + (ref.is_null + (struct.get $vtable 0 + (select + (global.get $A$vtable) + (global.get $B$vtable) + (local.get $x) + ) + ) + ) + ) + ) + + ;; CHECK: (func $send-i32 (type $6) (param $x i32) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) + (func $send-i32 (param $x i32) + ;; Helper for above. + ) + ;; CHECK: (func $test-trap (type $1) (param $x i32) (result funcref) ;; CHECK-NEXT: (block ;; (replaces something unreachable we can't emit) ;; CHECK-NEXT: (drop From 0ff3f9650af5a8cbff6e55fdcd815b0d813a9a7a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 13:43:14 -0800 Subject: [PATCH 27/63] work --- test/lit/passes/precompute-partial.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 7c0735a20ed..33c65536a26 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. -;; RUN: wasm-opt %s --precompute-propagate -all -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --precompute-propagate -all -S -o - | filecheck %s (module (rec From c84e0d6a24ef7172d3f035eb0b45e0c3d4627e6c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 13:44:02 -0800 Subject: [PATCH 28/63] work --- test/lit/passes/precompute-partial.wast | 255 ++++++++++++------------ 1 file changed, 129 insertions(+), 126 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 33c65536a26..f74fe997d5c 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -2,6 +2,129 @@ ;; RUN: foreach %s %t wasm-opt --precompute-propagate -all -S -o - | filecheck %s +(module + ;; CHECK: (func $simple-1 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-1 (param $param i32) (result i32) + ;; Test simple i32 operations. + (i32.eqz + (select + (i32.const 42) + (i32.const 1337) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $simple-2 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-2 (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 0) + (i32.const 10) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $simple-3 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-3 (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 20) + (i32.const 0) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $simple-4 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $simple-4 (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 0) + (i32.const 0) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $unreachable (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $unreachable (param $param i32) (result i32) + ;; We should ignore unreachable code like this. We would need to make sure + ;; to emit the proper type on the outside, and it's simpler to just defer + ;; this to DCE. + (i32.eqz + (select + (i32.const 0) + (i32.const 0) + (unreachable) + ) + ) + ) + + ;; CHECK: (func $control-flow (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (block $target (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (br_if $target + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $control-flow (param $param i32) (result i32) + ;; We ignore control flow structures to avoid issues with removing them (as + ;; the condition might refer to them, as in this testcase). + (block $target (result i32) + (select + (i32.const 0) + (i32.const 1) + ;; If we precomputed the block into the select arms, this br_if + ;; would have nowhere to go. + (br_if $target + (i32.const 1) + (local.get $param) + ) + ) + ) + ) +) + +;; References. (module (rec ;; CHECK: (rec @@ -32,7 +155,7 @@ (ref.func $B$func) )) - ;; CHECK: (func $test-expanded (type $1) (param $x i32) (result funcref) + ;; CHECK: (func $test-expanded (type $0) (param $x i32) (result funcref) ;; CHECK-NEXT: (select (result (ref $specific-func)) ;; CHECK-NEXT: (ref.func $A$func) ;; CHECK-NEXT: (ref.func $B$func) @@ -55,7 +178,7 @@ ) ) - ;; CHECK: (func $test-subtyping (type $1) (param $x i32) (result funcref) + ;; CHECK: (func $test-subtyping (type $0) (param $x i32) (result funcref) ;; CHECK-NEXT: (select (result (ref $specific-func)) ;; CHECK-NEXT: (ref.func $A$func) ;; CHECK-NEXT: (ref.func $B$func) @@ -78,7 +201,7 @@ ) ) - ;; CHECK: (func $test-expanded-twice (type $0) (param $x i32) (result i32) + ;; CHECK: (func $test-expanded-twice (type $6) (param $x i32) (result i32) ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (i32.const 0) @@ -99,7 +222,7 @@ ) ) - ;; CHECK: (func $test-expanded-twice-stop (type $6) (param $x i32) + ;; CHECK: (func $test-expanded-twice-stop (type $5) (param $x i32) ;; CHECK-NEXT: (call $send-i32 ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 0) @@ -124,14 +247,14 @@ ) ) - ;; CHECK: (func $send-i32 (type $6) (param $x i32) + ;; CHECK: (func $send-i32 (type $5) (param $x i32) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $send-i32 (param $x i32) ;; Helper for above. ) - ;; CHECK: (func $test-trap (type $1) (param $x i32) (result funcref) + ;; CHECK: (func $test-trap (type $0) (param $x i32) (result funcref) ;; CHECK-NEXT: (block ;; (replaces something unreachable we can't emit) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.null none) @@ -166,124 +289,4 @@ ;; Helper for above. (i32.const 2) ) - - ;; CHECK: (func $simple-1 (type $0) (param $param i32) (result i32) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (local.get $param) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $simple-1 (param $param i32) (result i32) - ;; Test simple i32 operations as well. - (i32.eqz - (select - (i32.const 42) - (i32.const 1337) - (local.get $param) - ) - ) - ) - - ;; CHECK: (func $simple-2 (type $0) (param $param i32) (result i32) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (local.get $param) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $simple-2 (param $param i32) (result i32) - (i32.eqz - (select - (i32.const 0) - (i32.const 10) - (local.get $param) - ) - ) - ) - - ;; CHECK: (func $simple-3 (type $0) (param $param i32) (result i32) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $param) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $simple-3 (param $param i32) (result i32) - (i32.eqz - (select - (i32.const 20) - (i32.const 0) - (local.get $param) - ) - ) - ) - - ;; CHECK: (func $simple-4 (type $0) (param $param i32) (result i32) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $param) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $simple-4 (param $param i32) (result i32) - (i32.eqz - (select - (i32.const 0) - (i32.const 0) - (local.get $param) - ) - ) - ) - - ;; CHECK: (func $unreachable (type $0) (param $param i32) (result i32) - ;; CHECK-NEXT: (i32.eqz - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $unreachable (param $param i32) (result i32) - ;; We should ignore unreachable code like this. We would need to make sure - ;; to emit the proper type on the outside, and it's simpler to just defer - ;; this to DCE. - (i32.eqz - (select - (i32.const 0) - (i32.const 0) - (unreachable) - ) - ) - ) - - ;; CHECK: (func $control-flow (type $0) (param $param i32) (result i32) - ;; CHECK-NEXT: (block $target (result i32) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (br_if $target - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $param) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $control-flow (param $param i32) (result i32) - ;; We ignore control flow structures to avoid issues with removing them (as - ;; the condition might refer to them, as in this testcase). - (block $target (result i32) - (select - (i32.const 0) - (i32.const 1) - ;; If we precomputed the block into the select arms, this br_if - ;; would have nowhere to go. - (br_if $target - (i32.const 1) - (local.get $param) - ) - ) - ) - ) ) From dde94ae02c80e90d61bc8ade1a44f44d291b8dba Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 13:48:28 -0800 Subject: [PATCH 29/63] almost --- test/lit/passes/precompute-partial.wast | 52 +++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index f74fe997d5c..bf7f49d5962 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -290,3 +290,55 @@ (i32.const 2) ) ) + +;; References with nesting. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $vtable (sub (struct (field funcref)))) + (type $vtable (sub (struct funcref))) + + (type $itable (sub (struct (ref $vtable)))) + ) + + ;; CHECK: (global $A$vtable (ref $vtable) (struct.new $vtable + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: )) + (global $A$itable (ref $itable) (struct.new $itable + (struct.new $vtable + (ref.func $A$func) + ) + )) + + ;; CHECK: (global $B$vtable (ref $vtable) (struct.new $vtable + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: )) + (global $B$itable (ref $itable) (struct.new $itable + (struct.new $vtable + (ref.func $B$func) + ) + )) + + (func $test-expanded (export "test-expanded") (param $x i32) (result funcref) + ;; Nesting in the global declarations means we cannot precompute the inner + ;; struct.get by itself (as that would return the inner struct.new), but + ;; when we do the outer struct.get, it all comes together. + (struct.get $vtable 0 + (struct.get $itable 0 + (select + (global.get $A$itable) + (global.get $B$itable) + (local.get $x) + ) + ) + ) + ) + + (func $A$func + ;; Helper for above. + ) + + (func $B$func + ;; Helper for above. + ) +) From e825d06fe7d12eb6d2ee54916ab0b449f583f3a3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 14:02:22 -0800 Subject: [PATCH 30/63] work --- src/passes/Precompute.cpp | 15 ++++++---- test/lit/passes/precompute-partial.wast | 37 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 24ad76a6f60..1b854e602b2 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -229,12 +229,13 @@ struct Precompute // another walk to apply them and perhaps other optimizations that are // unlocked. super::doWalkFunction(func); - partiallyPrecompute(func); + // We could also try to partially precompute again, but that is a somewhat + // heavy operation, so we only do it the first time, and leave such things + // for later runs of this pass and for --converge. } // Note that in principle even more cycles could find further work here, in // very rare cases. To avoid constructing a LocalGraph again just for that - // unlikely chance, we leave such things for later runs of this pass and for - // --converge. + // unlikely chance, we leave such things for later. } template void reuseConstantNode(T* curr, Flow flow) { @@ -544,8 +545,12 @@ struct Precompute } } - // Whether we succeeded to precompute here or not, continue to the next - // parent, if there is one. + // Whether we succeeded to precompute here or not, restore the parent's + // pointer to its original state (if we precomputed, the parent is no + // longer in use, but there is no harm in modifying it). + *pointerToSelect = select; + + // Continue to the outer parent, if there is one. if (parentIndex == 0) { break; } diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index bf7f49d5962..8dca5f3c4a9 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -255,16 +255,17 @@ ) ;; CHECK: (func $test-trap (type $0) (param $x i32) (result funcref) - ;; CHECK-NEXT: (block ;; (replaces something unreachable we can't emit) - ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $vtable 0 + ;; CHECK-NEXT: (select (result (ref null $vtable)) ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: (global.get $B$vtable) + ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test-trap (export "test-trap") (param $x i32) (result funcref) - ;; One arm has a null, which makes the struct.get trap, so we can turn this - ;; into an unreachable. + ;; One arm has a null, which makes the struct.get trap, so we ignore this + ;; for now. TODO: handle traps and breaks (struct.get $vtable 0 (select (ref.null $vtable) @@ -298,11 +299,14 @@ ;; CHECK-NEXT: (type $vtable (sub (struct (field funcref)))) (type $vtable (sub (struct funcref))) + ;; CHECK: (type $itable (sub (struct (field (ref $vtable))))) (type $itable (sub (struct (ref $vtable)))) ) - ;; CHECK: (global $A$vtable (ref $vtable) (struct.new $vtable - ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK: (global $A$itable (ref $itable) (struct.new $itable + ;; CHECK-NEXT: (struct.new $vtable + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: )) (global $A$itable (ref $itable) (struct.new $itable (struct.new $vtable @@ -310,8 +314,10 @@ ) )) - ;; CHECK: (global $B$vtable (ref $vtable) (struct.new $vtable - ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK: (global $B$itable (ref $itable) (struct.new $itable + ;; CHECK-NEXT: (struct.new $vtable + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: )) (global $B$itable (ref $itable) (struct.new $itable (struct.new $vtable @@ -319,6 +325,13 @@ ) )) + ;; CHECK: (func $test-expanded (type $3) (param $x i32) (result funcref) + ;; CHECK-NEXT: (select (result (ref $2)) + ;; CHECK-NEXT: (ref.func $A$func) + ;; CHECK-NEXT: (ref.func $B$func) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $test-expanded (export "test-expanded") (param $x i32) (result funcref) ;; Nesting in the global declarations means we cannot precompute the inner ;; struct.get by itself (as that would return the inner struct.new), but @@ -334,10 +347,16 @@ ) ) + ;; CHECK: (func $A$func (type $2) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) (func $A$func ;; Helper for above. ) + ;; CHECK: (func $B$func (type $2) + ;; CHECK-NEXT: (nop) + ;; CHECK-NEXT: ) (func $B$func ;; Helper for above. ) From df8d86f52c04d14f529a052f3ab17ac2e50eeb84 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 14:06:42 -0800 Subject: [PATCH 31/63] test --- test/lit/passes/precompute-partial.wast | 68 +++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 8dca5f3c4a9..91024cca6c3 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -72,6 +72,74 @@ ) ) + ;; CHECK: (func $binary (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: (i32.const 21) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary (param $param i32) (result i32) + (i32.add + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + (i32.const 1) + ) + ) + + ;; CHECK: (func $binary-2 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 11) + ;; CHECK-NEXT: (i32.const 21) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary-2 (param $param i32) (result i32) + ;; As above but with the select in the other arm. + (i32.add + (i32.const 1) + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $binary-3 (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.add + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: (i32.const 40) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary-3 (param $param i32) (result i32) + ;; Two selects. We do not optimize here (but in theory could, as the + ;; condition is identical - other passes should merge that). + (i32.add + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + (select + (i32.const 30) + (i32.const 40) + (local.get $param) + ) + ) + ) + ;; CHECK: (func $unreachable (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (i32.eqz ;; CHECK-NEXT: (select From 01a1fad57714e28cbad2a6ca91266a4febda4fc9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 14:08:57 -0800 Subject: [PATCH 32/63] test --- test/lit/passes/precompute-partial.wast | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 91024cca6c3..c6dba00ed20 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -269,7 +269,7 @@ ) ) - ;; CHECK: (func $test-expanded-twice (type $6) (param $x i32) (result i32) + ;; CHECK: (func $test-expanded-twice (type $5) (param $x i32) (result i32) ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (i32.const 0) @@ -290,7 +290,7 @@ ) ) - ;; CHECK: (func $test-expanded-twice-stop (type $5) (param $x i32) + ;; CHECK: (func $test-expanded-twice-stop (type $6) (param $x i32) ;; CHECK-NEXT: (call $send-i32 ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 0) @@ -315,13 +315,32 @@ ) ) - ;; CHECK: (func $send-i32 (type $5) (param $x i32) + ;; CHECK: (func $send-i32 (type $6) (param $x i32) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) (func $send-i32 (param $x i32) ;; Helper for above. ) + ;; CHECK: (func $binary (type $5) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $binary (param $param i32) (result i32) + ;; (Note that this works because immutable globals can be compared.) + (ref.eq + (select + (global.get $A$vtable) + (global.get $B$vtable) + (local.get $param) + ) + (global.get $A$vtable) + ) + ) + ;; CHECK: (func $test-trap (type $0) (param $x i32) (result funcref) ;; CHECK-NEXT: (struct.get $vtable 0 ;; CHECK-NEXT: (select (result (ref null $vtable)) From 35984d79d110d341bc8ad60ada601bc056f5feac Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 15:27:01 -0800 Subject: [PATCH 33/63] fix --- src/passes/Precompute.cpp | 12 ++++++------ test/lit/passes/precompute-partial.wast | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 1b854e602b2..14a4a561ce9 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -478,12 +478,12 @@ struct Precompute Index parentIndex = selectIndex - 1; while (1) { auto* parent = stack[parentIndex]; - if (!parent->type.isConcrete()) { - // The parent does not have a concrete type, so we can't move it - // into the select: the select needs a concrete type. (For example, - // if the parent is a drop or is unreachable, those are things we - // don't want to handle.) We stop here, as once we see one such - // parent we can't expect to make any more progress. + // If the parent lacks a concrete type then we can't move it into the + // select: the select needs a concrete (and non-tuple) type. For example + // if the parent is a drop or is unreachable, those are things we don't + // want to handle, and we stop here (once we see one such parent we + // can't expect to make any more progress). + if (!parent->type.isConcrete() || parent->type.isTuple()) { break; } diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index c6dba00ed20..ad0bc117428 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -162,6 +162,28 @@ ) ) + ;; CHECK: (func $tuple (type $1) (param $param i32) (result i32 i32) + ;; CHECK-NEXT: (tuple.make 2 + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $tuple (param $param i32) (result i32 i32) + ;; We should ignore tuples, as select outputs cannot be tuples. + (tuple.make 2 + (select + (i32.const 0) + (i32.const 1) + (local.get $param) + ) + (i32.const 2) + ) + ) + ;; CHECK: (func $control-flow (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (block $target (result i32) ;; CHECK-NEXT: (select From 2731967c3ed2e61e96c4830ed2118ab35bcf3ace Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 16:29:50 -0800 Subject: [PATCH 34/63] badd --- test/lit/passes/precompute-partial.wast | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index ad0bc117428..e595ea55450 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -212,6 +212,34 @@ ) ) ) + + ;; CHECK: (func $break (type $0) (param $x i32) (result i32) + ;; CHECK-NEXT: (block $label$1 (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $break (param $x i32) (result i32) + (block $label$1 (result i32) + (drop + (br_if $label$1 + (select + (i32.const 0) + (i32.const 1) + (local.get $x) + ) + (i32.const 2) + ) + ) + (i32.const 3) + ) + ) ) ;; References. From 5b782a54f2a460dff0fd3c624ceee1fe9853a46d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 16:44:37 -0800 Subject: [PATCH 35/63] fix --- src/literal.h | 6 +++--- src/passes/Precompute.cpp | 13 ++++++++----- src/wasm-interpreter.h | 2 +- test/lit/passes/precompute-partial.wast | 11 +++++++---- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/literal.h b/src/literal.h index b484fc3b8ed..971cc8b3e0e 100644 --- a/src/literal.h +++ b/src/literal.h @@ -698,7 +698,7 @@ class Literals : public SmallVector { } Literals(size_t initialSize) : SmallVector(initialSize) {} - Type getType() { + Type getType() const { if (empty()) { return Type::none; } @@ -711,8 +711,8 @@ class Literals : public SmallVector { } return Type(types); } - bool isNone() { return size() == 0; } - bool isConcrete() { return size() != 0; } + bool isNone() const { return size() == 0; } + bool isConcrete() const { return size() != 0; } }; std::ostream& operator<<(std::ostream& o, wasm::Literal literal); diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 14a4a561ce9..0ad638fc803 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -510,6 +510,12 @@ struct Precompute // and once with the right. If both succeed then we can create a new // select (with the same condition as before) whose arms are the // precomputed values. + auto isValidPrecomputation = [&](const Flow& flow) { + // For now we handle simple concrete values. We could also handle + // breaks in principle TODO + return canEmitConstantFor(flow.values) && !flow.breaking() && + flow.values.isConcrete(); + }; // Find the pointer to the select in its immediate parent so that we can // replace it first with one arm and then the other. @@ -517,13 +523,10 @@ struct Precompute getChildPointerInImmediateParent(stack, selectIndex, func); *pointerToSelect = select->ifTrue; auto ifTrue = precomputeExpression(parent); - // TODO: We could handle breaks here perhaps, and remove the isConcrete - // check in that case. - if (canEmitConstantFor(ifTrue.values) && ifTrue.values.isConcrete()) { + if (isValidPrecomputation(ifTrue)) { *pointerToSelect = select->ifFalse; auto ifFalse = precomputeExpression(parent); - if (canEmitConstantFor(ifFalse.values) && - ifFalse.values.isConcrete()) { + if (isValidPrecomputation(ifFalse)) { // Wonderful, we can precompute here! The select can now contain the // computed values in its arms. select->ifTrue = ifTrue.getConstExpression(*getModule()); diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index eb0a1274abc..eeca7c3ccde 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -80,7 +80,7 @@ class Flow { return builder.makeConstantExpression(values); } - bool breaking() { return breakTo.is(); } + bool breaking() const { return breakTo.is(); } void clearIf(Name target) { if (breakTo == target) { diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index e595ea55450..7333e30678f 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -216,10 +216,13 @@ ;; CHECK: (func $break (type $0) (param $x i32) (result i32) ;; CHECK-NEXT: (block $label$1 (result i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (br_if $label$1 + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 3) From 634db0337f0f5c60c05a6455cfc24be5281fed74 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 16:45:23 -0800 Subject: [PATCH 36/63] work --- test/lit/passes/precompute-partial.wast | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 7333e30678f..512ea25903d 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -214,9 +214,9 @@ ) ;; CHECK: (func $break (type $0) (param $x i32) (result i32) - ;; CHECK-NEXT: (block $label$1 (result i32) + ;; CHECK-NEXT: (block $label (result i32) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (br_if $label$1 + ;; CHECK-NEXT: (br_if $label ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (i32.const 1) @@ -229,9 +229,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $break (param $x i32) (result i32) - (block $label$1 (result i32) + (block $label (result i32) (drop - (br_if $label$1 + (br_if $label (select (i32.const 0) (i32.const 1) From 135b0ffae1fcc7f2fbb00eb4d49e294d088b1408 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 4 Jan 2024 16:46:02 -0800 Subject: [PATCH 37/63] work --- test/lit/passes/precompute-partial.wast | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 512ea25903d..e04afb28552 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -229,6 +229,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $break (param $x i32) (result i32) + ;; We should change nothing here: we cannot precompute breaks yet TODO (block $label (result i32) (drop (br_if $label @@ -405,7 +406,7 @@ ;; CHECK-NEXT: ) (func $test-trap (export "test-trap") (param $x i32) (result funcref) ;; One arm has a null, which makes the struct.get trap, so we ignore this - ;; for now. TODO: handle traps and breaks + ;; for now. TODO: handle traps (struct.get $vtable 0 (select (ref.null $vtable) From a9f0abcccfa6b6daeae5eeaf24a468fb7fb045e1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 09:25:25 -0800 Subject: [PATCH 38/63] work --- test/lit/passes/precompute-partial.wast | 46 +++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index e04afb28552..137d5c87cd8 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -244,6 +244,52 @@ (i32.const 3) ) ) + + ;; CHECK: (func $toplevel (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $toplevel (param $param i32) (result i32) + ;; There is nothing to do for a select with no parent, but do not error at + ;; least. + (select + (i32.const 10) + (i32.const 20) + (local.get $param) + ) + ) + + ;; CHECK: (func $nested (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $nested (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 0) + (i32.const 10) + (i32.eqz + (select + (i32.const 0) + (i32.const 10) + (local.get $param) + ) + ) + ) + ) + ) ) ;; References. From 5c2fe38dee2bedb4b7cc8854de4560ad24a28453 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 09:34:32 -0800 Subject: [PATCH 39/63] work --- src/passes/Precompute.cpp | 26 ++--- test/lit/passes/precompute-partial.wast | 125 ++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 25 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 0ad638fc803..5502e32ee37 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -368,9 +368,13 @@ struct Precompute // like constants or global gets. At a first approximation, allow the set // of things we allow in constant initializers (but we can probably allow // more here TODO). + // + // We also ignore selects with no parent (that are the entire function + // body) as then there is nothing to optimize into their arms. auto& wasm = *getModule(); if (Properties::isValidConstantExpression(wasm, select->ifTrue) && - Properties::isValidConstantExpression(wasm, select->ifFalse)) { + Properties::isValidConstantExpression(wasm, select->ifFalse) && + getFunction()->body != select) { partiallyPrecomputable.insert(select); } } @@ -431,7 +435,7 @@ struct Precompute return; } - // Walk the function to find the parent stacks of the selects. + // Walk the function to find the parent stacks of the promising selects. struct StackFinder : public ExpressionStackWalker { Precompute& parent; @@ -441,22 +445,6 @@ struct Precompute void visitSelect(Select* curr) { if (parent.partiallyPrecomputable.count(curr)) { - if (expressionStack.size() == 1) { - // There is nothing above this select, so nothing we can precompute - // into it. - return; - } - - // Check if this is nested in another select, which is a case that for - // simplicity we don't handle yet TODO - for (auto* item : expressionStack) { - if (item != curr && item->is()) { // We only have a reasonable hope of success if the select arms are things // like constants or global gets. At a first approximation, allow the set @@ -430,7 +440,7 @@ struct Precompute // apply the outer struct.get we arrive at the outer struct.new, which is in // fact the global $C, and we succeed. void partiallyPrecompute(Function* func) { - if (partiallyPrecomputable.empty()) { + if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) { // Nothing to do. return; } diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 5cf8fe955c6..b7d3d52af3d 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. -;; RUN: foreach %s %t wasm-opt --precompute-propagate -all -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --precompute-propagate --optimize-level=2 -all -S -o - | filecheck %s (module ;; CHECK: (func $simple-1 (type $0) (param $param i32) (result i32) From 89e50ad943756129fb5375baa7e48de9fc6b250d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 13:04:34 -0800 Subject: [PATCH 45/63] fix --- src/passes/Precompute.cpp | 27 +++++++++++++------------ test/lit/passes/precompute-partial.wast | 11 ++++------ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index aae679b4441..466f83830ee 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -35,6 +35,7 @@ #include "ir/properties.h" #include "ir/utils.h" #include "pass.h" +#include "support/insert_ordered.h" #include "support/unique_deferring_queue.h" #include "wasm-builder.h" #include "wasm-interpreter.h" @@ -346,26 +347,28 @@ struct Precompute // // Perhaps we can compute A(B) and A(C). If so, we can emit a better select: // - // (select - // (constant result of A(B)) - // (constant result of A(C)) - // (condition) - // ) + // (select + // (constant result of A(B)) + // (constant result of A(C)) + // (condition) // ) // // Note that in general for code size we want to move operations *out* of // selects and ifs (OptimizeInstructions does that), but here we are - // computing two constant which replace four expressions, so it is worthwhile. + // computing two constants which replace three expressions, so it is + // worthwhile. // // To do such partial precomputing, in the main pass we note selects that look // promising. If we find any then we do a second pass later just for that (as // doing so requires walking up the stack in a manner that we want to avoid in - // main pass, for overhead reasons; see below). + // the main pass for overhead reasons; see below). // // Note that selects are all we really need here: Other passes would turn an // if into a select if the arms are simple enough, and only in those cases - // (simple arms) do we have a chance at partially precomputing. (For example, + // (simple arms) do we have a chance at partially precomputing. For example, // if an arm is a constant then we can, but if it is a call then we can't.) + // However, there are cases like an if with arms with side effects that end in + // precomputable things, that are missed atm TODO std::unordered_set partiallyPrecomputable; void considerPartiallyPrecomputing(Expression* curr) { @@ -437,8 +440,8 @@ struct Precompute // Applying the inner struct.get to $C leads us to the inner struct.new, but // that is an interior pointer in the global - it is not something we can // refer to using a global.get, so precomputing it fails. However, when we - // apply the outer struct.get we arrive at the outer struct.new, which is in - // fact the global $C, and we succeed. + // apply both struct.gets at once we arrive at the outer struct.new, which is + // in fact the global $C, and we succeed. void partiallyPrecompute(Function* func) { if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) { // Nothing to do. @@ -451,7 +454,7 @@ struct Precompute StackFinder(Precompute& parent) : parent(parent) {} - std::unordered_map stackMap; + InsertOrderedMap stackMap; void visitSelect(Select* curr) { if (parent.partiallyPrecomputable.count(curr)) { @@ -461,8 +464,6 @@ struct Precompute } stackFinder(*this); stackFinder.walkFunction(func); - // TODO determinism - // Note which expressions we've modified as we go, as it is invalid to // modify more than once. This could happen in theory in a situation like // this: diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index b7d3d52af3d..49917d79bd0 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -330,16 +330,13 @@ ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 1) ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 1) - ;; CHECK-NEXT: (i32.const 0) - ;; CHECK-NEXT: (local.get $param) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $param) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $nested (param $param i32) (result i32) - ;; Both the outer and inner selects can be optimized, separately. They - ;; could then be further optimized, but we leave that for later iterations. FIXME + ;; As above, with an outer eqz as well. Now both the outer and inner selects + ;; can be optimized, and after the inner one is it can be optimized with the + ;; outer one as well. (i32.eqz (select (i32.const 0) From 1689d4b343c416728d1bc1a7188e35eb5da3ffc8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 13:37:55 -0800 Subject: [PATCH 46/63] comment --- src/passes/Precompute.cpp | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 466f83830ee..cecfe447090 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -454,6 +454,13 @@ struct Precompute StackFinder(Precompute& parent) : parent(parent) {} + // We will later iterate on this in the order of insertion, which keeps + // things deterministic, and also usually lets us do consecutive work + // like a select nested in another select's condition, simply because we + // will traverse the selects in postorder (however, because we cannot + // always succeed in an incremental manner - see the comment on this + // function - it is possible in theory that some work can happen in a + // later execution o the pass). InsertOrderedMap stackMap; void visitSelect(Select* curr) { @@ -478,29 +485,9 @@ struct Precompute // is always infinity, so we can modify, and the same thing happens with the // second select, causing a second modification. In practice it does not // seem that wasm has instructions that allow this situation to occur, but - // this code is still useful to guard against future problems. - // - // In some cases we could modify more than once in a consecutive manner: - // - // (i32.eqz - // (select - // (i32.const 0) - // (i32.const 10) - // (i32.eqz - // (select - // (i32.const 0) - // (i32.const 20) - // (local.get $param) - // ) - // ) - // ) - // ) - // - // After optimizing the eqzs into the selects, they can be optimized - // together. However, that would require updating the stack for the outer - // one (so it doesn't see stale information). To keep things simple we just - // avoid processing already-modified code, which leaves such situations for - // later iterations of the pass. + // this code is still useful to guard against future problems, and it may + // also speed things up (if something was modified, we need never consider + // it again). std::unordered_set modified; for (auto& [select, stack] : stackFinder.stackMap) { From 56f49a95a4437f4f9a9ff5fa499f60d425b2989b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 13:47:35 -0800 Subject: [PATCH 47/63] commentses --- src/passes/Precompute.cpp | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index cecfe447090..d3c60f6c305 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -459,8 +459,8 @@ struct Precompute // like a select nested in another select's condition, simply because we // will traverse the selects in postorder (however, because we cannot // always succeed in an incremental manner - see the comment on this - // function - it is possible in theory that some work can happen in a - // later execution o the pass). + // function - it is possible in theory that some work can happen only in a + // later execution of the pass). InsertOrderedMap stackMap; void visitSelect(Select* curr) { @@ -483,11 +483,12 @@ struct Precompute // // When we consider the first select we can see that the computation result // is always infinity, so we can modify, and the same thing happens with the - // second select, causing a second modification. In practice it does not - // seem that wasm has instructions that allow this situation to occur, but - // this code is still useful to guard against future problems, and it may - // also speed things up (if something was modified, we need never consider - // it again). + // second select, causing a second modification. In this example the result + // is the same either way, but at least in theory an actual problem can + // occur. Note that in practice it does not seem that wasm has instructions + // that enable it atm, but this code is still useful to guard against future + // problems, and as a minor speedup (quickly skip code if it was already + // modified). std::unordered_set modified; for (auto& [select, stack] : stackFinder.stackMap) { @@ -500,14 +501,19 @@ struct Precompute assert(selectIndex >= 1); if (modified.count(select)) { + // This select was modified; go to the next one. continue; } - // Go up through the parents, until we can't do any more work. + // Go up through the parents, until we can't do any more work. At each + // parent we'll try to execute it and all intermediate parents into the + // select arms. Index parentIndex = selectIndex - 1; while (1) { auto* parent = stack[parentIndex]; if (modified.count(parent)) { + // This parent was modified; exit the loop on parents as no upper + // parent is valid to try either. break; } @@ -524,7 +530,7 @@ struct Precompute // If the condition breaks to the parent, then we can't move the parent // into the select arms: // - // (block $name ;; this must stack outside of the select + // (block $name ;; this must stay outside of the select // (select // (B) // (C) @@ -532,8 +538,7 @@ struct Precompute // (br_if $target // // Ignore all control flow for simplicity, as they aren't interesting - // for us (other passes remove them when possibly anyhow), and assume - // we can't do anything else later. + // for us, and other passes should have removed them anyhow. if (Properties::isControlFlowStructure(parent)) { break; } @@ -566,15 +571,13 @@ struct Precompute select->ifFalse = ifFalse.getConstExpression(*getModule()); select->finalize(); - // And the parent of the select is replaced by the select. + // The parent of the select is now replaced by the select. auto** pointerToParent = getChildPointerInImmediateParent(stack, parentIndex, func); *pointerToParent = select; - // Update state for further iterations, as we may push this select - // even further in the parents. The new stack now ends at the - // select we just moved (as we do not need to consider anything - // below it, as before), and everything there is now modified. + // Update state for further iterations: Mark everything modified and + // move the select to the parent's location. for (Index i = parentIndex; i <= selectIndex; i++) { modified.insert(stack[i]); } @@ -821,7 +824,7 @@ struct Precompute Function* func) { if (index == 0) { // There is nothing above this expression, so the pointer referring to it - // is the function's body pointer. + // is the function's body. return &func->body; } From ae59dd9eef350d7ee549b90f069dc2d8911da520 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:40:49 -0800 Subject: [PATCH 48/63] less --- test/lit/passes/precompute-partial.wast | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 49917d79bd0..d7b9b204c99 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -487,7 +487,7 @@ ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $test-expanded (export "test-expanded") (param $x i32) (result funcref) + (func $test-expanded (param $x i32) (result funcref) ;; We can apply the struct.get to the select arms: As the globals are all ;; immutable, we can read the function references from them, and emit a ;; select on those values, saving the struct.get operation entirely. @@ -510,7 +510,7 @@ ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $test-subtyping (export "test-subtyping") (param $x i32) (result funcref) + (func $test-subtyping (param $x i32) (result funcref) ;; As above, but now we have struct.news directly in the arms, and one is ;; of a subtype of the final result (which should not prevent optimization). (struct.get $vtable.sub 0 @@ -533,7 +533,7 @@ ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $test-expanded-twice (export "test-expanded-twice") (param $x i32) (result i32) + (func $test-expanded-twice (param $x i32) (result i32) ;; As $test-expanded, but we have two operations that can be applied. Both ;; references are non-null, so the select arms will become 0. (ref.is_null @@ -556,7 +556,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $test-expanded-twice-stop (export "test-expanded-twice-stop") (param $x i32) + (func $test-expanded-twice-stop (param $x i32) ;; As $test-expanded-twice, but we stop after two expansions when we fail on ;; the call. (call $send-i32 @@ -607,7 +607,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $test-trap (export "test-trap") (param $x i32) (result funcref) + (func $test-trap (param $x i32) (result funcref) ;; One arm has a null, which makes the struct.get trap, so we ignore this ;; for now. TODO: handle traps (struct.get $vtable 0 @@ -676,7 +676,7 @@ ;; CHECK-NEXT: (local.get $x) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $test-expanded (export "test-expanded") (param $x i32) (result funcref) + (func $test-expanded (param $x i32) (result funcref) ;; Nesting in the global declarations means we cannot precompute the inner ;; struct.get by itself (as that would return the inner struct.new), but ;; when we do the outer struct.get, it all comes together. From 0792c10922b0a7961e64d40d1402f1cc2897d25c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:41:41 -0800 Subject: [PATCH 49/63] less --- test/lit/passes/precompute-partial.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index d7b9b204c99..36b367bdd7c 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -11,7 +11,7 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $simple-1 (param $param i32) (result i32) - ;; Test simple i32 operations. + ;; The eqz can be applied to the select arms. (i32.eqz (select (i32.const 42) From 8bd5652c468b987e1da07e4bad225ce93903c467 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:43:02 -0800 Subject: [PATCH 50/63] less --- test/lit/passes/precompute-partial.wast | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 36b367bdd7c..2904678ec0c 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -229,7 +229,8 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $break (param $x i32) (result i32) - ;; We should change nothing here: we cannot precompute breaks yet TODO + ;; We should change nothing here: we do not partially precompute breaks yet + ;; TODO (block $label (result i32) (drop (br_if $label From c8f6e0e5d8738c5dab490678878620e4b932f5c6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:43:46 -0800 Subject: [PATCH 51/63] work --- test/lit/passes/precompute-partial.wast | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 2904678ec0c..92c098642d3 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -248,8 +248,8 @@ ;; CHECK: (func $toplevel (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: (i32.const 10) - ;; CHECK-NEXT: (i32.const 20) ;; CHECK-NEXT: (local.get $param) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -257,8 +257,8 @@ ;; There is nothing to do for a select with no parent, but do not error at ;; least. (select + (i32.const 0) (i32.const 10) - (i32.const 20) (local.get $param) ) ) From b48cefc8f24e14826feef1781023d57cb5769db9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:45:14 -0800 Subject: [PATCH 52/63] work --- test/lit/passes/precompute-partial.wast | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 92c098642d3..782251076f9 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -336,8 +336,8 @@ ;; CHECK-NEXT: ) (func $nested (param $param i32) (result i32) ;; As above, with an outer eqz as well. Now both the outer and inner selects - ;; can be optimized, and after the inner one is it can be optimized with the - ;; outer one as well. + ;; can be optimized, and after the inner one is, it can be optimized with + ;; the outer one as well, leaving a single select and no eqz. (i32.eqz (select (i32.const 0) From 65f55b958c99108e1692f6788b621b76883d49b6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:45:58 -0800 Subject: [PATCH 53/63] work --- test/lit/passes/precompute-partial.wast | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 782251076f9..42b476ff4dc 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -366,17 +366,13 @@ ;; CHECK-NEXT: (i32.const 40) ;; CHECK-NEXT: (local.get $param) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (select - ;; CHECK-NEXT: (i32.const 50) - ;; CHECK-NEXT: (i32.const 60) - ;; CHECK-NEXT: (local.get $param) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $param) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $nested-arms (param $param i32) (result i32) - ;; We can do nothing for selects nested directly in other selects, but do - ;; not error at least. + ;; We can do nothing for selects nested directly in select arms, but do not + ;; error at least. (i32.eqz (select (select @@ -389,11 +385,7 @@ (i32.const 40) (local.get $param) ) - (select - (i32.const 50) - (i32.const 60) - (local.get $param) - ) + (local.get $param) ) ) ) From 51cb2462e636f89779a0fb95d7449383ff90db0d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:47:45 -0800 Subject: [PATCH 54/63] work --- test/lit/passes/precompute-partial.wast | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 42b476ff4dc..d76fc3cc7f9 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -371,8 +371,8 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $nested-arms (param $param i32) (result i32) - ;; We can do nothing for selects nested directly in select arms, but do not - ;; error at least. + ;; We do nothing for selects nested directly in select arms, but do not + ;; error at least. Note that we could apply the eqz two levels deep TODO (i32.eqz (select (select From c1f04e92e5b0ac391afc43412b6686aa788031b8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:48:15 -0800 Subject: [PATCH 55/63] work --- test/lit/passes/precompute-partial.wast | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index d76fc3cc7f9..b0c46b8c9b6 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -366,7 +366,11 @@ ;; CHECK-NEXT: (i32.const 40) ;; CHECK-NEXT: (local.get $param) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 50) + ;; CHECK-NEXT: (i32.const 60) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -385,7 +389,11 @@ (i32.const 40) (local.get $param) ) - (local.get $param) + (select + (i32.const 50) + (i32.const 60) + (local.get $param) + ) ) ) ) From d813d659942221bb7e6f37b8ab939d2fbda60b04 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 5 Jan 2024 15:53:11 -0800 Subject: [PATCH 56/63] work --- test/lit/passes/precompute-partial.wast | 29 ++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index b0c46b8c9b6..5fa08ddbd0a 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -588,7 +588,8 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $binary (param $param i32) (result i32) - ;; (Note that this works because immutable globals can be compared.) + ;; This is a common pattern in J2Wasm output. Note that this is optimized + ;; because immutable globals can be compared at compile time. (ref.eq (select (global.get $A$vtable) @@ -637,7 +638,7 @@ ) ) -;; References with nesting. +;; References with nested globals. (module (rec ;; CHECK: (rec @@ -680,7 +681,8 @@ (func $test-expanded (param $x i32) (result funcref) ;; Nesting in the global declarations means we cannot precompute the inner ;; struct.get by itself (as that would return the inner struct.new), but - ;; when we do the outer struct.get, it all comes together. + ;; when we do the outer struct.get, it all comes together. This is a common + ;; pattern in J2Wasm output. (struct.get $vtable 0 (struct.get $itable 0 (select @@ -692,6 +694,27 @@ ) ) + ;; CHECK: (func $test-expanded-almost (type $4) (param $x i32) (result anyref) + ;; CHECK-NEXT: (struct.get $itable 0 + ;; CHECK-NEXT: (select (result (ref $itable)) + ;; CHECK-NEXT: (global.get $A$itable) + ;; CHECK-NEXT: (global.get $B$itable) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-expanded-almost (param $x i32) (result anyref) + ;; As above, but without the outer struct.get. We get "stuck" with the + ;; inner part of the global, as explained there, and fail to optimize here. + (struct.get $itable 0 + (select + (global.get $A$itable) + (global.get $B$itable) + (local.get $x) + ) + ) + ) + ;; CHECK: (func $A$func (type $2) ;; CHECK-NEXT: (nop) ;; CHECK-NEXT: ) From 66c43c78cecc2f9c80d5a6b5d5551b5fec230c17 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Jan 2024 16:46:03 -0800 Subject: [PATCH 57/63] comment --- src/passes/Precompute.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index d3c60f6c305..f13dc3eca67 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -448,7 +448,11 @@ struct Precompute return; } - // Walk the function to find the parent stacks of the promising selects. + // Walk the function to find the parent stacks of the promising selects. We + // copy the stacks and process them later. We do it like this because if we + // wanted to process stacks as we reached them then we'd trip over + // ourselves: when we optimize we replace a parent, but that parent is an + // expression we'll reach later in the walk, so modifying it is unsafe. struct StackFinder : public ExpressionStackWalker { Precompute& parent; From ceec9d1b26b1bcb2b9e930dbdf10c468127cd3ad Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Jan 2024 16:49:24 -0800 Subject: [PATCH 58/63] comment --- src/passes/Precompute.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index f13dc3eca67..e052f3c5297 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -479,20 +479,21 @@ struct Precompute // modify more than once. This could happen in theory in a situation like // this: // - // (ternary.f32.max + // (ternary.f32.max ;; fictional instruction for explanatory purposes // (select ..) // (select ..) // (f32.infinity) // ) // // When we consider the first select we can see that the computation result - // is always infinity, so we can modify, and the same thing happens with the - // second select, causing a second modification. In this example the result - // is the same either way, but at least in theory an actual problem can - // occur. Note that in practice it does not seem that wasm has instructions - // that enable it atm, but this code is still useful to guard against future - // problems, and as a minor speedup (quickly skip code if it was already - // modified). + // is always infinity, so we can optimize here and replace the ternary. Then + // the same thing happens with the second select, causing the ternary to be + // replaced again, which is unsafe. (Note that in this example the result + // is the same either way, but at least in theory an instruction could exist + // for whom there was a difference.) In practice it does not seem that wasm + // has instructions capable of this atm but this code is still useful to + // guard against future problems, and as a minor speedup (quickly skip code + // if it was already modified). std::unordered_set modified; for (auto& [select, stack] : stackFinder.stackMap) { From e6467bac948f991c93f226b61a1bbafa50b009eb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 9 Jan 2024 16:57:08 -0800 Subject: [PATCH 59/63] simpler loop --- src/passes/Precompute.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index e052f3c5297..c16e584a18f 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -513,8 +513,9 @@ struct Precompute // Go up through the parents, until we can't do any more work. At each // parent we'll try to execute it and all intermediate parents into the // select arms. - Index parentIndex = selectIndex - 1; - while (1) { + for (Index parentIndex = selectIndex - 1; + parentIndex != Index(-1); + parentIndex--) { auto* parent = stack[parentIndex]; if (modified.count(parent)) { // This parent was modified; exit the loop on parents as no upper @@ -596,12 +597,6 @@ struct Precompute // pointer to its original state (if we precomputed, the parent is no // longer in use, but there is no harm in modifying it). *pointerToSelect = select; - - // Continue to the outer parent, if there is one. - if (parentIndex == 0) { - break; - } - parentIndex--; } } } From 5afd4dbe2bd7b9f5a018bbbc077c2392e107b057 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Jan 2024 09:04:09 -0800 Subject: [PATCH 60/63] add suggested comment --- src/passes/Precompute.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index c16e584a18f..65ea2c735bb 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -488,8 +488,9 @@ struct Precompute // When we consider the first select we can see that the computation result // is always infinity, so we can optimize here and replace the ternary. Then // the same thing happens with the second select, causing the ternary to be - // replaced again, which is unsafe. (Note that in this example the result - // is the same either way, but at least in theory an instruction could exist + // replaced again, which is unsafe because it no longer exists after we + // precomputed it the first time. (Note that in this example the result is + // the same either way, but at least in theory an instruction could exist // for whom there was a difference.) In practice it does not seem that wasm // has instructions capable of this atm but this code is still useful to // guard against future problems, and as a minor speedup (quickly skip code From c561a399f6bf2f1251618449381e53a44f377d21 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Jan 2024 09:07:03 -0800 Subject: [PATCH 61/63] add suggested testcases --- test/lit/passes/precompute-partial.wast | 57 +++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 5fa08ddbd0a..4875b76c282 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -72,6 +72,63 @@ ) ) + ;; CHECK: (func $not-left (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $not-left (param $param i32) (result i32) + (i32.eqz + (select + (local.get $param) ;; this cannot be precomputed, so we do nothing + (i32.const 0) + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $not-right (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 0) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $not-right (param $param i32) (result i32) + (i32.eqz + (select + (i32.const 0) + (local.get $param) ;; this cannot be precomputed, so we do nothing + (local.get $param) + ) + ) + ) + + ;; CHECK: (func $not-neither (type $0) (param $param i32) (result i32) + ;; CHECK-NEXT: (i32.eqz + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: (local.get $param) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $not-neither (param $param i32) (result i32) + (i32.eqz + (select + (local.get $param) ;; these cannot be precomputed, + (local.get $param) ;; so we do nothing + (local.get $param) + ) + ) + ) + ;; CHECK: (func $binary (type $0) (param $param i32) (result i32) ;; CHECK-NEXT: (select ;; CHECK-NEXT: (i32.const 11) From 6ecc7b0a473bccb149afd5fa6aee5ec3fca97741 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Jan 2024 09:11:02 -0800 Subject: [PATCH 62/63] simplify test --- test/lit/passes/precompute-partial.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/precompute-partial.wast b/test/lit/passes/precompute-partial.wast index 4875b76c282..86a55f56a5b 100644 --- a/test/lit/passes/precompute-partial.wast +++ b/test/lit/passes/precompute-partial.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. -;; RUN: foreach %s %t wasm-opt --precompute-propagate --optimize-level=2 -all -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --precompute --optimize-level=2 -all -S -o - | filecheck %s (module ;; CHECK: (func $simple-1 (type $0) (param $param i32) (result i32) From 6f4d81e1dbd2b9ba1270057b474872323cf544d5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 10 Jan 2024 10:43:47 -0800 Subject: [PATCH 63/63] format --- src/passes/Precompute.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/passes/Precompute.cpp b/src/passes/Precompute.cpp index 65ea2c735bb..5baf2331f14 100644 --- a/src/passes/Precompute.cpp +++ b/src/passes/Precompute.cpp @@ -514,8 +514,7 @@ struct Precompute // Go up through the parents, until we can't do any more work. At each // parent we'll try to execute it and all intermediate parents into the // select arms. - for (Index parentIndex = selectIndex - 1; - parentIndex != Index(-1); + for (Index parentIndex = selectIndex - 1; parentIndex != Index(-1); parentIndex--) { auto* parent = stack[parentIndex]; if (modified.count(parent)) {