From d35493e44cfc657cbc7883cf78d1e6f37d6e05a6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 15:23:35 -0800 Subject: [PATCH 01/42] start --- src/passes/StringLowering.cpp | 423 ++++++++++++++++++++++++++++++++++ 1 file changed, 423 insertions(+) create mode 100644 src/passes/StringLowering.cpp diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp new file mode 100644 index 00000000000..0cab61a886a --- /dev/null +++ b/src/passes/StringLowering.cpp @@ -0,0 +1,423 @@ +/* + * Copyright 2024 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Lowers string.const etc. into imports. That is, +// +// (local.set $x (string.const "foo.bar")) +// +// will turn into +// +// (import "string.const" "0" (global externref $string.const_foo_bar)) +// .. +// (local.set $x (global.get $string.const_foo.bar)) +// +// as well as a custom section "string.consts" containing a JSON string of an +// array with the string contents: +// +// ["foo.bar"] +// +// The expectation is that the host environment will read the custom section and +// then provide the imports, resulting in something like this: +// +// let stringConstsBuffer = +// WebAssembly.Module.customSections(module, 'string.consts'); +// let stringConstsString = +// new TextDecoder().decode(new Uint8Array(stringConstsBuffer, 'name')); +// WebAssembly.instantiate(module, { +// string.const: JSON.stringify(stringConstsString) // ["foo.bar"] +// }); +// + +#include "pass.h" +#include "wasm-builder.h" +#include "wasm.h" + +namespace wasm { + +struct StringLowering : public WalkerPass> { + // Core lowering of a 32-bit load: ensures it is done using aligned + // operations, which means we can leave it alone if it's already aligned, or + // else we break it up into smaller loads that are. + Expression* lowerLoadI32(Load* curr) { + if (curr->align == 0 || curr->align == curr->bytes) { + return curr; + } + auto mem = getModule()->getMemory(curr->memory); + auto indexType = mem->indexType; + Builder builder(*getModule()); + assert(curr->type == Type::i32); + auto temp = builder.addVar(getFunction(), indexType); + Expression* ret; + if (curr->bytes == 2) { + ret = builder.makeBinary( + OrInt32, + builder.makeLoad(1, + false, + curr->offset, + 1, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeBinary( + ShlInt32, + builder.makeLoad(1, + false, + curr->offset + 1, + 1, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeConst(int32_t(8)))); + if (curr->signed_) { + ret = Bits::makeSignExt(ret, 2, *getModule()); + } + } else if (curr->bytes == 4) { + if (curr->align == 1) { + ret = builder.makeBinary( + OrInt32, + builder.makeBinary( + OrInt32, + builder.makeLoad(1, + false, + curr->offset, + 1, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeBinary( + ShlInt32, + builder.makeLoad(1, + false, + curr->offset + 1, + 1, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeConst(int32_t(8)))), + builder.makeBinary( + OrInt32, + builder.makeBinary( + ShlInt32, + builder.makeLoad(1, + false, + curr->offset + 2, + 1, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeConst(int32_t(16))), + builder.makeBinary( + ShlInt32, + builder.makeLoad(1, + false, + curr->offset + 3, + 1, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeConst(int32_t(24))))); + } else if (curr->align == 2) { + ret = builder.makeBinary( + OrInt32, + builder.makeLoad(2, + false, + curr->offset, + 2, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeBinary( + ShlInt32, + builder.makeLoad(2, + false, + curr->offset + 2, + 2, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory), + builder.makeConst(int32_t(16)))); + } else { + WASM_UNREACHABLE("invalid alignment"); + } + } else { + WASM_UNREACHABLE("invalid size"); + } + return builder.makeBlock({builder.makeLocalSet(temp, curr->ptr), ret}); + } + + // Core lowering of a 32-bit store. + Expression* lowerStoreI32(Store* curr) { + if (curr->align == 0 || curr->align == curr->bytes) { + return curr; + } + Builder builder(*getModule()); + assert(curr->value->type == Type::i32); + auto mem = getModule()->getMemory(curr->memory); + auto indexType = mem->indexType; + auto tempPtr = builder.addVar(getFunction(), indexType); + auto tempValue = builder.addVar(getFunction(), Type::i32); + auto* block = + builder.makeBlock({builder.makeLocalSet(tempPtr, curr->ptr), + builder.makeLocalSet(tempValue, curr->value)}); + if (curr->bytes == 2) { + block->list.push_back( + builder.makeStore(1, + curr->offset, + 1, + builder.makeLocalGet(tempPtr, indexType), + builder.makeLocalGet(tempValue, Type::i32), + Type::i32, + curr->memory)); + block->list.push_back(builder.makeStore( + 1, + curr->offset + 1, + 1, + builder.makeLocalGet(tempPtr, indexType), + builder.makeBinary(ShrUInt32, + builder.makeLocalGet(tempValue, Type::i32), + builder.makeConst(int32_t(8))), + Type::i32, + curr->memory)); + } else if (curr->bytes == 4) { + if (curr->align == 1) { + block->list.push_back( + builder.makeStore(1, + curr->offset, + 1, + builder.makeLocalGet(tempPtr, indexType), + builder.makeLocalGet(tempValue, Type::i32), + Type::i32, + curr->memory)); + block->list.push_back(builder.makeStore( + 1, + curr->offset + 1, + 1, + builder.makeLocalGet(tempPtr, indexType), + builder.makeBinary(ShrUInt32, + builder.makeLocalGet(tempValue, Type::i32), + builder.makeConst(int32_t(8))), + Type::i32, + curr->memory)); + block->list.push_back(builder.makeStore( + 1, + curr->offset + 2, + 1, + builder.makeLocalGet(tempPtr, indexType), + builder.makeBinary(ShrUInt32, + builder.makeLocalGet(tempValue, Type::i32), + builder.makeConst(int32_t(16))), + Type::i32, + curr->memory)); + block->list.push_back(builder.makeStore( + 1, + curr->offset + 3, + 1, + builder.makeLocalGet(tempPtr, indexType), + builder.makeBinary(ShrUInt32, + builder.makeLocalGet(tempValue, Type::i32), + builder.makeConst(int32_t(24))), + Type::i32, + curr->memory)); + } else if (curr->align == 2) { + block->list.push_back( + builder.makeStore(2, + curr->offset, + 2, + builder.makeLocalGet(tempPtr, indexType), + builder.makeLocalGet(tempValue, Type::i32), + Type::i32, + curr->memory)); + block->list.push_back(builder.makeStore( + 2, + curr->offset + 2, + 2, + builder.makeLocalGet(tempPtr, indexType), + builder.makeBinary(ShrUInt32, + builder.makeLocalGet(tempValue, Type::i32), + builder.makeConst(int32_t(16))), + Type::i32, + curr->memory)); + } else { + WASM_UNREACHABLE("invalid alignment"); + } + } else { + WASM_UNREACHABLE("invalid size"); + } + block->finalize(); + return block; + } + + void visitLoad(Load* curr) { + // If unreachable, just remove the load, which removes the unaligned + // operation in a trivial way. + if (curr->type == Type::unreachable) { + replaceCurrent(curr->ptr); + return; + } + if (curr->align == 0 || curr->align == curr->bytes) { + // Nothing to do: leave the node unchanged. All code lower down assumes + // the operation is unaligned. + return; + } + Builder builder(*getModule()); + auto type = curr->type.getBasic(); + Expression* replacement; + switch (type) { + default: + WASM_UNREACHABLE("unhandled unaligned load"); + case Type::i32: + replacement = lowerLoadI32(curr); + break; + case Type::f32: + curr->type = Type::i32; + replacement = builder.makeUnary(ReinterpretInt32, lowerLoadI32(curr)); + break; + case Type::i64: + case Type::f64: + if (type == Type::i64 && curr->bytes != 8) { + // A load of <64 bits. + curr->type = Type::i32; + replacement = builder.makeUnary( + curr->signed_ ? ExtendSInt32 : ExtendUInt32, lowerLoadI32(curr)); + break; + } + // Load two 32-bit pieces, and combine them. + auto mem = getModule()->getMemory(curr->memory); + auto indexType = mem->indexType; + auto temp = builder.addVar(getFunction(), indexType); + auto* set = builder.makeLocalSet(temp, curr->ptr); + Expression* low = + lowerLoadI32(builder.makeLoad(4, + false, + curr->offset, + curr->align, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory)); + low = builder.makeUnary(ExtendUInt32, low); + // Note that the alignment is assumed to be the same here, even though + // we add an offset of 4. That is because this is an unaligned load, so + // the alignment is 1, 2, or 4, which means it stays the same after + // adding 4. + Expression* high = + lowerLoadI32(builder.makeLoad(4, + false, + curr->offset + 4, + curr->align, + builder.makeLocalGet(temp, indexType), + Type::i32, + curr->memory)); + high = builder.makeUnary(ExtendUInt32, high); + high = + builder.makeBinary(ShlInt64, high, builder.makeConst(int64_t(32))); + auto* combined = builder.makeBinary(OrInt64, low, high); + replacement = builder.makeSequence(set, combined); + // Ensure the proper output type. + if (type == Type::f64) { + replacement = builder.makeUnary(ReinterpretInt64, replacement); + } + break; + } + replaceCurrent(replacement); + } + + void visitStore(Store* curr) { + Builder builder(*getModule()); + // If unreachable, just remove the store, which removes the unaligned + // operation in a trivial way. + if (curr->type == Type::unreachable) { + replaceCurrent(builder.makeBlock( + {builder.makeDrop(curr->ptr), builder.makeDrop(curr->value)})); + return; + } + if (curr->align == 0 || curr->align == curr->bytes) { + // Nothing to do: leave the node unchanged. All code lower down assumes + // the operation is unaligned. + return; + } + auto type = curr->value->type.getBasic(); + Expression* replacement; + switch (type) { + default: + WASM_UNREACHABLE("unhandled unaligned store"); + case Type::i32: + replacement = lowerStoreI32(curr); + break; + case Type::f32: + curr->type = Type::i32; + curr->value = builder.makeUnary(ReinterpretFloat32, curr->value); + replacement = lowerStoreI32(curr); + break; + case Type::i64: + case Type::f64: + if (type == Type::i64 && curr->bytes != 8) { + // A store of <64 bits. + curr->type = Type::i32; + curr->value = builder.makeUnary(WrapInt64, curr->value); + replacement = lowerStoreI32(curr); + break; + } + // Otherwise, fall through to f64 case for a 64-bit load. + // Ensure an integer input value. + auto* value = curr->value; + if (type == Type::f64) { + value = builder.makeUnary(ReinterpretFloat64, value); + } + // Store as two 32-bit pieces. + auto mem = getModule()->getMemory(curr->memory); + auto indexType = mem->indexType; + auto tempPtr = builder.addVar(getFunction(), indexType); + auto* setPtr = builder.makeLocalSet(tempPtr, curr->ptr); + auto tempValue = builder.addVar(getFunction(), Type::i64); + auto* setValue = builder.makeLocalSet(tempValue, value); + Expression* low = builder.makeUnary( + WrapInt64, builder.makeLocalGet(tempValue, Type::i64)); + low = lowerStoreI32( + builder.makeStore(4, + curr->offset, + curr->align, + builder.makeLocalGet(tempPtr, indexType), + low, + Type::i32, + curr->memory)); + Expression* high = + builder.makeBinary(ShrUInt64, + builder.makeLocalGet(tempValue, Type::i64), + builder.makeConst(int64_t(32))); + high = builder.makeUnary(WrapInt64, high); + // Note that the alignment is assumed to be the same here, even though + // we add an offset of 4. That is because this is an unaligned store, so + // the alignment is 1, 2, or 4, which means it stays the same after + // adding 4. + high = lowerStoreI32( + builder.makeStore(4, + curr->offset + 4, + curr->align, + builder.makeLocalGet(tempPtr, indexType), + high, + Type::i32, + curr->memory)); + replacement = builder.makeBlock({setPtr, setValue, low, high}); + break; + } + replaceCurrent(replacement); + } +}; + +Pass* createStringLoweringPass() { return new StringLowering(); } + +} // namespace wasm From 77d56463e1f9c4defc3d51d04114586679c24f00 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 15:40:04 -0800 Subject: [PATCH 02/42] werk --- src/passes/StringLowering.cpp | 410 +++++----------------------------- 1 file changed, 51 insertions(+), 359 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 0cab61a886a..417bb6469b2 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -42,379 +42,71 @@ // }); // +#include "ir/module-utils.h" +#include "ir/find_all.h" #include "pass.h" #include "wasm-builder.h" #include "wasm.h" namespace wasm { -struct StringLowering : public WalkerPass> { - // Core lowering of a 32-bit load: ensures it is done using aligned - // operations, which means we can leave it alone if it's already aligned, or - // else we break it up into smaller loads that are. - Expression* lowerLoadI32(Load* curr) { - if (curr->align == 0 || curr->align == curr->bytes) { - return curr; - } - auto mem = getModule()->getMemory(curr->memory); - auto indexType = mem->indexType; - Builder builder(*getModule()); - assert(curr->type == Type::i32); - auto temp = builder.addVar(getFunction(), indexType); - Expression* ret; - if (curr->bytes == 2) { - ret = builder.makeBinary( - OrInt32, - builder.makeLoad(1, - false, - curr->offset, - 1, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeBinary( - ShlInt32, - builder.makeLoad(1, - false, - curr->offset + 1, - 1, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeConst(int32_t(8)))); - if (curr->signed_) { - ret = Bits::makeSignExt(ret, 2, *getModule()); - } - } else if (curr->bytes == 4) { - if (curr->align == 1) { - ret = builder.makeBinary( - OrInt32, - builder.makeBinary( - OrInt32, - builder.makeLoad(1, - false, - curr->offset, - 1, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeBinary( - ShlInt32, - builder.makeLoad(1, - false, - curr->offset + 1, - 1, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeConst(int32_t(8)))), - builder.makeBinary( - OrInt32, - builder.makeBinary( - ShlInt32, - builder.makeLoad(1, - false, - curr->offset + 2, - 1, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeConst(int32_t(16))), - builder.makeBinary( - ShlInt32, - builder.makeLoad(1, - false, - curr->offset + 3, - 1, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeConst(int32_t(24))))); - } else if (curr->align == 2) { - ret = builder.makeBinary( - OrInt32, - builder.makeLoad(2, - false, - curr->offset, - 2, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeBinary( - ShlInt32, - builder.makeLoad(2, - false, - curr->offset + 2, - 2, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory), - builder.makeConst(int32_t(16)))); - } else { - WASM_UNREACHABLE("invalid alignment"); - } - } else { - WASM_UNREACHABLE("invalid size"); - } - return builder.makeBlock({builder.makeLocalSet(temp, curr->ptr), ret}); - } +struct StringLowering : public Pass { + // All the strings we found in the module, and a reverse mapping. + std::vector strings; + std::unordered_map stringIndexes; - // Core lowering of a 32-bit store. - Expression* lowerStoreI32(Store* curr) { - if (curr->align == 0 || curr->align == curr->bytes) { - return curr; - } - Builder builder(*getModule()); - assert(curr->value->type == Type::i32); - auto mem = getModule()->getMemory(curr->memory); - auto indexType = mem->indexType; - auto tempPtr = builder.addVar(getFunction(), indexType); - auto tempValue = builder.addVar(getFunction(), Type::i32); - auto* block = - builder.makeBlock({builder.makeLocalSet(tempPtr, curr->ptr), - builder.makeLocalSet(tempValue, curr->value)}); - if (curr->bytes == 2) { - block->list.push_back( - builder.makeStore(1, - curr->offset, - 1, - builder.makeLocalGet(tempPtr, indexType), - builder.makeLocalGet(tempValue, Type::i32), - Type::i32, - curr->memory)); - block->list.push_back(builder.makeStore( - 1, - curr->offset + 1, - 1, - builder.makeLocalGet(tempPtr, indexType), - builder.makeBinary(ShrUInt32, - builder.makeLocalGet(tempValue, Type::i32), - builder.makeConst(int32_t(8))), - Type::i32, - curr->memory)); - } else if (curr->bytes == 4) { - if (curr->align == 1) { - block->list.push_back( - builder.makeStore(1, - curr->offset, - 1, - builder.makeLocalGet(tempPtr, indexType), - builder.makeLocalGet(tempValue, Type::i32), - Type::i32, - curr->memory)); - block->list.push_back(builder.makeStore( - 1, - curr->offset + 1, - 1, - builder.makeLocalGet(tempPtr, indexType), - builder.makeBinary(ShrUInt32, - builder.makeLocalGet(tempValue, Type::i32), - builder.makeConst(int32_t(8))), - Type::i32, - curr->memory)); - block->list.push_back(builder.makeStore( - 1, - curr->offset + 2, - 1, - builder.makeLocalGet(tempPtr, indexType), - builder.makeBinary(ShrUInt32, - builder.makeLocalGet(tempValue, Type::i32), - builder.makeConst(int32_t(16))), - Type::i32, - curr->memory)); - block->list.push_back(builder.makeStore( - 1, - curr->offset + 3, - 1, - builder.makeLocalGet(tempPtr, indexType), - builder.makeBinary(ShrUInt32, - builder.makeLocalGet(tempValue, Type::i32), - builder.makeConst(int32_t(24))), - Type::i32, - curr->memory)); - } else if (curr->align == 2) { - block->list.push_back( - builder.makeStore(2, - curr->offset, - 2, - builder.makeLocalGet(tempPtr, indexType), - builder.makeLocalGet(tempValue, Type::i32), - Type::i32, - curr->memory)); - block->list.push_back(builder.makeStore( - 2, - curr->offset + 2, - 2, - builder.makeLocalGet(tempPtr, indexType), - builder.makeBinary(ShrUInt32, - builder.makeLocalGet(tempValue, Type::i32), - builder.makeConst(int32_t(16))), - Type::i32, - curr->memory)); - } else { - WASM_UNREACHABLE("invalid alignment"); - } - } else { - WASM_UNREACHABLE("invalid size"); - } - block->finalize(); - return block; + void run(Module* module) override { + scanStrings(module); + replaceStrings(module); + addGlobals(module); } - void visitLoad(Load* curr) { - // If unreachable, just remove the load, which removes the unaligned - // operation in a trivial way. - if (curr->type == Type::unreachable) { - replaceCurrent(curr->ptr); - return; - } - if (curr->align == 0 || curr->align == curr->bytes) { - // Nothing to do: leave the node unchanged. All code lower down assumes - // the operation is unaligned. - return; - } - Builder builder(*getModule()); - auto type = curr->type.getBasic(); - Expression* replacement; - switch (type) { - default: - WASM_UNREACHABLE("unhandled unaligned load"); - case Type::i32: - replacement = lowerLoadI32(curr); - break; - case Type::f32: - curr->type = Type::i32; - replacement = builder.makeUnary(ReinterpretInt32, lowerLoadI32(curr)); - break; - case Type::i64: - case Type::f64: - if (type == Type::i64 && curr->bytes != 8) { - // A load of <64 bits. - curr->type = Type::i32; - replacement = builder.makeUnary( - curr->signed_ ? ExtendSInt32 : ExtendUInt32, lowerLoadI32(curr)); - break; - } - // Load two 32-bit pieces, and combine them. - auto mem = getModule()->getMemory(curr->memory); - auto indexType = mem->indexType; - auto temp = builder.addVar(getFunction(), indexType); - auto* set = builder.makeLocalSet(temp, curr->ptr); - Expression* low = - lowerLoadI32(builder.makeLoad(4, - false, - curr->offset, - curr->align, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory)); - low = builder.makeUnary(ExtendUInt32, low); - // Note that the alignment is assumed to be the same here, even though - // we add an offset of 4. That is because this is an unaligned load, so - // the alignment is 1, 2, or 4, which means it stays the same after - // adding 4. - Expression* high = - lowerLoadI32(builder.makeLoad(4, - false, - curr->offset + 4, - curr->align, - builder.makeLocalGet(temp, indexType), - Type::i32, - curr->memory)); - high = builder.makeUnary(ExtendUInt32, high); - high = - builder.makeBinary(ShlInt64, high, builder.makeConst(int64_t(32))); - auto* combined = builder.makeBinary(OrInt64, low, high); - replacement = builder.makeSequence(set, combined); - // Ensure the proper output type. - if (type == Type::f64) { - replacement = builder.makeUnary(ReinterpretInt64, replacement); + // Scan the entire wasm to find the relevant strings and populate strings and + // stringIndexes. + void scanStrings() { + using StringSet = std::unordered_set; + + struct StringWalker : public PostWalker { + StringSet& strings; + + StringWalker(StringSet& strings) : strings(strings) {} + + void visitStringConst(StringConst* curr) { strings.insert(curr->string); } + }; + + ModuleUtils::ParallelFunctionAnalysis analysis( + *wasm, [&](Function* func, StringSet& strings) { + if (!func->imported()) { + StringWalker(strings).walk(func->body); } - break; - } - replaceCurrent(replacement); - } + }); + + // Also walk the global module code (for simplicity, also add it to the + // function map, using a "function" key of nullptr). + auto& globalStrings = analysis.map[nullptr]; + StringWalker(globalStrings).walkModuleCode(wasm); - void visitStore(Store* curr) { - Builder builder(*getModule()); - // If unreachable, just remove the store, which removes the unaligned - // operation in a trivial way. - if (curr->type == Type::unreachable) { - replaceCurrent(builder.makeBlock( - {builder.makeDrop(curr->ptr), builder.makeDrop(curr->value)})); - return; + // Generate the indexes from the combined set of necessary strings, + // which we sort for determinism. + StringSet allStrings; + for (auto& [func, strings] : analysis.map) { + for (auto& string : strings) { + allStrings.insert(string); + } } - if (curr->align == 0 || curr->align == curr->bytes) { - // Nothing to do: leave the node unchanged. All code lower down assumes - // the operation is unaligned. - return; + for (auto& string : allStrings) { + strings.push_back(string); } - auto type = curr->value->type.getBasic(); - Expression* replacement; - switch (type) { - default: - WASM_UNREACHABLE("unhandled unaligned store"); - case Type::i32: - replacement = lowerStoreI32(curr); - break; - case Type::f32: - curr->type = Type::i32; - curr->value = builder.makeUnary(ReinterpretFloat32, curr->value); - replacement = lowerStoreI32(curr); - break; - case Type::i64: - case Type::f64: - if (type == Type::i64 && curr->bytes != 8) { - // A store of <64 bits. - curr->type = Type::i32; - curr->value = builder.makeUnary(WrapInt64, curr->value); - replacement = lowerStoreI32(curr); - break; - } - // Otherwise, fall through to f64 case for a 64-bit load. - // Ensure an integer input value. - auto* value = curr->value; - if (type == Type::f64) { - value = builder.makeUnary(ReinterpretFloat64, value); - } - // Store as two 32-bit pieces. - auto mem = getModule()->getMemory(curr->memory); - auto indexType = mem->indexType; - auto tempPtr = builder.addVar(getFunction(), indexType); - auto* setPtr = builder.makeLocalSet(tempPtr, curr->ptr); - auto tempValue = builder.addVar(getFunction(), Type::i64); - auto* setValue = builder.makeLocalSet(tempValue, value); - Expression* low = builder.makeUnary( - WrapInt64, builder.makeLocalGet(tempValue, Type::i64)); - low = lowerStoreI32( - builder.makeStore(4, - curr->offset, - curr->align, - builder.makeLocalGet(tempPtr, indexType), - low, - Type::i32, - curr->memory)); - Expression* high = - builder.makeBinary(ShrUInt64, - builder.makeLocalGet(tempValue, Type::i64), - builder.makeConst(int64_t(32))); - high = builder.makeUnary(WrapInt64, high); - // Note that the alignment is assumed to be the same here, even though - // we add an offset of 4. That is because this is an unaligned store, so - // the alignment is 1, 2, or 4, which means it stays the same after - // adding 4. - high = lowerStoreI32( - builder.makeStore(4, - curr->offset + 4, - curr->align, - builder.makeLocalGet(tempPtr, indexType), - high, - Type::i32, - curr->memory)); - replacement = builder.makeBlock({setPtr, setValue, low, high}); - break; + std::sort(strings.begin(), strings.end()); + for (Index i = 0; i < strings.size(); i++) { + stringIndexes[strings[i]] = i; } - replaceCurrent(replacement); + } + + void replaceStrings(Module* module) { + } + + void addGlobals(Module* module) { } }; From 5ebd0a1c50686fadb22ef6c9c275ee026dbea379 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 15:57:38 -0800 Subject: [PATCH 03/42] work --- src/passes/StringLowering.cpp | 67 ++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 417bb6469b2..85593cc83ec 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -41,6 +41,9 @@ // string.const: JSON.stringify(stringConstsString) // ["foo.bar"] // }); // +// After running this you may benefit from --simplify-globals and +// --reorder-globals. +// #include "ir/module-utils.h" #include "ir/find_all.h" @@ -55,29 +58,34 @@ struct StringLowering : public Pass { std::vector strings; std::unordered_map stringIndexes; + // Pointers to all StringConsts, so that we can replace them. + using StringPtrs = std::vector; + StringPtrs stringPtrs; + + // Main entry point. void run(Module* module) override { scanStrings(module); + addImports(module); replaceStrings(module); - addGlobals(module); } - // Scan the entire wasm to find the relevant strings and populate strings and - // stringIndexes. + // Scan the entire wasm to find the relevant strings and populate our global + // data structures. void scanStrings() { - using StringSet = std::unordered_set; - struct StringWalker : public PostWalker { - StringSet& strings; + StringPtr& stringPtrs; - StringWalker(StringSet& strings) : strings(strings) {} + StringWalker(StringPtr& stringPtrs) : stringPtrs(stringPtrs) {} - void visitStringConst(StringConst* curr) { strings.insert(curr->string); } + void visitStringConst(StringConst* curr) { + stringPtrs.push_back(getCurrentPointer()); + } }; - ModuleUtils::ParallelFunctionAnalysis analysis( - *wasm, [&](Function* func, StringSet& strings) { + ModuleUtils::ParallelFunctionAnalysis analysis( + *wasm, [&](Function* func, StringPtrs& stringPtrs) { if (!func->imported()) { - StringWalker(strings).walk(func->body); + StringWalker(stringPtrs).walk(func->body); } }); @@ -86,15 +94,17 @@ struct StringLowering : public Pass { auto& globalStrings = analysis.map[nullptr]; StringWalker(globalStrings).walkModuleCode(wasm); - // Generate the indexes from the combined set of necessary strings, - // which we sort for determinism. - StringSet allStrings; - for (auto& [func, strings] : analysis.map) { - for (auto& string : strings) { - allStrings.insert(string); + // Combine all the strings. + std::unordered_set stringSet; + for (auto& [_, stringPtrs] : analysis.map) { + for (auto** stringPtr : stringPtrs) { + stringSet.insert((*stringPtr)->name); + stringPtrs.push_back(stringPtr); } } - for (auto& string : allStrings) { + // Generate the indexes from the combined set of necessary strings, which we + // sort for determinism. + for (auto& string : stringSet) { strings.push_back(string); } std::sort(strings.begin(), strings.end()); @@ -103,10 +113,27 @@ struct StringLowering : public Pass { } } - void replaceStrings(Module* module) { + // For each string index, the name of the import that replaces it. + std::vector importNames; + + void addImports(Module* module) { + // TOOD: imports should be in front, so they can be used from globals, or + // run sort-globals internally.. + Builder builder(*module); + for (auto& string : strings) { + auto name = Names::getValidGlobalName(*module, std::string("string.const_") + string.str); + importNames.push_back(name); + module->addGlobal(builder.makeGlobal(name, Type::externref, nullptr, Builder::Immutable)); + } } - void addGlobals(Module* module) { + void replaceStrings(Module* module) { + Builder builder(*module); + for (auto** stringPtr : stringPtrs) { + auto stringConst = *stringPtr; + auto importName = importNames[stringIndexes[stringConst->string]]; + *stringPtr = builder.makeGlobalGet(importName, Type::externref); + } } }; From 3049a99910e92aeb0d267e4d09008209b8faed0b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 16:10:24 -0800 Subject: [PATCH 04/42] work --- src/passes/CMakeLists.txt | 1 + src/passes/pass.cpp | 2 ++ src/passes/passes.h | 1 + 3 files changed, 4 insertions(+) diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 0e74d4b93b8..4cb696c2199 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -90,6 +90,7 @@ set(passes_SOURCES SignaturePruning.cpp SignatureRefining.cpp SignExtLowering.cpp + StringLowering.cpp Strip.cpp StripTargetFeatures.cpp RedundantSetElimination.cpp diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 5f5db2f7f49..04e105f0188 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -475,6 +475,8 @@ void PassRegistry::registerPasses() { "ssa-nomerge", "ssa-ify variables so that they have a single assignment, ignoring merges", createSSAifyNoMergePass); + registerPass( + "string-lowering", "lower wasm strings to imports", createStringLoweringPass); registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", diff --git a/src/passes/passes.h b/src/passes/passes.h index 66535bdc0e2..950e30aa7b9 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -153,6 +153,7 @@ Pass* createSimplifyLocalsNoTeePass(); Pass* createSimplifyLocalsNoStructurePass(); Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); +Pass* createStringLoweringPass(); Pass* createStripDebugPass(); Pass* createStripDWARFPass(); Pass* createStripProducersPass(); From 56f940ec32eba8edda16a91c01f6bb68af840363 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 16:17:05 -0800 Subject: [PATCH 05/42] buid --- src/passes/StringLowering.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 85593cc83ec..99d848ce556 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -46,6 +46,7 @@ // #include "ir/module-utils.h" +#include "ir/names.h" #include "ir/find_all.h" #include "pass.h" #include "wasm-builder.h" @@ -59,7 +60,7 @@ struct StringLowering : public Pass { std::unordered_map stringIndexes; // Pointers to all StringConsts, so that we can replace them. - using StringPtrs = std::vector; + using StringPtrs = std::vector; StringPtrs stringPtrs; // Main entry point. @@ -71,11 +72,11 @@ struct StringLowering : public Pass { // Scan the entire wasm to find the relevant strings and populate our global // data structures. - void scanStrings() { + void scanStrings(Module* module) { struct StringWalker : public PostWalker { - StringPtr& stringPtrs; + StringPtrs& stringPtrs; - StringWalker(StringPtr& stringPtrs) : stringPtrs(stringPtrs) {} + StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {} void visitStringConst(StringConst* curr) { stringPtrs.push_back(getCurrentPointer()); @@ -83,7 +84,7 @@ struct StringLowering : public Pass { }; ModuleUtils::ParallelFunctionAnalysis analysis( - *wasm, [&](Function* func, StringPtrs& stringPtrs) { + *module, [&](Function* func, StringPtrs& stringPtrs) { if (!func->imported()) { StringWalker(stringPtrs).walk(func->body); } @@ -92,13 +93,13 @@ struct StringLowering : public Pass { // Also walk the global module code (for simplicity, also add it to the // function map, using a "function" key of nullptr). auto& globalStrings = analysis.map[nullptr]; - StringWalker(globalStrings).walkModuleCode(wasm); + StringWalker(globalStrings).walkModuleCode(module); // Combine all the strings. std::unordered_set stringSet; for (auto& [_, stringPtrs] : analysis.map) { for (auto** stringPtr : stringPtrs) { - stringSet.insert((*stringPtr)->name); + stringSet.insert((*stringPtr)->cast()->string); stringPtrs.push_back(stringPtr); } } @@ -116,23 +117,25 @@ struct StringLowering : public Pass { // For each string index, the name of the import that replaces it. std::vector importNames; + Type nnexternref = Type(HeapType::ext, NonNullable); + void addImports(Module* module) { // TOOD: imports should be in front, so they can be used from globals, or // run sort-globals internally.. Builder builder(*module); for (auto& string : strings) { - auto name = Names::getValidGlobalName(*module, std::string("string.const_") + string.str); + auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(string.str)); importNames.push_back(name); - module->addGlobal(builder.makeGlobal(name, Type::externref, nullptr, Builder::Immutable)); + module->addGlobal(builder.makeGlobal(name, nnexternref, nullptr, Builder::Immutable)); } } void replaceStrings(Module* module) { Builder builder(*module); for (auto** stringPtr : stringPtrs) { - auto stringConst = *stringPtr; + auto* stringConst = (*stringPtr)->cast(); auto importName = importNames[stringIndexes[stringConst->string]]; - *stringPtr = builder.makeGlobalGet(importName, Type::externref); + *stringPtr = builder.makeGlobalGet(importName, nnexternref); } } }; From 83dd068e6c61f1592bfa5a035571787fa62ff3fb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 16:19:24 -0800 Subject: [PATCH 06/42] test --- test/lit/passes/string-lowering.wast | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 test/lit/passes/string-lowering.wast diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast new file mode 100644 index 00000000000..9bc3a94e9f3 --- /dev/null +++ b/test/lit/passes/string-lowering.wast @@ -0,0 +1,25 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s + +(module + (global $global anyref (string.const "foo")) + + (func $a + (drop + (string.const "bar") + ) + (drop + (string.const "baz") + ) + ) + + (func $a + (drop + (string.const "bar") + ) + (drop + (string.const "foo") + ) + ) +) From 48089e0457a0c5db75fac9985c21a0a175ee9be0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 16:32:13 -0800 Subject: [PATCH 07/42] work --- src/passes/StringLowering.cpp | 24 ++++++++++++++++++------ test/lit/passes/string-lowering.wast | 2 +- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 99d848ce556..3c508c85d11 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -82,36 +82,45 @@ struct StringLowering : public Pass { stringPtrs.push_back(getCurrentPointer()); } }; - +std::cout << "a1\n"; ModuleUtils::ParallelFunctionAnalysis analysis( *module, [&](Function* func, StringPtrs& stringPtrs) { if (!func->imported()) { StringWalker(stringPtrs).walk(func->body); } }); +std::cout << "a2\n"; // Also walk the global module code (for simplicity, also add it to the // function map, using a "function" key of nullptr). auto& globalStrings = analysis.map[nullptr]; StringWalker(globalStrings).walkModuleCode(module); +std::cout << "a3\n"; // Combine all the strings. std::unordered_set stringSet; - for (auto& [_, stringPtrs] : analysis.map) { - for (auto** stringPtr : stringPtrs) { + for (auto& [_, currStringPtrs] : analysis.map) { + for (auto** stringPtr : currStringPtrs) { +std::cout << " b1 " << stringPtr << "\n"; +std::cout << " b2 " << *stringPtr << "\n"; +std::cout << " b3 " << **stringPtr << "\n"; stringSet.insert((*stringPtr)->cast()->string); stringPtrs.push_back(stringPtr); } } + +std::cout << "a4\n"; // Generate the indexes from the combined set of necessary strings, which we // sort for determinism. for (auto& string : stringSet) { strings.push_back(string); } +std::cout << "a5\n"; std::sort(strings.begin(), strings.end()); for (Index i = 0; i < strings.size(); i++) { stringIndexes[strings[i]] = i; } +std::cout << "a6\n"; } // For each string index, the name of the import that replaces it. @@ -123,10 +132,13 @@ struct StringLowering : public Pass { // TOOD: imports should be in front, so they can be used from globals, or // run sort-globals internally.. Builder builder(*module); - for (auto& string : strings) { - auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(string.str)); + for (Index i = 0; i < strings.size(); i++) { + auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(strings[i].str)); importNames.push_back(name); - module->addGlobal(builder.makeGlobal(name, nnexternref, nullptr, Builder::Immutable)); + auto global = builder.makeGlobal(name, nnexternref, nullptr, Builder::Immutable); + global->module = "string.const"; + global->base = std::to_string(i); + module->addGlobal(std::move(global)); } } diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 9bc3a94e9f3..5caffcf4df7 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -14,7 +14,7 @@ ) ) - (func $a + (func $b (drop (string.const "bar") ) From 7859d8cf67058f1330ab9646bf8978113a2c6453 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 16:36:49 -0800 Subject: [PATCH 08/42] work --- src/passes/StringLowering.cpp | 20 +++++++++++++++----- test/lit/passes/string-lowering.wast | 14 +++++++++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 3c508c85d11..1fb1e7d8c71 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -65,21 +65,31 @@ struct StringLowering : public Pass { // Main entry point. void run(Module* module) override { - scanStrings(module); + processModule(module); addImports(module); replaceStrings(module); } - // Scan the entire wasm to find the relevant strings and populate our global + // Scan the entire wasm to find the relevant strings to populate our global // data structures. - void scanStrings(Module* module) { - struct StringWalker : public PostWalker { + void processModule(Module* module) { + struct StringWalker : public PostWalker> { StringPtrs& stringPtrs; StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {} + void visitExpression(Expression* curr) { + if (curr->is()) { + stringPtrs.push_back(getCurrentPointer()); + } + + // We must replace all string heap types with extern. Do so now in this + // operation to avoid needing a second pass. + if (curr->type.isRef() && curr->type.getHeapType() == HeapType::string) { + curr->type = Type(HeapType::ext, curr->type.getNullability()); + } + } void visitStringConst(StringConst* curr) { - stringPtrs.push_back(getCurrentPointer()); } }; std::cout << "a1\n"; diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 5caffcf4df7..59d91e6745b 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -3,7 +3,9 @@ ;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s (module - (global $global anyref (string.const "foo")) + (global $global (ref string) (string.const "foo")) + + (global $global2 (ref null string) (string.const "bar")) (func $a (drop @@ -14,12 +16,18 @@ ) ) - (func $b + (func $b ;; TODO params and results and tuples and what not (drop (string.const "bar") ) (drop - (string.const "foo") + (string.const "other") + ) + (drop + (global.get $global) + ) + (drop + (global.get $global2) ) ) ) From 955bbe6e2e378f2020556b1f3c071cdc877d5f6e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 16:42:40 -0800 Subject: [PATCH 09/42] progress --- src/passes/StringLowering.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 1fb1e7d8c71..6e016cf866a 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -79,7 +79,9 @@ struct StringLowering : public Pass { StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {} void visitExpression(Expression* curr) { +std::cout << "visit " << *curr << '\n'; if (curr->is()) { +std::cout << " stash!\n"; stringPtrs.push_back(getCurrentPointer()); } @@ -89,8 +91,6 @@ struct StringLowering : public Pass { curr->type = Type(HeapType::ext, curr->type.getNullability()); } } - void visitStringConst(StringConst* curr) { - } }; std::cout << "a1\n"; ModuleUtils::ParallelFunctionAnalysis analysis( @@ -119,7 +119,7 @@ std::cout << " b3 " << **stringPtr << "\n"; } } -std::cout << "a4\n"; +std::cout << "a4 " << stringPtrs.size() << "\n"; // Generate the indexes from the combined set of necessary strings, which we // sort for determinism. for (auto& string : stringSet) { @@ -153,11 +153,14 @@ std::cout << "a6\n"; } void replaceStrings(Module* module) { +std::cout << "rep1 " << stringPtrs.size() << "\n"; Builder builder(*module); for (auto** stringPtr : stringPtrs) { auto* stringConst = (*stringPtr)->cast(); auto importName = importNames[stringIndexes[stringConst->string]]; +std::cout << " rep2 " << importName << " : " << **stringPtr << "\n"; *stringPtr = builder.makeGlobalGet(importName, nnexternref); +std::cout << " rep3 " << **stringPtr << "\n"; } } }; From 01c7868c07e55dc360db744dd1d192982ae1a523 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 29 Jan 2024 16:52:03 -0800 Subject: [PATCH 10/42] work? --- src/ir/type-updating.h | 1 - src/passes/StringLowering.cpp | 10 +++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ir/type-updating.h b/src/ir/type-updating.h index 92913699e98..ae247b6c75b 100644 --- a/src/ir/type-updating.h +++ b/src/ir/type-updating.h @@ -443,7 +443,6 @@ class TypeMapper : public GlobalTypeRewriter { std::unordered_map newSignatures; -public: TypeMapper(Module& wasm, const TypeUpdates& mapping) : GlobalTypeRewriter(wasm), mapping(mapping) {} diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 6e016cf866a..62e4289a18b 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -48,6 +48,7 @@ #include "ir/module-utils.h" #include "ir/names.h" #include "ir/find_all.h" +#include "ir/type-updating.h" #include "pass.h" #include "wasm-builder.h" #include "wasm.h" @@ -68,6 +69,7 @@ struct StringLowering : public Pass { processModule(module); addImports(module); replaceStrings(module); + updateTypes(module); } // Scan the entire wasm to find the relevant strings to populate our global @@ -88,7 +90,7 @@ std::cout << " stash!\n"; // We must replace all string heap types with extern. Do so now in this // operation to avoid needing a second pass. if (curr->type.isRef() && curr->type.getHeapType() == HeapType::string) { - curr->type = Type(HeapType::ext, curr->type.getNullability()); +// curr->type = Type(HeapType::ext, curr->type.getNullability()); } } }; @@ -163,6 +165,12 @@ std::cout << " rep2 " << importName << " : " << **stringPtr << "\n"; std::cout << " rep3 " << **stringPtr << "\n"; } } + + void updateTypes(Module* module) { + TypeMapper::TypeUpdates updates; + updates[HeapType::string] = HeapType::ext; + TypeMapper(*module, updates).map(); + } }; Pass* createStringLoweringPass() { return new StringLowering(); } From f00ca07e85589c8a126efbe6dff3996d8d342adc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 13:19:38 -0800 Subject: [PATCH 11/42] cleanup --- src/passes/StringLowering.cpp | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 62e4289a18b..38936d606f4 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -81,9 +81,7 @@ struct StringLowering : public Pass { StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {} void visitExpression(Expression* curr) { -std::cout << "visit " << *curr << '\n'; if (curr->is()) { -std::cout << " stash!\n"; stringPtrs.push_back(getCurrentPointer()); } @@ -94,45 +92,37 @@ std::cout << " stash!\n"; } } }; -std::cout << "a1\n"; + ModuleUtils::ParallelFunctionAnalysis analysis( *module, [&](Function* func, StringPtrs& stringPtrs) { if (!func->imported()) { StringWalker(stringPtrs).walk(func->body); } }); -std::cout << "a2\n"; // Also walk the global module code (for simplicity, also add it to the // function map, using a "function" key of nullptr). auto& globalStrings = analysis.map[nullptr]; StringWalker(globalStrings).walkModuleCode(module); -std::cout << "a3\n"; // Combine all the strings. std::unordered_set stringSet; for (auto& [_, currStringPtrs] : analysis.map) { for (auto** stringPtr : currStringPtrs) { -std::cout << " b1 " << stringPtr << "\n"; -std::cout << " b2 " << *stringPtr << "\n"; -std::cout << " b3 " << **stringPtr << "\n"; stringSet.insert((*stringPtr)->cast()->string); stringPtrs.push_back(stringPtr); } } -std::cout << "a4 " << stringPtrs.size() << "\n"; // Generate the indexes from the combined set of necessary strings, which we // sort for determinism. for (auto& string : stringSet) { strings.push_back(string); } -std::cout << "a5\n"; std::sort(strings.begin(), strings.end()); for (Index i = 0; i < strings.size(); i++) { stringIndexes[strings[i]] = i; } -std::cout << "a6\n"; } // For each string index, the name of the import that replaces it. @@ -155,14 +145,11 @@ std::cout << "a6\n"; } void replaceStrings(Module* module) { -std::cout << "rep1 " << stringPtrs.size() << "\n"; Builder builder(*module); for (auto** stringPtr : stringPtrs) { auto* stringConst = (*stringPtr)->cast(); auto importName = importNames[stringIndexes[stringConst->string]]; -std::cout << " rep2 " << importName << " : " << **stringPtr << "\n"; *stringPtr = builder.makeGlobalGet(importName, nnexternref); -std::cout << " rep3 " << **stringPtr << "\n"; } } From 9dae0b3c238c4871db3e10ea203476758b662665 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 13:31:49 -0800 Subject: [PATCH 12/42] father --- src/passes/StringLowering.cpp | 70 ++++++++++------------------------- 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 38936d606f4..a1484533056 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -15,40 +15,19 @@ */ // -// Lowers string.const etc. into imports. That is, +// Utilities for lowering strings into simpler things. // -// (local.set $x (string.const "foo.bar")) +// StringGathering collects all string.const operations and stores them in +// globals, avoiding them appearing in code that can run more than once (which +// can have overhead in VMs). // -// will turn into -// -// (import "string.const" "0" (global externref $string.const_foo_bar)) -// .. -// (local.set $x (global.get $string.const_foo.bar)) -// -// as well as a custom section "string.consts" containing a JSON string of an -// array with the string contents: -// -// ["foo.bar"] -// -// The expectation is that the host environment will read the custom section and -// then provide the imports, resulting in something like this: -// -// let stringConstsBuffer = -// WebAssembly.Module.customSections(module, 'string.consts'); -// let stringConstsString = -// new TextDecoder().decode(new Uint8Array(stringConstsBuffer, 'name')); -// WebAssembly.instantiate(module, { -// string.const: JSON.stringify(stringConstsString) // ["foo.bar"] -// }); -// -// After running this you may benefit from --simplify-globals and -// --reorder-globals. +// Building on that, an extended version of StringGathering will also replace +// those new globals with imported globals of type externref, for use with the +// string imports proposal. String operations will likewise need to be lowered. // #include "ir/module-utils.h" #include "ir/names.h" -#include "ir/find_all.h" -#include "ir/type-updating.h" #include "pass.h" #include "wasm-builder.h" #include "wasm.h" @@ -67,9 +46,8 @@ struct StringLowering : public Pass { // Main entry point. void run(Module* module) override { processModule(module); - addImports(module); + addGlobals(module); replaceStrings(module); - updateTypes(module); } // Scan the entire wasm to find the relevant strings to populate our global @@ -125,21 +103,19 @@ struct StringLowering : public Pass { } } - // For each string index, the name of the import that replaces it. - std::vector importNames; + // For each string index, the name of the global that replaces it. + std::vector globalNames; - Type nnexternref = Type(HeapType::ext, NonNullable); + Type nnstringref = Type(HeapType::string, NonNullable); - void addImports(Module* module) { - // TOOD: imports should be in front, so they can be used from globals, or - // run sort-globals internally.. + void addGlobals(Module* module) { + // TODO: Add them frist Builder builder(*module); - for (Index i = 0; i < strings.size(); i++) { - auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(strings[i].str)); - importNames.push_back(name); - auto global = builder.makeGlobal(name, nnexternref, nullptr, Builder::Immutable); - global->module = "string.const"; - global->base = std::to_string(i); + for (auto& string : strings) { + auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(string.str)); + globalNames.push_back(name); + auto* stringConst = builder.makeStringConst(string); + auto global = builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable); module->addGlobal(std::move(global)); } } @@ -148,16 +124,10 @@ struct StringLowering : public Pass { Builder builder(*module); for (auto** stringPtr : stringPtrs) { auto* stringConst = (*stringPtr)->cast(); - auto importName = importNames[stringIndexes[stringConst->string]]; - *stringPtr = builder.makeGlobalGet(importName, nnexternref); + auto importName = globalNames[stringIndexes[stringConst->string]]; + *stringPtr = builder.makeGlobalGet(importName, nnstringref); } } - - void updateTypes(Module* module) { - TypeMapper::TypeUpdates updates; - updates[HeapType::string] = HeapType::ext; - TypeMapper(*module, updates).map(); - } }; Pass* createStringLoweringPass() { return new StringLowering(); } From ba7518266602bc4cd188d49793824cda3258f471 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 13:36:32 -0800 Subject: [PATCH 13/42] work --- src/passes/StringLowering.cpp | 12 +++++++++- test/lit/passes/string-lowering.wast | 34 ++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index a1484533056..b2d89797e9e 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -26,6 +26,8 @@ // string imports proposal. String operations will likewise need to be lowered. // +#include + #include "ir/module-utils.h" #include "ir/names.h" #include "pass.h" @@ -109,15 +111,23 @@ struct StringLowering : public Pass { Type nnstringref = Type(HeapType::string, NonNullable); void addGlobals(Module* module) { - // TODO: Add them frist + // Note all the new names we create for the sorting later. + std::unordered_set newNames; + Builder builder(*module); for (auto& string : strings) { auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(string.str)); globalNames.push_back(name); + newNames.insert(name); auto* stringConst = builder.makeStringConst(string); auto global = builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable); module->addGlobal(std::move(global)); } + + // Sort our new globals to the start, as others may use them. + std::stable_sort(module->globals.begin(), module->globals.end(), [&](const std::unique_ptr& a, const std::unique_ptr& b) { + return newNames.count(a->name) && !newNames.count(b->name); + }); } void replaceStrings(Module* module) { diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 59d91e6745b..0cebc95c329 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -3,10 +3,30 @@ ;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s (module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (global $string.const_bar (ref string) (string.const "bar")) + + ;; CHECK: (global $string.const_baz (ref string) (string.const "baz")) + + ;; CHECK: (global $string.const_foo (ref string) (string.const "foo")) + + ;; CHECK: (global $string.const_other (ref string) (string.const "other")) + + ;; CHECK: (global $global (ref string) (global.get $string.const_foo)) (global $global (ref string) (string.const "foo")) + ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) (global $global2 (ref null string) (string.const "bar")) + ;; CHECK: (func $a (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_bar) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_baz) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $a (drop (string.const "bar") @@ -16,6 +36,20 @@ ) ) + ;; CHECK: (func $b (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_bar) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_other) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $global) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $global2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $b ;; TODO params and results and tuples and what not (drop (string.const "bar") From 5adfed42aa45866782e047a7b0411c967abf3ec1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 13:41:26 -0800 Subject: [PATCH 14/42] ment --- src/passes/StringLowering.cpp | 16 ++++------------ test/lit/passes/string-lowering.wast | 3 +++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index b2d89797e9e..6b920643cbc 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -55,21 +55,13 @@ struct StringLowering : public Pass { // Scan the entire wasm to find the relevant strings to populate our global // data structures. void processModule(Module* module) { - struct StringWalker : public PostWalker> { + struct StringWalker : public PostWalker { StringPtrs& stringPtrs; StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {} - void visitExpression(Expression* curr) { - if (curr->is()) { - stringPtrs.push_back(getCurrentPointer()); - } - - // We must replace all string heap types with extern. Do so now in this - // operation to avoid needing a second pass. - if (curr->type.isRef() && curr->type.getHeapType() == HeapType::string) { -// curr->type = Type(HeapType::ext, curr->type.getNullability()); - } + void visitStringConst(StringConst* curr) { + stringPtrs.push_back(getCurrentPointer()); } }; @@ -95,7 +87,7 @@ struct StringLowering : public Pass { } // Generate the indexes from the combined set of necessary strings, which we - // sort for determinism. + // sort for determinism (alphabetically). for (auto& string : stringSet) { strings.push_back(string); } diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 0cebc95c329..86d33797047 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -2,6 +2,9 @@ ;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s +;; All the strings should be collected into globals and used from there. They +;; should also be sorted deterministically (alphabetically). + (module ;; CHECK: (type $0 (func)) From 2c7779ab91d80d646f5f1a866f732eb086fb8085 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 13:56:37 -0800 Subject: [PATCH 15/42] work --- src/passes/StringLowering.cpp | 49 ++++++++++++++++++++++++++-- test/lit/passes/string-lowering.wast | 2 ++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 6b920643cbc..d244321e36f 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -102,14 +102,56 @@ struct StringLowering : public Pass { Type nnstringref = Type(HeapType::string, NonNullable); + // Existing globals already in the form we emit can be reused. That is, if + // we see + // + // (global $foo (ref null string) (string.const ..)) + // + // then we can just use that as the global for that string. This avoids + // repeated executions of the pass adding more and more globals. + // + // Note that we don't note these in newNames: They are already in the right + // sorted position, before any uses, as we use the first of them for each + // string. Only actually new names need sorting. + // + // Any time we reuse a global, we must not modify its body (or else we'd + // replace the global that all others read from); we note them here and + // avoid them in replaceStrings later to avoid such trampling. + std::unordered_set stringPtrsToPreserve; + void addGlobals(Module* module) { // Note all the new names we create for the sorting later. std::unordered_set newNames; + // Each string will get a global. + globalNames.resize(strings.size()); + + // Find globals to reuse (see comment on stringPtrsToPreserve for context). + for (auto& global : module->globals) { + if (global->type == nnstringref && !global->imported()) { + if (auto* stringConst = global->init->dynCast()) { + auto stringIndex = stringIndexes[stringConst->string]; + auto& globalName = globalNames[stringIndex]; + if (!globalName.is()) { + // This is the first global for this string, use it. + globalName = global->name; + stringPtrsToPreserve.insert(&global->init); + } + } + } + } + Builder builder(*module); - for (auto& string : strings) { + for (Index i = 0; i < strings.size(); i++) { + auto& globalName = globalNames[i]; + if (globalName.is()) { + // We are reusing a global for this one. + continue; + } + + auto& string = strings[i]; auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(string.str)); - globalNames.push_back(name); + globalName = name; newNames.insert(name); auto* stringConst = builder.makeStringConst(string); auto global = builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable); @@ -125,6 +167,9 @@ struct StringLowering : public Pass { void replaceStrings(Module* module) { Builder builder(*module); for (auto** stringPtr : stringPtrs) { + if (stringPtrsToPreserve.count(stringPtr)) { + continue; + } auto* stringConst = (*stringPtr)->cast(); auto importName = globalNames[stringIndexes[stringConst->string]]; *stringPtr = builder.makeGlobalGet(importName, nnstringref); diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 86d33797047..522674eb244 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -68,3 +68,5 @@ ) ) ) + +;; test reusing existing globalses From 9b70621b157ec7ef943d8393f8568f337e2e211f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:01:29 -0800 Subject: [PATCH 16/42] test --- test/lit/passes/string-lowering.wast | 39 +++++++++++++++++++++------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 522674eb244..04e6121d4c0 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -6,17 +6,16 @@ ;; should also be sorted deterministically (alphabetically). (module + ;; Note that $global will be reused: no new global will be added for "foo". + ;; $global2 almost can, but it has the wrong type, so it won't. + ;; CHECK: (type $0 (func)) ;; CHECK: (global $string.const_bar (ref string) (string.const "bar")) - ;; CHECK: (global $string.const_baz (ref string) (string.const "baz")) - - ;; CHECK: (global $string.const_foo (ref string) (string.const "foo")) - ;; CHECK: (global $string.const_other (ref string) (string.const "other")) - ;; CHECK: (global $global (ref string) (global.get $string.const_foo)) + ;; CHECK: (global $global (ref string) (string.const "foo")) (global $global (ref string) (string.const "foo")) ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) @@ -27,7 +26,7 @@ ;; CHECK-NEXT: (global.get $string.const_bar) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (global.get $string.const_baz) + ;; CHECK-NEXT: (global.get $global) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $a @@ -35,7 +34,7 @@ (string.const "bar") ) (drop - (string.const "baz") + (string.const "foo") ) ) @@ -53,7 +52,7 @@ ;; CHECK-NEXT: (global.get $global2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - (func $b ;; TODO params and results and tuples and what not + (func $b (drop (string.const "bar") ) @@ -69,4 +68,26 @@ ) ) -;; test reusing existing globalses +;; Multiple possible reusable globals. Also test ignoring of imports. +(module + ;; CHECK: (import "a" "b" (global $import (ref string))) + (import "a" "b" (global $import (ref string))) + + ;; CHECK: (global $global1 (ref string) (string.const "foo")) + (global $global1 (ref string) (string.const "foo")) + + ;; CHECK: (global $global2 (ref string) (global.get $global1)) + (global $global2 (ref string) (string.const "foo")) + + ;; CHECK: (global $global3 (ref string) (global.get $global1)) + (global $global3 (ref string) (string.const "foo")) + + ;; CHECK: (global $global4 (ref string) (string.const "bar")) + (global $global4 (ref string) (string.const "bar")) + + ;; CHECK: (global $global5 (ref string) (global.get $global4)) + (global $global5 (ref string) (string.const "bar")) + + ;; CHECK: (global $global6 (ref string) (global.get $global4)) + (global $global6 (ref string) (string.const "bar")) +) From d20f8d52ccd0da9cee06d65ad5e77088af9f2a67 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:01:37 -0800 Subject: [PATCH 17/42] format --- src/passes/StringLowering.cpp | 15 ++++++++++----- src/passes/pass.cpp | 5 +++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index d244321e36f..ad377369830 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -150,18 +150,23 @@ struct StringLowering : public Pass { } auto& string = strings[i]; - auto name = Names::getValidGlobalName(*module, std::string("string.const_") + std::string(string.str)); + auto name = Names::getValidGlobalName( + *module, std::string("string.const_") + std::string(string.str)); globalName = name; newNames.insert(name); auto* stringConst = builder.makeStringConst(string); - auto global = builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable); + auto global = + builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable); module->addGlobal(std::move(global)); } // Sort our new globals to the start, as others may use them. - std::stable_sort(module->globals.begin(), module->globals.end(), [&](const std::unique_ptr& a, const std::unique_ptr& b) { - return newNames.count(a->name) && !newNames.count(b->name); - }); + std::stable_sort( + module->globals.begin(), + module->globals.end(), + [&](const std::unique_ptr& a, const std::unique_ptr& b) { + return newNames.count(a->name) && !newNames.count(b->name); + }); } void replaceStrings(Module* module) { diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 04e105f0188..18644ac69fb 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -475,8 +475,9 @@ void PassRegistry::registerPasses() { "ssa-nomerge", "ssa-ify variables so that they have a single assignment, ignoring merges", createSSAifyNoMergePass); - registerPass( - "string-lowering", "lower wasm strings to imports", createStringLoweringPass); + registerPass("string-lowering", + "lower wasm strings to imports", + createStringLoweringPass); registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", From c2528310dc322cad7fba3f665aa7252a2721235b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:04:04 -0800 Subject: [PATCH 18/42] rename --- src/passes/StringLowering.cpp | 4 ++-- src/passes/pass.cpp | 11 ++++++++--- src/passes/passes.h | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index ad377369830..29f489fcc73 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -36,7 +36,7 @@ namespace wasm { -struct StringLowering : public Pass { +struct StringGathering : public Pass { // All the strings we found in the module, and a reverse mapping. std::vector strings; std::unordered_map stringIndexes; @@ -182,6 +182,6 @@ struct StringLowering : public Pass { } }; -Pass* createStringLoweringPass() { return new StringLowering(); } +Pass* createStringGatheringPass() { return new StringGathering(); } } // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 18644ac69fb..8a52e178ed8 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -475,9 +475,9 @@ void PassRegistry::registerPasses() { "ssa-nomerge", "ssa-ify variables so that they have a single assignment, ignoring merges", createSSAifyNoMergePass); - registerPass("string-lowering", - "lower wasm strings to imports", - createStringLoweringPass); + registerPass("string-gathering", + "gathers wasm strings to globals", + createStringGatheringPass); registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", @@ -713,6 +713,11 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() { addIfNoDWARFIssues("simplify-globals"); } addIfNoDWARFIssues("remove-unused-module-elements"); + if (options.optimizeLevel >= 2 && wasm->features.hasStrings()) { + // Gather strings to globals right before reorder-globals, which will then + // sort them properly. + addIfNoDWARFIssues("string-gathering"); + } if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) { addIfNoDWARFIssues("reorder-globals"); } diff --git a/src/passes/passes.h b/src/passes/passes.h index 950e30aa7b9..460dbace243 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -153,7 +153,7 @@ Pass* createSimplifyLocalsNoTeePass(); Pass* createSimplifyLocalsNoStructurePass(); Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); -Pass* createStringLoweringPass(); +Pass* createStringGatheringPass(); Pass* createStripDebugPass(); Pass* createStripDWARFPass(); Pass* createStripProducersPass(); From 33978991230d997f461b0cc4d19ed4ad55aea139 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:04:14 -0800 Subject: [PATCH 19/42] rename --- test/lit/passes/string-lowering.wast | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 04e6121d4c0..c7c656d549e 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -1,6 +1,6 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --string-gathering -all -S -o - | filecheck %s ;; All the strings should be collected into globals and used from there. They ;; should also be sorted deterministically (alphabetically). From 883eba45bfe9ddfa53e7001fc0797238bb8c407b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:04:34 -0800 Subject: [PATCH 20/42] rename --- test/lit/passes/{string-lowering.wast => string-gathering.wast} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/lit/passes/{string-lowering.wast => string-gathering.wast} (100%) diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-gathering.wast similarity index 100% rename from test/lit/passes/string-lowering.wast rename to test/lit/passes/string-gathering.wast From d93ca57d11cfe53eb93ad6b01d7710432c7efb47 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:13:41 -0800 Subject: [PATCH 21/42] work --- src/passes/StringLowering.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 29f489fcc73..4ae46914b21 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -24,6 +24,7 @@ // Building on that, an extended version of StringGathering will also replace // those new globals with imported globals of type externref, for use with the // string imports proposal. String operations will likewise need to be lowered. +// TODO // #include From 07ce36a22522a8a337d8acebffe68fd6b597c092 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:14:11 -0800 Subject: [PATCH 22/42] work --- test/lit/passes/string-gathering.wast | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast index c7c656d549e..21fe358a3c6 100644 --- a/test/lit/passes/string-gathering.wast +++ b/test/lit/passes/string-gathering.wast @@ -59,6 +59,9 @@ (drop (string.const "other") ) + ;; Existing global.gets are not modified (but after this pass, + ;; SimplifyGlobals could help; though in practice the globals would have + ;; been propagated here anyhow). (drop (global.get $global) ) From 4670f8edb66d7e366b5dfd8adb0d53a87d3400c1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 14:32:21 -0800 Subject: [PATCH 23/42] update --- test/lit/help/wasm-opt.test | 2 + test/lit/help/wasm2js.test | 2 + test/lit/passes/simplify-globals-strings.wast | 65 ------------------- 3 files changed, 4 insertions(+), 65 deletions(-) delete mode 100644 test/lit/passes/simplify-globals-strings.wast diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 07d218007d7..6b19ba52e67 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -467,6 +467,8 @@ ;; CHECK-NEXT: --stack-check enforce limits on llvm's ;; CHECK-NEXT: __stack_pointer global ;; CHECK-NEXT: +;; CHECK-NEXT: --string-gathering gathers wasm strings to globals +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index e81d07718ea..a5f2e6c30be 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -426,6 +426,8 @@ ;; CHECK-NEXT: --stack-check enforce limits on llvm's ;; CHECK-NEXT: __stack_pointer global ;; CHECK-NEXT: +;; CHECK-NEXT: --string-gathering gathers wasm strings to globals +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/passes/simplify-globals-strings.wast b/test/lit/passes/simplify-globals-strings.wast deleted file mode 100644 index 813e2964ce4..00000000000 --- a/test/lit/passes/simplify-globals-strings.wast +++ /dev/null @@ -1,65 +0,0 @@ -;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. -;; NOTE: This test was ported using port_passes_tests_to_lit.py and could be cleaned up. - -;; We do not "inline" strings from globals, as that might cause more -;; allocations to happen. TODO if VMs optimize that, remove this - -;; RUN: foreach %s %t wasm-opt --simplify-globals -all -S -o - | filecheck %s - -;; Also test with -O3 --gufa to see that no other pass does this kind of thing, -;; as extra coverage. - -;; RUN: foreach %s %t wasm-opt -O3 --gufa -all -S -o - | filecheck %s --check-prefix=O3GUF - -(module - ;; CHECK: (type $0 (func (result anyref))) - - ;; CHECK: (global $string stringref (string.const "one")) - ;; O3GUF: (type $0 (func (result anyref))) - - ;; O3GUF: (global $string (ref string) (string.const "one")) - (global $string stringref (string.const "one")) - - ;; CHECK: (global $string-mut (mut stringref) (string.const "two")) - ;; O3GUF: (global $string-mut (mut (ref string)) (string.const "two")) - (global $string-mut (mut stringref) (string.const "two")) - - ;; CHECK: (export "global" (func $global)) - ;; O3GUF: (export "global" (func $global)) - (export "global" (func $global)) - - ;; CHECK: (export "written" (func $written)) - ;; O3GUF: (export "written" (func $written)) - (export "written" (func $written)) - - ;; CHECK: (func $global (type $0) (result anyref) - ;; CHECK-NEXT: (global.get $string) - ;; CHECK-NEXT: ) - ;; O3GUF: (func $global (type $0) (result anyref) - ;; O3GUF-NEXT: (global.get $string) - ;; O3GUF-NEXT: ) - (func $global (result anyref) - ;; This should not turn into "one". - (global.get $string) - ) - - ;; CHECK: (func $written (type $0) (result anyref) - ;; CHECK-NEXT: (global.set $string-mut - ;; CHECK-NEXT: (string.const "three") - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (global.get $string-mut) - ;; CHECK-NEXT: ) - ;; O3GUF: (func $written (type $0) (result anyref) - ;; O3GUF-NEXT: (global.set $string-mut - ;; O3GUF-NEXT: (string.const "three") - ;; O3GUF-NEXT: ) - ;; O3GUF-NEXT: (global.get $string-mut) - ;; O3GUF-NEXT: ) - (func $written (result anyref) - (global.set $string-mut - (string.const "three") - ) - ;; This should not turn into "three". - (global.get $string-mut) - ) -) From b8fb866418d8f4d846c316d6f15178630e282cc8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 15:53:21 -0800 Subject: [PATCH 24/42] work --- src/passes/StringLowering.cpp | 46 ++++++++++++++++++++++++++++++++--- src/passes/pass.cpp | 3 +++ src/passes/passes.h | 1 + 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 4ae46914b21..172c99b3b03 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -21,10 +21,9 @@ // globals, avoiding them appearing in code that can run more than once (which // can have overhead in VMs). // -// Building on that, an extended version of StringGathering will also replace -// those new globals with imported globals of type externref, for use with the -// string imports proposal. String operations will likewise need to be lowered. -// TODO +// StringLowering does the same, and also replaces those new globals with +// imported globals of type externref, for use with the string imports proposal. +// String operations will likewise need to be lowered. TODO // #include @@ -32,6 +31,7 @@ #include "ir/module-utils.h" #include "ir/names.h" #include "pass.h" +#include "support/json.h" #include "wasm-builder.h" #include "wasm.h" @@ -183,6 +183,44 @@ struct StringGathering : public Pass { } }; +struct StringLowering : public StringGathering { + void run(Module* module) override { + if (!module->features.has(FeatureSet::Strings)) { + return; + } + + // First, run the gathering operation so all string.consts are in one place. + StringGathering::run(module); + + // Lower the string.const globals into imports. + makeImports(module); + + // Now that no string contents remain, disable the feature. + module->features.disable(FeatureSet::Strings); + } + + void makeImports(Module* module) { + Index importIndex = 0; + json::value stringArray; + stringArray.setArray(); + std::vector importedStrings; + for (auto& global : module->globals) { + if (global->init) { + if (auto* c = global->init->dynCast()) { + global->module = "string.const"; + global->base = std::to_string(importIndex); + importIndex++; + global->init = nullptr; + stringArray.push_back(c->string.str); + } + } + } + + stringArray.stringify(std::cout); + } +} + Pass* createStringGatheringPass() { return new StringGathering(); } +Pass* createStringLoweringPass() { return new StringLowering(); } } // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 8a52e178ed8..9bcf3fea0f8 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -478,6 +478,9 @@ void PassRegistry::registerPasses() { registerPass("string-gathering", "gathers wasm strings to globals", createStringGatheringPass); + registerPass("string-lowering", + "lowers wasm strings and operations to imports", + createStringLoweringPass); registerPass( "strip", "deprecated; same as strip-debug", createStripDebugPass); registerPass("stack-check", diff --git a/src/passes/passes.h b/src/passes/passes.h index 460dbace243..15e6dfc15f0 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -154,6 +154,7 @@ Pass* createSimplifyLocalsNoStructurePass(); Pass* createSimplifyLocalsNoTeeNoStructurePass(); Pass* createStackCheckPass(); Pass* createStringGatheringPass(); +Pass* createStringLoweringPass(); Pass* createStripDebugPass(); Pass* createStripDWARFPass(); Pass* createStripProducersPass(); From 3bb6e3d53de47a1198be2a02a14ff3437d5dff16 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:09:00 -0800 Subject: [PATCH 25/42] work --- src/passes/StringLowering.cpp | 8 ++++--- src/support/json.cpp | 44 +++++++++++++++++++++++++++++++++++ src/support/json.h | 5 ++++ 3 files changed, 54 insertions(+), 3 deletions(-) create mode 100644 src/support/json.cpp diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 172c99b3b03..1c3fb3bf874 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -201,7 +201,7 @@ struct StringLowering : public StringGathering { void makeImports(Module* module) { Index importIndex = 0; - json::value stringArray; + json::Value stringArray; stringArray.setArray(); std::vector importedStrings; for (auto& global : module->globals) { @@ -211,14 +211,16 @@ struct StringLowering : public StringGathering { global->base = std::to_string(importIndex); importIndex++; global->init = nullptr; - stringArray.push_back(c->string.str); + + auto str = json::Value::make(std::string(c->string.str).c_str()); + stringArray.push_back(str); } } } stringArray.stringify(std::cout); } -} +}; Pass* createStringGatheringPass() { return new StringGathering(); } Pass* createStringLoweringPass() { return new StringLowering(); } diff --git a/src/support/json.cpp b/src/support/json.cpp new file mode 100644 index 00000000000..82929279dbb --- /dev/null +++ b/src/support/json.cpp @@ -0,0 +1,44 @@ +/* + * Copyright 2024 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "support/json.h" + +namespace json { + +void stringify(std::ostream& os, bool pretty) { + if (isString()) { + // TODO: escaping + os << '"' << getCString() << '"'; + } else if (isArray()) { + os << '['; + auto first = true; + for (auto& item : getArray()) { + if (first) { + first = false; + } else { + // TODO prety whitespace + os << ','; + } + item.stringify(os, pretty); + } + os << ']'; + } else { + WASM_UNREACHABLE("TODO: stringify all of JSON"); + } +} + +} + diff --git a/src/support/json.h b/src/support/json.h index bc880f382d8..c6b9829cb38 100644 --- a/src/support/json.h +++ b/src/support/json.h @@ -54,6 +54,11 @@ struct Value { Ref& operator[](IString x) { return (*this->get())[x]; } }; + template + static Ref make(T t) { + return Ref(new Value(t)); + } + enum Type { String = 0, Number = 1, From 3a5ad662707b84575ef3ce9ec1cfbe8900061dc0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:09:16 -0800 Subject: [PATCH 26/42] work --- src/support/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index 87f835c7b03..13bf463e42c 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -8,6 +8,7 @@ set(support_SOURCES dfa_minimization.cpp file.cpp istring.cpp + json.cpp path.cpp safe_integer.cpp threads.cpp From e4d0b77e4c15363f4f720932f0a4dea82f9a8902 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:09:50 -0800 Subject: [PATCH 27/42] work --- src/support/json.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/support/json.cpp b/src/support/json.cpp index 82929279dbb..2bbae335653 100644 --- a/src/support/json.cpp +++ b/src/support/json.cpp @@ -18,7 +18,7 @@ namespace json { -void stringify(std::ostream& os, bool pretty) { +void Value::stringify(std::ostream& os, bool pretty) { if (isString()) { // TODO: escaping os << '"' << getCString() << '"'; @@ -32,7 +32,7 @@ void stringify(std::ostream& os, bool pretty) { // TODO prety whitespace os << ','; } - item.stringify(os, pretty); + item->stringify(os, pretty); } os << ']'; } else { From 90fbb2517a09bb816d3fe8c617c0539edc0adab6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:10:48 -0800 Subject: [PATCH 28/42] work --- test/lit/passes/string-lowering.wast | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 test/lit/passes/string-lowering.wast diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast new file mode 100644 index 00000000000..10c970561c8 --- /dev/null +++ b/test/lit/passes/string-lowering.wast @@ -0,0 +1,17 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s + +(module + (func $a + (drop + (string.const "foo") + ) + (drop + (string.const "bar") + ) + (drop + (string.const "foo") + ) + ) +) From 1a3dc6311bed07ed2311cb0d8f36558053844aaa Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:18:53 -0800 Subject: [PATCH 29/42] work --- src/passes/StringLowering.cpp | 7 ++++++- test/lit/passes/string-lowering.wast | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 1c3fb3bf874..0322f2ab10f 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -218,7 +218,12 @@ struct StringLowering : public StringGathering { } } - stringArray.stringify(std::cout); + // Add a custom section with the JSON. + std::stringstream stream; + stringArray.stringify(stream); + auto str = stream.str(); + std::vector vec(str.begin(), str.end()); + module->customSections.emplace_back(CustomSection{"string.consts", std::move(vec)}); } }; diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 10c970561c8..44e46a6e3ff 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -3,7 +3,25 @@ ;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s (module - (func $a + ;; CHECK: (type $0 (func)) + + ;; CHECK: (import "string.const" "0" (global $string.const_bar (ref string))) + + ;; CHECK: (import "string.const" "1" (global $string.const_foo (ref string))) + + ;; CHECK: (func $consts (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_foo) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_bar) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (global.get $string.const_foo) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $consts + ;; These consts will become global.gets of new imported globals. (drop (string.const "foo") ) From 8fc15f1dfcd4576cd0170ad7cc0ca75e636743d9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:19:48 -0800 Subject: [PATCH 30/42] work --- src/passes/StringLowering.cpp | 2 ++ src/support/json.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 0322f2ab10f..dab63efee13 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -221,9 +221,11 @@ struct StringLowering : public StringGathering { // Add a custom section with the JSON. std::stringstream stream; stringArray.stringify(stream); + stringArray.stringify(std::cerr); auto str = stream.str(); std::vector vec(str.begin(), str.end()); module->customSections.emplace_back(CustomSection{"string.consts", std::move(vec)}); + } }; diff --git a/src/support/json.cpp b/src/support/json.cpp index 2bbae335653..125b1c22a4e 100644 --- a/src/support/json.cpp +++ b/src/support/json.cpp @@ -29,7 +29,7 @@ void Value::stringify(std::ostream& os, bool pretty) { if (first) { first = false; } else { - // TODO prety whitespace + // TODO pretty whitespace os << ','; } item->stringify(os, pretty); From 694ef94d6156b3551142cbd2c18a8476b59e66cc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:29:46 -0800 Subject: [PATCH 31/42] test --- test/lit/passes/string-gathering.wast | 4 ++++ test/lit/passes/string-lowering.wast | 26 +++++++------------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast index 21fe358a3c6..3e414affe31 100644 --- a/test/lit/passes/string-gathering.wast +++ b/test/lit/passes/string-gathering.wast @@ -1,9 +1,13 @@ ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. ;; RUN: foreach %s %t wasm-opt --string-gathering -all -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s --check-prefix=LOWER ;; All the strings should be collected into globals and used from there. They ;; should also be sorted deterministically (alphabetically). +;; +;; LOWER also lowers away strings entirely, leaving only imports and a custom +;; section (that part is tested in string-lowering.wast). (module ;; Note that $global will be reused: no new global will be added for "foo". diff --git a/test/lit/passes/string-lowering.wast b/test/lit/passes/string-lowering.wast index 44e46a6e3ff..628092d1c4e 100644 --- a/test/lit/passes/string-lowering.wast +++ b/test/lit/passes/string-lowering.wast @@ -1,27 +1,11 @@ -;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. +;; This file checks the custom section that --string-lowering adds. The other +;; operations are tested in string-gathering.wast (which is auto-updated, unlike +;; this which is manual). ;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s (module - ;; CHECK: (type $0 (func)) - - ;; CHECK: (import "string.const" "0" (global $string.const_bar (ref string))) - - ;; CHECK: (import "string.const" "1" (global $string.const_foo (ref string))) - - ;; CHECK: (func $consts (type $0) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (global.get $string.const_foo) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (global.get $string.const_bar) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (global.get $string.const_foo) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) (func $consts - ;; These consts will become global.gets of new imported globals. (drop (string.const "foo") ) @@ -33,3 +17,7 @@ ) ) ) + +;; The custom section should contain foo and bar, and foo only once. +;; CHECK: custom section "string.consts", size 13, contents: "[\"bar\",\"foo\"]" + From 4ea57ee550a115ebae203d7fedcdf5e559c3c523 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:31:50 -0800 Subject: [PATCH 32/42] test --- src/passes/StringLowering.cpp | 4 +-- test/lit/passes/string-gathering.wast | 40 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index dab63efee13..8f87681b018 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -195,8 +195,8 @@ struct StringLowering : public StringGathering { // Lower the string.const globals into imports. makeImports(module); - // Now that no string contents remain, disable the feature. - module->features.disable(FeatureSet::Strings); + // TODO: disable the feature here after we lower everything away. + // module->features.disable(FeatureSet::Strings); } void makeImports(Module* module) { diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast index 3e414affe31..918010e49a0 100644 --- a/test/lit/passes/string-gathering.wast +++ b/test/lit/passes/string-gathering.wast @@ -23,6 +23,15 @@ (global $global (ref string) (string.const "foo")) ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) + ;; LOWER: (type $0 (func)) + + ;; LOWER: (import "string.const" "0" (global $string.const_bar (ref string))) + + ;; LOWER: (import "string.const" "1" (global $string.const_other (ref string))) + + ;; LOWER: (import "string.const" "2" (global $global (ref string))) + + ;; LOWER: (global $global2 stringref (global.get $string.const_bar)) (global $global2 (ref null string) (string.const "bar")) ;; CHECK: (func $a (type $0) @@ -33,6 +42,14 @@ ;; CHECK-NEXT: (global.get $global) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; LOWER: (func $a (type $0) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $string.const_bar) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $global) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: ) (func $a (drop (string.const "bar") @@ -56,6 +73,20 @@ ;; CHECK-NEXT: (global.get $global2) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; LOWER: (func $b (type $0) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $string.const_bar) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $string.const_other) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $global) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: (drop + ;; LOWER-NEXT: (global.get $global2) + ;; LOWER-NEXT: ) + ;; LOWER-NEXT: ) (func $b (drop (string.const "bar") @@ -78,23 +109,32 @@ ;; Multiple possible reusable globals. Also test ignoring of imports. (module ;; CHECK: (import "a" "b" (global $import (ref string))) + ;; LOWER: (import "a" "b" (global $import (ref string))) (import "a" "b" (global $import (ref string))) ;; CHECK: (global $global1 (ref string) (string.const "foo")) (global $global1 (ref string) (string.const "foo")) ;; CHECK: (global $global2 (ref string) (global.get $global1)) + ;; LOWER: (import "string.const" "0" (global $global1 (ref string))) + + ;; LOWER: (import "string.const" "1" (global $global4 (ref string))) + + ;; LOWER: (global $global2 (ref string) (global.get $global1)) (global $global2 (ref string) (string.const "foo")) ;; CHECK: (global $global3 (ref string) (global.get $global1)) + ;; LOWER: (global $global3 (ref string) (global.get $global1)) (global $global3 (ref string) (string.const "foo")) ;; CHECK: (global $global4 (ref string) (string.const "bar")) (global $global4 (ref string) (string.const "bar")) ;; CHECK: (global $global5 (ref string) (global.get $global4)) + ;; LOWER: (global $global5 (ref string) (global.get $global4)) (global $global5 (ref string) (string.const "bar")) ;; CHECK: (global $global6 (ref string) (global.get $global4)) + ;; LOWER: (global $global6 (ref string) (global.get $global4)) (global $global6 (ref string) (string.const "bar")) ) From 86517b0aa9b4a16d34e3e20b349b1710e32e0067 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 30 Jan 2024 16:32:05 -0800 Subject: [PATCH 33/42] format --- src/passes/StringLowering.cpp | 4 ++-- src/support/json.cpp | 3 +-- src/support/json.h | 5 +---- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 8f87681b018..68bacfb0be9 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -224,8 +224,8 @@ struct StringLowering : public StringGathering { stringArray.stringify(std::cerr); auto str = stream.str(); std::vector vec(str.begin(), str.end()); - module->customSections.emplace_back(CustomSection{"string.consts", std::move(vec)}); - + module->customSections.emplace_back( + CustomSection{"string.consts", std::move(vec)}); } }; diff --git a/src/support/json.cpp b/src/support/json.cpp index 125b1c22a4e..d43ac03239a 100644 --- a/src/support/json.cpp +++ b/src/support/json.cpp @@ -40,5 +40,4 @@ void Value::stringify(std::ostream& os, bool pretty) { } } -} - +} // namespace json diff --git a/src/support/json.h b/src/support/json.h index c6b9829cb38..c005543f60e 100644 --- a/src/support/json.h +++ b/src/support/json.h @@ -54,10 +54,7 @@ struct Value { Ref& operator[](IString x) { return (*this->get())[x]; } }; - template - static Ref make(T t) { - return Ref(new Value(t)); - } + template static Ref make(T t) { return Ref(new Value(t)); } enum Type { String = 0, From b2895cad40ffcc02382c67480aca5399225e3865 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 31 Jan 2024 12:46:05 -0800 Subject: [PATCH 34/42] yolo --- src/passes/StringLowering.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 68bacfb0be9..78c629997ea 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -30,6 +30,7 @@ #include "ir/module-utils.h" #include "ir/names.h" +#include "ir/type-updating.h" #include "pass.h" #include "support/json.h" #include "wasm-builder.h" @@ -195,8 +196,11 @@ struct StringLowering : public StringGathering { // Lower the string.const globals into imports. makeImports(module); - // TODO: disable the feature here after we lower everything away. - // module->features.disable(FeatureSet::Strings); + // Remove all HeapType::string etc. in favor of externref. + updateTypes(module); + + // Disable the feature here after we lowered everything away. + module->features.disable(FeatureSet::Strings); } void makeImports(Module* module) { @@ -227,6 +231,12 @@ struct StringLowering : public StringGathering { module->customSections.emplace_back( CustomSection{"string.consts", std::move(vec)}); } + + void updateTypes(Module* module) { + TypeMapper::TypeUpdates updates; + updates[HeapType::string] = HeapType::ext; + TypeMapper(*module, updates).map(); + } }; Pass* createStringGatheringPass() { return new StringGathering(); } From ae3871ee92b8791cb928f75dd6c60447b1f92de6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 31 Jan 2024 12:47:42 -0800 Subject: [PATCH 35/42] Update src/passes/StringLowering.cpp Co-authored-by: Thomas Lively --- src/passes/StringLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 4ae46914b21..4314e1b1395 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -106,7 +106,7 @@ struct StringGathering : public Pass { // Existing globals already in the form we emit can be reused. That is, if // we see // - // (global $foo (ref null string) (string.const ..)) + // (global $foo (ref string) (string.const ..)) // // then we can just use that as the global for that string. This avoids // repeated executions of the pass adding more and more globals. From a8cf56a473bf826a729c86c913564e35800d091b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 31 Jan 2024 12:51:32 -0800 Subject: [PATCH 36/42] feedback: simplify to avoid reverse index map --- src/passes/StringLowering.cpp | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 4314e1b1395..2bfb8f009d4 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -38,9 +38,8 @@ namespace wasm { struct StringGathering : public Pass { - // All the strings we found in the module, and a reverse mapping. + // All the strings we found in the module. std::vector strings; - std::unordered_map stringIndexes; // Pointers to all StringConsts, so that we can replace them. using StringPtrs = std::vector; @@ -87,19 +86,15 @@ struct StringGathering : public Pass { } } - // Generate the indexes from the combined set of necessary strings, which we - // sort for determinism (alphabetically). + // Sort the strings for determinism (alphabetically). for (auto& string : stringSet) { strings.push_back(string); } std::sort(strings.begin(), strings.end()); - for (Index i = 0; i < strings.size(); i++) { - stringIndexes[strings[i]] = i; - } } - // For each string index, the name of the global that replaces it. - std::vector globalNames; + // For each string, the name of the global that replaces it. + std::unordered_map stringToGlobalName; Type nnstringref = Type(HeapType::string, NonNullable); @@ -124,15 +119,11 @@ struct StringGathering : public Pass { // Note all the new names we create for the sorting later. std::unordered_set newNames; - // Each string will get a global. - globalNames.resize(strings.size()); - // Find globals to reuse (see comment on stringPtrsToPreserve for context). for (auto& global : module->globals) { if (global->type == nnstringref && !global->imported()) { if (auto* stringConst = global->init->dynCast()) { - auto stringIndex = stringIndexes[stringConst->string]; - auto& globalName = globalNames[stringIndex]; + auto& globalName = stringToGlobalName[stringConst->string]; if (!globalName.is()) { // This is the first global for this string, use it. globalName = global->name; @@ -144,7 +135,7 @@ struct StringGathering : public Pass { Builder builder(*module); for (Index i = 0; i < strings.size(); i++) { - auto& globalName = globalNames[i]; + auto& globalName = stringToGlobalName[strings[i]]; if (globalName.is()) { // We are reusing a global for this one. continue; @@ -177,7 +168,7 @@ struct StringGathering : public Pass { continue; } auto* stringConst = (*stringPtr)->cast(); - auto importName = globalNames[stringIndexes[stringConst->string]]; + auto importName = stringToGlobalName[stringConst->string]; *stringPtr = builder.makeGlobalGet(importName, nnstringref); } } From a2155ed7f268e05826502ce4714f3ea12a56c71b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 31 Jan 2024 12:54:38 -0800 Subject: [PATCH 37/42] feedback: fix name --- src/passes/StringLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 2bfb8f009d4..ce976ae5f56 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -168,8 +168,8 @@ struct StringGathering : public Pass { continue; } auto* stringConst = (*stringPtr)->cast(); - auto importName = stringToGlobalName[stringConst->string]; - *stringPtr = builder.makeGlobalGet(importName, nnstringref); + auto globalName = stringToGlobalName[stringConst->string]; + *stringPtr = builder.makeGlobalGet(globalName, nnstringref); } } }; From aad2278d287143eec3ddf19e0473f264ec3c51f5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 31 Jan 2024 13:52:20 -0800 Subject: [PATCH 38/42] Update src/passes/StringLowering.cpp Co-authored-by: Thomas Lively --- src/passes/StringLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index ce976ae5f56..e422dae7dc9 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -87,9 +87,7 @@ struct StringGathering : public Pass { } // Sort the strings for determinism (alphabetically). - for (auto& string : stringSet) { - strings.push_back(string); - } + strings = std::vector(stringSet.begin(), stringSet.end()); std::sort(strings.begin(), strings.end()); } From 9f6bff73eff42cfdffc7bb9c5270c6733bcd2c4a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 31 Jan 2024 13:55:05 -0800 Subject: [PATCH 39/42] comment --- src/passes/StringLowering.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index ce976ae5f56..8943e301c04 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -152,7 +152,10 @@ struct StringGathering : public Pass { module->addGlobal(std::move(global)); } - // Sort our new globals to the start, as others may use them. + // Sort our new globals to the start, as other global initializers may use + // them (and it would be invalid for us to appear after a use). This sort is + // a simple way to ensure that we validate, but it may be unoptimal (we + // leave that for reorder-globals). std::stable_sort( module->globals.begin(), module->globals.end(), From d5cc29ea23d49288ef4976b4e4015ea7973264c5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 31 Jan 2024 16:46:31 -0800 Subject: [PATCH 40/42] fix --- src/ir/type-updating.cpp | 3 --- src/passes/StringLowering.cpp | 1 - test/lit/passes/string-gathering.wast | 26 ++++++++++++++------------ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/ir/type-updating.cpp b/src/ir/type-updating.cpp index e9cf1859b8c..fb701f64aa2 100644 --- a/src/ir/type-updating.cpp +++ b/src/ir/type-updating.cpp @@ -176,9 +176,6 @@ void GlobalTypeRewriter::mapTypes(const TypeMap& oldToNewTypes) { } HeapType getNew(HeapType type) { - if (type.isBasic()) { - return type; - } auto iter = oldToNewTypes.find(type); if (iter != oldToNewTypes.end()) { return iter->second; diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 630c5013017..c57e528f019 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -217,7 +217,6 @@ struct StringLowering : public StringGathering { // Add a custom section with the JSON. std::stringstream stream; stringArray.stringify(stream); - stringArray.stringify(std::cerr); auto str = stream.str(); std::vector vec(str.begin(), str.end()); module->customSections.emplace_back( diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast index 918010e49a0..657858fc050 100644 --- a/test/lit/passes/string-gathering.wast +++ b/test/lit/passes/string-gathering.wast @@ -7,7 +7,9 @@ ;; should also be sorted deterministically (alphabetically). ;; ;; LOWER also lowers away strings entirely, leaving only imports and a custom -;; section (that part is tested in string-lowering.wast). +;; section (that part is tested in string-lowering.wast). It also removes all +;; uses of the string heap type, leaving extern instead for the imported +;; strings. (module ;; Note that $global will be reused: no new global will be added for "foo". @@ -25,13 +27,13 @@ ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) ;; LOWER: (type $0 (func)) - ;; LOWER: (import "string.const" "0" (global $string.const_bar (ref string))) + ;; LOWER: (import "string.const" "0" (global $string.const_bar (ref extern))) - ;; LOWER: (import "string.const" "1" (global $string.const_other (ref string))) + ;; LOWER: (import "string.const" "1" (global $string.const_other (ref extern))) - ;; LOWER: (import "string.const" "2" (global $global (ref string))) + ;; LOWER: (import "string.const" "2" (global $global (ref extern))) - ;; LOWER: (global $global2 stringref (global.get $string.const_bar)) + ;; LOWER: (global $global2 externref (global.get $string.const_bar)) (global $global2 (ref null string) (string.const "bar")) ;; CHECK: (func $a (type $0) @@ -109,32 +111,32 @@ ;; Multiple possible reusable globals. Also test ignoring of imports. (module ;; CHECK: (import "a" "b" (global $import (ref string))) - ;; LOWER: (import "a" "b" (global $import (ref string))) + ;; LOWER: (import "a" "b" (global $import (ref extern))) (import "a" "b" (global $import (ref string))) ;; CHECK: (global $global1 (ref string) (string.const "foo")) (global $global1 (ref string) (string.const "foo")) ;; CHECK: (global $global2 (ref string) (global.get $global1)) - ;; LOWER: (import "string.const" "0" (global $global1 (ref string))) + ;; LOWER: (import "string.const" "0" (global $global1 (ref extern))) - ;; LOWER: (import "string.const" "1" (global $global4 (ref string))) + ;; LOWER: (import "string.const" "1" (global $global4 (ref extern))) - ;; LOWER: (global $global2 (ref string) (global.get $global1)) + ;; LOWER: (global $global2 (ref extern) (global.get $global1)) (global $global2 (ref string) (string.const "foo")) ;; CHECK: (global $global3 (ref string) (global.get $global1)) - ;; LOWER: (global $global3 (ref string) (global.get $global1)) + ;; LOWER: (global $global3 (ref extern) (global.get $global1)) (global $global3 (ref string) (string.const "foo")) ;; CHECK: (global $global4 (ref string) (string.const "bar")) (global $global4 (ref string) (string.const "bar")) ;; CHECK: (global $global5 (ref string) (global.get $global4)) - ;; LOWER: (global $global5 (ref string) (global.get $global4)) + ;; LOWER: (global $global5 (ref extern) (global.get $global4)) (global $global5 (ref string) (string.const "bar")) ;; CHECK: (global $global6 (ref string) (global.get $global4)) - ;; LOWER: (global $global6 (ref string) (global.get $global4)) + ;; LOWER: (global $global6 (ref extern) (global.get $global4)) (global $global6 (ref string) (string.const "bar")) ) From 65fc8d83216f2c3cba2d4c67f29661f538b8d21a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 1 Feb 2024 16:35:05 -0800 Subject: [PATCH 41/42] clean --- src/passes/StringLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index c57e528f019..31e41b9e8c7 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -218,7 +218,7 @@ struct StringLowering : public StringGathering { std::stringstream stream; stringArray.stringify(stream); auto str = stream.str(); - std::vector vec(str.begin(), str.end()); + auto vec = std::vector(str.begin(), str.end()); module->customSections.emplace_back( CustomSection{"string.consts", std::move(vec)}); } From 66746ee1c6b5b629f64624cdedffea43389fdee9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 5 Feb 2024 12:37:26 -0800 Subject: [PATCH 42/42] update help test --- test/lit/help/wasm-opt.test | 3 +++ test/lit/help/wasm2js.test | 3 +++ 2 files changed, 6 insertions(+) diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index 8732f5e27ce..5a207a1b05d 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -469,6 +469,9 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --string-gathering gathers wasm strings to globals ;; CHECK-NEXT: +;; CHECK-NEXT: --string-lowering lowers wasm strings and +;; CHECK-NEXT: operations to imports +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test index 40d608857b2..43243b99ab8 100644 --- a/test/lit/help/wasm2js.test +++ b/test/lit/help/wasm2js.test @@ -428,6 +428,9 @@ ;; CHECK-NEXT: ;; CHECK-NEXT: --string-gathering gathers wasm strings to globals ;; CHECK-NEXT: +;; CHECK-NEXT: --string-lowering lowers wasm strings and +;; CHECK-NEXT: operations to imports +;; CHECK-NEXT: ;; CHECK-NEXT: --strip deprecated; same as strip-debug ;; CHECK-NEXT: ;; CHECK-NEXT: --strip-debug strip debug info (including the