diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 8bef4e98c8c..683c2bed718 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -332,8 +332,9 @@ def is_git_repo(): 'exception-handling.wast', 'translate-eh-old-to-new.wast', 'rse-eh.wast', - # Non-UTF8 strings trap in V8 + # Non-UTF8 strings trap in V8, and have limitations in our interpreter 'string-lowering.wast', + 'precompute-strings.wast', ] diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 5179d6ad03f..ccb56aba81c 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1903,7 +1903,12 @@ class ExpressionRunner : public OverriddenVisitor { return Literal(curr->string.toString()); } - bool hasNonAsciiUpTo(const Literals& values, Index end) { + // Returns if there is a non-ascii character in a list of values, looking only + // up to an index that is provided (not inclusive). If the index is not + // provided we look in the entire list. + bool hasNonAsciiUpTo(const Literals& values, + std::optional maybeEnd = std::nullopt) { + Index end = maybeEnd ? *maybeEnd : values.size(); for (Index i = 0; i < end; ++i) { if (uint32_t(values[i].geti32()) > 127) { return true; @@ -1930,7 +1935,7 @@ class ExpressionRunner : public OverriddenVisitor { // This is only correct if all the bytes stored in `values` correspond to // single unicode code points. See `visitStringWTF16Get` for details. - if (hasNonAsciiUpTo(data->values, data->values.size())) { + if (hasNonAsciiUpTo(data->values)) { return Flow(NONCONSTANT_FLOW); } @@ -1998,7 +2003,7 @@ class ExpressionRunner : public OverriddenVisitor { } // We don't handle non-ascii code points correctly yet. - if (hasNonAsciiUpTo(refValues, refValues.size())) { + if (hasNonAsciiUpTo(refValues)) { return Flow(NONCONSTANT_FLOW); } @@ -2138,7 +2143,47 @@ class ExpressionRunner : public OverriddenVisitor { return Flow(NONCONSTANT_FLOW); } Flow visitStringSliceWTF(StringSliceWTF* curr) { - return Flow(NONCONSTANT_FLOW); + // For now we only support JS-style strings. + if (curr->op != StringSliceWTF16) { + return Flow(NONCONSTANT_FLOW); + } + + Flow ref = visit(curr->ref); + if (ref.breaking()) { + return ref; + } + Flow start = visit(curr->start); + if (start.breaking()) { + return start; + } + Flow end = visit(curr->end); + if (end.breaking()) { + return end; + } + + auto refData = ref.getSingleValue().getGCData(); + if (!refData) { + trap("null ref"); + } + auto& refValues = refData->values; + auto startVal = start.getSingleValue().getUnsigned(); + auto endVal = end.getSingleValue().getUnsigned(); + if (endVal > refValues.size()) { + trap("array oob"); + } + if (hasNonAsciiUpTo(refValues, endVal)) { + return Flow(NONCONSTANT_FLOW); + } + Literals contents; + if (endVal > startVal) { + contents.reserve(endVal - startVal); + for (size_t i = startVal; i < endVal; i++) { + if (i < refValues.size()) { + contents.push_back(refValues[i]); + } + } + } + return makeGCData(contents, curr->type); } Flow visitStringSliceIter(StringSliceIter* curr) { return Flow(NONCONSTANT_FLOW); diff --git a/test/lit/exec/strings.wast b/test/lit/exec/strings.wast index 706602b1ce5..5ff8ce47a87 100644 --- a/test/lit/exec/strings.wast +++ b/test/lit/exec/strings.wast @@ -244,6 +244,17 @@ ) ) ) + + ;; CHECK: [fuzz-exec] calling slice + ;; CHECK-NEXT: [fuzz-exec] note result: slice => string("def") + (func $slice (export "slice") (result (ref string)) + ;; Slicing [3:6] here should definitely output "def". + (stringview_wtf16.slice + (string.const "abcdefgh") + (i32.const 3) + (i32.const 6) + ) + ) ) ;; CHECK: [fuzz-exec] calling new_wtf16_array ;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello") @@ -309,6 +320,9 @@ ;; CHECK-NEXT: [LoggingExternalInterface logging 98] ;; CHECK-NEXT: [LoggingExternalInterface logging 99] ;; CHECK-NEXT: [LoggingExternalInterface logging 0] + +;; CHECK: [fuzz-exec] calling slice +;; CHECK-NEXT: [fuzz-exec] note result: slice => string("def") ;; CHECK-NEXT: [fuzz-exec] comparing compare.1 ;; CHECK-NEXT: [fuzz-exec] comparing compare.10 ;; CHECK-NEXT: [fuzz-exec] comparing compare.2 @@ -329,3 +343,4 @@ ;; CHECK-NEXT: [fuzz-exec] comparing get_codeunit ;; CHECK-NEXT: [fuzz-exec] comparing get_length ;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array +;; CHECK-NEXT: [fuzz-exec] comparing slice diff --git a/test/lit/passes/precompute-strings.wast b/test/lit/passes/precompute-strings.wast index aa138b289c0..f704306481a 100644 --- a/test/lit/passes/precompute-strings.wast +++ b/test/lit/passes/precompute-strings.wast @@ -129,4 +129,33 @@ (i32.const 0) ) ) + + ;; CHECK: (func $slice (type $1) (result (ref string)) + ;; CHECK-NEXT: (string.const "def") + ;; CHECK-NEXT: ) + (func $slice (export "slice") (result (ref string)) + ;; Slicing [3:6] here should definitely output "def". + (stringview_wtf16.slice + (string.const "abcdefgh") + (i32.const 3) + (i32.const 6) + ) + ) + + ;; CHECK: (func $slice-bad (type $1) (result (ref string)) + ;; CHECK-NEXT: (stringview_wtf16.slice + ;; CHECK-NEXT: (string.const "abcd\c2\a3fgh") + ;; CHECK-NEXT: (i32.const 3) + ;; CHECK-NEXT: (i32.const 6) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $slice-bad (export "slice-bad") (result (ref string)) + ;; This slice contains non-ascii, so we do not optimize. + (stringview_wtf16.slice + ;; abcd£fgh + (string.const "abcd\C2\A3fgh") + (i32.const 3) + (i32.const 6) + ) + ) )