Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Strings] Implement stringview_wtf16.slice #6404

Merged
merged 7 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion scripts/fuzz_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,9 @@ def is_git_repo():
'exception-handling.wast',
'translate-eh-old-to-new.wast',
'rse-eh.wast',
# Non-UTF8 strings trap in V8
# Non-UTF8 strings trap in V8, and have limitations in our interpreter
'string-lowering.wast',
'precompute-strings.wast',
]


Expand Down
53 changes: 49 additions & 4 deletions src/wasm-interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1903,7 +1903,12 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
return Literal(curr->string.toString());
}

bool hasNonAsciiUpTo(const Literals& values, Index end) {
// Returns if there is a non-ascii character in a list of values, looking only
// up to an index that is provided (not inclusive). If the index is not
// provided we look in the entire list.
bool hasNonAsciiUpTo(const Literals& values,
std::optional<Index> maybeEnd = std::nullopt) {
Index end = maybeEnd ? *maybeEnd : values.size();
for (Index i = 0; i < end; ++i) {
if (uint32_t(values[i].geti32()) > 127) {
return true;
Expand All @@ -1930,7 +1935,7 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {

// This is only correct if all the bytes stored in `values` correspond to
// single unicode code points. See `visitStringWTF16Get` for details.
if (hasNonAsciiUpTo(data->values, data->values.size())) {
if (hasNonAsciiUpTo(data->values)) {
return Flow(NONCONSTANT_FLOW);
}

Expand Down Expand Up @@ -1998,7 +2003,7 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
}

// We don't handle non-ascii code points correctly yet.
if (hasNonAsciiUpTo(refValues, refValues.size())) {
if (hasNonAsciiUpTo(refValues)) {
return Flow(NONCONSTANT_FLOW);
}

Expand Down Expand Up @@ -2138,7 +2143,47 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
return Flow(NONCONSTANT_FLOW);
}
Flow visitStringSliceWTF(StringSliceWTF* curr) {
return Flow(NONCONSTANT_FLOW);
// For now we only support JS-style strings.
if (curr->op != StringSliceWTF16) {
return Flow(NONCONSTANT_FLOW);
}

Flow ref = visit(curr->ref);
if (ref.breaking()) {
return ref;
}
Flow start = visit(curr->start);
if (start.breaking()) {
return start;
}
Flow end = visit(curr->end);
if (end.breaking()) {
return end;
}

auto refData = ref.getSingleValue().getGCData();
if (!refData) {
trap("null ref");
}
auto& refValues = refData->values;
auto startVal = start.getSingleValue().getUnsigned();
auto endVal = end.getSingleValue().getUnsigned();
if (endVal > refValues.size()) {
trap("array oob");
}
if (hasNonAsciiUpTo(refValues, endVal)) {
return Flow(NONCONSTANT_FLOW);
}
Literals contents;
if (endVal > startVal) {
contents.reserve(endVal - startVal);
for (size_t i = startVal; i < endVal; i++) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to check that the string is ascii up to endVal before we execute the slice.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, fixed.

if (i < refValues.size()) {
contents.push_back(refValues[i]);
}
}
}
return makeGCData(contents, curr->type);
}
Flow visitStringSliceIter(StringSliceIter* curr) {
return Flow(NONCONSTANT_FLOW);
Expand Down
15 changes: 15 additions & 0 deletions test/lit/exec/strings.wast
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,17 @@
)
)
)

;; CHECK: [fuzz-exec] calling slice
;; CHECK-NEXT: [fuzz-exec] note result: slice => string("def")
(func $slice (export "slice") (result (ref string))
;; Slicing [3:6] here should definitely output "def".
(stringview_wtf16.slice
(string.const "abcdefgh")
(i32.const 3)
(i32.const 6)
)
)
)
;; CHECK: [fuzz-exec] calling new_wtf16_array
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
Expand Down Expand Up @@ -309,6 +320,9 @@
;; CHECK-NEXT: [LoggingExternalInterface logging 98]
;; CHECK-NEXT: [LoggingExternalInterface logging 99]
;; CHECK-NEXT: [LoggingExternalInterface logging 0]

;; CHECK: [fuzz-exec] calling slice
;; CHECK-NEXT: [fuzz-exec] note result: slice => string("def")
;; CHECK-NEXT: [fuzz-exec] comparing compare.1
;; CHECK-NEXT: [fuzz-exec] comparing compare.10
;; CHECK-NEXT: [fuzz-exec] comparing compare.2
Expand All @@ -329,3 +343,4 @@
;; CHECK-NEXT: [fuzz-exec] comparing get_codeunit
;; CHECK-NEXT: [fuzz-exec] comparing get_length
;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array
;; CHECK-NEXT: [fuzz-exec] comparing slice
29 changes: 29 additions & 0 deletions test/lit/passes/precompute-strings.wast
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,33 @@
(i32.const 0)
)
)

;; CHECK: (func $slice (type $1) (result (ref string))
;; CHECK-NEXT: (string.const "def")
;; CHECK-NEXT: )
(func $slice (export "slice") (result (ref string))
;; Slicing [3:6] here should definitely output "def".
(stringview_wtf16.slice
(string.const "abcdefgh")
(i32.const 3)
(i32.const 6)
)
)

;; CHECK: (func $slice-bad (type $1) (result (ref string))
;; CHECK-NEXT: (stringview_wtf16.slice
;; CHECK-NEXT: (string.const "abcd\c2\a3fgh")
;; CHECK-NEXT: (i32.const 3)
;; CHECK-NEXT: (i32.const 6)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $slice-bad (export "slice-bad") (result (ref string))
;; This slice contains non-ascii, so we do not optimize.
(stringview_wtf16.slice
;; abcd£fgh
(string.const "abcd\C2\A3fgh")
(i32.const 3)
(i32.const 6)
)
)
)
Loading