Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial interning implementation #1612

Merged
merged 27 commits into from
Jul 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6767371
Initial interning implementation
Pauan Jun 22, 2019
86a8842
Changing IntoWasmAbi to use interning
Pauan Jun 22, 2019
0a61e12
Making interning manual
Pauan Jun 22, 2019
0359da2
Potential fix for OptionIntoWasmAbi?
Pauan Jun 24, 2019
f28cfc2
Fixing some things for the cache
Pauan Jul 2, 2019
f7e8e70
Fixing compile errors
Pauan Jul 16, 2019
2ee4c54
Changing to use WasmSlice for the caching
Pauan Jul 16, 2019
fd88626
Fixing compile errors
Pauan Jul 16, 2019
4e50465
Undoing some formatting
Pauan Jul 16, 2019
1e4cac9
Simplifying the output
Pauan Jul 16, 2019
1723e9d
More simplifications
Pauan Jul 16, 2019
cc6ec86
Fixing compile errors
Pauan Jul 16, 2019
adf21fe
Removing unneeded size argument
Pauan Jul 17, 2019
8572255
Making uluru optional
Pauan Jul 17, 2019
c3676bc
Removing unneeded if statement
Pauan Jul 17, 2019
2405fad
Shifting the unsafety guarantees around
Pauan Jul 18, 2019
3177fa9
Minor doc fix
Pauan Jul 18, 2019
ca15a59
Changing from uluru to HashMap
Pauan Jul 18, 2019
544ec49
Shifting the unsafe responsibility a bit
Pauan Jul 18, 2019
366ed23
Adding in docs for intern
Pauan Jul 18, 2019
c82253c
Fixing doc test error
Pauan Jul 18, 2019
554ef90
Fixing issue with wasm-interpreter
Pauan Jul 19, 2019
10ab4cb
Fixing TypeScript types for cached strings
Pauan Jul 19, 2019
f8da1e6
Fixing unsafe_get_str function
Pauan Jul 19, 2019
aef62bd
Adding in enable-interning to CI test suite
Pauan Jul 19, 2019
7b6ef70
Adding in note to the intern docs
Pauan Jul 19, 2019
59af318
Fixing CI build error
Pauan Jul 19, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ spans = ["wasm-bindgen-macro/spans"]
std = []
serde-serialize = ["serde", "serde_json", "std"]
nightly = []
enable-interning = ["std"]

# Whether or not the `#[wasm_bindgen]` macro is strict and generates an error on
# all unused attributes
Expand All @@ -38,6 +39,7 @@ xxx_debug_only_print_generated_code = ["wasm-bindgen-macro/xxx_debug_only_print_
wasm-bindgen-macro = { path = "crates/macro", version = "=0.2.48" }
serde = { version = "1.0", optional = true }
serde_json = { version = "1.0", optional = true }
cfg-if = "0.1.9"

[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
js-sys = { path = 'crates/js-sys', version = '0.3.25' }
Expand Down
2 changes: 2 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ jobs:
displayName: "Anyref test suite builds"
- script: cargo test --target wasm32-unknown-unknown --features serde-serialize
displayName: "Crate test suite (with serde)"
- script: cargo test --target wasm32-unknown-unknown --features enable-interning
displayName: "Crate test suite (with enable-interning)"
- script: cargo test --target wasm32-unknown-unknown -p no-std
displayName: "Crate test suite (no_std)"
- script: cargo test -p wasm-bindgen-futures
Expand Down
7 changes: 5 additions & 2 deletions crates/cli-support/src/descriptor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ tys! {
BOOLEAN
FUNCTION
CLOSURE
CACHED_STRING
STRING
REF
REFMUT
Expand Down Expand Up @@ -58,6 +59,7 @@ pub enum Descriptor {
RefMut(Box<Descriptor>),
Slice(Box<Descriptor>),
Vector(Box<Descriptor>),
CachedString,
String,
Anyref,
Enum { hole: u32 },
Expand Down Expand Up @@ -127,6 +129,7 @@ impl Descriptor {
SLICE => Descriptor::Slice(Box::new(Descriptor::_decode(data, clamped))),
VECTOR => Descriptor::Vector(Box::new(Descriptor::_decode(data, clamped))),
OPTIONAL => Descriptor::Option(Box::new(Descriptor::_decode(data, clamped))),
CACHED_STRING => Descriptor::CachedString,
STRING => Descriptor::String,
ANYREF => Descriptor::Anyref,
ENUM => Descriptor::Enum { hole: get(data) },
Expand Down Expand Up @@ -159,12 +162,12 @@ impl Descriptor {

pub fn vector_kind(&self) -> Option<VectorKind> {
let inner = match *self {
Descriptor::String => return Some(VectorKind::String),
Descriptor::String | Descriptor::CachedString => return Some(VectorKind::String),
Descriptor::Vector(ref d) => &**d,
Descriptor::Slice(ref d) => &**d,
Descriptor::Ref(ref d) => match **d {
Descriptor::Slice(ref d) => &**d,
Descriptor::String => return Some(VectorKind::String),
Descriptor::String | Descriptor::CachedString => return Some(VectorKind::String),
_ => return None,
},
Descriptor::RefMut(ref d) => match **d {
Expand Down
28 changes: 28 additions & 0 deletions crates/cli-support/src/js/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,34 @@ impl<'a> Context<'a> {
Ok(())
}

fn expose_get_cached_string_from_wasm(&mut self) -> Result<(), Error> {
if !self.should_write_global("get_cached_string_from_wasm") {
return Ok(());
}

self.expose_get_object();
self.expose_get_string_from_wasm()?;

// This has support for both `&str` and `Option<&str>`.
//
// If `ptr` is not `0` then we know that it's a `&str` or `Some(&str)`, so we just decode it.
//
// If `ptr` is `0` then the `len` is a pointer to the cached `JsValue`, so we return that.
//
// If `ptr` and `len` are both `0` then that means it's `None`, in that case we rely upon
// the fact that `getObject(0)` is guaranteed to be `undefined`.
self.global("
function getCachedStringFromWasm(ptr, len) {
if (ptr === 0) {
return getObject(len);
} else {
return getStringFromWasm(ptr, len);
}
}
");
Ok(())
}

fn expose_get_array_js_value_from_wasm(&mut self) -> Result<(), Error> {
if !self.should_write_global("get_array_js_value_from_wasm") {
return Ok(());
Expand Down
37 changes: 37 additions & 0 deletions crates/cli-support/src/js/outgoing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,33 @@ impl<'a, 'b> Outgoing<'a, 'b> {
Ok(format!("v{}", i))
}

NonstandardOutgoing::CachedString {
offset,
length,
owned,
optional,
} => {
let ptr = self.arg(*offset);
let len = self.arg(*length);
let tmp = self.js.tmp();

if *optional {
self.js.typescript_optional("string");
} else {
self.js.typescript_required("string");
}

self.cx.expose_get_cached_string_from_wasm()?;

self.js.prelude(&format!("const v{} = getCachedStringFromWasm({}, {});", tmp, ptr, len));

if *owned {
self.prelude_free_cached_string(&ptr, &len)?;
}

Ok(format!("v{}", tmp))
}

NonstandardOutgoing::StackClosure {
a,
b,
Expand Down Expand Up @@ -408,4 +435,14 @@ impl<'a, 'b> Outgoing<'a, 'b> {
));
self.cx.require_internal_export("__wbindgen_free")
}

fn prelude_free_cached_string(&mut self, ptr: &str, len: &str) -> Result<(), Error> {
self.js.prelude(&format!(
"if ({ptr} !== 0) {{ wasm.__wbindgen_free({ptr}, {len}); }}",
ptr = ptr,
len = len,
));

self.cx.require_internal_export("__wbindgen_free")
}
}
6 changes: 3 additions & 3 deletions crates/cli-support/src/webidl/incoming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ impl IncomingBuilder {
Descriptor::RefMut(d) => self.process_ref(true, d)?,
Descriptor::Option(d) => self.process_option(d)?,

Descriptor::String | Descriptor::Vector(_) => {
Descriptor::String | Descriptor::CachedString | Descriptor::Vector(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!("unsupported argument type for calling Rust function from JS {:?}", arg)
})? ;
Expand Down Expand Up @@ -256,7 +256,7 @@ impl IncomingBuilder {
self.bindings
.push(NonstandardIncoming::BorrowedAnyref { val: expr });
}
Descriptor::String | Descriptor::Slice(_) => {
Descriptor::String | Descriptor::CachedString | Descriptor::Slice(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
"unsupported slice type for calling Rust function from JS {:?}",
Expand Down Expand Up @@ -363,7 +363,7 @@ impl IncomingBuilder {
self.webidl.push(ast::WebidlScalarType::Any);
}

Descriptor::String | Descriptor::Vector(_) => {
Descriptor::String | Descriptor::CachedString | Descriptor::Vector(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
"unsupported optional slice type for calling Rust function from JS {:?}",
Expand Down
31 changes: 31 additions & 0 deletions crates/cli-support/src/webidl/outgoing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,18 @@ pub enum NonstandardOutgoing {
kind: VectorKind,
},

/// A Rust String (or &str) which might be cached, or might be `None`.
///
/// If `offset` is 0 then it is cached, and the cached JsValue's index is in `length`.
///
/// If `offset` and `length` are both 0, then it is `None`.
CachedString {
offset: u32,
length: u32,
owned: bool,
optional: bool,
},

/// A `&[u64]` or `&[i64]` is being passed to JS, and the 64-bit sizes here
/// aren't supported by WebIDL bindings yet.
View64 {
Expand Down Expand Up @@ -240,6 +252,8 @@ impl OutgoingBuilder<'_> {
Descriptor::Ref(d) => self.process_ref(false, d)?,
Descriptor::RefMut(d) => self.process_ref(true, d)?,

Descriptor::CachedString => self.cached_string(false, true),

Descriptor::Vector(_) | Descriptor::String => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
Expand Down Expand Up @@ -281,6 +295,7 @@ impl OutgoingBuilder<'_> {
self.bindings
.push(NonstandardOutgoing::BorrowedAnyref { idx });
}
Descriptor::CachedString => self.cached_string(false, false),
Descriptor::Slice(_) | Descriptor::String => {
use wasm_webidl_bindings::ast::WebidlScalarType::*;

Expand Down Expand Up @@ -422,6 +437,9 @@ impl OutgoingBuilder<'_> {
}
Descriptor::Ref(d) => self.process_option_ref(false, d)?,
Descriptor::RefMut(d) => self.process_option_ref(true, d)?,

Descriptor::CachedString => self.cached_string(true, true),

Descriptor::String | Descriptor::Vector(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
Expand Down Expand Up @@ -455,6 +473,7 @@ impl OutgoingBuilder<'_> {
self.bindings
.push(NonstandardOutgoing::BorrowedAnyref { idx });
}
Descriptor::CachedString => self.cached_string(true, false),
Descriptor::String | Descriptor::Slice(_) => {
let kind = arg.vector_kind().ok_or_else(|| {
format_err!(
Expand Down Expand Up @@ -505,6 +524,18 @@ impl OutgoingBuilder<'_> {
.push(NonstandardOutgoing::Standard(binding.into()));
}

fn cached_string(&mut self, optional: bool, owned: bool) {
let offset = self.push_wasm(ValType::I32);
let length = self.push_wasm(ValType::I32);
self.webidl.push(ast::WebidlScalarType::DomString);
self.bindings.push(NonstandardOutgoing::CachedString {
offset,
length,
owned,
optional,
})
}

fn option_native(&mut self, signed: bool, ty: ValType) {
let present = self.push_wasm(ValType::I32);
let val = self.push_wasm(ty);
Expand Down
82 changes: 82 additions & 0 deletions src/cache/intern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use cfg_if::cfg_if;


cfg_if! {
if #[cfg(feature = "enable-interning")] {
use std::thread_local;
use std::string::String;
use std::borrow::ToOwned;
use std::cell::RefCell;
use std::collections::HashMap;
use crate::JsValue;

struct Cache {
entries: RefCell<HashMap<String, JsValue>>,
}

thread_local! {
static CACHE: Cache = Cache {
entries: RefCell::new(HashMap::new()),
};
}

/// This returns the raw index of the cached JsValue, so you must take care
/// so that you don't use it after it is freed.
pub(crate) fn unsafe_get_str(s: &str) -> Option<u32> {
CACHE.with(|cache| {
let cache = cache.entries.borrow();

cache.get(s).map(|x| x.idx)
})
}

fn intern_str(key: &str) {
CACHE.with(|cache| {
let mut cache = cache.entries.borrow_mut();

// Can't use `entry` because `entry` requires a `String`
if !cache.contains_key(key) {
cache.insert(key.to_owned(), JsValue::from(key));
alexcrichton marked this conversation as resolved.
Show resolved Hide resolved
}
})
}
}
}


/// Interns Rust strings so that it's much faster to send them to JS.
///
/// Sending strings from Rust to JS is slow, because it has to do a full `O(n)`
/// copy and *also* encode from UTF-8 to UTF-16. This must be done every single
/// time a string is sent to JS.
///
/// If you are sending the same string multiple times, you can call this `intern`
/// function, which simply returns its argument unchanged:
///
/// ```rust
/// # use wasm_bindgen::intern;
/// intern("foo") // returns "foo"
/// # ;
/// ```
///
/// However, if you enable the `"enable-interning"` feature for wasm-bindgen,
/// then it will add the string into an internal cache.
///
/// When you send that cached string to JS, it will look it up in the cache,
/// which completely avoids the `O(n)` copy and encoding. This has a significant
/// speed boost (as high as 783%)!
///
/// However, there is a small cost to this caching, so you shouldn't cache every
/// string. Only cache strings which have a high likelihood of being sent
/// to JS multiple times.
///
/// Also, keep in mind that this function is a *performance hint*: it's not
/// *guaranteed* that the string will be cached, and the caching strategy
/// might change at any time, so don't rely upon it.
#[inline]
pub fn intern(s: &str) -> &str {
alexcrichton marked this conversation as resolved.
Show resolved Hide resolved
#[cfg(feature = "enable-interning")]
intern_str(s);

s
}
1 change: 1 addition & 0 deletions src/cache/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod intern;
31 changes: 27 additions & 4 deletions src/convert/slices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::prelude::v1::*;
use core::slice;
use core::str;

use cfg_if::cfg_if;
use crate::convert::OptionIntoWasmAbi;
use crate::convert::{FromWasmAbi, IntoWasmAbi, RefFromWasmAbi, RefMutFromWasmAbi, WasmAbi};

Expand Down Expand Up @@ -123,6 +124,24 @@ vectors! {
u8 i8 u16 i16 u32 i32 u64 i64 usize isize f32 f64
}


cfg_if! {
if #[cfg(feature = "enable-interning")] {
#[inline]
fn unsafe_get_cached_str(x: &str) -> Option<WasmSlice> {
// This uses 0 for the ptr as an indication that it is a JsValue and not a str.
crate::cache::intern::unsafe_get_str(x).map(|x| WasmSlice { ptr: 0, len: x })
}

} else {
#[inline]
fn unsafe_get_cached_str(_x: &str) -> Option<WasmSlice> {
None
}
}
}


if_std! {
impl<T> IntoWasmAbi for Vec<T> where Box<[T]>: IntoWasmAbi<Abi = WasmSlice> {
type Abi = <Box<[T]> as IntoWasmAbi>::Abi;
Expand Down Expand Up @@ -153,12 +172,14 @@ if_std! {

#[inline]
fn into_abi(self) -> Self::Abi {
self.into_bytes().into_abi()
// This is safe because the JsValue is immediately looked up in the heap and
// then returned, so use-after-free cannot occur.
unsafe_get_cached_str(&self).unwrap_or_else(|| self.into_bytes().into_abi())
}
}

impl OptionIntoWasmAbi for String {
fn none() -> WasmSlice { null_slice() }
fn none() -> Self::Abi { null_slice() }
}

impl FromWasmAbi for String {
Expand All @@ -180,12 +201,14 @@ impl<'a> IntoWasmAbi for &'a str {

#[inline]
fn into_abi(self) -> Self::Abi {
self.as_bytes().into_abi()
// This is safe because the JsValue is immediately looked up in the heap and
// then returned, so use-after-free cannot occur.
unsafe_get_cached_str(self).unwrap_or_else(|| self.as_bytes().into_abi())
}
}

impl<'a> OptionIntoWasmAbi for &'a str {
fn none() -> WasmSlice {
fn none() -> Self::Abi {
null_slice()
}
}
Expand Down
Loading