Skip to content

Commit

Permalink
Optimize Linker's string internment (#994)
Browse files Browse the repository at this point in the history
* rename field

* query string2symbol just once per get_or_intern

* add hints to StringInterner::get_or_insert

* add cold hints to match

* add inline annotations

* add inline annotation to LenOrder::as_str

* put internment code into its own function
  • Loading branch information
Robbepop authored Apr 22, 2024
1 parent 13aa334 commit e91c804
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 16 deletions.
2 changes: 1 addition & 1 deletion crates/core/src/hint.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/// Indicates that the calling scope is unlikely to be executed.
///
/// The body is intentionally empty: the `#[cold]` attribute tells the compiler
/// that this function is unlikely to be called, so a branch that calls it is
/// treated as the unlikely one.
#[cold]
#[inline]
fn cold() {}
pub fn cold() {}

/// Indicates that the condition is likely `true`.
#[inline]
Expand Down
88 changes: 73 additions & 15 deletions crates/wasmi/src/linker.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::{
core::hint,
func::{FuncEntity, HostFuncEntity, HostFuncTrampolineEntity},
module::{ImportName, ImportType},
AsContext,
Expand Down Expand Up @@ -269,6 +270,7 @@ impl PartialOrd for LenOrder {
}

impl LenOrder {
/// Returns a borrowed [`LenOrderStr`] view of the wrapped string.
#[inline]
pub fn as_str(&self) -> &LenOrderStr {
    // Borrow the shared string as a plain `&str` first, then convert the reference.
    let raw: &str = &self.0;
    <&LenOrderStr>::from(raw)
}
Expand Down Expand Up @@ -333,36 +335,92 @@ impl Ord for LenOrderStr {
/// Efficiently interns strings and distributes symbols.
///
/// Each distinct string is assigned exactly one [`Symbol`].
#[derive(Debug, Default, Clone)]
pub struct StringInterner {
string2idx: BTreeMap<LenOrder, Symbol>,
/// Maps every interned string to the [`Symbol`] that was assigned to it.
string2symbol: BTreeMap<LenOrder, Symbol>,
/// The interned strings, indexed by the `usize` value of their [`Symbol`].
strings: Vec<Arc<str>>,
}

/// A caller-provided hint telling the [`StringInterner`] which lookup strategy to prefer.
///
/// The hint only selects the code path used for interning; it does not decide
/// whether a string ends up interned.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum InternHint {
    /// Hint that the string to be interned likely already exists.
    LikelyExists,
    /// Hint that the string to be interned likely does not yet exist.
    LikelyNew,
}

impl StringInterner {
/// Returns the next symbol.
fn next_symbol(&self) -> Symbol {
Symbol::from_usize(self.strings.len())
/// Returns the symbol of the string and interns it if necessary.
///
/// The `hint` selects which internal strategy is used: [`InternHint::LikelyExists`]
/// favors strings that are already interned, [`InternHint::LikelyNew`] favors
/// strings that are not. The returned symbol is the same regardless of `hint`.
#[inline]
pub fn get_or_intern(&mut self, string: &str, hint: InternHint) -> Symbol {
match hint {
InternHint::LikelyExists => self.get_or_intern_hint_existing(string),
InternHint::LikelyNew => self.get_or_intern_hint_new(string),
}
}

/// Returns the symbol of the string and interns it if necessary.
pub fn get_or_intern(&mut self, string: &str) -> Symbol {
match self.string2idx.get(<&LenOrderStr>::from(string)) {
Some(symbol) => *symbol,
None => {
let symbol = self.next_symbol();
let rc_string: Arc<str> = Arc::from(string);
self.string2idx.insert(LenOrder(rc_string.clone()), symbol);
self.strings.push(rc_string);
///
/// # Note
///
/// - Optimized for `string` not to be contained in [`StringInterner`] before this operation.
/// - Allocates `string` twice on the heap if it already existed prior to this operation.
fn get_or_intern_hint_new(&mut self, string: &str) -> Symbol {
match self.string2symbol.entry(LenOrder(string.into())) {
Entry::Vacant(entry) => {
let symbol = Symbol::from_usize(self.strings.len());
self.strings.push(entry.key().clone().0);
entry.insert(symbol);
symbol
}
Entry::Occupied(entry) => {
hint::cold();
*entry.get()
}
}
}

/// Returns the symbol of the string and interns it if necessary.
///
/// # Note
///
/// - Optimized for `string` to already be contained in [`StringInterner`] before this operation:
///   that case needs only one lookup and no allocation by this method.
/// - Queries the position within `string2symbol` twice in case `string` did not yet exist:
///   once for the failed lookup and once more for the insertion.
#[inline]
fn get_or_intern_hint_existing(&mut self, string: &str) -> Symbol {
    let key = <&LenOrderStr>::from(string);
    if let Some(&symbol) = self.string2symbol.get(key) {
        return symbol;
    }
    // Miss: fall back to the cold insertion path.
    self.intern(string)
}

/// Adds `string` to the [`StringInterner`] and returns its newly assigned symbol.
///
/// The new symbol is derived from the number of strings stored so far. The map key
/// and the entry in `strings` share one allocation via [`Arc`].
///
/// # Panics
///
/// If the `string` already exists.
#[cold]
fn intern(&mut self, string: &str) -> Symbol {
    let symbol = Symbol::from_usize(self.strings.len());
    let shared = Arc::<str>::from(string);
    let key = LenOrder(Arc::clone(&shared));
    let old = self.string2symbol.insert(key, symbol);
    // A previous value would mean the caller skipped the lookup.
    assert!(old.is_none());
    self.strings.push(shared);
    symbol
}

/// Returns the symbol for the string if interned.
///
/// Returns `None` if `string` has not been interned. This is a pure lookup:
/// it takes `&self` and never adds `string` to the interner.
#[inline]
pub fn get(&self, string: &str) -> Option<Symbol> {
self.string2idx.get(<&LenOrderStr>::from(string)).copied()
self.string2symbol
.get(<&LenOrderStr>::from(string))
.copied()
}

/// Resolves the symbol to the underlying string.
///
/// Returns `None` if no string is stored at the index denoted by `symbol`.
#[inline]
pub fn resolve(&self, symbol: Symbol) -> Option<&str> {
    let interned = self.strings.get(symbol.into_usize())?;
    Some(Deref::deref(interned))
}
Expand Down Expand Up @@ -862,8 +920,8 @@ impl<T> LinkerInner<T> {
/// Returns the import key for the module name and item name.
///
/// Both names are looked up in the string interner (and interned if missing);
/// the two resulting symbols are combined into the [`ImportKey`].
fn import_key(&mut self, module: &str, name: &str) -> ImportKey {
ImportKey::new(
// NOTE(review): the hints presumably reflect that module names repeat across
// many imports while item names are mostly unique — confirm against callers.
self.strings.get_or_intern(module),
self.strings.get_or_intern(name),
self.strings.get_or_intern(module, InternHint::LikelyExists),
self.strings.get_or_intern(name, InternHint::LikelyNew),
)
}

Expand Down

0 comments on commit e91c804

Please sign in to comment.