Skip to content

Commit

Permalink
Use a sharded symbol interner
Browse files Browse the repository at this point in the history
This should reduce contention when accessing the symbol interner from
multiple threads when using parallel rustc.
  • Loading branch information
bjorn3 committed Sep 18, 2021
1 parent 6cdd42f commit 4304f75
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 41 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4036,6 +4036,7 @@ version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"rustc-hash",
"syn",
"synstructure",
]
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ proc-macro = true
synstructure = "0.12.1"
syn = { version = "1", features = ["full"] }
proc-macro2 = "1"
rustc-hash = "1.1.0"
quote = "1"
73 changes: 52 additions & 21 deletions compiler/rustc_macros/src/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
//! ```
use proc_macro2::{Span, TokenStream};
use quote::quote;
use quote::{quote, ToTokens};
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use syn::parse::{Parse, ParseStream, Result};
use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token};

Expand Down Expand Up @@ -100,6 +101,46 @@ impl Errors {
}
}

struct PrefillStream {
prefill: [Vec<String>; 256],
}

impl PrefillStream {
fn new() -> Self {
const EMPTY_VEC: Vec<String> = Vec::new();
PrefillStream { prefill: [EMPTY_VEC; 256] }
}

fn add_symbol(&mut self, symbol: String) -> u32 {
// Warning: hasher has to be kept in sync with rustc_span::symbols to ensure that all
// pre-filled symbols are assigned the correct shard.
let mut state = rustc_hash::FxHasher::default();
symbol.hash(&mut state);
let hash = state.finish();

let shard = (hash & 0xff) as usize;
let index = self.prefill[shard].len();

self.prefill[shard].push(symbol);

(index << 8) as u32 | shard as u32
}
}

impl ToTokens for PrefillStream {
fn to_tokens(&self, tokens: &mut TokenStream) {
for shard in &self.prefill {
let mut shard_stream = quote! {};
for symbol in shard {
shard_stream.extend(quote! { #symbol, });
}
tokens.extend(quote! {
&[#shard_stream],
});
}
}
}

pub fn symbols(input: TokenStream) -> TokenStream {
let (mut output, errors) = symbols_with_errors(input);

Expand All @@ -126,8 +167,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {

let mut keyword_stream = quote! {};
let mut symbols_stream = quote! {};
let mut prefill_stream = quote! {};
let mut counter = 0u32;
let mut digit_stream = quote! {};
let mut prefill_stream = PrefillStream::new();
let mut keys =
HashMap::<String, Span>::with_capacity(input.keywords.len() + input.symbols.len() + 10);
let mut prev_key: Option<(Span, String)> = None;
Expand Down Expand Up @@ -157,13 +198,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
let value = &keyword.value;
let value_string = value.value();
check_dup(keyword.name.span(), &value_string, &mut errors);
prefill_stream.extend(quote! {
#value,
});
let sym = prefill_stream.add_symbol(value_string);
keyword_stream.extend(quote! {
pub const #name: Symbol = Symbol::new(#counter);
pub const #name: Symbol = Symbol::new(#sym);
});
counter += 1;
}

// Generate the listed symbols.
Expand All @@ -176,30 +214,21 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
check_dup(symbol.name.span(), &value, &mut errors);
check_order(symbol.name.span(), &name.to_string(), &mut errors);

prefill_stream.extend(quote! {
#value,
});
let sym = prefill_stream.add_symbol(value);
symbols_stream.extend(quote! {
pub const #name: Symbol = Symbol::new(#counter);
pub const #name: Symbol = Symbol::new(#sym);
});
counter += 1;
}

// Generate symbols for the strings "0", "1", ..., "9".
let digits_base = counter;
counter += 10;
for n in 0..10 {
let n = n.to_string();
check_dup(Span::call_site(), &n, &mut errors);
prefill_stream.extend(quote! {
#n,
});
let sym = prefill_stream.add_symbol(n);
digit_stream.extend(quote! { Symbol::new(#sym), });
}
let _ = counter; // for future use

let output = quote! {
const SYMBOL_DIGITS_BASE: u32 = #digits_base;

#[doc(hidden)]
#[allow(non_upper_case_globals)]
mod kw_generated {
Expand All @@ -214,6 +243,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
#symbols_stream
}

const SYMBOL_DIGITS: [Symbol; 10] = [#digit_stream];

impl Interner {
pub(crate) fn fresh() -> Self {
Interner::prefill(&[
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
#![feature(array_windows)]
#![feature(crate_visibility_modifier)]
#![feature(hash_raw_entry)]
#![feature(if_let_guard)]
#![feature(negative_impls)]
#![feature(nll)]
Expand Down
60 changes: 41 additions & 19 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
//! type, and vice versa.
use rustc_arena::DroplessArena;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::{FxHashMap, FxHasher};
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::sync::Lock;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};

use std::cmp::{Ord, PartialEq, PartialOrd};
use std::collections::hash_map::RawEntryMut;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::str;
Expand Down Expand Up @@ -1698,8 +1699,7 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
}
}

#[derive(Default)]
pub(crate) struct Interner(Lock<InternerInner>);
pub(crate) struct Interner([Lock<InternerShard>; 256]);

// The `&'static str`s in this type actually point into the arena.
//
Expand All @@ -1710,46 +1710,68 @@ pub(crate) struct Interner(Lock<InternerInner>);
// This type is private to prevent accidentally constructing more than one `Interner` on the same
// thread, which makes it easy to mixup `Symbol`s between `Interner`s.
#[derive(Default)]
struct InternerInner {
struct InternerShard {
arena: DroplessArena,
names: FxHashMap<&'static str, Symbol>,
strings: Vec<&'static str>,
}

impl Interner {
fn prefill(init: &[&'static str]) -> Self {
Interner(Lock::new(InternerInner {
strings: init.into(),
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
..Default::default()
#[cfg(test)]
fn empty_for_test() -> Self {
Self([(); 256].map(|()| Lock::new(InternerShard::default())))
}

fn prefill(init: &[&[&'static str]; 256]) -> Self {
let mut i = 0;
Interner(init.map(|init| {
let shard = Lock::new(InternerShard {
strings: init.into(),
names: init
.iter()
.copied()
.zip((0..).map(|idx| Symbol::new(idx << 8 | i)))
.collect(),
..Default::default()
});
i += 1;
shard
}))
}

#[inline]
fn intern(&self, string: &str) -> Symbol {
let mut inner = self.0.lock();
if let Some(&name) = inner.names.get(string) {
return name;
}
// Warning: hasher has to be kept in sync with rustc_macros::symbols to ensure that all
// pre-filled symbols are assigned the correct shard.
let mut state = FxHasher::default();
string.hash(&mut state);
let hash = state.finish();

let mut shard = self.0[(hash & 0xff) as usize].lock();
let shard = &mut *shard;
let vac = match shard.names.raw_entry_mut().from_key_hashed_nocheck(hash, string) {
RawEntryMut::Occupied(occ) => return *occ.get(),
RawEntryMut::Vacant(vac) => vac,
};

let name = Symbol::new(inner.strings.len() as u32);
let name = Symbol::new(((shard.strings.len() as u32) << 8) | (hash as u32 & 0xff));

// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
// UTF-8.
let string: &str =
unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) };
unsafe { str::from_utf8_unchecked(shard.arena.alloc_slice(string.as_bytes())) };
// It is safe to extend the arena allocation to `'static` because we only access
// these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) };
inner.strings.push(string);
inner.names.insert(string, name);
shard.strings.push(string);
vac.insert(string, name);
name
}

// Get the symbol as a string. `Symbol::as_str()` should be used in
// preference to this function.
fn get(&self, symbol: Symbol) -> &str {
self.0.lock().strings[symbol.0.as_usize()]
self.0[symbol.0.as_usize() & 0xff].lock().strings[symbol.0.as_usize() >> 8]
}
}

Expand Down Expand Up @@ -1784,7 +1806,7 @@ pub mod sym {
pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol {
if let Result::Ok(idx) = n.try_into() {
if idx < 10 {
return Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32);
return super::SYMBOL_DIGITS[idx];
}
}
Symbol::intern(&n.to_string())
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_span/src/symbol/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::create_default_session_globals_then;

#[test]
fn interner_tests() {
let i = Interner::default();
let i = Interner::empty_for_test();
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry:
Expand Down

0 comments on commit 4304f75

Please sign in to comment.