-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of #5121 - Eh2406:string_interning, r=alexcrichton
String interning. This builds on the work from #5118. This interns the strings in the part of the resolver that gets cloned a lot. In a test on #4810 (comment): before, we got to 1700000 ticks in ~(63 to 67) sec (from #5118); after, we got to 1700000 ticks in ~(42 to 45) sec. The interning code itself would be much better with a `leak` function that converts a `String` to a `&'static str`. Something like: ```rust pub fn leek(s: String) -> &'static str { let ptr = s.as_ptr(); let len = s.len(); mem::forget(s); unsafe { let slice = slice::from_raw_parts(ptr, len); str::from_utf8(slice).unwrap() } } ``` but "there is no unsafe in Cargo", and I am not the best at unsafe. So I just `to_string` and lived with the extra copy. Is there a better way to hand out references? I assumed that `InternedString::new` would start appearing in profile results, and that we would want `PackageId`, `Summary`, et al. to store the `InternedString`. That is why I put the interner in a shared folder. So far it is just used in the resolver. It may make sense for a lot more of the strings to be interned, but with the extra copy... I have not explored it yet.
- Loading branch information
Showing
5 changed files
with
85 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
use std::sync::RwLock; | ||
use std::collections::HashSet; | ||
use std::slice; | ||
use std::str; | ||
use std::mem; | ||
use std::cmp::Ordering; | ||
use std::ops::Deref; | ||
|
||
/// Leaks `s` and returns a reference to its contents that is valid for the
/// rest of the program.
///
/// The string is first converted to a `Box<str>` (dropping any excess
/// capacity) and the box is then leaked, so the allocation is never freed.
/// Only call this for data that is meant to live until the process exits.
pub fn leek(s: String) -> &'static str {
    // `Box::leak` hands back the allocation as a `'static` reference without
    // going through raw pointers: the previous forget-then-rebuild approach
    // kept a raw pointer alive across a move of the owning `Box`.
    Box::leak(s.into_boxed_str())
}
|
||
lazy_static! { | ||
static ref STRING_CASHE: RwLock<HashSet<&'static str>> = | ||
RwLock::new(HashSet::new()); | ||
} | ||
|
||
/// A cheap, `Copy` handle to a string, stored as a raw start pointer plus a
/// byte length.
///
/// The derived `PartialEq`, `Eq` and `Hash` operate on the `(ptr, len)` pair
/// itself, not on the bytes the pointer refers to.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct InternedString {
    /// Address of the first byte of the string data.
    ptr: *const u8,
    /// Length of the string data in bytes.
    len: usize,
}
|
||
impl InternedString { | ||
pub fn new(str: &str) -> InternedString { | ||
let mut cache = STRING_CASHE.write().unwrap(); | ||
if let Some(&s) = cache.get(str) { | ||
return InternedString { ptr: s.as_ptr(), len: s.len() }; | ||
} | ||
let s = leek(str.to_string()); | ||
cache.insert(s); | ||
InternedString { ptr: s.as_ptr(), len: s.len() } | ||
} | ||
} | ||
|
||
impl Deref for InternedString { | ||
type Target = str; | ||
|
||
fn deref(&self) -> &'static str { | ||
unsafe { | ||
let slice = slice::from_raw_parts(self.ptr, self.len); | ||
&str::from_utf8_unchecked(slice) | ||
} | ||
} | ||
} | ||
|
||
impl Ord for InternedString { | ||
fn cmp(&self, other: &InternedString) -> Ordering { | ||
let str: &str = &*self; | ||
str.cmp(&*other) | ||
} | ||
} | ||
|
||
impl PartialOrd for InternedString { | ||
fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> { | ||
Some(self.cmp(other)) | ||
} | ||
} | ||
|
||
unsafe impl Send for InternedString {} | ||
unsafe impl Sync for InternedString {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters