From ce93f7e54b1e98b06c38141915878b53b04c7a46 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sun, 20 Aug 2023 22:22:35 +0200 Subject: [PATCH 1/2] feat: Store file contents statically and use binary search for lookup. For large directories, RustEmbed generates large amounts of LLVM IR. This is mostly caused by the use of a match expression with string literals. In Rust, the string literals of a match are compared one by one; it is much like a big if-else chain. Instead, we can store a static lookup table (sorted by file name) and run a binary search over it. This should make file lookup more efficient at runtime and improve compile time too. This improves LLVM IR even for small uses of rust-embed; for examples/basic.rs the size of the generated IR goes down by 7%, from 4397 to 4082 lines. --- impl/src/lib.rs | 37 ++++++++++++++++++++++--------------- utils/src/lib.rs | 4 +++- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/impl/src/lib.rs b/impl/src/lib.rs index 9281c4a..b771b1b 100644 --- a/impl/src/lib.rs +++ b/impl/src/lib.rs @@ -7,6 +7,7 @@ extern crate proc_macro; use proc_macro::TokenStream; use proc_macro2::TokenStream as TokenStream2; use std::{ + collections::BTreeMap, env, iter::FromIterator, path::{Path, PathBuf}, @@ -18,13 +19,13 @@ fn embedded( ) -> TokenStream2 { extern crate rust_embed_utils; - let mut match_values = Vec::::new(); + let mut match_values = BTreeMap::new(); let mut list_values = Vec::::new(); let includes: Vec<&str> = includes.iter().map(AsRef::as_ref).collect(); let excludes: Vec<&str> = excludes.iter().map(AsRef::as_ref).collect(); for rust_embed_utils::FileEntry { rel_path, full_canonical_path } in rust_embed_utils::get_files(absolute_folder_path.clone(), &includes, &excludes) { - match_values.push(embed_file(relative_folder_path.clone(), &rel_path, &full_canonical_path)); + match_values.insert(rel_path.clone(), embed_file(relative_folder_path.clone(), &rel_path, 
&full_canonical_path)); list_values.push(if let Some(prefix) = prefix { format!("{}{}", prefix, rel_path) @@ -50,17 +51,23 @@ fn embedded( } else { TokenStream2::new() }; - + let match_values = match_values.into_iter().map(|(path, bytes)| { + quote! { + (#path, #bytes), + } + }); quote! { #not_debug_attr impl #ident { /// Get an embedded file and its metadata. pub fn get(file_path: &str) -> Option { #handle_prefix - match file_path.replace("\\", "/").as_str() { - #(#match_values)* - _ => None, - } + let key = file_path.replace("\\", "/"); + const ENTRIES: &'static [(&'static str, rust_embed::EmbeddedFile)] = &[ + #(#match_values)*]; + let position = ENTRIES.binary_search_by_key(&key.as_str(), |entry| entry.0); + position.ok().map(|index| ENTRIES[index].1.clone()) + } fn names() -> std::slice::Iter<'static, &'static str> { @@ -203,22 +210,22 @@ fn embed_file(folder_path: Option<&str>, rel_path: &str, full_canonical_path: &s let full_relative_path = full_relative_path.to_string_lossy(); quote! { rust_embed::flate!(static FILE: [u8] from #full_relative_path); - let bytes = &FILE[..]; + const BYTES: &'static [u8] = FILE; } } else { quote! { - let bytes = &include_bytes!(#full_canonical_path)[..]; + const BYTES: &'static [u8] = include_bytes!(#full_canonical_path); } }; quote! 
{ - #rel_path => { - #embedding_code + { + #embedding_code - Some(rust_embed::EmbeddedFile { - data: std::borrow::Cow::from(bytes), - metadata: rust_embed::Metadata::__rust_embed_new([#(#hash),*], #last_modified #mimetype_tokens) - }) + rust_embed::EmbeddedFile { + data: std::borrow::Cow::Borrowed(BYTES), + metadata: rust_embed::Metadata::__rust_embed_new([#(#hash),*], #last_modified #mimetype_tokens) + } } } } diff --git a/utils/src/lib.rs b/utils/src/lib.rs index 3ded6c7..1a8fe30 100644 --- a/utils/src/lib.rs +++ b/utils/src/lib.rs @@ -78,12 +78,14 @@ pub fn get_files<'patterns>(folder_path: String, includes: &'patterns [&str], ex } /// A file embedded into the binary +#[derive(Clone)] pub struct EmbeddedFile { pub data: Cow<'static, [u8]>, pub metadata: Metadata, } /// Metadata about an embedded file +#[derive(Clone)] pub struct Metadata { hash: [u8; 32], last_modified: Option, @@ -93,7 +95,7 @@ pub struct Metadata { impl Metadata { #[doc(hidden)] - pub fn __rust_embed_new(hash: [u8; 32], last_modified: Option, #[cfg(feature = "mime-guess")] mimetype: &'static str) -> Self { + pub const fn __rust_embed_new(hash: [u8; 32], last_modified: Option, #[cfg(feature = "mime-guess")] mimetype: &'static str) -> Self { Self { hash, last_modified, From 01a4457d70f041da4c2a2c7d895c926342d2b845 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Sun, 20 Aug 2023 23:05:13 +0200 Subject: [PATCH 2/2] Fix test failures (mostly under 'compression' feature). If 'compression' is enabled, the static data contains function pointers that are called to get the contents of an embedded file. This works around 'include_flate' using lazy_static. 
--- impl/src/lib.rs | 27 ++++++++++++++++++++------- utils/src/lib.rs | 2 +- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/impl/src/lib.rs b/impl/src/lib.rs index b771b1b..a60abbf 100644 --- a/impl/src/lib.rs +++ b/impl/src/lib.rs @@ -56,6 +56,16 @@ fn embedded( (#path, #bytes), } }); + let value_type = if cfg!(feature = "compression") { + quote! { fn() -> rust_embed::EmbeddedFile } + } else { + quote! { rust_embed::EmbeddedFile } + }; + let get_value = if cfg!(feature = "compression") { + quote! {|idx| (ENTRIES[idx].1)()} + } else { + quote! {|idx| ENTRIES[idx].1.clone()} + }; quote! { #not_debug_attr impl #ident { @@ -63,10 +73,10 @@ fn embedded( pub fn get(file_path: &str) -> Option { #handle_prefix let key = file_path.replace("\\", "/"); - const ENTRIES: &'static [(&'static str, rust_embed::EmbeddedFile)] = &[ + const ENTRIES: &'static [(&'static str, #value_type)] = &[ #(#match_values)*]; let position = ENTRIES.binary_search_by_key(&key.as_str(), |entry| entry.0); - position.ok().map(|index| ENTRIES[index].1.clone()) + position.ok().map(#get_value) } @@ -209,21 +219,24 @@ fn embed_file(folder_path: Option<&str>, rel_path: &str, full_canonical_path: &s let full_relative_path = PathBuf::from_iter([folder_path.expect("folder_path must be provided under `compression` feature"), rel_path]); let full_relative_path = full_relative_path.to_string_lossy(); quote! { - rust_embed::flate!(static FILE: [u8] from #full_relative_path); - const BYTES: &'static [u8] = FILE; + rust_embed::flate!(static BYTES: [u8] from #full_relative_path); } } else { quote! { const BYTES: &'static [u8] = include_bytes!(#full_canonical_path); } }; - + let closure_args = if cfg!(feature = "compression") { + quote! { || } + } else { + quote! {} + }; quote! 
{ - { + #closure_args { #embedding_code rust_embed::EmbeddedFile { - data: std::borrow::Cow::Borrowed(BYTES), + data: std::borrow::Cow::Borrowed(&BYTES), metadata: rust_embed::Metadata::__rust_embed_new([#(#hash),*], #last_modified #mimetype_tokens) } } diff --git a/utils/src/lib.rs b/utils/src/lib.rs index 1a8fe30..899f109 100644 --- a/utils/src/lib.rs +++ b/utils/src/lib.rs @@ -100,7 +100,7 @@ impl Metadata { hash, last_modified, #[cfg(feature = "mime-guess")] - mimetype: mimetype.into(), + mimetype: Cow::Borrowed(mimetype), } }