From abd521f350b14e68aaf2a69b3461963bae4a2f42 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 11 Sep 2024 16:06:59 +0400 Subject: [PATCH 01/10] wip --- crates/compilers/src/cache.rs | 41 +++++++++---- crates/compilers/src/lib.rs | 2 + crates/compilers/src/preprocessor.rs | 90 ++++++++++++++++++++++++++++ crates/core/src/utils.rs | 3 + 4 files changed, 125 insertions(+), 11 deletions(-) create mode 100644 crates/compilers/src/preprocessor.rs diff --git a/crates/compilers/src/cache.rs b/crates/compilers/src/cache.rs index 0d5d1613..3c9fc9bd 100644 --- a/crates/compilers/src/cache.rs +++ b/crates/compilers/src/cache.rs @@ -4,6 +4,7 @@ use crate::{ buildinfo::RawBuildInfo, compilers::{Compiler, CompilerSettings, Language}, output::Builds, + preprocessor::interface_representation, resolver::GraphEdges, ArtifactFile, ArtifactOutput, Artifacts, ArtifactsMap, Graph, OutputContext, Project, ProjectPaths, ProjectPathsConfig, SourceCompilationKind, @@ -173,7 +174,10 @@ impl CompilerCache { pub fn join_entries(&mut self, root: &Path) -> &mut Self { self.files = std::mem::take(&mut self.files) .into_iter() - .map(|(path, entry)| (root.join(path), entry)) + .map(|(path, mut entry)| { + entry.join_imports(root); + (root.join(path), entry) + }) .collect(); self } @@ -182,7 +186,11 @@ impl CompilerCache { pub fn strip_entries_prefix(&mut self, base: &Path) -> &mut Self { self.files = std::mem::take(&mut self.files) .into_iter() - .map(|(path, entry)| (path.strip_prefix(base).map(Into::into).unwrap_or(path), entry)) + .map(|(path, mut entry)| { + let path = path.strip_prefix(base).map(Into::into).unwrap_or(path); + entry.strip_imports_prefixes(base); + (path, entry) + }) .collect(); self } @@ -405,6 +413,8 @@ pub struct CacheEntry { pub last_modification_date: u64, /// hash to identify whether the content of the file changed pub content_hash: String, + /// hash of the interface representation of the file, if it's a source file + pub interface_repr_hash: Option, /// identifier name see [`foundry_compilers_core::utils::source_name()`] pub source_name: PathBuf, /// what config was set when compiling this file @@ -550,6 +560,17 @@ impl CacheEntry { self.artifacts().all(|a| a.path.exists()) } + /// Joins all import paths with `base` + pub fn join_imports(&mut self, base: &Path) { + self.imports = self.imports.iter().map(|i| base.join(i)).collect(); + } + + /// Strips `base` from all import paths + pub fn strip_imports_prefixes(&mut self, base: &Path) { + self.imports = + self.imports.iter().map(|i| i.strip_prefix(base).unwrap_or(i).to_path_buf()).collect(); + } + /// Sets the artifact's paths to `base` adjoined to the artifact's `path`. 
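     /// For instance (a sketch): with `base = /project`, an artifact recorded at
     /// `out/Counter.sol/Counter.json` becomes `/project/out/Counter.sol/Counter.json`.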
pub fn join_artifacts_files(&mut self, base: &Path) { self.artifacts_mut().for_each(|a| a.path = base.join(&a.path)) @@ -625,20 +646,18 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput, C: Compiler> { impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { /// Creates a new cache entry for the file fn create_cache_entry(&mut self, file: PathBuf, source: &Source) { - let imports = self - .edges - .imports(&file) - .into_iter() - .map(|import| strip_prefix(import, self.project.root()).into()) - .collect(); - + let content_hash = source.content_hash(); + let interface_repr_hash = file.starts_with(&self.project.paths.sources).then(|| { + interface_representation(&source.content).unwrap_or_else(|_| content_hash.clone()) + }); let entry = CacheEntry { last_modification_date: CacheEntry::::read_last_modification_date(&file) .unwrap_or_default(), - content_hash: source.content_hash(), + content_hash, + interface_repr_hash, source_name: strip_prefix(&file, self.project.root()).into(), compiler_settings: self.project.settings.clone(), - imports, + imports: self.edges.imports(&file).into_iter().map(|i| i.into()).collect(), version_requirement: self.edges.version_requirement(&file).map(|v| v.to_string()), // artifacts remain empty until we received the compiler output artifacts: Default::default(), diff --git a/crates/compilers/src/lib.rs b/crates/compilers/src/lib.rs index fc2f8c3a..74d38cb9 100644 --- a/crates/compilers/src/lib.rs +++ b/crates/compilers/src/lib.rs @@ -24,6 +24,8 @@ pub use resolver::Graph; pub mod compilers; pub use compilers::*; +mod preprocessor; + mod compile; pub use compile::{ output::{AggregatedCompilerOutput, ProjectCompileOutput}, diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs new file mode 100644 index 00000000..763ab35c --- /dev/null +++ b/crates/compilers/src/preprocessor.rs @@ -0,0 +1,90 @@ +use foundry_compilers_core::utils; +use solang_parser::{ + diagnostics::Diagnostic, + helpers::CodeLocation, + pt::{ContractPart, ContractTy, FunctionAttribute, FunctionTy, SourceUnitPart, Visibility}, +}; + +pub(crate) fn interface_representation(content: &str) -> Result> { + let (source_unit, _) = solang_parser::parse(&content, 0)?; + let mut locs_to_remove = Vec::new(); + + for part in source_unit.0 { + if let SourceUnitPart::ContractDefinition(contract) = part { + if matches!(contract.ty, ContractTy::Interface(_) | ContractTy::Library(_)) { + continue; + } + for part in contract.parts { + if let ContractPart::FunctionDefinition(func) = part { + let is_exposed = func.ty == FunctionTy::Function + && func.attributes.iter().any(|attr| { + matches!( + attr, + FunctionAttribute::Visibility( + Visibility::External(_) | Visibility::Public(_) + ) + ) + }) + || matches!( + func.ty, + FunctionTy::Constructor | FunctionTy::Fallback | FunctionTy::Receive + ); + + if !is_exposed { + locs_to_remove.push(func.loc); + } + + if let Some(ref body) = func.body { + locs_to_remove.push(body.loc()); + } + } + } + } + } + + let mut content = content.to_string(); + let mut offset = 0; + + for loc in locs_to_remove { + let start = loc.start() - offset; + let end = loc.end() - offset; + + content.replace_range(start..end, ""); + offset += end - start; + } + + let content = content.replace("\n", ""); + Ok(utils::RE_TWO_OR_MORE_SPACES.replace_all(&content, "").to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_interface_representation() { + let content = r#" +library Lib { + function libFn() internal { + 
// logic to keep + } +} +contract A { + function a() external {} + function b() public {} + function c() internal { + // logic logic logic + } + function d() private {} + function e() external { + // logic logic logic + } +}"#; + + let result = interface_representation(content).unwrap(); + assert_eq!( + result, + r#"library Lib {function libFn() internal {// logic to keep}}contract A {function a() externalfunction b() publicfunction e() external }"# + ); + } +} diff --git a/crates/core/src/utils.rs b/crates/core/src/utils.rs index 877d2d9f..17e80775 100644 --- a/crates/core/src/utils.rs +++ b/crates/core/src/utils.rs @@ -42,6 +42,9 @@ pub static RE_SOL_SDPX_LICENSE_IDENTIFIER: Lazy = /// A regex used to remove extra lines in flatenned files pub static RE_THREE_OR_MORE_NEWLINES: Lazy = Lazy::new(|| Regex::new("\n{3,}").unwrap()); +/// A regex used to remove extra lines in flatenned files +pub static RE_TWO_OR_MORE_SPACES: Lazy = Lazy::new(|| Regex::new(" {2,}").unwrap()); + /// A regex that matches version pragma in a Vyper pub static RE_VYPER_VERSION: Lazy = Lazy::new(|| Regex::new(r"#(?:pragma version|@version)\s+(?P.+)").unwrap()); From fb64ca754b80fa502b8ae3caeb7991f2d9ba57d3 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 11 Sep 2024 17:23:38 +0400 Subject: [PATCH 02/10] wip --- crates/compilers/src/cache.rs | 176 +++++++++++++++++---------- crates/compilers/src/preprocessor.rs | 11 ++ 2 files changed, 120 insertions(+), 67 deletions(-) diff --git a/crates/compilers/src/cache.rs b/crates/compilers/src/cache.rs index 3c9fc9bd..07222b28 100644 --- a/crates/compilers/src/cache.rs +++ b/crates/compilers/src/cache.rs @@ -4,7 +4,7 @@ use crate::{ buildinfo::RawBuildInfo, compilers::{Compiler, CompilerSettings, Language}, output::Builds, - preprocessor::interface_representation, + preprocessor::{interface_representation_hash}, resolver::GraphEdges, ArtifactFile, ArtifactOutput, Artifacts, ArtifactsMap, Graph, OutputContext, Project, ProjectPaths, ProjectPathsConfig, SourceCompilationKind, @@ -641,15 +641,24 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput, C: Compiler> { /// The file hashes. pub content_hashes: HashMap, + + /// The interface representations for source files. 
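+    /// Maps each source path to the hex-encoded MD5 digest of its
+    /// [`interface_representation`], falling back to the plain content hash for
+    /// sources that fail to parse.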
+ pub interface_repr_hashes: HashMap, } impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { /// Creates a new cache entry for the file - fn create_cache_entry(&mut self, file: PathBuf, source: &Source) { + fn create_cache_entry( + &mut self, + file: PathBuf, + source: &Source, + edges: Option<&GraphEdges>, + ) { + let edges = edges.unwrap_or(&self.edges); let content_hash = source.content_hash(); - let interface_repr_hash = file.starts_with(&self.project.paths.sources).then(|| { - interface_representation(&source.content).unwrap_or_else(|_| content_hash.clone()) - }); + let interface_repr_hash = file + .starts_with(&self.project.paths.sources) + .then(|| interface_representation_hash(&source)); let entry = CacheEntry { last_modification_date: CacheEntry::::read_last_modification_date(&file) .unwrap_or_default(), @@ -657,8 +666,8 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { interface_repr_hash, source_name: strip_prefix(&file, self.project.root()).into(), compiler_settings: self.project.settings.clone(), - imports: self.edges.imports(&file).into_iter().map(|i| i.into()).collect(), - version_requirement: self.edges.version_requirement(&file).map(|v| v.to_string()), + imports: edges.imports(&file).into_iter().map(|i| i.into()).collect(), + version_requirement: edges.version_requirement(&file).map(|v| v.to_string()), // artifacts remain empty until we received the compiler output artifacts: Default::default(), seen_by_compiler: false, @@ -692,7 +701,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { // Ensure that we have a cache entry for all sources. if !self.cache.files.contains_key(file) { - self.create_cache_entry(file.clone(), source); + self.create_cache_entry(file.clone(), source, None); } } @@ -749,62 +758,66 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { return true; } - false - } - - // Walks over all cache entires, detects dirty files and removes them from cache. - fn find_and_remove_dirty(&mut self) { - fn populate_dirty_files( - file: &Path, - dirty_files: &mut HashSet, - edges: &GraphEdges, - ) { - for file in edges.importers(file) { - // If file is marked as dirty we either have already visited it or it was marked as - // dirty initially and will be visited at some point later. - if !dirty_files.contains(file) { - dirty_files.insert(file.to_path_buf()); - populate_dirty_files(file, dirty_files, edges); + // If any requested extra files are missing for any artifact, mark source as dirty to + // generate them + for artifacts in self.cached_artifacts.values() { + for artifacts in artifacts.values() { + for artifact_file in artifacts { + if self.project.artifacts_handler().is_dirty(artifact_file).unwrap_or(true) { + return true; + } } } } - // Iterate over existing cache entries. - let files = self.cache.files.keys().cloned().collect::>(); + false + } + // Walks over all cache entires, detects dirty files and removes them from cache. + fn find_and_remove_dirty(&mut self) { let mut sources = Sources::new(); - // Read all sources, marking entries as dirty on I/O errors. - for file in &files { - let Ok(source) = Source::read(file) else { - self.dirty_sources.insert(file.clone()); + // Read all sources, removing entries on I/O errors. + for file in self.cache.files.keys().cloned().collect::>() { + let Ok(source) = Source::read(&file) else { + self.cache.files.remove(&file); continue; }; sources.insert(file.clone(), source); } - // Build a temporary graph for walking imports. 
We need this because `self.edges` - // only contains graph data for in-scope sources but we are operating on cache entries. - if let Ok(graph) = Graph::::resolve_sources(&self.project.paths, sources) { - let (sources, edges) = graph.into_sources(); + let src_files = sources + .keys() + .filter(|f| f.starts_with(&self.project.paths.sources)) + .collect::>(); - // Calculate content hashes for later comparison. - self.fill_hashes(&sources); + // Calculate content hashes for later comparison. + self.fill_hashes(&sources); - // Pre-add all sources that are guaranteed to be dirty - for file in sources.keys() { - if self.is_dirty_impl(file) { - self.dirty_sources.insert(file.clone()); - } + // Pre-add all sources that are guaranteed to be dirty + for file in self.cache.files.keys() { + if self.is_dirty_impl(file, false) { + self.dirty_sources.insert(file.clone()); } + } - // Perform DFS to find direct/indirect importers of dirty files. - for file in self.dirty_sources.clone().iter() { - populate_dirty_files(file, &mut self.dirty_sources, &edges); + // Mark sources as dirty based on their imports + for (file, entry) in &self.cache.files { + if self.dirty_sources.contains(file) { + continue; + } + let is_src = src_files.contains(file); + for import in &entry.imports { + if is_src && self.dirty_sources.contains(import) { + self.dirty_sources.insert(file.clone()); + break; + } else if !is_src + && self.dirty_sources.contains(import) + && (!src_files.contains(import) || self.is_dirty_impl(import, true)) + { + self.dirty_sources.insert(file.clone()); + } } - } else { - // Purge all sources on graph resolution error. - self.dirty_sources.extend(files); } // Remove all dirty files from cache. @@ -812,22 +825,55 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { debug!("removing dirty file from cache: {}", file.display()); self.cache.remove(file); } - } - fn is_dirty_impl(&self, file: &Path) -> bool { - let Some(hash) = self.content_hashes.get(file) else { - trace!("missing content hash"); - return true; + // Build a temporary graph for populating cache. We want to ensure that we preserve all just + // removed entries with updated data. We need separate graph for this because + // `self.edges` only contains graph data for in-scope sources but we are operating on cache + // entries. + let Ok(graph) = Graph::::resolve_sources(&self.project.paths, sources) + else { + // Purge all sources on graph resolution error. 
+ self.cache.files.clear(); + return; }; + let (sources, edges) = graph.into_sources(); + + for (file, source) in sources { + if self.cache.files.contains_key(&file) { + continue; + } + + self.create_cache_entry(file.clone(), &source, Some(&edges)); + } + } + + fn is_dirty_impl(&self, file: &Path, use_interface_repr: bool) -> bool { let Some(entry) = self.cache.entry(file) else { trace!("missing cache entry"); return true; }; - if entry.content_hash != *hash { - trace!("content hash changed"); - return true; + if use_interface_repr { + let Some(interface_hash) = self.interface_repr_hashes.get(file) else { + trace!("missing interface hash"); + return true; + }; + + if entry.interface_repr_hash.as_ref().map_or(true, |h| h != interface_hash) { + trace!("interface hash changed"); + return true; + }; + } else { + let Some(content_hash) = self.content_hashes.get(file) else { + trace!("missing content hash"); + return true; + }; + + if entry.content_hash != *content_hash { + trace!("content hash changed"); + return true; + } } if !self.project.settings.can_use_cached(&entry.compiler_settings) { @@ -835,18 +881,6 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { return true; } - // If any requested extra files are missing for any artifact, mark source as dirty to - // generate them - for artifacts in self.cached_artifacts.values() { - for artifacts in artifacts.values() { - for artifact_file in artifacts { - if self.project.artifacts_handler().is_dirty(artifact_file).unwrap_or(true) { - return true; - } - } - } - } - // all things match, can be reused false } @@ -857,6 +891,13 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { if let hash_map::Entry::Vacant(entry) = self.content_hashes.entry(file.clone()) { entry.insert(source.content_hash()); } + if file.starts_with(&self.project.paths.sources) { + if let hash_map::Entry::Vacant(entry) = + self.interface_repr_hashes.entry(file.clone()) + { + entry.insert(interface_representation_hash(&source)); + } + } } } } @@ -940,6 +981,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCache<'a, T, C> { dirty_sources: Default::default(), content_hashes: Default::default(), sources_in_scope: Default::default(), + interface_repr_hashes: Default::default(), }; ArtifactsCache::Cached(cache) diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs index 763ab35c..c7d4e0b9 100644 --- a/crates/compilers/src/preprocessor.rs +++ b/crates/compilers/src/preprocessor.rs @@ -1,4 +1,7 @@ +use alloy_primitives::hex; +use foundry_compilers_artifacts::Source; use foundry_compilers_core::utils; +use md5::Digest; use solang_parser::{ diagnostics::Diagnostic, helpers::CodeLocation, @@ -57,6 +60,14 @@ pub(crate) fn interface_representation(content: &str) -> Result String { + let Ok(repr) = interface_representation(&source.content) else { return source.content_hash() }; + let mut hasher = md5::Md5::new(); + hasher.update(&repr); + let result = hasher.finalize(); + hex::encode(result) +} + #[cfg(test)] mod tests { use super::*; From 0876b019f80f01edaf0be5932ae4f7ace1aafbd5 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 11 Sep 2024 17:36:54 +0400 Subject: [PATCH 03/10] add preprocessor --- crates/compilers/src/cache.rs | 2 +- crates/compilers/src/compile/project.rs | 47 +++- crates/compilers/src/preprocessor.rs | 289 +++++++++++++++++++++++- 3 files changed, 324 insertions(+), 14 deletions(-) diff --git a/crates/compilers/src/cache.rs b/crates/compilers/src/cache.rs index 
07222b28..e11f37a1 100644 --- a/crates/compilers/src/cache.rs +++ b/crates/compilers/src/cache.rs @@ -4,7 +4,7 @@ use crate::{ buildinfo::RawBuildInfo, compilers::{Compiler, CompilerSettings, Language}, output::Builds, - preprocessor::{interface_representation_hash}, + preprocessor::interface_representation_hash, resolver::GraphEdges, ArtifactFile, ArtifactOutput, Artifacts, ArtifactsMap, Graph, OutputContext, Project, ProjectPaths, ProjectPathsConfig, SourceCompilationKind, diff --git a/crates/compilers/src/compile/project.rs b/crates/compilers/src/compile/project.rs index b76c47a5..f4c3a67c 100644 --- a/crates/compilers/src/compile/project.rs +++ b/crates/compilers/src/compile/project.rs @@ -114,11 +114,15 @@ use crate::{ use foundry_compilers_core::error::Result; use rayon::prelude::*; use semver::Version; -use std::{collections::HashMap, path::PathBuf, time::Instant}; +use std::{collections::HashMap, fmt::Debug, path::PathBuf, time::Instant}; /// A set of different Solc installations with their version and the sources to be compiled pub(crate) type VersionedSources = HashMap>; +pub trait Preprocessor: Debug { + fn preprocess(&self, compiler: &C, input: C::Input, dirty: &Vec) -> Result; +} + #[derive(Debug)] pub struct ProjectCompiler<'a, T: ArtifactOutput, C: Compiler> { /// Contains the relationship of the source files and their imports @@ -126,6 +130,8 @@ pub struct ProjectCompiler<'a, T: ArtifactOutput, C: Compiler> { project: &'a Project, /// how to compile all the sources sources: CompilerSources, + /// Optional preprocessor + preprocessor: Option>>, } impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> { @@ -160,7 +166,14 @@ impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> { sources, }; - Ok(Self { edges, project, sources }) + Ok(Self { edges, project, sources, preprocessor: None }) + } + + pub fn with_preprocessor( + self, + preprocessor: impl Preprocessor + 'static, + ) -> ProjectCompiler<'a, T, C> { + ProjectCompiler { preprocessor: Some(Box::new(preprocessor)), ..self } } /// Compiles all the sources of the `Project` in the appropriate mode @@ -197,7 +210,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> { /// - check cache fn preprocess(self) -> Result> { trace!("preprocessing"); - let Self { edges, project, mut sources } = self; + let Self { edges, project, mut sources, preprocessor } = self; // convert paths on windows to ensure consistency with the `CompilerOutput` `solc` emits, // which is unix style `/` @@ -207,7 +220,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> { // retain and compile only dirty sources and all their imports sources.filter(&mut cache); - Ok(PreprocessedState { sources, cache }) + Ok(PreprocessedState { sources, cache, preprocessor }) } } @@ -221,15 +234,18 @@ struct PreprocessedState<'a, T: ArtifactOutput, C: Compiler> { /// Cache that holds `CacheEntry` objects if caching is enabled and the project is recompiled cache: ArtifactsCache<'a, T, C>, + + /// Optional preprocessor + preprocessor: Option>>, } impl<'a, T: ArtifactOutput, C: Compiler> PreprocessedState<'a, T, C> { /// advance to the next state by compiling all sources fn compile(self) -> Result> { trace!("compiling"); - let PreprocessedState { sources, mut cache } = self; + let PreprocessedState { sources, mut cache, preprocessor } = self; - let mut output = sources.compile(&mut cache)?; + let mut output = sources.compile(&mut cache, preprocessor)?; // source paths get stripped before handing them over to solc, so 
solc never uses absolute // paths, instead `--base-path ` is set. this way any metadata that's derived from @@ -410,6 +426,7 @@ impl CompilerSources { fn compile, T: ArtifactOutput>( self, cache: &mut ArtifactsCache<'_, T, C>, + preprocessor: Option>>, ) -> Result> { let project = cache.project(); let graph = cache.graph(); @@ -455,6 +472,18 @@ impl CompilerSources { let mut input = C::Input::build(sources, settings, language, version.clone()); input.strip_prefix(project.paths.root.as_path()); + let actually_dirty = actually_dirty + .into_iter() + .map(|path| { + path.strip_prefix(project.paths.root.as_path()) + .unwrap_or(&path) + .to_path_buf() + }) + .collect(); + + if let Some(preprocessor) = preprocessor.as_ref() { + input = preprocessor.preprocess(&project.compiler, input, &actually_dirty)?; + } jobs.push((input, actually_dirty)); } @@ -478,11 +507,7 @@ impl CompilerSources { let build_info = RawBuildInfo::new(&input, &output, project.build_info)?; - output.retain_files( - actually_dirty - .iter() - .map(|f| f.strip_prefix(project.paths.root.as_path()).unwrap_or(f)), - ); + output.retain_files(actually_dirty); output.join_all(project.paths.root.as_path()); aggregated.extend(version.clone(), build_info, output); diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs index c7d4e0b9..13489f70 100644 --- a/crates/compilers/src/preprocessor.rs +++ b/crates/compilers/src/preprocessor.rs @@ -1,12 +1,29 @@ +use super::project::Preprocessor; +use crate::{ + flatten::{apply_updates, Updates}, + multi::{MultiCompiler, MultiCompilerInput}, + solc::{SolcCompiler, SolcVersionedInput}, + Compiler, Result, SolcError, +}; use alloy_primitives::hex; -use foundry_compilers_artifacts::Source; +use foundry_compilers_artifacts::{ + ast::SourceLocation, + output_selection::OutputSelection, + visitor::{Visitor, Walk}, + ContractDefinition, ContractKind, Expression, FunctionCall, MemberAccess, NewExpression, + Source, SourceUnit, SourceUnitPart, Sources, TypeName, +}; use foundry_compilers_core::utils; -use md5::Digest; +use md5::{digest::typenum::Exp, Digest}; use solang_parser::{ diagnostics::Diagnostic, helpers::CodeLocation, pt::{ContractPart, ContractTy, FunctionAttribute, FunctionTy, SourceUnitPart, Visibility}, }; +use std::{ + collections::{BTreeMap, BTreeSet}, + path::{Path, PathBuf}, +}; pub(crate) fn interface_representation(content: &str) -> Result> { let (source_unit, _) = solang_parser::parse(&content, 0)?; @@ -68,6 +85,274 @@ pub(crate) fn interface_representation_hash(source: &Source) -> String { hex::encode(result) } +#[derive(Debug)] +pub struct ItemLocation { + start: usize, + end: usize, +} + +impl ItemLocation { + fn try_from_loc(loc: SourceLocation) -> Option { + Some(ItemLocation { start: loc.start?, end: loc.start? + loc.length? 
}) + } +} + +#[derive(Debug)] +enum BytecodeDependencyKind { + CreationCode, + New(ItemLocation, String), +} + +#[derive(Debug)] +struct BytecodeDependency { + kind: BytecodeDependencyKind, + loc: ItemLocation, + referenced_contract: usize, +} + +#[derive(Debug)] +struct BytecodeDependencyCollector<'a> { + source: &'a str, + dependencies: Vec, +} + +impl BytecodeDependencyCollector<'_> { + fn new(source: &str) -> BytecodeDependencyCollector<'_> { + BytecodeDependencyCollector { source, dependencies: Vec::new() } + } +} + +impl Visitor for BytecodeDependencyCollector<'_> { + fn visit_function_call(&mut self, call: &FunctionCall) { + let (new_loc, expr) = match &call.expression { + Expression::NewExpression(expr) => (expr.src, expr), + Expression::FunctionCallOptions(expr) => { + if let Expression::NewExpression(new_expr) = &expr.expression { + (expr.src, new_expr) + } else { + return; + } + } + _ => return, + }; + + let TypeName::UserDefinedTypeName(type_name) = &expr.type_name else { return }; + + let Some(loc) = ItemLocation::try_from_loc(call.src) else { return }; + let Some(name_loc) = ItemLocation::try_from_loc(type_name.src) else { return }; + let Some(new_loc) = ItemLocation::try_from_loc(new_loc) else { return }; + let name = &self.source[name_loc.start..name_loc.end]; + + self.dependencies.push(BytecodeDependency { + kind: BytecodeDependencyKind::New(new_loc, name.to_string()), + loc, + referenced_contract: type_name.referenced_declaration as usize, + }); + } + + fn visit_member_access(&mut self, access: &MemberAccess) { + if access.member_name != "creationCode" { + return; + } + + let Expression::FunctionCall(call) = &access.expression else { return }; + + let Expression::Identifier(ident) = &call.expression else { return }; + + if ident.name != "type" { + return; + } + + let Some(Expression::Identifier(ident)) = call.arguments.first() else { return }; + + let Some(referenced) = ident.referenced_declaration else { return }; + + let Some(loc) = ItemLocation::try_from_loc(access.src) else { return }; + + self.dependencies.push(BytecodeDependency { + kind: BytecodeDependencyKind::CreationCode, + loc, + referenced_contract: referenced as usize, + }); + } +} + +struct TestOptimizer<'a> { + asts: BTreeMap, + dirty: &'a Vec, + sources: &'a mut Sources, +} + +impl TestOptimizer<'_> { + fn new<'a>( + asts: BTreeMap, + dirty: &'a Vec, + sources: &'a mut Sources, + ) -> TestOptimizer<'a> { + TestOptimizer { asts, dirty, sources } + } + + fn optimize(self) { + let mut updates = Updates::default(); + let ignored_contracts = self.collect_ignored_contracts(); + self.rename_contracts_to_abstract(&ignored_contracts, &mut updates); + self.remove_bytecode_dependencies(&ignored_contracts, &mut updates); + + apply_updates(self.sources, updates); + } + + fn collect_ignored_contracts(&self) -> BTreeSet { + let mut ignored_sources = BTreeSet::new(); + + for (path, ast) in &self.asts { + if path.to_str().unwrap().contains("test") || path.to_str().unwrap().contains("script") + { + ignored_sources.insert(ast.id); + } else if self.dirty.contains(path) { + ignored_sources.insert(ast.id); + + for node in &ast.nodes { + if let SourceUnitPart::ImportDirective(import) = node { + ignored_sources.insert(import.source_unit); + } + } + } + } + + let mut ignored_contracts = BTreeSet::new(); + + for ast in self.asts.values() { + if ignored_sources.contains(&ast.id) { + for node in &ast.nodes { + if let SourceUnitPart::ContractDefinition(contract) = node { + ignored_contracts.insert(contract.id); + } + } + } + } + + 
ignored_contracts + } + + fn rename_contracts_to_abstract( + &self, + ignored_contracts: &BTreeSet, + updates: &mut Updates, + ) { + for (path, ast) in &self.asts { + for node in &ast.nodes { + if let SourceUnitPart::ContractDefinition(contract) = node { + if ignored_contracts.contains(&contract.id) { + continue; + } + if matches!(contract.kind, ContractKind::Contract) && !contract.is_abstract { + if let Some(start) = contract.src.start { + updates.entry(path.clone()).or_default().insert(( + start, + start, + "abstract ".to_string(), + )); + } + } + } + } + } + } + + fn remove_bytecode_dependencies( + &self, + ignored_contracts: &BTreeSet, + updates: &mut Updates, + ) { + for (path, ast) in &self.asts { + let src = self.sources.get(path).unwrap().content.as_str(); + let mut collector = BytecodeDependencyCollector::new(src); + ast.walk(&mut collector); + let updates = updates.entry(path.clone()).or_default(); + for dep in collector.dependencies { + match dep.kind { + BytecodeDependencyKind::CreationCode => { + updates.insert((dep.loc.start, dep.loc.end, "bytes(\"\")".to_string())); + } + BytecodeDependencyKind::New(new_loc, name) => { + updates.insert(( + new_loc.start, + new_loc.end, + format!("{name}(payable(address(uint160(uint256(keccak256(abi.encode"), + )); + updates.insert((dep.loc.end, dep.loc.end, format!("))))))"))); + } + }; + } + } + } +} + +#[derive(Debug)] +pub struct TestOptimizerPreprocessor; + +impl Preprocessor for TestOptimizerPreprocessor { + fn preprocess( + &self, + solc: &SolcCompiler, + mut input: SolcVersionedInput, + dirty: &Vec, + ) -> Result { + let prev_output_selection = std::mem::replace( + &mut input.input.settings.output_selection, + OutputSelection::ast_output_selection(), + ); + let output = solc.compile(&input)?; + + input.input.settings.output_selection = prev_output_selection; + + if let Some(e) = output.errors.iter().find(|e| e.severity.is_error()) { + return Err(SolcError::msg(e)); + } + + let asts = output + .sources + .into_iter() + .filter_map(|(path, source)| { + if !input.input.sources.contains_key(&path) { + return None; + } + + Some((|| { + let ast = source.ast.ok_or_else(|| SolcError::msg("missing AST"))?; + let ast: SourceUnit = serde_json::from_str(&serde_json::to_string(&ast)?)?; + Ok((path, ast)) + })()) + }) + .collect::>>()?; + + TestOptimizer::new(asts, dirty, &mut input.input.sources).optimize(); + + Ok(input) + } +} + +impl Preprocessor for TestOptimizerPreprocessor { + fn preprocess( + &self, + compiler: &MultiCompiler, + input: ::Input, + dirty: &Vec, + ) -> Result<::Input> { + match input { + MultiCompilerInput::Solc(input) => { + if let Some(solc) = &compiler.solc { + let input = self.preprocess(solc, input, dirty)?; + Ok(MultiCompilerInput::Solc(input)) + } else { + Ok(MultiCompilerInput::Solc(input)) + } + } + MultiCompilerInput::Vyper(input) => Ok(MultiCompilerInput::Vyper(input)), + } + } +} + #[cfg(test)] mod tests { use super::*; From 314f284e3660701a69b20915a849d72e42735bf6 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 11 Sep 2024 22:48:47 +0400 Subject: [PATCH 04/10] wip modified: crates/compilers/src/preprocessor.rs --- crates/artifacts/solc/src/ast/misc.rs | 2 +- crates/compilers/src/cache.rs | 7 +- crates/compilers/src/compile/project.rs | 20 +- crates/compilers/src/flatten.rs | 94 ++++++---- crates/compilers/src/lib.rs | 2 +- crates/compilers/src/preprocessor.rs | 237 +++++++++++++++--------- crates/core/src/error.rs | 2 +- 7 files changed, 224 insertions(+), 140 deletions(-) diff --git 
a/crates/artifacts/solc/src/ast/misc.rs b/crates/artifacts/solc/src/ast/misc.rs index 6ec3187b..7144ddc5 100644 --- a/crates/artifacts/solc/src/ast/misc.rs +++ b/crates/artifacts/solc/src/ast/misc.rs @@ -4,7 +4,7 @@ use std::{fmt, fmt::Write, str::FromStr}; /// Represents the source location of a node: `::`. /// /// The `start`, `length` and `index` can be -1 which is represented as `None` -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct SourceLocation { pub start: Option, pub length: Option, diff --git a/crates/compilers/src/cache.rs b/crates/compilers/src/cache.rs index e11f37a1..1e81c11d 100644 --- a/crates/compilers/src/cache.rs +++ b/crates/compilers/src/cache.rs @@ -658,7 +658,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { let content_hash = source.content_hash(); let interface_repr_hash = file .starts_with(&self.project.paths.sources) - .then(|| interface_representation_hash(&source)); + .then(|| interface_representation_hash(source)); let entry = CacheEntry { last_modification_date: CacheEntry::::read_last_modification_date(&file) .unwrap_or_default(), @@ -788,7 +788,10 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { let src_files = sources .keys() - .filter(|f| f.starts_with(&self.project.paths.sources)) + .filter(|f| { + !f.starts_with(&self.project.paths.tests) + && !f.starts_with(&self.project.paths.scripts) + }) .collect::>(); // Calculate content hashes for later comparison. diff --git a/crates/compilers/src/compile/project.rs b/crates/compilers/src/compile/project.rs index f4c3a67c..2d875fa4 100644 --- a/crates/compilers/src/compile/project.rs +++ b/crates/compilers/src/compile/project.rs @@ -109,7 +109,8 @@ use crate::{ output::{AggregatedCompilerOutput, Builds}, report, resolver::GraphEdges, - ArtifactOutput, CompilerSettings, Graph, Project, ProjectCompileOutput, Sources, + ArtifactOutput, CompilerSettings, Graph, Project, ProjectCompileOutput, ProjectPathsConfig, + Sources, }; use foundry_compilers_core::error::Result; use rayon::prelude::*; @@ -119,8 +120,14 @@ use std::{collections::HashMap, fmt::Debug, path::PathBuf, time::Instant}; /// A set of different Solc installations with their version and the sources to be compiled pub(crate) type VersionedSources = HashMap>; +/// Invoked before the actual compiler invocation and can override the input. 
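+///
+/// A no-op implementation (hypothetical `Identity` type, a minimal sketch) illustrates
+/// the contract and could be plugged in via [`ProjectCompiler::with_preprocessor`]:
+///
+/// ```ignore
+/// #[derive(Debug)]
+/// struct Identity;
+///
+/// impl<C: Compiler> Preprocessor<C> for Identity {
+///     fn preprocess(
+///         &self,
+///         _compiler: &C,
+///         input: C::Input,
+///         _paths: &ProjectPathsConfig,
+///     ) -> Result<C::Input> {
+///         // Hand the input back unchanged; real preprocessors rewrite `input` here.
+///         Ok(input)
+///     }
+/// }
+/// ```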
pub trait Preprocessor: Debug { - fn preprocess(&self, compiler: &C, input: C::Input, dirty: &Vec) -> Result; + fn preprocess( + &self, + compiler: &C, + input: C::Input, + paths: &ProjectPathsConfig, + ) -> Result; } #[derive(Debug)] @@ -169,11 +176,8 @@ impl<'a, T: ArtifactOutput, C: Compiler> ProjectCompiler<'a, T, C> { Ok(Self { edges, project, sources, preprocessor: None }) } - pub fn with_preprocessor( - self, - preprocessor: impl Preprocessor + 'static, - ) -> ProjectCompiler<'a, T, C> { - ProjectCompiler { preprocessor: Some(Box::new(preprocessor)), ..self } + pub fn with_preprocessor(self, preprocessor: impl Preprocessor + 'static) -> Self { + Self { preprocessor: Some(Box::new(preprocessor)), ..self } } /// Compiles all the sources of the `Project` in the appropriate mode @@ -482,7 +486,7 @@ impl CompilerSources { .collect(); if let Some(preprocessor) = preprocessor.as_ref() { - input = preprocessor.preprocess(&project.compiler, input, &actually_dirty)?; + input = preprocessor.preprocess(&project.compiler, input, &project.paths)?; } jobs.push((input, actually_dirty)); diff --git a/crates/compilers/src/flatten.rs b/crates/compilers/src/flatten.rs index 45da3902..df4f5f87 100644 --- a/crates/compilers/src/flatten.rs +++ b/crates/compilers/src/flatten.rs @@ -17,9 +17,10 @@ use foundry_compilers_core::{ }; use itertools::Itertools; use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeSet, HashMap, HashSet}, hash::Hash, path::{Path, PathBuf}, + sync::Arc, }; use visitor::Walk; @@ -95,7 +96,7 @@ impl Visitor for ReferencesCollector { } fn visit_external_assembly_reference(&mut self, reference: &ExternalInlineAssemblyReference) { - let mut src = reference.src.clone(); + let mut src = reference.src; // If suffix is used in assembly reference (e.g. value.slot), it will be included into src. // However, we are only interested in the referenced name, thus we strip . part. @@ -112,47 +113,32 @@ impl Visitor for ReferencesCollector { /// Updates to be applied to the sources. /// source_path -> (start, end, new_value) -type Updates = HashMap>; +pub type Updates = HashMap>; -pub struct FlatteningResult<'a> { +pub struct FlatteningResult { /// Updated source in the order they shoud be written to the output file. sources: Vec, /// Pragmas that should be present in the target file. pragmas: Vec, /// License identifier that should be present in the target file. 
- license: Option<&'a str>, + license: Option, } -impl<'a> FlatteningResult<'a> { +impl FlatteningResult { fn new( - flattener: &Flattener, - mut updates: Updates, + mut flattener: Flattener, + updates: Updates, pragmas: Vec, - license: Option<&'a str>, + license: Option, ) -> Self { - let mut sources = Vec::new(); - - for path in &flattener.ordered_sources { - let mut content = flattener.sources.get(path).unwrap().content.as_bytes().to_vec(); - let mut offset: isize = 0; - if let Some(updates) = updates.remove(path) { - let mut updates = updates.iter().collect::>(); - updates.sort_by_key(|(start, _, _)| *start); - for (start, end, new_value) in updates { - let start = (*start as isize + offset) as usize; - let end = (*end as isize + offset) as usize; - - content.splice(start..end, new_value.bytes()); - offset += new_value.len() as isize - (end - start) as isize; - } - } - let content = format!( - "// {}\n{}", - path.strip_prefix(&flattener.project_root).unwrap_or(path).display(), - String::from_utf8(content).unwrap() - ); - sources.push(content); - } + apply_updates(&mut flattener.sources, updates); + + let sources = flattener + .ordered_sources + .iter() + .map(|path| flattener.sources.remove(path).unwrap().content) + .map(Arc::unwrap_or_clone) + .collect(); Self { sources, pragmas, license } } @@ -274,9 +260,10 @@ impl Flattener { /// 3. Remove all imports. /// 4. Remove all pragmas except for the ones in the target file. /// 5. Remove all license identifiers except for the one in the target file. - pub fn flatten(&self) -> String { + pub fn flatten(self) -> String { let mut updates = Updates::new(); + self.append_filenames(&mut updates); let top_level_names = self.rename_top_level_definitions(&mut updates); self.rename_contract_level_types_references(&top_level_names, &mut updates); self.remove_qualified_imports(&mut updates); @@ -289,15 +276,26 @@ impl Flattener { self.flatten_result(updates, target_pragmas, target_license).get_flattened_target() } - fn flatten_result<'a>( - &'a self, + fn flatten_result( + self, updates: Updates, target_pragmas: Vec, - target_license: Option<&'a str>, - ) -> FlatteningResult<'_> { + target_license: Option, + ) -> FlatteningResult { FlatteningResult::new(self, updates, target_pragmas, target_license) } + /// Appends a comment with the file name to the beginning of each source. + fn append_filenames(&self, updates: &mut Updates) { + for path in &self.ordered_sources { + updates.entry(path.clone()).or_default().insert(( + 0, + 0, + format!("// {}\n", path.strip_prefix(&self.project_root).unwrap_or(path).display()), + )); + } + } + /// Finds and goes over all references to file-level definitions and updates them to match /// definition name. This is needed for two reasons: /// 1. We want to rename all aliased or qualified imports. @@ -752,14 +750,14 @@ impl Flattener { /// Removes all license identifiers from all sources. Returns licesnse identifier from target /// file, if any. 
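     /// e.g. if the target file starts with `// SPDX-License-Identifier: MIT`, this
     /// returns `Some("SPDX-License-Identifier: MIT")` while queueing removal of every
     /// license comment across the sources.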
- fn process_licenses(&self, updates: &mut Updates) -> Option<&str> { + fn process_licenses(&self, updates: &mut Updates) -> Option { let mut target_license = None; for loc in &self.collect_licenses() { if loc.path == self.target { let license_line = self.read_location(loc); let license_start = license_line.find("SPDX-License-Identifier:").unwrap(); - target_license = Some(license_line[license_start..].trim()); + target_license = Some(license_line[license_start..].trim().to_string()); } updates.entry(loc.path.clone()).or_default().insert(( loc.start, @@ -887,3 +885,21 @@ pub fn combine_version_pragmas(pragmas: Vec<&str>) -> Option { None } + +pub fn apply_updates(sources: &mut Sources, mut updates: Updates) { + for (path, source) in sources { + if let Some(updates) = updates.remove(path) { + let mut offset = 0; + let mut content = source.content.as_bytes().to_vec(); + for (start, end, new_value) in updates { + let start = (start as isize + offset) as usize; + let end = (end as isize + offset) as usize; + + content.splice(start..end, new_value.bytes()); + offset += new_value.len() as isize - (end - start) as isize; + } + + source.content = Arc::new(String::from_utf8_lossy(&content).to_string()); + } + } +} diff --git a/crates/compilers/src/lib.rs b/crates/compilers/src/lib.rs index 74d38cb9..1eebbfe9 100644 --- a/crates/compilers/src/lib.rs +++ b/crates/compilers/src/lib.rs @@ -24,7 +24,7 @@ pub use resolver::Graph; pub mod compilers; pub use compilers::*; -mod preprocessor; +pub mod preprocessor; mod compile; pub use compile::{ diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs index 13489f70..2678b97d 100644 --- a/crates/compilers/src/preprocessor.rs +++ b/crates/compilers/src/preprocessor.rs @@ -1,53 +1,53 @@ use super::project::Preprocessor; use crate::{ flatten::{apply_updates, Updates}, - multi::{MultiCompiler, MultiCompilerInput}, + multi::{MultiCompiler, MultiCompilerInput, MultiCompilerLanguage}, solc::{SolcCompiler, SolcVersionedInput}, - Compiler, Result, SolcError, + Compiler, ProjectPathsConfig, Result, SolcError, }; -use alloy_primitives::hex; +use alloy_primitives::{hex}; use foundry_compilers_artifacts::{ ast::SourceLocation, output_selection::OutputSelection, visitor::{Visitor, Walk}, - ContractDefinition, ContractKind, Expression, FunctionCall, MemberAccess, NewExpression, - Source, SourceUnit, SourceUnitPart, Sources, TypeName, + ContractDefinitionPart, Expression, FunctionCall, + FunctionKind, MemberAccess, SolcLanguage, Source, SourceUnit, SourceUnitPart, + Sources, TypeName, }; use foundry_compilers_core::utils; -use md5::{digest::typenum::Exp, Digest}; -use solang_parser::{ - diagnostics::Diagnostic, - helpers::CodeLocation, - pt::{ContractPart, ContractTy, FunctionAttribute, FunctionTy, SourceUnitPart, Visibility}, -}; +use md5::{ Digest}; +use solang_parser::{diagnostics::Diagnostic, helpers::CodeLocation, pt}; use std::{ - collections::{BTreeMap, BTreeSet}, + collections::{BTreeMap, }, + fmt::Write, path::{Path, PathBuf}, }; pub(crate) fn interface_representation(content: &str) -> Result> { - let (source_unit, _) = solang_parser::parse(&content, 0)?; + let (source_unit, _) = solang_parser::parse(content, 0)?; let mut locs_to_remove = Vec::new(); for part in source_unit.0 { - if let SourceUnitPart::ContractDefinition(contract) = part { - if matches!(contract.ty, ContractTy::Interface(_) | ContractTy::Library(_)) { + if let pt::SourceUnitPart::ContractDefinition(contract) = part { + if matches!(contract.ty, 
pt::ContractTy::Interface(_) | pt::ContractTy::Library(_)) { continue; } for part in contract.parts { - if let ContractPart::FunctionDefinition(func) = part { - let is_exposed = func.ty == FunctionTy::Function + if let pt::ContractPart::FunctionDefinition(func) = part { + let is_exposed = func.ty == pt::FunctionTy::Function && func.attributes.iter().any(|attr| { matches!( attr, - FunctionAttribute::Visibility( - Visibility::External(_) | Visibility::Public(_) + pt::FunctionAttribute::Visibility( + pt::Visibility::External(_) | pt::Visibility::Public(_) ) ) }) || matches!( func.ty, - FunctionTy::Constructor | FunctionTy::Fallback | FunctionTy::Receive + pt::FunctionTy::Constructor + | pt::FunctionTy::Fallback + | pt::FunctionTy::Receive ); if !is_exposed { @@ -92,11 +92,17 @@ pub struct ItemLocation { } impl ItemLocation { - fn try_from_loc(loc: SourceLocation) -> Option { - Some(ItemLocation { start: loc.start?, end: loc.start? + loc.length? }) + fn try_from_loc(loc: SourceLocation) -> Option { + Some(Self { start: loc.start?, end: loc.start? + loc.length? }) } } +fn is_test_or_script(path: &Path, paths: &ProjectPathsConfig) -> bool { + let test_dir = paths.tests.strip_prefix(&paths.root).unwrap_or(&paths.root); + let script_dir = paths.scripts.strip_prefix(&paths.root).unwrap_or(&paths.root); + path.starts_with(test_dir) || path.starts_with(script_dir) +} + #[derive(Debug)] enum BytecodeDependencyKind { CreationCode, @@ -177,113 +183,161 @@ impl Visitor for BytecodeDependencyCollector<'_> { } } -struct TestOptimizer<'a> { +/// Keeps data about a single contract definition. +struct ContractData { + /// Artifact ID to use in `getCode`/`deployCode` calls. + artifact: String, + /// Whether contract has a non-empty constructor. + has_constructor: bool, +} + +/// Receives a set of source files along with their ASTs and removes bytecode dependencies from +/// contracts by replacing them with cheatcode invocations. +struct BytecodeDependencyOptimizer<'a> { asts: BTreeMap, - dirty: &'a Vec, + paths: &'a ProjectPathsConfig, sources: &'a mut Sources, } -impl TestOptimizer<'_> { +impl BytecodeDependencyOptimizer<'_> { fn new<'a>( asts: BTreeMap, - dirty: &'a Vec, + paths: &'a ProjectPathsConfig, sources: &'a mut Sources, - ) -> TestOptimizer<'a> { - TestOptimizer { asts, dirty, sources } + ) -> BytecodeDependencyOptimizer<'a> { + BytecodeDependencyOptimizer { asts, paths, sources } } - fn optimize(self) { + fn process(self) { let mut updates = Updates::default(); - let ignored_contracts = self.collect_ignored_contracts(); - self.rename_contracts_to_abstract(&ignored_contracts, &mut updates); - self.remove_bytecode_dependencies(&ignored_contracts, &mut updates); + + let contracts = self.collect_contracts(&mut updates); + self.remove_bytecode_dependencies(&contracts, &mut updates); apply_updates(self.sources, updates); } - fn collect_ignored_contracts(&self) -> BTreeSet { - let mut ignored_sources = BTreeSet::new(); + /// Collects a mapping from contract AST id to [ContractData]. 
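+    /// For a hypothetical `src/Counter.sol` defining
+    /// `contract Counter { constructor(uint256 x) {} }`, this records
+    /// `ContractData { artifact: "src/Counter.sol:Counter", has_constructor: true }`
+    /// and, via `updates`, injects a `ConstructorHelper` struct (suffixed with the
+    /// contract's AST id) holding `uint256 x;` so the constructor arguments can later
+    /// be ABI-encoded.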
+ fn collect_contracts(&self, updates: &mut Updates) -> BTreeMap { + let mut contracts = BTreeMap::new(); for (path, ast) in &self.asts { - if path.to_str().unwrap().contains("test") || path.to_str().unwrap().contains("script") - { - ignored_sources.insert(ast.id); - } else if self.dirty.contains(path) { - ignored_sources.insert(ast.id); - - for node in &ast.nodes { - if let SourceUnitPart::ImportDirective(import) = node { - ignored_sources.insert(import.source_unit); - } - } - } - } - - let mut ignored_contracts = BTreeSet::new(); - - for ast in self.asts.values() { - if ignored_sources.contains(&ast.id) { - for node in &ast.nodes { - if let SourceUnitPart::ContractDefinition(contract) = node { - ignored_contracts.insert(contract.id); - } - } - } - } - - ignored_contracts - } + let src = self.sources.get(path).unwrap().content.as_str(); - fn rename_contracts_to_abstract( - &self, - ignored_contracts: &BTreeSet, - updates: &mut Updates, - ) { - for (path, ast) in &self.asts { for node in &ast.nodes { if let SourceUnitPart::ContractDefinition(contract) = node { - if ignored_contracts.contains(&contract.id) { + let artifact = format!("{}:{}", path.display(), contract.name); + let constructor = contract.nodes.iter().find_map(|node| { + let ContractDefinitionPart::FunctionDefinition(func) = node else { + return None; + }; + if *func.kind() != FunctionKind::Constructor { + return None; + } + + Some(func) + }); + + if constructor.map_or(true, |func| func.parameters.parameters.is_empty()) { + contracts + .insert(contract.id, ContractData { artifact, has_constructor: false }); continue; } - if matches!(contract.kind, ContractKind::Contract) && !contract.is_abstract { - if let Some(start) = contract.src.start { - updates.entry(path.clone()).or_default().insert(( - start, - start, - "abstract ".to_string(), - )); + contracts.insert(contract.id, ContractData { artifact, has_constructor: true }); + + let constructor = constructor.unwrap(); + let updates = updates.entry(path.clone()).or_default(); + + let mut constructor_helper = + format!("struct ConstructorHelper{} {{", contract.id); + + for param in &constructor.parameters.parameters { + if let Some(loc) = ItemLocation::try_from_loc(param.src) { + let param = &src[loc.start..loc.end] + .replace(" memory ", " ") + .replace(" calldata ", " "); + write!(constructor_helper, "{param};").unwrap(); } } + + constructor_helper.push('}'); + + if let Some(loc) = ItemLocation::try_from_loc(constructor.src) { + updates.insert((loc.start, loc.start, constructor_helper)); + } } } } + + contracts } + /// Goes over all source files and replaces bytecode dependencies with cheatcode invocations. 
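+    /// A sketch of the rewrite, assuming a constructor-less `Token` contract in
+    /// `src/Token.sol`:
+    ///
+    /// * `type(Token).creationCode` becomes `vm.getCode("src/Token.sol:Token")`
+    /// * `new Token()` becomes `Token(payable(vm.deployCode("src/Token.sol:Token")))`
+    ///
+    /// where `vm` is the per-file `VmContractHelper` interface appended at the end of
+    /// the file, bound to the canonical Foundry cheatcode address.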
fn remove_bytecode_dependencies( &self, - ignored_contracts: &BTreeSet, + contracts: &BTreeMap, updates: &mut Updates, ) { + let test_dir = &self.paths.tests.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root); + let script_dir = + &self.paths.scripts.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root); for (path, ast) in &self.asts { + if !path.starts_with(test_dir) && !path.starts_with(script_dir) { + continue; + } let src = self.sources.get(path).unwrap().content.as_str(); + + if src.is_empty() { + continue; + } let mut collector = BytecodeDependencyCollector::new(src); ast.walk(&mut collector); + let updates = updates.entry(path.clone()).or_default(); + let vm_interface_name = format!("VmContractHelper{}", ast.id); + let vm = format!("{vm_interface_name}(0x7109709ECfa91a80626fF3989D68f67F5b1DD12D)"); + for dep in collector.dependencies { + let ContractData { artifact, has_constructor } = + contracts.get(&dep.referenced_contract).unwrap(); match dep.kind { BytecodeDependencyKind::CreationCode => { - updates.insert((dep.loc.start, dep.loc.end, "bytes(\"\")".to_string())); - } - BytecodeDependencyKind::New(new_loc, name) => { updates.insert(( - new_loc.start, - new_loc.end, - format!("{name}(payable(address(uint160(uint256(keccak256(abi.encode"), + dep.loc.start, + dep.loc.end, + format!("{vm}.getCode(\"{artifact}\")"), )); - updates.insert((dep.loc.end, dep.loc.end, format!("))))))"))); + } + BytecodeDependencyKind::New(new_loc, name) => { + if !*has_constructor { + updates.insert(( + dep.loc.start, + dep.loc.end, + format!("{name}(payable({vm}.deployCode(\"{artifact}\")))"), + )); + } else { + updates.insert(( + new_loc.start, + new_loc.end, + format!("{name}(payable({vm}.deployCode(\"{artifact}\", abi.encode({name}.ConstructorHelper{}", dep.referenced_contract), + )); + updates.insert((dep.loc.end, dep.loc.end, "))))".to_string())); + } } }; } + updates.insert(( + src.len() - 1, + src.len() - 1, + format!( + r#" +interface {vm_interface_name} {{ + function deployCode(string memory _artifact, bytes memory _data) external returns (address); + function deployCode(string memory _artifact) external returns (address); + function getCode(string memory _artifact) external returns (bytes memory); +}}"# + ), + )); } } } @@ -296,8 +350,14 @@ impl Preprocessor for TestOptimizerPreprocessor { &self, solc: &SolcCompiler, mut input: SolcVersionedInput, - dirty: &Vec, + paths: &ProjectPathsConfig, ) -> Result { + // Skip if we are not compiling any tests or scripts. Avoids unnecessary solc invocation and + // AST parsing. 
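+        // (For a rebuild touching only files under `src/`, this early return means the
+        // preprocessor adds no extra solc invocation at all.)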
+ if input.input.sources.iter().all(|(path, _)| !is_test_or_script(path, paths)) { + return Ok(input); + } + let prev_output_selection = std::mem::replace( &mut input.input.settings.output_selection, OutputSelection::ast_output_selection(), @@ -326,7 +386,7 @@ impl Preprocessor for TestOptimizerPreprocessor { }) .collect::>>()?; - TestOptimizer::new(asts, dirty, &mut input.input.sources).optimize(); + BytecodeDependencyOptimizer::new(asts, paths, &mut input.input.sources).process(); Ok(input) } @@ -337,12 +397,13 @@ impl Preprocessor for TestOptimizerPreprocessor { &self, compiler: &MultiCompiler, input: ::Input, - dirty: &Vec, + paths: &ProjectPathsConfig, ) -> Result<::Input> { match input { MultiCompilerInput::Solc(input) => { if let Some(solc) = &compiler.solc { - let input = self.preprocess(solc, input, dirty)?; + let paths = paths.clone().with_language::(); + let input = self.preprocess(solc, input, &paths)?; Ok(MultiCompilerInput::Solc(input)) } else { Ok(MultiCompilerInput::Solc(input)) diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index aa9e5221..d58b0f44 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -5,7 +5,7 @@ use std::{ }; use thiserror::Error; -pub type Result = std::result::Result; +pub type Result = std::result::Result; #[allow(unused_macros)] #[macro_export] From 86f05629eaf710bc2f09c7350641db2879608a6c Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 11 Sep 2024 22:48:47 +0400 Subject: [PATCH 05/10] wip modified: crates/compilers/src/preprocessor.rs --- crates/compilers/src/cache.rs | 16 +++++------ crates/compilers/src/preprocessor.rs | 43 +++++++++++++++++++++------- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/crates/compilers/src/cache.rs b/crates/compilers/src/cache.rs index 1e81c11d..c41c1923 100644 --- a/crates/compilers/src/cache.rs +++ b/crates/compilers/src/cache.rs @@ -786,14 +786,6 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { sources.insert(file.clone(), source); } - let src_files = sources - .keys() - .filter(|f| { - !f.starts_with(&self.project.paths.tests) - && !f.starts_with(&self.project.paths.scripts) - }) - .collect::>(); - // Calculate content hashes for later comparison. 
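         // (`fill_hashes` records a plain content hash for every file and, for source
         // files, an interface-representation hash, so both dirtiness checks below
         // have data to compare against.)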
self.fill_hashes(&sources); @@ -804,6 +796,14 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> { } } + let src_files = sources + .keys() + .filter(|f| { + !f.starts_with(&self.project.paths.tests) + && !f.starts_with(&self.project.paths.scripts) + }) + .collect::>(); + // Mark sources as dirty based on their imports for (file, entry) in &self.cache.files { if self.dirty_sources.contains(file) { diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs index 2678b97d..c5e1c6d1 100644 --- a/crates/compilers/src/preprocessor.rs +++ b/crates/compilers/src/preprocessor.rs @@ -10,15 +10,14 @@ use foundry_compilers_artifacts::{ ast::SourceLocation, output_selection::OutputSelection, visitor::{Visitor, Walk}, - ContractDefinitionPart, Expression, FunctionCall, - FunctionKind, MemberAccess, SolcLanguage, Source, SourceUnit, SourceUnitPart, - Sources, TypeName, + ContractDefinitionPart, Expression, FunctionCall, FunctionKind, MemberAccess, NewExpression, + SolcLanguage, Source, SourceUnit, SourceUnitPart, Sources, TypeName, }; use foundry_compilers_core::utils; -use md5::{ Digest}; +use md5::Digest; use solang_parser::{diagnostics::Diagnostic, helpers::CodeLocation, pt}; use std::{ - collections::{BTreeMap, }, + collections::BTreeMap, fmt::Write, path::{Path, PathBuf}, }; @@ -120,15 +119,22 @@ struct BytecodeDependency { struct BytecodeDependencyCollector<'a> { source: &'a str, dependencies: Vec, + total_count: usize, } impl BytecodeDependencyCollector<'_> { fn new(source: &str) -> BytecodeDependencyCollector<'_> { - BytecodeDependencyCollector { source, dependencies: Vec::new() } + BytecodeDependencyCollector { source, dependencies: Vec::new(), total_count: 0 } } } impl Visitor for BytecodeDependencyCollector<'_> { + fn visit_new_expression(&mut self, expr: &NewExpression) { + if let TypeName::UserDefinedTypeName(_) = &expr.type_name { + self.total_count += 1; + } + } + fn visit_function_call(&mut self, call: &FunctionCall) { let (new_loc, expr) = match &call.expression { Expression::NewExpression(expr) => (expr.src, expr), @@ -160,6 +166,7 @@ impl Visitor for BytecodeDependencyCollector<'_> { if access.member_name != "creationCode" { return; } + self.total_count += 1; let Expression::FunctionCall(call) = &access.expression else { return }; @@ -208,13 +215,15 @@ impl BytecodeDependencyOptimizer<'_> { BytecodeDependencyOptimizer { asts, paths, sources } } - fn process(self) { + fn process(self) -> Result<()> { let mut updates = Updates::default(); let contracts = self.collect_contracts(&mut updates); - self.remove_bytecode_dependencies(&contracts, &mut updates); + self.remove_bytecode_dependencies(&contracts, &mut updates)?; apply_updates(self.sources, updates); + + Ok(()) } /// Collects a mapping from contract AST id to [ContractData]. @@ -277,7 +286,7 @@ impl BytecodeDependencyOptimizer<'_> { &self, contracts: &BTreeMap, updates: &mut Updates, - ) { + ) -> Result<()> { let test_dir = &self.paths.tests.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root); let script_dir = &self.paths.scripts.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root); @@ -293,6 +302,18 @@ impl BytecodeDependencyOptimizer<'_> { let mut collector = BytecodeDependencyCollector::new(src); ast.walk(&mut collector); + + // It is possible to write weird expressions which we won't catch. + // e.g. (((new Contract)))() is valid syntax + // + // We need to ensure that we've collected all dependencies that are in the contract. 
diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs
index 2678b97d..c5e1c6d1 100644
--- a/crates/compilers/src/preprocessor.rs
+++ b/crates/compilers/src/preprocessor.rs
@@ -10,15 +10,14 @@ use foundry_compilers_artifacts::{
     ast::SourceLocation,
     output_selection::OutputSelection,
     visitor::{Visitor, Walk},
-    ContractDefinitionPart, Expression, FunctionCall,
-    FunctionKind, MemberAccess, SolcLanguage, Source, SourceUnit, SourceUnitPart,
-    Sources, TypeName,
+    ContractDefinitionPart, Expression, FunctionCall, FunctionKind, MemberAccess, NewExpression,
+    SolcLanguage, Source, SourceUnit, SourceUnitPart, Sources, TypeName,
 };
 use foundry_compilers_core::utils;
-use md5::{ Digest};
+use md5::Digest;
 use solang_parser::{diagnostics::Diagnostic, helpers::CodeLocation, pt};
 use std::{
-    collections::{BTreeMap, },
+    collections::BTreeMap,
     fmt::Write,
     path::{Path, PathBuf},
 };
@@ -120,15 +119,22 @@ struct BytecodeDependency {
 struct BytecodeDependencyCollector<'a> {
     source: &'a str,
     dependencies: Vec<BytecodeDependency>,
+    total_count: usize,
 }
 
 impl BytecodeDependencyCollector<'_> {
     fn new(source: &str) -> BytecodeDependencyCollector<'_> {
-        BytecodeDependencyCollector { source, dependencies: Vec::new() }
+        BytecodeDependencyCollector { source, dependencies: Vec::new(), total_count: 0 }
     }
 }
 
 impl Visitor for BytecodeDependencyCollector<'_> {
+    fn visit_new_expression(&mut self, expr: &NewExpression) {
+        if let TypeName::UserDefinedTypeName(_) = &expr.type_name {
+            self.total_count += 1;
+        }
+    }
+
     fn visit_function_call(&mut self, call: &FunctionCall) {
         let (new_loc, expr) = match &call.expression {
             Expression::NewExpression(expr) => (expr.src, expr),
@@ -160,6 +166,7 @@ impl Visitor for BytecodeDependencyCollector<'_> {
         if access.member_name != "creationCode" {
             return;
         }
+        self.total_count += 1;
 
         let Expression::FunctionCall(call) = &access.expression else { return };
 
@@ -208,13 +215,15 @@ impl BytecodeDependencyOptimizer<'_> {
         BytecodeDependencyOptimizer { asts, paths, sources }
     }
 
-    fn process(self) {
+    fn process(self) -> Result<()> {
         let mut updates = Updates::default();
 
         let contracts = self.collect_contracts(&mut updates);
 
-        self.remove_bytecode_dependencies(&contracts, &mut updates);
+        self.remove_bytecode_dependencies(&contracts, &mut updates)?;
 
         apply_updates(self.sources, updates);
+
+        Ok(())
     }
 
     /// Collects a mapping from contract AST id to [ContractData].
@@ -277,7 +286,7 @@ impl BytecodeDependencyOptimizer<'_> {
         &self,
         contracts: &BTreeMap<usize, ContractData>,
         updates: &mut Updates,
-    ) {
+    ) -> Result<()> {
         let test_dir = &self.paths.tests.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
         let script_dir =
             &self.paths.scripts.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
@@ -293,6 +302,18 @@ impl BytecodeDependencyOptimizer<'_> {
 
             let mut collector = BytecodeDependencyCollector::new(src);
             ast.walk(&mut collector);
+
+            // It is possible to write weird expressions which we won't catch.
+            // e.g. (((new Contract)))() is valid syntax
+            //
+            // We need to ensure that we've collected all dependencies that are in the contract.
+            if collector.dependencies.len() != collector.total_count {
+                return Err(SolcError::msg(format!(
+                    "failed to collect all bytecode dependencies for {}",
+                    path.display()
+                )));
+            }
+
             let updates = updates.entry(path.clone()).or_default();
             let vm_interface_name = format!("VmContractHelper{}", ast.id);
             let vm = format!("{vm_interface_name}(0x7109709ECfa91a80626fF3989D68f67F5b1DD12D)");
@@ -339,6 +360,8 @@ interface {vm_interface_name} {{
                 ),
             ));
         }
+
+        Ok(())
     }
 }
 
@@ -386,7 +409,7 @@ impl Preprocessor<SolcCompiler> for TestOptimizerPreprocessor {
             })
             .collect::<Result<Vec<_>>>()?;
 
-        BytecodeDependencyOptimizer::new(asts, paths, &mut input.input.sources).process();
+        BytecodeDependencyOptimizer::new(asts, paths, &mut input.input.sources).process()?;
 
         Ok(input)
     }
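Patch 05's `total_count` bookkeeping closes a soundness hole: every `new` of a user-defined type and every `.creationCode` access increments the counter, and preprocessing now aborts when fewer dependencies were collected than were seen, rather than emitting a partially rewritten source. The invariant in miniature (toy types, not the real visitor API):

```rust
/// Two counters: `seen` is bumped for every candidate site in the AST,
/// `collected` only for sites the collector could fully parse.
struct Collector {
    collected: usize,
    seen: usize,
}

impl Collector {
    fn visit(&mut self, parseable: bool) {
        self.seen += 1;
        if parseable {
            self.collected += 1;
        }
    }

    /// Fail loudly instead of rewriting only some of the dependencies.
    fn finish(&self) -> Result<(), String> {
        if self.collected != self.seen {
            return Err(format!("missed {} dependencies", self.seen - self.collected));
        }
        Ok(())
    }
}

fn main() {
    let mut c = Collector { collected: 0, seen: 0 };
    c.visit(true);  // e.g. `new Counter()`
    c.visit(false); // e.g. `(((new Counter)))()`: seen, but not parsed
    assert!(c.finish().is_err());
}
```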

From af6550ff86bdc9b44721b954aada37cc7989f82b Mon Sep 17 00:00:00 2001
From: Arsenii Kulikov
Date: Fri, 13 Sep 2024 13:18:19 +0400
Subject: [PATCH 06/10] fixes

---
 crates/compilers/src/cache.rs           | 107 ++++++++++--------------
 crates/compilers/src/compile/project.rs |  14 ++--
 crates/compilers/src/preprocessor.rs    |   3 +-
 3 files changed, 52 insertions(+), 72 deletions(-)

diff --git a/crates/compilers/src/cache.rs b/crates/compilers/src/cache.rs
index c41c1923..8541a8b0 100644
--- a/crates/compilers/src/cache.rs
+++ b/crates/compilers/src/cache.rs
@@ -174,10 +174,7 @@ impl<S: CompilerSettings> CompilerCache<S> {
     pub fn join_entries(&mut self, root: &Path) -> &mut Self {
         self.files = std::mem::take(&mut self.files)
             .into_iter()
-            .map(|(path, mut entry)| {
-                entry.join_imports(root);
-                (root.join(path), entry)
-            })
+            .map(|(path, entry)| (root.join(path), entry))
             .collect();
         self
     }
@@ -186,11 +183,7 @@ impl<S: CompilerSettings> CompilerCache<S> {
     pub fn strip_entries_prefix(&mut self, base: &Path) -> &mut Self {
         self.files = std::mem::take(&mut self.files)
             .into_iter()
-            .map(|(path, mut entry)| {
-                let path = path.strip_prefix(base).map(Into::into).unwrap_or(path);
-                entry.strip_imports_prefixes(base);
-                (path, entry)
-            })
+            .map(|(path, entry)| (path.strip_prefix(base).map(Into::into).unwrap_or(path), entry))
             .collect();
         self
     }
@@ -560,17 +553,6 @@ impl<S: CompilerSettings> CacheEntry<S> {
         self.artifacts().all(|a| a.path.exists())
     }
 
-    /// Joins all import paths with `base`
-    pub fn join_imports(&mut self, base: &Path) {
-        self.imports = self.imports.iter().map(|i| base.join(i)).collect();
-    }
-
-    /// Strips `base` from all import paths
-    pub fn strip_imports_prefixes(&mut self, base: &Path) {
-        self.imports =
-            self.imports.iter().map(|i| i.strip_prefix(base).unwrap_or(i).to_path_buf()).collect();
-    }
-
     /// Sets the artifact's paths to `base` adjoined to the artifact's `path`.
     pub fn join_artifacts_files(&mut self, base: &Path) {
         self.artifacts_mut().for_each(|a| a.path = base.join(&a.path))
@@ -647,27 +629,33 @@ pub(crate) struct ArtifactsCacheInner<'a, T: ArtifactOutput, C: Compiler> {
 }
 
 impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
+    /// Whether the given file is a source file, as opposed to a test/script file.
+    fn is_source_file(&self, file: &Path) -> bool {
+        !file.starts_with(&self.project.paths.tests)
+            && !file.starts_with(&self.project.paths.scripts)
+    }
+
     /// Creates a new cache entry for the file
-    fn create_cache_entry(
-        &mut self,
-        file: PathBuf,
-        source: &Source,
-        edges: Option<&GraphEdges>,
-    ) {
-        let edges = edges.unwrap_or(&self.edges);
-        let content_hash = source.content_hash();
-        let interface_repr_hash = file
-            .starts_with(&self.project.paths.sources)
-            .then(|| interface_representation_hash(source));
+    fn create_cache_entry(&mut self, file: PathBuf, source: &Source) {
+        let imports = self
+            .edges
+            .imports(&file)
+            .into_iter()
+            .map(|import| strip_prefix(import, self.project.root()).into())
+            .collect();
+
+        let interface_repr_hash =
+            self.is_source_file(&file).then(|| interface_representation_hash(source));
+
         let entry = CacheEntry {
             last_modification_date: CacheEntry::<C::Settings>::read_last_modification_date(&file)
                 .unwrap_or_default(),
-            content_hash,
+            content_hash: source.content_hash(),
             interface_repr_hash,
             source_name: strip_prefix(&file, self.project.root()).into(),
             compiler_settings: self.project.settings.clone(),
-            imports: edges.imports(&file).into_iter().map(|i| i.into()).collect(),
-            version_requirement: edges.version_requirement(&file).map(|v| v.to_string()),
+            imports,
+            version_requirement: self.edges.version_requirement(&file).map(|v| v.to_string()),
             // artifacts remain empty until we received the compiler output
             artifacts: Default::default(),
             seen_by_compiler: false,
@@ -701,7 +689,7 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
 
             // Ensure that we have a cache entry for all sources.
             if !self.cache.files.contains_key(file) {
-                self.create_cache_entry(file.clone(), source, None);
+                self.create_cache_entry(file.clone(), source);
             }
         }
 
@@ -796,27 +784,35 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
             }
         }
 
-        let src_files = sources
-            .keys()
-            .filter(|f| {
-                !f.starts_with(&self.project.paths.tests)
-                    && !f.starts_with(&self.project.paths.scripts)
-            })
-            .collect::<HashSet<_>>();
+        // Build a temporary graph for populating the cache. We want to ensure that we preserve
+        // all just removed entries with updated data. We need a separate graph for this because
+        // `self.edges` only contains graph data for in-scope sources but we are operating on
+        // cache entries.
+        let Ok(graph) = Graph::<C::ParsedSource>::resolve_sources(&self.project.paths, sources)
+        else {
+            // Purge all sources on graph resolution error.
+            self.cache.files.clear();
+            return;
+        };
+
+        let (sources, edges) = graph.into_sources();
 
         // Mark sources as dirty based on their imports
-        for (file, entry) in &self.cache.files {
+        for file in sources.keys() {
             if self.dirty_sources.contains(file) {
                 continue;
             }
-            let is_src = src_files.contains(file);
-            for import in &entry.imports {
+            let is_src = self.is_source_file(file);
+            for import in edges.imports(file) {
+                // Any source file importing a dirty source file is dirty.
                 if is_src && self.dirty_sources.contains(import) {
                     self.dirty_sources.insert(file.clone());
                     break;
+                // For non-src files we mark them as dirty only if they import a dirty non-src
+                // file or a src file whose interface representation changed.
                } else if !is_src
                     && self.dirty_sources.contains(import)
-                    && (!src_files.contains(import) || self.is_dirty_impl(import, true))
+                    && (!self.is_source_file(import) || self.is_dirty_impl(import, true))
                 {
                     self.dirty_sources.insert(file.clone());
                 }
@@ -829,25 +825,13 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
             self.cache.remove(file);
         }
 
-        // Build a temporary graph for populating cache. We want to ensure that we preserve all just
-        // removed entries with updated data. We need separate graph for this because
-        // `self.edges` only contains graph data for in-scope sources but we are operating on cache
-        // entries.
-        let Ok(graph) = Graph::<C::ParsedSource>::resolve_sources(&self.project.paths, sources)
-        else {
-            // Purge all sources on graph resolution error.
-            self.cache.files.clear();
-            return;
-        };
-
-        let (sources, edges) = graph.into_sources();
-
+        // Create new entries for all source files
         for (file, source) in sources {
            if self.cache.files.contains_key(&file) {
                 continue;
             }
 
-            self.create_cache_entry(file.clone(), &source, Some(&edges));
+            self.create_cache_entry(file.clone(), &source);
         }
     }
 
@@ -894,7 +878,8 @@ impl<'a, T: ArtifactOutput, C: Compiler> ArtifactsCacheInner<'a, T, C> {
         if let hash_map::Entry::Vacant(entry) = self.content_hashes.entry(file.clone()) {
             entry.insert(source.content_hash());
         }
-        if file.starts_with(&self.project.paths.sources) {
+        // Fill interface representation hashes for source files
+        if self.is_source_file(&file) {
             if let hash_map::Entry::Vacant(entry) =
                 self.interface_repr_hashes.entry(file.clone())
             {
diff --git a/crates/compilers/src/compile/project.rs b/crates/compilers/src/compile/project.rs
index 2d875fa4..6ea0738b 100644
--- a/crates/compilers/src/compile/project.rs
+++ b/crates/compilers/src/compile/project.rs
@@ -476,14 +476,6 @@ impl CompilerSources {
                 let mut input = C::Input::build(sources, settings, language, version.clone());
 
                 input.strip_prefix(project.paths.root.as_path());
-                let actually_dirty = actually_dirty
-                    .into_iter()
-                    .map(|path| {
-                        path.strip_prefix(project.paths.root.as_path())
-                            .unwrap_or(&path)
-                            .to_path_buf()
-                    })
-                    .collect();
 
                 if let Some(preprocessor) = preprocessor.as_ref() {
                     input = preprocessor.preprocess(&project.compiler, input, &project.paths)?;
@@ -511,7 +503,11 @@ impl CompilerSources {
 
                 let build_info = RawBuildInfo::new(&input, &output, project.build_info)?;
 
-                output.retain_files(actually_dirty);
+                output.retain_files(
+                    actually_dirty
+                        .iter()
+                        .map(|f| f.strip_prefix(project.paths.root.as_path()).unwrap_or(f)),
+                );
                 output.join_all(project.paths.root.as_path());
 
                 aggregated.extend(version.clone(), build_info, output);
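Instead of rewriting `actually_dirty` in place, the output filter above strips the project root lazily, so the original absolute paths stay intact for later use. The stripping itself is plain `std::path` behavior:

```rust
use std::path::Path;

fn main() {
    let root = Path::new("/repo");
    let dirty = [Path::new("/repo/test/Counter.t.sol"), Path::new("lib/Dep.sol")];

    // Mirrors `f.strip_prefix(root).unwrap_or(f)`: paths under the project
    // root become root-relative, everything else is passed through unchanged.
    let relative: Vec<&Path> =
        dirty.into_iter().map(|f| f.strip_prefix(root).unwrap_or(f)).collect();

    assert_eq!(relative[0], Path::new("test/Counter.t.sol"));
    assert_eq!(relative[1], Path::new("lib/Dep.sol"));
}
```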
diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs
index c5e1c6d1..18c95548 100644
--- a/crates/compilers/src/preprocessor.rs
+++ b/crates/compilers/src/preprocessor.rs
@@ -5,7 +5,7 @@ use crate::{
     solc::{SolcCompiler, SolcVersionedInput},
     Compiler, ProjectPathsConfig, Result, SolcError,
 };
-use alloy_primitives::{hex};
+use alloy_primitives::hex;
 use foundry_compilers_artifacts::{
     ast::SourceLocation,
     output_selection::OutputSelection,
@@ -302,7 +302,6 @@ impl BytecodeDependencyOptimizer<'_> {
 
             let mut collector = BytecodeDependencyCollector::new(src);
             ast.walk(&mut collector);
-
             // It is possible to write weird expressions which we won't catch.
             // e.g. (((new Contract)))() is valid syntax

From 276bc9470709143489697788c883fcd11afe030f Mon Sep 17 00:00:00 2001
From: Arsenii Kulikov
Date: Fri, 13 Sep 2024 19:33:04 +0400
Subject: [PATCH 07/10] helper libs

---
 crates/compilers/src/preprocessor.rs | 192 ++++++++++++++++++++-------
 crates/core/src/error.rs             |   3 +
 2 files changed, 149 insertions(+), 46 deletions(-)

diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs
index 18c95548..623efe26 100644
--- a/crates/compilers/src/preprocessor.rs
+++ b/crates/compilers/src/preprocessor.rs
@@ -11,9 +11,10 @@ use foundry_compilers_artifacts::{
     output_selection::OutputSelection,
     visitor::{Visitor, Walk},
     ContractDefinitionPart, Expression, FunctionCall, FunctionKind, MemberAccess, NewExpression,
-    SolcLanguage, Source, SourceUnit, SourceUnitPart, Sources, TypeName,
+    ParameterList, SolcLanguage, Source, SourceUnit, SourceUnitPart, Sources, TypeName,
 };
 use foundry_compilers_core::utils;
+use itertools::Itertools;
 use md5::Digest;
 use solang_parser::{diagnostics::Diagnostic, helpers::CodeLocation, pt};
 use std::{
@@ -22,6 +23,11 @@ use std::{
     path::{Path, PathBuf},
 };
 
+/// Removes parts of the contract which do not alter its interface:
+///   - internal functions
+///   - external function bodies
+///
+/// Preserves all libraries and interfaces.
 pub(crate) fn interface_representation(content: &str) -> Result<String, Vec<Diagnostic>> {
     let (source_unit, _) = solang_parser::parse(content, 0)?;
     let mut locs_to_remove = Vec::new();
@@ -76,6 +82,7 @@ pub(crate) fn interface_representation(content: &str) -> Result<String, Vec<Diagnostic>> {
     Ok(utils::RE_TWO_OR_MORE_SPACES.replace_all(&content, "").to_string())
 }
 
+/// Computes the hash of the interface representation of the source.
 pub(crate) fn interface_representation_hash(source: &Source) -> String {
     let Ok(repr) = interface_representation(&source.content) else { return source.content_hash() };
     let mut hasher = md5::Md5::new();
@@ -102,12 +109,16 @@ fn is_test_or_script(path: &Path, paths: &ProjectPathsConfig) -> bool {
     path.starts_with(test_dir) || path.starts_with(script_dir)
 }
 
+/// Kind of the bytecode dependency.
 #[derive(Debug)]
 enum BytecodeDependencyKind {
+    /// `type(Contract).creationCode`
     CreationCode,
+    /// `new Contract`
     New(ItemLocation, String),
 }
 
+/// Represents a single bytecode dependency.
 #[derive(Debug)]
 struct BytecodeDependency {
     kind: BytecodeDependencyKind,
@@ -190,12 +201,73 @@ impl Visitor for BytecodeDependencyCollector<'_> {
     }
 }
 
+fn build_constructor_struct<'a>(
+    parameters: &'a ParameterList,
+    src: &'a str,
+) -> Result<(String, Vec<&'a str>)> {
+    let mut s = "struct ConstructorArgs {".to_string();
+    let mut param_names = Vec::new();
+
+    for param in &parameters.parameters {
+        param_names.push(param.name.as_str());
+        if let Some(loc) = ItemLocation::try_from_loc(param.src) {
+            let param_def =
+                &src[loc.start..loc.end].replace(" memory ", " ").replace(" calldata ", " ");
+            write!(s, "{param_def};")?;
+        }
+    }
+
+    s.push('}');
+
+    Ok((s, param_names))
+}
+
 /// Keeps data about a single contract definition.
-struct ContractData {
-    /// Artifact ID to use in `getCode`/`deployCode` calls.
+struct ContractData<'a> {
+    /// AST id of the contract.
+    ast_id: usize,
+    /// Path of the source file.
+    path: &'a Path,
+    /// Name of the contract.
+    name: &'a str,
+    /// Constructor parameters.
+    constructor_params: Option<&'a ParameterList>,
+    /// Reference to source code.
+    src: &'a str,
+    /// Artifact string to pass into cheatcodes.
    artifact: String,
-    /// Whether contract has a non-empty constructor.
-    has_constructor: bool,
+}
+
+impl ContractData<'_> {
+    pub fn build_helper(&self) -> Result<Option<String>> {
+        let Self { ast_id, path, name, constructor_params, src, .. } = self;
+
+        let Some(params) = constructor_params else { return Ok(None) };
+        let (constructor_struct, param_names) = build_constructor_struct(params, src)?;
+        let abi_encode = format!(
+            "abi.encode({})",
+            param_names.iter().map(|name| format!("args.{name}")).join(", ")
+        );
+
+        let helper = format!(
+            r#"
+pragma solidity >=0.4.0;
+
+import "{path}";
+
+abstract contract DeployHelper{ast_id} is {name} {{
+    {constructor_struct}
+}}
+
+function encodeArgs{ast_id}(DeployHelper{ast_id}.ConstructorArgs memory args) pure returns (bytes memory) {{
+    return {abi_encode};
+}}
+    "#,
+            path = path.display(),
+        );
+
+        Ok(Some(helper))
+    }
 }
 
 /// Receives a set of source files along with their ASTs and removes bytecode dependencies from
@@ -215,24 +287,38 @@ impl BytecodeDependencyOptimizer<'_> {
         BytecodeDependencyOptimizer { asts, paths, sources }
     }
 
+    fn is_src_file(&self, file: &Path) -> bool {
+        let tests = self.paths.tests.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
+        let scripts = self.paths.scripts.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
+
+        !file.starts_with(tests) && !file.starts_with(scripts)
+    }
+
     fn process(self) -> Result<()> {
         let mut updates = Updates::default();
 
-        let contracts = self.collect_contracts(&mut updates);
+        let contracts = self.collect_contracts();
+        let additional_sources = self.create_deploy_helpers(&contracts)?;
 
         self.remove_bytecode_dependencies(&contracts, &mut updates)?;
 
+        self.sources.extend(additional_sources);
+
         apply_updates(self.sources, updates);
 
         Ok(())
     }
 
     /// Collects a mapping from contract AST id to [ContractData].
-    fn collect_contracts(&self, updates: &mut Updates) -> BTreeMap<usize, ContractData> {
+    fn collect_contracts(&self) -> BTreeMap<usize, ContractData<'_>> {
         let mut contracts = BTreeMap::new();
 
         for (path, ast) in &self.asts {
             let src = self.sources.get(path).unwrap().content.as_str();
 
+            if !self.is_src_file(path) {
+                continue;
+            }
+
             for node in &ast.nodes {
                 if let SourceUnitPart::ContractDefinition(contract) = node {
                     let artifact = format!("{}:{}", path.display(), contract.name);
@@ -247,33 +333,19 @@ impl BytecodeDependencyOptimizer<'_> {
                         Some(func)
                     });
 
-                    if constructor.map_or(true, |func| func.parameters.parameters.is_empty()) {
-                        contracts
-                            .insert(contract.id, ContractData { artifact, has_constructor: false });
-                        continue;
-                    }
-                    contracts.insert(contract.id, ContractData { artifact, has_constructor: true });
-
-                    let constructor = constructor.unwrap();
-                    let updates = updates.entry(path.clone()).or_default();
-
-                    let mut constructor_helper =
-                        format!("struct ConstructorHelper{} {{", contract.id);
-
-                    for param in &constructor.parameters.parameters {
-                        if let Some(loc) = ItemLocation::try_from_loc(param.src) {
-                            let param = &src[loc.start..loc.end]
-                                .replace(" memory ", " ")
-                                .replace(" calldata ", " ");
-                            write!(constructor_helper, "{param};").unwrap();
-                        }
-                    }
-
-                    constructor_helper.push('}');
-
-                    if let Some(loc) = ItemLocation::try_from_loc(constructor.src) {
-                        updates.insert((loc.start, loc.start, constructor_helper));
-                    }
+                    contracts.insert(
+                        contract.id,
+                        ContractData {
+                            artifact,
+                            constructor_params: constructor
+                                .map(|constructor| &constructor.parameters)
+                                .filter(|params| !params.parameters.is_empty()),
+                            src,
+                            ast_id: contract.id,
+                            path,
+                            name: &contract.name,
+                        },
+                    );
                 }
             }
         }
 
         contracts
     }
 
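`collect_contracts` now only records the data needed later; the string assembly happens in `build_helper`. For a hypothetical `constructor(uint256 count, address owner)`, the generated struct and encoder arguments reduce to straightforward formatting over the parameter slices (stand-alone rendition; the real code slices definitions out of the source by AST location):

```rust
fn main() {
    // Parameter definitions as they would be sliced from the source, with
    // `memory`/`calldata` already stripped, paired with their names.
    let params = [("uint256 count", "count"), ("address owner", "owner")];

    let mut s = String::from("struct ConstructorArgs {");
    for (def, _) in &params {
        s.push_str(def);
        s.push(';');
    }
    s.push('}');

    let encode = format!(
        "abi.encode({})",
        params.iter().map(|(_, name)| format!("args.{name}")).collect::<Vec<_>>().join(", ")
    );

    assert_eq!(s, "struct ConstructorArgs {uint256 count;address owner;}");
    assert_eq!(encode, "abi.encode(args.count, args.owner)");
}
```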
+    /// Creates a helper library used to generate helpers for contract deployment.
+    fn create_deploy_helpers(
+        &self,
+        contracts: &BTreeMap<usize, ContractData<'_>>,
+    ) -> Result<Sources> {
+        let mut new_sources = Sources::new();
+        for (id, contract) in contracts {
+            if let Some(code) = contract.build_helper()? {
+                let path = format!("foundry-pp/DeployHelper{}.sol", id);
+                new_sources.insert(path.into(), Source::new(code));
+            }
+        }
+
+        Ok(new_sources)
+    }
+
     /// Goes over all source files and replaces bytecode dependencies with cheatcode invocations.
     fn remove_bytecode_dependencies(
         &self,
-        contracts: &BTreeMap<usize, ContractData>,
+        contracts: &BTreeMap<usize, ContractData<'_>>,
         updates: &mut Updates,
     ) -> Result<()> {
-        let test_dir = &self.paths.tests.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
-        let script_dir =
-            &self.paths.scripts.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
         for (path, ast) in &self.asts {
-            if !path.starts_with(test_dir) && !path.starts_with(script_dir) {
+            if self.is_src_file(path) {
                 continue;
             }
             let src = self.sources.get(path).unwrap().content.as_str();
@@ -299,6 +384,10 @@ impl BytecodeDependencyOptimizer<'_> {
             if src.is_empty() {
                 continue;
             }
+
+            let updates = updates.entry(path.clone()).or_default();
+            let mut used_helpers = Vec::new();
+
             let mut collector = BytecodeDependencyCollector::new(src);
             ast.walk(&mut collector);
 
@@ -313,15 +402,18 @@ impl BytecodeDependencyOptimizer<'_> {
                 )));
             }
 
-            let updates = updates.entry(path.clone()).or_default();
             let vm_interface_name = format!("VmContractHelper{}", ast.id);
             let vm = format!("{vm_interface_name}(0x7109709ECfa91a80626fF3989D68f67F5b1DD12D)");
 
             for dep in collector.dependencies {
-                let ContractData { artifact, has_constructor } =
-                    contracts.get(&dep.referenced_contract).unwrap();
+                let Some(ContractData { artifact, constructor_params, .. }) =
+                    contracts.get(&dep.referenced_contract)
+                else {
+                    continue;
+                };
                 match dep.kind {
                     BytecodeDependencyKind::CreationCode => {
+                        // for creation code we need to just call getCode
                         updates.insert((
                             dep.loc.start,
                             dep.loc.end,
                             format!("{vm}.getCode(\"{artifact}\")"),
                         ));
                     }
                     BytecodeDependencyKind::New(new_loc, name) => {
-                        if !*has_constructor {
+                        if constructor_params.is_none() {
                             updates.insert((
                                 dep.loc.start,
                                 dep.loc.end,
                                 format!("{name}(payable({vm}.deployCode(\"{artifact}\")))"),
                             ));
                         } else {
+                            used_helpers.push(dep.referenced_contract);
                             updates.insert((
                                 new_loc.start,
                                 new_loc.end,
-                                format!("{name}(payable({vm}.deployCode(\"{artifact}\", abi.encode({name}.ConstructorHelper{}", dep.referenced_contract),
+                                format!("{name}(payable({vm}.deployCode(\"{artifact}\", encodeArgs{id}(DeployHelper{id}.ConstructorArgs", id = dep.referenced_contract),
                             ));
                             updates.insert((dep.loc.end, dep.loc.end, "))))".to_string()));
                         }
                     }
                 };
             }
+            let helper_imports = used_helpers.into_iter().map(|id| {
+                format!(
+                    "import {{DeployHelper{id}, encodeArgs{id}}} from \"foundry-pp/DeployHelper{id}.sol\";",
+                )
+            }).join("\n");
             updates.insert((
-                src.len() - 1,
-                src.len() - 1,
+                src.len(),
+                src.len(),
                 format!(
                     r#"
+{helper_imports}
+
 interface {vm_interface_name} {{
     function deployCode(string memory _artifact, bytes memory _data) external returns (address);
     function deployCode(string memory _artifact) external returns (address);
diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs
index d58b0f44..db78648c 100644
--- a/crates/core/src/error.rs
+++ b/crates/core/src/error.rs
@@ -70,6 +70,9 @@ pub enum SolcError {
     #[error("no artifact found for `{}:{}`", .0.display(), .1)]
     ArtifactNotFound(PathBuf, String),
 
+    #[error(transparent)]
+    Fmt(#[from] std::fmt::Error),
+
     #[cfg(feature = "project-util")]
     #[error(transparent)]
     FsExtra(#[from] fs_extra::error::Error),
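The new `SolcError::Fmt` variant exists because patch 07's `build_constructor_struct` uses `write!` into a `String` with `?` rather than `.unwrap()`; `#[from]` derives the conversion that makes `?` compile. A trimmed-down reproduction of the mechanism:

```rust
use std::fmt::Write;
use thiserror::Error;

#[derive(Debug, Error)]
enum SolcError {
    // `#[from]` generates `impl From<std::fmt::Error> for SolcError`,
    // which is what lets `write!(...)?` below bubble the error up.
    #[error(transparent)]
    Fmt(#[from] std::fmt::Error),
}

fn build_struct(fields: &[&str]) -> Result<String, SolcError> {
    let mut s = String::from("struct ConstructorArgs {");
    for field in fields {
        write!(s, "{field};")?; // `std::fmt::Error` converts via `From`
    }
    s.push('}');
    Ok(s)
}

fn main() {
    assert_eq!(
        build_struct(&["uint256 count"]).unwrap(),
        "struct ConstructorArgs {uint256 count;}"
    );
}
```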

From fa5d7bfbf9d0779c1703fdd03cf0dbabd7a06213 Mon Sep 17 00:00:00 2001
From: Arsenii Kulikov
Date: Fri, 13 Sep 2024 19:59:02 +0400
Subject: [PATCH 08/10] some docs

---
 crates/compilers/src/preprocessor.rs | 109 ++++++++++++++++++---------
 1 file changed, 74 insertions(+), 35 deletions(-)

diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs
index 623efe26..42c6140d 100644
--- a/crates/compilers/src/preprocessor.rs
+++ b/crates/compilers/src/preprocessor.rs
@@ -18,8 +18,7 @@ use itertools::Itertools;
 use md5::Digest;
 use solang_parser::{diagnostics::Diagnostic, helpers::CodeLocation, pt};
 use std::{
-    collections::BTreeMap,
-    fmt::Write,
+    collections::{BTreeMap, BTreeSet},
     path::{Path, PathBuf},
 };
 
@@ -126,6 +125,7 @@ struct BytecodeDependency {
     referenced_contract: usize,
 }
 
+/// Walks over AST and collects [`BytecodeDependency`]s.
 #[derive(Debug)]
 struct BytecodeDependencyCollector<'a> {
     source: &'a str,
@@ -201,27 +201,6 @@ impl Visitor for BytecodeDependencyCollector<'_> {
     }
 }
 
-fn build_constructor_struct<'a>(
-    parameters: &'a ParameterList,
-    src: &'a str,
-) -> Result<(String, Vec<&'a str>)> {
-    let mut s = "struct ConstructorArgs {".to_string();
-    let mut param_names = Vec::new();
-
-    for param in &parameters.parameters {
-        param_names.push(param.name.as_str());
-        if let Some(loc) = ItemLocation::try_from_loc(param.src) {
-            let param_def =
-                &src[loc.start..loc.end].replace(" memory ", " ").replace(" calldata ", " ");
-            write!(s, "{param_def};")?;
-        }
-    }
-
-    s.push('}');
-
-    Ok((s, param_names))
-}
-
 /// Keeps data about a single contract definition.
 struct ContractData<'a> {
     /// AST id of the contract.
@@ -239,15 +218,65 @@ struct ContractData<'a> {
 }
 
 impl ContractData<'_> {
+    /// If the contract has a non-empty constructor, generates a helper source file for it
+    /// containing a helper used to encode the constructor arguments.
+    ///
+    /// This is needed because the current preprocessing only wraps the arguments, leaving them
+    /// unchanged. This allows us to handle nested new expressions correctly. However, it requires
+    /// us to have a way to wrap both named and unnamed arguments, i.e. you can't do
+    /// `abi.encode({arg: val})`.
+    ///
+    /// This function produces a helper struct plus a helper function to encode the arguments. The
+    /// struct is defined in the scope of an abstract contract inheriting the contract containing
+    /// the constructor. This is done as a hack to allow us to inherit the same scope of
+    /// definitions.
+    ///
+    /// The resulting helper looks like this:
+    /// ```solidity
+    /// import "lib/openzeppelin-contracts/contracts/token/ERC20.sol";
+    ///
+    /// abstract contract DeployHelper335 is ERC20 {
+    ///     struct ConstructorArgs {
+    ///         string name;
+    ///         string symbol;
+    ///     }
+    /// }
+    ///
+    /// function encodeArgs335(DeployHelper335.ConstructorArgs memory args) pure returns (bytes memory) {
+    ///     return abi.encode(args.name, args.symbol);
+    /// }
+    /// ```
+    ///
+    /// Example usage:
+    /// ```solidity
+    /// new ERC20(name, symbol)
+    /// ```
+    /// becomes
+    /// ```solidity
+    /// vm.deployCode("artifact path", encodeArgs335(DeployHelper335.ConstructorArgs(name, symbol)))
+    /// ```
+    /// With named arguments:
+    /// ```solidity
+    /// new ERC20({name: name, symbol: symbol})
+    /// ```
+    /// becomes
+    /// ```solidity
+    /// vm.deployCode("artifact path", encodeArgs335(DeployHelper335.ConstructorArgs({name: name, symbol: symbol})))
+    /// ```
     pub fn build_helper(&self) -> Result<Option<String>> {
         let Self { ast_id, path, name, constructor_params, src, .. } = self;
 
         let Some(params) = constructor_params else { return Ok(None) };
-
-        let (constructor_struct, param_names) = build_constructor_struct(params, src)?;
-        let abi_encode = format!(
-            "abi.encode({})",
-            param_names.iter().map(|name| format!("args.{name}")).join(", ")
-        );
+
+        let struct_fields = params
+            .parameters
+            .iter()
+            .filter_map(|param| {
+                let loc = ItemLocation::try_from_loc(param.src)?;
+                Some(src[loc.start..loc.end].replace(" memory ", " ").replace(" calldata ", " "))
+            })
+            .join("; ");
+
+        let abi_encode_args =
+            params.parameters.iter().map(|param| format!("args.{}", param.name)).join(", ");
 
         let helper = format!(
             r#"
@@ -256,11 +285,13 @@ pragma solidity >=0.4.0;
 
 import "{path}";
 
 abstract contract DeployHelper{ast_id} is {name} {{
-    {constructor_struct}
+    struct ConstructorArgs {{
+        {struct_fields};
+    }}
 }}
 
 function encodeArgs{ast_id}(DeployHelper{ast_id}.ConstructorArgs memory args) pure returns (bytes memory) {{
-    return {abi_encode};
+    return abi.encode({abi_encode_args});
 }}
     "#,
             path = path.display(),
@@ -269,7 +300,7 @@ pragma solidity >=0.4.0;
         );
 
         Ok(Some(helper))
     }
 }
@@ -318,7 +349,8 @@ impl BytecodeDependencyOptimizer<'_> {
         BytecodeDependencyOptimizer { asts, paths, sources }
     }
 
+    /// Returns true if the file is not a test or script file.
     fn is_src_file(&self, file: &Path) -> bool {
         let tests = self.paths.tests.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
         let scripts = self.paths.scripts.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
@@ -340,8 +372,9 @@ impl BytecodeDependencyOptimizer<'_> {
         Ok(())
     }
 
-    /// Collects a mapping from contract AST id to [ContractData].
+    /// Collects a mapping from contract AST id to [ContractData] for all contracts defined in src/
+    /// files.
     fn collect_contracts(&self) -> BTreeMap<usize, ContractData<'_>> {
         let mut contracts = BTreeMap::new();
 
@@ -398,7 +431,9 @@ impl BytecodeDependencyOptimizer<'_> {
-    /// Creates a helper library used to generate helpers for contract deployment.
+    /// Creates helper libraries for contracts with a non-empty constructor.
+    ///
+    /// See [`ContractData::build_helper`] for more details.
     fn create_deploy_helpers(
         &self,
         contracts: &BTreeMap<usize, ContractData<'_>>,
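With the helpers in place, each deployment site in a test or script reduces to plain string surgery. For a hypothetical contract `Counter` in `src/Counter.sol` with no constructor arguments (AST id 42 assumed), the substitution assembled by the patch 07 `format!` calls is:

```rust
fn main() {
    let name = "Counter";
    let artifact = "src/Counter.sol:Counter"; // "{path}:{contract name}"
    let vm = "VmContractHelper42(0x7109709ECfa91a80626fF3989D68f67F5b1DD12D)";

    // What replaces `new Counter()` in the preprocessed test source:
    let rewritten = format!("{name}(payable({vm}.deployCode(\"{artifact}\")))");

    assert_eq!(
        rewritten,
        "Counter(payable(VmContractHelper42(0x7109709ECfa91a80626fF3989D68f67F5b1DD12D).deployCode(\"src/Counter.sol:Counter\")))"
    );
}
```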
-    /// Goes over all source files and replaces bytecode dependencies with cheatcode invocations.
+    /// Goes over all test/script files and replaces bytecode dependencies with cheatcode
+    /// invocations.
     fn remove_bytecode_dependencies(
         &self,
         contracts: &BTreeMap<usize, ContractData<'_>>,
         updates: &mut Updates,
@@ -415,10 +450,10 @@ impl BytecodeDependencyOptimizer<'_> {
             }
 
             let updates = updates.entry(path.clone()).or_default();
-            let mut used_helpers = Vec::new();
+            let mut used_helpers = BTreeSet::new();
 
             let mut collector = BytecodeDependencyCollector::new(src);
             ast.walk(&mut collector);
@@ -445,13 +480,16 @@ impl BytecodeDependencyOptimizer<'_> {
                 }
                 BytecodeDependencyKind::New(new_loc, name) => {
                     if constructor_params.is_none() {
+                        // if there's no constructor, we can just call deployCode with one
+                        // argument
                         updates.insert((
                             dep.loc.start,
                             dep.loc.end,
                             format!("{name}(payable({vm}.deployCode(\"{artifact}\")))"),
                         ));
                     } else {
-                        used_helpers.push(dep.referenced_contract);
+                        // if there's a constructor, we use our helper
+                        used_helpers.insert(dep.referenced_contract);
                         updates.insert((
                             new_loc.start,
                             new_loc.end,

From 15ce120c2e17aa384a43af79a202a36ac78a0c6f Mon Sep 17 00:00:00 2001
From: Arsenii Kulikov
Date: Fri, 13 Sep 2024 20:06:55 +0400
Subject: [PATCH 09/10] clean up

---
 crates/compilers/src/preprocessor.rs | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs
index 42c6140d..096a3d5f 100644
--- a/crates/compilers/src/preprocessor.rs
+++ b/crates/compilers/src/preprocessor.rs
@@ -102,6 +102,7 @@ impl ItemLocation {
     }
 }
 
+/// Checks if the given path is a test/script file.
 fn is_test_or_script(path: &Path, paths: &ProjectPathsConfig) -> bool {
     let test_dir = paths.tests.strip_prefix(&paths.root).unwrap_or(&paths.root);
     let script_dir = paths.scripts.strip_prefix(&paths.root).unwrap_or(&paths.root);
@@ -318,14 +319,6 @@ impl BytecodeDependencyOptimizer<'_> {
         BytecodeDependencyOptimizer { asts, paths, sources }
     }
 
-    /// Returns true if the file is not a test or script file.
-    fn is_src_file(&self, file: &Path) -> bool {
-        let tests = self.paths.tests.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
-        let scripts = self.paths.scripts.strip_prefix(&self.paths.root).unwrap_or(&self.paths.root);
-
-        !file.starts_with(tests) && !file.starts_with(scripts)
-    }
-
     fn process(self) -> Result<()> {
         let mut updates = Updates::default();
@@ -341,7 +334,7 @@ impl BytecodeDependencyOptimizer<'_> {
         for (path, ast) in &self.asts {
             let src = self.sources.get(path).unwrap().content.as_str();
 
-            if !self.is_src_file(path) {
+            if is_test_or_script(path, &self.paths) {
                 continue;
             }
@@ -412,7 +405,7 @@ impl BytecodeDependencyOptimizer<'_> {
         for (path, ast) in &self.asts {
-            if self.is_src_file(path) {
+            if !is_test_or_script(path, &self.paths) {
                 continue;
             }
             let src = self.sources.get(path).unwrap().content.as_str();

From a379db3eba7739500c7062381503c82a6b556eee Mon Sep 17 00:00:00 2001
From: Arsenii Kulikov
Date: Mon, 16 Sep 2024 21:01:57 +0300
Subject: [PATCH 10/10] fixes

---
 crates/compilers/src/preprocessor.rs | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/crates/compilers/src/preprocessor.rs b/crates/compilers/src/preprocessor.rs
index 096a3d5f..d79f6619 100644
--- a/crates/compilers/src/preprocessor.rs
+++ b/crates/compilers/src/preprocessor.rs
@@ -263,7 +263,7 @@ impl ContractData<'_> {
     /// vm.deployCode("artifact path", encodeArgs335(DeployHelper335.ConstructorArgs({name: name, symbol: symbol})))
     /// ```
     pub fn build_helper(&self) -> Result<Option<String>> {
-        let Self { ast_id, path, name, constructor_params, src, .. } = self;
+        let Self { ast_id, path, name, constructor_params, src, artifact } = self;
 
         let Some(params) = constructor_params else { return Ok(None) };
 
@@ -278,6 +278,9 @@ impl ContractData<'_> {
 
         let abi_encode_args =
             params.parameters.iter().map(|param| format!("args.{}", param.name)).join(", ");
+
+        let vm_interface_name = format!("VmContractHelper{}", ast_id);
+        let vm = format!("{vm_interface_name}(0x7109709ECfa91a80626fF3989D68f67F5b1DD12D)");
 
         let helper = format!(
             r#"
@@ -296,6 +299,16 @@ pragma solidity >=0.4.0;
 function encodeArgs{ast_id}(DeployHelper{ast_id}.ConstructorArgs memory args) pure returns (bytes memory) {{
     return abi.encode({abi_encode_args});
 }}
+
+function deployCode{ast_id}(DeployHelper{ast_id}.ConstructorArgs memory args) returns({name}) {{
+    return {name}(payable({vm}.deployCode("{artifact}", encodeArgs{ast_id}(args))));
+}}
+
+interface {vm_interface_name} {{
+    function deployCode(string memory _artifact, bytes memory _data) external returns (address);
+    function deployCode(string memory _artifact) external returns (address);
+    function getCode(string memory _artifact) external returns (bytes memory);
+}}
 "#,
             path = path.display(),
         );
@@ -470,16 +483,16 @@ impl BytecodeDependencyOptimizer<'_> {
                         updates.insert((
                             new_loc.start,
                             new_loc.end,
-                            format!("{name}(payable({vm}.deployCode(\"{artifact}\", encodeArgs{id}(DeployHelper{id}.ConstructorArgs", id = dep.referenced_contract),
+                            format!("deployCode{id}(DeployHelper{id}.ConstructorArgs", id = dep.referenced_contract),
                         ));
-                        updates.insert((dep.loc.end, dep.loc.end, "))))".to_string()));
+                        updates.insert((dep.loc.end, dep.loc.end, ")".to_string()));
                     }
                 }
             };
         }
         let helper_imports = used_helpers.into_iter().map(|id| {
             format!(
-                "import {{DeployHelper{id}, encodeArgs{id}}} from \"foundry-pp/DeployHelper{id}.sol\";",
+                "import {{DeployHelper{id}, encodeArgs{id}, deployCode{id}}} from \"foundry-pp/DeployHelper{id}.sol\";",
             )
         }).join("\n");
         updates.insert((