diff --git a/src/cargo/ops/cargo_package/mod.rs b/src/cargo/ops/cargo_package/mod.rs index 6c920eec4e3..ef6706c941a 100644 --- a/src/cargo/ops/cargo_package/mod.rs +++ b/src/cargo/ops/cargo_package/mod.rs @@ -226,6 +226,7 @@ fn do_package<'a>( // Packages need to be created in dependency order, because dependencies must // be added to our local overlay before we can create lockfiles that depend on them. + let mut vcs_info_builder = vcs::VcsInfoBuilder::new(ws, opts); let sorted_pkgs = deps.sort(); let mut outputs: Vec<(Package, PackageOpts<'_>, FileLock)> = Vec::new(); for (pkg, cli_features) in sorted_pkgs { @@ -234,7 +235,7 @@ fn do_package<'a>( to_package: ops::Packages::Default, ..opts.clone() }; - let ar_files = prepare_archive(ws, &pkg, &opts)?; + let ar_files = prepare_archive(ws, &pkg, &opts, &mut vcs_info_builder)?; if opts.list { for ar_file in &ar_files { @@ -369,6 +370,7 @@ fn prepare_archive( ws: &Workspace<'_>, pkg: &Package, opts: &PackageOpts<'_>, + vcs_info_builder: &mut vcs::VcsInfoBuilder<'_, '_>, ) -> CargoResult> { let gctx = ws.gctx(); let mut src = PathSource::new(pkg.root(), pkg.package_id().source_id(), gctx); @@ -387,7 +389,7 @@ fn prepare_archive( let src_files = src.list_files(pkg)?; // Check (git) repository state, getting the current commit hash. - let vcs_info = vcs::check_repo_state(pkg, &src_files, gctx, &opts)?; + let vcs_info = vcs_info_builder.build(pkg, &src_files)?; build_ar_list(ws, pkg, src_files, vcs_info) } diff --git a/src/cargo/ops/cargo_package/vcs.rs b/src/cargo/ops/cargo_package/vcs.rs index 3c447f32b72..ab125e8a929 100644 --- a/src/cargo/ops/cargo_package/vcs.rs +++ b/src/cargo/ops/cargo_package/vcs.rs @@ -1,5 +1,6 @@ //! Helpers to gather the VCS information for `cargo package`. +use std::collections::HashMap; use std::path::Path; use std::path::PathBuf; @@ -9,6 +10,7 @@ use serde::Serialize; use tracing::debug; use crate::core::Package; +use crate::core::Workspace; use crate::sources::PathEntry; use crate::CargoResult; use crate::GlobalContext; @@ -32,6 +34,84 @@ pub struct GitVcsInfo { dirty: bool, } +/// A shared builder for generating [`VcsInfo`] for packages inside the same workspace. +/// +/// This is aimed to cache duplicate works like file system or VCS info lookups. +pub struct VcsInfoBuilder<'a, 'gctx> { + ws: &'a Workspace<'gctx>, + opts: &'a PackageOpts<'gctx>, + /// Map each git workdir path to the workdir's git status cache. + caches: HashMap, +} + +impl<'a, 'gctx> VcsInfoBuilder<'a, 'gctx> { + pub fn new( + ws: &'a Workspace<'gctx>, + opts: &'a PackageOpts<'gctx>, + ) -> VcsInfoBuilder<'a, 'gctx> { + VcsInfoBuilder { + ws, + opts, + caches: Default::default(), + } + } + + /// Builds an [`VcsInfo`] for the given `pkg` and its associated `src_files`. + pub fn build( + &mut self, + pkg: &Package, + src_files: &[PathEntry], + ) -> CargoResult> { + check_repo_state(pkg, src_files, self.ws.gctx(), self.opts, &mut self.caches) + } +} + +/// Cache of git status inquries for a Git workdir. +struct RepoStatusCache { + repo: git2::Repository, + /// Status of each file path relative to the git workdir path. + cache: HashMap, +} + +impl RepoStatusCache { + fn new(repo: git2::Repository) -> RepoStatusCache { + RepoStatusCache { + repo, + cache: Default::default(), + } + } + + /// Like [`git2::Repository::status_file`] but with cache. + fn status_file(&mut self, path: &Path) -> CargoResult { + use std::collections::hash_map::Entry; + match self.cache.entry(path.into()) { + Entry::Occupied(entry) => { + tracing::trace!( + target: "cargo_package_vcs_cache", + "git status cache hit for `{}` at workdir `{}`", + path.display(), + self.repo.workdir().unwrap().display() + ); + Ok(*entry.get()) + } + Entry::Vacant(entry) => { + tracing::trace!( + target: "cargo_package_vcs_cache", + "git status cache miss for `{}` at workdir `{}`", + path.display(), + self.repo.workdir().unwrap().display() + ); + let status = self.repo.status_file(path)?; + Ok(*entry.insert(status)) + } + } + } + + fn workdir(&self) -> &Path { + self.repo.workdir().unwrap() + } +} + /// Checks if the package source is in a *git* DVCS repository. /// /// If *git*, and the source is *dirty* (e.g., has uncommitted changes), @@ -45,6 +125,7 @@ pub fn check_repo_state( src_files: &[PathEntry], gctx: &GlobalContext, opts: &PackageOpts<'_>, + caches: &mut HashMap, ) -> CargoResult> { let Ok(repo) = git2::Repository::discover(p.root()) else { gctx.shell().verbose(|shell| { @@ -64,14 +145,20 @@ pub fn check_repo_state( }; debug!("found a git repo at `{}`", workdir.display()); + + let cache = caches + .entry(workdir.to_path_buf()) + .or_insert_with(|| RepoStatusCache::new(repo)); + let path = p.manifest_path(); - let path = paths::strip_prefix_canonical(path, workdir).unwrap_or_else(|_| path.to_path_buf()); - let Ok(status) = repo.status_file(&path) else { + let path = + paths::strip_prefix_canonical(path, cache.workdir()).unwrap_or_else(|_| path.to_path_buf()); + let Ok(status) = cache.status_file(&path) else { gctx.shell().verbose(|shell| { shell.warn(format!( "no (git) Cargo.toml found at `{}` in workdir `{}`", path.display(), - workdir.display() + cache.workdir().display() )) })?; // No checked-in `Cargo.toml` found. This package may be irrelevant. @@ -84,7 +171,7 @@ pub fn check_repo_state( shell.warn(format!( "found (git) Cargo.toml ignored at `{}` in workdir `{}`", path.display(), - workdir.display() + cache.workdir().display() )) })?; // An ignored `Cargo.toml` found. This package may be irrelevant. @@ -92,19 +179,19 @@ pub fn check_repo_state( return Ok(None); } - warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?; + warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &cache)?; debug!( "found (git) Cargo.toml at `{}` in workdir `{}`", path.display(), - workdir.display(), + cache.workdir().display(), ); let path_in_vcs = path .parent() .and_then(|p| p.to_str()) .unwrap_or("") .replace("\\", "/"); - let Some(git) = git(p, gctx, src_files, &repo, &opts)? else { + let Some(git) = git(p, gctx, src_files, cache, &opts)? else { // If the git repo lacks essensial field like `sha1`, and since this field exists from the beginning, // then don't generate the corresponding file in order to maintain consistency with past behavior. return Ok(None); @@ -129,9 +216,10 @@ pub fn check_repo_state( fn warn_symlink_checked_out_as_plain_text_file( gctx: &GlobalContext, src_files: &[PathEntry], - repo: &git2::Repository, + cache: &RepoStatusCache, ) -> CargoResult<()> { - if repo + if cache + .repo .config() .and_then(|c| c.get_bool("core.symlinks")) .unwrap_or(true) @@ -144,7 +232,7 @@ fn warn_symlink_checked_out_as_plain_text_file( shell.warn(format_args!( "found symbolic links that may be checked out as regular files for git repo at `{}`\n\ This might cause the `.crate` file to include incorrect or incomplete files", - repo.workdir().unwrap().display(), + cache.workdir().display(), ))?; let extra_note = if cfg!(windows) { "\nAnd on Windows, enable the Developer Mode to support symlinks" @@ -164,7 +252,7 @@ fn git( pkg: &Package, gctx: &GlobalContext, src_files: &[PathEntry], - repo: &git2::Repository, + cache: &mut RepoStatusCache, opts: &PackageOpts<'_>, ) -> CargoResult> { // This is a collection of any dirty or untracked files. This covers: @@ -173,10 +261,10 @@ fn git( // - ignored (in case the user has an `include` directive that // conflicts with .gitignore). let mut dirty_files = Vec::new(); - collect_statuses(repo, &mut dirty_files)?; + collect_statuses(&cache.repo, &mut dirty_files)?; // Include each submodule so that the error message can provide // specifically *which* files in a submodule are modified. - status_submodules(repo, &mut dirty_files)?; + status_submodules(&cache.repo, &mut dirty_files)?; // Find the intersection of dirty in git, and the src_files that would // be packaged. This is a lazy n^2 check, but seems fine with @@ -186,7 +274,7 @@ fn git( .iter() .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path))) .map(|p| p.as_ref()) - .chain(dirty_metadata_paths(pkg, repo)?.iter()) + .chain(dirty_metadata_paths(pkg, cache)?.iter()) .map(|path| { pathdiff::diff_paths(path, cwd) .as_ref() @@ -199,10 +287,10 @@ fn git( if !dirty || opts.allow_dirty { // Must check whetherthe repo has no commit firstly, otherwise `revparse_single` would fail on bare commit repo. // Due to lacking the `sha1` field, it's better not record the `GitVcsInfo` for consistency. - if repo.is_empty()? { + if cache.repo.is_empty()? { return Ok(None); } - let rev_obj = repo.revparse_single("HEAD")?; + let rev_obj = cache.repo.revparse_single("HEAD")?; Ok(Some(GitVcsInfo { sha1: rev_obj.id().to_string(), dirty, @@ -225,9 +313,8 @@ fn git( /// This is required because those paths may link to a file outside the /// current package root, but still under the git workdir, affecting the /// final packaged `.crate` file. -fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult> { +fn dirty_metadata_paths(pkg: &Package, repo: &mut RepoStatusCache) -> CargoResult> { let mut dirty_files = Vec::new(); - let workdir = repo.workdir().unwrap(); let root = pkg.root(); let meta = pkg.manifest().metadata(); for path in [&meta.license_file, &meta.readme] { @@ -239,12 +326,12 @@ fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult