From fa79d6ef48e6acd007bc9fc8c3dded5df2e64449 Mon Sep 17 00:00:00 2001
From: Charlie Marsh
Date: Fri, 8 Nov 2024 15:12:15 -0500
Subject: [PATCH] Compute superset of existing and required hashes when healing cache (#8955)

## Summary

The basic issue here is that `uv add` will compute and store a hash for each package. But if you later run `uv pip install` _after_ `uv cache prune --ci`, we need to re-download the source distribution. After re-downloading, we compare the hashes before and after. But `uv pip install` doesn't compute any hashes by default. So the hashes "differ" and we error.

Instead, we need to compute a superset of the already-existing and newly-requested hashes when performing this re-download. (In practice, this will always be SHA-256.)

Closes https://github.com/astral-sh/uv/issues/8929.

## Test Plan

```shell
export UV_CACHE_DIR="$PWD/cache"
rm -rf "$UV_CACHE_DIR" .venv .venv-2 pyproject.toml uv.lock
echo $(uv --version)
uv init --name uv-cache-issue
cargo run add --python 3.13 "pycairo"
uv cache prune --ci
rm -rf .venv .venv-2
uv venv --python python3.11 .venv-2
. .venv-2/bin/activate
cargo run pip install "pycairo"
```
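As a rough illustration of the fix described in the summary, here is a minimal, self-contained sketch (not uv's actual implementation; `Algorithm` and `Digest` are simplified stand-ins for `HashAlgorithm` and `HashDigest`): take the union of the requested and previously stored algorithms, recompute digests for all of them, and then only require that every previously stored digest is reproduced.

```rust
// A minimal, self-contained sketch of the idea, not uv's actual implementation:
// `Algorithm` and `Digest` are simplified stand-ins for `HashAlgorithm` and
// `HashDigest`.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum Algorithm {
    Md5,
    Sha256,
    Sha512,
}

#[derive(Clone, PartialEq, Eq, Debug)]
struct Digest {
    algorithm: Algorithm,
    value: String,
}

/// Union of the algorithms the caller requested and the algorithms already
/// recorded in the cached revision, so that re-extraction computes both.
fn union_of_algorithms(requested: &[Algorithm], existing: &[Digest]) -> Vec<Algorithm> {
    let mut algorithms = requested.to_vec();
    algorithms.extend(existing.iter().map(|digest| digest.algorithm));
    algorithms.sort();
    algorithms.dedup();
    algorithms
}

/// The healed entry is valid as long as every previously stored digest is
/// reproduced; extra digests computed for newly requested algorithms are fine.
fn existing_digests_match(existing: &[Digest], recomputed: &[Digest]) -> bool {
    existing.iter().all(|digest| recomputed.contains(digest))
}

fn main() {
    // `uv pip install` requests no hashes, but the revision written by `uv add`
    // already stores a SHA-256 digest.
    let requested: Vec<Algorithm> = vec![];
    let existing = vec![Digest {
        algorithm: Algorithm::Sha256,
        value: "abc123".to_string(),
    }];

    // The union forces SHA-256 to be recomputed during the re-download...
    assert_eq!(
        union_of_algorithms(&requested, &existing),
        vec![Algorithm::Sha256]
    );

    // ...and the comparison checks "existing is a subset of recomputed" rather
    // than strict equality, so the empty request no longer causes a mismatch.
    let recomputed = existing.clone();
    assert!(existing_digests_match(&existing, &recomputed));
}
```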
debug!("Downloading source distribution: {source}"); let entry = cache_shard.shard(revision.id()).entry(SOURCE); + let algorithms = hashes.algorithms(); let hashes = self - .download_archive(response, source, ext, entry.path(), hashes) + .download_archive(response, source, ext, entry.path(), &algorithms) .await?; Ok(revision.with_hashes(hashes)) @@ -928,8 +929,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Unzip the archive to a temporary directory. debug!("Unpacking source distribution: {source}"); let entry = cache_shard.shard(revision.id()).entry(SOURCE); + let algorithms = hashes.algorithms(); let hashes = self - .persist_archive(&resource.path, resource.ext, entry.path(), hashes) + .persist_archive(&resource.path, resource.ext, entry.path(), &algorithms) .await?; // Include the hashes and cache info in the revision. @@ -1525,11 +1527,25 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { hashes: HashPolicy<'_>, ) -> Result { warn!("Re-extracting missing source distribution: {source}"); + + // Take the union of the requested and existing hash algorithms. + let algorithms = { + let mut algorithms = hashes.algorithms(); + for digest in revision.hashes() { + algorithms.push(digest.algorithm()); + } + algorithms.sort(); + algorithms.dedup(); + algorithms + }; + let hashes = self - .persist_archive(&resource.path, resource.ext, entry.path(), hashes) + .persist_archive(&resource.path, resource.ext, entry.path(), &algorithms) .await?; - if hashes != revision.hashes() { - return Err(Error::CacheHeal(source.to_string())); + for existing in revision.hashes() { + if !hashes.contains(existing) { + return Err(Error::CacheHeal(source.to_string(), existing.algorithm())); + } } Ok(revision.with_hashes(hashes)) } @@ -1549,11 +1565,24 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let cache_entry = entry.shard().entry(HTTP_REVISION); let download = |response| { async { + // Take the union of the requested and existing hash algorithms. + let algorithms = { + let mut algorithms = hashes.algorithms(); + for digest in revision.hashes() { + algorithms.push(digest.algorithm()); + } + algorithms.sort(); + algorithms.dedup(); + algorithms + }; + let hashes = self - .download_archive(response, source, ext, entry.path(), hashes) + .download_archive(response, source, ext, entry.path(), &algorithms) .await?; - if hashes != revision.hashes() { - return Err(Error::CacheHeal(source.to_string())); + for existing in revision.hashes() { + if !hashes.contains(existing) { + return Err(Error::CacheHeal(source.to_string(), existing.algorithm())); + } } Ok(revision.with_hashes(hashes)) } @@ -1581,7 +1610,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, ext: SourceDistExtension, target: &Path, - hashes: HashPolicy<'_>, + algorithms: &[HashAlgorithm], ) -> Result, Error> { let temp_dir = tempfile::tempdir_in( self.build_context @@ -1595,8 +1624,11 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .into_async_read(); // Create a hasher for each hash algorithm. - let algorithms = hashes.algorithms(); - let mut hashers = algorithms.into_iter().map(Hasher::from).collect::>(); + let mut hashers = algorithms + .iter() + .copied() + .map(Hasher::from) + .collect::>(); let mut hasher = uv_extract::hash::HashReader::new(reader.compat(), &mut hashers); // Download and unzip the source distribution into a temporary directory. 
@@ -1605,7 +1637,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         drop(span);
 
         // If necessary, exhaust the reader to compute the hash.
-        if !hashes.is_none() {
+        if !algorithms.is_empty() {
             hasher.finish().await.map_err(Error::HashExhaustion)?;
         }
 
@@ -1640,7 +1672,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         path: &Path,
         ext: SourceDistExtension,
         target: &Path,
-        hashes: HashPolicy<'_>,
+        algorithms: &[HashAlgorithm],
     ) -> Result<Vec<HashDigest>, Error> {
         debug!("Unpacking for build: {}", path.display());
 
@@ -1655,15 +1687,18 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .map_err(Error::CacheRead)?;
 
         // Create a hasher for each hash algorithm.
-        let algorithms = hashes.algorithms();
-        let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>();
+        let mut hashers = algorithms
+            .iter()
+            .copied()
+            .map(Hasher::from)
+            .collect::<Vec<_>>();
         let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers);
 
         // Unzip the archive into a temporary directory.
         uv_extract::stream::archive(&mut hasher, ext, &temp_dir.path()).await?;
 
         // If necessary, exhaust the reader to compute the hash.
-        if !hashes.is_none() {
+        if !algorithms.is_empty() {
             hasher.finish().await.map_err(Error::HashExhaustion)?;
         }