Compute superset of existing and required hashes when healing cache (#8955)

## Summary

The basic issue here is that `uv add` computes and stores a hash for
each package. But if you later run `uv pip install` _after_ `uv cache
prune --ci`, we need to re-download the source distribution and compare
the newly computed hashes against those stored in the cache. Since `uv
pip install` doesn't compute any hashes by default, the hashes "differ"
and we error.

Instead, we need to compute a superset of the already-existing and
newly-requested hashes when performing this re-download. (In practice,
this will always be SHA-256.)
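
To make the fix concrete, here is a minimal, runnable sketch of the "superset" (union) computation. The `HashAlgorithm` enum and `union_of_algorithms` helper below are stand-ins for illustration only; in uv itself the algorithm type comes from `uv_pypi_types`, and the union is computed inline while healing the cache (see the diff below).

```rust
// Stand-in for uv_pypi_types::HashAlgorithm -- for illustration only.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum HashAlgorithm {
    Md5,
    Sha256,
    Sha384,
    Sha512,
}

/// Hypothetical helper: union of the algorithms requested for this install
/// and the algorithms already stored for the cached revision, deduplicated.
fn union_of_algorithms(
    requested: &[HashAlgorithm],
    existing: &[HashAlgorithm],
) -> Vec<HashAlgorithm> {
    let mut algorithms = requested.to_vec();
    algorithms.extend_from_slice(existing);
    algorithms.sort();
    algorithms.dedup();
    algorithms
}

fn main() {
    // `uv pip install` requests no hashes, but the cached revision was
    // stored with a SHA-256 digest by `uv add`.
    let requested: &[HashAlgorithm] = &[];
    let existing = &[HashAlgorithm::Sha256];

    // The re-download must still compute SHA-256 so it can be compared
    // against the digest stored in the cache.
    assert_eq!(
        union_of_algorithms(requested, existing),
        vec![HashAlgorithm::Sha256]
    );
}
```

A plain `Vec` with `sort` plus `dedup` is enough here because the set of supported algorithms is tiny.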

Closes #8929.

## Test Plan

```shell
export UV_CACHE_DIR="$PWD/cache"

rm -rf "$UV_CACHE_DIR" .venv .venv-2 pyproject.toml uv.lock

echo $(uv --version)

uv init --name uv-cache-issue
cargo run add --python 3.13 "pycairo"

uv cache prune --ci

rm -rf .venv .venv-2

uv venv --python python3.11 .venv-2
. .venv-2/bin/activate
cargo run pip install "pycairo"
```
charliermarsh authored Nov 8, 2024
1 parent f29634b commit 0b5a061
Showing 2 changed files with 55 additions and 20 deletions.
6 changes: 3 additions & 3 deletions crates/uv-distribution/src/error.rs
```diff
@@ -11,7 +11,7 @@ use uv_distribution_filename::WheelFilenameError;
 use uv_fs::Simplified;
 use uv_normalize::PackageName;
 use uv_pep440::{Version, VersionSpecifiers};
-use uv_pypi_types::{HashDigest, ParsedUrlError};
+use uv_pypi_types::{HashAlgorithm, HashDigest, ParsedUrlError};
 
 #[derive(Debug, thiserror::Error)]
 pub enum Error {
@@ -94,8 +94,8 @@ pub enum Error {
     MetadataLowering(#[from] MetadataError),
     #[error("Distribution not found at: {0}")]
     NotFound(Url),
-    #[error("Attempted to re-extract the source distribution for `{0}`, but the hashes didn't match. Run `{}` to clear the cache.", "uv cache clean".green())]
-    CacheHeal(String),
+    #[error("Attempted to re-extract the source distribution for `{0}`, but the {1} hash didn't match. Run `{}` to clear the cache.", "uv cache clean".green())]
+    CacheHeal(String, HashAlgorithm),
     #[error("The source distribution requires Python {0}, but {1} is installed")]
     RequiresPython(VersionSpecifiers, Version),
 
```
69 changes: 52 additions & 17 deletions crates/uv-distribution/src/source/mod.rs
```diff
@@ -36,7 +36,7 @@ use uv_fs::{rename_with_retry, write_atomic, LockedFile};
 use uv_metadata::read_archive_metadata;
 use uv_pep440::release_specifiers_to_ranges;
 use uv_platform_tags::Tags;
-use uv_pypi_types::{HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
+use uv_pypi_types::{HashAlgorithm, HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
 use uv_types::{BuildContext, SourceBuildTrait};
 use zip::ZipArchive;
 
@@ -641,8 +641,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         // Download the source distribution.
         debug!("Downloading source distribution: {source}");
         let entry = cache_shard.shard(revision.id()).entry(SOURCE);
+        let algorithms = hashes.algorithms();
         let hashes = self
-            .download_archive(response, source, ext, entry.path(), hashes)
+            .download_archive(response, source, ext, entry.path(), &algorithms)
             .await?;
 
         Ok(revision.with_hashes(hashes))
@@ -928,8 +929,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         // Unzip the archive to a temporary directory.
         debug!("Unpacking source distribution: {source}");
         let entry = cache_shard.shard(revision.id()).entry(SOURCE);
+        let algorithms = hashes.algorithms();
         let hashes = self
-            .persist_archive(&resource.path, resource.ext, entry.path(), hashes)
+            .persist_archive(&resource.path, resource.ext, entry.path(), &algorithms)
             .await?;
 
         // Include the hashes and cache info in the revision.
@@ -1525,11 +1527,25 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         hashes: HashPolicy<'_>,
     ) -> Result<Revision, Error> {
         warn!("Re-extracting missing source distribution: {source}");
+
+        // Take the union of the requested and existing hash algorithms.
+        let algorithms = {
+            let mut algorithms = hashes.algorithms();
+            for digest in revision.hashes() {
+                algorithms.push(digest.algorithm());
+            }
+            algorithms.sort();
+            algorithms.dedup();
+            algorithms
+        };
+
         let hashes = self
-            .persist_archive(&resource.path, resource.ext, entry.path(), hashes)
+            .persist_archive(&resource.path, resource.ext, entry.path(), &algorithms)
             .await?;
-        if hashes != revision.hashes() {
-            return Err(Error::CacheHeal(source.to_string()));
+        for existing in revision.hashes() {
+            if !hashes.contains(existing) {
+                return Err(Error::CacheHeal(source.to_string(), existing.algorithm()));
+            }
         }
         Ok(revision.with_hashes(hashes))
     }
@@ -1549,11 +1565,24 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         let cache_entry = entry.shard().entry(HTTP_REVISION);
         let download = |response| {
             async {
+                // Take the union of the requested and existing hash algorithms.
+                let algorithms = {
+                    let mut algorithms = hashes.algorithms();
+                    for digest in revision.hashes() {
+                        algorithms.push(digest.algorithm());
+                    }
+                    algorithms.sort();
+                    algorithms.dedup();
+                    algorithms
+                };
+
                 let hashes = self
-                    .download_archive(response, source, ext, entry.path(), hashes)
+                    .download_archive(response, source, ext, entry.path(), &algorithms)
                     .await?;
-                if hashes != revision.hashes() {
-                    return Err(Error::CacheHeal(source.to_string()));
+                for existing in revision.hashes() {
+                    if !hashes.contains(existing) {
+                        return Err(Error::CacheHeal(source.to_string(), existing.algorithm()));
+                    }
                 }
                 Ok(revision.with_hashes(hashes))
             }
@@ -1581,7 +1610,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         source: &BuildableSource<'_>,
         ext: SourceDistExtension,
         target: &Path,
-        hashes: HashPolicy<'_>,
+        algorithms: &[HashAlgorithm],
     ) -> Result<Vec<HashDigest>, Error> {
         let temp_dir = tempfile::tempdir_in(
             self.build_context
@@ -1595,8 +1624,11 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .into_async_read();
 
         // Create a hasher for each hash algorithm.
-        let algorithms = hashes.algorithms();
-        let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>();
+        let mut hashers = algorithms
+            .iter()
+            .copied()
+            .map(Hasher::from)
+            .collect::<Vec<_>>();
         let mut hasher = uv_extract::hash::HashReader::new(reader.compat(), &mut hashers);
 
         // Download and unzip the source distribution into a temporary directory.
@@ -1605,7 +1637,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         drop(span);
 
         // If necessary, exhaust the reader to compute the hash.
-        if !hashes.is_none() {
+        if !algorithms.is_empty() {
             hasher.finish().await.map_err(Error::HashExhaustion)?;
         }
 
@@ -1640,7 +1672,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         path: &Path,
         ext: SourceDistExtension,
         target: &Path,
-        hashes: HashPolicy<'_>,
+        algorithms: &[HashAlgorithm],
     ) -> Result<Vec<HashDigest>, Error> {
         debug!("Unpacking for build: {}", path.display());
 
@@ -1655,15 +1687,18 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
             .map_err(Error::CacheRead)?;
 
         // Create a hasher for each hash algorithm.
-        let algorithms = hashes.algorithms();
-        let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>();
+        let mut hashers = algorithms
+            .iter()
+            .copied()
+            .map(Hasher::from)
+            .collect::<Vec<_>>();
         let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers);
 
         // Unzip the archive into a temporary directory.
         uv_extract::stream::archive(&mut hasher, ext, &temp_dir.path()).await?;
 
         // If necessary, exhaust the reader to compute the hash.
-        if !hashes.is_none() {
+        if !algorithms.is_empty() {
             hasher.finish().await.map_err(Error::HashExhaustion)?;
         }
 
```
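
A note on the comparison change above: once healing can compute more algorithms than were originally stored, strict equality between the recomputed and stored digest lists over-rejects, which is why the check becomes "every previously stored digest must still be present". The sketch below uses toy stand-in types (not uv's `HashDigest`) to show the difference:

```rust
// Toy types for illustration only; uv's real HashDigest lives in uv_pypi_types.
#[derive(Debug, Clone, PartialEq, Eq)]
struct HashDigest {
    algorithm: &'static str,
    digest: &'static str,
}

fn main() {
    // Digest stored in the cache when the package was first added.
    let stored = vec![HashDigest { algorithm: "sha256", digest: "abc123" }];

    // Digests recomputed while healing, using the union of algorithms
    // (here: the stored SHA-256 plus a hypothetically requested SHA-512).
    let recomputed = vec![
        HashDigest { algorithm: "sha256", digest: "abc123" },
        HashDigest { algorithm: "sha512", digest: "def456" },
    ];

    // Old check: any extra digest makes the lists unequal, so healing fails.
    assert!(recomputed != stored);

    // New check: healing succeeds as long as every stored digest is still present.
    assert!(stored.iter().all(|existing| recomputed.contains(existing)));
}
```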
