From 482da4e0e858d1b50f830ce872ccede16a7aaa7a Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Tue, 19 Nov 2024 01:31:16 -0800 Subject: [PATCH] Retry the cleanup of downloadAndExtract This helps the HTTP downloader better cope with filesystems where unlink is non-atomic. Closes #24295. PiperOrigin-RevId: 697920434 Change-Id: I91b4dbf07a2efdca07c0310e15aac5f4d89c4091 --- .../build/lib/bazel/repository/starlark/BUILD | 1 + .../starlark/StarlarkBaseExternalContext.java | 64 +++++++++++++++---- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD index 87638f45b0b801..b006e2e833b62d 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD @@ -61,6 +61,7 @@ java_library( "//src/main/java/net/starlark/java/syntax", "//third_party:auto_value", "//third_party:error_prone_annotations", + "//third_party:flogger", "//third_party:guava", "//third_party:java-diff-utils", "//third_party:jsr305", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index 78d48eff8fb615..b5c7c57e0e0ac3 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -24,6 +24,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.Maps; +import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.Futures; import com.google.devtools.build.lib.actions.FileValue; import com.google.devtools.build.lib.analysis.BlazeDirectories; @@ -79,6 +80,7 @@ import java.nio.file.InvalidPathException; import java.nio.file.Paths; import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; @@ -145,6 +147,8 @@ private interface AsyncTask extends SilentCloseable { void close(); } + private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); + /** Max. length of command line args added as a profiler description. */ private static final int MAX_PROFILE_ARGS_LEN = 512; @@ -1095,21 +1099,55 @@ public StructImpl downloadAndExtract( } StructImpl downloadResult = calculateDownloadResult(checksum, downloadedPath); - try { - if (downloadDirectory.exists()) { - downloadDirectory.deleteTree(); + deleteTreeWithRetries(downloadDirectory); + return downloadResult; + } + + /** + * This method wraps the deleteTree method in a retry loop, to solve an issue when trying to + * recursively clean up temporary directories during dependency downloads when they are stored on + * filesystems where unlinking a file may not be immediately reflected in a list of its parent + * directory. Specifically, the symptom of this problem was the entire bazel build aborting + * because during the cleanup of a dependency download (e.g Rust crate), there was an IOException + * because the parent directory being removed was "not empty" (yet). Please see + * https://github.com/bazelbuild/bazel/issues/23687 and + * https://github.com/bazelbuild/bazel/issues/20013 for further details. + * + * @param downloadDirectory + * @throws RepositoryFunctionException + */ + private static void deleteTreeWithRetries(Path downloadDirectory) + throws RepositoryFunctionException { + Instant start = Instant.now(); + Instant deadline = start.plus(Duration.ofSeconds(5)); + + for (int attempts = 1; ; attempts++) { + try { + if (downloadDirectory.exists()) { + downloadDirectory.deleteTree(); + } + if (attempts > 1) { + long elapsedMillis = Duration.between(start, Instant.now()).toMillis(); + logger.atInfo().log( + "Deleting %s took %d attempts over %dms.", + downloadDirectory.getPathString(), attempts, elapsedMillis); + } + break; + } catch (IOException e) { + if (Instant.now().isAfter(deadline)) { + throw new RepositoryFunctionException( + new IOException( + "Couldn't delete temporary directory (" + + downloadDirectory.getPathString() + + ") after " + + attempts + + " attempts: " + + e.getMessage(), + e), + Transience.TRANSIENT); + } } - } catch (IOException e) { - throw new RepositoryFunctionException( - new IOException( - "Couldn't delete temporary directory (" - + downloadDirectory.getPathString() - + "): " - + e.getMessage(), - e), - Transience.TRANSIENT); } - return downloadResult; } @StarlarkMethod(