From 74fe66974e0c4c6b920e067070a519140829c22a Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Wed, 21 Feb 2024 08:54:12 -0800 Subject: [PATCH] Reland "Also path map transitive header jar paths with direct classpath optimization" Rollforward of fd196bf03f4301cc3fbfc307570637f4de71c82c, which was rolled back in f5d76b1777ce861a7e551342fe25363517d53ff5. The analysis-time memory regression (newly retained `NestedSet` instances) has been resolved separately in 31fae9e8e6687cbaf0dfe55a466696210c80be96. The execution-time memory regression (unconditionally flattened `NestedSet`s) is now only incurred with path mapping enabled. Original description: `JavaHeaderCompileAction` can apply an optimization where it compiles the source files only against direct dependencies, making use of the fact that Turbine includes sufficient information about transitive dependencies into the direct header jars in a special directory. In order to ensure that downstream consumers of header compilation action can use its `.jdeps` file regardless of which of these actions actually uses path mapping, all such paths to transitive jars have to be unmapped. With this commit, actions can declare additional artifacts whose paths they want to apply path mapping to. By always passing all transitive jars into the path mapper, even when only the direct jars are inputs to the action, the transitive header jar paths can be unmapped and stripped path collisions between them correctly result in a noop `PathMapper` being used for the current action. Closes #21401. PiperOrigin-RevId: 609010786 Change-Id: I0d9ea5b11430ee40be74fe582af84fedaa52ade6 --- .../build/lib/actions/AbstractAction.java | 8 +++ .../lib/analysis/actions/PathMappers.java | 23 +++---- .../analysis/actions/StrippingPathMapper.java | 26 +++++--- .../lib/rules/java/JavaCompileAction.java | 61 +++++++++++++------ .../rules/java/JavaHeaderCompileAction.java | 30 ++++++++- .../config_stripped_outputs_test.sh | 51 ++++++++++++++++ 6 files changed, 151 insertions(+), 48 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java b/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java index 8bfcbf46e41a22..6fcf99a89453ae 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java +++ b/src/main/java/com/google/devtools/build/lib/actions/AbstractAction.java @@ -653,4 +653,12 @@ public ImmutableMap getExecProperties() { public PlatformInfo getExecutionPlatform() { return owner.getExecutionPlatform(); } + + /** + * Returns artifacts that should be subject to path mapping (see {@link Spawn#getPathMapper()}, + * but aren't inputs of the action. + */ + public NestedSet getAdditionalArtifactsForPathMapping() { + return NestedSetBuilder.emptySet(Order.STABLE_ORDER); + } } diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java index 3ebba4796c5213..f0d52883152cef 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/PathMappers.java @@ -15,6 +15,7 @@ package com.google.devtools.build.lib.analysis.actions; import com.google.common.collect.ImmutableSet; +import com.google.devtools.build.lib.actions.AbstractAction; import com.google.devtools.build.lib.actions.Action; import com.google.devtools.build.lib.actions.ActionKeyContext; import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander; @@ -63,8 +64,8 @@ public final class PathMappers { * Actions that support path mapping should call this method from {@link * Action#getKey(ActionKeyContext, ArtifactExpander)}. * - *

Compared to {@link #create(Action, OutputPathsMode)}, this method does not flatten nested - * sets and thus can't result in memory regressions. + *

Compared to {@link #create(AbstractAction, OutputPathsMode)}, this method does not flatten + * nested sets and thus can't result in memory regressions. * * @param mnemonic the mnemonic of the action * @param executionInfo the execution info of the action @@ -103,22 +104,12 @@ public static void addToFingerprint( * OutputPathsMode, Fingerprint)} from {@link Action#getKey(ActionKeyContext, ArtifactExpander)} * to ensure correct incremental builds. * - * @param action the {@link Action} for which a {@link Spawn} is to be created + * @param action the {@link AbstractAction} for which a {@link Spawn} is to be created * @param outputPathsMode the value of {@link CoreOptions#outputPathsMode} * @return a {@link PathMapper} that maps paths of the action's inputs and outputs. May be {@link * PathMapper#NOOP} if path mapping is not applicable to the action. */ - public static PathMapper create(Action action, OutputPathsMode outputPathsMode) { - if (getEffectiveOutputPathsMode( - outputPathsMode, action.getMnemonic(), action.getExecutionInfo()) - != OutputPathsMode.STRIP) { - return PathMapper.NOOP; - } - return StrippingPathMapper.tryCreate(action).orElse(PathMapper.NOOP); - } - - public static PathMapper createPathMapperForTesting( - Action action, OutputPathsMode outputPathsMode) { + public static PathMapper create(AbstractAction action, OutputPathsMode outputPathsMode) { if (getEffectiveOutputPathsMode( outputPathsMode, action.getMnemonic(), action.getExecutionInfo()) != OutputPathsMode.STRIP) { @@ -128,8 +119,8 @@ public static PathMapper createPathMapperForTesting( } /** - * Helper method to simplify calling {@link #create(Action, OutputPathsMode)} for actions that - * store the configuration directly. + * Helper method to simplify calling {@link #create(SpawnAction, OutputPathsMode)} for actions + * that store the configuration directly. * * @param configuration the configuration * @return the value of diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java index b453d1afbc7241..9f7dd36652157c 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/StrippingPathMapper.java @@ -15,6 +15,8 @@ package com.google.devtools.build.lib.analysis.actions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.devtools.build.lib.actions.AbstractAction; import com.google.devtools.build.lib.actions.Action; import com.google.devtools.build.lib.actions.ActionInput; import com.google.devtools.build.lib.actions.ActionInputHelper.BasicActionInput; @@ -26,9 +28,8 @@ import com.google.devtools.build.lib.actions.PathMapper; import com.google.devtools.build.lib.actions.Spawn; import com.google.devtools.build.lib.analysis.config.CoreOptions.OutputPathsMode; -import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.vfs.PathFragment; -import java.util.HashSet; +import java.util.HashMap; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -88,10 +89,15 @@ public final class StrippingPathMapper { * @param action the action to potentially strip paths from * @return a {@link StrippingPathMapper} if the action supports it, else {@link Optional#empty()}. */ - static Optional tryCreate(Action action) { + static Optional tryCreate(AbstractAction action) { // This is expected to always be "bazel-out", but we don't want to hardcode it here. PathFragment outputRoot = action.getPrimaryOutput().getExecPath().subFragment(0, 1); - if (isPathStrippable(action.getInputs(), outputRoot)) { + // Additional artifacts to map are not part of the action's inputs, but may still lead to + // path collisions after stripping. It is thus important to include them in this check. + if (isPathStrippable( + Iterables.concat( + action.getInputs().toList(), action.getAdditionalArtifactsForPathMapping().toList()), + outputRoot)) { return Optional.of( create(action.getMnemonic(), action instanceof StarlarkAction, outputRoot)); } @@ -250,7 +256,7 @@ private static boolean isOutputPath(PathFragment pathFragment, PathFragment outp *

This method checks b). */ private static boolean isPathStrippable( - NestedSet actionInputs, PathFragment outputRoot) { + Iterable actionInputs, PathFragment outputRoot) { // For qualifying action types, check that no inputs or outputs would clash if paths were // removed, e.g. "bazel-out/k8-fastbuild/foo" and "bazel-out/host/foo". // @@ -261,15 +267,15 @@ private static boolean isPathStrippable( // with configurations). While this would help more action instances qualify, it also blocks // caching the same action in host and target configurations. This could be mitigated by // stripping the host prefix *only* when the entire action is in the host configuration. - HashSet rootRelativePaths = new HashSet<>(); - for (ActionInput input : actionInputs.toList()) { + HashMap rootRelativePaths = new HashMap<>(); + for (ActionInput input : actionInputs) { if (!isOutputPath(input, outputRoot)) { continue; } // For "bazel-out/k8-fastbuild/foo/bar", get "foo/bar". - if (!rootRelativePaths.add(input.getExecPath().subFragment(2))) { - // TODO(bazel-team): don't fail on duplicate inputs, i.e. when the same exact exec path - // (including config prefix) is included twice. + if (!rootRelativePaths + .computeIfAbsent(input.getExecPath().subFragment(2), k -> input) + .equals(input)) { return false; } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java index d68b3a7187a1bc..3deb37b52aa358 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java @@ -25,6 +25,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.base.Strings; +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -82,7 +84,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; @@ -289,7 +290,6 @@ static class ReducedClasspath { } } - private JavaSpawn getReducedSpawn( ActionExecutionContext actionExecutionContext, ReducedClasspath reducedClasspath, @@ -721,6 +721,8 @@ public NestedSet getPossibleInputsForTesting() { * @param spawnResult the executor action that created the possibly stripped .jdeps output * @param outputDepsProto path to the .jdeps output * @param actionInputs all inputs to the current action + * @param additionalArtifactsForPathMapping any additional artifacts that may be referenced in the + * .jdeps file by path * @param actionExecutionContext the action execution context * @return the full deps proto (also written to disk to satisfy the action's declared output) */ @@ -728,6 +730,7 @@ static Deps.Dependencies createFullOutputDeps( SpawnResult spawnResult, Artifact outputDepsProto, NestedSet actionInputs, + NestedSet additionalArtifactsForPathMapping, ActionExecutionContext actionExecutionContext, PathMapper pathMapper) throws IOException { @@ -739,36 +742,51 @@ static Deps.Dependencies createFullOutputDeps( return executorJdeps; } + // No paths to rewrite. + if (executorJdeps.getDependencyCount() == 0) { + return executorJdeps; + } + // For each of the action's generated inputs, revert its mapped path back to its original path. - Map mappedToOriginalPath = new HashMap<>(); - for (Artifact actionInput : actionInputs.toList()) { + BiMap mappedToOriginalPath = HashBiMap.create(); + for (Artifact actionInput : + Iterables.concat(actionInputs.toList(), additionalArtifactsForPathMapping.toList())) { if (actionInput.isSourceArtifact()) { continue; } String mappedPath = pathMapper.getMappedExecPathString(actionInput); - if (mappedToOriginalPath.put(mappedPath, actionInput.getExecPath()) != null) { - // If an entry already exists, that means different inputs reduce to the same stripped path. - // That also means PathStripper would exempt this action from path stripping, so the - // executor-produced .jdeps already includes full paths. No need to update it. - return executorJdeps; + PathFragment previousPath = mappedToOriginalPath.put(mappedPath, actionInput.getExecPath()); + if (previousPath != null && !previousPath.equals(actionInput.getExecPath())) { + throw new IllegalStateException( + String.format( + "Duplicate mapped path %s derived from %s and %s", + mappedPath, actionInput.getExecPath(), mappedToOriginalPath.get(mappedPath))); } } - // No paths to rewrite. - if (executorJdeps.getDependencyCount() == 0) { - return executorJdeps; - } - // Rewrite the .jdeps proto with full paths. PathFragment outputRoot = outputDepsProto.getExecPath().subFragment(0, 1); Deps.Dependencies.Builder fullDepsBuilder = Deps.Dependencies.newBuilder(executorJdeps); for (Deps.Dependency.Builder dep : fullDepsBuilder.getDependencyBuilderList()) { PathFragment pathOnExecutor = PathFragment.create(dep.getPath()); PathFragment originalPath = mappedToOriginalPath.get(pathOnExecutor.getPathString()); - if (originalPath == null && pathOnExecutor.subFragment(0, 1).equals(outputRoot)) { - // The mapped path -> full path map failed, which means the paths weren't mapped. Fast- - // return the original jdeps to save unnecessary CPU time. - return executorJdeps; + // Source files, which do not lie under the output root, are not mapped. It is also possible + // that a jdeps file contains a reference to a transitive classpath element that isn't an + // input to the current action (see + // https://github.com/google/turbine/commit/f9f2decee04a3c651671f7488a7c9d7952df88c8), just an + // additional artifact marked for path mapping, and itself wasn't built with path mapping + // enabled (e .g. due to path collisions). In that case, the path will already be unmapped and + // we can leave it as is. For entirely unexpected paths, we still report an error. + if (originalPath == null + && pathOnExecutor.subFragment(0, 1).equals(outputRoot) + && !mappedToOriginalPath.containsValue(pathOnExecutor)) { + throw new IllegalStateException( + String.format( + "Missing original path for mapped path %s in %s%njdeps: %s%npath map: %s", + pathOnExecutor, + outputDepsProto.getExecPath(), + executorJdeps, + mappedToOriginalPath)); } dep.setPath( originalPath == null ? pathOnExecutor.getPathString() : originalPath.getPathString()); @@ -816,7 +834,12 @@ private Deps.Dependencies readFullOutputDeps( SpawnResult result = Iterables.getOnlyElement(results); try { return createFullOutputDeps( - result, outputDepsProto, getInputs(), actionExecutionContext, pathMapper); + result, + outputDepsProto, + getInputs(), + getAdditionalArtifactsForPathMapping(), + actionExecutionContext, + pathMapper); } catch (IOException e) { throw ActionExecutionException.fromExecException( new EnvironmentalExecException( diff --git a/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java index b37facf9e5c57b..ec7064c6d6b6b3 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java @@ -76,6 +76,7 @@ public final class JavaHeaderCompileAction extends SpawnAction { private final boolean insertDependencies; + private final NestedSet additionalArtifactsForPathMapping; private JavaHeaderCompileAction( ActionOwner owner, @@ -89,7 +90,8 @@ private JavaHeaderCompileAction( CharSequence progressMessage, String mnemonic, OutputPathsMode outputPathsMode, - boolean insertDependencies) { + boolean insertDependencies, + NestedSet additionalArtifactsForPathMapping) { super( owner, tools, @@ -103,6 +105,12 @@ private JavaHeaderCompileAction( mnemonic, outputPathsMode); this.insertDependencies = insertDependencies; + this.additionalArtifactsForPathMapping = additionalArtifactsForPathMapping; + } + + @Override + public NestedSet getAdditionalArtifactsForPathMapping() { + return additionalArtifactsForPathMapping; } @Override @@ -113,7 +121,12 @@ protected void afterExecute( try { Deps.Dependencies fullOutputDeps = JavaCompileAction.createFullOutputDeps( - spawnResult, outputDepsProto, getInputs(), context, pathMapper); + spawnResult, + outputDepsProto, + getInputs(), + getAdditionalArtifactsForPathMapping(), + context, + pathMapper); JavaCompileActionContext javaContext = context.getContext(JavaCompileActionContext.class); if (insertDependencies && javaContext != null) { javaContext.insertDependencies(outputDepsProto, fullOutputDeps); @@ -459,10 +472,20 @@ public void build(JavaToolchainProvider javaToolchain) throws RuleErrorException } if (useDirectClasspath) { NestedSet classpath; + NestedSet additionalArtifactsForPathMapping; if (!directJars.isEmpty() || classpathEntries.isEmpty()) { classpath = directJars; + // When using the direct classpath optimization, Turbine generates .jdeps entries based on + // the transitive dependency information packages into META-INF/TRANSITIVE. When path + // mapping is used, these entries may have been subject to it when they were generated. + // Since the contents of that directory are not unmapped, we need to instead unmap the + // paths emitted in the .jdeps file, which requires knowing the full list of artifact + // paths even if they aren't inputs to the current action. + // https://github.com/google/turbine/commit/f9f2decee04a3c651671f7488a7c9d7952df88c8 + additionalArtifactsForPathMapping = classpathEntries; } else { classpath = classpathEntries; + additionalArtifactsForPathMapping = NestedSetBuilder.emptySet(Order.STABLE_ORDER); } mandatoryInputsBuilder.addTransitive(classpath); @@ -494,7 +517,8 @@ public void build(JavaToolchainProvider javaToolchain) throws RuleErrorException // If classPathMode == BAZEL, also make sure to inject the dependencies to be // available to downstream actions. Else just do enough work to locally create the // full .jdeps from the .stripped .jdeps produced on the executor. - /* insertDependencies= */ classpathMode == JavaClasspathMode.BAZEL)); + /* insertDependencies= */ classpathMode == JavaClasspathMode.BAZEL, + additionalArtifactsForPathMapping)); return; } diff --git a/src/test/shell/integration/config_stripped_outputs_test.sh b/src/test/shell/integration/config_stripped_outputs_test.sh index 8498a0c79afcb9..da111c4ec0eab5 100755 --- a/src/test/shell/integration/config_stripped_outputs_test.sh +++ b/src/test/shell/integration/config_stripped_outputs_test.sh @@ -328,4 +328,55 @@ EOF assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/libbase_lib-hjar.jdeps" } +function test_direct_classpath() { + local -r pkg="${FUNCNAME[0]}" + mkdir -p "$pkg/java/hello/" || fail "Expected success" + # When compiling C, the direct classpath optimization in Turbine embeds information about the + # dependency D into the header jar, which then results in the path to Ds header jar being included + # in the jdeps file for B. The full compilation action for A requires the header jar for D and + # thus the path to it in the jdeps file of B has to be unmapped properly for the reduced classpath + # created for A to contain it. + cat > "$pkg/java/hello/A.java" <<'EOF' +package hello; +public class A extends B {} +EOF + cat > "$pkg/java/hello/B.java" <<'EOF' +package hello; +public class B extends C {} +EOF + cat > "$pkg/java/hello/C.java" <<'EOF' +package hello; +public class C extends D {} +EOF + cat > "$pkg/java/hello/D.java" <<'EOF' +package hello; +public class D {} +EOF + cat > "$pkg/java/hello/BUILD" <<'EOF' +java_library(name='a', srcs=['A.java'], deps = [':b']) +java_library(name='b', srcs=['B.java'], deps = [':c']) +java_library(name='c', srcs=['C.java'], deps = [':d']) +java_library(name='d', srcs=['D.java']) +EOF + + bazel build --experimental_java_classpath=bazel \ + --experimental_output_paths=strip \ + //"$pkg"/java/hello:a -s 2>"$TEST_log" \ + || fail "Expected success" + + # java_library .jar compilation: + assert_paths_stripped "$TEST_log" "$pkg/java/hello/liba.jar-0.params" + # java_library header jar compilation: + assert_paths_stripped "$TEST_log" "bin/$pkg/java/hello/libb-hjar.jar" + # jdeps files should contain the original paths since they are read by downstream actions that may + # not use path mapping. + assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/liba.jdeps" + assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libb.jdeps" + assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libb-hjar.jdeps" + assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libc.jdeps" + assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libc-hjar.jdeps" + assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libd.jdeps" + assert_contains_no_stripped_path "${bazel_bin}/$pkg/java/hello/libd-hjar.jdeps" +} + run_suite "Tests stripping config prefixes from output paths for better action caching"