From 01dc4abec46b6c8b61edce21771dbb4548e57c11 Mon Sep 17 00:00:00 2001 From: Tiago Quelhas Date: Wed, 5 Apr 2023 06:50:47 -0700 Subject: [PATCH] Cache Merkle trees for tree artifacts. Currently, a large tree artifact cannot benefit from the Merkle tree cache if it always appears on a nested set together with other (unique per-action) files. To improve this, modify SpawnInputExpander to treat the tree as a distinct node in the input hierarchy that can be cached separately. Also simplify the cache keys for filesets and runfiles, since the SpawnInputExpander is a per-build singleton, and this cache is only shared by actions within a single build. Progress on https://github.com/bazelbuild/bazel/issues/17923. Closes #17929. PiperOrigin-RevId: 522039585 Change-Id: Ia4f2603325acfd4400239894214f2884a71d69cf --- .../build/lib/exec/SpawnInputExpander.java | 84 +++++++++++++------ .../lib/remote/RemoteExecutionService.java | 5 ++ .../remote/RemoteExecutionServiceTest.java | 33 ++++++-- 3 files changed, 89 insertions(+), 33 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/exec/SpawnInputExpander.java b/src/main/java/com/google/devtools/build/lib/exec/SpawnInputExpander.java index 944892e7012473..7922e4362b21eb 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/SpawnInputExpander.java +++ b/src/main/java/com/google/devtools/build/lib/exec/SpawnInputExpander.java @@ -13,6 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.exec; +import static com.google.common.collect.ImmutableList.toImmutableList; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -22,6 +23,7 @@ import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander; import com.google.devtools.build.lib.actions.Artifact.MissingExpansionException; +import com.google.devtools.build.lib.actions.Artifact.SpecialArtifact; import com.google.devtools.build.lib.actions.Artifact.TreeFileArtifact; import com.google.devtools.build.lib.actions.FileArtifactValue; import com.google.devtools.build.lib.actions.FilesetManifest; @@ -39,7 +41,6 @@ import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import java.io.IOException; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -266,14 +267,19 @@ public SortedMap getInputMapping( /** The interface for accessing part of the input hierarchy. */ public interface InputWalker { + + /** Returns the leaf nodes at this point in the hierarchy. */ SortedMap getLeavesInputMapping() throws IOException, ForbiddenActionInputException; - void visitNonLeaves(InputVisitor visitor) throws IOException, ForbiddenActionInputException; + /** Invokes the visitor on the non-leaf nodes at this point in the hierarchy. */ + default void visitNonLeaves(InputVisitor visitor) + throws IOException, ForbiddenActionInputException {} } /** The interface for visiting part of the input hierarchy. */ public interface InputVisitor { + /** * Visits a part of the input hierarchy. * @@ -305,13 +311,8 @@ public void walkInputs( RunfilesSupplier runfilesSupplier = spawn.getRunfilesSupplier(); visitor.visit( - // The list of variables affecting the functional expressions below. - Arrays.asList( - // Assuming that artifactExpander and actionInputFileCache, different for each spawn, - // always expand the same way. - this, // For accessing addRunfilesToInputs. - runfilesSupplier, - baseDirectory), + // Cache key for the sub-mapping containing the runfiles inputs for this spawn. + ImmutableList.of(runfilesSupplier, baseDirectory), new InputWalker() { @Override public SortedMap getLeavesInputMapping() @@ -321,20 +322,14 @@ public SortedMap getLeavesInputMapping() inputMap, runfilesSupplier, actionInputFileCache, artifactExpander, baseDirectory); return inputMap; } - - @Override - public void visitNonLeaves(InputVisitor childVisitor) {} }); Map> filesetMappings = spawn.getFilesetMappings(); // filesetMappings is assumed to be very small, so no need to implement visitNonLeaves() for // improved runtime. visitor.visit( - // The list of variables affecting the functional expressions below. - Arrays.asList( - this, // For accessing addFilesetManifests. - filesetMappings, - baseDirectory), + // Cache key for the sub-mapping containing the fileset inputs for this spawn. + ImmutableList.of(filesetMappings, baseDirectory), new InputWalker() { @Override public SortedMap getLeavesInputMapping() @@ -343,13 +338,10 @@ public SortedMap getLeavesInputMapping() addFilesetManifests(filesetMappings, inputMap, baseDirectory); return inputMap; } - - @Override - public void visitNonLeaves(InputVisitor childVisitor) {} }); } - /** Walks through one level of a {@link NestedSet} of {@link ActionInput}s. */ + /** Visits a {@link NestedSet} occurring in {@link Spawn#getInputFiles}. */ private void walkNestedSetInputs( PathFragment baseDirectory, NestedSet someInputFiles, @@ -357,18 +349,21 @@ private void walkNestedSetInputs( InputVisitor visitor) throws IOException, ForbiddenActionInputException { visitor.visit( - // addInputs is static so no need to add 'this' as dependent key. - Arrays.asList( - // Assuming that artifactExpander, different for each spawn, always expands the same - // way. - someInputFiles.toNode(), baseDirectory), + // Cache key for the sub-mapping containing the files in this nested set. + ImmutableList.of(someInputFiles.toNode(), baseDirectory), new InputWalker() { @Override public SortedMap getLeavesInputMapping() { TreeMap inputMap = new TreeMap<>(); + // Consider files inside tree artifacts to be non-leaves. This caches better when a + // large tree is not the sole direct child of a nested set. + ImmutableList leaves = + someInputFiles.getLeaves().stream() + .filter(a -> !isTreeArtifact(a)) + .collect(toImmutableList()); addInputs( inputMap, - NestedSetBuilder.wrap(someInputFiles.getOrder(), someInputFiles.getLeaves()), + NestedSetBuilder.wrap(someInputFiles.getOrder(), leaves), artifactExpander, baseDirectory); return inputMap; @@ -377,6 +372,12 @@ public SortedMap getLeavesInputMapping() { @Override public void visitNonLeaves(InputVisitor childVisitor) throws IOException, ForbiddenActionInputException { + for (ActionInput input : someInputFiles.getLeaves()) { + if (isTreeArtifact(input)) { + walkTreeInputs( + baseDirectory, (SpecialArtifact) input, artifactExpander, childVisitor); + } + } for (NestedSet subInputs : someInputFiles.getNonLeaves()) { walkNestedSetInputs(baseDirectory, subInputs, artifactExpander, childVisitor); } @@ -384,11 +385,40 @@ public void visitNonLeaves(InputVisitor childVisitor) }); } + /** Visits a tree artifact occurring in {@link Spawn#getInputFiles}. */ + private void walkTreeInputs( + PathFragment baseDirectory, + SpecialArtifact tree, + ArtifactExpander artifactExpander, + InputVisitor visitor) + throws IOException, ForbiddenActionInputException { + visitor.visit( + // Cache key for the sub-mapping containing the files in this tree artifact. + ImmutableList.of(tree, baseDirectory), + new InputWalker() { + @Override + public SortedMap getLeavesInputMapping() { + TreeMap inputMap = new TreeMap<>(); + addInputs( + inputMap, + NestedSetBuilder.create(Order.STABLE_ORDER, tree), + artifactExpander, + baseDirectory); + return inputMap; + } + }); + } + + private static boolean isTreeArtifact(ActionInput input) { + return input instanceof SpecialArtifact && ((SpecialArtifact) input).isTreeArtifact(); + } + /** * Exception signaling that an input was not a regular file: most likely a directory. This * exception is currently never thrown in practice since we do not enforce "strict" mode. */ private static final class ForbiddenNonFileException extends ForbiddenActionInputException { + ForbiddenNonFileException(ActionInput input) { super("Not a file: " + input.getExecPathString()); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java index 11c0c45ef414a7..f38c1e3dd1f620 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java @@ -343,6 +343,11 @@ public boolean mayBeExecutedRemotely(Spawn spawn) { && Spawns.mayBeExecutedRemotely(spawn); } + @VisibleForTesting + Cache getMerkleTreeCache() { + return merkleTreeCache; + } + private SortedMap buildOutputDirMap(Spawn spawn) { TreeMap outputDirMap = new TreeMap<>(); for (ActionInput output : spawn.getOutputFiles()) { diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java index ec6e25fc4987e7..ce08401d9f8f88 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java @@ -65,6 +65,7 @@ import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.ArtifactRoot; import com.google.devtools.build.lib.actions.ArtifactRoot.RootType; +import com.google.devtools.build.lib.actions.EmptyRunfilesSupplier; import com.google.devtools.build.lib.actions.ExecutionRequirements; import com.google.devtools.build.lib.actions.ResourceSet; import com.google.devtools.build.lib.actions.SimpleSpawn; @@ -1808,8 +1809,10 @@ public void buildMerkleTree_withMemoization_works() throws Exception { // arrange // Single node NestedSets are folded, so always add a dummy file everywhere. - ActionInput dummyFile = ActionInputHelper.fromPath("dummy"); - fakeFileCache.createScratchInput(dummyFile, "dummy"); + ActionInput dummyFile = ActionInputHelper.fromPath("file"); + fakeFileCache.createScratchInput(dummyFile, "file"); + + ActionInput tree = ActionsTestUtil.createTreeArtifactWithGeneratingAction(artifactRoot, "tree"); ActionInput barFile = ActionInputHelper.fromPath("bar/file"); NestedSet nodeBar = @@ -1829,12 +1832,14 @@ public void buildMerkleTree_withMemoization_works() throws Exception { NestedSet nodeRoot1 = new NestedSetBuilder(Order.STABLE_ORDER) .add(dummyFile) + .add(tree) .addTransitive(nodeBar) .addTransitive(nodeFoo1) .build(); NestedSet nodeRoot2 = new NestedSetBuilder(Order.STABLE_ORDER) .add(dummyFile) + .add(tree) .addTransitive(nodeBar) .addTransitive(nodeFoo2) .build(); @@ -1869,15 +1874,31 @@ public void buildMerkleTree_withMemoization_works() throws Exception { service.buildRemoteAction(spawn1, context1); // assert first time - // Called for: manifests, runfiles, nodeRoot1, nodeFoo1 and nodeBar. - verify(service, times(5)).uncachedBuildMerkleTreeVisitor(any(), any(), any()); + verify(service, times(6)).uncachedBuildMerkleTreeVisitor(any(), any(), any()); + assertThat(service.getMerkleTreeCache().asMap().keySet()) + .containsExactly( + ImmutableList.of(ImmutableMap.of(), PathFragment.EMPTY_FRAGMENT), // fileset mapping + ImmutableList.of(EmptyRunfilesSupplier.INSTANCE, PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(tree, PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeRoot1.toNode(), PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeFoo1.toNode(), PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeBar.toNode(), PathFragment.EMPTY_FRAGMENT)); // act second time service.buildRemoteAction(spawn2, context2); // assert second time - // Called again for: manifests, runfiles, nodeRoot2 and nodeFoo2 but not nodeBar (cached). - verify(service, times(5 + 4)).uncachedBuildMerkleTreeVisitor(any(), any(), any()); + verify(service, times(6 + 2)).uncachedBuildMerkleTreeVisitor(any(), any(), any()); + assertThat(service.getMerkleTreeCache().asMap().keySet()) + .containsExactly( + ImmutableList.of(ImmutableMap.of(), PathFragment.EMPTY_FRAGMENT), // fileset mapping + ImmutableList.of(EmptyRunfilesSupplier.INSTANCE, PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(tree, PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeRoot1.toNode(), PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeRoot2.toNode(), PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeFoo1.toNode(), PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeFoo2.toNode(), PathFragment.EMPTY_FRAGMENT), + ImmutableList.of(nodeBar.toNode(), PathFragment.EMPTY_FRAGMENT)); } @Test