Skip to content

Commit

Permalink
Cache Merkle trees for tree artifacts.
Browse files Browse the repository at this point in the history
Currently, a large tree artifact cannot benefit from the Merkle tree cache if
it always appears on a nested set together with other (unique per-action)
files.

To improve this, modify SpawnInputExpander to treat the tree as a distinct
node in the input hierarchy that can be cached separately.

Also simplify the cache keys for filesets and runfiles, since the
SpawnInputExpander is a per-build singleton, and this cache is only shared by
actions within a single build.

Progress on bazelbuild#17923.

Closes bazelbuild#17929.

PiperOrigin-RevId: 522039585
Change-Id: Ia4f2603325acfd4400239894214f2884a71d69cf
  • Loading branch information
tjgq authored and fweikert committed May 25, 2023
1 parent c25c990 commit 01dc4ab
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.exec;

import static com.google.common.collect.ImmutableList.toImmutableList;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
Expand All @@ -22,6 +23,7 @@
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.Artifact.ArtifactExpander;
import com.google.devtools.build.lib.actions.Artifact.MissingExpansionException;
import com.google.devtools.build.lib.actions.Artifact.SpecialArtifact;
import com.google.devtools.build.lib.actions.Artifact.TreeFileArtifact;
import com.google.devtools.build.lib.actions.FileArtifactValue;
import com.google.devtools.build.lib.actions.FilesetManifest;
Expand All @@ -39,7 +41,6 @@
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -266,14 +267,19 @@ public SortedMap<PathFragment, ActionInput> getInputMapping(

/** The interface for accessing part of the input hierarchy. */
public interface InputWalker {

/** Returns the leaf nodes at this point in the hierarchy. */
SortedMap<PathFragment, ActionInput> getLeavesInputMapping()
throws IOException, ForbiddenActionInputException;

void visitNonLeaves(InputVisitor visitor) throws IOException, ForbiddenActionInputException;
/** Invokes the visitor on the non-leaf nodes at this point in the hierarchy. */
default void visitNonLeaves(InputVisitor visitor)
throws IOException, ForbiddenActionInputException {}
}

/** The interface for visiting part of the input hierarchy. */
public interface InputVisitor {

/**
* Visits a part of the input hierarchy.
*
Expand Down Expand Up @@ -305,13 +311,8 @@ public void walkInputs(

RunfilesSupplier runfilesSupplier = spawn.getRunfilesSupplier();
visitor.visit(
// The list of variables affecting the functional expressions below.
Arrays.asList(
// Assuming that artifactExpander and actionInputFileCache, different for each spawn,
// always expand the same way.
this, // For accessing addRunfilesToInputs.
runfilesSupplier,
baseDirectory),
// Cache key for the sub-mapping containing the runfiles inputs for this spawn.
ImmutableList.of(runfilesSupplier, baseDirectory),
new InputWalker() {
@Override
public SortedMap<PathFragment, ActionInput> getLeavesInputMapping()
Expand All @@ -321,20 +322,14 @@ public SortedMap<PathFragment, ActionInput> getLeavesInputMapping()
inputMap, runfilesSupplier, actionInputFileCache, artifactExpander, baseDirectory);
return inputMap;
}

@Override
public void visitNonLeaves(InputVisitor childVisitor) {}
});

Map<Artifact, ImmutableList<FilesetOutputSymlink>> filesetMappings = spawn.getFilesetMappings();
// filesetMappings is assumed to be very small, so no need to implement visitNonLeaves() for
// improved runtime.
visitor.visit(
// The list of variables affecting the functional expressions below.
Arrays.asList(
this, // For accessing addFilesetManifests.
filesetMappings,
baseDirectory),
// Cache key for the sub-mapping containing the fileset inputs for this spawn.
ImmutableList.of(filesetMappings, baseDirectory),
new InputWalker() {
@Override
public SortedMap<PathFragment, ActionInput> getLeavesInputMapping()
Expand All @@ -343,32 +338,32 @@ public SortedMap<PathFragment, ActionInput> getLeavesInputMapping()
addFilesetManifests(filesetMappings, inputMap, baseDirectory);
return inputMap;
}

@Override
public void visitNonLeaves(InputVisitor childVisitor) {}
});
}

/** Walks through one level of a {@link NestedSet} of {@link ActionInput}s. */
/** Visits a {@link NestedSet} occurring in {@link Spawn#getInputFiles}. */
private void walkNestedSetInputs(
PathFragment baseDirectory,
NestedSet<? extends ActionInput> someInputFiles,
ArtifactExpander artifactExpander,
InputVisitor visitor)
throws IOException, ForbiddenActionInputException {
visitor.visit(
// addInputs is static so no need to add 'this' as dependent key.
Arrays.asList(
// Assuming that artifactExpander, different for each spawn, always expands the same
// way.
someInputFiles.toNode(), baseDirectory),
// Cache key for the sub-mapping containing the files in this nested set.
ImmutableList.of(someInputFiles.toNode(), baseDirectory),
new InputWalker() {
@Override
public SortedMap<PathFragment, ActionInput> getLeavesInputMapping() {
TreeMap<PathFragment, ActionInput> inputMap = new TreeMap<>();
// Consider files inside tree artifacts to be non-leaves. This caches better when a
// large tree is not the sole direct child of a nested set.
ImmutableList<? extends ActionInput> leaves =
someInputFiles.getLeaves().stream()
.filter(a -> !isTreeArtifact(a))
.collect(toImmutableList());
addInputs(
inputMap,
NestedSetBuilder.wrap(someInputFiles.getOrder(), someInputFiles.getLeaves()),
NestedSetBuilder.wrap(someInputFiles.getOrder(), leaves),
artifactExpander,
baseDirectory);
return inputMap;
Expand All @@ -377,18 +372,53 @@ public SortedMap<PathFragment, ActionInput> getLeavesInputMapping() {
@Override
public void visitNonLeaves(InputVisitor childVisitor)
throws IOException, ForbiddenActionInputException {
for (ActionInput input : someInputFiles.getLeaves()) {
if (isTreeArtifact(input)) {
walkTreeInputs(
baseDirectory, (SpecialArtifact) input, artifactExpander, childVisitor);
}
}
for (NestedSet<? extends ActionInput> subInputs : someInputFiles.getNonLeaves()) {
walkNestedSetInputs(baseDirectory, subInputs, artifactExpander, childVisitor);
}
}
});
}

/** Visits a tree artifact occurring in {@link Spawn#getInputFiles}. */
private void walkTreeInputs(
PathFragment baseDirectory,
SpecialArtifact tree,
ArtifactExpander artifactExpander,
InputVisitor visitor)
throws IOException, ForbiddenActionInputException {
visitor.visit(
// Cache key for the sub-mapping containing the files in this tree artifact.
ImmutableList.of(tree, baseDirectory),
new InputWalker() {
@Override
public SortedMap<PathFragment, ActionInput> getLeavesInputMapping() {
TreeMap<PathFragment, ActionInput> inputMap = new TreeMap<>();
addInputs(
inputMap,
NestedSetBuilder.create(Order.STABLE_ORDER, tree),
artifactExpander,
baseDirectory);
return inputMap;
}
});
}

private static boolean isTreeArtifact(ActionInput input) {
return input instanceof SpecialArtifact && ((SpecialArtifact) input).isTreeArtifact();
}

/**
* Exception signaling that an input was not a regular file: most likely a directory. This
* exception is currently never thrown in practice since we do not enforce "strict" mode.
*/
private static final class ForbiddenNonFileException extends ForbiddenActionInputException {

ForbiddenNonFileException(ActionInput input) {
super("Not a file: " + input.getExecPathString());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,11 @@ public boolean mayBeExecutedRemotely(Spawn spawn) {
&& Spawns.mayBeExecutedRemotely(spawn);
}

@VisibleForTesting
Cache<Object, MerkleTree> getMerkleTreeCache() {
return merkleTreeCache;
}

private SortedMap<PathFragment, ActionInput> buildOutputDirMap(Spawn spawn) {
TreeMap<PathFragment, ActionInput> outputDirMap = new TreeMap<>();
for (ActionInput output : spawn.getOutputFiles()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.ArtifactRoot;
import com.google.devtools.build.lib.actions.ArtifactRoot.RootType;
import com.google.devtools.build.lib.actions.EmptyRunfilesSupplier;
import com.google.devtools.build.lib.actions.ExecutionRequirements;
import com.google.devtools.build.lib.actions.ResourceSet;
import com.google.devtools.build.lib.actions.SimpleSpawn;
Expand Down Expand Up @@ -1808,8 +1809,10 @@ public void buildMerkleTree_withMemoization_works() throws Exception {

// arrange
// Single node NestedSets are folded, so always add a dummy file everywhere.
ActionInput dummyFile = ActionInputHelper.fromPath("dummy");
fakeFileCache.createScratchInput(dummyFile, "dummy");
ActionInput dummyFile = ActionInputHelper.fromPath("file");
fakeFileCache.createScratchInput(dummyFile, "file");

ActionInput tree = ActionsTestUtil.createTreeArtifactWithGeneratingAction(artifactRoot, "tree");

ActionInput barFile = ActionInputHelper.fromPath("bar/file");
NestedSet<ActionInput> nodeBar =
Expand All @@ -1829,12 +1832,14 @@ public void buildMerkleTree_withMemoization_works() throws Exception {
NestedSet<ActionInput> nodeRoot1 =
new NestedSetBuilder<ActionInput>(Order.STABLE_ORDER)
.add(dummyFile)
.add(tree)
.addTransitive(nodeBar)
.addTransitive(nodeFoo1)
.build();
NestedSet<ActionInput> nodeRoot2 =
new NestedSetBuilder<ActionInput>(Order.STABLE_ORDER)
.add(dummyFile)
.add(tree)
.addTransitive(nodeBar)
.addTransitive(nodeFoo2)
.build();
Expand Down Expand Up @@ -1869,15 +1874,31 @@ public void buildMerkleTree_withMemoization_works() throws Exception {
service.buildRemoteAction(spawn1, context1);

// assert first time
// Called for: manifests, runfiles, nodeRoot1, nodeFoo1 and nodeBar.
verify(service, times(5)).uncachedBuildMerkleTreeVisitor(any(), any(), any());
verify(service, times(6)).uncachedBuildMerkleTreeVisitor(any(), any(), any());
assertThat(service.getMerkleTreeCache().asMap().keySet())
.containsExactly(
ImmutableList.of(ImmutableMap.of(), PathFragment.EMPTY_FRAGMENT), // fileset mapping
ImmutableList.of(EmptyRunfilesSupplier.INSTANCE, PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(tree, PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeRoot1.toNode(), PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeFoo1.toNode(), PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeBar.toNode(), PathFragment.EMPTY_FRAGMENT));

// act second time
service.buildRemoteAction(spawn2, context2);

// assert second time
// Called again for: manifests, runfiles, nodeRoot2 and nodeFoo2 but not nodeBar (cached).
verify(service, times(5 + 4)).uncachedBuildMerkleTreeVisitor(any(), any(), any());
verify(service, times(6 + 2)).uncachedBuildMerkleTreeVisitor(any(), any(), any());
assertThat(service.getMerkleTreeCache().asMap().keySet())
.containsExactly(
ImmutableList.of(ImmutableMap.of(), PathFragment.EMPTY_FRAGMENT), // fileset mapping
ImmutableList.of(EmptyRunfilesSupplier.INSTANCE, PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(tree, PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeRoot1.toNode(), PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeRoot2.toNode(), PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeFoo1.toNode(), PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeFoo2.toNode(), PathFragment.EMPTY_FRAGMENT),
ImmutableList.of(nodeBar.toNode(), PathFragment.EMPTY_FRAGMENT));
}

@Test
Expand Down

0 comments on commit 01dc4ab

Please sign in to comment.