Skip to content

Commit

Permalink
Do not invalidate remote metadata during action dirtiness check
Browse files Browse the repository at this point in the history
... if the corresponding output is materialized afterwards and hasn't been changed.

Failing to do so will cause an incremental build to re-check the action cache for actions whose outputs were materialized during last build. This can be very slow if the size of the outputs are large.

We achieved this by storing `FileContentsProxy` in the remote metadata after materialization. During dirtiness check, we compare remote metadata and the corresponding local metadata with their `digest`, or `proxy` if `digest` is not available for local metadata,

A user reported back the wall time of their very first increment build is improved by this change from `14s` to less than `1s` in common case, and from `115s` to less than `1s` in worst case. bazelbuild#24763

PiperOrigin-RevId: 715734718
Change-Id: Id1a1a59d8b5f3e91a7ae05a73663ff37eee6d163
  • Loading branch information
coeuvre committed Jan 16, 2025
1 parent 38dbf04 commit c5c5bae
Show file tree
Hide file tree
Showing 18 changed files with 401 additions and 158 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,17 @@ protected boolean couldBeModifiedByMetadata(FileArtifactValue o) {
return true;
}

RegularFileArtifactValue lastKnown = (RegularFileArtifactValue) o;
return size != lastKnown.size || !Objects.equals(proxy, lastKnown.proxy);
switch (o) {
case RegularFileArtifactValue lastKnown -> {
return size != lastKnown.size || !Objects.equals(proxy, lastKnown.proxy);
}
case RemoteFileArtifactValueWithMaterializationData lastKnown -> {
return size != lastKnown.getSize() || !Objects.equals(proxy, lastKnown.proxy);
}
default -> {
return true;
}
}
}
}

Expand All @@ -536,36 +545,25 @@ public static class RemoteFileArtifactValue extends FileArtifactValue {
private final byte[] digest;
private final long size;
private final int locationIndex;
@Nullable private final PathFragment materializationExecPath;

private RemoteFileArtifactValue(
byte[] digest,
long size,
int locationIndex,
@Nullable PathFragment materializationExecPath) {
this.digest = Preconditions.checkNotNull(digest);
private RemoteFileArtifactValue(byte[] digest, long size, int locationIndex) {
this.digest = checkNotNull(digest);
this.size = size;
this.locationIndex = locationIndex;
this.materializationExecPath = materializationExecPath;
}

public static RemoteFileArtifactValue create(
byte[] digest, long size, int locationIndex, long expireAtEpochMilli) {
return create(
digest, size, locationIndex, expireAtEpochMilli, /* materializationExecPath= */ null);
public static RemoteFileArtifactValue create(byte[] digest, long size, int locationIndex) {
return new RemoteFileArtifactValue(digest, size, locationIndex);
}

@VisibleForTesting
public static RemoteFileArtifactValue create(
public static RemoteFileArtifactValueWithMaterializationData createWithMaterializationData(
byte[] digest,
long size,
int locationIndex,
long expireAtEpochMilli,
@Nullable PathFragment materializationExecPath) {
return expireAtEpochMilli < 0
? new RemoteFileArtifactValue(digest, size, locationIndex, materializationExecPath)
: new RemoteFileArtifactValueWithExpiration(
digest, size, locationIndex, materializationExecPath, expireAtEpochMilli);
return new RemoteFileArtifactValueWithMaterializationData(
digest, size, locationIndex, materializationExecPath, expireAtEpochMilli);
}

/**
Expand All @@ -575,10 +573,10 @@ public static RemoteFileArtifactValue create(
public static RemoteFileArtifactValue createFromExistingWithMaterializationPath(
RemoteFileArtifactValue metadata, PathFragment materializationExecPath) {
checkNotNull(materializationExecPath);
if (metadata.materializationExecPath != null) {
if (metadata.getMaterializationExecPath().isPresent()) {
return metadata;
}
return create(
return createWithMaterializationData(
metadata.getDigest(),
metadata.getSize(),
metadata.getLocationIndex(),
Expand All @@ -598,13 +596,12 @@ public boolean equals(Object o) {
RemoteFileArtifactValue that = (RemoteFileArtifactValue) o;
return Arrays.equals(digest, that.digest)
&& size == that.size
&& locationIndex == that.locationIndex
&& Objects.equals(materializationExecPath, that.materializationExecPath);
&& locationIndex == that.locationIndex;
}

@Override
public final int hashCode() {
return Objects.hash(Arrays.hashCode(digest), size, locationIndex, materializationExecPath);
public int hashCode() {
return Objects.hash(Arrays.hashCode(digest), size, locationIndex);
}

@Override
Expand All @@ -618,7 +615,7 @@ public final byte[] getDigest() {
}

@Override
public final FileContentsProxy getContentsProxy() {
public FileContentsProxy getContentsProxy() {
throw new UnsupportedOperationException();
}

Expand All @@ -638,11 +635,6 @@ public final int getLocationIndex() {
return locationIndex;
}

@Override
public final Optional<PathFragment> getMaterializationExecPath() {
return Optional.ofNullable(materializationExecPath);
}

/**
* Returns the time when the remote file expires in milliseconds since epoch. A negative value
* means the remote is not known to expire.
Expand Down Expand Up @@ -674,28 +666,33 @@ public final boolean isRemote() {
}

@Override
public final String toString() {
public String toString() {
return MoreObjects.toStringHelper(this)
.add("digest", bytesToString(digest))
.add("size", size)
.add("locationIndex", locationIndex)
.add("materializationExecPath", materializationExecPath)
.add("expireAtEpochMilli", getExpireAtEpochMilli())
.toString();
}
}

/** A remote artifact that expires at a particular time. */
private static final class RemoteFileArtifactValueWithExpiration extends RemoteFileArtifactValue {
/**
* A remote artifact that contains additional data for materialization. This is used when the
* output mode allows Bazel to materialize remote output to local filesystem.
*/
public static final class RemoteFileArtifactValueWithMaterializationData
extends RemoteFileArtifactValue {
@Nullable private final PathFragment materializationExecPath;
private long expireAtEpochMilli;
@Nullable private FileContentsProxy proxy;

private RemoteFileArtifactValueWithExpiration(
private RemoteFileArtifactValueWithMaterializationData(
byte[] digest,
long size,
int locationIndex,
PathFragment materializationExecPath,
long expireAtEpochMilli) {
super(digest, size, locationIndex, materializationExecPath);
super(digest, size, locationIndex);
this.materializationExecPath = materializationExecPath;
this.expireAtEpochMilli = expireAtEpochMilli;
}

Expand All @@ -706,14 +703,75 @@ public long getExpireAtEpochMilli() {

@Override
public void extendExpireAtEpochMilli(long expireAtEpochMilli) {
if (expireAtEpochMilli < 0) {
return;
}
Preconditions.checkState(expireAtEpochMilli > this.expireAtEpochMilli);
this.expireAtEpochMilli = expireAtEpochMilli;
}

/**
* Returns a non-null {@link FileContentsProxy} if this remote metadata is backed by a local
* file, e.g. the file is materialized after action execution.
*/
@Override
public FileContentsProxy getContentsProxy() {
return proxy;
}

/**
* Sets the {@link FileContentsProxy} if the output backed by this remote metadata is
* materialized later.
*/
public void setContentsProxy(FileContentsProxy proxy) {
this.proxy = proxy;
}

@Override
public boolean isAlive(Instant now) {
if (expireAtEpochMilli < 0) {
return true;
}
return now.toEpochMilli() < expireAtEpochMilli;
}

@Override
public Optional<PathFragment> getMaterializationExecPath() {
return Optional.ofNullable(materializationExecPath);
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof RemoteFileArtifactValueWithMaterializationData that)) {
return false;
}

return Arrays.equals(getDigest(), that.getDigest())
&& getSize() == that.getSize()
&& getLocationIndex() == that.getLocationIndex()
&& Objects.equals(materializationExecPath, that.materializationExecPath);
}

@Override
public int hashCode() {
return Objects.hash(
Arrays.hashCode(getDigest()), getSize(), getLocationIndex(), materializationExecPath);
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("digest", bytesToString(getDigest()))
.add("size", getSize())
.add("locationIndex", getLocationIndex())
.add("materializationExecPath", materializationExecPath)
.add("expireAtEpochMilli", expireAtEpochMilli)
.add("proxy", proxy)
.toString();
}
}

/** A {@link FileArtifactValue} representing a symlink that is not to be resolved. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,11 @@ private static RemoteFileArtifactValue decodeRemoteMetadata(
PathFragment.create(getStringForIndex(indexer, VarInt.getVarInt(source)));
}

return RemoteFileArtifactValue.create(
if (expireAtEpochMilli < 0 && materializationExecPath == null) {
return RemoteFileArtifactValue.create(digest, size, locationIndex);
}

return RemoteFileArtifactValue.createWithMaterializationData(
digest, size, locationIndex, expireAtEpochMilli, materializationExecPath);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
import com.google.devtools.build.lib.actions.Artifact.SpecialArtifact;
import com.google.devtools.build.lib.actions.Artifact.TreeFileArtifact;
import com.google.devtools.build.lib.actions.FileArtifactValue;
import com.google.devtools.build.lib.actions.FileArtifactValue.RemoteFileArtifactValueWithMaterializationData;
import com.google.devtools.build.lib.actions.FileContentsProxy;
import com.google.devtools.build.lib.actions.InputMetadataProvider;
import com.google.devtools.build.lib.actions.cache.OutputMetadataStore;
import com.google.devtools.build.lib.actions.cache.VirtualActionInput;
import com.google.devtools.build.lib.events.Reporter;
Expand Down Expand Up @@ -531,7 +534,8 @@ private Completable downloadFileNoCheckRx(
directExecutor())
.doOnComplete(
() -> {
finalizeDownload(tempPath, finalPath, dirsWithOutputPermissions);
finalizeDownload(
metadata, tempPath, finalPath, dirsWithOutputPermissions);
alreadyDeleted.set(true);
})
.doOnError(
Expand All @@ -553,7 +557,8 @@ private Completable downloadFileNoCheckRx(
}));
}

private void finalizeDownload(Path tmpPath, Path finalPath, Set<Path> dirsWithOutputPermissions)
private void finalizeDownload(
FileArtifactValue metadata, Path tmpPath, Path finalPath, Set<Path> dirsWithOutputPermissions)
throws IOException {
Path parentDir = checkNotNull(finalPath.getParentDirectory());

Expand Down Expand Up @@ -585,6 +590,9 @@ private void finalizeDownload(Path tmpPath, Path finalPath, Set<Path> dirsWithOu
// for artifacts produced by local actions.
tmpPath.chmod(outputPermissions.getPermissionsMode());
FileSystemUtils.moveFile(tmpPath, finalPath);
if (metadata instanceof RemoteFileArtifactValueWithMaterializationData remote) {
remote.setContentsProxy(FileContentsProxy.create(finalPath.stat()));
}
}

private interface TaskWithTempPath {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,12 @@ void injectRemoteFile(PathFragment path, byte[] digest, long size, long expireAt
return;
}
var metadata =
RemoteFileArtifactValue.create(digest, size, /* locationIndex= */ 1, expireAtEpochMilli);
RemoteFileArtifactValue.createWithMaterializationData(
digest,
size,
/* locationIndex= */ 1,
expireAtEpochMilli,
/* materializationExecPath= */ null);
remoteOutputTree.injectFile(path, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.skyframe;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.util.concurrent.TimeUnit.MINUTES;

import com.google.common.base.Preconditions;
Expand All @@ -31,6 +32,7 @@
import com.google.devtools.build.lib.actions.Artifact.TreeFileArtifact;
import com.google.devtools.build.lib.actions.FileArtifactValue;
import com.google.devtools.build.lib.actions.FileArtifactValue.RemoteFileArtifactValue;
import com.google.devtools.build.lib.actions.FileArtifactValue.RemoteFileArtifactValueWithMaterializationData;
import com.google.devtools.build.lib.actions.FileStateType;
import com.google.devtools.build.lib.actions.RemoteArtifactChecker;
import com.google.devtools.build.lib.concurrent.ExecutorUtil;
Expand Down Expand Up @@ -476,21 +478,39 @@ private boolean treeArtifactIsDirty(Artifact artifact, TreeArtifactValue value)

// This could be improved by short-circuiting as soon as we see a child that is not present in
// the TreeArtifactValue, but it doesn't seem to be a major source of overhead.
// visitTree() is called from multiple threads in parallel so this need to be a hash set
Set<PathFragment> currentChildren = Sets.newConcurrentHashSet();
// visitTree() is called from multiple threads in parallel so this need to be a concurrent set
Set<PathFragment> currentLocalChildren = Sets.newConcurrentHashSet();
try {
TreeArtifactValue.visitTree(
path,
(child, type, traversedSymlink) -> {
if (type != Dirent.Type.DIRECTORY) {
currentChildren.add(child);
currentLocalChildren.add(child);
}
});
} catch (IOException e) {
return true;
}
return !(currentChildren.isEmpty() && value.isEntirelyRemote())
&& !currentChildren.equals(value.getChildPaths());

if (currentLocalChildren.isEmpty() && value.isEntirelyRemote()) {
return false;
}

var lastKnownLocalChildren =
value.getChildValues().entrySet().stream()
.filter(
entry -> {
var metadata = entry.getValue();
if (!(metadata
instanceof RemoteFileArtifactValueWithMaterializationData remote)) {
return true;
}
return remote.getContentsProxy() != null;
})
.map(entry -> entry.getKey().getParentRelativePath())
.collect(toImmutableSet());

return !currentLocalChildren.equals(lastKnownLocalChildren);
}

private boolean artifactIsDirtyWithDirectSystemCalls(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -512,14 +512,14 @@ private RemoteFileArtifactValue createRemoteFileMetadata(String content) {
private RemoteFileArtifactValue createRemoteFileMetadata(
String content, @Nullable PathFragment materializationExecPath) {
byte[] bytes = content.getBytes(UTF_8);
return RemoteFileArtifactValue.create(
return RemoteFileArtifactValue.createWithMaterializationData(
digest(bytes), bytes.length, 1, /* expireAtEpochMilli= */ -1, materializationExecPath);
}

private RemoteFileArtifactValue createRemoteFileMetadata(
String content, long expireAtEpochMilli, @Nullable PathFragment materializationExecPath) {
byte[] bytes = content.getBytes(UTF_8);
return RemoteFileArtifactValue.create(
return RemoteFileArtifactValue.createWithMaterializationData(
digest(bytes), bytes.length, 1, expireAtEpochMilli, materializationExecPath);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,7 @@
public class CompletionContextTest {
private static final FileArtifactValue DUMMY_METADATA =
RemoteFileArtifactValue.create(
/* digest= */ new byte[0],
/* size= */ 0,
/* locationIndex= */ 0,
/* expireAtEpochMilli= */ -1);
/* digest= */ new byte[0], /* size= */ 0, /* locationIndex= */ 0);

private Path execRoot;
private ArtifactRoot outputRoot;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ private RemoteFileArtifactValue createRemoteMetadata(
.getHashFunction()
.hashBytes(bytes)
.asBytes();
return RemoteFileArtifactValue.create(
return RemoteFileArtifactValue.createWithMaterializationData(
digest, bytes.length, 1, expireAtEpochMilli, materializationExecPath);
}

Expand Down
Loading

0 comments on commit c5c5bae

Please sign in to comment.