-
Notifications
You must be signed in to change notification settings - Fork 4.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement extra resources for test actions #13996
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -152,6 +152,35 @@ public String parseIfMatches(String tag) throws ValidationException { | |
return null; | ||
}); | ||
|
||
/** How many extra resources an action requires for execution. */ | ||
public static final ParseableRequirement RESOURCES = | ||
ParseableRequirement.create( | ||
"resources:<str>:<int>", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why we are using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just to match the
In its current form, this change is matching the existing code: integers in execution requirements, floats in test tags, integer on the command line, and floats in the resource manager code. But I'm open to suggestions if we want to use slightly different conventions than what is used for CPU resources? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. floats feel like they'd be more flexible, I can make the change here in #16785. |
||
Pattern.compile("resources:(.+:.+)"), | ||
s -> { | ||
Preconditions.checkNotNull(s); | ||
|
||
int splitIndex = s.indexOf(":"); | ||
String resourceCount = s.substring(splitIndex+1); | ||
int value; | ||
try { | ||
value = Integer.parseInt(resourceCount); | ||
} catch (NumberFormatException e) { | ||
return "can't be parsed as an integer"; | ||
} | ||
|
||
// De-and-reserialize & compare to only allow canonical integer formats. | ||
if (!Integer.toString(value).equals(resourceCount)) { | ||
return "must be in canonical format (e.g. '4' instead of '+04')"; | ||
wilwell marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
if (value < 1) { | ||
return "can't be zero or negative"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why some of values could not be zero? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is also just to match the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unlike with CPUs (that are required to orchestrate tests) it's likely GPU or embedded target based tests will be in test suites with unit tests that don't require newly defined resources. This should be changed to accept values of 0. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I agree that we could accept zero values |
||
} | ||
|
||
return null; | ||
}); | ||
|
||
/** If an action supports running in persistent worker mode. */ | ||
public static final String SUPPORTS_WORKERS = "supports-workers"; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,9 +25,13 @@ | |
import com.google.devtools.build.lib.util.OS; | ||
import com.google.devtools.build.lib.util.Pair; | ||
import java.io.IOException; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.Iterator; | ||
import java.util.LinkedList; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.concurrent.CountDownLatch; | ||
|
||
/** | ||
|
@@ -137,6 +141,10 @@ public static ResourceManager instance() { | |
// definition in the ResourceSet class. | ||
private double usedRam; | ||
|
||
// Used amount of extra resources. Corresponds to the extra resource | ||
// definition in the ResourceSet class. | ||
private Map<String, Float> usedExtraResources; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please sync the changes from the repository. This file had significant changes during last month. |
||
|
||
// Used local test count. Corresponds to the local test count definition in the ResourceSet class. | ||
private int usedLocalTestCount; | ||
|
||
|
@@ -145,6 +153,7 @@ public static ResourceManager instance() { | |
|
||
private ResourceManager() { | ||
requestList = new LinkedList<>(); | ||
usedExtraResources = new HashMap<>(); | ||
} | ||
|
||
@VisibleForTesting public static ResourceManager instanceForTestingOnly() { | ||
|
@@ -158,6 +167,7 @@ private ResourceManager() { | |
public synchronized void resetResourceUsage() { | ||
usedCpu = 0; | ||
usedRam = 0; | ||
usedExtraResources = new HashMap<>(); | ||
usedLocalTestCount = 0; | ||
for (Pair<ResourceSet, CountDownLatch> request : requestList) { | ||
// CountDownLatch can be set only to 0 or 1. | ||
|
@@ -177,6 +187,7 @@ public synchronized void setAvailableResources(ResourceSet resources) { | |
ResourceSet.create( | ||
staticResources.getMemoryMb(), | ||
staticResources.getCpuUsage(), | ||
staticResources.getExtraResourceUsage(), | ||
staticResources.getLocalTestCount()); | ||
processWaitingThreads(); | ||
} | ||
|
@@ -265,14 +276,25 @@ ResourceHandle tryAcquire(ActionExecutionMetadata owner, ResourceSet resources) | |
private void incrementResources(ResourceSet resources) { | ||
usedCpu += resources.getCpuUsage(); | ||
usedRam += resources.getMemoryMb(); | ||
|
||
resources.getExtraResourceUsage().entrySet().forEach( | ||
resource -> { | ||
String key = (String)resource.getKey(); | ||
float value = resource.getValue(); | ||
if (usedExtraResources.containsKey(key)) { | ||
value += (float)usedExtraResources.get(key); | ||
} | ||
usedExtraResources.put(key, value); | ||
}); | ||
|
||
usedLocalTestCount += resources.getLocalTestCount(); | ||
} | ||
|
||
/** | ||
* Return true if any resources have been claimed through this manager. | ||
*/ | ||
public synchronized boolean inUse() { | ||
return usedCpu != 0.0 || usedRam != 0.0 || usedLocalTestCount != 0 || !requestList.isEmpty(); | ||
return usedCpu != 0.0 || usedRam != 0.0 || !usedExtraResources.isEmpty() || usedLocalTestCount != 0 || !requestList.isEmpty(); | ||
} | ||
|
||
|
||
|
@@ -323,6 +345,13 @@ private synchronized CountDownLatch acquire(ResourceSet resources) { | |
private synchronized boolean release(ResourceSet resources) { | ||
usedCpu -= resources.getCpuUsage(); | ||
usedRam -= resources.getMemoryMb(); | ||
|
||
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) { | ||
String key = (String)resource.getKey(); | ||
float value = (float)usedExtraResources.get(key) - resource.getValue(); | ||
usedExtraResources.put(key, value); | ||
} | ||
|
||
usedLocalTestCount -= resources.getLocalTestCount(); | ||
|
||
// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being | ||
|
@@ -334,6 +363,20 @@ private synchronized boolean release(ResourceSet resources) { | |
if (usedRam < epsilon) { | ||
usedRam = 0; | ||
} | ||
|
||
Set<String> toRemove = new HashSet<>(); | ||
usedExtraResources.entrySet().forEach( | ||
resource -> { | ||
String key = (String)resource.getKey(); | ||
float value = (float)usedExtraResources.get(key); | ||
if (value < epsilon) { | ||
toRemove.add(key); | ||
} | ||
}); | ||
for (String key : toRemove) { | ||
usedExtraResources.remove(key); | ||
} | ||
|
||
if (!requestList.isEmpty()) { | ||
processWaitingThreads(); | ||
return true; | ||
|
@@ -361,12 +404,29 @@ private synchronized void processWaitingThreads() { | |
} | ||
} | ||
|
||
/** | ||
* Return true iff all requested extra resources are considered to be available. | ||
*/ | ||
private boolean areExtraResourcesAvailable(ResourceSet resources) { | ||
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) { | ||
String key = (String)resource.getKey(); | ||
float used = (float)usedExtraResources.getOrDefault(key, 0f); | ||
float requested = resource.getValue(); | ||
float available = (float)availableResources.getExtraResourceUsage().getOrDefault(key, 0f); | ||
float epsilon = 0.0001f; // Account for possible rounding errors. | ||
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
// Method will return true if all requested resources are considered to be available. | ||
private boolean areResourcesAvailable(ResourceSet resources) { | ||
Preconditions.checkNotNull(availableResources); | ||
// Comparison below is robust, since any calculation errors will be fixed | ||
// by the release() method. | ||
if (usedCpu == 0.0 && usedRam == 0.0 && usedLocalTestCount == 0) { | ||
if (usedCpu == 0.0 && usedRam == 0.0 && usedExtraResources.isEmpty() && usedLocalTestCount == 0) { | ||
return true; | ||
} | ||
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for | ||
|
@@ -410,9 +470,10 @@ private boolean areResourcesAvailable(ResourceSet resources) { | |
// 3) If used resource amount is less than total available resource amount. | ||
boolean cpuIsAvailable = cpu == 0.0 || usedCpu == 0.0 || usedCpu + cpu <= availableCpu; | ||
boolean ramIsAvailable = ram == 0.0 || usedRam == 0.0 || ram <= remainingRam; | ||
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources); | ||
boolean localTestCountIsAvailable = localTestCount == 0 || usedLocalTestCount == 0 | ||
|| usedLocalTestCount + localTestCount <= availableLocalTestCount; | ||
return cpuIsAvailable && ramIsAvailable && localTestCountIsAvailable; | ||
return cpuIsAvailable && ramIsAvailable && extraResourcesIsAvailable && localTestCountIsAvailable; | ||
} | ||
|
||
@VisibleForTesting | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,13 +14,16 @@ | |
|
||
package com.google.devtools.build.lib.actions; | ||
|
||
import com.google.common.base.Joiner; | ||
import com.google.common.base.Splitter; | ||
import com.google.common.collect.ImmutableMap; | ||
import com.google.common.primitives.Doubles; | ||
import com.google.devtools.build.lib.collect.nestedset.NestedSet; | ||
import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable; | ||
import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec; | ||
import com.google.devtools.common.options.Converter; | ||
import com.google.devtools.common.options.OptionsParsingException; | ||
import io.reactivex.rxjava3.annotations.NonNull; | ||
import java.util.Iterator; | ||
import java.util.NoSuchElementException; | ||
|
||
|
@@ -43,12 +46,23 @@ public class ResourceSet implements ResourceSetOrBuilder { | |
/** The number of CPUs, or fractions thereof. */ | ||
private final double cpuUsage; | ||
|
||
/** | ||
* Map of extra resources (for example: GPUs, embedded boards, ...) mapping | ||
* name of the resource to a value. | ||
*/ | ||
private final ImmutableMap<String, Float> extraResourceUsage; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please mention what type of resources it could be? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
/** The number of local tests. */ | ||
private final int localTestCount; | ||
|
||
private ResourceSet(double memoryMb, double cpuUsage, int localTestCount) { | ||
this(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount); | ||
} | ||
|
||
private ResourceSet(double memoryMb, double cpuUsage, @NonNull ImmutableMap<String, Float> extraResourceUsage, int localTestCount) { | ||
this.memoryMb = memoryMb; | ||
this.cpuUsage = cpuUsage; | ||
this.extraResourceUsage = extraResourceUsage; | ||
this.localTestCount = localTestCount; | ||
} | ||
|
||
|
@@ -74,18 +88,28 @@ public static ResourceSet createWithLocalTestCount(int localTestCount) { | |
} | ||
|
||
/** | ||
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, ioUsage, and | ||
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, and | ||
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or | ||
* {@link #createWithLocalTestCount(int)}. Use this method primarily when constructing | ||
* ResourceSets that represent available resources. | ||
*/ | ||
public static ResourceSet create(double memoryMb, double cpuUsage, int localTestCount) { | ||
return ResourceSet.create(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount); | ||
} | ||
|
||
/** | ||
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, extraResources, and | ||
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or | ||
* {@link #createWithLocalTestCount(int)}. Use this method primarily when constructing | ||
* ResourceSets that represent available resources. | ||
*/ | ||
@AutoCodec.Instantiator | ||
public static ResourceSet create( | ||
double memoryMb, double cpuUsage, int localTestCount) { | ||
if (memoryMb == 0 && cpuUsage == 0 && localTestCount == 0) { | ||
double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount) { | ||
if (memoryMb == 0 && cpuUsage == 0 && extraResourceUsage.size() == 0 && localTestCount == 0) { | ||
return ZERO; | ||
} | ||
return new ResourceSet(memoryMb, cpuUsage, localTestCount); | ||
return new ResourceSet(memoryMb, cpuUsage, extraResourceUsage, localTestCount); | ||
} | ||
|
||
/** Returns the amount of real memory (resident set size) used in MB. */ | ||
|
@@ -105,6 +129,10 @@ public double getCpuUsage() { | |
return cpuUsage; | ||
} | ||
|
||
public ImmutableMap<String, Float> getExtraResourceUsage() { | ||
return extraResourceUsage; | ||
} | ||
|
||
/** Returns the local test count used. */ | ||
public int getLocalTestCount() { | ||
return localTestCount; | ||
|
@@ -115,6 +143,7 @@ public String toString() { | |
return "Resources: \n" | ||
+ "Memory: " + memoryMb + "M\n" | ||
+ "CPU: " + cpuUsage + "\n" | ||
+ Joiner.on("\n").withKeyValueSeparator(": ").join(extraResourceUsage.entrySet()) | ||
+ "Local tests: " + localTestCount + "\n"; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,7 @@ | |
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail; | ||
import com.google.devtools.build.lib.server.FailureDetails.TestAction; | ||
import com.google.devtools.build.lib.server.FailureDetails.TestAction.Code; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
|
@@ -162,32 +163,22 @@ public boolean isPersistentTestRunner() { | |
return isPersistentTestRunner; | ||
} | ||
|
||
public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs) | ||
throws UserExecException { | ||
if (usingLocalTestJobs) { | ||
return LOCAL_TEST_JOBS_BASED_RESOURCES; | ||
} | ||
|
||
private double getLocalCpuResourceUsage(Label label) throws UserExecException { | ||
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size); | ||
|
||
// Tests can override their CPU reservation with a "cpu:<n>" tag. | ||
ResourceSet testResourcesFromTag = null; | ||
double cpuCount = -1.0; | ||
for (String tag : executionInfo.keySet()) { | ||
try { | ||
String cpus = ExecutionRequirements.CPU.parseIfMatches(tag); | ||
if (cpus != null) { | ||
if (testResourcesFromTag != null) { | ||
if (cpuCount != -1.0) { | ||
String message = | ||
String.format( | ||
"%s has more than one '%s' tag, but duplicate tags aren't allowed", | ||
label, ExecutionRequirements.CPU.userFriendlyName()); | ||
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS)); | ||
} | ||
testResourcesFromTag = | ||
ResourceSet.create( | ||
testResourcesFromSize.getMemoryMb(), | ||
Float.parseFloat(cpus), | ||
testResourcesFromSize.getLocalTestCount()); | ||
cpuCount = Float.parseFloat(cpus); | ||
} | ||
} catch (ValidationException e) { | ||
String message = | ||
|
@@ -200,8 +191,54 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs | |
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG)); | ||
} | ||
} | ||
return cpuCount != -1.0 ? cpuCount : testResourcesFromSize.getCpuUsage(); | ||
} | ||
|
||
return testResourcesFromTag != null ? testResourcesFromTag : testResourcesFromSize; | ||
private ImmutableMap<String, Float> getLocalExtraResourceUsage(Label label) throws UserExecException { | ||
// Tests can specify requirements for extra resources using "resources:<resourcename>:<amount>" tag. | ||
Map<String, Float> extraResources = new HashMap<>(); | ||
for (String tag : executionInfo.keySet()) { | ||
try { | ||
String extras = ExecutionRequirements.RESOURCES.parseIfMatches(tag); | ||
if (extras != null) { | ||
int splitIndex = extras.indexOf(":"); | ||
String resourceName = extras.substring(0, splitIndex); | ||
String resourceCount = extras.substring(splitIndex+1); | ||
if (extraResources.get(resourceName) != null) { | ||
String message = | ||
String.format( | ||
"%s has more than one '%s' tag, but duplicate tags aren't allowed", | ||
label, ExecutionRequirements.RESOURCES.userFriendlyName()); | ||
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS)); | ||
} | ||
extraResources.put(resourceName, Float.parseFloat(resourceCount)); | ||
} | ||
} catch (ValidationException e) { | ||
String message = | ||
String.format( | ||
"%s has a '%s' tag, but its value '%s' didn't pass validation: %s", | ||
label, | ||
ExecutionRequirements.RESOURCES.userFriendlyName(), | ||
e.getTagValue(), | ||
e.getMessage()); | ||
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG)); | ||
} | ||
} | ||
return ImmutableMap.copyOf(extraResources); | ||
} | ||
|
||
public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please reaarange the methods to make the diff between two version more clear? |
||
throws UserExecException { | ||
if (usingLocalTestJobs) { | ||
return LOCAL_TEST_JOBS_BASED_RESOURCES; | ||
} | ||
|
||
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size); | ||
return ResourceSet.create( | ||
testResourcesFromSize.getMemoryMb(), | ||
getLocalCpuResourceUsage(label), | ||
getLocalExtraResourceUsage(label), | ||
testResourcesFromSize.getLocalTestCount()); | ||
} | ||
|
||
private static FailureDetail createFailureDetail(String message, Code detailedCode) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need this library?