Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extra resources #16785

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,30 @@ public String parseIfMatches(String tag) throws ValidationException {
return null;
});

/** How many extra resources an action requires for execution. */
public static final ParseableRequirement RESOURCES =
ParseableRequirement.create(
"resources:<str>:<float>",
Pattern.compile("resources:(.+:.+)"),
drewmacrae marked this conversation as resolved.
Show resolved Hide resolved
s -> {
Preconditions.checkNotNull(s);

int splitIndex = s.indexOf(":");
String resourceCount = s.substring(splitIndex+1);
float value;
try {
value = Float.parseFloat(resourceCount);
} catch (NumberFormatException e) {
return "can't be parsed as a float";
}

if (value < 0) {
return "can't be negative";
}

return null;
});

/** If an action supports running in persistent worker mode. */
public static final String SUPPORTS_WORKERS = "supports-workers";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,13 @@
import com.google.devtools.build.lib.worker.WorkerPool;
import java.io.IOException;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import javax.annotation.Nullable;

Expand Down Expand Up @@ -183,14 +188,16 @@ public double getUsedCPU() {
// definition in the ResourceSet class.
private double usedRam;

// Used amount of extra resources. Corresponds to the extra resource
// definition in the ResourceSet class.
private Map<String, Float> usedExtraResources;

// Used local test count. Corresponds to the local test count definition in the ResourceSet class.
private int usedLocalTestCount;

/** If set, local-only actions are given priority over dynamically run actions. */
private boolean prioritizeLocalActions;

private ResourceManager() {}

@VisibleForTesting
public static ResourceManager instanceForTestingOnly() {
return new ResourceManager();
Expand All @@ -204,6 +211,7 @@ public static ResourceManager instanceForTestingOnly() {
public synchronized void resetResourceUsage() {
usedCpu = 0;
usedRam = 0;
usedExtraResources = new HashMap<>();
usedLocalTestCount = 0;
for (Pair<ResourceSet, LatchWithWorker> request : localRequests) {
request.second.latch.countDown();
Expand Down Expand Up @@ -298,6 +306,17 @@ private Worker incrementResources(ResourceSet resources)
throws IOException, InterruptedException {
usedCpu += resources.getCpuUsage();
usedRam += resources.getMemoryMb();

resources.getExtraResourceUsage().entrySet().forEach(
resource -> {
String key = (String)resource.getKey();
float value = resource.getValue();
if (usedExtraResources.containsKey(key)) {
value += (float)usedExtraResources.get(key);
}
usedExtraResources.put(key, value);
});

usedLocalTestCount += resources.getLocalTestCount();

if (resources.getWorkerKey() != null) {
Expand All @@ -310,6 +329,7 @@ private Worker incrementResources(ResourceSet resources)
public synchronized boolean inUse() {
return usedCpu != 0.0
|| usedRam != 0.0
|| !usedExtraResources.isEmpty()
|| usedLocalTestCount != 0
|| !localRequests.isEmpty()
|| !dynamicWorkerRequests.isEmpty()
Expand Down Expand Up @@ -369,7 +389,7 @@ public void acquireResourceOwnership() {
* wait.
*/
private synchronized LatchWithWorker acquire(ResourceSet resources, ResourcePriority priority)
throws IOException, InterruptedException {
throws IOException, InterruptedException, NoSuchElementException {
if (areResourcesAvailable(resources)) {
Worker worker = incrementResources(resources);
return new LatchWithWorker(/* latch= */ null, worker);
Expand Down Expand Up @@ -417,6 +437,7 @@ private boolean release(ResourceSet resources, @Nullable Worker worker)
private synchronized void releaseResourcesOnly(ResourceSet resources) {
usedCpu -= resources.getCpuUsage();
usedRam -= resources.getMemoryMb();

usedLocalTestCount -= resources.getLocalTestCount();

// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
Expand All @@ -428,6 +449,19 @@ private synchronized void releaseResourcesOnly(ResourceSet resources) {
if (usedRam < epsilon) {
usedRam = 0;
}

Set<String> toRemove = new HashSet<>();
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String)resource.getKey();
float value = (float)usedExtraResources.get(key) - resource.getValue();
usedExtraResources.put(key, value);
if (value < epsilon) {
toRemove.add(key);
}
}
for (String key : toRemove) {
usedExtraResources.remove(key);
}
}

private synchronized boolean processAllWaitingThreads() throws IOException, InterruptedException {
Expand Down Expand Up @@ -466,9 +500,39 @@ private synchronized void processWaitingThreads(Deque<Pair<ResourceSet, LatchWit
}
}

/**
* Throws an exception if requested extra resource isn't being tracked
*/
private void assertExtraResourcesTracked(ResourceSet resources)
throws NoSuchElementException {
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String)resource.getKey();
if (!availableResources.getExtraResourceUsage().containsKey(key)) {
throw new NoSuchElementException("Resource "+key+" is not tracked in this resource set.");
}
}
}

/**
* Return true iff all requested extra resources are considered to be available.
*/
private boolean areExtraResourcesAvailable(ResourceSet resources) throws NoSuchElementException {
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String)resource.getKey();
float used = (float)usedExtraResources.getOrDefault(key, 0f);
float requested = resource.getValue();
float available = availableResources.getExtraResourceUsage().get(key);
float epsilon = 0.0001f; // Account for possible rounding errors.
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) {
return false;
}
}
return true;
}

// Method will return true if all requested resources are considered to be available.
@VisibleForTesting
boolean areResourcesAvailable(ResourceSet resources) {
boolean areResourcesAvailable(ResourceSet resources) throws NoSuchElementException {
Preconditions.checkNotNull(availableResources);
// Comparison below is robust, since any calculation errors will be fixed
// by the release() method.
Expand All @@ -484,7 +548,11 @@ boolean areResourcesAvailable(ResourceSet resources) {
workerKey == null
|| (activeWorkers < availableWorkers && workerPool.couldBeBorrowed(workerKey));

if (usedCpu == 0.0 && usedRam == 0.0 && usedLocalTestCount == 0 && workerIsAvailable) {
// We test for tracking of extra resources whenever acquired and throw an
// exception before acquiring any untracked resource.
assertExtraResourcesTracked(resources);

if (usedCpu == 0.0 && usedRam == 0.0 && usedExtraResources.isEmpty() && usedLocalTestCount == 0 && workerIsAvailable) {
return true;
}
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for
Expand Down Expand Up @@ -515,7 +583,8 @@ boolean areResourcesAvailable(ResourceSet resources) {
localTestCount == 0
|| usedLocalTestCount == 0
|| usedLocalTestCount + localTestCount <= availableLocalTestCount;
return cpuIsAvailable && ramIsAvailable && localTestCountIsAvailable && workerIsAvailable;
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources);
return cpuIsAvailable && ramIsAvailable && extraResourcesIsAvailable && localTestCountIsAvailable && workerIsAvailable;
}

@VisibleForTesting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

package com.google.devtools.build.lib.actions;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.primitives.Doubles;
import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable;
import com.google.devtools.build.lib.util.OS;
Expand Down Expand Up @@ -43,6 +45,12 @@ public class ResourceSet implements ResourceSetOrBuilder {
/** The number of CPUs, or fractions thereof. */
private final double cpuUsage;

/**
* Map of extra resources (for example: GPUs, embedded boards, ...) mapping
* name of the resource to a value.
*/
private final ImmutableMap<String, Float> extraResourceUsage;

/** The number of local tests. */
private final int localTestCount;

Expand All @@ -51,8 +59,13 @@ public class ResourceSet implements ResourceSetOrBuilder {

private ResourceSet(
double memoryMb, double cpuUsage, int localTestCount, @Nullable WorkerKey workerKey) {
this(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, workerKey);
}

private ResourceSet(double memoryMb, double cpuUsage, @Nullable ImmutableMap<String, Float> extraResourceUsage, int localTestCount, @Nullable WorkerKey workerKey) {
this.memoryMb = memoryMb;
drewmacrae marked this conversation as resolved.
Show resolved Hide resolved
this.cpuUsage = cpuUsage;
this.extraResourceUsage = extraResourceUsage;
this.localTestCount = localTestCount;
this.workerKey = workerKey;
}
Expand Down Expand Up @@ -83,21 +96,35 @@ public static ResourceSet createWithLocalTestCount(int localTestCount) {
}

/**
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, ioUsage, and
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, and
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or {@link
* #createWithLocalTestCount(int)}. Use this method primarily when constructing ResourceSets that
* represent available resources.
*/
public static ResourceSet create(double memoryMb, double cpuUsage, int localTestCount) {
return createWithWorkerKey(memoryMb, cpuUsage, localTestCount, /* workerKey= */ null);
return ResourceSet.createWithWorkerKey(memoryMb, cpuUsage, ImmutableMap.of(), localTestCount, /* wolkerKey= */ null);
}

/**
* Returns a new ResourceSet with the provided values for memoryMb, cpuUsage, extraResources, and
* localTestCount. Most action resource definitions should use {@link #createWithRamCpu} or
* {@link #createWithLocalTestCount(int)}. Use this method primarily when constructing
* ResourceSets that represent available resources.
*/
public static ResourceSet create(double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount) {
return createWithWorkerKey(memoryMb, cpuUsage, extraResourceUsage, localTestCount, /* workerKey= */ null);
}

public static ResourceSet createWithWorkerKey(double memoryMb, double cpuUsage, int localTestCount, WorkerKey workerKey) {
return ResourceSet.createWithWorkerKey(memoryMb, cpuUsage, /* extraResourceUsage= */ImmutableMap.of(), localTestCount, workerKey);
}

public static ResourceSet createWithWorkerKey(
double memoryMb, double cpuUsage, int localTestCount, WorkerKey workerKey) {
if (memoryMb == 0 && cpuUsage == 0 && localTestCount == 0 && workerKey == null) {
double memoryMb, double cpuUsage, ImmutableMap<String, Float> extraResourceUsage, int localTestCount, WorkerKey workerKey) {
if (memoryMb == 0 && cpuUsage == 0 && extraResourceUsage.size() == 0 && localTestCount == 0 && workerKey == null) {
return ZERO;
}
return new ResourceSet(memoryMb, cpuUsage, localTestCount, workerKey);
return new ResourceSet(memoryMb, cpuUsage, extraResourceUsage, localTestCount, workerKey);
}

/** Returns the amount of real memory (resident set size) used in MB. */
Expand All @@ -124,6 +151,10 @@ public double getCpuUsage() {
return cpuUsage;
}

public ImmutableMap<String, Float> getExtraResourceUsage() {
return extraResourceUsage;
}

/** Returns the local test count used. */
public int getLocalTestCount() {
return localTestCount;
Expand All @@ -138,6 +169,7 @@ public String toString() {
+ "CPU: "
+ cpuUsage
+ "\n"
+ Joiner.on("\n").withKeyValueSeparator(": ").join(extraResourceUsage.entrySet())
+ "Local tests: "
+ localTestCount
+ "\n";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.google.devtools.build.lib.server.FailureDetails.FailureDetail;
import com.google.devtools.build.lib.server.FailureDetails.TestAction;
import com.google.devtools.build.lib.server.FailureDetails.TestAction.Code;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -160,25 +161,29 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs
}

ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
return ResourceSet.create(
testResourcesFromSize.getMemoryMb(),
getLocalCpuResourceUsage(label),
getLocalExtraResourceUsage(label),
testResourcesFromSize.getLocalTestCount());
}

private double getLocalCpuResourceUsage(Label label) throws UserExecException {
ResourceSet testResourcesFromSize = TestTargetProperties.getResourceSetFromSize(size);
// Tests can override their CPU reservation with a "cpu:<n>" tag.
ResourceSet testResourcesFromTag = null;
double cpuCount = -1.0;
for (String tag : executionInfo.keySet()) {
try {
String cpus = ExecutionRequirements.CPU.parseIfMatches(tag);
if (cpus != null) {
if (testResourcesFromTag != null) {
if (cpuCount != -1.0) {
String message =
String.format(
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
label, ExecutionRequirements.CPU.userFriendlyName());
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
}
testResourcesFromTag =
ResourceSet.create(
testResourcesFromSize.getMemoryMb(),
Float.parseFloat(cpus),
testResourcesFromSize.getLocalTestCount());
cpuCount = Float.parseFloat(cpus);
}
} catch (ValidationException e) {
String message =
Expand All @@ -191,10 +196,44 @@ public ResourceSet getLocalResourceUsage(Label label, boolean usingLocalTestJobs
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
}
}
return cpuCount != -1.0 ? cpuCount : testResourcesFromSize.getCpuUsage();
}

return testResourcesFromTag != null ? testResourcesFromTag : testResourcesFromSize;
private ImmutableMap<String, Float> getLocalExtraResourceUsage(Label label) throws UserExecException {
// Tests can specify requirements for extra resources using "resources:<resourcename>:<amount>" tag.
Map<String, Float> extraResources = new HashMap<>();
for (String tag : executionInfo.keySet()) {
try {
String extras = ExecutionRequirements.RESOURCES.parseIfMatches(tag);
if (extras != null) {
int splitIndex = extras.indexOf(":");
String resourceName = extras.substring(0, splitIndex);
String resourceCount = extras.substring(splitIndex+1);
if (extraResources.get(resourceName) != null) {
String message =
String.format(
"%s has more than one '%s' tag, but duplicate tags aren't allowed",
drewmacrae marked this conversation as resolved.
Show resolved Hide resolved
label, ExecutionRequirements.RESOURCES.userFriendlyName());
throw new UserExecException(createFailureDetail(message, Code.DUPLICATE_CPU_TAGS));
}
extraResources.put(resourceName, Float.parseFloat(resourceCount));
}
} catch (ValidationException e) {
String message =
String.format(
"%s has a '%s' tag, but its value '%s' didn't pass validation: %s",
label,
ExecutionRequirements.RESOURCES.userFriendlyName(),
e.getTagValue(),
e.getMessage());
throw new UserExecException(createFailureDetail(message, Code.INVALID_CPU_TAG));
}
}
return ImmutableMap.copyOf(extraResources);
}



private static FailureDetail createFailureDetail(String message, Code detailedCode) {
return FailureDetail.newBuilder()
.setMessage(message)
Expand Down
Loading