Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[7.1.0] Attempt to fix cancellation crash in repo fetching w/ worker thread #21599

Merged
merged 1 commit into from
Mar 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Table;
import com.google.common.util.concurrent.Uninterruptibles;
import com.google.devtools.build.lib.analysis.BlazeDirectories;
import com.google.devtools.build.lib.analysis.RuleDefinition;
import com.google.devtools.build.lib.bazel.repository.RepositoryResolvedEvent;
Expand All @@ -38,6 +39,7 @@
import com.google.devtools.build.lib.profiler.Profiler;
import com.google.devtools.build.lib.profiler.ProfilerTask;
import com.google.devtools.build.lib.profiler.SilentCloseable;
import com.google.devtools.build.lib.repository.RepositoryFetchProgress;
import com.google.devtools.build.lib.rules.repository.NeedsSkyframeRestartException;
import com.google.devtools.build.lib.rules.repository.RepoRecordedInput;
import com.google.devtools.build.lib.rules.repository.RepositoryDirectoryValue;
Expand All @@ -57,6 +59,7 @@
import com.google.devtools.build.skyframe.SkyKey;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -162,7 +165,18 @@ public RepositoryDirectoryValue.Builder fetch(
// restart, and need to send over a fresh Environment.
state.delegateEnvQueue.put(env);
}
switch (state.signalQueue.take()) {
Signal signal;
try {
signal = state.signalQueue.take();
} catch (InterruptedException e) {
// This means that we caught a Ctrl-C. Make sure to close the state object to interrupt the
// worker thread, wait for it to finish, and then propagate the InterruptedException.
state.close();
signal = Uninterruptibles.takeUninterruptibly(state.signalQueue);
Thread.interrupted(); // clear the interrupted status
throw new InterruptedException();
}
switch (signal) {
case RESTART:
return null;
case DONE:
Expand All @@ -175,18 +189,15 @@ public RepositoryDirectoryValue.Builder fetch(
Throwables.throwIfUnchecked(e.getCause());
throw new IllegalStateException(
"unexpected exception type: " + e.getClass(), e.getCause());
} finally {
// Make sure we interrupt the worker thread if work on the Skyframe thread were cut short
// for any reason.
state.close();
try {
// Synchronously wait for the worker thread to finish any remaining work.
workerFuture.get();
} catch (ExecutionException e) {
// When this happens, we either already dealt with the exception (see `catch` clause
// above), or we're in the middle of propagating an InterruptedException in which case
// we don't care about the result of execution anyway.
}
} catch (CancellationException e) {
// This can only happen if the state object was invalidated due to memory pressure, in
// which case we can simply reattempt the fetch.
env.getListener()
.post(
RepositoryFetchProgress.ongoing(
RepositoryName.createUnvalidated(rule.getName()),
"fetch interrupted due to memory pressure; restarting."));
return fetch(rule, outputDirectory, directories, env, recordedInputValues, key);
}
}
// TODO(wyv): use a switch expression above instead and remove this.
Expand Down
Loading