Skip to content

Commit

Permalink
Pass flag if task is speculative from EventDrivenFaultTolerantQuerySc…
Browse files Browse the repository at this point in the history
…heduler
  • Loading branch information
losipiuk committed May 11, 2023
1 parent 366c404 commit 16aba82
Showing 1 changed file with 29 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.lang.Math.round;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
Expand Down Expand Up @@ -883,12 +884,12 @@ private void scheduleTasks()
long tasksWaitingForNode = preSchedulingTaskContexts.values().stream().filter(context -> !context.getNodeLease().getNode().isDone()).count();

while (tasksWaitingForNode < maxTasksWaitingForNode && !schedulingQueue.isEmpty()) {
ScheduledTask scheduledTask = schedulingQueue.pollOrThrow();
StageExecution stageExecution = getStageExecution(scheduledTask.stageId());
PrioritizedScheduledTask scheduledTask = schedulingQueue.pollOrThrow();
StageExecution stageExecution = getStageExecution(scheduledTask.task().stageId());
if (stageExecution.getState().isDone()) {
continue;
}
int partitionId = scheduledTask.partitionId();
int partitionId = scheduledTask.task().partitionId();
Optional<NodeRequirements> nodeRequirements = stageExecution.getNodeRequirements(partitionId);
if (nodeRequirements.isEmpty()) {
// execution finished
Expand All @@ -897,7 +898,7 @@ private void scheduleTasks()
MemoryRequirements memoryRequirements = stageExecution.getMemoryRequirements(partitionId);
NodeLease lease = nodeAllocator.acquire(nodeRequirements.get(), memoryRequirements.getRequiredMemory());
lease.getNode().addListener(() -> eventQueue.add(Event.WAKE_UP), queryExecutor);
preSchedulingTaskContexts.put(scheduledTask, new PreSchedulingTaskContext(lease));
preSchedulingTaskContexts.put(scheduledTask.task(), new PreSchedulingTaskContext(lease, scheduledTask.isSpeculative()));
tasksWaitingForNode++;
}
}
Expand Down Expand Up @@ -964,7 +965,7 @@ public void onSinkInstanceHandleAcquired(SinkInstanceHandleAcquiredEvent sinkIns

try {
InternalNode node = getDone(nodeLease.getNode());
Optional<RemoteTask> remoteTask = stageExecution.schedule(partitionId, sinkInstanceHandle, attempt, node);
Optional<RemoteTask> remoteTask = stageExecution.schedule(partitionId, sinkInstanceHandle, attempt, node, context.isSpeculative());
remoteTask.ifPresent(task -> {
task.addStateChangeListener(createExchangeSinkInstanceHandleUpdateRequiredListener());
task.addStateChangeListener(taskStatus -> {
Expand Down Expand Up @@ -1111,8 +1112,10 @@ public void onSplitAssignment(SplitAssignmentEvent event)
assignment.sealedPartitions().forEach(partitionId -> {
Optional<PrioritizedScheduledTask> scheduledTask = stageExecution.sealPartition(partitionId);
scheduledTask.ifPresent(prioritizedTask -> {
if (preSchedulingTaskContexts.containsKey(prioritizedTask.task())) {
PreSchedulingTaskContext context = preSchedulingTaskContexts.get(prioritizedTask.task());
if (context != null) {
// task is already waiting for node or for sink instance handle
context.setSpeculative(prioritizedTask.isSpeculative()); // update speculative flag
return;
}
schedulingQueue.addOrUpdate(prioritizedTask);
Expand Down Expand Up @@ -1362,7 +1365,7 @@ public Optional<EventDrivenFaultTolerantQueryScheduler.GetExchangeSinkInstanceHa
attempt));
}

public Optional<RemoteTask> schedule(int partitionId, ExchangeSinkInstanceHandle exchangeSinkInstanceHandle, int attempt, InternalNode node)
public Optional<RemoteTask> schedule(int partitionId, ExchangeSinkInstanceHandle exchangeSinkInstanceHandle, int attempt, InternalNode node, boolean speculative)
{
if (getState().isDone()) {
return Optional.empty();
Expand Down Expand Up @@ -1404,7 +1407,7 @@ public Optional<RemoteTask> schedule(int partitionId, ExchangeSinkInstanceHandle
splits,
noMoreSplits,
Optional.of(partition.getMemoryRequirements().getRequiredMemory()),
false); // TODO pass correct value here
speculative);
task.ifPresent(remoteTask -> {
partition.addTask(remoteTask, outputBuffers);
runningPartitions.add(partitionId);
Expand Down Expand Up @@ -2042,15 +2045,16 @@ public int getNonSpeculativeTaskCount()
return nonSpeculativeTaskCount;
}

public ScheduledTask pollOrThrow()
public PrioritizedScheduledTask pollOrThrow()
{
ScheduledTask task = queue.poll();
IndexedPriorityQueue.Prioritized<ScheduledTask> task = queue.pollPrioritized();
checkState(task != null, "queue is empty");
if (nonSpeculativeTaskCount > 0) {
// non speculative tasks are always pooled first
nonSpeculativeTaskCount--;
}
return task;
// negate priority to reverse operation we do in addOrUpdate
return new PrioritizedScheduledTask(task.getValue(), toIntExact(-task.getPriority()));
}

public void addOrUpdate(PrioritizedScheduledTask prioritizedTask)
Expand Down Expand Up @@ -2332,18 +2336,31 @@ private record GetExchangeSinkInstanceHandleResult(CompletableFuture<ExchangeSin
private static class PreSchedulingTaskContext
{
private final NodeLease nodeLease;
private boolean speculative;
private boolean waitingForSinkInstanceHandle;

public PreSchedulingTaskContext(NodeLease nodeLease)
public PreSchedulingTaskContext(NodeLease nodeLease, boolean speculative)
{
this.nodeLease = requireNonNull(nodeLease, "nodeLease is null");
this.speculative = speculative;
}

public NodeLease getNodeLease()
{
return nodeLease;
}

public boolean isSpeculative()
{
return speculative;
}

public void setSpeculative(boolean speculative)
{
checkArgument(!speculative || this.speculative, "cannot change speculative flag false -> true");
this.speculative = speculative;
}

public boolean isWaitingForSinkInstanceHandle()
{
return waitingForSinkInstanceHandle;
Expand Down

0 comments on commit 16aba82

Please sign in to comment.