Skip to content

Commit

Permalink
[flink] FlinkRunner initializes the same split twice (apache#31313)
Browse files Browse the repository at this point in the history
  • Loading branch information
je-ik committed Jan 15, 2025
1 parent e1245d9 commit 7680207
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
import org.apache.beam.runners.flink.translation.wrappers.streaming.io.source.unbounded.FlinkUnboundedSource;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.construction.UnboundedReadFromBoundedSource;
import org.apache.flink.api.common.eventtime.Watermark;
import org.apache.flink.api.connector.source.Boundedness;
import org.apache.flink.api.connector.source.Source;
Expand Down Expand Up @@ -73,18 +71,6 @@ public static <T> FlinkUnboundedSource<T> unbounded(
return new FlinkUnboundedSource<>(stepName, source, serializablePipelineOptions, numSplits);
}

public static FlinkUnboundedSource<byte[]> unboundedImpulse(long shutdownSourceAfterIdleMs) {
FlinkPipelineOptions flinkPipelineOptions = FlinkPipelineOptions.defaults();
flinkPipelineOptions.setShutdownSourcesAfterIdleMs(shutdownSourceAfterIdleMs);
return new FlinkUnboundedSource<>(
"Impulse",
new UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter<>(
new BeamImpulseSource()),
new SerializablePipelineOptions(flinkPipelineOptions),
1,
record -> BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
}

public static FlinkBoundedSource<byte[]> boundedImpulse() {
return new FlinkBoundedSource<>(
"Impulse",
Expand Down Expand Up @@ -130,7 +116,7 @@ public Boundedness getBoundedness() {
throws Exception {
FlinkSourceSplitEnumerator<T> enumerator =
new FlinkSourceSplitEnumerator<>(
enumContext, beamSource, serializablePipelineOptions.get(), numSplits);
enumContext, beamSource, serializablePipelineOptions.get(), numSplits, true);
checkpoint.forEach(
(subtaskId, splitsForSubtask) -> enumerator.addSplitsBack(splitsForSubtask, subtaskId));
return enumerator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,45 +63,58 @@ public FlinkSourceSplitEnumerator(
Source<T> beamSource,
PipelineOptions pipelineOptions,
int numSplits) {

this(context, beamSource, pipelineOptions, numSplits, false);
}

public FlinkSourceSplitEnumerator(
SplitEnumeratorContext<FlinkSourceSplit<T>> context,
Source<T> beamSource,
PipelineOptions pipelineOptions,
int numSplits,
boolean splitsInitialized) {

this.context = context;
this.beamSource = beamSource;
this.pipelineOptions = pipelineOptions;
this.numSplits = numSplits;
this.pendingSplits = new HashMap<>(numSplits);
this.splitsInitialized = false;
this.splitsInitialized = splitsInitialized;
}

@Override
public void start() {
context.callAsync(
() -> {
try {
LOG.info("Starting source {}", beamSource);
List<? extends Source<T>> beamSplitSourceList = splitBeamSource();
Map<Integer, List<FlinkSourceSplit<T>>> flinkSourceSplitsList = new HashMap<>();
int i = 0;
for (Source<T> beamSplitSource : beamSplitSourceList) {
int targetSubtask = i % context.currentParallelism();
List<FlinkSourceSplit<T>> splitsForTask =
flinkSourceSplitsList.computeIfAbsent(
targetSubtask, ignored -> new ArrayList<>());
splitsForTask.add(new FlinkSourceSplit<>(i, beamSplitSource));
i++;
if (!splitsInitialized) {
context.callAsync(
() -> {
try {
LOG.info("Starting source {}", beamSource);
List<? extends Source<T>> beamSplitSourceList = splitBeamSource();
Map<Integer, List<FlinkSourceSplit<T>>> flinkSourceSplitsList = new HashMap<>();
int i = 0;
for (Source<T> beamSplitSource : beamSplitSourceList) {
int targetSubtask = i % context.currentParallelism();
List<FlinkSourceSplit<T>> splitsForTask =
flinkSourceSplitsList.computeIfAbsent(
targetSubtask, ignored -> new ArrayList<>());
splitsForTask.add(new FlinkSourceSplit<>(i, beamSplitSource));
i++;
}
return flinkSourceSplitsList;
} catch (Exception e) {
throw new RuntimeException(e);
}
return flinkSourceSplitsList;
} catch (Exception e) {
throw new RuntimeException(e);
}
},
(sourceSplits, error) -> {
if (error != null) {
throw new RuntimeException("Failed to start source enumerator.", error);
} else {
pendingSplits.putAll(sourceSplits);
splitsInitialized = true;
sendPendingSplitsToSourceReaders();
}
});
},
(sourceSplits, error) -> {
if (error != null) {
throw new RuntimeException("Failed to start source enumerator.", error);
} else {
pendingSplits.putAll(sourceSplits);
splitsInitialized = true;
sendPendingSplitsToSourceReaders();
}
});
}
}

@Override
Expand Down

0 comments on commit 7680207

Please sign in to comment.