Skip to content

Commit

Permalink
Add Redistribute translation to Samza runner
Browse files Browse the repository at this point in the history
  • Loading branch information
kennknowles committed Apr 24, 2024
1 parent 21e3fa1 commit 8f1d3da
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.samza.translation;

import com.google.auto.service.AutoService;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.sdk.runners.TransformHierarchy;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.util.construction.NativeTransforms;
import org.apache.beam.sdk.util.construction.graph.PipelineNode;
import org.apache.beam.sdk.util.construction.graph.QueryablePipeline;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

/**
* Translates Reshuffle transform into Samza's native partitionBy operator, which will partition
* each incoming message by the key into a Task corresponding to that key.
*/
public class RedistributeByKeyTranslator<K, V>
implements TransformTranslator<PTransform<PCollection<KV<K, V>>, PCollection<KV<K, V>>>> {

private final ReshuffleTranslator<K, V, V> reshuffleTranslator =
new ReshuffleTranslator<>("rdstr-");

@Override
public void translate(
PTransform<PCollection<KV<K, V>>, PCollection<KV<K, V>>> transform,
TransformHierarchy.Node node,
TranslationContext ctx) {
reshuffleTranslator.translate(transform, node, ctx);
}

@Override
public void translatePortable(
PipelineNode.PTransformNode transform,
QueryablePipeline pipeline,
PortableTranslationContext ctx) {
reshuffleTranslator.translatePortable(transform, pipeline, ctx);
}

/** Predicate to determine whether a URN is a Samza native transform. */
@AutoService(NativeTransforms.IsNativeTransform.class)
public static class IsSamzaNativeTransform implements NativeTransforms.IsNativeTransform {
@Override
public boolean test(RunnerApi.PTransform pTransform) {
return false;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@
public class ReshuffleTranslator<K, InT, OutT>
implements TransformTranslator<PTransform<PCollection<KV<K, InT>>, PCollection<KV<K, OutT>>>> {

private final String prefix;

ReshuffleTranslator(String prefix) {
this.prefix = prefix;
}

ReshuffleTranslator() {
this("rshfl-");
}

@Override
public void translate(
PTransform<PCollection<KV<K, InT>>, PCollection<KV<K, OutT>>> transform,
Expand All @@ -60,7 +70,7 @@ public void translate(
inputStream,
inputCoder.getKeyCoder(),
elementCoder,
"rshfl-" + ctx.getTransformId(),
prefix + ctx.getTransformId(),
ctx.getPipelineOptions().getMaxSourceParallelism() > 1);

ctx.registerMessageStream(output, outputStream);
Expand All @@ -83,7 +93,7 @@ public void translatePortable(
inputStream,
((KvCoder<K, InT>) windowedInputCoder.getValueCoder()).getKeyCoder(),
windowedInputCoder,
"rshfl-" + ctx.getTransformId(),
prefix + ctx.getTransformId(),
ctx.getPipelineOptions().getMaxSourceParallelism() > 1);

ctx.registerMessageStream(outputId, outputStream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ public Map<String, TransformTranslator<?>> getTransformTranslators() {
return ImmutableMap.<String, TransformTranslator<?>>builder()
.put(PTransformTranslation.READ_TRANSFORM_URN, new ReadTranslator<>())
.put(PTransformTranslation.RESHUFFLE_URN, new ReshuffleTranslator<>())
.put(PTransformTranslation.REDISTRIBUTE_BY_KEY_URN, new RedistributeByKeyTranslator<>())
.put(PTransformTranslation.PAR_DO_TRANSFORM_URN, new ParDoBoundMultiTranslator<>())
.put(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>())
.put(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>())
Expand Down

0 comments on commit 8f1d3da

Please sign in to comment.