From 1dfd39bc6d01678babebf5d7c35f9d5e91dff0a2 Mon Sep 17 00:00:00 2001 From: Jeff Kinard Date: Fri, 5 Apr 2024 18:27:55 -0400 Subject: [PATCH 01/18] Fix merging with missing environments. (#30864) --- sdks/python/apache_beam/runners/common.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py index 630ed7910c8d..721bee4fceb5 100644 --- a/sdks/python/apache_beam/runners/common.py +++ b/sdks/python/apache_beam/runners/common.py @@ -1981,27 +1981,34 @@ def env_key(env): base_env_key(e) for e in environments.expand_anyof_environments(env))) - cannonical_enviornments = collections.defaultdict(list) + canonical_environments = collections.defaultdict(list) for env_id, env in pipeline_proto.components.environments.items(): - cannonical_enviornments[env_key(env)].append(env_id) + canonical_environments[env_key(env)].append(env_id) - if len(cannonical_enviornments) == len( - pipeline_proto.components.environments): + if len(canonical_environments) == len(pipeline_proto.components.environments): # All environments are already sufficiently distinct. return pipeline_proto environment_remappings = { e: es[0] - for es in cannonical_enviornments.values() for e in es + for es in canonical_environments.values() for e in es } if not inplace: pipeline_proto = copy.copy(pipeline_proto) for t in pipeline_proto.components.transforms.values(): + if t.environment_id not in pipeline_proto.components.environments: + # TODO(https://github.com/apache/beam/issues/30876): Remove this + # workaround. + continue if t.environment_id: t.environment_id = environment_remappings[t.environment_id] for w in pipeline_proto.components.windowing_strategies.values(): + if w.environment_id not in pipeline_proto.components.environments: + # TODO(https://github.com/apache/beam/issues/30876): Remove this + # workaround. + continue if w.environment_id: w.environment_id = environment_remappings[w.environment_id] for e in set(pipeline_proto.components.environments.keys()) - set( From 4452a6c8d9758c9cd3ea39d37aaf149927c633ce Mon Sep 17 00:00:00 2001 From: tvalentyn Date: Fri, 5 Apr 2024 16:43:54 -0700 Subject: [PATCH 02/18] Adds a bound on pydantic to exclude incompatible versions in the compat test. (#30863) * Add a lower bound on pydantic * Add an upper bound on pydantic in the respective test suite. * Restrict deps some more. --- sdks/python/tox.ini | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 86d12f31a18e..153b6acde8e7 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -102,7 +102,7 @@ passenv = GIT_*,BUILD_*,ghprb*,CHANGE_ID,BRANCH_NAME,JENKINS_*,CODECOV_*,GITHUB_ extras = test,gcp,interactive,dataframe,aws commands = bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append" - + [testenv:py38-lint] # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in pylint. 
setenv = @@ -314,6 +314,8 @@ commands = [testenv:py{38,39}-tft-{113,114}] deps = 113: tensorflow_transform>=1.13.0,<1.14.0 + # Help pip resolve conflict with typing-extensions due to an old version of tensorflow https://github.com/apache/beam/issues/30852 + 113: pydantic<2.0 114: tensorflow_transform>=1.14.0,<1.15.0 commands = bash {toxinidir}/scripts/run_pytest.sh {envname} 'apache_beam/ml/transforms apache_beam/examples/snippets/transforms/elementwise/mltransform_test.py' @@ -373,6 +375,8 @@ commands = [testenv:py{38,39,310}-tensorflow-212] deps = 212: tensorflow>=2.12rc1,<2.13 + # Help pip resolve conflict with typing-extensions for old version of TF https://github.com/apache/beam/issues/30852 + 212: pydantic<2.7 extras = test,gcp commands = # Log tensorflow version for debugging @@ -444,6 +448,8 @@ commands = deps = 014: tensorflow-hub>=0.14.0,<0.15.0 015: tensorflow-hub>=0.15.0,<0.16.0 + # Help pip resolve conflict with typing-extensions due to an old version of tensorboard https://github.com/apache/beam/issues/30852 + 015: pydantic<2.7 tensorflow-text # required to register ops for text embedding models. extras = test,gcp From 3c9e9c80e3badd6af5d2b06d0b9523fabf332760 Mon Sep 17 00:00:00 2001 From: Hai Joey Tran Date: Fri, 5 Apr 2024 19:46:08 -0400 Subject: [PATCH 03/18] Add identity function as default to FlatMap (#30744) * Add identity function as default to FlatMap * Update sdks/python/apache_beam/transforms/core_test.py Co-authored-by: tvalentyn * update docstring * yapf * Add flatmap with no function example * yapf * Update flatmp_nofunction metadata * Update website/www/site/content/en/documentation/transforms/python/elementwise/flatmap.md Co-authored-by: tvalentyn * Update sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_nofunction.py Co-authored-by: tvalentyn * isort --------- Co-authored-by: tvalentyn --- .../elementwise/flatmap_nofunction.py | 54 +++++++++++++++++++ .../transforms/elementwise/flatmap_test.py | 7 +++ sdks/python/apache_beam/transforms/core.py | 10 +++- .../apache_beam/transforms/core_test.py | 11 ++++ .../transforms/python/elementwise/flatmap.md | 22 +++++--- 5 files changed, 95 insertions(+), 9 deletions(-) create mode 100644 sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_nofunction.py diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_nofunction.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_nofunction.py new file mode 100644 index 000000000000..868172699e29 --- /dev/null +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_nofunction.py @@ -0,0 +1,54 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pytype: skip-file +# pylint:disable=line-too-long + +# beam-playground: +# name: FlatMapNoFunction +# description: Demonstration of FlatMap transform usage without a function. +# multifile: false +# default_example: false +# context_line: 44 +# categories: +# - Core Transforms +# complexity: BASIC +# tags: +# - transforms +# - strings +# - map + + +def flatmap_nofunction(test=None): + # [START flatmap_nofunction] + import apache_beam as beam + + with beam.Pipeline() as pipeline: + plants = ( + pipeline + | 'Gardening plants' >> beam.Create( + [['๐Ÿ“Strawberry', '๐Ÿฅ•Carrot', '๐Ÿ†Eggplant'], ['๐Ÿ…Tomato', '๐Ÿฅ”Potato']]) + | 'Split words' >> beam.FlatMap() + | beam.Map(print)) + # [END flatmap_nofunction] + if test: + test(plants) + + +if __name__ == '__main__': + flatmap_nofunction() diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py index 793c12466562..6dd02a208a74 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py @@ -30,6 +30,7 @@ from . import flatmap_generator from . import flatmap_lambda from . import flatmap_multiple_arguments +from . import flatmap_nofunction from . import flatmap_side_inputs_dict from . import flatmap_side_inputs_iter from . import flatmap_side_inputs_singleton @@ -65,6 +66,9 @@ def check_valid_plants(actual): @mock.patch( 'apache_beam.examples.snippets.transforms.elementwise.flatmap_function.print', str) +@mock.patch( + 'apache_beam.examples.snippets.transforms.elementwise.flatmap_nofunction.print', + str) @mock.patch( 'apache_beam.examples.snippets.transforms.elementwise.flatmap_lambda.print', str) @@ -93,6 +97,9 @@ def test_flatmap_simple(self): def test_flatmap_function(self): flatmap_function.flatmap_function(check_plants) + def test_flatmap_nofunction(self): + flatmap_nofunction.flatmap_nofunction(check_plants) + def test_flatmap_lambda(self): flatmap_lambda.flatmap_lambda(check_plants) diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py index 297915023c5c..cb1521176e4e 100644 --- a/sdks/python/apache_beam/transforms/core.py +++ b/sdks/python/apache_beam/transforms/core.py @@ -1899,13 +1899,19 @@ def to_runner_api(self, unused_context): return beam_runner_api_pb2.FunctionSpec(urn=self._urn) -def FlatMap(fn, *args, **kwargs): # pylint: disable=invalid-name +def identity(x: T) -> T: + return x + + +def FlatMap(fn=identity, *args, **kwargs): # pylint: disable=invalid-name """:func:`FlatMap` is like :class:`ParDo` except it takes a callable to specify the transformation. The callable must return an iterable for each element of the input :class:`~apache_beam.pvalue.PCollection`. The elements of these iterables will - be flattened into the output :class:`~apache_beam.pvalue.PCollection`. + be flattened into the output :class:`~apache_beam.pvalue.PCollection`. If + no callable is given, then all elements of the input PCollection must already + be iterables themselves and will be flattened into the output PCollection. Args: fn (callable): a callable object. 
diff --git a/sdks/python/apache_beam/transforms/core_test.py b/sdks/python/apache_beam/transforms/core_test.py index 4fbeaa2ee97a..b0f54b8bb66d 100644 --- a/sdks/python/apache_beam/transforms/core_test.py +++ b/sdks/python/apache_beam/transforms/core_test.py @@ -170,6 +170,17 @@ def test_flatten_mismatched_windows(self): _ = (source1, source2, source3) | "flatten" >> beam.Flatten() +class FlatMapTest(unittest.TestCase): + def test_default(self): + + with beam.Pipeline() as pipeline: + letters = ( + pipeline + | beam.Create(['abc', 'def'], reshuffle=False) + | beam.FlatMap()) + assert_that(letters, equal_to(['a', 'b', 'c', 'd', 'e', 'f'])) + + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() diff --git a/website/www/site/content/en/documentation/transforms/python/elementwise/flatmap.md b/website/www/site/content/en/documentation/transforms/python/elementwise/flatmap.md index 0a4f5e7fae07..3b0164193b1a 100644 --- a/website/www/site/content/en/documentation/transforms/python/elementwise/flatmap.md +++ b/website/www/site/content/en/documentation/transforms/python/elementwise/flatmap.md @@ -49,7 +49,15 @@ We define a function `split_words` which splits an input `str` element using the {{< playground_snippet language="py" path="SDK_PYTHON_FlatMapFunction" show="flatmap_function" >}} {{< /playground >}} -### Example 3: FlatMap with a lambda function +### Example 3: FlatMap without a function + +A common use case of `FlatMap` is to flatten a `PCollection` of iterables into a `PCollection` of elements. To do that, don't specify the function argument to `FlatMap`, which uses the identity mapping function. + +{{< playground height="700px" >}} +{{< playground_snippet language="py" path="SDK_PYTHON_FlatMapNoFunction" show="flatmap_no_function" >}} +{{< /playground >}} + +### Example 4: FlatMap with a lambda function For this example, we want to flatten a `PCollection` of lists of `str`s into a `PCollection` of `str`s. Each input element is already an `iterable`, where each element is what we want in the resulting `PCollection`. @@ -59,7 +67,7 @@ We use a lambda function that returns the same input element it received. {{< playground_snippet language="py" path="SDK_PYTHON_FlatMapLambda" show="flatmap_lambda" >}} {{< /playground >}} -### Example 4: FlatMap with a generator +### Example 5: FlatMap with a generator For this example, we want to flatten a `PCollection` of lists of `str`s into a `PCollection` of `str`s. We use a generator to iterate over the input list and yield each of the elements. @@ -69,7 +77,7 @@ Each yielded result in the generator is an element in the resulting `PCollection {{< playground_snippet language="py" path="SDK_PYTHON_FlatMapGenerator" show="flatmap_generator" >}} {{< /playground >}} -### Example 5: FlatMapTuple for key-value pairs +### Example 6: FlatMapTuple for key-value pairs If your `PCollection` consists of `(key, value)` pairs, you can use `FlatMapTuple` to unpack them into different function arguments. @@ -78,7 +86,7 @@ you can use `FlatMapTuple` to unpack them into different function arguments. {{< playground_snippet language="py" path="SDK_PYTHON_FlatMapTuple" show="flatmap_tuple" >}} {{< /playground >}} -### Example 6: FlatMap with multiple arguments +### Example 7: FlatMap with multiple arguments You can pass functions with multiple arguments to `FlatMap`. They are passed as additional positional arguments or keyword arguments to the function. 
@@ -89,7 +97,7 @@ In this example, `split_words` takes `text` and `delimiter` as arguments. {{< playground_snippet language="py" path="SDK_PYTHON_FlatMapMultipleArguments" show="flatmap_multiple_arguments" >}} {{< /playground >}} -### Example 7: FlatMap with side inputs as singletons +### Example 8: FlatMap with side inputs as singletons If the `PCollection` has a single value, such as the average from another computation, passing the `PCollection` as a *singleton* accesses that value. @@ -101,7 +109,7 @@ We then use that value as the delimiter for the `str.split` method. {{< playground_snippet language="py" path="SDK_PYTHON_FlatMapSideInputSingleton" show="flatmap_side_inputs_singleton" >}} {{< /playground >}} -### Example 8: FlatMap with side inputs as iterators +### Example 9: FlatMap with side inputs as iterators If the `PCollection` has multiple values, pass the `PCollection` as an *iterator*. This accesses elements lazily as they are needed, @@ -114,7 +122,7 @@ so it is possible to iterate over large `PCollection`s that won't fit into memor > **Note**: You can pass the `PCollection` as a *list* with `beam.pvalue.AsList(pcollection)`, > but this requires that all the elements fit into memory. -### Example 9: FlatMap with side inputs as dictionaries +### Example 10: FlatMap with side inputs as dictionaries If a `PCollection` is small enough to fit into memory, then that `PCollection` can be passed as a *dictionary*. Each element must be a `(key, value)` pair. From 54673996c9bf2ee076b04833bbae2729d6cebbaf Mon Sep 17 00:00:00 2001 From: Ahmed Abualsaud <65791736+ahmedabu98@users.noreply.github.com> Date: Mon, 8 Apr 2024 06:55:17 -0400 Subject: [PATCH 04/18] [Java] ManagedIO (#30808) * managed api for java * yaml utils --- build.gradle.kts | 1 + sdks/java/core/build.gradle | 1 + .../beam/sdk/schemas/utils/YamlUtils.java | 171 +++++++++++++ .../apache/beam/sdk/util/YamlUtilsTest.java | 228 ++++++++++++++++++ sdks/java/managed/build.gradle | 37 +++ .../org/apache/beam/sdk/managed/Managed.java | 195 +++++++++++++++ .../ManagedSchemaTransformProvider.java | 183 ++++++++++++++ .../apache/beam/sdk/managed/package-info.java | 20 ++ .../ManagedSchemaTransformProviderTest.java | 103 ++++++++ .../apache/beam/sdk/managed/ManagedTest.java | 114 +++++++++ .../managed/TestSchemaTransformProvider.java | 98 ++++++++ .../src/test/resources/test_config.yaml | 21 ++ settings.gradle.kts | 2 + 13 files changed, 1174 insertions(+) create mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java create mode 100644 sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java create mode 100644 sdks/java/managed/build.gradle create mode 100644 sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/Managed.java create mode 100644 sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProvider.java create mode 100644 sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/package-info.java create mode 100644 sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProviderTest.java create mode 100644 sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedTest.java create mode 100644 sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/TestSchemaTransformProvider.java create mode 100644 sdks/java/managed/src/test/resources/test_config.yaml diff --git a/build.gradle.kts b/build.gradle.kts index ded692677b53..9c42ffdc8cea 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -303,6 +303,7 @@ 
tasks.register("javaPreCommit") { dependsOn(":sdks:java:io:synthetic:build") dependsOn(":sdks:java:io:xml:build") dependsOn(":sdks:java:javadoc:allJavadoc") + dependsOn(":sdks:java:managed:build") dependsOn(":sdks:java:testing:expansion-service:build") dependsOn(":sdks:java:testing:jpms-tests:build") dependsOn(":sdks:java:testing:load-tests:build") diff --git a/sdks/java/core/build.gradle b/sdks/java/core/build.gradle index 438a3fb1806c..5a47cb5237ea 100644 --- a/sdks/java/core/build.gradle +++ b/sdks/java/core/build.gradle @@ -98,6 +98,7 @@ dependencies { permitUnusedDeclared enforcedPlatform(library.java.google_cloud_platform_libraries_bom) provided library.java.json_org implementation library.java.everit_json_schema + implementation "org.yaml:snakeyaml:2.0" shadowTest library.java.everit_json_schema provided library.java.junit testImplementation "com.github.stefanbirkner:system-rules:1.19.0" diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java new file mode 100644 index 000000000000..5c05b2bed396 --- /dev/null +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/utils/YamlUtils.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.schemas.utils; + +import static org.apache.beam.sdk.values.Row.toRow; + +import java.math.BigDecimal; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.Schema.Field; +import org.apache.beam.sdk.schemas.Schema.FieldType; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CaseFormat; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.BaseEncoding; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.yaml.snakeyaml.Yaml; + +public class YamlUtils { + private static final Map> YAML_VALUE_PARSERS = + ImmutableMap + .> + builder() + .put(Schema.TypeName.BYTE, Byte::valueOf) + .put(Schema.TypeName.INT16, Short::valueOf) + .put(Schema.TypeName.INT32, Integer::valueOf) + .put(Schema.TypeName.INT64, Long::valueOf) + .put(Schema.TypeName.FLOAT, Float::valueOf) + .put(Schema.TypeName.DOUBLE, Double::valueOf) + .put(Schema.TypeName.DECIMAL, BigDecimal::new) + .put(Schema.TypeName.BOOLEAN, Boolean::valueOf) + .put(Schema.TypeName.STRING, str -> str) + .put(Schema.TypeName.BYTES, str -> BaseEncoding.base64().decode(str)) + .build(); + + public static Row toBeamRow(@Nullable String yamlString, Schema schema) { + return toBeamRow(yamlString, schema, false); + } + + public static Row toBeamRow( + @Nullable String yamlString, Schema schema, boolean convertNamesToCamelCase) { + if (yamlString == null || yamlString.isEmpty()) { + List requiredFields = + schema.getFields().stream() + .filter(field -> !field.getType().getNullable()) + .collect(Collectors.toList()); + if (requiredFields.isEmpty()) { + return Row.nullRow(schema); + } else { + throw new IllegalArgumentException( + String.format( + "Received an empty YAML string, but output schema contains required fields: %s", + requiredFields)); + } + } + Yaml yaml = new Yaml(); + Object yamlMap = yaml.load(yamlString); + + Preconditions.checkArgument( + yamlMap instanceof Map, + "Expected a YAML mapping but got type '%s' instead.", + Preconditions.checkNotNull(yamlMap).getClass()); + + return toBeamRow( + (Map) Preconditions.checkNotNull(yamlMap), schema, convertNamesToCamelCase); + } + + private static @Nullable Object toBeamValue( + Field field, @Nullable Object yamlValue, boolean convertNamesToCamelCase) { + FieldType fieldType = field.getType(); + + if (yamlValue == null) { + if (fieldType.getNullable()) { + return null; + } else { + throw new IllegalArgumentException( + "Received null value for non-nullable field \"" + field.getName() + "\""); + } + } + + if (yamlValue instanceof String + || yamlValue instanceof Number + || yamlValue instanceof Boolean) { + String yamlStringValue = yamlValue.toString(); + if (YAML_VALUE_PARSERS.containsKey(fieldType.getTypeName())) { + return YAML_VALUE_PARSERS.get(fieldType.getTypeName()).apply(yamlStringValue); + } + } + + if (yamlValue instanceof byte[] && fieldType.getTypeName() == Schema.TypeName.BYTES) { + return yamlValue; + } + + if (yamlValue instanceof List) { + FieldType innerType = + Preconditions.checkNotNull( + fieldType.getCollectionElementType(), + "Cannot convert YAML type '%s` to `%s` because the YAML value is a List, but the output schema field does not 
define a collection type.", + yamlValue.getClass(), + fieldType); + return ((List) yamlValue) + .stream() + .map( + v -> + Preconditions.checkNotNull( + toBeamValue(field.withType(innerType), v, convertNamesToCamelCase))) + .collect(Collectors.toList()); + } + + if (yamlValue instanceof Map) { + if (fieldType.getTypeName() == Schema.TypeName.ROW) { + Schema nestedSchema = + Preconditions.checkNotNull( + fieldType.getRowSchema(), + "Received a YAML '%s' type, but output schema field '%s' does not define a Row Schema", + yamlValue.getClass(), + fieldType); + return toBeamRow((Map) yamlValue, nestedSchema, convertNamesToCamelCase); + } else if (fieldType.getTypeName() == Schema.TypeName.MAP) { + return yamlValue; + } + } + + throw new UnsupportedOperationException( + String.format( + "Converting YAML type '%s' to '%s' is not supported", yamlValue.getClass(), fieldType)); + } + + @SuppressWarnings("nullness") + public static Row toBeamRow(Map yamlMap, Schema rowSchema, boolean toCamelCase) { + return rowSchema.getFields().stream() + .map( + field -> + toBeamValue( + field, + yamlMap.get(maybeGetSnakeCase(field.getName(), toCamelCase)), + toCamelCase)) + .collect(toRow(rowSchema)); + } + + private static String maybeGetSnakeCase(String str, boolean getSnakeCase) { + return getSnakeCase ? CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, str) : str; + } + + public static String yamlStringFromMap(@Nullable Map map) { + if (map == null || map.isEmpty()) { + return ""; + } + return new Yaml().dumpAsMap(map); + } +} diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java new file mode 100644 index 000000000000..6e6984dde3a6 --- /dev/null +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/util/YamlUtilsTest.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.util; + +import static org.junit.Assert.assertEquals; + +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.utils.YamlUtils; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.io.BaseEncoding; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class YamlUtilsTest { + @Rule public transient ExpectedException thrown = ExpectedException.none(); + + public String makeNested(String input) { + return Arrays.stream(input.split("\n")) + .map(str -> " " + str) + .collect(Collectors.joining("\n")); + } + + @Test + public void testEmptyYamlString() { + Schema schema = Schema.builder().build(); + + assertEquals(Row.nullRow(schema), YamlUtils.toBeamRow("", schema)); + } + + @Test + public void testInvalidEmptyYamlWithNonEmptySchema() { + Schema schema = Schema.builder().addStringField("dummy").build(); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage( + "Received an empty YAML string, but output schema contains required fields"); + thrown.expectMessage("dummy"); + + YamlUtils.toBeamRow("", schema); + } + + @Test + public void testNullableValues() { + String yamlString = "nullable_string:\n" + "nullable_integer:\n" + "nullable_boolean:\n"; + Schema schema = + Schema.builder() + .addNullableStringField("nullable_string") + .addNullableInt32Field("nullable_integer") + .addNullableBooleanField("nullable_boolean") + .build(); + + assertEquals(Row.nullRow(schema), YamlUtils.toBeamRow(yamlString, schema)); + } + + @Test + public void testMissingNullableValues() { + String yamlString = "nullable_string:"; + Schema schema = + Schema.builder() + .addNullableStringField("nullable_string") + .addNullableInt32Field("nullable_integer") + .addNullableBooleanField("nullable_boolean") + .build(); + + assertEquals(Row.nullRow(schema), YamlUtils.toBeamRow(yamlString, schema)); + } + + @Test + public void testInvalidNullableValues() { + String yamlString = "nullable_string:\n" + "integer:"; + Schema schema = + Schema.builder().addNullableStringField("nullable_string").addInt32Field("integer").build(); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Received null value for non-nullable field \"integer\""); + YamlUtils.toBeamRow(yamlString, schema); + } + + @Test + public void testInvalidMissingRequiredValues() { + String yamlString = "nullable_string:"; + Schema schema = + Schema.builder().addNullableStringField("nullable_string").addInt32Field("integer").build(); + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Received null value for non-nullable field \"integer\""); + + YamlUtils.toBeamRow(yamlString, schema); + } + + @Test + public void testExtraFieldsAreIgnored() { + String yamlString = "field1: val1\n" + "field2: val2"; + Schema schema = Schema.builder().addStringField("field1").build(); + Row expectedRow = Row.withSchema(schema).withFieldValue("field1", "val1").build(); + + assertEquals(expectedRow, YamlUtils.toBeamRow(yamlString, schema)); + } + + @Test + public void testInvalidTopLevelArray() { + String invalidYaml = "- top_level_list" + "- another_list"; + Schema schema = Schema.builder().build(); + + thrown.expect(IllegalArgumentException.class); + 
thrown.expectMessage("Expected a YAML mapping"); + YamlUtils.toBeamRow(invalidYaml, schema); + } + + private static final Schema FLAT_SCHEMA = + Schema.builder() + .addByteField("byte_field") + .addInt16Field("int16_field") + .addInt32Field("int32_field") + .addInt64Field("int64_field") + .addFloatField("float_field") + .addDoubleField("double_field") + .addDecimalField("decimal_field") + .addBooleanField("boolean_field") + .addStringField("string_field") + .addByteArrayField("bytes_field") + .build(); + + private static final Row FLAT_ROW = + Row.withSchema(FLAT_SCHEMA) + .withFieldValue("byte_field", Byte.valueOf("123")) + .withFieldValue("int16_field", Short.valueOf("16")) + .withFieldValue("int32_field", 32) + .withFieldValue("int64_field", 64L) + .withFieldValue("float_field", 123.456F) + .withFieldValue("double_field", 456.789) + .withFieldValue("decimal_field", BigDecimal.valueOf(789.123)) + .withFieldValue("boolean_field", true) + .withFieldValue("string_field", "some string") + .withFieldValue("bytes_field", BaseEncoding.base64().decode("abc")) + .build(); + + private static final String FLAT_YAML = + "byte_field: 123\n" + + "int16_field: 16\n" + + "int32_field: 32\n" + + "int64_field: 64\n" + + "float_field: 123.456\n" + + "double_field: 456.789\n" + + "decimal_field: 789.123\n" + + "boolean_field: true\n" + + "string_field: some string\n" + + "bytes_field: abc"; + + @Test + public void testAllTypesFlat() { + assertEquals(FLAT_ROW, YamlUtils.toBeamRow(FLAT_YAML, FLAT_SCHEMA)); + } + + @Test + public void testAllTypesNested() { + String nestedFlatTypes = makeNested(FLAT_YAML); + String topLevelYaml = "top_string: abc\n" + "nested: \n" + nestedFlatTypes; + + Schema schema = + Schema.builder().addStringField("top_string").addRowField("nested", FLAT_SCHEMA).build(); + Row expectedRow = + Row.withSchema(schema) + .withFieldValue("top_string", "abc") + .withFieldValue("nested", FLAT_ROW) + .build(); + + assertEquals(expectedRow, YamlUtils.toBeamRow(topLevelYaml, schema)); + } + + private static final String INT_ARRAY_YAML = + "arr:\n" + " - 1\n" + " - 2\n" + " - 3\n" + " - 4\n" + " - 5\n"; + + private static final Schema INT_ARRAY_SCHEMA = + Schema.builder().addArrayField("arr", Schema.FieldType.INT32).build(); + + private static final Row INT_ARRAY_ROW = + Row.withSchema(INT_ARRAY_SCHEMA) + .withFieldValue("arr", IntStream.range(1, 6).boxed().collect(Collectors.toList())) + .build(); + + @Test + public void testArray() { + assertEquals(INT_ARRAY_ROW, YamlUtils.toBeamRow(INT_ARRAY_YAML, INT_ARRAY_SCHEMA)); + } + + @Test + public void testNestedArray() { + String nestedArray = makeNested(INT_ARRAY_YAML); + String yamlString = "str_field: some string\n" + "nested: \n" + nestedArray; + + Schema schema = + Schema.builder() + .addStringField("str_field") + .addRowField("nested", INT_ARRAY_SCHEMA) + .build(); + + Row expectedRow = + Row.withSchema(schema) + .withFieldValue("str_field", "some string") + .withFieldValue("nested", INT_ARRAY_ROW) + .build(); + + assertEquals(expectedRow, YamlUtils.toBeamRow(yamlString, schema)); + } +} diff --git a/sdks/java/managed/build.gradle b/sdks/java/managed/build.gradle new file mode 100644 index 000000000000..88e537d66f8c --- /dev/null +++ b/sdks/java/managed/build.gradle @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +apply plugin: 'org.apache.beam.module' +applyJavaNature( + automaticModuleName: 'org.apache.beam.sdk.managed', +) + + +description = "Apache Beam :: SDKs :: Java :: Managed" +ext.summary = """Library that provides managed IOs.""" + + +dependencies { + implementation project(path: ":sdks:java:core", configuration: "shadow") + implementation library.java.vendored_guava_32_1_2_jre +// implementation library.java.vendored_grpc_1_60_1 + + testImplementation library.java.junit + testRuntimeOnly "org.yaml:snakeyaml:2.0" + testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") +} diff --git a/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/Managed.java b/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/Managed.java new file mode 100644 index 000000000000..b2b010b1e434 --- /dev/null +++ b/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/Managed.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.managed; + +import com.google.auto.value.AutoValue; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; +import org.apache.beam.sdk.schemas.transforms.SchemaTransform; +import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; +import org.apache.beam.sdk.schemas.utils.YamlUtils; +import org.apache.beam.sdk.values.PCollectionRowTuple; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; + +/** + * Top-level {@link org.apache.beam.sdk.transforms.PTransform}s that build and instantiate turnkey + * transforms. + * + *

+ * <h3>Available transforms</h3>
+ *
+ * <p>This API currently supports two operations: {@link Read} and {@link Write}. Each one
+ * enumerates the available transforms in a {@code TRANSFORMS} map.
+ *
+ * <h3>Building a Managed turnkey transform</h3>
+ *
+ * <p>Turnkey transforms are represented as {@link SchemaTransform}s, which means each one has a
+ * defined configuration. A given transform can be built with a {@code Map<String, Object>} that
+ * specifies arguments like so:
+ *
+ * <pre>{@code
+ * PCollectionRowTuple output = PCollectionRowTuple.empty(pipeline).apply(
+ *       Managed.read(ICEBERG)
+ *           .withConfig(ImmutableMap.<String, Object>builder()
+ *               .put("foo", "abc")
+ *               .put("bar", 123)
+ *               .build()));
+ * }</pre>
+ *
+ * <p>Instead of specifying configuration arguments directly in the code, one can provide the
+ * location to a YAML file that contains this information. Say we have the following YAML file:
+ *
+ * <pre>{@code
+ * foo: "abc"
+ * bar: 123
+ * }</pre>
+ *
+ * <p>The file's path can be passed in to the Managed API like so:
+ *
+ * <pre>{@code
+ * PCollectionRowTuple input = PCollectionRowTuple.of("input", pipeline.apply(Create.of(...)))
+ *
+ * PCollectionRowTuple output = input.apply(
+ *     Managed.write(ICEBERG)
+ *         .withConfigUrl(<config path>));
+ * }</pre>
+ */ +public class Managed { + + // TODO: Dynamically generate a list of supported transforms + public static final String ICEBERG = "iceberg"; + + public static final Map READ_TRANSFORMS = + ImmutableMap.builder() + .put(ICEBERG, "beam:schematransform:org.apache.beam:iceberg_read:v1") + .build(); + public static final Map WRITE_TRANSFORMS = + ImmutableMap.builder() + .put(ICEBERG, "beam:schematransform:org.apache.beam:iceberg_write:v1") + .build(); + + /** + * Instantiates a {@link Managed.Read} transform for the specified source. The supported managed + * sources are: + * + *
+ * <ul>
+ *   <li>{@link Managed#ICEBERG} : Read from Apache Iceberg
+ * </ul>
+ */ + public static ManagedTransform read(String source) { + + return new AutoValue_Managed_ManagedTransform.Builder() + .setIdentifier( + Preconditions.checkNotNull( + READ_TRANSFORMS.get(source.toLowerCase()), + "An unsupported source was specified: '%s'. Please specify one of the following sources: %s", + source, + READ_TRANSFORMS.keySet())) + .setSupportedIdentifiers(new ArrayList<>(READ_TRANSFORMS.values())) + .build(); + } + + /** + * Instantiates a {@link Managed.Write} transform for the specified sink. The supported managed + * sinks are: + * + *
+ * <ul>
+ *   <li>{@link Managed#ICEBERG} : Write to Apache Iceberg
+ * </ul>
+ */ + public static ManagedTransform write(String sink) { + return new AutoValue_Managed_ManagedTransform.Builder() + .setIdentifier( + Preconditions.checkNotNull( + WRITE_TRANSFORMS.get(sink.toLowerCase()), + "An unsupported sink was specified: '%s'. Please specify one of the following sinks: %s", + sink, + WRITE_TRANSFORMS.keySet())) + .setSupportedIdentifiers(new ArrayList<>(WRITE_TRANSFORMS.values())) + .build(); + } + + @AutoValue + public abstract static class ManagedTransform extends SchemaTransform { + abstract String getIdentifier(); + + abstract @Nullable String getConfig(); + + abstract @Nullable String getConfigUrl(); + + @VisibleForTesting + abstract List getSupportedIdentifiers(); + + abstract Builder toBuilder(); + + @AutoValue.Builder + abstract static class Builder { + abstract Builder setIdentifier(String identifier); + + abstract Builder setConfig(@Nullable String config); + + abstract Builder setConfigUrl(@Nullable String configUrl); + + @VisibleForTesting + abstract Builder setSupportedIdentifiers(List supportedIdentifiers); + + abstract ManagedTransform build(); + } + + /** + * Use the input Map of configuration arguments to build and instantiate the underlying + * transform. The map can ignore nullable parameters, but needs to include all required + * parameters. Check the underlying transform's schema ({@link + * SchemaTransformProvider#configurationSchema()}) to see which parameters are available. + */ + public ManagedTransform withConfig(Map config) { + return toBuilder().setConfig(YamlUtils.yamlStringFromMap(config)).build(); + } + + /** + * Like {@link #withConfig(Map)}, but instead extracts the configuration arguments from a + * specified YAML file location. + */ + public ManagedTransform withConfigUrl(String configUrl) { + return toBuilder().setConfigUrl(configUrl).build(); + } + + @VisibleForTesting + ManagedTransform withSupportedIdentifiers(List supportedIdentifiers) { + return toBuilder().setSupportedIdentifiers(supportedIdentifiers).build(); + } + + @Override + public PCollectionRowTuple expand(PCollectionRowTuple input) { + ManagedSchemaTransformProvider.ManagedConfig managedConfig = + ManagedSchemaTransformProvider.ManagedConfig.builder() + .setTransformIdentifier(getIdentifier()) + .setConfig(getConfig()) + .setConfigUrl(getConfigUrl()) + .build(); + + SchemaTransform underlyingTransform = + new ManagedSchemaTransformProvider(getSupportedIdentifiers()).from(managedConfig); + + return input.apply(underlyingTransform); + } + } +} diff --git a/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProvider.java b/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProvider.java new file mode 100644 index 000000000000..1ee2b11a90ff --- /dev/null +++ b/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProvider.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.managed; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; + +import com.google.auto.service.AutoService; +import com.google.auto.value.AutoValue; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.ServiceLoader; +import javax.annotation.Nullable; +import org.apache.beam.sdk.io.FileSystems; +import org.apache.beam.sdk.io.fs.MatchResult; +import org.apache.beam.sdk.schemas.AutoValueSchema; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.apache.beam.sdk.schemas.annotations.SchemaFieldDescription; +import org.apache.beam.sdk.schemas.transforms.SchemaTransform; +import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; +import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider; +import org.apache.beam.sdk.schemas.utils.YamlUtils; +import org.apache.beam.sdk.values.PCollectionRowTuple; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; + +@AutoService(SchemaTransformProvider.class) +public class ManagedSchemaTransformProvider + extends TypedSchemaTransformProvider { + + @Override + public String identifier() { + return "beam:schematransform:org.apache.beam:managed:v1"; + } + + private final Map schemaTransformProviders = new HashMap<>(); + + public ManagedSchemaTransformProvider() {} + + ManagedSchemaTransformProvider(Collection supportedIdentifiers) { + try { + for (SchemaTransformProvider schemaTransformProvider : + ServiceLoader.load(SchemaTransformProvider.class)) { + if (schemaTransformProviders.containsKey(schemaTransformProvider.identifier())) { + throw new IllegalArgumentException( + "Found multiple SchemaTransformProvider implementations with the same identifier " + + schemaTransformProvider.identifier()); + } + schemaTransformProviders.put(schemaTransformProvider.identifier(), schemaTransformProvider); + } + } catch (Exception e) { + throw new RuntimeException(e.getMessage()); + } + + schemaTransformProviders.entrySet().removeIf(e -> !supportedIdentifiers.contains(e.getKey())); + } + + @DefaultSchema(AutoValueSchema.class) + @AutoValue + @VisibleForTesting + abstract static class ManagedConfig { + public static Builder builder() { + return new AutoValue_ManagedSchemaTransformProvider_ManagedConfig.Builder(); + } + + @SchemaFieldDescription("Identifier of the underlying IO to instantiate.") + public abstract String getTransformIdentifier(); + + @SchemaFieldDescription("URL path to the YAML config file used to build the underlying IO.") + public abstract @Nullable String getConfigUrl(); + + @SchemaFieldDescription("YAML string config used to build the underlying IO.") + public abstract 
@Nullable String getConfig(); + + @AutoValue.Builder + public abstract static class Builder { + public abstract Builder setTransformIdentifier(String identifier); + + public abstract Builder setConfigUrl(@Nullable String configUrl); + + public abstract Builder setConfig(@Nullable String config); + + public abstract ManagedConfig build(); + } + + protected void validate() { + boolean configExists = !Strings.isNullOrEmpty(getConfig()); + boolean configUrlExists = !Strings.isNullOrEmpty(getConfigUrl()); + checkArgument( + !(configExists && configUrlExists) && (configExists || configUrlExists), + "Please specify a config or a config URL, but not both."); + } + } + + @Override + protected SchemaTransform from(ManagedConfig managedConfig) { + managedConfig.validate(); + SchemaTransformProvider schemaTransformProvider = + Preconditions.checkNotNull( + schemaTransformProviders.get(managedConfig.getTransformIdentifier()), + "Could not find transform with identifier %s, or it may not be supported", + managedConfig.getTransformIdentifier()); + + // parse config before expansion to check if it matches underlying transform's config schema + Schema transformConfigSchema = schemaTransformProvider.configurationSchema(); + Row transformConfig; + try { + transformConfig = getRowConfig(managedConfig, transformConfigSchema); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format( + "Specified configuration does not align with the underlying transform's configuration schema [%s].", + transformConfigSchema), + e); + } + + return new ManagedSchemaTransform(transformConfig, schemaTransformProvider); + } + + private static class ManagedSchemaTransform extends SchemaTransform { + private final Row transformConfig; + private final SchemaTransformProvider underlyingTransformProvider; + + ManagedSchemaTransform( + Row transformConfig, SchemaTransformProvider underlyingTransformProvider) { + this.transformConfig = transformConfig; + this.underlyingTransformProvider = underlyingTransformProvider; + } + + @Override + public PCollectionRowTuple expand(PCollectionRowTuple input) { + SchemaTransform underlyingTransform = underlyingTransformProvider.from(transformConfig); + + return input.apply(underlyingTransform); + } + } + + @VisibleForTesting + static Row getRowConfig(ManagedConfig config, Schema transformSchema) { + String transformYamlConfig; + if (!Strings.isNullOrEmpty(config.getConfigUrl())) { + try { + MatchResult.Metadata fileMetaData = + FileSystems.matchSingleFileSpec(Preconditions.checkNotNull(config.getConfigUrl())); + ByteBuffer buffer = ByteBuffer.allocate((int) fileMetaData.sizeBytes()); + FileSystems.open(fileMetaData.resourceId()).read(buffer); + transformYamlConfig = new String(buffer.array(), StandardCharsets.UTF_8); + } catch (IOException e) { + throw new RuntimeException(e); + } + } else { + transformYamlConfig = config.getConfig(); + } + + return YamlUtils.toBeamRow(transformYamlConfig, transformSchema, true); + } + + @VisibleForTesting + Map getAllProviders() { + return schemaTransformProviders; + } +} diff --git a/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/package-info.java b/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/package-info.java new file mode 100644 index 000000000000..d129e4a7a225 --- /dev/null +++ b/sdks/java/managed/src/main/java/org/apache/beam/sdk/managed/package-info.java @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Managed reads and writes. */ +package org.apache.beam.sdk.managed; diff --git a/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProviderTest.java b/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProviderTest.java new file mode 100644 index 000000000000..0c495d0d2c5c --- /dev/null +++ b/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedSchemaTransformProviderTest.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.managed; + +import static org.apache.beam.sdk.managed.ManagedSchemaTransformProvider.ManagedConfig; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.net.URISyntaxException; +import java.nio.file.Paths; +import java.util.Arrays; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.values.Row; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class ManagedSchemaTransformProviderTest { + @Rule public transient ExpectedException thrown = ExpectedException.none(); + + @Test + public void testFailWhenNoConfigSpecified() { + ManagedSchemaTransformProvider.ManagedConfig config = + ManagedSchemaTransformProvider.ManagedConfig.builder() + .setTransformIdentifier("some identifier") + .build(); + + thrown.expect(IllegalArgumentException.class); + thrown.expectMessage("Please specify a config or a config URL, but not both"); + config.validate(); + } + + @Test + public void testGetRowFromYamlConfig() { + String yamlString = "extra_string: abc\n" + "extra_integer: 123"; + ManagedConfig config = + ManagedConfig.builder() + .setTransformIdentifier(TestSchemaTransformProvider.IDENTIFIER) + .setConfig(yamlString) + .build(); + Schema configSchema = new TestSchemaTransformProvider().configurationSchema(); + Row expectedRow = + Row.withSchema(configSchema) + .withFieldValue("extraString", "abc") + .withFieldValue("extraInteger", 123) + .build(); + Row configRow = + ManagedSchemaTransformProvider.getRowConfig( + config, new TestSchemaTransformProvider().configurationSchema()); + + assertEquals(expectedRow, configRow); + } + + @Test + public void testGetRowFromConfigUrl() throws URISyntaxException { + String yamlConfigPath = + Paths.get(getClass().getClassLoader().getResource("test_config.yaml").toURI()) + .toFile() + .getAbsolutePath(); + ManagedConfig config = + ManagedConfig.builder() + .setTransformIdentifier(TestSchemaTransformProvider.IDENTIFIER) + .setConfigUrl(yamlConfigPath) + .build(); + Schema configSchema = new TestSchemaTransformProvider().configurationSchema(); + Row expectedRow = + Row.withSchema(configSchema) + .withFieldValue("extraString", "abc") + .withFieldValue("extraInteger", 123) + .build(); + Row configRow = + ManagedSchemaTransformProvider.getRowConfig( + config, new TestSchemaTransformProvider().configurationSchema()); + + assertEquals(expectedRow, configRow); + } + + @Test + public void testDiscoverTestProvider() { + ManagedSchemaTransformProvider provider = + new ManagedSchemaTransformProvider(Arrays.asList(TestSchemaTransformProvider.IDENTIFIER)); + + assertTrue(provider.getAllProviders().containsKey(TestSchemaTransformProvider.IDENTIFIER)); + } +} diff --git a/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedTest.java b/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedTest.java new file mode 100644 index 000000000000..ceb71a06f33c --- /dev/null +++ b/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/ManagedTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.managed; + +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionRowTuple; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class ManagedTest { + @Rule public transient ExpectedException thrown = ExpectedException.none(); + + @Test + public void testInvalidTransform() { + thrown.expect(NullPointerException.class); + thrown.expectMessage("An unsupported source was specified"); + Managed.read("nonexistent-source"); + + thrown.expect(NullPointerException.class); + thrown.expectMessage("An unsupported sink was specified"); + Managed.write("nonexistent-sink"); + } + + @Rule public TestPipeline pipeline = TestPipeline.create(); + + private static final Schema SCHEMA = + Schema.builder().addStringField("str").addInt32Field("int").build(); + private static final List ROWS = + Arrays.asList( + Row.withSchema(SCHEMA).withFieldValue("str", "a").withFieldValue("int", 1).build(), + Row.withSchema(SCHEMA).withFieldValue("str", "b").withFieldValue("int", 2).build(), + Row.withSchema(SCHEMA).withFieldValue("str", "c").withFieldValue("int", 3).build()); + + public void runTestProviderTest(Managed.ManagedTransform writeOp) { + PCollection rows = + PCollectionRowTuple.of("input", pipeline.apply(Create.of(ROWS)).setRowSchema(SCHEMA)) + .apply(writeOp) + .get("output"); + + Schema outputSchema = rows.getSchema(); + PAssert.that(rows) + .containsInAnyOrder( + ROWS.stream() + .map( + row -> + Row.withSchema(outputSchema) + .addValues(row.getValues()) + .addValue("abc") + .addValue(123) + .build()) + .collect(Collectors.toList())); + pipeline.run(); + } + + @Test + public void testManagedTestProviderWithConfigMap() { + Managed.ManagedTransform writeOp = + Managed.write(Managed.ICEBERG) + .toBuilder() + .setIdentifier(TestSchemaTransformProvider.IDENTIFIER) + .build() + .withSupportedIdentifiers(Arrays.asList(TestSchemaTransformProvider.IDENTIFIER)) + .withConfig(ImmutableMap.of("extra_string", "abc", "extra_integer", 123)); + + runTestProviderTest(writeOp); + } + + @Test + public void testManagedTestProviderWithConfigFile() throws Exception { + String yamlConfigPath = + Paths.get(getClass().getClassLoader().getResource("test_config.yaml").toURI()) + .toFile() + .getAbsolutePath(); + + Managed.ManagedTransform writeOp = + Managed.write(Managed.ICEBERG) + .toBuilder() + 
.setIdentifier(TestSchemaTransformProvider.IDENTIFIER) + .build() + .withSupportedIdentifiers(Arrays.asList(TestSchemaTransformProvider.IDENTIFIER)) + .withConfigUrl(yamlConfigPath); + + runTestProviderTest(writeOp); + } +} diff --git a/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/TestSchemaTransformProvider.java b/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/TestSchemaTransformProvider.java new file mode 100644 index 000000000000..136d98d468d0 --- /dev/null +++ b/sdks/java/managed/src/test/java/org/apache/beam/sdk/managed/TestSchemaTransformProvider.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.managed; + +import com.google.auto.service.AutoService; +import com.google.auto.value.AutoValue; +import org.apache.beam.sdk.schemas.AutoValueSchema; +import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.apache.beam.sdk.schemas.annotations.SchemaFieldDescription; +import org.apache.beam.sdk.schemas.transforms.SchemaTransform; +import org.apache.beam.sdk.schemas.transforms.SchemaTransformProvider; +import org.apache.beam.sdk.schemas.transforms.TypedSchemaTransformProvider; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionRowTuple; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.sdk.values.TypeDescriptors; + +@AutoService(SchemaTransformProvider.class) +public class TestSchemaTransformProvider + extends TypedSchemaTransformProvider { + static final String IDENTIFIER = "beam:schematransform:org.apache.beam:test_transform:v1"; + + @DefaultSchema(AutoValueSchema.class) + @AutoValue + public abstract static class Config { + public static Builder builder() { + return new AutoValue_TestSchemaTransformProvider_Config.Builder(); + } + + @SchemaFieldDescription("String to add to each row element.") + public abstract String getExtraString(); + + @SchemaFieldDescription("Integer to add to each row element.") + public abstract Integer getExtraInteger(); + + @AutoValue.Builder + public abstract static class Builder { + public abstract Builder setExtraString(String extraString); + + public abstract Builder setExtraInteger(Integer extraInteger); + + public abstract Config build(); + } + } + + @Override + public SchemaTransform from(Config config) { + String extraString = config.getExtraString(); + Integer extraInteger = config.getExtraInteger(); + return new SchemaTransform() { + @Override + public PCollectionRowTuple expand(PCollectionRowTuple input) { + Schema schema = + Schema.builder() + .addFields(input.get("input").getSchema().getFields()) + .addStringField("extra_string") + 
.addInt32Field("extra_integer") + .build(); + PCollection rows = + input + .get("input") + .apply( + MapElements.into(TypeDescriptors.rows()) + .via( + row -> + Row.withSchema(schema) + .addValues(row.getValues()) + .addValue(extraString) + .addValue(extraInteger) + .build())) + .setRowSchema(schema); + return PCollectionRowTuple.of("output", rows); + } + }; + } + + @Override + public String identifier() { + return IDENTIFIER; + } +} diff --git a/sdks/java/managed/src/test/resources/test_config.yaml b/sdks/java/managed/src/test/resources/test_config.yaml new file mode 100644 index 000000000000..7725c32b348e --- /dev/null +++ b/sdks/java/managed/src/test/resources/test_config.yaml @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +extra_string: "abc" +extra_integer: 123 \ No newline at end of file diff --git a/settings.gradle.kts b/settings.gradle.kts index ec11fd32fdd3..1e52e425b215 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -353,3 +353,5 @@ include("sdks:java:io:kafka:kafka-100") findProject(":sdks:java:io:kafka:kafka-100")?.name = "kafka-100" include("sdks:java:io:kafka:kafka-01103") findProject(":sdks:java:io:kafka:kafka-01103")?.name = "kafka-01103" +include("sdks:java:managed") +findProject(":sdks:java:managed")?.name = "managed" From 58e4f8325fc0a2f7cd71bf9ccefea06a5ac6f75b Mon Sep 17 00:00:00 2001 From: Danny McCormick Date: Mon, 8 Apr 2024 10:25:17 -0400 Subject: [PATCH 05/18] Patch release website changes (#30839) * Patch release website changes * Known issue * Mark as released --- CHANGES.md | 8 ++++++++ website/www/site/config.toml | 2 +- .../www/site/content/en/get-started/downloads.md | 13 ++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 170d8d08da74..5824c71a98dc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -52,6 +52,7 @@ * ([#X](https://github.com/apache/beam/issues/X)). --> + # [2.56.0] - Unreleased ## Highlights @@ -88,6 +89,12 @@ * ([#X](https://github.com/apache/beam/issues/X)). +# [2.55.1] + +## Bugfixes + +* Fixed issue that broke WriteToJson in languages other than Java (X-lang) ([#30776](https://github.com/apache/beam/issues/30776)). + # [2.55.0] - 2024-03-25 ## Highlights @@ -133,6 +140,7 @@ ## Known Issues * In Python pipelines, when shutting down inactive bundle processors, shutdown logic can overaggressively hold the lock, blocking acceptance of new work. Symptoms of this issue include slowness or stuckness in long-running jobs. Fixed in 2.56.0 ([#30679](https://github.com/apache/beam/pull/30679)). +* WriteToJson broken in languages other than Java (X-lang) ([#30776](https://github.com/apache/beam/issues/30776)). 
# [2.54.0] - 2024-02-14 diff --git a/website/www/site/config.toml b/website/www/site/config.toml index e3e2638825f0..9e29281dd795 100644 --- a/website/www/site/config.toml +++ b/website/www/site/config.toml @@ -104,7 +104,7 @@ github_project_repo = "https://github.com/apache/beam" [params] description = "Apache Beam is an open source, unified model and set of language-specific SDKs for defining and executing data processing workflows, and also data ingestion and integration flows, supporting Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). Dataflow pipelines simplify the mechanics of large-scale batch and streaming data processing and can run on a number of runtimes like Apache Flink, Apache Spark, and Google Cloud Dataflow (a cloud service). Beam also brings DSL in different languages, allowing users to easily implement their data integration processes." -release_latest = "2.55.0" +release_latest = "2.55.1" # The repository and branch where the files live in Github or Colab. This is used # to serve and stage from your local branch, but publish to the master branch. # e.g. https://github.com/{{< param branch_repo >}}/path/to/notebook.ipynb diff --git a/website/www/site/content/en/get-started/downloads.md b/website/www/site/content/en/get-started/downloads.md index 8b11768d4ded..d5e04b33403d 100644 --- a/website/www/site/content/en/get-started/downloads.md +++ b/website/www/site/content/en/get-started/downloads.md @@ -96,10 +96,17 @@ versions denoted `0.x.y`. ## Releases +### 2.55.1 (2024-03-25) +Official [source code download](https://downloads.apache.org/beam/2.55.1/apache-beam-2.55.1-source-release.zip). +[SHA-512](https://downloads.apache.org/beam/2.55.1/apache-beam-2.55.1-source-release.zip.sha512). +[signature](https://downloads.apache.org/beam/2.55.1/apache-beam-2.55.1-source-release.zip.asc). + +[Release notes](https://github.com/apache/beam/releases/tag/v2.55.1) + ### 2.55.0 (2024-03-25) -Official [source code download](https://downloads.apache.org/beam/2.55.0/apache-beam-2.55.0-source-release.zip). -[SHA-512](https://downloads.apache.org/beam/2.55.0/apache-beam-2.55.0-source-release.zip.sha512). -[signature](https://downloads.apache.org/beam/2.55.0/apache-beam-2.55.0-source-release.zip.asc). +Official [source code download](https://archive.apache.org/beam/2.55.0/apache-beam-2.55.0-source-release.zip). +[SHA-512](https://archive.apache.org/beam/2.55.0/apache-beam-2.55.0-source-release.zip.sha512). +[signature](https://archive.apache.org/beam/2.55.0/apache-beam-2.55.0-source-release.zip.asc). [Release notes](https://github.com/apache/beam/releases/tag/v2.55.0) [Blog post](/blog/beam-2.55.0). 
From 1c962a5ec7ac80eb1165a7fe8fc9c362b94dcd0f Mon Sep 17 00:00:00 2001 From: Daria Bezkorovaina <99457894+dariabezkorovaina@users.noreply.github.com> Date: Mon, 8 Apr 2024 21:14:09 +0100 Subject: [PATCH 06/18] Duet AI Prompts - Documentation Lookup Without Links (#30873) * Create 53_dataflow_runner.md * Create 54_spark_runner.md * Create 55_direct_runner.md * Create 29_advanced_side_inputs.md * Create 30_advanced_additional_outputs.md * Create 31_advanced_batched_dofn.md * Update 31_advanced_batched_dofn.md * Update 30_advanced_additional_outputs.md * Create 01_basic_learning_apache_beam.md * Create 02_basic_pipelines.md * Create 03_basic_configuring_pipelines.md * Update 04_basic_pcollections.md * Create 04_basic_pcollections.md * Update 05_basic_ptransforms.md * Create 05_basic_ptransforms.md * Update 06_basic_schema.md * Create 06_basic_schema.md * Update 07_basic_runners.md * Update 07_basic_runners.md * Create 07_basic_runners.md * Update 07_basic_runners.md * Update 08_basic_windowing.md * Create 08_basic_windowing.md * Update 09_basic_triggers.md * Create 09_basic_triggers.md * Update 10_basic_metrics.md * Update 10_basic_metrics.md * Create 10_basic_metrics.md * Update 11_basic_state.md * Create 11_basic_state.md * Update 12_basic_timers.md * Update 12_basic_timers.md * Create 12_basic_timers.md * Update 13_advanced_splittable_dofn.md * Update 13_advanced_splittable_dofn.md * Create 13_advanced_splittable_dofn.md * Update 13_advanced_splittable_dofn.md * Update 14_advanced_pipeline_patterns.md * Update 14_advanced_pipeline_patterns.md * Create 14_advanced_pipeline_patterns.md * Update 15_advanced_xlang.md * Create 15_advanced_xlang.md * Update 16_advanced_pipeline_lifecycle.md * Update 16_advanced_pipeline_lifecycle.md * Create 16_advanced_pipeline_lifecycle.md * Update 17_advanced_ai_ml.md * Update 17_advanced_ai_ml.md * Update 17_advanced_ai_ml.md * Create 17_advanced_ai_ml.md * Update 18_io_kafka.md * Create 18_io_kafka.md * Update 19_io_pubsub.md * Create 19_io_pubsub.md * Update 20_io_biguery.md * Create 20_io_biguery.md * Update 21_io_bigtable.md * Create 21_io_bigtable.md * Update 22_io_spanner.md * Create 22_io_spanner.md * Update 23_io_gcs.md * Create 23_io_gcs.md * Update 24_io_json.md * Create 24_io_json.md * Update 25_io_csv.md * Create 25_io_csv.md * Update 26_io_avro.md * Create 26_io_avro.md * Update 27_io_parquet.md * Create 27_io_parquet.md * Update 28_io_jdbc.md * Create 28_io_jdbc.md * Update learning/prompts/documentation-lookup-nolinks/08_basic_windowing.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/09_basic_triggers.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/18_io_kafka.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/19_io_pubsub.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/20_io_biguery.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/20_io_biguery.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/21_io_bigtable.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/22_io_spanner.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/24_io_json.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/26_io_avro.md Co-authored-by: Andrey Devyatkin * Update 
learning/prompts/documentation-lookup-nolinks/27_io_parquet.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/25_io_csv.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/28_io_jdbc.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/30_advanced_additional_outputs.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/29_advanced_side_inputs.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/30_advanced_additional_outputs.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/30_advanced_additional_outputs.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/30_advanced_additional_outputs.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/53_dataflow_runner.md Co-authored-by: Andrey Devyatkin * Update learning/prompts/documentation-lookup-nolinks/54_spark_runner.md Co-authored-by: Andrey Devyatkin * Update 53_dataflow_runner.md * Update 07_basic_runners.md * Update 53_dataflow_runner.md --------- Co-authored-by: Andrey Devyatkin --- .../01_basic_learning_apache_beam.md | 16 ++ .../02_basic_pipelines.md | 18 +++ .../03_basic_configuring_pipelines.md | 31 ++++ .../04_basic_pcollections.md | 25 +++ .../05_basic_ptransforms.md | 34 ++++ .../06_basic_schema.md | 25 +++ .../07_basic_runners.md | 25 +++ .../08_basic_windowing.md | 31 ++++ .../09_basic_triggers.md | 31 ++++ .../10_basic_metrics.md | 25 +++ .../11_basic_state.md | 19 +++ .../12_basic_timers.md | 11 ++ .../13_advanced_splittable_dofn.md | 18 +++ .../14_advanced_pipeline_patterns.md | 14 ++ .../15_advanced_xlang.md | 11 ++ .../16_advanced_pipeline_lifecycle.md | 31 ++++ .../17_advanced_ai_ml.md | 18 +++ .../18_io_kafka.md | 29 ++++ .../19_io_pubsub.md | 23 +++ .../20_io_biguery.md | 42 +++++ .../21_io_bigtable.md | 28 ++++ .../22_io_spanner.md | 33 ++++ .../documentation-lookup-nolinks/23_io_gcs.md | 15 ++ .../24_io_json.md | 29 ++++ .../documentation-lookup-nolinks/25_io_csv.md | 30 ++++ .../26_io_avro.md | 25 +++ .../27_io_parquet.md | 21 +++ .../28_io_jdbc.md | 42 +++++ .../29_advanced_side_inputs.md | 36 +++++ .../30_advanced_additional_outputs.md | 100 ++++++++++++ .../31_advanced_batched_dofn.md | 39 +++++ .../53_dataflow_runner.md | 80 ++++++++++ .../54_spark_runner.md | 148 ++++++++++++++++++ .../55_direct_runner.md | 68 ++++++++ .../04_basic_pcollections.md | 6 +- .../05_basic_ptransforms.md | 4 +- .../documentation-lookup/06_basic_schema.md | 8 +- .../documentation-lookup/07_basic_runners.md | 10 +- .../08_basic_windowing.md | 14 +- .../documentation-lookup/09_basic_triggers.md | 14 +- .../documentation-lookup/10_basic_metrics.md | 9 +- .../documentation-lookup/11_basic_state.md | 23 +-- .../documentation-lookup/12_basic_timers.md | 7 +- .../13_advanced_splittable_dofn.md | 12 +- .../14_advanced_pipeline_patterns.md | 5 +- .../documentation-lookup/15_advanced_xlang.md | 10 +- .../16_advanced_pipeline_lifecycle.md | 9 +- .../documentation-lookup/17_advanced_ai_ml.md | 5 +- .../documentation-lookup/18_io_kafka.md | 11 +- .../documentation-lookup/19_io_pubsub.md | 13 +- .../documentation-lookup/20_io_biguery.md | 18 ++- .../documentation-lookup/21_io_bigtable.md | 9 +- .../documentation-lookup/22_io_spanner.md | 6 +- .../prompts/documentation-lookup/23_io_gcs.md | 15 +- .../documentation-lookup/24_io_json.md | 8 +- 
.../prompts/documentation-lookup/25_io_csv.md | 7 +- .../documentation-lookup/26_io_avro.md | 8 +- .../documentation-lookup/27_io_parquet.md | 6 +- .../documentation-lookup/28_io_jdbc.md | 9 +- .../30_advanced_additional_outputs.md | 12 +- .../31_advanced_batched_dofn.md | 27 ++-- 61 files changed, 1314 insertions(+), 142 deletions(-) create mode 100644 learning/prompts/documentation-lookup-nolinks/01_basic_learning_apache_beam.md create mode 100644 learning/prompts/documentation-lookup-nolinks/02_basic_pipelines.md create mode 100644 learning/prompts/documentation-lookup-nolinks/03_basic_configuring_pipelines.md create mode 100644 learning/prompts/documentation-lookup-nolinks/04_basic_pcollections.md create mode 100644 learning/prompts/documentation-lookup-nolinks/05_basic_ptransforms.md create mode 100644 learning/prompts/documentation-lookup-nolinks/06_basic_schema.md create mode 100644 learning/prompts/documentation-lookup-nolinks/07_basic_runners.md create mode 100644 learning/prompts/documentation-lookup-nolinks/08_basic_windowing.md create mode 100644 learning/prompts/documentation-lookup-nolinks/09_basic_triggers.md create mode 100644 learning/prompts/documentation-lookup-nolinks/10_basic_metrics.md create mode 100644 learning/prompts/documentation-lookup-nolinks/11_basic_state.md create mode 100644 learning/prompts/documentation-lookup-nolinks/12_basic_timers.md create mode 100644 learning/prompts/documentation-lookup-nolinks/13_advanced_splittable_dofn.md create mode 100644 learning/prompts/documentation-lookup-nolinks/14_advanced_pipeline_patterns.md create mode 100644 learning/prompts/documentation-lookup-nolinks/15_advanced_xlang.md create mode 100644 learning/prompts/documentation-lookup-nolinks/16_advanced_pipeline_lifecycle.md create mode 100644 learning/prompts/documentation-lookup-nolinks/17_advanced_ai_ml.md create mode 100644 learning/prompts/documentation-lookup-nolinks/18_io_kafka.md create mode 100644 learning/prompts/documentation-lookup-nolinks/19_io_pubsub.md create mode 100644 learning/prompts/documentation-lookup-nolinks/20_io_biguery.md create mode 100644 learning/prompts/documentation-lookup-nolinks/21_io_bigtable.md create mode 100644 learning/prompts/documentation-lookup-nolinks/22_io_spanner.md create mode 100644 learning/prompts/documentation-lookup-nolinks/23_io_gcs.md create mode 100644 learning/prompts/documentation-lookup-nolinks/24_io_json.md create mode 100644 learning/prompts/documentation-lookup-nolinks/25_io_csv.md create mode 100644 learning/prompts/documentation-lookup-nolinks/26_io_avro.md create mode 100644 learning/prompts/documentation-lookup-nolinks/27_io_parquet.md create mode 100644 learning/prompts/documentation-lookup-nolinks/28_io_jdbc.md create mode 100644 learning/prompts/documentation-lookup-nolinks/29_advanced_side_inputs.md create mode 100644 learning/prompts/documentation-lookup-nolinks/30_advanced_additional_outputs.md create mode 100644 learning/prompts/documentation-lookup-nolinks/31_advanced_batched_dofn.md create mode 100644 learning/prompts/documentation-lookup-nolinks/53_dataflow_runner.md create mode 100644 learning/prompts/documentation-lookup-nolinks/54_spark_runner.md create mode 100644 learning/prompts/documentation-lookup-nolinks/55_direct_runner.md diff --git a/learning/prompts/documentation-lookup-nolinks/01_basic_learning_apache_beam.md b/learning/prompts/documentation-lookup-nolinks/01_basic_learning_apache_beam.md new file mode 100644 index 000000000000..e28d44b1dfd7 --- /dev/null +++ 
b/learning/prompts/documentation-lookup-nolinks/01_basic_learning_apache_beam.md
@@ -0,0 +1,16 @@
+Prompt:
+What is Apache Beam?
+
+Response:
+Apache Beam is an open-source unified programming model for data processing pipelines. It provides a set of language-specific SDKs that allow you to define and execute data processing workflows.
+Apache Beam allows you to write pipelines in one of the supported languages (Java, Python, Go, and Scala). The pipelines can be executed on one of the supported runners (Apache Flink, Apache Spark, and Google Cloud Dataflow).
+
+Learning Apache Beam is a valuable skill for data processing and analysis. Here are some resources to get you started:
+* The 'Get Started' section of the Apache Beam website is a perfect place to start. It provides an overview of Apache Beam and its key concepts.
+* For a hands-on introduction, you can start by exploring the WordCount example provided in the 'Get Started' section, which demonstrates the basic concepts of Apache Beam.
+* Tour of Beam is an interactive tour that teaches you core Beam concepts. It provides a sandbox environment to write and run pipelines while exploring different topics.
+* Beam Playground offers an interactive space to experiment with the Apache Beam transforms and examples without installing anything on your system.
+
+After you have a basic understanding of Apache Beam, you can start to build your own pipelines. The Apache Beam website has a number of resources that can help you get started, including Apache Beam Documentation and Apache Beam Community. For those interested in the source code, you can explore the Apache Beam repository on GitHub.
+
+By leveraging these resources, you can build a strong foundation in Apache Beam and start constructing your own data processing pipelines. Additionally, Apache Beam is an open-source project, and contributions are welcome. Whether you're fixing bugs, improving documentation, or adding new features, your work can benefit the entire Beam community. To learn more about how you can contribute, visit the 'Contribute' section of the Apache Beam website.
diff --git a/learning/prompts/documentation-lookup-nolinks/02_basic_pipelines.md b/learning/prompts/documentation-lookup-nolinks/02_basic_pipelines.md
new file mode 100644
index 000000000000..29620aad6fd4
--- /dev/null
+++ b/learning/prompts/documentation-lookup-nolinks/02_basic_pipelines.md
@@ -0,0 +1,18 @@
+Prompt:
+What is a pipeline in Apache Beam?
+
+Response:
+In Apache Beam, a pipeline encapsulates the entirety of a data processing task, including all the data and each step of the process. Essentially, it's a Directed Acyclic Graph of transformations (known as `PTransform`) applied to data collections (`PCollection`).
+
+The simplest pipelines in Apache Beam follow a linear flow of operations, typically adhering to a read-process-write pattern. However, pipelines can also be significantly more complex, featuring multiple input sources, output sinks, and operations (known as a `PTransform`) that can both read from and output to multiple `PCollection` objects. For more information about pipeline design and best practices, see the 'Common Pipeline Patterns' section in the Apache Beam documentation.
+
+To use Apache Beam, your driver program must first create an instance of the Beam SDK class `Pipeline` (typically in the `main()` function).
+
+```python
+import apache_beam as beam
+
+with beam.Pipeline() as pipeline:
+  pass  # Build your pipeline here.
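+
+# A minimal illustrative sketch (the transforms and sample values below are
+# assumptions added for illustration, not part of the original skeleton):
+# chain a few transforms onto the pipeline object to build a tiny pipeline.
+with beam.Pipeline() as pipeline:
+  (pipeline
+   | beam.Create(['hello', 'apache', 'beam'])  # create an in-memory PCollection
+   | beam.Map(str.title)                       # apply a simple element-wise transform
+   | beam.Map(print))                          # write each element to stdout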
+```
+
+When you create your pipeline, you need to set configuration options. You can set your pipeline's configuration options programmatically. You can also set the options ahead of time, or read them from the command line, and then pass them to the `Pipeline` object when you create the object. Reading the options from the command line is often easier than setting them programmatically.
diff --git a/learning/prompts/documentation-lookup-nolinks/03_basic_configuring_pipelines.md b/learning/prompts/documentation-lookup-nolinks/03_basic_configuring_pipelines.md
new file mode 100644
index 000000000000..664b14df87dc
--- /dev/null
+++ b/learning/prompts/documentation-lookup-nolinks/03_basic_configuring_pipelines.md
@@ -0,0 +1,31 @@
+Prompt:
+How do I configure a pipeline in Apache Beam?
+
+Response:
+Configuring pipeline options in Apache Beam is crucial for specifying the execution environment, managing resources, and tailoring the pipeline's behavior to meet specific data processing needs and performance requirements.
+
+You can set pipeline options programmatically or pass them in using the command line. These options include runner-specific settings, job name, project ID (for cloud runners), machine types, number of workers, and more.
+
+Apache Beam offers a variety of standard pipeline options that allow you to customize and optimize your data processing pipelines.
+
+Beam SDKs include a command-line parser that you can use to set pipeline options. Use command-line arguments in the format `--