diff --git a/.test-infra/metrics/grafana/provisioning/datasources/beamgithubjavatests-api.yaml b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubjavatests-api.yaml index a3a0e9bf3118..34b4d3cf6542 100644 --- a/.test-infra/metrics/grafana/provisioning/datasources/beamgithubjavatests-api.yaml +++ b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubjavatests-api.yaml @@ -21,14 +21,13 @@ apiVersion: 1 deleteDatasources: datasources: - - name: Java Tests + - name: "Java Tests" type: marcusolsson-json-datasource access: proxy orgId: 1 url: https://api.github.com/repos/apache/beam/actions/workflows/java_tests.yml/runs jsonData: httpHeaderName1: "accept" - customQueryParameters: "per_page=100" secureJsonData: httpHeaderValue1: "application/vnd.github.v3+json" editable: false diff --git a/.test-infra/metrics/grafana/provisioning/datasources/beamgithubpythontests-api.yaml b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubpythontests-api.yaml index abcd060d1b37..4feac1bcd833 100644 --- a/.test-infra/metrics/grafana/provisioning/datasources/beamgithubpythontests-api.yaml +++ b/.test-infra/metrics/grafana/provisioning/datasources/beamgithubpythontests-api.yaml @@ -21,14 +21,13 @@ apiVersion: 1 deleteDatasources: datasources: - - name: Python Tests + - name: "Python Tests" type: marcusolsson-json-datasource access: proxy orgId: 1 url: https://api.github.com/repos/apache/beam/actions/workflows/python_tests.yml/runs jsonData: httpHeaderName1: "accept" - customQueryParameters: "per_page=100" secureJsonData: httpHeaderValue1: "application/vnd.github.v3+json" editable: false diff --git a/.test-infra/metrics/kubernetes/beamgrafana-deploy.yaml b/.test-infra/metrics/kubernetes/beamgrafana-deploy.yaml index 9ff402d4eef4..ad2de5fdd358 100644 --- a/.test-infra/metrics/kubernetes/beamgrafana-deploy.yaml +++ b/.test-infra/metrics/kubernetes/beamgrafana-deploy.yaml @@ -46,6 +46,8 @@ spec: value: "true" - name: GF_AUTH_ANONYMOUS_ORG_NAME value: Beam + - name: GF_INSTALL_PLUGINS + value: marcusolsson-json-datasource - name: GF_SECURITY_ADMIN_PASSWORD valueFrom: secretKeyRef: @@ -89,6 +91,7 @@ spec: volumeMounts: - mountPath: /var/lib/grafana name: beam-grafana-libdata + readOnly: false - mountPath: /etc/grafana name: beam-grafana-etcdata - mountPath: /var/log/grafana diff --git a/CHANGES.md b/CHANGES.md index c68609116e5c..1c132dfae698 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -61,7 +61,7 @@ ## I/Os * Support for X source added (Java/Python) ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)). -* `ReadFromBigQuery` now runs queries with BATCH priority by default. The `query_priority` parameter is introduced to the same transform to allow configuring the query priority (Python) ([BEAM-12913](https://issues.apache.org/jira/browse/BEAM-12913)). +* `ReadFromBigQuery` and `ReadAllFromBigQuery` now run queries with BATCH priority by default. The `query_priority` parameter is introduced to the same transforms to allow configuring the query priority (Python) ([BEAM-12913](https://issues.apache.org/jira/browse/BEAM-12913)). ## New Features / Improvements @@ -123,7 +123,7 @@ * Code depending on beam imports need to include v2 on the module path. * Fix by'v2' to the import paths, turning `.../sdks/go/...` to `.../sdks/v2/go/...` * No other code change should be required to use v2.33.0 of the Go SDK. - + ## Deprecations * X behavior is deprecated and will be removed in X versions ([BEAM-X](https://issues.apache.org/jira/browse/BEAM-X)). 
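The CHANGES.md entry above for [BEAM-12913] describes the new `query_priority` parameter on `ReadFromBigQuery` and `ReadAllFromBigQuery` in the Python SDK. The following is a minimal, illustrative sketch of how that option might be used; it is not part of this patch, and the accepted value is assumed here to be the plain string 'INTERACTIVE' (a dedicated priority constant may also exist in the SDK).

import apache_beam as beam

# Minimal sketch: read query results with INTERACTIVE priority instead of the
# new BATCH default. Project and temp_location settings are omitted for brevity.
with beam.Pipeline() as pipeline:
    rows = pipeline | "ReadFromBQ" >> beam.io.ReadFromBigQuery(
        query="SELECT 1 AS x",
        use_standard_sql=True,
        query_priority="INTERACTIVE",  # assumed string form; BATCH is now the default
    )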
diff --git a/build.gradle.kts b/build.gradle.kts index 72a7725a6756..bee53bdad8c6 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -42,6 +42,7 @@ tasks.rat { val exclusions = mutableListOf( // Ignore files we track but do not distribute "**/.github/**/*", + "**/.gitkeep", "gradlew", "gradlew.bat", "gradle/wrapper/gradle-wrapper.properties", @@ -117,7 +118,14 @@ tasks.rat { "sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/yarn.lock", // Sample text file for Java quickstart - "sdks/java/maven-archetypes/examples/sample.txt" + "sdks/java/maven-archetypes/examples/sample.txt", + + // Ignore Flutter autogenerated files for Playground + "playground/frontend/.metadata", + "playground/frontend/pubspec.lock", + + // Ignore .gitkeep file + "**/.gitkeep" ) // Add .gitignore excludes to the Apache Rat exclusion list. We re-create the behavior diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index 293ea2328a22..3426ba598fc6 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -2337,7 +2337,7 @@ class BeamModulePlugin implements Plugin { def distTarBall = "${pythonRootDir}/build/apache-beam.tar.gz" project.exec { executable 'sh' - args '-c', ". ${project.ext.envdir}/bin/activate && pip install --retries 10 ${distTarBall}[gcp,test,aws,azure]" + args '-c', ". ${project.ext.envdir}/bin/activate && pip install --retries 10 ${distTarBall}[gcp,test,aws,azure,dataframe]" } } } diff --git a/examples/notebooks/tour-of-beam/dataframes.ipynb b/examples/notebooks/tour-of-beam/dataframes.ipynb index c19d991ba612..ac0ad1016e77 100644 --- a/examples/notebooks/tour-of-beam/dataframes.ipynb +++ b/examples/notebooks/tour-of-beam/dataframes.ipynb @@ -65,7 +65,7 @@ "[Beam DataFrames overview](https://beam.apache.org/documentation/dsls/dataframes/overview) page.\n", "\n", "First, we need to install Apache Beam with the `interactive` extra for the Interactive runner.", - "We also need `pandas` for this notebook, but the Interactive runner already depends on it." + "We also need to install a version of `pandas` supported by the DataFrame API, which we can get with the `dataframe` extra in Beam 2.34.0 and newer." 
], "metadata": { "id": "hDuXLLSZnI1D" @@ -75,7 +75,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "%pip install --quiet apache-beam[interactive]" + "%pip install --quiet apache-beam[interactive,dataframe]" ], "outputs": [], "metadata": { @@ -663,7 +663,7 @@ "\n", "> ℹ️ It's recommended to **only** do this if you need to use a Pandas operation that is\n", "> [not supported in Beam DataFrames](https://beam.apache.org/documentation/dsls/dataframes/differences-from-pandas/#classes-of-unsupported-operations).\n", - "> Converting a PCollection into a Pandas DataFrame consolidates elements from potentially multiple workers into a single worker, which could create a performance bottleneck.\n" + "> Converting a PCollection into a Pandas DataFrame consolidates elements from potentially multiple workers into a single worker, which could create a performance bottleneck.\n", "\n", "> ⚠️ Pandas DataFrames are in-memory data structures, so make sure all the elements in the PCollection fit into memory.\n", "> If they don't fit into memory, consider yielding multiple DataFrame elements via\n", diff --git a/model/pipeline/src/main/proto/beam_runner_api.proto b/model/pipeline/src/main/proto/beam_runner_api.proto index 80370c538bf0..39fd9d8df1bb 100644 --- a/model/pipeline/src/main/proto/beam_runner_api.proto +++ b/model/pipeline/src/main/proto/beam_runner_api.proto @@ -1718,7 +1718,14 @@ message LabelledPayload { string string_value = 2; bool bool_value = 3; double double_value = 4; + int64 int_value = 5; } + + // (Required) The key identifies the actual content of the metadata. + string key = 6; + + // (Required) The namespace describes the context that specified the key. + string namespace = 7; } // Static display data associated with a pipeline component. Display data is diff --git a/model/pipeline/src/main/proto/metrics.proto b/model/pipeline/src/main/proto/metrics.proto index 8f819b600dc9..913b2d0323b3 100644 --- a/model/pipeline/src/main/proto/metrics.proto +++ b/model/pipeline/src/main/proto/metrics.proto @@ -420,6 +420,10 @@ message MonitoringInfo { BIGTABLE_PROJECT_ID = 20 [(label_props) = { name: "BIGTABLE_PROJECT_ID"}]; INSTANCE_ID = 21 [(label_props) = { name: "INSTANCE_ID"}]; TABLE_ID = 22 [(label_props) = { name: "TABLE_ID"}]; + SPANNER_PROJECT_ID = 23 [(label_props) = { name: "SPANNER_PROJECT_ID"}]; + SPANNER_DATABASE_ID = 24 [(label_props) = { name: "SPANNER_DATABASE_ID"}]; + SPANNER_INSTANCE_ID = 25 [(label_props) = { name: "SPANNER_INSTANCE_ID" }]; + SPANNER_QUERY_NAME = 26 [(label_props) = { name: "SPANNER_QUERY_NAME" }]; } // A set of key and value labels which define the scope of the metric. For diff --git a/playground/backend/.gitkeep b/playground/backend/.gitkeep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/playground/backend/cmd/server/server.go b/playground/backend/cmd/server/server.go new file mode 100644 index 000000000000..e4b0f9a362d9 --- /dev/null +++ b/playground/backend/cmd/server/server.go @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "beam.apache.org/playground/backend/pkg/executors" +) + +func main() { + _ = executors.GoExecutor{} +} diff --git a/playground/backend/containers/go/Dockerfile b/playground/backend/containers/go/Dockerfile new file mode 100644 index 000000000000..5786f503aa26 --- /dev/null +++ b/playground/backend/containers/go/Dockerfile @@ -0,0 +1,19 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### + +# Dockerfile to set up the Beam Go SDK \ No newline at end of file diff --git a/playground/backend/containers/java/Dockerfile b/playground/backend/containers/java/Dockerfile new file mode 100644 index 000000000000..6684267f3aef --- /dev/null +++ b/playground/backend/containers/java/Dockerfile @@ -0,0 +1,19 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### + +# Dockerfile to set up the Beam Java SDK \ No newline at end of file diff --git a/playground/backend/go.mod b/playground/backend/go.mod new file mode 100644 index 000000000000..db679664abcf --- /dev/null +++ b/playground/backend/go.mod @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +module beam.apache.org/playground/backend + +go 1.16 \ No newline at end of file diff --git a/playground/backend/pkg/api/.gitkeep b/playground/backend/pkg/api/.gitkeep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/playground/backend/pkg/executors/executor.go b/playground/backend/pkg/executors/executor.go new file mode 100644 index 000000000000..ecc655f1e61b --- /dev/null +++ b/playground/backend/pkg/executors/executor.go @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Interface for all executors (Java/Python/Go/SCIO) +package executors + +type executor interface { + // Validate validates executable file. + // Return result of validation (true/false) and error if it occurs + Validate(filePath string) (bool, error) + + // Compile compiles executable file. + // Return error if it occurs + Compile(filePath string) error + + // Run runs executable file. + // Return logs and error if it occurs + Run(filePath string) (string, error) +} diff --git a/playground/backend/pkg/executors/goexecutor.go b/playground/backend/pkg/executors/goexecutor.go new file mode 100644 index 000000000000..339d09fb879c --- /dev/null +++ b/playground/backend/pkg/executors/goexecutor.go @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Executor for Go +package executors + +type GoExecutor struct{} + +func (goExec GoExecutor) Validate(filePath string) (bool, error) { + return true, nil +} + +func (goExec GoExecutor) Compile(filePath string) error { + return nil +} + +func (goExec GoExecutor) Run(filePath string) (string, error) { + return "", nil +} diff --git a/playground/backend/pkg/executors/javaexecutor.go b/playground/backend/pkg/executors/javaexecutor.go new file mode 100644 index 000000000000..e67f7157463b --- /dev/null +++ b/playground/backend/pkg/executors/javaexecutor.go @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Executor for Java +package executors + +type JavaExecutor struct{} + +func (javaExec JavaExecutor) Validate(filePath string) (bool, error) { + return true, nil +} + +func (javaExec JavaExecutor) Compile(filePath string) error { + return nil +} + +func (javaExec JavaExecutor) Run(filePath string) (string, error) { + return "", nil +} diff --git a/playground/backend/test/.gitkeep b/playground/backend/test/.gitkeep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/playground/frontend/.metadata b/playground/frontend/.metadata new file mode 100644 index 000000000000..0f055bf16355 --- /dev/null +++ b/playground/frontend/.metadata @@ -0,0 +1,10 @@ +# This file tracks properties of this Flutter project. +# Used by Flutter tool to assess capabilities and perform upgrades etc. +# +# This file should be version controlled and should not be manually edited. + +version: + revision: ffb2ecea5223acdd139a5039be2f9c796962833d + channel: stable + +project_type: app diff --git a/playground/frontend/README.md b/playground/frontend/README.md new file mode 100644 index 000000000000..661945f17045 --- /dev/null +++ b/playground/frontend/README.md @@ -0,0 +1,47 @@ + + +# Apache Beam Playground + +## About + +Apache Beam is an open-source, unified model for defining parallel processing pipelines for batch and streaming data. +It provides a portable API layer for building sophisticated data-parallel processing pipelines that may be executed across a diversity of execution engines, or runners. + +## Getting Started + +Frontend development requires [Flutter](https://flutter.dev/docs/get-started/install) to be installed. + +The following command is used to build and serve the frontend locally: + +`$ flutter run` + +Run the following command to generate a release build: + +`flutter build web` + +Playground tests may be run using this command: + +`flutter test` + +Dart code should follow this [code style](https://dart-lang.github.io/linter/lints/index.html).
Code may be analyzed using this command: + +`flutter analyze` + +The full list of commands can be found [here](https://flutter.dev/docs/reference/flutter-cli). diff --git a/playground/frontend/analysis_options.yaml b/playground/frontend/analysis_options.yaml new file mode 100644 index 000000000000..3080e1b91edf --- /dev/null +++ b/playground/frontend/analysis_options.yaml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This file configures the analyzer, which statically analyzes Dart code to +# check for errors, warnings, and lints. +# +# The issues identified by the analyzer are surfaced in the UI of Dart-enabled +# IDEs (https://dart.dev/tools#ides-and-editors). The analyzer can also be +# invoked from the command line by running `flutter analyze`. + +# The following line activates a set of recommended lints for Flutter apps, +# packages, and plugins designed to encourage good coding practices. +include: package:flutter_lints/flutter.yaml + +linter: + # The lint rules applied to this project can be customized in the + # section below to disable rules from the `package:flutter_lints/flutter.yaml` + # included above or to enable additional rules. A list of all available lints + # and their documentation is published at + # https://dart-lang.github.io/linter/lints/index.html. + # + # Instead of disabling a lint rule for the entire project in the + # section below, it can also be suppressed for a single line of code + # or a specific dart file by using the `// ignore: name_of_lint` and + # `// ignore_for_file: name_of_lint` syntax on the line or in the file + # producing the lint. + rules: + # avoid_print: false # Uncomment to disable the `avoid_print` rule + # prefer_single_quotes: true # Uncomment to enable the `prefer_single_quotes` rule + +# Additional information about this file can be found at +# https://dart.dev/guides/language/analysis-options diff --git a/playground/frontend/lib/main.dart b/playground/frontend/lib/main.dart new file mode 100644 index 000000000000..585c11f67128 --- /dev/null +++ b/playground/frontend/lib/main.dart @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import 'package:flutter/material.dart'; + +void main() { + runApp(const PlaygroundApp()); +} + +class PlaygroundApp extends StatelessWidget { + const PlaygroundApp({Key? key}) : super(key: key); + + @override + Widget build(BuildContext context) { + return MaterialApp( + title: 'Apache Beam Playground', + theme: ThemeData( + primarySwatch: Colors.blue, + ), + home: const HomePage(title: 'Apache Beam Playground'), + ); + } +} + +class HomePage extends StatelessWidget { + final String title; + + const HomePage({Key? key, required this.title}) : super(key: key); + + @override + Widget build(BuildContext context) { + return Scaffold( + appBar: AppBar( + title: Text(title), + ), + body: const Center( + child: Text('Playground'), + ), + ); + } +} diff --git a/playground/frontend/pubspec.lock b/playground/frontend/pubspec.lock new file mode 100644 index 000000000000..750761f7638e --- /dev/null +++ b/playground/frontend/pubspec.lock @@ -0,0 +1,167 @@ +# Generated by pub +# See https://dart.dev/tools/pub/glossary#lockfile +packages: + async: + dependency: transitive + description: + name: async + url: "https://pub.dartlang.org" + source: hosted + version: "2.8.1" + boolean_selector: + dependency: transitive + description: + name: boolean_selector + url: "https://pub.dartlang.org" + source: hosted + version: "2.1.0" + characters: + dependency: transitive + description: + name: characters + url: "https://pub.dartlang.org" + source: hosted + version: "1.1.0" + charcode: + dependency: transitive + description: + name: charcode + url: "https://pub.dartlang.org" + source: hosted + version: "1.3.1" + clock: + dependency: transitive + description: + name: clock + url: "https://pub.dartlang.org" + source: hosted + version: "1.1.0" + collection: + dependency: transitive + description: + name: collection + url: "https://pub.dartlang.org" + source: hosted + version: "1.15.0" + cupertino_icons: + dependency: "direct main" + description: + name: cupertino_icons + url: "https://pub.dartlang.org" + source: hosted + version: "1.0.3" + fake_async: + dependency: transitive + description: + name: fake_async + url: "https://pub.dartlang.org" + source: hosted + version: "1.2.0" + flutter: + dependency: "direct main" + description: flutter + source: sdk + version: "0.0.0" + flutter_lints: + dependency: "direct dev" + description: + name: flutter_lints + url: "https://pub.dartlang.org" + source: hosted + version: "1.0.4" + flutter_test: + dependency: "direct dev" + description: flutter + source: sdk + version: "0.0.0" + lints: + dependency: transitive + description: + name: lints + url: "https://pub.dartlang.org" + source: hosted + version: "1.0.1" + matcher: + dependency: transitive + description: + name: matcher + url: "https://pub.dartlang.org" + source: hosted + version: "0.12.10" + meta: + dependency: transitive + description: + name: meta + url: "https://pub.dartlang.org" + source: hosted + version: "1.7.0" + path: + dependency: transitive + description: + name: path + url: "https://pub.dartlang.org" + source: hosted + version: "1.8.0" + sky_engine: + dependency: transitive + description: 
flutter + source: sdk + version: "0.0.99" + source_span: + dependency: transitive + description: + name: source_span + url: "https://pub.dartlang.org" + source: hosted + version: "1.8.1" + stack_trace: + dependency: transitive + description: + name: stack_trace + url: "https://pub.dartlang.org" + source: hosted + version: "1.10.0" + stream_channel: + dependency: transitive + description: + name: stream_channel + url: "https://pub.dartlang.org" + source: hosted + version: "2.1.0" + string_scanner: + dependency: transitive + description: + name: string_scanner + url: "https://pub.dartlang.org" + source: hosted + version: "1.1.0" + term_glyph: + dependency: transitive + description: + name: term_glyph + url: "https://pub.dartlang.org" + source: hosted + version: "1.2.0" + test_api: + dependency: transitive + description: + name: test_api + url: "https://pub.dartlang.org" + source: hosted + version: "0.4.2" + typed_data: + dependency: transitive + description: + name: typed_data + url: "https://pub.dartlang.org" + source: hosted + version: "1.3.0" + vector_math: + dependency: transitive + description: + name: vector_math + url: "https://pub.dartlang.org" + source: hosted + version: "2.1.0" +sdks: + dart: ">=2.12.0 <3.0.0" diff --git a/playground/frontend/pubspec.yaml b/playground/frontend/pubspec.yaml new file mode 100644 index 000000000000..19da56c2707d --- /dev/null +++ b/playground/frontend/pubspec.yaml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: playground +description: A new Flutter project. + +# The following line prevents the package from being accidentally published to +# pub.dev using `flutter pub publish`. This is preferred for private packages. +publish_to: 'none' # Remove this line if you wish to publish to pub.dev + +# The following defines the version and build number for your application. +# A version number is three numbers separated by dots, like 1.2.43 +# followed by an optional build number separated by a +. +# Both the version and the builder number may be overridden in flutter +# build by specifying --build-name and --build-number, respectively. +# In Android, build-name is used as versionName while build-number used as versionCode. +# Read more about Android versioning at https://developer.android.com/studio/publish/versioning +# In iOS, build-name is used as CFBundleShortVersionString while build-number used as CFBundleVersion. +# Read more about iOS versioning at +# https://developer.apple.com/library/archive/documentation/General/Reference/InfoPlistKeyReference/Articles/CoreFoundationKeys.html +version: 1.0.0+1 + +environment: + sdk: ">=2.12.0 <3.0.0" + +# Dependencies specify other packages that your package needs in order to work. 
+# To automatically upgrade your package dependencies to the latest versions +# consider running `flutter pub upgrade --major-versions`. Alternatively, +# dependencies can be manually updated by changing the version numbers below to +# the latest version available on pub.dev. To see which dependencies have newer +# versions available, run `flutter pub outdated`. +dependencies: + flutter: + sdk: flutter + + + # The following adds the Cupertino Icons font to your application. + # Use with the CupertinoIcons class for iOS style icons. + cupertino_icons: ^1.0.2 + +dev_dependencies: + flutter_test: + sdk: flutter + + # The "flutter_lints" package below contains a set of recommended lints to + # encourage good coding practices. The lint set provided by the package is + # activated in the `analysis_options.yaml` file located at the root of your + # package. See that file for information about deactivating specific lint + # rules and activating additional ones. + flutter_lints: ^1.0.0 + +# For information on the generic Dart part of this file, see the +# following page: https://dart.dev/tools/pub/pubspec + +# The following section is specific to Flutter. +flutter: + + # The following line ensures that the Material Icons font is + # included with your application, so that you can use the icons in + # the material Icons class. + uses-material-design: true + + # To add assets to your application, add an assets section, like this: + # assets: + # - images/a_dot_burr.jpeg + # - images/a_dot_ham.jpeg + + # An image asset can refer to one or more resolution-specific "variants", see + # https://flutter.dev/assets-and-images/#resolution-aware. + + # For details regarding adding assets from package dependencies, see + # https://flutter.dev/assets-and-images/#from-packages + + # To add custom fonts to your application, add a fonts section here, + # in this "flutter" section. Each entry in this list should have a + # "family" key with the font family name, and a "fonts" key with a + # list giving the asset and other descriptors for the font. For + # example: + # fonts: + # - family: Schyler + # fonts: + # - asset: fonts/Schyler-Regular.ttf + # - asset: fonts/Schyler-Italic.ttf + # style: italic + # - family: Trajan Pro + # fonts: + # - asset: fonts/TrajanPro.ttf + # - asset: fonts/TrajanPro_Bold.ttf + # weight: 700 + # + # For details regarding fonts from package dependencies, + # see https://flutter.dev/custom-fonts/#from-packages diff --git a/playground/frontend/test/widget_test.dart b/playground/frontend/test/widget_test.dart new file mode 100644 index 000000000000..961e5380fe6c --- /dev/null +++ b/playground/frontend/test/widget_test.dart @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import 'package:flutter_test/flutter_test.dart'; + +import 'package:playground/main.dart'; + +void main() { + testWidgets('Home Page', (WidgetTester tester) async { + // Build our app and trigger a frame. + await tester.pumpWidget(const PlaygroundApp()); + + // Verify that Playground text is displayed + expect(find.text('Playground'), findsOneWidget); + }); +} diff --git a/playground/frontend/web/favicon.png b/playground/frontend/web/favicon.png new file mode 100644 index 000000000000..8aaa46ac1ae2 Binary files /dev/null and b/playground/frontend/web/favicon.png differ diff --git a/playground/frontend/web/icons/Icon-192.png b/playground/frontend/web/icons/Icon-192.png new file mode 100644 index 000000000000..b749bfef0747 Binary files /dev/null and b/playground/frontend/web/icons/Icon-192.png differ diff --git a/playground/frontend/web/icons/Icon-512.png b/playground/frontend/web/icons/Icon-512.png new file mode 100644 index 000000000000..88cfd48dff11 Binary files /dev/null and b/playground/frontend/web/icons/Icon-512.png differ diff --git a/playground/frontend/web/icons/Icon-maskable-192.png b/playground/frontend/web/icons/Icon-maskable-192.png new file mode 100644 index 000000000000..eb9b4d76e525 Binary files /dev/null and b/playground/frontend/web/icons/Icon-maskable-192.png differ diff --git a/playground/frontend/web/icons/Icon-maskable-512.png b/playground/frontend/web/icons/Icon-maskable-512.png new file mode 100644 index 000000000000..d69c56691fbd Binary files /dev/null and b/playground/frontend/web/icons/Icon-maskable-512.png differ diff --git a/playground/frontend/web/index.html b/playground/frontend/web/index.html new file mode 100644 index 000000000000..32737866c152 --- /dev/null +++ b/playground/frontend/web/index.html @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + playground + + + + + + + diff --git a/playground/frontend/web/manifest.json b/playground/frontend/web/manifest.json new file mode 100644 index 000000000000..3274aaceeec6 --- /dev/null +++ b/playground/frontend/web/manifest.json @@ -0,0 +1,35 @@ +{ + "name": "playground", + "short_name": "playground", + "start_url": ".", + "display": "standalone", + "background_color": "#0175C2", + "theme_color": "#0175C2", + "description": "A new Flutter project.", + "orientation": "portrait-primary", + "prefer_related_applications": false, + "icons": [ + { + "src": "icons/Icon-192.png", + "sizes": "192x192", + "type": "image/png" + }, + { + "src": "icons/Icon-512.png", + "sizes": "512x512", + "type": "image/png" + }, + { + "src": "icons/Icon-maskable-192.png", + "sizes": "192x192", + "type": "image/png", + "purpose": "maskable" + }, + { + "src": "icons/Icon-maskable-512.png", + "sizes": "512x512", + "type": "image/png", + "purpose": "maskable" + } + ] +} diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/DisplayDataTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/DisplayDataTranslation.java index b7765560a216..8677bf98190d 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/DisplayDataTranslation.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/DisplayDataTranslation.java @@ -41,8 +41,7 @@ public class DisplayDataTranslation { } private static final Map> - WELL_KNOWN_URN_TRANSLATORS = - ImmutableMap.of(LABELLED, DisplayDataTranslation::translateStringUtf8); + WELL_KNOWN_URN_TRANSLATORS = ImmutableMap.of(LABELLED, 
DisplayDataTranslation::translate); public static List toProto(DisplayData displayData) { ImmutableList.Builder builder = ImmutableList.builder(); @@ -54,7 +53,7 @@ public static List toProto(DisplayData displayData) { urn = item.getKey(); } else { urn = LABELLED; - translator = DisplayDataTranslation::translateStringUtf8; + translator = DisplayDataTranslation::translate; } builder.add( RunnerApi.DisplayData.newBuilder() @@ -65,13 +64,24 @@ public static List toProto(DisplayData displayData) { return builder.build(); } - private static ByteString translateStringUtf8(DisplayData.Item item) { - String value = String.valueOf(item.getValue() == null ? item.getShortValue() : item.getValue()); + private static ByteString translate(DisplayData.Item item) { String label = item.getLabel() == null ? item.getKey() : item.getLabel(); - return RunnerApi.LabelledPayload.newBuilder() - .setLabel(label) - .setStringValue(value) - .build() - .toByteString(); + String namespace = item.getNamespace() == null ? "" : item.getNamespace().getName(); + RunnerApi.LabelledPayload.Builder builder = + RunnerApi.LabelledPayload.newBuilder() + .setKey(item.getKey()) + .setLabel(label) + .setNamespace(namespace); + Object valueObj = item.getValue() == null ? item.getShortValue() : item.getValue(); + if (valueObj instanceof Boolean) { + builder.setBoolValue((Boolean) valueObj); + } else if (valueObj instanceof Integer || valueObj instanceof Long) { + builder.setIntValue((Long) valueObj); + } else if (valueObj instanceof Number) { + builder.setDoubleValue(((Number) valueObj).doubleValue()); + } else { + builder.setStringValue(String.valueOf(valueObj)); + } + return builder.build().toByteString(); } } diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/DisplayDataTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/DisplayDataTranslationTest.java index 9fe9b4b80128..502e2fe0d8cd 100644 --- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/DisplayDataTranslationTest.java +++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/DisplayDataTranslationTest.java @@ -42,6 +42,11 @@ public void testTranslation() { public void populateDisplayData(DisplayData.Builder builder) { builder.add(DisplayData.item("foo", "value")); builder.add(DisplayData.item("foo2", "value2").withLabel("label")); + builder.add(DisplayData.item("foo3", true).withLabel("label")); + builder.add(DisplayData.item("foo4", 123.4).withLabel("label")); + builder.add(DisplayData.item("foo5", 4.321f).withLabel("label")); + builder.add(DisplayData.item("foo6", 321).withLabel("label")); + builder.add(DisplayData.item("foo7", 123L).withLabel("label")); } })), containsInAnyOrder( @@ -51,6 +56,9 @@ public void populateDisplayData(DisplayData.Builder builder) { RunnerApi.LabelledPayload.newBuilder() .setLabel("foo") .setStringValue("value") + .setKey("foo") + .setNamespace( + "org.apache.beam.runners.core.construction.DisplayDataTranslationTest$1") .build() .toByteString()) .build(), @@ -60,6 +68,69 @@ public void populateDisplayData(DisplayData.Builder builder) { RunnerApi.LabelledPayload.newBuilder() .setLabel("label") .setStringValue("value2") + .setKey("foo2") + .setNamespace( + "org.apache.beam.runners.core.construction.DisplayDataTranslationTest$1") + .build() + .toByteString()) + .build(), + RunnerApi.DisplayData.newBuilder() + .setUrn(DisplayDataTranslation.LABELLED) + .setPayload( + 
RunnerApi.LabelledPayload.newBuilder() + .setLabel("label") + .setBoolValue(true) + .setKey("foo3") + .setNamespace( + "org.apache.beam.runners.core.construction.DisplayDataTranslationTest$1") + .build() + .toByteString()) + .build(), + RunnerApi.DisplayData.newBuilder() + .setUrn(DisplayDataTranslation.LABELLED) + .setPayload( + RunnerApi.LabelledPayload.newBuilder() + .setLabel("label") + .setDoubleValue(123.4) + .setKey("foo4") + .setNamespace( + "org.apache.beam.runners.core.construction.DisplayDataTranslationTest$1") + .build() + .toByteString()) + .build(), + RunnerApi.DisplayData.newBuilder() + .setUrn(DisplayDataTranslation.LABELLED) + .setPayload( + RunnerApi.LabelledPayload.newBuilder() + .setLabel("label") + .setDoubleValue(4.321000099182129) + .setKey("foo5") + .setNamespace( + "org.apache.beam.runners.core.construction.DisplayDataTranslationTest$1") + .build() + .toByteString()) + .build(), + RunnerApi.DisplayData.newBuilder() + .setUrn(DisplayDataTranslation.LABELLED) + .setPayload( + RunnerApi.LabelledPayload.newBuilder() + .setLabel("label") + .setIntValue(321) + .setKey("foo6") + .setNamespace( + "org.apache.beam.runners.core.construction.DisplayDataTranslationTest$1") + .build() + .toByteString()) + .build(), + RunnerApi.DisplayData.newBuilder() + .setUrn(DisplayDataTranslation.LABELLED) + .setPayload( + RunnerApi.LabelledPayload.newBuilder() + .setLabel("label") + .setIntValue(123) + .setKey("foo7") + .setNamespace( + "org.apache.beam.runners.core.construction.DisplayDataTranslationTest$1") .build() .toByteString()) .build())); diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java index 4c388bf539cb..336f08d287de 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/GcpResourceIdentifiers.java @@ -51,4 +51,13 @@ public static String datastoreResource(String projectId, String namespace) { return String.format( "//bigtable.googleapis.com/projects/%s/namespaces/%s", projectId, namespace); } + + public static String spannerTable(String projectId, String databaseId, String tableId) { + return String.format( + "//spanner.googleapis.com/projects/%s/topics/%s/tables/%s", projectId, databaseId, tableId); + } + + public static String spannerQuery(String projectId, String queryName) { + return String.format("//spanner.googleapis.com/projects/%s/queries/%s", projectId, queryName); + } } diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java index c792719ea20c..03496fe38dbd 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/metrics/MonitoringInfoConstants.java @@ -85,6 +85,10 @@ public static final class Labels { public static final String TABLE_ID = "TABLE_ID"; public static final String GCS_BUCKET = "GCS_BUCKET"; public static final String GCS_PROJECT_ID = "GCS_PROJECT_ID"; + public static final String SPANNER_PROJECT_ID = "SPANNER_PROJECT_ID"; + public static final String SPANNER_DATABASE_ID = "SPANNER_DATABASE_ID"; + public static final String SPANNER_INSTANCE_ID = "SPANNER_INSTANCE_ID"; + public static final String 
SPANNER_QUERY_NAME = "SPANNER_QUERY_NAME"; static { // Note: One benefit of defining these strings above, instead of pulling them in from @@ -120,6 +124,14 @@ public static final class Labels { checkArgument(TABLE_ID.equals(extractLabel(MonitoringInfoLabels.TABLE_ID))); checkArgument(GCS_BUCKET.equals(extractLabel(MonitoringInfoLabels.GCS_BUCKET))); checkArgument(GCS_PROJECT_ID.equals(extractLabel(MonitoringInfoLabels.GCS_PROJECT_ID))); + checkArgument( + SPANNER_PROJECT_ID.equals(extractLabel(MonitoringInfoLabels.SPANNER_PROJECT_ID))); + checkArgument( + SPANNER_DATABASE_ID.equals(extractLabel(MonitoringInfoLabels.SPANNER_DATABASE_ID))); + checkArgument( + SPANNER_INSTANCE_ID.equals(extractLabel(MonitoringInfoLabels.SPANNER_INSTANCE_ID))); + checkArgument( + SPANNER_QUERY_NAME.equals(extractLabel(MonitoringInfoLabels.SPANNER_QUERY_NAME))); } } diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java index f0c16ff0aa43..5653d27f7889 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineDebugOptions.java @@ -230,6 +230,28 @@ public Dataflow create(PipelineOptions options) { void setReaderCacheTimeoutSec(Integer value); + /** The max amount of time an UnboundedReader is consumed before checkpointing. */ + @Description( + "The max amount of time an UnboundedReader is consumed before checkpointing, in seconds.") + @Default.Integer(10) + Integer getUnboundedReaderMaxReadTimeSec(); + + void setUnboundedReaderMaxReadTimeSec(Integer value); + + /** The max elements read from an UnboundedReader before checkpointing. */ + @Description("The max elements read from an UnboundedReader before checkpointing.") + @Default.Integer(10 * 1000) + Integer getUnboundedReaderMaxElements(); + + void setUnboundedReaderMaxElements(Integer value); + + /** The max amount of time waiting for elements when reading from UnboundedReader. */ + @Description("The max amount of time waiting for elements when reading from UnboundedReader.") + @Default.Integer(1000) + Integer getUnboundedReaderMaxWaitForElementsMs(); + + void setUnboundedReaderMaxWaitForElementsMs(Integer value); + /** * CAUTION: This option implies dumpHeapOnOOM, and has similar caveats. Specifically, heap dumps * can of comparable size to the default boot disk.
Consider increasing the boot disk size before diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternals.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternals.java index 9ca72ccfb6d4..ac59769dc14d 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternals.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternals.java @@ -70,6 +70,7 @@ import org.apache.beam.sdk.state.MapState; import org.apache.beam.sdk.state.OrderedListState; import org.apache.beam.sdk.state.ReadableState; +import org.apache.beam.sdk.state.ReadableStates; import org.apache.beam.sdk.state.SetState; import org.apache.beam.sdk.state.State; import org.apache.beam.sdk.state.StateContext; @@ -1513,43 +1514,31 @@ public void put(K key, V value) { @Override public @UnknownKeyFor @NonNull @Initialized ReadableState computeIfAbsent( K key, Function mappingFunction) { - return new ReadableState() { - @Override - public @Nullable V read() { - Future persistedData = getFutureForKey(key); - try (Closeable scope = scopedReadState()) { - if (localRemovals.contains(key) || negativeCache.contains(key)) { - return null; - } - @Nullable V cachedValue = cachedValues.get(key); - if (cachedValue != null || complete) { - return cachedValue; - } - - V persistedValue = persistedData.get(); - if (persistedValue == null) { - // This is a new value. Add it to the map and return null. - put(key, mappingFunction.apply(key)); - return null; - } - // TODO: Don't do this if it was already in cache. - cachedValues.put(key, persistedValue); - return persistedValue; - } catch (InterruptedException | ExecutionException | IOException e) { - if (e instanceof InterruptedException) { - Thread.currentThread().interrupt(); - } - throw new RuntimeException("Unable to read state", e); - } + Future persistedData = getFutureForKey(key); + try (Closeable scope = scopedReadState()) { + if (localRemovals.contains(key) || negativeCache.contains(key)) { + return ReadableStates.immediate(null); + } + @Nullable V cachedValue = cachedValues.get(key); + if (cachedValue != null || complete) { + return ReadableStates.immediate(cachedValue); } - @Override - @SuppressWarnings("FutureReturnValueIgnored") - public @UnknownKeyFor @NonNull @Initialized ReadableState readLater() { - WindmillMap.this.getFutureForKey(key); - return this; + V persistedValue = persistedData.get(); + if (persistedValue == null) { + // This is a new value. Add it to the map and return null. + put(key, mappingFunction.apply(key)); + return ReadableStates.immediate(null); } - }; + // TODO: Don't do this if it was already in cache. 
+ cachedValues.put(key, persistedValue); + return ReadableStates.immediate(persistedValue); + } catch (InterruptedException | ExecutionException | IOException e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + throw new RuntimeException("Unable to read state", e); + } } @Override diff --git a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSources.java b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSources.java index 0c4a50859b2d..1236c61cab59 100644 --- a/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSources.java +++ b/runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSources.java @@ -47,6 +47,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.beam.runners.dataflow.internal.CustomSources; +import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions; import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; import org.apache.beam.runners.dataflow.util.CloudObject; import org.apache.beam.runners.dataflow.worker.util.common.worker.NativeReader; @@ -435,7 +436,7 @@ public NativeReaderIterator>> iterator() thro context.setActiveReader(reader); - return new UnboundedReaderIterator<>(reader, context, started); + return new UnboundedReaderIterator<>(reader, context, started, options); } @Override @@ -754,34 +755,34 @@ public double getRemainingParallelism() { } } - // Commit at least once every 10 seconds or 10k records. This keeps the watermark advancing - // smoothly, and ensures that not too much work will have to be reprocessed in the event of - // a crash. - @VisibleForTesting static int maxUnboundedBundleSize = 10000; - - @VisibleForTesting - static final Duration MAX_UNBOUNDED_BUNDLE_READ_TIME = Duration.standardSeconds(10); - // Backoff starting at 100ms, for approximately 1s total. 100+150+225+337.5~=1000. 
- private static final FluentBackoff BACKOFF_FACTORY = - FluentBackoff.DEFAULT.withMaxRetries(4).withInitialBackoff(Duration.millis(100)); - private static class UnboundedReaderIterator extends NativeReader.NativeReaderIterator>> { private final UnboundedSource.UnboundedReader reader; private final StreamingModeExecutionContext context; private final boolean started; private final Instant endTime; - private int elemsRead; + private final int maxElems; + private final FluentBackoff backoffFactory; + private int elemsRead = 0; private UnboundedReaderIterator( UnboundedSource.UnboundedReader reader, StreamingModeExecutionContext context, - boolean started) { + boolean started, + PipelineOptions options) { this.reader = reader; this.context = context; - this.endTime = Instant.now().plus(MAX_UNBOUNDED_BUNDLE_READ_TIME); - this.elemsRead = 0; this.started = started; + DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class); + this.endTime = + Instant.now() + .plus(Duration.standardSeconds(debugOptions.getUnboundedReaderMaxReadTimeSec())); + this.maxElems = debugOptions.getUnboundedReaderMaxElements(); + this.backoffFactory = + FluentBackoff.DEFAULT + .withInitialBackoff(Duration.millis(10)) + .withMaxCumulativeBackoff( + Duration.millis(debugOptions.getUnboundedReaderMaxWaitForElementsMs())); } @Override @@ -806,14 +807,16 @@ public boolean start() throws IOException { @Override public boolean advance() throws IOException { - if (elemsRead >= maxUnboundedBundleSize - || Instant.now().isAfter(endTime) - || context.isSinkFullHintSet()) { - return false; - } - - BackOff backoff = BACKOFF_FACTORY.backoff(); + // Limits are placed on how much data we allow to return, how long we process the input + // before checkpointing and how long we block for input to be available. This ensures + // that there are regular checkpoints and that state does not become too large. 
+ BackOff backoff = backoffFactory.backoff(); while (true) { + if (elemsRead >= maxElems + || Instant.now().isAfter(endTime) + || context.isSinkFullHintSet()) { + return false; + } try { if (reader.advance()) { elemsRead++; diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java index 17b59ecf3737..5c62cf9854de 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/StreamingDataflowWorkerTest.java @@ -76,6 +76,7 @@ import org.apache.beam.runners.core.construction.SdkComponents; import org.apache.beam.runners.core.construction.WindowingStrategyTranslation; import org.apache.beam.runners.dataflow.internal.CustomSources; +import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions; import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; import org.apache.beam.runners.dataflow.util.CloudObject; import org.apache.beam.runners.dataflow.util.CloudObjects; @@ -2802,209 +2803,206 @@ public void processElement(ProcessContext c, @StateId("int") ValueState @Test public void testExceptionInvalidatesCache() throws Exception { - DataflowPipelineOptions options = - PipelineOptionsFactory.create().as(DataflowPipelineOptions.class); + // We'll need to force the system to limit bundles to one message at a time. + // Sequence is as follows: + // 01. GetWork[0] (token 0) + // 02. Create counter reader + // 03. Counter yields 0 + // 04. GetData[0] (state as null) + // 05. Read state as null + // 06. Set state as 42 + // 07. THROW on taking counter reader checkpoint + // 08. Create counter reader + // 09. Counter yields 0 + // 10. GetData[1] (state as null) + // 11. Read state as null (*** not 42 ***) + // 12. Take counter reader checkpoint as 0 + // 13. CommitWork[0] (message 0:0, state 42, checkpoint 0) + // 14. GetWork[1] (token 1, checkpoint as 0) + // 15. Counter yields 1 + // 16. Read (cached) state as 42 + // 17. Take counter reader checkpoint 1 + // 18. CommitWork[1] (message 0:1, checkpoint 1) + // 19. GetWork[2] (token 2, checkpoint as 1) + // 20. Counter yields 2 + // 21. THROW on processElement + // 22. Recreate reader from checkpoint 1 + // 23. Counter yields 2 (*** not eof ***) + // 24. GetData[2] (state as 42) + // 25. Read state as 42 + // 26. Take counter reader checkpoint 2 + // 27. CommitWork[2] (message 0:2, checkpoint 2) + FakeWindmillServer server = new FakeWindmillServer(errorCollector); + server.setExpectedExceptionCount(2); + + DataflowPipelineOptions options = createTestingPipelineOptions(server); options.setNumWorkers(1); + DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class); + debugOptions.setUnboundedReaderMaxElements(1); - // We'll need to force the system to limit bundles to one message at a time. - int originalMaxUnboundedBundleSize = WorkerCustomSources.maxUnboundedBundleSize; - WorkerCustomSources.maxUnboundedBundleSize = 1; - try { - // Sequence is as follows: - // 01. GetWork[0] (token 0) - // 02. Create counter reader - // 03. Counter yields 0 - // 04. GetData[0] (state as null) - // 05. Read state as null - // 06. Set state as 42 - // 07. THROW on taking counter reader checkpoint - // 08. 
Create counter reader - // 09. Counter yields 0 - // 10. GetData[1] (state as null) - // 11. Read state as null (*** not 42 ***) - // 12. Take counter reader checkpoint as 0 - // 13. CommitWork[0] (message 0:0, state 42, checkpoint 0) - // 14. GetWork[1] (token 1, checkpoint as 0) - // 15. Counter yields 1 - // 16. Read (cached) state as 42 - // 17. Take counter reader checkpoint 1 - // 18. CommitWork[1] (message 0:1, checkpoint 1) - // 19. GetWork[2] (token 2, checkpoint as 1) - // 20. Counter yields 2 - // 21. THROW on processElement - // 22. Recreate reader from checkpoint 1 - // 23. Counter yields 2 (*** not eof ***) - // 24. GetData[2] (state as 42) - // 25. Read state as 42 - // 26. Take counter reader checkpoint 2 - // 27. CommitWork[2] (message 0:2, checkpoint 2) - - CloudObject codec = - CloudObjects.asCloudObject( - WindowedValue.getFullCoder( - ValueWithRecordId.ValueWithRecordIdCoder.of( - KvCoder.of(VarIntCoder.of(), VarIntCoder.of())), - GlobalWindow.Coder.INSTANCE), - /*sdkComponents=*/ null); - - TestCountingSource counter = new TestCountingSource(3).withThrowOnFirstSnapshot(true); - List instructions = - Arrays.asList( - new ParallelInstruction() - .setOriginalName("OriginalReadName") - .setSystemName("Read") - .setName(DEFAULT_PARDO_USER_NAME) - .setRead( - new ReadInstruction() - .setSource( - CustomSources.serializeToCloudSource(counter, options) - .setCodec(codec))) - .setOutputs( - Arrays.asList( - new InstructionOutput() - .setName("read_output") - .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME) - .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME) - .setCodec(codec))), - makeDoFnInstruction( - new TestExceptionInvalidatesCacheFn(), - 0, - StringUtf8Coder.of(), - WindowingStrategy.globalDefault()), - makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE)); - - FakeWindmillServer server = new FakeWindmillServer(errorCollector); - server.setExpectedExceptionCount(2); - StreamingDataflowWorker worker = - makeWorker( - instructions, createTestingPipelineOptions(server), true /* publishCounters */); - worker.setRetryLocallyDelayMs(100); - worker.start(); - - // Three GetData requests - for (int i = 0; i < 3; i++) { - ByteString state; - if (i == 0 || i == 1) { - state = ByteString.EMPTY; - } else { - state = ByteString.copyFrom(new byte[] {42}); - } - Windmill.GetDataResponse.Builder dataResponse = Windmill.GetDataResponse.newBuilder(); - dataResponse - .addDataBuilder() - .setComputationId(DEFAULT_COMPUTATION_ID) - .addDataBuilder() - .setKey(ByteString.copyFromUtf8("0000000000000001")) - .setShardingKey(1) - .addValuesBuilder() - .setTag(ByteString.copyFromUtf8("//+uint")) - .setStateFamily(DEFAULT_PARDO_STATE_FAMILY) - .getValueBuilder() - .setTimestamp(0) - .setData(state); - server.addDataToOffer(dataResponse.build()); - } + CloudObject codec = + CloudObjects.asCloudObject( + WindowedValue.getFullCoder( + ValueWithRecordId.ValueWithRecordIdCoder.of( + KvCoder.of(VarIntCoder.of(), VarIntCoder.of())), + GlobalWindow.Coder.INSTANCE), + /*sdkComponents=*/ null); - // Three GetWork requests and commits - for (int i = 0; i < 3; i++) { - StringBuilder sb = new StringBuilder(); - sb.append("work {\n"); - sb.append(" computation_id: \"computation\"\n"); - sb.append(" input_data_watermark: 0\n"); - sb.append(" work {\n"); - sb.append(" key: \"0000000000000001\"\n"); - sb.append(" sharding_key: 1\n"); - sb.append(" work_token: "); - sb.append(i); - sb.append(" cache_token: 1"); - sb.append("\n"); - if (i > 0) { - int previousCheckpoint = i - 1; - sb.append(" 
source_state {\n"); - sb.append(" state: \""); - sb.append((char) previousCheckpoint); - sb.append("\"\n"); - // We'll elide the finalize ids since it's not necessary to trigger the finalizer - // for this test. - sb.append(" }\n"); - } - sb.append(" }\n"); - sb.append("}\n"); + TestCountingSource counter = new TestCountingSource(3).withThrowOnFirstSnapshot(true); - server.addWorkToOffer(buildInput(sb.toString(), null)); - - Map result = server.waitForAndGetCommits(1); - - Windmill.WorkItemCommitRequest commit = result.get((long) i); - UnsignedLong finalizeId = - UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0)); - - sb = new StringBuilder(); - sb.append("key: \"0000000000000001\"\n"); - sb.append("sharding_key: 1\n"); - sb.append("work_token: "); - sb.append(i); - sb.append("\n"); - sb.append("cache_token: 1\n"); - sb.append("output_messages {\n"); - sb.append(" destination_stream_id: \"out\"\n"); - sb.append(" bundles {\n"); - sb.append(" key: \"0000000000000001\"\n"); - - int messageNum = i; - sb.append(" messages {\n"); - sb.append(" timestamp: "); - sb.append(messageNum * 1000); - sb.append("\n"); - sb.append(" data: \"0:"); - sb.append(messageNum); + List instructions = + Arrays.asList( + new ParallelInstruction() + .setOriginalName("OriginalReadName") + .setSystemName("Read") + .setName(DEFAULT_PARDO_USER_NAME) + .setRead( + new ReadInstruction() + .setSource( + CustomSources.serializeToCloudSource(counter, options).setCodec(codec))) + .setOutputs( + Arrays.asList( + new InstructionOutput() + .setName("read_output") + .setOriginalName(DEFAULT_OUTPUT_ORIGINAL_NAME) + .setSystemName(DEFAULT_OUTPUT_SYSTEM_NAME) + .setCodec(codec))), + makeDoFnInstruction( + new TestExceptionInvalidatesCacheFn(), + 0, + StringUtf8Coder.of(), + WindowingStrategy.globalDefault()), + makeSinkInstruction(StringUtf8Coder.of(), 1, GlobalWindow.Coder.INSTANCE)); + + StreamingDataflowWorker worker = + makeWorker( + instructions, + options.as(StreamingDataflowWorkerOptions.class), + true /* publishCounters */); + worker.setRetryLocallyDelayMs(100); + worker.start(); + + // Three GetData requests + for (int i = 0; i < 3; i++) { + ByteString state; + if (i == 0 || i == 1) { + state = ByteString.EMPTY; + } else { + state = ByteString.copyFrom(new byte[] {42}); + } + Windmill.GetDataResponse.Builder dataResponse = Windmill.GetDataResponse.newBuilder(); + dataResponse + .addDataBuilder() + .setComputationId(DEFAULT_COMPUTATION_ID) + .addDataBuilder() + .setKey(ByteString.copyFromUtf8("0000000000000001")) + .setShardingKey(1) + .addValuesBuilder() + .setTag(ByteString.copyFromUtf8("//+uint")) + .setStateFamily(DEFAULT_PARDO_STATE_FAMILY) + .getValueBuilder() + .setTimestamp(0) + .setData(state); + server.addDataToOffer(dataResponse.build()); + } + + // Three GetWork requests and commits + for (int i = 0; i < 3; i++) { + StringBuilder sb = new StringBuilder(); + sb.append("work {\n"); + sb.append(" computation_id: \"computation\"\n"); + sb.append(" input_data_watermark: 0\n"); + sb.append(" work {\n"); + sb.append(" key: \"0000000000000001\"\n"); + sb.append(" sharding_key: 1\n"); + sb.append(" work_token: "); + sb.append(i); + sb.append(" cache_token: 1"); + sb.append("\n"); + if (i > 0) { + int previousCheckpoint = i - 1; + sb.append(" source_state {\n"); + sb.append(" state: \""); + sb.append((char) previousCheckpoint); sb.append("\"\n"); + // We'll elide the finalize ids since it's not necessary to trigger the finalizer + // for this test. 
sb.append(" }\n"); + } + sb.append(" }\n"); + sb.append("}\n"); - sb.append(" messages_ids: \"\"\n"); - sb.append(" }\n"); - sb.append("}\n"); - if (i == 0) { - sb.append("value_updates {\n"); - sb.append(" tag: \"//+uint\"\n"); - sb.append(" value {\n"); - sb.append(" timestamp: 0\n"); - sb.append(" data: \""); - sb.append((char) 42); - sb.append("\"\n"); - sb.append(" }\n"); - sb.append(" state_family: \"parDoStateFamily\"\n"); - sb.append("}\n"); - } + server.addWorkToOffer(buildInput(sb.toString(), null)); - int sourceState = i; - sb.append("source_state_updates {\n"); - sb.append(" state: \""); - sb.append((char) sourceState); + Map result = server.waitForAndGetCommits(1); + + Windmill.WorkItemCommitRequest commit = result.get((long) i); + UnsignedLong finalizeId = + UnsignedLong.fromLongBits(commit.getSourceStateUpdates().getFinalizeIds(0)); + + sb = new StringBuilder(); + sb.append("key: \"0000000000000001\"\n"); + sb.append("sharding_key: 1\n"); + sb.append("work_token: "); + sb.append(i); + sb.append("\n"); + sb.append("cache_token: 1\n"); + sb.append("output_messages {\n"); + sb.append(" destination_stream_id: \"out\"\n"); + sb.append(" bundles {\n"); + sb.append(" key: \"0000000000000001\"\n"); + + int messageNum = i; + sb.append(" messages {\n"); + sb.append(" timestamp: "); + sb.append(messageNum * 1000); + sb.append("\n"); + sb.append(" data: \"0:"); + sb.append(messageNum); + sb.append("\"\n"); + sb.append(" }\n"); + + sb.append(" messages_ids: \"\"\n"); + sb.append(" }\n"); + sb.append("}\n"); + if (i == 0) { + sb.append("value_updates {\n"); + sb.append(" tag: \"//+uint\"\n"); + sb.append(" value {\n"); + sb.append(" timestamp: 0\n"); + sb.append(" data: \""); + sb.append((char) 42); sb.append("\"\n"); - sb.append(" finalize_ids: "); - sb.append(finalizeId); + sb.append(" }\n"); + sb.append(" state_family: \"parDoStateFamily\"\n"); sb.append("}\n"); - sb.append("source_watermark: "); - sb.append((sourceState + 1) * 1000); - sb.append("\n"); - sb.append("source_backlog_bytes: 7\n"); - - assertThat( - // The commit will include a timer to clean up state - this timer is irrelevant - // for the current test. - setValuesTimestamps(commit.toBuilder().clearOutputTimers()).build(), - equalTo( - setMessagesMetadata( - PaneInfo.NO_FIRING, - CoderUtils.encodeToByteArray( - CollectionCoder.of(GlobalWindow.Coder.INSTANCE), - ImmutableList.of(GlobalWindow.INSTANCE)), - parseCommitRequest(sb.toString())) - .build())); } - } finally { - WorkerCustomSources.maxUnboundedBundleSize = originalMaxUnboundedBundleSize; + + int sourceState = i; + sb.append("source_state_updates {\n"); + sb.append(" state: \""); + sb.append((char) sourceState); + sb.append("\"\n"); + sb.append(" finalize_ids: "); + sb.append(finalizeId); + sb.append("}\n"); + sb.append("source_watermark: "); + sb.append((sourceState + 1) * 1000); + sb.append("\n"); + sb.append("source_backlog_bytes: 7\n"); + + assertThat( + // The commit will include a timer to clean up state - this timer is irrelevant + // for the current test. 
+ setValuesTimestamps(commit.toBuilder().clearOutputTimers()).build(), + equalTo( + setMessagesMetadata( + PaneInfo.NO_FIRING, + CoderUtils.encodeToByteArray( + CollectionCoder.of(GlobalWindow.Coder.INSTANCE), + ImmutableList.of(GlobalWindow.INSTANCE)), + parseCommitRequest(sb.toString())) + .build())); } } diff --git a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternalsTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternalsTest.java index 727a0864da58..8ba5b3ad1f67 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternalsTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternalsTest.java @@ -439,6 +439,69 @@ public void testMapPutIfAbsentFails() throws Exception { assertEquals(2, (int) mapState.get(tag2).read()); } + @Test + public void testMapPutIfAbsentNoReadSucceeds() throws Exception { + StateTag> addr = + StateTags.map("map", StringUtf8Coder.of(), VarIntCoder.of()); + MapState mapState = underTest.state(NAMESPACE, addr); + + final String tag1 = "tag1"; + SettableFuture future = SettableFuture.create(); + when(mockReader.valueFuture( + protoKeyFromUserKey(tag1, StringUtf8Coder.of()), STATE_FAMILY, VarIntCoder.of())) + .thenReturn(future); + waitAndSet(future, null, 50); + ReadableState readableState = mapState.putIfAbsent(tag1, 42); + assertEquals(42, (int) mapState.get(tag1).read()); + assertNull(readableState.read()); + } + + @Test + public void testMapPutIfAbsentNoReadFails() throws Exception { + StateTag> addr = + StateTags.map("map", StringUtf8Coder.of(), VarIntCoder.of()); + MapState mapState = underTest.state(NAMESPACE, addr); + + final String tag1 = "tag1"; + mapState.put(tag1, 1); + ReadableState readableState = mapState.putIfAbsent(tag1, 42); + assertEquals(1, (int) mapState.get(tag1).read()); + assertEquals(1, (int) readableState.read()); + + final String tag2 = "tag2"; + SettableFuture future = SettableFuture.create(); + when(mockReader.valueFuture( + protoKeyFromUserKey(tag2, StringUtf8Coder.of()), STATE_FAMILY, VarIntCoder.of())) + .thenReturn(future); + waitAndSet(future, 2, 50); + readableState = mapState.putIfAbsent(tag2, 42); + assertEquals(2, (int) mapState.get(tag2).read()); + assertEquals(2, (int) readableState.read()); + } + + @Test + public void testMapMultiplePutIfAbsentNoRead() throws Exception { + StateTag> addr = + StateTags.map("map", StringUtf8Coder.of(), VarIntCoder.of()); + MapState mapState = underTest.state(NAMESPACE, addr); + + final String tag1 = "tag1"; + SettableFuture future = SettableFuture.create(); + when(mockReader.valueFuture( + protoKeyFromUserKey(tag1, StringUtf8Coder.of()), STATE_FAMILY, VarIntCoder.of())) + .thenReturn(future); + waitAndSet(future, null, 50); + ReadableState readableState = mapState.putIfAbsent(tag1, 42); + assertEquals(42, (int) mapState.get(tag1).read()); + ReadableState readableState2 = mapState.putIfAbsent(tag1, 43); + mapState.put(tag1, 1); + ReadableState readableState3 = mapState.putIfAbsent(tag1, 44); + assertEquals(1, (int) mapState.get(tag1).read()); + assertNull(readableState.read()); + assertEquals(42, (int) readableState2.read()); + assertEquals(1, (int) readableState3.read()); + } + @Test public void testMapNegativeCache() throws Exception { StateTag> addr = diff --git 
a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSourcesTest.java b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSourcesTest.java index 680677458e05..61c4ad7d9e03 100644 --- a/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSourcesTest.java +++ b/runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/WorkerCustomSourcesTest.java @@ -76,6 +76,7 @@ import org.apache.beam.runners.core.metrics.ExecutionStateSampler; import org.apache.beam.runners.dataflow.DataflowPipelineTranslator; import org.apache.beam.runners.dataflow.DataflowRunner; +import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions; import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; import org.apache.beam.runners.dataflow.util.CloudObject; import org.apache.beam.runners.dataflow.util.PropertyNames; @@ -519,9 +520,12 @@ public void testReadUnboundedReader() throws Exception { Long.MAX_VALUE); options.setNumWorkers(5); + int maxElements = 10; + DataflowPipelineDebugOptions debugOptions = options.as(DataflowPipelineDebugOptions.class); + debugOptions.setUnboundedReaderMaxElements(maxElements); ByteString state = ByteString.EMPTY; - for (int i = 0; i < 10 * WorkerCustomSources.maxUnboundedBundleSize; + for (int i = 0; i < 10 * maxElements; /* Incremented in inner loop */ ) { // Initialize streaming context with state from previous iteration. context.start( @@ -565,12 +569,12 @@ public void testReadUnboundedReader() throws Exception { numReadOnThisIteration++; } Instant afterReading = Instant.now(); + long maxReadSec = debugOptions.getUnboundedReaderMaxReadTimeSec(); assertThat( new Duration(beforeReading, afterReading).getStandardSeconds(), - lessThanOrEqualTo( - WorkerCustomSources.MAX_UNBOUNDED_BUNDLE_READ_TIME.getStandardSeconds() + 1)); + lessThanOrEqualTo(maxReadSec + 1)); assertThat( - numReadOnThisIteration, lessThanOrEqualTo(WorkerCustomSources.maxUnboundedBundleSize)); + numReadOnThisIteration, lessThanOrEqualTo(debugOptions.getUnboundedReaderMaxElements())); // Extract and verify state modifications. context.flushState(); diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java index bbdd1f5d0e47..6bfec1e3cdff 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineRunner.java @@ -18,7 +18,6 @@ package org.apache.beam.runners.samza; import org.apache.beam.model.pipeline.v1.RunnerApi; -import org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline; import org.apache.beam.runners.core.construction.PTransformTranslation; import org.apache.beam.runners.core.construction.graph.ExecutableStage; import org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser; @@ -41,16 +40,16 @@ public class SamzaPipelineRunner implements PortablePipelineRunner { private final SamzaPipelineOptions options; @Override - public PortablePipelineResult run(final Pipeline pipeline, JobInfo jobInfo) { + public PortablePipelineResult run(final RunnerApi.Pipeline pipeline, JobInfo jobInfo) { // Expand any splittable DoFns within the graph to enable sizing and splitting of bundles. 
- Pipeline pipelineWithSdfExpanded = + RunnerApi.Pipeline pipelineWithSdfExpanded = ProtoOverrides.updateTransform( PTransformTranslation.PAR_DO_TRANSFORM_URN, pipeline, SplittableParDoExpander.createSizedReplacement()); // Don't let the fuser fuse any subcomponents of native transforms. - Pipeline trimmedPipeline = + RunnerApi.Pipeline trimmedPipeline = TrivialNativeTransformExpander.forKnownUrns( pipelineWithSdfExpanded, SamzaPortablePipelineTranslator.knownUrns()); diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReshuffleTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReshuffleTranslator.java new file mode 100644 index 000000000000..37cb5ff5a98f --- /dev/null +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/ReshuffleTranslator.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.samza.translation; + +import com.google.auto.service.AutoService; +import org.apache.beam.model.pipeline.v1.RunnerApi; +import org.apache.beam.runners.core.construction.NativeTransforms; +import org.apache.beam.runners.core.construction.PTransformTranslation; +import org.apache.beam.runners.core.construction.graph.PipelineNode; +import org.apache.beam.runners.core.construction.graph.QueryablePipeline; +import org.apache.beam.runners.samza.runtime.OpMessage; +import org.apache.beam.runners.samza.util.SamzaCoders; +import org.apache.beam.runners.samza.util.WindowUtils; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.runners.TransformHierarchy; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.samza.operators.MessageStream; +import org.apache.samza.serializers.KVSerde; + +/** + * Translates Reshuffle transform into Samza's native partitionBy operator, which will partition + * each incoming message by the key into a Task corresponding to that key. 
+ */ +public class ReshuffleTranslator + implements TransformTranslator>, PCollection>>> { + + @Override + public void translate( + PTransform>, PCollection>> transform, + TransformHierarchy.Node node, + TranslationContext ctx) { + + final PCollection> input = ctx.getInput(transform); + final PCollection> output = ctx.getOutput(transform); + final MessageStream>> inputStream = ctx.getMessageStream(input); + // input will be OpMessage of Windowed>> + final KvCoder inputCoder = (KvCoder) input.getCoder(); + final Coder>> elementCoder = SamzaCoders.of(input); + + final MessageStream>> outputStream = + doTranslate( + inputStream, + inputCoder.getKeyCoder(), + elementCoder, + "rshfl-" + ctx.getTransformId(), + ctx.getPipelineOptions().getMaxSourceParallelism() > 1); + + ctx.registerMessageStream(output, outputStream); + } + + @Override + public void translatePortable( + PipelineNode.PTransformNode transform, + QueryablePipeline pipeline, + PortableTranslationContext ctx) { + + final String inputId = ctx.getInputId(transform); + final MessageStream>> inputStream = ctx.getMessageStreamById(inputId); + final WindowedValue.WindowedValueCoder> windowedInputCoder = + WindowUtils.instantiateWindowedCoder(inputId, pipeline.getComponents()); + final String outputId = ctx.getOutputId(transform); + + final MessageStream>> outputStream = + doTranslate( + inputStream, + ((KvCoder) windowedInputCoder.getValueCoder()).getKeyCoder(), + windowedInputCoder, + "rshfl-" + ctx.getTransformId(), + ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1); + + ctx.registerMessageStream(outputId, outputStream); + } + + private static MessageStream>> doTranslate( + MessageStream>> inputStream, + Coder keyCoder, + Coder>> valueCoder, + String partitionById, // will be used in the intermediate stream name + boolean needRepartition) { + + return needRepartition + ? inputStream + .filter(op -> OpMessage.Type.ELEMENT == op.getType()) + .partitionBy( + opMessage -> opMessage.getElement().getValue().getKey(), + OpMessage::getElement, // windowed value + KVSerde.of(SamzaCoders.toSerde(keyCoder), SamzaCoders.toSerde(valueCoder)), + partitionById) + // convert back to OpMessage + .map(kv -> OpMessage.ofElement(kv.getValue())) + : inputStream.filter(op -> OpMessage.Type.ELEMENT == op.getType()); + } + + /** Predicate to determine whether a URN is a Samza native transform. */ + @AutoService(NativeTransforms.IsNativeTransform.class) + public static class IsSamzaNativeTransform implements NativeTransforms.IsNativeTransform { + @Override + public boolean test(RunnerApi.PTransform pTransform) { + return PTransformTranslation.RESHUFFLE_URN.equals( + PTransformTranslation.urnForTransformOrNull(pTransform)); + } + } +} diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java index 3514b4fd3195..1dd779d90b79 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPipelineTranslator.java @@ -33,8 +33,6 @@ import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.values.PValue; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** This class knows all the translators from a primitive BEAM transform to a Samza operator. 
*/ @SuppressWarnings({ @@ -42,7 +40,6 @@ "nullness" // TODO(https://issues.apache.org/jira/browse/BEAM-10402) }) public class SamzaPipelineTranslator { - private static final Logger LOG = LoggerFactory.getLogger(SamzaPipelineTranslator.class); private static final Map> TRANSLATORS = loadTranslators(); @@ -117,7 +114,7 @@ private interface TransformVisitorFn { } private static class SamzaPipelineVisitor extends Pipeline.PipelineVisitor.Defaults { - private TransformVisitorFn visitorFn; + private final TransformVisitorFn visitorFn; private SamzaPipelineVisitor(TransformVisitorFn visitorFn) { this.visitorFn = visitorFn; @@ -177,6 +174,7 @@ public static class SamzaTranslators implements SamzaTranslatorRegistrar { public Map> getTransformTranslators() { return ImmutableMap.>builder() .put(PTransformTranslation.READ_TRANSFORM_URN, new ReadTranslator<>()) + .put(PTransformTranslation.RESHUFFLE_URN, new ReshuffleTranslator<>()) .put(PTransformTranslation.PAR_DO_TRANSFORM_URN, new ParDoBoundMultiTranslator<>()) .put(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>()) .put(PTransformTranslation.COMBINE_PER_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>()) diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java index 9158cd4dfd91..0cf796cef76b 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/translation/SamzaPortablePipelineTranslator.java @@ -103,6 +103,7 @@ public static class SamzaTranslators implements SamzaPortableTranslatorRegistrar @Override public Map> getTransformTranslators() { return ImmutableMap.>builder() + .put(PTransformTranslation.RESHUFFLE_URN, new ReshuffleTranslator<>()) .put(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN, new GroupByKeyTranslator<>()) .put(PTransformTranslation.FLATTEN_TRANSFORM_URN, new FlattenPCollectionsTranslator<>()) .put(PTransformTranslation.IMPULSE_TRANSFORM_URN, new ImpulseTranslator()) diff --git a/sdks/go/examples/snippets/09triggers.go b/sdks/go/examples/snippets/09triggers.go new file mode 100644 index 000000000000..37afe64aced9 --- /dev/null +++ b/sdks/go/examples/snippets/09triggers.go @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package snippets contains code used in the Beam Programming Guide +// as examples for the Apache Beam Go SDK. These snippets are compiled +// and their tests run to ensure correctness. However, due to their +// piecemeal pedagogical use, they may not be the best example of +// production code. 
+// +// The Beam Programming Guide can be found at https://beam.apache.org/documentation/programming-guide/. +package snippets + +import ( + "github.com/apache/beam/sdks/v2/go/pkg/beam" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/window" + "github.com/apache/beam/sdks/v2/go/pkg/beam/testing/teststream" + "time" +) + +func generateStream(s beam.Scope) beam.PCollection { + con := teststream.NewConfig() + con.AddElements(1000, 1.0, 2.0, 3.0) + con.AdvanceWatermark(11000) + return teststream.Create(s, con) +} + +func TriggerAfterEndOfWindow(s beam.Scope) { + pCollection := generateStream(s) + windowSize := 10 * time.Second + // [START after_window_trigger] + trigger := window.TriggerAfterEndOfWindow().EarlyFiring(window.TriggerAfterProcessingTime(60000)).LateFiring(window.TriggerRepeat(window.TriggerAfterCount(1))) + // [END after_window_trigger] + beam.WindowInto(s, window.NewFixedWindows(windowSize), pCollection, beam.Trigger(trigger), beam.PanesDiscard()) +} + +func TriggerAlways(s beam.Scope) { + pCollection := generateStream(s) + // [START always_trigger] + windowSize := 10 * time.Second + trigger := window.TriggerAlways() + beam.WindowInto(s, window.NewFixedWindows(windowSize), pCollection, beam.Trigger(trigger), beam.PanesDiscard()) + // [END always_trigger] +} diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go index 08d9cb57f4e4..4d54db9a2d01 100644 --- a/sdks/go/examples/wordcount/wordcount.go +++ b/sdks/go/examples/wordcount/wordcount.go @@ -110,16 +110,16 @@ func init() { } var ( - wordRE = regexp.MustCompile(`[a-zA-Z]+('[a-z])?`) - empty = beam.NewCounter("extract", "emptyLines") - small_word_length = flag.Int("small_word_length", 9, "small_word_length") - small_words = beam.NewCounter("extract", "small_words") - lineLen = beam.NewDistribution("extract", "lineLenDistro") + wordRE = regexp.MustCompile(`[a-zA-Z]+('[a-z])?`) + empty = beam.NewCounter("extract", "emptyLines") + smallWordLength = flag.Int("small_word_length", 9, "length of small words (default: 9)") + smallWords = beam.NewCounter("extract", "smallWords") + lineLen = beam.NewDistribution("extract", "lineLenDistro") ) // extractFn is a DoFn that emits the words in a given line and keeps a count for small words. type extractFn struct { - SmallWordLength int `json:"min_length"` + SmallWordLength int `json:"smallWordLength"` } func (f *extractFn) ProcessElement(ctx context.Context, line string, emit func(string)) { @@ -131,7 +131,7 @@ func (f *extractFn) ProcessElement(ctx context.Context, line string, emit func(s // increment the counter for small words if length of words is // less than small_word_length if len(word) < f.SmallWordLength { - small_words.Inc(ctx, 1) + smallWords.Inc(ctx, 1) } emit(word) } @@ -160,7 +160,7 @@ func CountWords(s beam.Scope, lines beam.PCollection) beam.PCollection { s = s.Scope("CountWords") // Convert lines of text into individual words. - col := beam.ParDo(s, &extractFn{SmallWordLength: *small_word_length}, lines) + col := beam.ParDo(s, &extractFn{SmallWordLength: *smallWordLength}, lines) // Count the number of times each word occurs. return stats.Count(s, col) diff --git a/sdks/go/pkg/beam/core/metrics/metrics.go b/sdks/go/pkg/beam/core/metrics/metrics.go index 3633e0c7cb49..8d0d2a8b2879 100644 --- a/sdks/go/pkg/beam/core/metrics/metrics.go +++ b/sdks/go/pkg/beam/core/metrics/metrics.go @@ -643,9 +643,9 @@ func MergeGauges( return res } -// MetricsExtractor extracts the metrics.Results from Store using ctx. 
+// ResultsExtractor extracts the metrics.Results from Store using ctx. // This is same as what metrics.dumperExtractor and metrics.dumpTo would do together. -func MetricsExtractor(ctx context.Context) Results { +func ResultsExtractor(ctx context.Context) Results { store := GetStore(ctx) m := make(map[Labels]interface{}) e := &Extractor{ diff --git a/sdks/go/pkg/beam/core/runtime/exec/data.go b/sdks/go/pkg/beam/core/runtime/exec/data.go index 2b00a37533f8..f891c9c34d57 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/data.go +++ b/sdks/go/pkg/beam/core/runtime/exec/data.go @@ -19,6 +19,8 @@ import ( "context" "fmt" "io" + + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/harness/statecache" ) // Port represents the connection port of external operations. @@ -59,6 +61,8 @@ type StateReader interface { OpenSideInput(ctx context.Context, id StreamID, sideInputID string, key, w []byte) (io.ReadCloser, error) // OpenIterable opens a byte stream for reading unwindowed iterables from the runner. OpenIterable(ctx context.Context, id StreamID, key []byte) (io.ReadCloser, error) + // GetSideInputCache returns the SideInputCache being used at the harness level. + GetSideInputCache() *statecache.SideInputCache } // TODO(herohde) 7/20/2018: user state management diff --git a/sdks/go/pkg/beam/core/runtime/harness/harness.go b/sdks/go/pkg/beam/core/runtime/harness/harness.go index a46f2c31483e..09c1847a0e71 100644 --- a/sdks/go/pkg/beam/core/runtime/harness/harness.go +++ b/sdks/go/pkg/beam/core/runtime/harness/harness.go @@ -318,7 +318,7 @@ func (c *control) handleInstruction(ctx context.Context, req *fnpb.InstructionRe } data := NewScopedDataManager(c.data, instID) - state := NewScopedStateReader(c.state, instID) + state := NewScopedStateReaderWithCache(c.state, instID, c.cache) err = plan.Execute(ctx, string(instID), exec.DataContext{Data: data, State: state}) data.Close() state.Close() diff --git a/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache.go b/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache.go index 5496d8b81252..99a093d62bf2 100644 --- a/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache.go +++ b/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache.go @@ -23,13 +23,25 @@ package statecache import ( "sync" - "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" "github.com/apache/beam/sdks/v2/go/pkg/beam/internal/errors" fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" ) type token string +// ReusableInput is a resettable value, notably used to unwind iterators cheaply +// and cache materialized side input across invocations. +// +// Redefined from exec's input.go to avoid a cyclical dependency. +type ReusableInput interface { + // Init initializes the value before use. + Init() error + // Value returns the side input value. + Value() interface{} + // Reset resets the value after use. + Reset() error +} + // SideInputCache stores a cache of reusable inputs for the purposes of // eliminating redundant calls to the runner during execution of ParDos // using side inputs. 
@@ -44,7 +56,7 @@ type token string type SideInputCache struct { capacity int mu sync.Mutex - cache map[token]exec.ReusableInput + cache map[token]ReusableInput idsToTokens map[string]token validTokens map[token]int8 // Maps tokens to active bundle counts metrics CacheMetrics @@ -66,7 +78,7 @@ func (c *SideInputCache) Init(cap int) error { } c.mu.Lock() defer c.mu.Unlock() - c.cache = make(map[token]exec.ReusableInput, cap) + c.cache = make(map[token]ReusableInput, cap) c.idsToTokens = make(map[string]token) c.validTokens = make(map[token]int8) c.capacity = cap @@ -148,7 +160,7 @@ func (c *SideInputCache) makeAndValidateToken(transformID, sideInputID string) ( // input has been cached. A query having a bad token (e.g. one that doesn't make a known // token or one that makes a known but currently invalid token) is treated the same as a // cache miss. -func (c *SideInputCache) QueryCache(transformID, sideInputID string) exec.ReusableInput { +func (c *SideInputCache) QueryCache(transformID, sideInputID string) ReusableInput { c.mu.Lock() defer c.mu.Unlock() tok, ok := c.makeAndValidateToken(transformID, sideInputID) @@ -170,7 +182,7 @@ func (c *SideInputCache) QueryCache(transformID, sideInputID string) exec.Reusab // with its corresponding transform ID and side input ID. If the IDs do not pair with a known, valid token // then we silently do not cache the input, as this is an indication that the runner is treating that input // as uncacheable. -func (c *SideInputCache) SetCache(transformID, sideInputID string, input exec.ReusableInput) { +func (c *SideInputCache) SetCache(transformID, sideInputID string, input ReusableInput) { c.mu.Lock() defer c.mu.Unlock() tok, ok := c.makeAndValidateToken(transformID, sideInputID) diff --git a/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache_test.go b/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache_test.go index b9970c398154..18f2f38ab497 100644 --- a/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache_test.go +++ b/sdks/go/pkg/beam/core/runtime/harness/statecache/statecache_test.go @@ -18,7 +18,6 @@ package statecache import ( "testing" - "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" ) @@ -30,7 +29,7 @@ type TestReusableInput struct { value interface{} } -func makeTestReusableInput(transformID, sideInputID string, value interface{}) exec.ReusableInput { +func makeTestReusableInput(transformID, sideInputID string, value interface{}) ReusableInput { return &TestReusableInput{transformID: transformID, sideInputID: sideInputID, value: value} } diff --git a/sdks/go/pkg/beam/core/runtime/harness/statemgr.go b/sdks/go/pkg/beam/core/runtime/harness/statemgr.go index 09daa1185dc4..31d75b656f99 100644 --- a/sdks/go/pkg/beam/core/runtime/harness/statemgr.go +++ b/sdks/go/pkg/beam/core/runtime/harness/statemgr.go @@ -24,6 +24,7 @@ import ( "time" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" + "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/harness/statecache" "github.com/apache/beam/sdks/v2/go/pkg/beam/internal/errors" "github.com/apache/beam/sdks/v2/go/pkg/beam/log" fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" @@ -39,11 +40,18 @@ type ScopedStateReader struct { opened []io.Closer // track open readers to force close all closed bool mu sync.Mutex + + cache *statecache.SideInputCache } // NewScopedStateReader returns a ScopedStateReader for 
the given instruction. func NewScopedStateReader(mgr *StateChannelManager, instID instructionID) *ScopedStateReader { - return &ScopedStateReader{mgr: mgr, instID: instID} + return &ScopedStateReader{mgr: mgr, instID: instID, cache: nil} +} + +// NewScopedStateReaderWithCache returns a ScopedState reader for the given instruction with a pointer to a SideInputCache. +func NewScopedStateReaderWithCache(mgr *StateChannelManager, instID instructionID, cache *statecache.SideInputCache) *ScopedStateReader { + return &ScopedStateReader{mgr: mgr, instID: instID, cache: cache} } // OpenSideInput opens a byte stream for reading iterable side input. @@ -60,6 +68,11 @@ func (s *ScopedStateReader) OpenIterable(ctx context.Context, id exec.StreamID, }) } +// GetSideInputCache returns a pointer to the SideInputCache being used by the SDK harness. +func (s *ScopedStateReader) GetSideInputCache() *statecache.SideInputCache { + return s.cache +} + func (s *ScopedStateReader) openReader(ctx context.Context, id exec.StreamID, readerFn func(*StateChannel) *stateKeyReader) (*stateKeyReader, error) { ch, err := s.open(ctx, id.Port) if err != nil { diff --git a/sdks/go/pkg/beam/runners/direct/direct.go b/sdks/go/pkg/beam/runners/direct/direct.go index 172915a5bff3..399a74c5ac09 100644 --- a/sdks/go/pkg/beam/runners/direct/direct.go +++ b/sdks/go/pkg/beam/runners/direct/direct.go @@ -84,7 +84,7 @@ type directPipelineResult struct { } func newDirectPipelineResult(ctx context.Context) (*directPipelineResult, error) { - metrics := metrics.MetricsExtractor(ctx) + metrics := metrics.ResultsExtractor(ctx) return &directPipelineResult{metrics: &metrics}, nil } diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index 9c9c16ffd58c..6c34c30d2069 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -70,7 +70,8 @@ # jar from the appropriate gradle module, which may not succeed. set -e -set -v +trap '! [[ "$BASH_COMMAND" =~ ^(echo|read|if|ARGS|shift|SOCKET_SCRIPT|\[\[) ]] && \ +cmd=`eval echo "$BASH_COMMAND" 2>/dev/null` && echo "\$ $cmd"' DEBUG # Default test targets. TESTS="./test/integration/... ./test/regression" @@ -395,7 +396,7 @@ ARGS="$ARGS $PIPELINE_OPTS" cd sdks/go echo ">>> RUNNING $RUNNER integration tests with pipeline options: $ARGS" -go test -v $TESTS $ARGS \ +go test -v $TESTS $ARGS 1>&2 \ || TEST_EXIT_CODE=$? # don't fail fast here; clean up environment before exiting cd ../.. 
diff --git a/sdks/java/container/license_scripts/dep_urls_java.yaml b/sdks/java/container/license_scripts/dep_urls_java.yaml index 14dbb3e74188..fd4126aee648 100644 --- a/sdks/java/container/license_scripts/dep_urls_java.yaml +++ b/sdks/java/container/license_scripts/dep_urls_java.yaml @@ -56,3 +56,7 @@ jackson-bom: '2.12.4': license: "https://raw.githubusercontent.com/FasterXML/jackson-bom/master/LICENSE" type: "Apache License 2.0" +junit-dep: + '4.11': + license: "https://opensource.org/licenses/cpl1.0.txt" + type: "Common Public License Version 1.0" diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java index b2367837cc45..8d43fdddf3fb 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/AvroCoder.java @@ -117,9 +117,19 @@ public class AvroCoder extends CustomCoder { * @param the element type */ public static AvroCoder of(TypeDescriptor type) { + return of(type, true); + } + + /** + * Returns an {@code AvroCoder} instance for the provided element type, respecting whether to use + * Avro's Reflect* or Specific* suite for encoding and decoding. + * + * @param the element type + */ + public static AvroCoder of(TypeDescriptor type, boolean useReflectApi) { @SuppressWarnings("unchecked") Class clazz = (Class) type.getRawType(); - return of(clazz); + return of(clazz, useReflectApi); } /** @@ -128,7 +138,7 @@ public static AvroCoder of(TypeDescriptor type) { * @param the element type */ public static AvroCoder of(Class clazz) { - return of(clazz, false); + return of(clazz, true); } /** @@ -140,8 +150,8 @@ public static AvroGenericCoder of(Schema schema) { } /** - * Returns an {@code AvroCoder} instance for the given class using Avro's Reflection API for - * encoding and decoding. + * Returns an {@code AvroCoder} instance for the given class, respecting whether to use Avro's + * Reflect* or Specific* suite for encoding and decoding. * * @param the element type */ @@ -158,12 +168,12 @@ public static AvroCoder of(Class type, boolean useReflectApi) { * @param the element type */ public static AvroCoder of(Class type, Schema schema) { - return of(type, schema, false); + return of(type, schema, true); } /** - * Returns an {@code AvroCoder} instance for the given class and schema using Avro's Reflection - * API for encoding and decoding. + * Returns an {@code AvroCoder} instance for the given class and schema, respecting whether to use + * Avro's Reflect* or Specific* suite for encoding and decoding. 
* * @param the element type */ diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/AvroCoderTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/AvroCoderTest.java index d7886c30b661..9443aad07743 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/AvroCoderTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/AvroCoderTest.java @@ -323,7 +323,8 @@ public void testPojoEncoding() throws Exception { @Test public void testSpecificRecordEncoding() throws Exception { - AvroCoder coder = AvroCoder.of(TestAvro.class, AVRO_SPECIFIC_RECORD.getSchema()); + AvroCoder coder = + AvroCoder.of(TestAvro.class, AVRO_SPECIFIC_RECORD.getSchema(), false); assertTrue(SpecificRecord.class.isAssignableFrom(coder.getType())); CoderProperties.coderDecodeEncodeEqual(coder, AVRO_SPECIFIC_RECORD); @@ -415,8 +416,8 @@ public void testAvroCoderIsSerializable() throws Exception { } @Test - public void testAvroReflectCoderIsSerializable() throws Exception { - AvroCoder coder = AvroCoder.of(Pojo.class, true); + public void testAvroSpecificCoderIsSerializable() throws Exception { + AvroCoder coder = AvroCoder.of(Pojo.class, false); // Check that the coder is serializable using the regular JSON approach. SerializableUtils.ensureSerializable(coder); diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java index c4356959e727..543d32283a64 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRel.java @@ -17,6 +17,7 @@ */ package org.apache.beam.sdk.extensions.sql.impl.rel; +import static org.apache.beam.sdk.schemas.Schema.Field; import static org.apache.beam.sdk.schemas.Schema.FieldType; import static org.apache.beam.vendor.calcite.v1_26_0.com.google.common.base.Preconditions.checkArgument; @@ -40,6 +41,7 @@ import java.util.Map; import java.util.Set; import java.util.TimeZone; +import java.util.TreeSet; import java.util.stream.Collectors; import org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions; import org.apache.beam.sdk.extensions.sql.impl.JavaUdfLoader; @@ -48,6 +50,7 @@ import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils; import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.CharType; import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils.TimeWithLocalTzType; +import org.apache.beam.sdk.schemas.FieldAccessDescriptor; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes; import org.apache.beam.sdk.transforms.DoFn; @@ -157,15 +160,11 @@ public PCollection expand(PCollectionList pinput) { final RelOptPredicateList predicates = mq.getPulledUpPredicates(getInput()); final RexSimplify simplify = new RexSimplify(rexBuilder, predicates, RexUtil.EXECUTOR); final RexProgram program = getProgram().normalize(rexBuilder, simplify); + final InputGetterImpl inputGetter = new InputGetterImpl(rowParam, upstream.getSchema()); Expression condition = RexToLixTranslator.translateCondition( - program, - typeFactory, - builder, - new InputGetterImpl(rowParam, upstream.getSchema()), - null, - conformance); + program, typeFactory, builder, inputGetter, null, conformance); List expressions = RexToLixTranslator.translateProjects( @@ -175,7 +174,7 @@ public PCollection 
expand(PCollectionList pinput) { builder, physType, DataContext.ROOT, - new InputGetterImpl(rowParam, upstream.getSchema()), + inputGetter, null); builder.add( @@ -192,10 +191,8 @@ public PCollection expand(PCollectionList pinput) { builder.toBlock().toString(), outputSchema, options.getVerifyRowValues(), - getJarPaths(program)); - - // validate generated code - calcFn.compile(); + getJarPaths(program), + inputGetter.getFieldAccess()); return upstream.apply(ParDo.of(calcFn)).setRowSchema(outputSchema); } @@ -207,20 +204,29 @@ private static class CalcFn extends DoFn { private final Schema outputSchema; private final boolean verifyRowValues; private final List jarPaths; + + @FieldAccess("row") + private final FieldAccessDescriptor fieldAccess; + private transient @Nullable ScriptEvaluator se = null; public CalcFn( String processElementBlock, Schema outputSchema, boolean verifyRowValues, - List jarPaths) { + List jarPaths, + FieldAccessDescriptor fieldAccess) { this.processElementBlock = processElementBlock; this.outputSchema = outputSchema; this.verifyRowValues = verifyRowValues; this.jarPaths = jarPaths; + this.fieldAccess = fieldAccess; + + // validate generated code + compile(processElementBlock, jarPaths); } - ScriptEvaluator compile() { + private static ScriptEvaluator compile(String processElementBlock, List jarPaths) { ScriptEvaluator se = new ScriptEvaluator(); if (!jarPaths.isEmpty()) { try { @@ -246,22 +252,22 @@ ScriptEvaluator compile() { @Setup public void setup() { - this.se = compile(); + this.se = compile(processElementBlock, jarPaths); } @ProcessElement - public void processElement(ProcessContext c) { + public void processElement(@FieldAccess("row") Row row, OutputReceiver r) { assert se != null; final Object[] v; try { - v = (Object[]) se.evaluate(new Object[] {c.element(), CONTEXT_INSTANCE}); + v = (Object[]) se.evaluate(new Object[] {row, CONTEXT_INSTANCE}); } catch (InvocationTargetException e) { throw new RuntimeException( "CalcFn failed to evaluate: " + processElementBlock, e.getCause()); } if (v != null) { - Row row = toBeamRow(Arrays.asList(v), outputSchema, verifyRowValues); - c.output(row); + final Row output = toBeamRow(Arrays.asList(v), outputSchema, verifyRowValues); + r.output(output); } } } @@ -411,14 +417,21 @@ private static class InputGetterImpl implements RexToLixTranslator.InputGetter { private final Expression input; private final Schema inputSchema; + private final Set referencedColumns; private InputGetterImpl(Expression input, Schema inputSchema) { this.input = input; this.inputSchema = inputSchema; + this.referencedColumns = new TreeSet<>(); + } + + FieldAccessDescriptor getFieldAccess() { + return FieldAccessDescriptor.withFieldIds(this.referencedColumns); } @Override public Expression field(BlockBuilder list, int index, Type storageType) { + this.referencedColumns.add(index); return getBeamField(list, index, input, inputSchema); } @@ -431,64 +444,66 @@ private static Expression getBeamField( final Expression expression = list.append(list.newName("current"), input); - FieldType fieldType = schema.getField(index).getType(); - Expression value; + final Field field = schema.getField(index); + final FieldType fieldType = field.getType(); + final Expression fieldName = Expressions.constant(field.getName()); + final Expression value; switch (fieldType.getTypeName()) { case BYTE: - value = Expressions.call(expression, "getByte", Expressions.constant(index)); + value = Expressions.call(expression, "getByte", fieldName); break; case INT16: - value = 
Expressions.call(expression, "getInt16", Expressions.constant(index)); + value = Expressions.call(expression, "getInt16", fieldName); break; case INT32: - value = Expressions.call(expression, "getInt32", Expressions.constant(index)); + value = Expressions.call(expression, "getInt32", fieldName); break; case INT64: - value = Expressions.call(expression, "getInt64", Expressions.constant(index)); + value = Expressions.call(expression, "getInt64", fieldName); break; case DECIMAL: - value = Expressions.call(expression, "getDecimal", Expressions.constant(index)); + value = Expressions.call(expression, "getDecimal", fieldName); break; case FLOAT: - value = Expressions.call(expression, "getFloat", Expressions.constant(index)); + value = Expressions.call(expression, "getFloat", fieldName); break; case DOUBLE: - value = Expressions.call(expression, "getDouble", Expressions.constant(index)); + value = Expressions.call(expression, "getDouble", fieldName); break; case STRING: - value = Expressions.call(expression, "getString", Expressions.constant(index)); + value = Expressions.call(expression, "getString", fieldName); break; case DATETIME: - value = Expressions.call(expression, "getDateTime", Expressions.constant(index)); + value = Expressions.call(expression, "getDateTime", fieldName); break; case BOOLEAN: - value = Expressions.call(expression, "getBoolean", Expressions.constant(index)); + value = Expressions.call(expression, "getBoolean", fieldName); break; case BYTES: - value = Expressions.call(expression, "getBytes", Expressions.constant(index)); + value = Expressions.call(expression, "getBytes", fieldName); break; case ARRAY: - value = Expressions.call(expression, "getArray", Expressions.constant(index)); + value = Expressions.call(expression, "getArray", fieldName); break; case MAP: - value = Expressions.call(expression, "getMap", Expressions.constant(index)); + value = Expressions.call(expression, "getMap", fieldName); break; case ROW: - value = Expressions.call(expression, "getRow", Expressions.constant(index)); + value = Expressions.call(expression, "getRow", fieldName); break; case LOGICAL_TYPE: String identifier = fieldType.getLogicalType().getIdentifier(); if (CharType.IDENTIFIER.equals(identifier)) { - value = Expressions.call(expression, "getString", Expressions.constant(index)); + value = Expressions.call(expression, "getString", fieldName); } else if (TimeWithLocalTzType.IDENTIFIER.equals(identifier)) { - value = Expressions.call(expression, "getDateTime", Expressions.constant(index)); + value = Expressions.call(expression, "getDateTime", fieldName); } else if (SqlTypes.DATE.getIdentifier().equals(identifier)) { value = Expressions.convert_( Expressions.call( expression, "getLogicalTypeValue", - Expressions.constant(index), + fieldName, Expressions.constant(LocalDate.class)), LocalDate.class); } else if (SqlTypes.TIME.getIdentifier().equals(identifier)) { @@ -497,7 +512,7 @@ private static Expression getBeamField( Expressions.call( expression, "getLogicalTypeValue", - Expressions.constant(index), + fieldName, Expressions.constant(LocalTime.class)), LocalTime.class); } else if (SqlTypes.DATETIME.getIdentifier().equals(identifier)) { @@ -506,7 +521,7 @@ private static Expression getBeamField( Expressions.call( expression, "getLogicalTypeValue", - Expressions.constant(index), + fieldName, Expressions.constant(LocalDateTime.class)), LocalDateTime.class); } else { diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRelTest.java 
b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRelTest.java index 27baad3b138d..656a5144eeb8 100644 --- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRelTest.java +++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamCalcRelTest.java @@ -18,20 +18,34 @@ package org.apache.beam.sdk.extensions.sql.impl.rel; import java.math.BigDecimal; +import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.sql.impl.BeamTableStatistics; import org.apache.beam.sdk.extensions.sql.impl.planner.NodeStats; import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestBoundedTable; import org.apache.beam.sdk.extensions.sql.meta.provider.test.TestUnboundedTable; +import org.apache.beam.sdk.runners.TransformHierarchy; +import org.apache.beam.sdk.schemas.FieldAccessDescriptor; import org.apache.beam.sdk.schemas.Schema; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.reflect.DoFnSignature; +import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PValue; +import org.apache.beam.sdk.values.Row; import org.apache.beam.vendor.calcite.v1_26_0.org.apache.calcite.rel.RelNode; import org.joda.time.DateTime; import org.joda.time.Duration; import org.junit.Assert; import org.junit.BeforeClass; +import org.junit.Rule; import org.junit.Test; /** Tests related to {@code BeamCalcRel}. */ public class BeamCalcRelTest extends BaseRelTest { + + @Rule public final TestPipeline pipeline = TestPipeline.create(); + private static final DateTime FIRST_DATE = new DateTime(1); private static final DateTime SECOND_DATE = new DateTime(1 + 3600 * 1000); @@ -160,4 +174,74 @@ public void testNodeStatsNumberOfConditions() { Assert.assertTrue(doubleEqualEstimate.getRowCount() < equalEstimate.getRowCount()); Assert.assertTrue(doubleEqualEstimate.getWindow() < equalEstimate.getWindow()); } + + private static class NodeGetter extends Pipeline.PipelineVisitor.Defaults { + + private final PValue target; + private TransformHierarchy.Node producer; + + private NodeGetter(PValue target) { + this.target = target; + } + + @Override + public void visitValue(PValue value, TransformHierarchy.Node producer) { + if (value == target) { + assert this.producer == null; + this.producer = producer; + } + } + } + + @Test + public void testSingleFieldAccess() throws IllegalAccessException { + String sql = "SELECT order_id FROM ORDER_DETAILS_BOUNDED"; + + PCollection rows = compilePipeline(sql, pipeline); + + final NodeGetter nodeGetter = new NodeGetter(rows); + pipeline.traverseTopologically(nodeGetter); + + ParDo.MultiOutput pardo = + (ParDo.MultiOutput) nodeGetter.producer.getTransform(); + DoFnSignature sig = DoFnSignatures.getSignature(pardo.getFn().getClass()); + + Assert.assertEquals(1, sig.fieldAccessDeclarations().size()); + DoFnSignature.FieldAccessDeclaration dec = + sig.fieldAccessDeclarations().values().iterator().next(); + FieldAccessDescriptor fieldAccess = (FieldAccessDescriptor) dec.field().get(pardo.getFn()); + + Assert.assertTrue(fieldAccess.referencesSingleField()); + + fieldAccess = + fieldAccess.resolve(nodeGetter.producer.getInputs().values().iterator().next().getSchema()); + Assert.assertEquals("order_id", fieldAccess.fieldNamesAccessed().iterator().next()); + + pipeline.run().waitUntilFinish(); + } + + @Test + public void 
testNoFieldAccess() throws IllegalAccessException { + String sql = "SELECT 1 FROM ORDER_DETAILS_BOUNDED"; + + PCollection rows = compilePipeline(sql, pipeline); + + final NodeGetter nodeGetter = new NodeGetter(rows); + pipeline.traverseTopologically(nodeGetter); + + ParDo.MultiOutput pardo = + (ParDo.MultiOutput) nodeGetter.producer.getTransform(); + DoFnSignature sig = DoFnSignatures.getSignature(pardo.getFn().getClass()); + + Assert.assertEquals(1, sig.fieldAccessDeclarations().size()); + DoFnSignature.FieldAccessDeclaration dec = + sig.fieldAccessDeclarations().values().iterator().next(); + FieldAccessDescriptor fieldAccess = (FieldAccessDescriptor) dec.field().get(pardo.getFn()); + + Assert.assertFalse(fieldAccess.getAllFields()); + Assert.assertTrue(fieldAccess.getFieldsAccessed().isEmpty()); + Assert.assertTrue(fieldAccess.getNestedFieldsAccessed().isEmpty()); + + pipeline.run().waitUntilFinish(); + } } diff --git a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java index 38c604e19e27..1d93ed3a7157 100644 --- a/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java +++ b/sdks/java/extensions/sql/zetasql/src/main/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRel.java @@ -17,6 +17,7 @@ */ package org.apache.beam.sdk.extensions.sql.zetasql; +import static org.apache.beam.sdk.schemas.Schema.Field; import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull; import com.google.auto.value.AutoValue; @@ -39,6 +40,7 @@ import org.apache.beam.sdk.extensions.sql.impl.utils.CalciteUtils; import org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamBigQuerySqlDialect; import org.apache.beam.sdk.extensions.sql.meta.provider.bigquery.BeamSqlUnparseContext; +import org.apache.beam.sdk.schemas.FieldAccessDescriptor; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; @@ -142,9 +144,6 @@ public PCollection expand(PCollectionList pinput) { options.getZetaSqlDefaultTimezone(), options.getVerifyRowValues()); - // validate prepared expressions - calcFn.setup(); - return upstream.apply(ParDo.of(calcFn)).setRowSchema(outputSchema); } } @@ -171,7 +170,11 @@ private static class CalcFn extends DoFn { private final Schema outputSchema; private final String defaultTimezone; private final boolean verifyRowValues; - private transient List referencedColumns = ImmutableList.of(); + private final List referencedColumns; + + @FieldAccess("row") + private final FieldAccessDescriptor fieldAccess; + private transient Map> pending = new HashMap<>(); private transient PreparedExpression exp; private transient PreparedExpression.@Nullable Stream stream; @@ -190,10 +193,21 @@ private static class CalcFn extends DoFn { this.outputSchema = outputSchema; this.defaultTimezone = defaultTimezone; this.verifyRowValues = verifyRowValues; + + try (PreparedExpression exp = + prepareExpression(sql, nullParams, inputSchema, defaultTimezone)) { + ImmutableList.Builder columns = new ImmutableList.Builder<>(); + for (String c : exp.getReferencedColumns()) { + columns.add(Integer.parseInt(c.substring(1))); + } + this.referencedColumns = columns.build(); + this.fieldAccess = FieldAccessDescriptor.withFieldIds(this.referencedColumns); + } } /** exp cannot be reused and is transient so 
needs to be reinitialized. */ - private void prepareExpression() { + private static PreparedExpression prepareExpression( + String sql, Map nullParams, Schema inputSchema, String defaultTimezone) { AnalyzerOptions options = SqlAnalyzer.getAnalyzerOptions(QueryParameters.ofNamed(nullParams), defaultTimezone); for (int i = 0; i < inputSchema.getFieldCount(); i++) { @@ -202,21 +216,15 @@ private void prepareExpression() { ZetaSqlBeamTranslationUtils.toZetaSqlType(inputSchema.getField(i).getType())); } - exp = new PreparedExpression(sql); + PreparedExpression exp = new PreparedExpression(sql); exp.prepare(options); + return exp; } @Setup public void setup() { - prepareExpression(); - - ImmutableList.Builder columns = new ImmutableList.Builder<>(); - for (String c : exp.getReferencedColumns()) { - columns.add(Integer.parseInt(c.substring(1))); - } - referencedColumns = columns.build(); - - stream = exp.stream(); + this.exp = prepareExpression(sql, nullParams, inputSchema, defaultTimezone); + this.stream = exp.stream(); } @StartBundle @@ -231,14 +239,15 @@ public Duration getAllowedTimestampSkew() { @ProcessElement public void processElement( - @Element Row row, @Timestamp Instant t, BoundedWindow w, OutputReceiver r) + @FieldAccess("row") Row row, @Timestamp Instant t, BoundedWindow w, OutputReceiver r) throws InterruptedException { Map columns = new HashMap<>(); for (int i : referencedColumns) { + final Field field = inputSchema.getField(i); columns.put( columnName(i), ZetaSqlBeamTranslationUtils.toZetaSqlValue( - row.getBaseValue(i, Object.class), inputSchema.getField(i).getType())); + row.getBaseValue(field.getName(), Object.class), field.getType())); } @NonNull diff --git a/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRelTest.java b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRelTest.java new file mode 100644 index 000000000000..352e83aeb78b --- /dev/null +++ b/sdks/java/extensions/sql/zetasql/src/test/java/org/apache/beam/sdk/extensions/sql/zetasql/BeamZetaSqlCalcRelTest.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.extensions.sql.zetasql; + +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.extensions.sql.impl.QueryPlanner.QueryParameters; +import org.apache.beam.sdk.extensions.sql.impl.rel.BeamRelNode; +import org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils; +import org.apache.beam.sdk.runners.TransformHierarchy; +import org.apache.beam.sdk.schemas.FieldAccessDescriptor; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.reflect.DoFnSignature; +import org.apache.beam.sdk.transforms.reflect.DoFnSignatures; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PValue; +import org.apache.beam.sdk.values.Row; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +/** Tests related to {@code BeamZetaSqlCalcRel}. */ +public class BeamZetaSqlCalcRelTest extends ZetaSqlTestBase { + + private PCollection compile(String sql) { + ZetaSQLQueryPlanner zetaSQLQueryPlanner = new ZetaSQLQueryPlanner(config); + BeamRelNode beamRelNode = zetaSQLQueryPlanner.convertToBeamRel(sql, QueryParameters.ofNone()); + return BeamSqlRelUtils.toPCollection(pipeline, beamRelNode); + } + + @Rule public final TestPipeline pipeline = TestPipeline.create(); + + @Before + public void setUp() { + initialize(); + } + + private static class NodeGetter extends Pipeline.PipelineVisitor.Defaults { + + private final PValue target; + private TransformHierarchy.Node producer; + + private NodeGetter(PValue target) { + this.target = target; + } + + @Override + public void visitValue(PValue value, TransformHierarchy.Node producer) { + if (value == target) { + assert this.producer == null; + this.producer = producer; + } + } + } + + @Test + public void testSingleFieldAccess() throws IllegalAccessException { + String sql = "SELECT Key FROM KeyValue"; + + PCollection rows = compile(sql); + + final NodeGetter nodeGetter = new NodeGetter(rows); + pipeline.traverseTopologically(nodeGetter); + + ParDo.MultiOutput pardo = + (ParDo.MultiOutput) nodeGetter.producer.getTransform(); + DoFnSignature sig = DoFnSignatures.getSignature(pardo.getFn().getClass()); + + Assert.assertEquals(1, sig.fieldAccessDeclarations().size()); + DoFnSignature.FieldAccessDeclaration dec = + sig.fieldAccessDeclarations().values().iterator().next(); + FieldAccessDescriptor fieldAccess = (FieldAccessDescriptor) dec.field().get(pardo.getFn()); + + Assert.assertTrue(fieldAccess.referencesSingleField()); + + fieldAccess = + fieldAccess.resolve(nodeGetter.producer.getInputs().values().iterator().next().getSchema()); + Assert.assertEquals("Key", fieldAccess.fieldNamesAccessed().iterator().next()); + + pipeline.run().waitUntilFinish(); + } + + @Test + public void testNoFieldAccess() throws IllegalAccessException { + String sql = "SELECT 1 FROM KeyValue"; + + PCollection rows = compile(sql); + + final NodeGetter nodeGetter = new NodeGetter(rows); + pipeline.traverseTopologically(nodeGetter); + + ParDo.MultiOutput pardo = + (ParDo.MultiOutput) nodeGetter.producer.getTransform(); + DoFnSignature sig = DoFnSignatures.getSignature(pardo.getFn().getClass()); + + Assert.assertEquals(1, sig.fieldAccessDeclarations().size()); + DoFnSignature.FieldAccessDeclaration dec = + sig.fieldAccessDeclarations().values().iterator().next(); + FieldAccessDescriptor fieldAccess = (FieldAccessDescriptor) dec.field().get(pardo.getFn()); + + Assert.assertFalse(fieldAccess.getAllFields()); + 
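// Aside, not part of this patch: an illustrative sketch of the DoFn shape that these field-access
// tests and the CalcFn change above rely on. A schema-aware DoFn can declare the fields it reads
// through a FieldAccessDescriptor so runners may drop unused columns. The class and field names
// below ("KeyOnlyFn", "Key") are hypothetical; assumed imports are
// org.apache.beam.sdk.schemas.FieldAccessDescriptor, org.apache.beam.sdk.transforms.DoFn and
// org.apache.beam.sdk.values.Row.
private static class KeyOnlyFn extends DoFn<Row, String> {
  @FieldAccess("row")
  private final FieldAccessDescriptor fieldAccess = FieldAccessDescriptor.withFieldNames("Key");

  @ProcessElement
  public void process(@FieldAccess("row") Row row, OutputReceiver<String> out) {
    out.output(row.getString("Key"));
  }
}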
Assert.assertTrue(fieldAccess.getFieldsAccessed().isEmpty()); + Assert.assertTrue(fieldAccess.getNestedFieldsAccessed().isEmpty()); + + pipeline.run().waitUntilFinish(); + } +} diff --git a/sdks/java/fn-execution/build.gradle b/sdks/java/fn-execution/build.gradle index 8cb5f0a3d399..2d495ee6eeab 100644 --- a/sdks/java/fn-execution/build.gradle +++ b/sdks/java/fn-execution/build.gradle @@ -34,8 +34,10 @@ dependencies { compile library.java.slf4j_api compile library.java.joda_time provided library.java.junit + testCompile project(path: ":sdks:java:core", configuration: "shadowTest") testCompile library.java.junit testCompile library.java.mockito_core testCompile library.java.commons_lang3 + testCompile "com.github.stefanbirkner:system-rules:1.19.0" testRuntimeOnly library.java.slf4j_jdk14 } diff --git a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java b/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java index 221ebe76dd9c..ad408177c737 100644 --- a/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java +++ b/sdks/java/fn-execution/src/main/java/org/apache/beam/sdk/fn/JvmInitializers.java @@ -20,6 +20,8 @@ import org.apache.beam.sdk.harness.JvmInitializer; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.util.common.ReflectHelpers; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** Helpers for executing {@link JvmInitializer} implementations. */ public class JvmInitializers { @@ -30,6 +32,8 @@ public class JvmInitializers { */ public static void runOnStartup() { for (JvmInitializer initializer : ReflectHelpers.loadServicesOrdered(JvmInitializer.class)) { + // We write to standard out since logging has yet to be initialized. + System.out.format("Running JvmInitializer#onStartup for %s%n", initializer); initializer.onStartup(); } } @@ -42,7 +46,11 @@ public static void runOnStartup() { * @param options The pipeline options passed to the worker. */ public static void runBeforeProcessing(PipelineOptions options) { + // We load the logger in the method to minimize the amount of class loading that happens + // during class initialization. 
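// Aside, not part of this patch: a minimal sketch of the kind of initializer these hooks discover
// and now log. Implementations are loaded via ServiceLoader, typically registered with
// @AutoService (as the test further below does). The class name "ExampleInitializer" is
// hypothetical; assumed imports are com.google.auto.service.AutoService,
// org.apache.beam.sdk.harness.JvmInitializer and org.apache.beam.sdk.options.PipelineOptions.
@AutoService(JvmInitializer.class)
public class ExampleInitializer implements JvmInitializer {
  @Override
  public void onStartup() {
    // Runs before logging is configured, which is why runOnStartup() reports to stdout.
  }

  @Override
  public void beforeProcessing(PipelineOptions options) {
    // Runs on a worker before the first bundle; normal logging is available here.
  }
}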
+ Logger logger = LoggerFactory.getLogger(JvmInitializers.class); for (JvmInitializer initializer : ReflectHelpers.loadServicesOrdered(JvmInitializer.class)) { + logger.info("Running JvmInitializer#beforeProcessing for {}", initializer); initializer.beforeProcessing(options); } } diff --git a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java b/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java index a20b3a981c77..e1da00ae94dd 100644 --- a/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java +++ b/sdks/java/fn-execution/src/test/java/org/apache/beam/sdk/fn/JvmInitializersTest.java @@ -17,15 +17,20 @@ */ package org.apache.beam.sdk.fn; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.StringContains.containsString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import com.google.auto.service.AutoService; import org.apache.beam.sdk.harness.JvmInitializer; import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.testing.ExpectedLogs; import org.apache.beam.sdk.testing.TestPipeline; import org.junit.Before; +import org.junit.Rule; import org.junit.Test; +import org.junit.contrib.java.lang.system.SystemOutRule; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -33,6 +38,9 @@ @RunWith(JUnit4.class) public final class JvmInitializersTest { + @Rule public ExpectedLogs expectedLogs = ExpectedLogs.none(JvmInitializers.class); + @Rule public SystemOutRule systemOutRule = new SystemOutRule().enableLog(); + private static Boolean onStartupRan; private static Boolean beforeProcessingRan; private static PipelineOptions receivedOptions; @@ -64,6 +72,7 @@ public void runOnStartup_runsInitializers() { JvmInitializers.runOnStartup(); assertTrue(onStartupRan); + assertThat(systemOutRule.getLog(), containsString("Running JvmInitializer#onStartup")); } @Test @@ -74,5 +83,6 @@ public void runBeforeProcessing_runsInitializersWithOptions() { assertTrue(beforeProcessingRan); assertEquals(options, receivedOptions); + expectedLogs.verifyInfo("Running JvmInitializer#beforeProcessing"); } } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java index 90b5fed4ae85..1aa5d52b2a9d 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java @@ -23,6 +23,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; +import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnControlGrpc; import org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor; @@ -102,18 +103,28 @@ private class InboundObserver implements StreamObserver { - try { - BeamFnApi.InstructionResponse response = delegateOnInstructionRequestType(value); - sendInstructionResponse(response); - } catch (Error e) { - sendErrorResponse(e); - throw e; - } - }); + public void onNext(BeamFnApi.InstructionRequest request) { + try { + BeamFnLoggingMDC.setInstructionId(request.getInstructionId()); + LOG.debug("Received InstructionRequest {}", request); + executor.execute( + () -> { + 
try { + // Ensure that we set and clear the MDC since processing the request will occur + // in a separate thread. + BeamFnLoggingMDC.setInstructionId(request.getInstructionId()); + BeamFnApi.InstructionResponse response = delegateOnInstructionRequestType(request); + sendInstructionResponse(response); + } catch (Error e) { + sendErrorResponse(e); + throw e; + } finally { + BeamFnLoggingMDC.setInstructionId(null); + } + }); + } finally { + BeamFnLoggingMDC.setInstructionId(null); + } } @Override diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java index 9cceb721542c..74ddab549e96 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java @@ -48,7 +48,6 @@ import org.apache.beam.fn.harness.data.PCollectionConsumerRegistry; import org.apache.beam.fn.harness.data.PTransformFunctionRegistry; import org.apache.beam.fn.harness.data.QueueingBeamFnDataClient; -import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC; import org.apache.beam.fn.harness.state.BeamFnStateClient; import org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache; import org.apache.beam.fn.harness.state.CachingBeamFnStateClient; @@ -335,7 +334,6 @@ public BeamFnApi.InstructionResponse.Builder processBundle(BeamFnApi.Instruction } }); try { - BeamFnLoggingMDC.setInstructionId(request.getInstructionId()); PTransformFunctionRegistry startFunctionRegistry = bundleProcessor.getStartFunctionRegistry(); PTransformFunctionRegistry finishFunctionRegistry = bundleProcessor.getFinishFunctionRegistry(); @@ -390,8 +388,6 @@ public BeamFnApi.InstructionResponse.Builder processBundle(BeamFnApi.Instruction // Make sure we clean-up from the active set of bundle processors. bundleProcessorCache.discard(bundleProcessor); throw e; - } finally { - BeamFnLoggingMDC.setInstructionId(null); } } @@ -852,13 +848,13 @@ public void close() throws Exception { @Override @SuppressWarnings("FutureReturnValueIgnored") // async arriveAndDeregister task doesn't need // monitoring. - public void handle( - StateRequest.Builder requestBuilder, CompletableFuture response) { + public CompletableFuture handle(StateRequest.Builder requestBuilder) { // Register each request with the phaser and arrive and deregister each time a request // completes. 
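// Aside, not part of this patch: the Phaser idiom described in the comment above, shown in
// isolation. Each issued state request registers a party and deregisters when its future
// completes, so a caller can later block until every outstanding call has finished. "client" and
// "requestBuilder" are placeholders for a BeamFnStateClient and a partially built StateRequest;
// Phaser is java.util.concurrent.Phaser.
Phaser phaser = new Phaser(1); // the initial party is the thread that will wait
CompletableFuture<StateResponse> pending = client.handle(requestBuilder);
phaser.register();
pending.whenComplete((resp, err) -> phaser.arriveAndDeregister());
// ... once all requests have been issued, wait for them to complete:
phaser.arriveAndAwaitAdvance();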
+ CompletableFuture response = beamFnStateClient.handle(requestBuilder); phaser.register(); response.whenComplete((stateResponse, throwable) -> phaser.arriveAndDeregister()); - beamFnStateClient.handle(requestBuilder, response); + return response; } } @@ -879,7 +875,7 @@ public void close() throws Exception { } @Override - public void handle(Builder requestBuilder, CompletableFuture response) { + public CompletableFuture handle(Builder requestBuilder) { throw new IllegalStateException( String.format( "State API calls are unsupported because the " diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BagUserState.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BagUserState.java index 777036ab4c82..76664ba59aab 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BagUserState.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BagUserState.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.Collections; -import java.util.concurrent.CompletableFuture; import org.apache.beam.model.fnexecution.v1.BeamFnApi.StateAppendRequest; import org.apache.beam.model.fnexecution.v1.BeamFnApi.StateClearRequest; import org.apache.beam.model.fnexecution.v1.BeamFnApi.StateRequest; @@ -115,6 +114,7 @@ public void clear() { newValues = new ArrayList<>(); } + @SuppressWarnings("FutureReturnValueIgnored") public void asyncClose() throws Exception { checkState( !isClosed, @@ -122,8 +122,7 @@ public void asyncClose() throws Exception { request.getStateKey()); if (oldValues == null) { beamFnStateClient.handle( - request.toBuilder().setClear(StateClearRequest.getDefaultInstance()), - new CompletableFuture<>()); + request.toBuilder().setClear(StateClearRequest.getDefaultInstance())); } if (!newValues.isEmpty()) { ByteString.Output out = ByteString.newOutput(); @@ -134,8 +133,7 @@ public void asyncClose() throws Exception { beamFnStateClient.handle( request .toBuilder() - .setAppend(StateAppendRequest.newBuilder().setData(out.toByteString())), - new CompletableFuture<>()); + .setAppend(StateAppendRequest.newBuilder().setData(out.toByteString()))); } isClosed = true; } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateClient.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateClient.java index ffef28d922b7..4cf03c41983c 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateClient.java @@ -31,9 +31,6 @@ public interface BeamFnStateClient { * Consumes a state request populating a unique id returning a future to the response. * * @param requestBuilder A partially completed state request. The id will be populated the client. - * @param response A future containing a corresponding {@link StateResponse} for the supplied - * request. 
*/ - void handle( - BeamFnApi.StateRequest.Builder requestBuilder, CompletableFuture response); + CompletableFuture handle(BeamFnApi.StateRequest.Builder requestBuilder); } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCache.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCache.java index 633db1d99b6c..629c1fdd7ca2 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCache.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCache.java @@ -93,15 +93,16 @@ private GrpcStateClient(ApiServiceDescriptor apiServiceDescriptor) { } @Override - public void handle( - StateRequest.Builder requestBuilder, CompletableFuture response) { + public CompletableFuture handle(StateRequest.Builder requestBuilder) { requestBuilder.setId(idGenerator.getId()); StateRequest request = requestBuilder.build(); + CompletableFuture response = new CompletableFuture<>(); outstandingRequests.put(request.getId(), response); // If the server closes, gRPC will throw an error if onNext is called. LOG.debug("Sending StateRequest {}", request); outboundObserver.onNext(request); + return response; } private synchronized void closeAndCleanUp(RuntimeException cause) { diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClient.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClient.java index 888d23139028..0f39b456afde 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClient.java @@ -76,16 +76,14 @@ public CachingBeamFnStateClient( */ @Override @SuppressWarnings("FutureReturnValueIgnored") - public void handle( - StateRequest.Builder requestBuilder, CompletableFuture response) { + public CompletableFuture handle(StateRequest.Builder requestBuilder) { StateKey stateKey = requestBuilder.getStateKey(); ByteString cacheToken = getCacheToken(stateKey); // If state is not cacheable proceed as normal. if (ByteString.EMPTY.equals(cacheToken)) { - beamFnStateClient.handle(requestBuilder, response); - return; + return beamFnStateClient.handle(requestBuilder); } switch (requestBuilder.getRequestCase()) { @@ -98,37 +96,38 @@ public void handle( // If data is not cached, add callback to add response to cache on completion. // Otherwise, complete the response with the cached data. + CompletableFuture response; if (cachedPage == null) { + response = beamFnStateClient.handle(requestBuilder); response.thenAccept( stateResponse -> stateCache.getUnchecked(stateKey).put(cacheKey, stateResponse.getGet())); - beamFnStateClient.handle(requestBuilder, response); } else { - response.complete( + return CompletableFuture.completedFuture( StateResponse.newBuilder().setId(requestBuilder.getId()).setGet(cachedPage).build()); } - return; + return response; case APPEND: // TODO(BEAM-12637): Support APPEND in CachingBeamFnStateClient. 
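// Aside, not part of this patch: a sketch of why returning the future helps a caching layer. A
// cache hit can be served without a round trip via an already-completed future, and a miss can
// populate the cache when the real response arrives. lookupCache and putCache are hypothetical
// helpers, not part of CachingBeamFnStateClient.
StateGetResponse cached = lookupCache(requestBuilder.getStateKey());
if (cached != null) {
  return CompletableFuture.completedFuture(
      StateResponse.newBuilder().setId(requestBuilder.getId()).setGet(cached).build());
}
return beamFnStateClient.handle(requestBuilder)
    .thenApply(
        resp -> {
          putCache(requestBuilder.getStateKey(), resp.getGet());
          return resp;
        });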
- beamFnStateClient.handle(requestBuilder, response); + response = beamFnStateClient.handle(requestBuilder); // Invalidate last page of cached values (entry with a blank continuation token response) Map map = stateCache.getUnchecked(stateKey); map.entrySet() .removeIf(entry -> (entry.getValue().getContinuationToken().equals(ByteString.EMPTY))); - return; + return response; case CLEAR: // Remove all state key data and replace with an empty response. - beamFnStateClient.handle(requestBuilder, response); + response = beamFnStateClient.handle(requestBuilder); Map clearedData = new HashMap<>(); StateCacheKey newKey = StateCacheKey.create(cacheToken, ByteString.EMPTY); clearedData.put(newKey, StateGetResponse.getDefaultInstance()); stateCache.put(stateKey, clearedData); - return; + return response; default: throw new IllegalStateException( diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java index 49efa3542388..f6796087452f 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/MultimapUserState.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.CompletableFuture; import org.apache.beam.model.fnexecution.v1.BeamFnApi.StateAppendRequest; import org.apache.beam.model.fnexecution.v1.BeamFnApi.StateClearRequest; import org.apache.beam.model.fnexecution.v1.BeamFnApi.StateRequest; @@ -195,6 +194,7 @@ public void remove(K key) { } @SuppressWarnings({ + "FutureReturnValueIgnored", "nullness" // TODO(https://issues.apache.org/jira/browse/BEAM-12687) }) // Update data in persistent store @@ -211,27 +211,30 @@ public void asyncClose() throws Exception { // Clear currently persisted key-values if (isCleared) { - beamFnStateClient.handle( - keysStateRequest.toBuilder().setClear(StateClearRequest.getDefaultInstance()), - new CompletableFuture<>()); + beamFnStateClient + .handle(keysStateRequest.toBuilder().setClear(StateClearRequest.getDefaultInstance())) + .get(); } else if (!pendingRemoves.isEmpty()) { for (K key : pendingRemoves) { - beamFnStateClient.handle( - createUserStateRequest(key) - .toBuilder() - .setClear(StateClearRequest.getDefaultInstance()), - new CompletableFuture<>()); + beamFnStateClient + .handle( + createUserStateRequest(key) + .toBuilder() + .setClear(StateClearRequest.getDefaultInstance())) + .get(); } } // Persist pending key-values if (!pendingAdds.isEmpty()) { for (Map.Entry> entry : pendingAdds.entrySet()) { - beamFnStateClient.handle( - createUserStateRequest(entry.getKey()) - .toBuilder() - .setAppend(StateAppendRequest.newBuilder().setData(encodeValues(entry.getValue()))), - new CompletableFuture<>()); + beamFnStateClient + .handle( + createUserStateRequest(entry.getKey()) + .toBuilder() + .setAppend( + StateAppendRequest.newBuilder().setData(encodeValues(entry.getValue())))) + .get(); } } } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/StateFetchingIterators.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/StateFetchingIterators.java index 1026ba590466..1b8d43a6960b 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/StateFetchingIterators.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/state/StateFetchingIterators.java @@ -207,10 +207,9 @@ public T next() { 
private void prefetchFirstPage() { if (firstPageResponseFuture == null) { - firstPageResponseFuture = new CompletableFuture<>(); - beamFnStateClient.handle( - stateRequestForFirstChunk.toBuilder().setGet(stateRequestForFirstChunk.getGet()), - firstPageResponseFuture); + firstPageResponseFuture = + beamFnStateClient.handle( + stateRequestForFirstChunk.toBuilder().setGet(stateRequestForFirstChunk.getGet())); } } } @@ -256,12 +255,11 @@ public boolean isReady() { @Override public void prefetch() { if (currentState == State.READ_REQUIRED && prefetchedResponse == null) { - prefetchedResponse = new CompletableFuture<>(); - beamFnStateClient.handle( - stateRequestForFirstChunk - .toBuilder() - .setGet(StateGetRequest.newBuilder().setContinuationToken(continuationToken)), - prefetchedResponse); + prefetchedResponse = + beamFnStateClient.handle( + stateRequestForFirstChunk + .toBuilder() + .setGet(StateGetRequest.newBuilder().setContinuationToken(continuationToken))); } } diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/BeamFnControlClientTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/BeamFnControlClientTest.java index 404c79c73264..e13c93461db6 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/BeamFnControlClientTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/BeamFnControlClientTest.java @@ -33,6 +33,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC; +import org.apache.beam.fn.harness.logging.RestoreBeamFnLoggingMDC; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest; import org.apache.beam.model.fnexecution.v1.BeamFnApi.RegisterRequest; @@ -47,7 +49,9 @@ import org.apache.beam.vendor.grpc.v1p36p0.io.grpc.stub.CallStreamObserver; import org.apache.beam.vendor.grpc.v1p36p0.io.grpc.stub.StreamObserver; import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.Uninterruptibles; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TestRule; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -85,6 +89,8 @@ public class BeamFnControlClientTest { .setError(getStackTraceAsString(FAILURE)) .build(); + @Rule public TestRule restoreMDCAfterTest = new RestoreBeamFnLoggingMDC(); + @Test public void testDelegation() throws Exception { AtomicBoolean clientClosedStream = new AtomicBoolean(); @@ -120,12 +126,15 @@ public StreamObserver control( handlers = new EnumMap<>(BeamFnApi.InstructionRequest.RequestCase.class); handlers.put( BeamFnApi.InstructionRequest.RequestCase.PROCESS_BUNDLE, - value -> - BeamFnApi.InstructionResponse.newBuilder() - .setProcessBundle(BeamFnApi.ProcessBundleResponse.getDefaultInstance())); + value -> { + assertEquals(value.getInstructionId(), BeamFnLoggingMDC.getInstructionId()); + return BeamFnApi.InstructionResponse.newBuilder() + .setProcessBundle(BeamFnApi.ProcessBundleResponse.getDefaultInstance()); + }); handlers.put( BeamFnApi.InstructionRequest.RequestCase.REGISTER, value -> { + assertEquals(value.getInstructionId(), BeamFnLoggingMDC.getInstructionId()); throw FAILURE; }); @@ -199,6 +208,7 @@ public StreamObserver control( handlers.put( BeamFnApi.InstructionRequest.RequestCase.REGISTER, value -> { + assertEquals(value.getInstructionId(), 
BeamFnLoggingMDC.getInstructionId()); throw new Error("Test Error"); }); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java index f947cd68df69..a82e1040c08e 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java @@ -960,8 +960,8 @@ public void testPendingStateCallsBlockTillCompletion() throws Exception { .build(); Map fnApiRegistry = ImmutableMap.of("1L", processBundleDescriptor); - CompletableFuture successfulResponse = new CompletableFuture<>(); - CompletableFuture unsuccessfulResponse = new CompletableFuture<>(); + CompletableFuture[] successfulResponse = new CompletableFuture[1]; + CompletableFuture[] unsuccessfulResponse = new CompletableFuture[1]; BeamFnStateGrpcClientCache mockBeamFnStateGrpcClient = Mockito.mock(BeamFnStateGrpcClientCache.class); @@ -973,8 +973,7 @@ public void testPendingStateCallsBlockTillCompletion() throws Exception { invocation -> { StateRequest.Builder stateRequestBuilder = (StateRequest.Builder) invocation.getArguments()[0]; - CompletableFuture completableFuture = - (CompletableFuture) invocation.getArguments()[1]; + CompletableFuture completableFuture = new CompletableFuture<>(); new Thread( () -> { // Simulate sleeping which introduces a race which most of the time requires @@ -990,10 +989,10 @@ public void testPendingStateCallsBlockTillCompletion() throws Exception { } }) .start(); - return null; + return completableFuture; }) .when(mockBeamFnStateClient) - .handle(any(), any()); + .handle(any()); ProcessBundleHandler handler = new ProcessBundleHandler( @@ -1034,10 +1033,12 @@ public Object createRunnerForPTransform( } private void doStateCalls(BeamFnStateClient beamFnStateClient) { - beamFnStateClient.handle( - StateRequest.newBuilder().setInstructionId("SUCCESS"), successfulResponse); - beamFnStateClient.handle( - StateRequest.newBuilder().setInstructionId("FAIL"), unsuccessfulResponse); + successfulResponse[0] = + beamFnStateClient.handle( + StateRequest.newBuilder().setInstructionId("SUCCESS")); + unsuccessfulResponse[0] = + beamFnStateClient.handle( + StateRequest.newBuilder().setInstructionId("FAIL")); } }), new BundleProcessorCache()); @@ -1047,8 +1048,8 @@ private void doStateCalls(BeamFnStateClient beamFnStateClient) { BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")) .build()); - assertTrue(successfulResponse.isDone()); - assertTrue(unsuccessfulResponse.isDone()); + assertTrue(successfulResponse[0].isDone()); + assertTrue(unsuccessfulResponse[0].isDone()); } @Test @@ -1101,10 +1102,9 @@ public Object createRunnerForPTransform( return null; } + @SuppressWarnings("FutureReturnValueIgnored") private void doStateCalls(BeamFnStateClient beamFnStateClient) { - beamFnStateClient.handle( - StateRequest.newBuilder().setInstructionId("SUCCESS"), - new CompletableFuture<>()); + beamFnStateClient.handle(StateRequest.newBuilder().setInstructionId("SUCCESS")); } }), new BundleProcessorCache()); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCacheTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCacheTest.java index 75a871ab6eb6..4badcf922053 100644 --- 
a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCacheTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/BeamFnStateGrpcClientCacheTest.java @@ -120,11 +120,10 @@ public void testCachingOfClient() throws Exception { public void testRequestResponses() throws Exception { BeamFnStateClient client = clientCache.forApiServiceDescriptor(apiServiceDescriptor); - CompletableFuture successfulResponse = new CompletableFuture<>(); - CompletableFuture unsuccessfulResponse = new CompletableFuture<>(); - - client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS), successfulResponse); - client.handle(StateRequest.newBuilder().setInstructionId(FAIL), unsuccessfulResponse); + CompletableFuture successfulResponse = + client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS)); + CompletableFuture unsuccessfulResponse = + client.handle(StateRequest.newBuilder().setInstructionId(FAIL)); // Wait for the client to connect. StreamObserver outboundServerObserver = outboundServerObservers.take(); @@ -149,8 +148,8 @@ public void testRequestResponses() throws Exception { public void testServerErrorCausesPendingAndFutureCallsToFail() throws Exception { BeamFnStateClient client = clientCache.forApiServiceDescriptor(apiServiceDescriptor); - CompletableFuture inflight = new CompletableFuture<>(); - client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS), inflight); + CompletableFuture inflight = + client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS)); // Wait for the client to connect. StreamObserver outboundServerObserver = outboundServerObservers.take(); @@ -166,8 +165,8 @@ public void testServerErrorCausesPendingAndFutureCallsToFail() throws Exception } // Send a response after the client will have received an error. - CompletableFuture late = new CompletableFuture<>(); - client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS), late); + CompletableFuture late = + client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS)); try { inflight.get(); @@ -181,8 +180,8 @@ public void testServerErrorCausesPendingAndFutureCallsToFail() throws Exception public void testServerCompletionCausesPendingAndFutureCallsToFail() throws Exception { BeamFnStateClient client = clientCache.forApiServiceDescriptor(apiServiceDescriptor); - CompletableFuture inflight = new CompletableFuture<>(); - client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS), inflight); + CompletableFuture inflight = + client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS)); // Wait for the client to connect. StreamObserver outboundServerObserver = outboundServerObservers.take(); @@ -197,8 +196,8 @@ public void testServerCompletionCausesPendingAndFutureCallsToFail() throws Excep } // Send a response after the client will have received an error. 
- CompletableFuture late = new CompletableFuture<>(); - client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS), late); + CompletableFuture late = + client.handle(StateRequest.newBuilder().setInstructionId(SUCCESS)); try { inflight.get(); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClientTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClientTest.java index 8604934224d8..8f14f3bbcb74 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClientTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/CachingBeamFnStateClientTest.java @@ -84,19 +84,16 @@ public void testNoCacheWithoutToken() throws Exception { CachingBeamFnStateClient cachingClient = new CachingBeamFnStateClient(fakeClient, stateCache, cacheTokenList); - CompletableFuture response1 = new CompletableFuture<>(); - CompletableFuture response2 = new CompletableFuture<>(); - StateRequest.Builder request = StateRequest.newBuilder() .setStateKey(key("A")) .setGet(BeamFnApi.StateGetRequest.newBuilder().build()); - cachingClient.handle(request, response1); + CompletableFuture response1 = cachingClient.handle(request); assertEquals(1, fakeClient.getCallCount()); request.clearId(); - cachingClient.handle(request, response2); + CompletableFuture response2 = cachingClient.handle(request); assertEquals(2, fakeClient.getCallCount()); } @@ -301,8 +298,7 @@ private void appendToKey(StateKey key, ByteString data, CachingBeamFnStateClient .setStateKey(key) .setAppend(StateAppendRequest.newBuilder().setData(data)); - CompletableFuture appendResponse = new CompletableFuture<>(); - cachingClient.handle(appendRequestBuilder, appendResponse); + CompletableFuture appendResponse = cachingClient.handle(appendRequestBuilder); appendResponse.get(); } @@ -310,8 +306,7 @@ private void clearKey(StateKey key, CachingBeamFnStateClient cachingClient) thro StateRequest.Builder clearRequestBuilder = StateRequest.newBuilder().setStateKey(key).setClear(StateClearRequest.getDefaultInstance()); - CompletableFuture clearResponse = new CompletableFuture<>(); - cachingClient.handle(clearRequestBuilder, clearResponse); + CompletableFuture clearResponse = cachingClient.handle(clearRequestBuilder); clearResponse.get(); } @@ -324,8 +319,7 @@ private ByteString getALlDataForKey(StateKey key, CachingBeamFnStateClient cachi requestBuilder .clearId() .setGet(StateGetRequest.newBuilder().setContinuationToken(continuationToken)); - CompletableFuture getResponse = new CompletableFuture<>(); - cachingClient.handle(requestBuilder, getResponse); + CompletableFuture getResponse = cachingClient.handle(requestBuilder); continuationToken = getResponse.get().getGet().getContinuationToken(); allData = allData.concat(getResponse.get().getGet().getData()); } while (!continuationToken.equals(ByteString.EMPTY)); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/FakeBeamFnStateClient.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/FakeBeamFnStateClient.java index 64c429280c4d..04f05ae29576 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/FakeBeamFnStateClient.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/FakeBeamFnStateClient.java @@ -73,8 +73,8 @@ public Map getData() { } @Override - public void handle( - StateRequest.Builder requestBuilder, CompletableFuture responseFuture) { + public CompletableFuture 
handle(StateRequest.Builder requestBuilder) { + // The id should never be filled out assertEquals("", requestBuilder.getId()); requestBuilder.setId(generateId()); @@ -138,7 +138,7 @@ public void handle( String.format("Unknown request type %s", request.getRequestCase())); } - responseFuture.complete(response.setId(requestBuilder.getId()).build()); + return CompletableFuture.completedFuture(response.setId(requestBuilder.getId()).build()); } private String generateId() { diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/StateFetchingIteratorsTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/StateFetchingIteratorsTest.java index 384d2df69219..ae897cbee97b 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/StateFetchingIteratorsTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/state/StateFetchingIteratorsTest.java @@ -52,15 +52,14 @@ public class StateFetchingIteratorsTest { private static BeamFnStateClient fakeStateClient( AtomicInteger callCount, ByteString... expected) { - return (requestBuilder, response) -> { + return requestBuilder -> { callCount.incrementAndGet(); if (expected.length == 0) { - response.complete( + return CompletableFuture.completedFuture( StateResponse.newBuilder() .setId(requestBuilder.getId()) .setGet(StateGetResponse.newBuilder()) .build()); - return; } ByteString continuationToken = requestBuilder.getGet().getContinuationToken(); @@ -75,7 +74,7 @@ private static BeamFnStateClient fakeStateClient( if (requestedPosition != expected.length - 1) { newContinuationToken = ByteString.copyFromUtf8(Integer.toString(requestedPosition + 1)); } - response.complete( + return CompletableFuture.completedFuture( StateResponse.newBuilder() .setId(requestBuilder.getId()) .setGet( @@ -121,15 +120,15 @@ public void testMultiWithEmptyByteStrings() throws Exception { ByteString.EMPTY); }
@Test public void testPrefetchIgnoredWhenExistingPrefetchOngoing() throws Exception { AtomicInteger callCount = new AtomicInteger(); BeamFnStateClient fakeStateClient = new BeamFnStateClient() { @Override - public void handle( - StateRequest.Builder requestBuilder, CompletableFuture response) { + public CompletableFuture handle(StateRequest.Builder requestBuilder) { callCount.incrementAndGet(); + return new CompletableFuture(); } }; PrefetchableIterator byteStrings = diff --git a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/KinesisReaderCheckpointTest.java b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/KinesisReaderCheckpointTest.java index aab5c1d677bb..ca23fd7b297e 100644 --- a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/KinesisReaderCheckpointTest.java +++ b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/KinesisReaderCheckpointTest.java @@ -27,7 +27,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; /** * */ @RunWith(MockitoJUnitRunner.class) diff --git a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/RecordFilterTest.java b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/RecordFilterTest.java index 0058e2ea3166..05304bc476d2 100644 --- a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/RecordFilterTest.java +++ b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/RecordFilterTest.java @@ -26,7 +26,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; /** * */ @RunWith(MockitoJUnitRunner.class) diff --git a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/ShardCheckpointTest.java b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/ShardCheckpointTest.java index aa4b21bb3185..1c7407a84ac5 100644 --- a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/ShardCheckpointTest.java +++ b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/ShardCheckpointTest.java @@ -36,7 +36,7 @@ import org.junit.Test; 
import org.junit.runner.RunWith; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; import software.amazon.awssdk.services.kinesis.model.ShardIteratorType; import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber; diff --git a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/SimplifiedKinesisClientTest.java b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/SimplifiedKinesisClientTest.java index 83b3e544e83f..f7fd2ff28ecc 100644 --- a/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/SimplifiedKinesisClientTest.java +++ b/sdks/java/io/amazon-web-services2/src/test/java/org/apache/beam/sdk/io/aws2/kinesis/SimplifiedKinesisClientTest.java @@ -35,7 +35,7 @@ import org.junit.runner.RunWith; import org.mockito.InjectMocks; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; import org.mockito.stubbing.Answer; import software.amazon.awssdk.core.SdkBytes; import software.amazon.awssdk.core.exception.SdkClientException; diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java index 8b9b705bdb04..66f77ab7f97a 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java @@ -2627,7 +2627,7 @@ public WriteResult expand(PCollection input) { if (getJsonTableRef() != null) { dynamicDestinations = DynamicDestinationsHelpers.ConstantTableDestinations.fromJsonTableRef( - getJsonTableRef(), getTableDescription()); + getJsonTableRef(), getTableDescription(), getClustering() != null); } else if (getTableFunction() != null) { dynamicDestinations = new TableFunctionDestinations<>(getTableFunction(), getClustering() != null); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java index fb13ba85cff5..0f6b4ccc8b2b 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinationsHelpers.java @@ -60,21 +60,28 @@ class DynamicDestinationsHelpers { static class ConstantTableDestinations extends DynamicDestinations { private final ValueProvider tableSpec; private final @Nullable String tableDescription; + private final boolean clusteringEnabled; - ConstantTableDestinations(ValueProvider tableSpec, @Nullable String tableDescription) { + ConstantTableDestinations( + ValueProvider tableSpec, + @Nullable String tableDescription, + boolean clusteringEnabled) { this.tableSpec = tableSpec; this.tableDescription = tableDescription; + this.clusteringEnabled = clusteringEnabled; } static ConstantTableDestinations fromTableSpec( - ValueProvider tableSpec, String tableDescription) { - return new ConstantTableDestinations<>(tableSpec, tableDescription); + ValueProvider tableSpec, String tableDescription, boolean clusteringEnabled) { + return new ConstantTableDestinations<>(tableSpec, tableDescription, 
clusteringEnabled); } static ConstantTableDestinations fromJsonTableRef( - ValueProvider jsonTableRef, String tableDescription) { + ValueProvider jsonTableRef, String tableDescription, boolean clusteringEnabled) { return new ConstantTableDestinations<>( - NestedValueProvider.of(jsonTableRef, new JsonTableRefToTableSpec()), tableDescription); + NestedValueProvider.of(jsonTableRef, new JsonTableRefToTableSpec()), + tableDescription, + clusteringEnabled); } @Override @@ -96,7 +103,11 @@ public TableDestination getTable(TableDestination destination) { @Override public Coder getDestinationCoder() { - return TableDestinationCoderV2.of(); + if (clusteringEnabled) { + return TableDestinationCoderV3.of(); + } else { + return TableDestinationCoderV2.of(); + } } } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java index 32ed1fe61275..2a2697ac2fb1 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java @@ -332,7 +332,7 @@ public void populateDisplayData(DisplayData.Builder builder) { @FinishBundle public void finishBundle(FinishBundleContext c) throws Exception { DatasetService datasetService = - bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)); + getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)); PendingJobManager jobManager = new PendingJobManager(); for (final PendingJobData pendingJob : pendingJobs) { diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java index 2ade12941d7b..8046ce0b0d91 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/pubsub/PubsubIO.java @@ -130,6 +130,12 @@ public class PubsubIO { private static final int PUBSUB_NAME_MIN_LENGTH = 3; private static final int PUBSUB_NAME_MAX_LENGTH = 255; + // See https://cloud.google.com/pubsub/quotas#resource_limits. + private static final int PUBSUB_MESSAGE_DATA_MAX_LENGTH = 10 << 20; + private static final int PUBSUB_MESSAGE_MAX_ATTRIBUTES = 100; + private static final int PUBSUB_MESSAGE_ATTRIBUTE_MAX_KEY_LENGTH = 256; + private static final int PUBSUB_MESSAGE_ATTRIBUTE_MAX_VALUE_LENGTH = 1024; + private static final String SUBSCRIPTION_RANDOM_TEST_PREFIX = "_random/"; private static final String SUBSCRIPTION_STARTING_SIGNAL = "_starting_signal/"; private static final String TOPIC_DEV_NULL_TEST_NAME = "/topics/dev/null"; @@ -165,6 +171,48 @@ private static void validatePubsubName(String name) { } } + private static void validatePubsubMessage(PubsubMessage message) + throws SizeLimitExceededException { + if (message.getPayload().length > PUBSUB_MESSAGE_DATA_MAX_LENGTH) { + throw new SizeLimitExceededException( + "Pubsub message data field of length " + + message.getPayload().length + + " exceeds maximum of " + + PUBSUB_MESSAGE_DATA_MAX_LENGTH + + ". 
See https://cloud.google.com/pubsub/quotas#resource_limits"); + } + @Nullable Map attributes = message.getAttributeMap(); + if (attributes != null) { + if (attributes.size() > PUBSUB_MESSAGE_MAX_ATTRIBUTES) { + throw new SizeLimitExceededException( + "Pubsub message contains " + + attributes.size() + + " attributes which exceeds the maximum of " + + PUBSUB_MESSAGE_MAX_ATTRIBUTES + + ". See https://cloud.google.com/pubsub/quotas#resource_limits"); + } + for (Map.Entry attribute : attributes.entrySet()) { + if (attribute.getKey().length() > PUBSUB_MESSAGE_ATTRIBUTE_MAX_KEY_LENGTH) { + throw new SizeLimitExceededException( + "Pubsub message attribute key " + + attribute.getKey() + + " exceeds the maximum of " + + PUBSUB_MESSAGE_ATTRIBUTE_MAX_KEY_LENGTH + + ". See https://cloud.google.com/pubsub/quotas#resource_limits"); + } + String value = attribute.getValue(); + if (value.length() > PUBSUB_MESSAGE_ATTRIBUTE_MAX_VALUE_LENGTH) { + throw new SizeLimitExceededException( + "Pubsub message attribute value starting with " + + value.substring(0, Math.min(256, value.length())) + + " exceeds the maximum of " + + PUBSUB_MESSAGE_ATTRIBUTE_MAX_VALUE_LENGTH + + ". See https://cloud.google.com/pubsub/quotas#resource_limits"); + } + } + } + } + /** Populate common {@link DisplayData} between Pubsub source and sink. */ private static void populateCommonDisplayData( DisplayData.Builder builder, @@ -1171,7 +1219,18 @@ public PDone expand(PCollection input) { return PDone.in(input.getPipeline()); case UNBOUNDED: return input - .apply(MapElements.into(new TypeDescriptor() {}).via(getFormatFn())) + .apply( + MapElements.into(new TypeDescriptor() {}) + .via( + elem -> { + PubsubMessage message = getFormatFn().apply(elem); + try { + validatePubsubMessage(message); + } catch (SizeLimitExceededException e) { + throw new IllegalArgumentException(e); + } + return message; + })) .apply( new PubsubUnboundedSink( getPubsubClientFactory(), @@ -1233,6 +1292,7 @@ public void startBundle(StartBundleContext c) throws IOException { public void processElement(ProcessContext c) throws IOException, SizeLimitExceededException { byte[] payload; PubsubMessage message = getFormatFn().apply(c.element()); + validatePubsubMessage(message); payload = message.getPayload(); Map attributes = message.getAttributeMap(); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/BatchSpannerRead.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/BatchSpannerRead.java index fc24c8f7a0c1..5393c7dfa32e 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/BatchSpannerRead.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/BatchSpannerRead.java @@ -21,9 +21,14 @@ import com.google.cloud.spanner.BatchReadOnlyTransaction; import com.google.cloud.spanner.Partition; import com.google.cloud.spanner.ResultSet; +import com.google.cloud.spanner.SpannerException; import com.google.cloud.spanner.Struct; import com.google.cloud.spanner.TimestampBound; +import java.util.HashMap; import java.util.List; +import org.apache.beam.runners.core.metrics.GcpResourceIdentifiers; +import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; +import org.apache.beam.runners.core.metrics.ServiceCallMetric; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; @@ -158,18 +163,43 @@ public void teardown() throws Exception { 
@ProcessElement public void processElement(ProcessContext c) throws Exception { + ServiceCallMetric serviceCallMetric = + createServiceCallMetric( + this.config.getProjectId().toString(), + this.config.getDatabaseId().toString(), + this.config.getInstanceId().toString()); Transaction tx = c.sideInput(txView); BatchReadOnlyTransaction batchTx = spannerAccessor.getBatchClient().batchReadOnlyTransaction(tx.transactionId()); + serviceCallMetric.call("ok"); Partition p = c.element(); try (ResultSet resultSet = batchTx.execute(p)) { while (resultSet.next()) { Struct s = resultSet.getCurrentRowAsStruct(); c.output(s); } + } catch (SpannerException e) { + serviceCallMetric.call(e.getErrorCode().getGrpcStatusCode().toString()); } } + + private ServiceCallMetric createServiceCallMetric( + String projectId, String databaseId, String tableId) { + HashMap baseLabels = new HashMap<>(); + baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, ""); + baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "Spanner"); + baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "Read"); + baseLabels.put( + MonitoringInfoConstants.Labels.RESOURCE, + GcpResourceIdentifiers.spannerTable(projectId, databaseId, tableId)); + baseLabels.put(MonitoringInfoConstants.Labels.SPANNER_PROJECT_ID, projectId); + baseLabels.put(MonitoringInfoConstants.Labels.SPANNER_DATABASE_ID, databaseId); + baseLabels.put(MonitoringInfoConstants.Labels.SPANNER_INSTANCE_ID, tableId); + ServiceCallMetric serviceCallMetric = + new ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels); + return serviceCallMetric; + } } } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java index 1066115ee14e..0c9c42d8c224 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java @@ -42,6 +42,8 @@ public static ReadOperation create() { public abstract @Nullable Statement getQuery(); + public abstract @Nullable String getQueryName(); + public abstract @Nullable String getTable(); public abstract @Nullable String getIndex(); @@ -57,6 +59,8 @@ abstract static class Builder { abstract Builder setQuery(Statement statement); + abstract Builder setQueryName(String queryName); + abstract Builder setTable(String table); abstract Builder setIndex(String index); @@ -92,6 +96,10 @@ public ReadOperation withQuery(String sql) { return withQuery(Statement.of(sql)); } + public ReadOperation withQueryName(String queryName) { + return toBuilder().setQueryName(queryName).build(); + } + public ReadOperation withKeySet(KeySet keySet) { return toBuilder().setKeySet(keySet).build(); } diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java index 07ff21663736..1244b90fd12b 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java @@ -43,9 +43,13 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.OptionalInt; import 
java.util.concurrent.TimeUnit; +import org.apache.beam.runners.core.metrics.GcpResourceIdentifiers; +import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; +import org.apache.beam.runners.core.metrics.ServiceCallMetric; import org.apache.beam.sdk.annotations.Experimental; import org.apache.beam.sdk.annotations.Experimental.Kind; import org.apache.beam.sdk.coders.SerializableCoder; @@ -661,6 +665,10 @@ public Read withQuery(String sql) { return withQuery(Statement.of(sql)); } + public Read withQueryName(String queryName) { + return withReadOperation(getReadOperation().withQueryName(queryName)); + } + public Read withKeySet(KeySet keySet) { return withReadOperation(getReadOperation().withKeySet(keySet)); } @@ -1638,10 +1646,18 @@ public void processElement(ProcessContext c) throws Exception { private void spannerWriteWithRetryIfSchemaChange(Iterable batch) throws SpannerException { for (int retry = 1; ; retry++) { + ServiceCallMetric serviceCallMetric = + createServiceCallMetric( + this.spannerConfig.getProjectId().toString(), + this.spannerConfig.getDatabaseId().toString(), + this.spannerConfig.getInstanceId().toString(), + "Write"); try { spannerAccessor.getDatabaseClient().writeAtLeastOnce(batch); + serviceCallMetric.call("ok"); return; } catch (AbortedException e) { + serviceCallMetric.call(e.getErrorCode().getGrpcStatusCode().toString()); if (retry >= ABORTED_RETRY_ATTEMPTS) { throw e; } @@ -1649,10 +1665,30 @@ private void spannerWriteWithRetryIfSchemaChange(Iterable batch) continue; } throw e; + } catch (SpannerException e) { + serviceCallMetric.call(e.getErrorCode().getGrpcStatusCode().toString()); + throw e; } } } + private ServiceCallMetric createServiceCallMetric( + String projectId, String databaseId, String tableId, String method) { + HashMap baseLabels = new HashMap<>(); + baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, ""); + baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "Spanner"); + baseLabels.put(MonitoringInfoConstants.Labels.METHOD, method); + baseLabels.put( + MonitoringInfoConstants.Labels.RESOURCE, + GcpResourceIdentifiers.spannerTable(projectId, databaseId, tableId)); + baseLabels.put(MonitoringInfoConstants.Labels.SPANNER_PROJECT_ID, projectId); + baseLabels.put(MonitoringInfoConstants.Labels.SPANNER_DATABASE_ID, databaseId); + baseLabels.put(MonitoringInfoConstants.Labels.SPANNER_INSTANCE_ID, tableId); + ServiceCallMetric serviceCallMetric = + new ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels); + return serviceCallMetric; + } + /** Write the Mutations to Spanner, handling DEADLINE_EXCEEDED with backoff/retries. 
*/ private void writeMutations(Iterable mutations) throws SpannerException, IOException { BackOff backoff = bundleWriteBackoff.backoff(); diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java index 6799c6786378..09b1791b99ce 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java @@ -1735,7 +1735,7 @@ private void testWritePartition( boolean isSingleton = numTables == 1 && numFilesPerTable == 0; DynamicDestinations dynamicDestinations = new DynamicDestinationsHelpers.ConstantTableDestinations<>( - ValueProvider.StaticValueProvider.of("SINGLETON"), ""); + ValueProvider.StaticValueProvider.of("SINGLETON"), "", false); List> expectedPartitions = Lists.newArrayList(); if (isSingleton) { expectedPartitions.add(ShardedKey.of(new TableDestination("SINGLETON", ""), 1)); diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/ReadWriteIT.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/ReadWriteIT.java index 80c362aa48bc..bd15310219fc 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/ReadWriteIT.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/pubsublite/ReadWriteIT.java @@ -18,7 +18,6 @@ package org.apache.beam.sdk.io.gcp.pubsublite; import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull; -import static org.junit.Assert.fail; import com.google.cloud.pubsublite.AdminClient; import com.google.cloud.pubsublite.AdminClientSettings; @@ -36,19 +35,18 @@ import com.google.cloud.pubsublite.proto.Subscription.DeliveryConfig.DeliveryRequirement; import com.google.cloud.pubsublite.proto.Topic; import com.google.cloud.pubsublite.proto.Topic.PartitionConfig.Capacity; -import com.google.errorprone.annotations.concurrent.GuardedBy; import com.google.protobuf.ByteString; import java.util.ArrayDeque; -import java.util.ArrayList; import java.util.Deque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.Set; import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.PipelineResult; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.io.gcp.pubsub.TestPubsubSignal; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.StreamingOptions; import org.apache.beam.sdk.testing.TestPipeline; @@ -57,12 +55,11 @@ import org.apache.beam.sdk.transforms.FlatMapElements; import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.SimpleFunction; import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList; import org.joda.time.Duration; import org.junit.After; -import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.runner.RunWith; @@ -71,12 +68,12 @@ 
import org.slf4j.LoggerFactory; @RunWith(JUnit4.class) -@Ignore("https://issues.apache.org/jira/browse/BEAM-12908") public class ReadWriteIT { private static final Logger LOG = LoggerFactory.getLogger(ReadWriteIT.class); private static final CloudZone ZONE = CloudZone.parse("us-central1-b"); private static final int MESSAGE_COUNT = 90; + @Rule public transient TestPubsubSignal signal = TestPubsubSignal.create(); @Rule public transient TestPipeline pipeline = TestPipeline.create(); private static ProjectId getProject(PipelineOptions options) { @@ -208,28 +205,21 @@ public static PCollection readMessages( "dedupeMessages", PubsubLiteIO.deduplicate(UuidDeduplicationOptions.newBuilder().build())); } - // This static out of band communication is needed to retain serializability. - @GuardedBy("ReadWriteIT.class") - private static final List received = new ArrayList<>(); - - private static synchronized void addMessageReceived(SequencedMessage message) { - received.add(message); - } - - private static synchronized List getTestQuickstartReceived() { - return ImmutableList.copyOf(received); + public static SimpleFunction extractIds() { + return new SimpleFunction() { + @Override + public Integer apply(SequencedMessage input) { + return Integer.parseInt(input.getMessage().getData().toStringUtf8()); + } + }; } - private static PTransform, PCollection> - collectTestQuickstart() { - return MapElements.via( - new SimpleFunction() { - @Override - public Void apply(SequencedMessage input) { - addMessageReceived(input); - return null; - } - }); + public static SerializableFunction, Boolean> testIds() { + return ids -> { + LOG.info("Ids are: {}", ids); + Set target = IntStream.range(0, MESSAGE_COUNT).boxed().collect(Collectors.toSet()); + return target.equals(ids); + }; } @Test @@ -260,37 +250,17 @@ public void testReadWrite() throws Exception { // Read some messages. They should be deduplicated by the time we see them, so there should be // exactly numMessages, one for every index in [0,MESSAGE_COUNT). PCollection messages = readMessages(subscription, pipeline); - messages.apply("messageReceiver", collectTestQuickstart()); - pipeline.run(); + PCollection ids = messages.apply(MapElements.via(extractIds())); + ids.apply("PubsubSignalTest", signal.signalSuccessWhen(BigEndianIntegerCoder.of(), testIds())); + pipeline.apply(signal.signalStart()); + PipelineResult job = pipeline.run(); LOG.info("Running!"); - for (int round = 0; round < 120; ++round) { - Thread.sleep(1000); - Map receivedCounts = new HashMap<>(); - for (SequencedMessage message : getTestQuickstartReceived()) { - int id = Integer.parseInt(message.getMessage().getData().toStringUtf8()); - receivedCounts.put(id, receivedCounts.getOrDefault(id, 0) + 1); - } - LOG.info("Performing comparison round {}.\n", round); - boolean done = true; - List missing = new ArrayList<>(); - for (int id = 0; id < MESSAGE_COUNT; id++) { - int idCount = receivedCounts.getOrDefault(id, 0); - if (idCount == 0) { - missing.add(id); - done = false; - } - if (idCount > 1) { - fail(String.format("Failed to deduplicate message with id %s.", id)); - } - } - LOG.info("Still messing messages: {}.\n", missing); - if (done) { - return; - } + signal.waitForSuccess(Duration.standardMinutes(5)); + // A runner may not support cancel + try { + job.cancel(); + } catch (UnsupportedOperationException exc) { + // noop } - fail( - String.format( - "Failed to receive all messages after 2 minutes. 
Received %s messages.", - getTestQuickstartReceived().size())); } } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java index 5977c2ebd979..061059613c3d 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java @@ -17,6 +17,7 @@ */ package org.apache.beam.sdk.io.gcp.spanner; +import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.when; @@ -24,12 +25,14 @@ import com.google.cloud.Timestamp; import com.google.cloud.spanner.BatchReadOnlyTransaction; import com.google.cloud.spanner.BatchTransactionId; +import com.google.cloud.spanner.ErrorCode; import com.google.cloud.spanner.FakeBatchTransactionId; import com.google.cloud.spanner.FakePartitionFactory; import com.google.cloud.spanner.KeySet; import com.google.cloud.spanner.Partition; import com.google.cloud.spanner.PartitionOptions; import com.google.cloud.spanner.ResultSets; +import com.google.cloud.spanner.SpannerExceptionFactory; import com.google.cloud.spanner.Statement; import com.google.cloud.spanner.Struct; import com.google.cloud.spanner.TimestampBound; @@ -38,12 +41,19 @@ import com.google.protobuf.ByteString; import java.io.Serializable; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import org.apache.beam.runners.core.metrics.GcpResourceIdentifiers; +import org.apache.beam.runners.core.metrics.MetricsContainerImpl; +import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; +import org.apache.beam.runners.core.metrics.MonitoringInfoMetricName; +import org.apache.beam.sdk.metrics.MetricsEnvironment; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionView; +import org.checkerframework.checker.nullness.qual.Nullable; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -83,6 +93,9 @@ public class SpannerIOReadTest implements Serializable { public void setUp() throws Exception { serviceFactory = new FakeServiceFactory(); mockBatchTx = Mockito.mock(BatchReadOnlyTransaction.class); + // Setup the ProcessWideContainer for testing metrics are set. 
+ MetricsContainerImpl container = new MetricsContainerImpl(null); + MetricsEnvironment.setProcessWideContainer(container); } @Test @@ -90,12 +103,7 @@ public void runQuery() throws Exception { Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345); TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp); - SpannerConfig spannerConfig = - SpannerConfig.create() - .withProjectId("test") - .withInstanceId("123") - .withDatabaseId("aaa") - .withServiceFactory(serviceFactory); + SpannerConfig spannerConfig = getSpannerConfig(); PCollection one = pipeline.apply( @@ -129,17 +137,20 @@ public void runQuery() throws Exception { pipeline.run(); } + private SpannerConfig getSpannerConfig() { + return SpannerConfig.create() + .withProjectId("test") + .withInstanceId("123") + .withDatabaseId("aaa") + .withServiceFactory(serviceFactory); + } + @Test public void runRead() throws Exception { Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345); TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp); - SpannerConfig spannerConfig = - SpannerConfig.create() - .withProjectId("test") - .withInstanceId("123") - .withDatabaseId("aaa") - .withServiceFactory(serviceFactory); + SpannerConfig spannerConfig = getSpannerConfig(); PCollection one = pipeline.apply( @@ -178,17 +189,138 @@ public void runRead() throws Exception { pipeline.run(); } + @Test + public void testQueryMetrics() throws Exception { + Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345); + TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp); + + SpannerConfig spannerConfig = getSpannerConfig(); + + PCollection one = + pipeline.apply( + "read q", + SpannerIO.read() + .withSpannerConfig(spannerConfig) + .withQuery("SELECT * FROM users") + .withQueryName("queryName") + .withTimestampBound(timestampBound)); + + FakeBatchTransactionId id = new FakeBatchTransactionId("runQueryTest"); + when(mockBatchTx.getBatchTransactionId()).thenReturn(id); + + when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(timestampBound)) + .thenReturn(mockBatchTx); + when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(any(BatchTransactionId.class))) + .thenReturn(mockBatchTx); + + Partition fakePartition = + FakePartitionFactory.createFakeQueryPartition(ByteString.copyFromUtf8("one")); + + when(mockBatchTx.partitionQuery( + any(PartitionOptions.class), eq(Statement.of("SELECT * FROM users")))) + .thenReturn(Arrays.asList(fakePartition, fakePartition)); + when(mockBatchTx.execute(any(Partition.class))) + .thenThrow( + SpannerExceptionFactory.newSpannerException( + ErrorCode.DEADLINE_EXCEEDED, "Simulated Timeout 1")) + .thenThrow( + SpannerExceptionFactory.newSpannerException( + ErrorCode.DEADLINE_EXCEEDED, "Simulated Timeout 2")) + .thenReturn( + ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(0, 2)), + ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(2, 6))); + + pipeline.run(); + verifyMetricWasSet("test", "aaa", "123", "deadline_exceeded", null, 2); + verifyMetricWasSet("test", "aaa", "123", "ok", null, 2); + } + + @Test + public void testReadMetrics() throws Exception { + Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345); + TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp); + + SpannerConfig spannerConfig = getSpannerConfig(); + + PCollection one = + pipeline.apply( + "read q", + SpannerIO.read() + .withSpannerConfig(spannerConfig) + .withTable("users") + .withColumns("id", "name") + .withTimestampBound(timestampBound)); + + FakeBatchTransactionId id 
= new FakeBatchTransactionId("runReadTest"); + when(mockBatchTx.getBatchTransactionId()).thenReturn(id); + + when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(timestampBound)) + .thenReturn(mockBatchTx); + when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(any(BatchTransactionId.class))) + .thenReturn(mockBatchTx); + + Partition fakePartition = + FakePartitionFactory.createFakeReadPartition(ByteString.copyFromUtf8("one")); + + when(mockBatchTx.partitionRead( + any(PartitionOptions.class), + eq("users"), + eq(KeySet.all()), + eq(Arrays.asList("id", "name")))) + .thenReturn(Arrays.asList(fakePartition, fakePartition, fakePartition)); + when(mockBatchTx.execute(any(Partition.class))) + .thenThrow( + SpannerExceptionFactory.newSpannerException( + ErrorCode.DEADLINE_EXCEEDED, "Simulated Timeout 1")) + .thenThrow( + SpannerExceptionFactory.newSpannerException( + ErrorCode.DEADLINE_EXCEEDED, "Simulated Timeout 2")) + .thenReturn( + ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(0, 2)), + ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(2, 4)), + ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(4, 6))); + + pipeline.run(); + verifyMetricWasSet("test", "aaa", "123", "deadline_exceeded", null, 2); + verifyMetricWasSet("test", "aaa", "123", "ok", null, 3); + } + + private void verifyMetricWasSet( + String projectId, + String databaseId, + String tableId, + String status, + @Nullable String queryName, + long count) { + // Verify the metric was reported. + HashMap labels = new HashMap<>(); + labels.put(MonitoringInfoConstants.Labels.PTRANSFORM, ""); + labels.put(MonitoringInfoConstants.Labels.SERVICE, "Spanner"); + labels.put(MonitoringInfoConstants.Labels.METHOD, "Read"); + labels.put( + MonitoringInfoConstants.Labels.RESOURCE, + GcpResourceIdentifiers.spannerTable(projectId, databaseId, tableId)); + labels.put(MonitoringInfoConstants.Labels.SPANNER_PROJECT_ID, projectId); + labels.put(MonitoringInfoConstants.Labels.SPANNER_DATABASE_ID, databaseId); + labels.put(MonitoringInfoConstants.Labels.SPANNER_INSTANCE_ID, tableId); + if (queryName != null) { + labels.put(MonitoringInfoConstants.Labels.SPANNER_QUERY_NAME, queryName); + } + labels.put(MonitoringInfoConstants.Labels.STATUS, status); + + MonitoringInfoMetricName name = + MonitoringInfoMetricName.named(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, labels); + MetricsContainerImpl container = + (MetricsContainerImpl) MetricsEnvironment.getProcessWideContainer(); + assertEquals(count, (long) container.getCounter(name).getCumulative()); + } + @Test public void runReadUsingIndex() throws Exception { Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345); TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp); - SpannerConfig spannerConfig = - SpannerConfig.create() - .withProjectId("test") - .withInstanceId("123") - .withDatabaseId("aaa") - .withServiceFactory(serviceFactory); + SpannerConfig spannerConfig = getSpannerConfig(); PCollection one = pipeline.apply( @@ -237,12 +369,7 @@ public void readPipeline() throws Exception { Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345); TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp); - SpannerConfig spannerConfig = - SpannerConfig.create() - .withProjectId("test") - .withInstanceId("123") - .withDatabaseId("aaa") - .withServiceFactory(serviceFactory); + SpannerConfig spannerConfig = getSpannerConfig(); PCollection one = pipeline.apply( @@ -281,12 +408,7 @@ public void readAllPipeline() throws Exception { Timestamp timestamp = 
Timestamp.ofTimeMicroseconds(12345); TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp); - SpannerConfig spannerConfig = - SpannerConfig.create() - .withProjectId("test") - .withInstanceId("123") - .withDatabaseId("aaa") - .withServiceFactory(serviceFactory); + SpannerConfig spannerConfig = getSpannerConfig(); PCollectionView tx = pipeline.apply( diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java index 58ce51444087..d0239fc9c5eb 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOWriteTest.java @@ -50,13 +50,19 @@ import com.google.cloud.spanner.Type; import java.io.Serializable; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import org.apache.beam.runners.core.metrics.GcpResourceIdentifiers; +import org.apache.beam.runners.core.metrics.MetricsContainerImpl; +import org.apache.beam.runners.core.metrics.MonitoringInfoConstants; +import org.apache.beam.runners.core.metrics.MonitoringInfoMetricName; import org.apache.beam.sdk.Pipeline.PipelineExecutionException; import org.apache.beam.sdk.coders.SerializableCoder; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO.BatchableMutationFilterFn; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO.FailureMode; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO.GatherSortCreateBatchesFn; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO.WriteToSpannerFn; +import org.apache.beam.sdk.metrics.MetricsEnvironment; import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; @@ -120,6 +126,10 @@ public void setUp() throws Exception { // Simplest schema: a table with int64 key preparePkMetadata(tx, Arrays.asList(pkMetadata("tEsT", "key", "ASC"))); prepareColumnMetadata(tx, Arrays.asList(columnMetadata("tEsT", "key", "INT64", CELLS_PER_KEY))); + + // Setup the ProcessWideContainer for testing metrics are set. + MetricsContainerImpl container = new MetricsContainerImpl(null); + MetricsEnvironment.setProcessWideContainer(container); } private SpannerSchema getSchema() { @@ -407,6 +417,62 @@ public void reportFailures() throws Exception { verify(serviceFactory.mockDatabaseClient(), times(20)).writeAtLeastOnce(any()); } + @Test + public void testSpannerWriteMetricIsSet() { + Mutation mutation = m(2L); + PCollection mutations = pipeline.apply(Create.of(mutation)); + + // respond with 2 error codes and a success. 
+ when(serviceFactory.mockDatabaseClient().writeAtLeastOnce(any())) + .thenThrow( + SpannerExceptionFactory.newSpannerException( + ErrorCode.DEADLINE_EXCEEDED, "Simulated Timeout 1")) + .thenThrow( + SpannerExceptionFactory.newSpannerException( + ErrorCode.DEADLINE_EXCEEDED, "Simulated Timeout 2")) + .thenReturn(Timestamp.now()); + + mutations.apply( + SpannerIO.write() + .withProjectId("test-project") + .withInstanceId("test-instance") + .withDatabaseId("test-database") + .withFailureMode(FailureMode.FAIL_FAST) + .withServiceFactory(serviceFactory)); + pipeline.run(); + + verifyMetricWasSet( + "test-project", "test-database", "test-instance", "Write", "deadline_exceeded", 2); + verifyMetricWasSet("test-project", "test-database", "test-instance", "Write", "ok", 1); + } + + private void verifyMetricWasSet( + String projectId, + String databaseId, + String tableId, + String method, + String status, + long count) { + // Verify the metric was reported. + HashMap labels = new HashMap<>(); + labels.put(MonitoringInfoConstants.Labels.PTRANSFORM, ""); + labels.put(MonitoringInfoConstants.Labels.SERVICE, "Spanner"); + labels.put(MonitoringInfoConstants.Labels.METHOD, method); + labels.put( + MonitoringInfoConstants.Labels.RESOURCE, + GcpResourceIdentifiers.spannerTable(projectId, databaseId, tableId)); + labels.put(MonitoringInfoConstants.Labels.SPANNER_PROJECT_ID, projectId); + labels.put(MonitoringInfoConstants.Labels.SPANNER_DATABASE_ID, databaseId); + labels.put(MonitoringInfoConstants.Labels.SPANNER_INSTANCE_ID, tableId); + labels.put(MonitoringInfoConstants.Labels.STATUS, status); + + MonitoringInfoMetricName name = + MonitoringInfoMetricName.named(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, labels); + MetricsContainerImpl container = + (MetricsContainerImpl) MetricsEnvironment.getProcessWideContainer(); + assertEquals(count, (long) container.getCounter(name).getCumulative()); + } + @Test public void deadlineExceededRetries() throws InterruptedException { List mutationList = Arrays.asList(m((long) 1)); diff --git a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOWriteTest.java b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOWriteTest.java index 9c41bcd0445f..6508fd7b34fb 100644 --- a/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOWriteTest.java +++ b/sdks/java/io/hadoop-format/src/test/java/org/apache/beam/sdk/io/hadoop/format/HadoopFormatIOWriteTest.java @@ -47,7 +47,7 @@ import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.mockito.Mockito; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; /** Unit tests for {@link HadoopFormatIO.Write}. 
*/ @RunWith(MockitoJUnitRunner.class) diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java index 437fd8610149..2fb5209320fb 100644 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java +++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java @@ -27,7 +27,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; /** * */ @RunWith(MockitoJUnitRunner.class) diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java index 1653daf958eb..9ce4b7015ed1 100644 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java +++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java @@ -27,7 +27,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; /** * */ @RunWith(MockitoJUnitRunner.class) diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java index e17fa86c0ccb..429024e654d7 100644 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java +++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java @@ -26,7 +26,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; /** * */ @RunWith(MockitoJUnitRunner.class) diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java index 5abe60585fae..227542cb8055 100644 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java +++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java @@ -38,7 +38,7 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; /** */ @RunWith(MockitoJUnitRunner.class) diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java index 9c8ea2989e17..4a7fed20af98 100644 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java +++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java @@ -62,7 +62,7 @@ import org.junit.runner.RunWith; import org.mockito.InjectMocks; import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; +import org.mockito.junit.MockitoJUnitRunner; import org.mockito.stubbing.Answer; /** * */ diff --git a/sdks/python/apache_beam/dataframe/frames.py 
b/sdks/python/apache_beam/dataframe/frames.py index 347cf26f9dd4..5a0e82670982 100644 --- a/sdks/python/apache_beam/dataframe/frames.py +++ b/sdks/python/apache_beam/dataframe/frames.py @@ -55,6 +55,9 @@ 'DeferredDataFrame', ] +# Get major, minor version +PD_VERSION = tuple(map(int, pd.__version__.split('.')[0:2])) + def populate_not_implemented(pd_type): def wrapper(deferred_type): @@ -294,10 +297,14 @@ def __init__(self, value): preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=requires)) - ffill = _fillna_alias('ffill') - bfill = _fillna_alias('bfill') - backfill = _fillna_alias('backfill') - pad = _fillna_alias('pad') + if hasattr(pd.DataFrame, 'ffill'): + ffill = _fillna_alias('ffill') + if hasattr(pd.DataFrame, 'bfill'): + bfill = _fillna_alias('bfill') + if hasattr(pd.DataFrame, 'backfill'): + backfill = _fillna_alias('backfill') + if hasattr(pd.DataFrame, 'pad'): + pad = _fillna_alias('pad') @frame_base.with_docs_from(pd.DataFrame) def first(self, offset): @@ -1932,7 +1939,7 @@ def value_counts( else: column = self - result = column.groupby(column).size() + result = column.groupby(column, dropna=dropna).size() # groupby.size() names the index, which we don't need result.index.name = None @@ -2392,8 +2399,8 @@ def aggregate(self, func, axis, *args, **kwargs): if func in ('quantile',): return getattr(self, func)(*args, axis=axis, **kwargs) - # Maps to a property, args are ignored - if func in ('size',): + # In pandas<1.3.0, maps to a property, args are ignored + if func in ('size',) and PD_VERSION < (1, 3): return getattr(self, func) # We also have specialized distributed implementations for these. They only @@ -3390,25 +3397,32 @@ def melt(self, ignore_index, **kwargs): requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Singleton())) - @frame_base.with_docs_from(pd.DataFrame) - def value_counts(self, subset=None, sort=False, normalize=False, - ascending=False): - """``sort`` is ``False`` by default, and ``sort=True`` is not supported - because it imposes an ordering on the dataset which likely will not be - preserved.""" + if hasattr(pd.DataFrame, 'value_counts'): + @frame_base.with_docs_from(pd.DataFrame) + def value_counts(self, subset=None, sort=False, normalize=False, + ascending=False, dropna=True): + """``sort`` is ``False`` by default, and ``sort=True`` is not supported + because it imposes an ordering on the dataset which likely will not be + preserved.""" - if sort: - raise frame_base.WontImplementError( - "value_counts(sort=True) is not supported because it imposes an " - "ordering on the dataset which likely will not be preserved.", - reason="order-sensitive") - columns = subset or list(self.columns) - result = self.groupby(columns).size() + if sort: + raise frame_base.WontImplementError( + "value_counts(sort=True) is not supported because it imposes an " + "ordering on the dataset which likely will not be preserved.", + reason="order-sensitive") + columns = subset or list(self.columns) - if normalize: - return result/self.dropna().length() - else: - return result + if dropna: + dropped = self.dropna() + else: + dropped = self + + result = dropped.groupby(columns, dropna=dropna).size() + + if normalize: + return result/dropped.length() + else: + return result for io_func in dir(io): @@ -4239,6 +4253,9 @@ def func(df, *args, **kwargs): return func for method in ELEMENTWISE_STRING_METHODS: + if not hasattr(pd.core.strings.StringMethods, method): + # older versions (1.0.x) don't support some of these methods + continue 
setattr(_DeferredStringMethods, method, frame_base._elementwise_method(make_str_func(method), @@ -4382,6 +4399,9 @@ def func(df, *args, **kwargs): ] for method in ELEMENTWISE_DATETIME_METHODS: + if not hasattr(pd.core.indexes.accessors.DatetimeProperties, method): + # older versions (1.0.x) don't support some of these methods + continue setattr(_DeferredDatetimeMethods, method, frame_base._elementwise_method( diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py index c3972ad7528f..a2703d801051 100644 --- a/sdks/python/apache_beam/dataframe/frames_test.py +++ b/sdks/python/apache_beam/dataframe/frames_test.py @@ -25,7 +25,8 @@ from apache_beam.dataframe import frame_base from apache_beam.dataframe import frames -PD_VERSION = tuple(map(int, pd.__version__.split('.'))) +# Get major, minor version +PD_VERSION = tuple(map(int, pd.__version__.split('.')[0:2])) GROUPBY_DF = pd.DataFrame({ 'group': ['a' if i % 5 == 0 or i % 3 == 0 else 'b' for i in range(100)], @@ -235,6 +236,17 @@ def test_dataframe_arithmetic(self): self._run_test( lambda df, df2: df.subtract(2).multiply(df2).divide(df), df, df2) + @unittest.skipIf(PD_VERSION < (1, 3), "dropna=False is new in pandas 1.3") + def test_value_counts_dropna_false(self): + df = pd.DataFrame({ + 'first_name': ['John', 'Anne', 'John', 'Beth'], + 'middle_name': ['Smith', pd.NA, pd.NA, 'Louise'] + }) + # TODO(BEAM-12495): Remove this assertRaises when the underlying bug in + # https://github.com/pandas-dev/pandas/issues/36470 is fixed. + with self.assertRaises(NotImplementedError): + self._run_test(lambda df: df.value_counts(dropna=False), df) + def test_get_column(self): df = pd.DataFrame({ 'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'], @@ -369,10 +381,15 @@ def test_combine_dataframe_fill(self): nonparallel=True) def test_combine_Series(self): - with expressions.allow_non_parallel_operations(): - s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0}) - s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0}) - self._run_test(lambda s1, s2: s1.combine(s2, max), s1, s2) + s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0}) + s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0}) + self._run_test( + lambda s1, + s2: s1.combine(s2, max), + s1, + s2, + nonparallel=True, + check_proxy=False) def test_combine_first_dataframe(self): df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]}) @@ -587,8 +604,27 @@ def test_value_counts_with_nans(self): self._run_test(lambda df: df.value_counts(), df) self._run_test(lambda df: df.value_counts(normalize=True), df) + if PD_VERSION >= (1, 3): + # dropna=False is new in pandas 1.3 + # TODO(BEAM-12495): Remove this assertRaises when the underlying bug + # in https://github.com/pandas-dev/pandas/issues/36470 is fixed. + with self.assertRaises(NotImplementedError): + self._run_test(lambda df: df.value_counts(dropna=False), df) + + # Test the defaults. self._run_test(lambda df: df.num_wings.value_counts(), df) self._run_test(lambda df: df.num_wings.value_counts(normalize=True), df) + self._run_test(lambda df: df.num_wings.value_counts(dropna=False), df) + + # Test the combination interactions.
+ for normalize in (True, False): + for dropna in (True, False): + self._run_test( + lambda df, + dropna=dropna, + normalize=normalize: df.num_wings.value_counts( + dropna=dropna, normalize=normalize), + df) def test_value_counts_does_not_support_sort(self): df = pd.DataFrame({ diff --git a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py index edc42f1b7cfd..755e4e5cbbdf 100644 --- a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py +++ b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py @@ -20,6 +20,7 @@ import pandas as pd from apache_beam.dataframe import doctests +from apache_beam.dataframe.frames import PD_VERSION from apache_beam.dataframe.pandas_top_level_functions import _is_top_level_function @@ -68,7 +69,8 @@ def test_ndframe_tests(self): "df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})" ], 'pandas.core.generic.NDFrame.fillna': [ - "df.fillna(method='ffill')", + 'df.fillna(method=\'ffill\')', + 'df.fillna(method="ffill")', 'df.fillna(value=values, limit=1)', ], 'pandas.core.generic.NDFrame.sort_values': ['*'], @@ -164,7 +166,8 @@ def test_dataframe_tests(self): 'pandas.core.frame.DataFrame.cumprod': ['*'], 'pandas.core.frame.DataFrame.diff': ['*'], 'pandas.core.frame.DataFrame.fillna': [ - "df.fillna(method='ffill')", + 'df.fillna(method=\'ffill\')', + 'df.fillna(method="ffill")', 'df.fillna(value=values, limit=1)', ], 'pandas.core.frame.DataFrame.items': ['*'], @@ -237,6 +240,8 @@ def test_dataframe_tests(self): # reindex not supported 's2 = s.reindex([1, 0, 2, 3])', ], + 'pandas.core.frame.DataFrame.resample': ['*'], + 'pandas.core.frame.DataFrame.values': ['*'], }, not_implemented_ok={ 'pandas.core.frame.DataFrame.transform': [ @@ -244,6 +249,8 @@ def test_dataframe_tests(self): # frames_test.py::DeferredFrameTest::test_groupby_transform_sum "df.groupby('Date')['Data'].transform('sum')", ], + 'pandas.core.frame.DataFrame.swaplevel': ['*'], + 'pandas.core.frame.DataFrame.melt': ['*'], 'pandas.core.frame.DataFrame.reindex_axis': ['*'], 'pandas.core.frame.DataFrame.round': [ 'df.round(decimals)', @@ -267,6 +274,11 @@ def test_dataframe_tests(self): 'pandas.core.frame.DataFrame.set_index': [ "df.set_index([s, s**2])", ], + + # TODO(BEAM-12495) + 'pandas.core.frame.DataFrame.value_counts': [ + 'df.value_counts(dropna=False)' + ], }, skip={ # s2 created with reindex @@ -274,6 +286,8 @@ def test_dataframe_tests(self): 'df.dot(s2)', ], + 'pandas.core.frame.DataFrame.resample': ['df'], + 'pandas.core.frame.DataFrame.asfreq': ['*'], # Throws NotImplementedError when modifying df 'pandas.core.frame.DataFrame.axes': [ # Returns deferred index. @@ -302,6 +316,14 @@ def test_dataframe_tests(self): 'pandas.core.frame.DataFrame.to_markdown': ['*'], 'pandas.core.frame.DataFrame.to_parquet': ['*'], + # Raises right exception, but testing framework has matching issues. + # Tested in `frames_test.py`. + 'pandas.core.frame.DataFrame.insert': [ + 'df', + 'df.insert(1, "newcol", [99, 99])', + 'df.insert(0, "col1", [100, 100], allow_duplicates=True)' + ], + 'pandas.core.frame.DataFrame.to_records': [ 'df.index = df.index.rename("I")', 'index_dtypes = f" 100).mean()', ], + 'pandas.core.series.Series.asfreq': ['*'], # error formatting 'pandas.core.series.Series.append': [ 's1.append(s2, verify_integrity=True)', @@ -491,12 +518,12 @@ def test_series_tests(self): # Inspection after modification. 
's' ], + 'pandas.core.series.Series.resample': ['df'], }) self.assertEqual(result.failed, 0) def test_string_tests(self): - PD_VERSION = tuple(int(v) for v in pd.__version__.split('.')) - if PD_VERSION < (1, 2, 0): + if PD_VERSION < (1, 2): module = pd.core.strings else: # Definitions were moved to accessor in pandas 1.2.0 @@ -668,11 +695,13 @@ def test_groupby_tests(self): 'pandas.core.groupby.generic.SeriesGroupBy.diff': ['*'], 'pandas.core.groupby.generic.DataFrameGroupBy.hist': ['*'], 'pandas.core.groupby.generic.DataFrameGroupBy.fillna': [ - "df.fillna(method='ffill')", + 'df.fillna(method=\'ffill\')', + 'df.fillna(method="ffill")', 'df.fillna(value=values, limit=1)', ], 'pandas.core.groupby.generic.SeriesGroupBy.fillna': [ - "df.fillna(method='ffill')", + 'df.fillna(method=\'ffill\')', + 'df.fillna(method="ffill")', 'df.fillna(value=values, limit=1)', ], }, @@ -682,6 +711,7 @@ def test_groupby_tests(self): 'pandas.core.groupby.generic.SeriesGroupBy.transform': ['*'], 'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['*'], 'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['*'], + 'pandas.core.groupby.generic.SeriesGroupBy.apply': ['*'], }, skip={ 'pandas.core.groupby.generic.SeriesGroupBy.cov': [ @@ -698,6 +728,14 @@ def test_groupby_tests(self): # These examples rely on grouping by a list 'pandas.core.groupby.generic.SeriesGroupBy.aggregate': ['*'], 'pandas.core.groupby.generic.DataFrameGroupBy.aggregate': ['*'], + 'pandas.core.groupby.generic.SeriesGroupBy.transform': [ + # Dropping invalid columns during a transform is unsupported. + 'grouped.transform(lambda x: (x - x.mean()) / x.std())' + ], + 'pandas.core.groupby.generic.DataFrameGroupBy.transform': [ + # Dropping invalid columns during a transform is unsupported. + 'grouped.transform(lambda x: (x - x.mean()) / x.std())' + ], }) self.assertEqual(result.failed, 0) diff --git a/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py b/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py index 260d04328d0b..e3ea1447630f 100644 --- a/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py +++ b/sdks/python/apache_beam/examples/cookbook/bigtableio_it_test.py @@ -179,8 +179,8 @@ def test_bigtable_write(self): with beam.Pipeline(options=pipeline_options) as pipeline: config_data = { 'project_id': self.project, - 'instance_id': self.instance, - 'table_id': self.table + 'instance_id': self.instance_id, + 'table_id': self.table_id } _ = ( pipeline diff --git a/sdks/python/apache_beam/examples/dataframe/README.md b/sdks/python/apache_beam/examples/dataframe/README.md index 25fca9ef7335..cba84c01a9a9 100644 --- a/sdks/python/apache_beam/examples/dataframe/README.md +++ b/sdks/python/apache_beam/examples/dataframe/README.md @@ -26,12 +26,10 @@ API](https://beam.apache.org/documentation/dsls/dataframes/overview/). You must have `apache-beam>=2.30.0` installed in order to run these pipelines, because the `apache_beam.examples.dataframe` module was added in that release. -Additionally using the DataFrame API requires `pandas>=1.0.0` to be installed -in your local Python session. The _same_ version should be installed on workers -when executing DataFrame API pipelines on distributed runners. Reference -[`base_image_requirements.txt`](../../../container/base_image_requirements.txt) -for the Beam release you are using to see what version of pandas will be used -by default on distributed workers. 
+Using the DataFrame API also requires a compatible pandas version to be +installed, see the +[documentation](https://beam.apache.org/documentation/dsls/dataframes/overview/#pre-requisites) +for details. ## Wordcount Pipeline diff --git a/sdks/python/apache_beam/internal/metrics/metric.py b/sdks/python/apache_beam/internal/metrics/metric.py index 47c534d2c2da..b76e895167a9 100644 --- a/sdks/python/apache_beam/internal/metrics/metric.py +++ b/sdks/python/apache_beam/internal/metrics/metric.py @@ -223,3 +223,43 @@ def convert_to_canonical_status_string(self, status): http_status_code in http_to_canonical_gcp_status): return http_to_canonical_gcp_status[http_status_code] return str(http_status_code) + + @staticmethod + def bigtable_error_code_to_grpc_status_string(grpc_status_code): + # type: (int) -> str + + """ + Converts the bigtable error code to a canonical GCP status code string. + + This Bigtable client library is not using the canonical http status code + values (i.e. https://cloud.google.com/apis/design/errors)" + Instead they are numbered using an enum with these values corresponding + to each status code: https://cloud.google.com/bigtable/docs/status-codes + + Args: + grpc_status_code: An int that corresponds to an enum of status codes + + Returns: + A GCP status code string + """ + grpc_to_canonical_gcp_status = { + 0: 'ok', + 1: 'cancelled', + 2: 'unknown', + 3: 'invalid_argument', + 4: 'deadline_exceeded', + 5: 'not_found', + 6: 'already_exists', + 7: 'permission_denied', + 8: 'resource_exhausted', + 9: 'failed_precondition', + 10: 'aborted', + 11: 'out_of_range', + 12: 'unimplemented', + 13: 'internal', + 14: 'unavailable' + } + if (grpc_status_code is not None and + grpc_status_code in grpc_to_canonical_gcp_status): + return grpc_to_canonical_gcp_status[grpc_status_code] + return str(grpc_status_code) diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py index 584b32eff492..1e7d0f95eb41 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery.py +++ b/sdks/python/apache_beam/io/gcp/bigquery.py @@ -2068,7 +2068,8 @@ def __init__( validate: bool = False, kms_key: str = None, temp_dataset: Union[str, DatasetReference] = None, - bigquery_job_labels: Dict[str, str] = None): + bigquery_job_labels: Dict[str, str] = None, + query_priority: str = BigQueryQueryPriority.BATCH): if gcs_location: if not isinstance(gcs_location, (str, ValueProvider)): raise TypeError( @@ -2081,6 +2082,7 @@ def __init__( self.kms_key = kms_key self.bigquery_job_labels = bigquery_job_labels self.temp_dataset = temp_dataset + self.query_priority = query_priority def expand(self, pcoll): job_name = pcoll.pipeline.options.view_as(GoogleCloudOptions).job_name @@ -2105,7 +2107,8 @@ def expand(self, pcoll): unique_id=unique_id, kms_key=self.kms_key, project=project, - temp_dataset=self.temp_dataset)).with_outputs( + temp_dataset=self.temp_dataset, + query_priority=self.query_priority)).with_outputs( "location_to_cleanup", main="files_to_read") ) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py b/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py index f5b959381d21..df6ba7faab5b 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py @@ -149,7 +149,8 @@ def __init__( unique_id: str = None, kms_key: str = None, project: str = None, - temp_dataset: Union[str, DatasetReference] = None): + temp_dataset: Union[str, DatasetReference] = None, + query_priority: Optional[str] = 
None): self.options = options self.use_json_exports = use_json_exports self.gcs_location = gcs_location @@ -160,6 +161,7 @@ def __init__( self.kms_key = kms_key self.project = project self.temp_dataset = temp_dataset or 'bq_read_all_%s' % uuid.uuid4().hex + self.query_priority = query_priority self.bq_io_metadata = None def display_data(self): @@ -254,6 +256,7 @@ def _execute_query( not element.use_standard_sql, element.flatten_results, job_id=query_job_name, + priority=self.query_priority, kms_key=self.kms_key, job_labels=self._get_bq_metadata().add_additional_bq_job_labels( self.bigquery_job_labels)) diff --git a/sdks/python/apache_beam/io/gcp/bigtableio.py b/sdks/python/apache_beam/io/gcp/bigtableio.py index 96ea26eac78e..af289edc8b40 100644 --- a/sdks/python/apache_beam/io/gcp/bigtableio.py +++ b/sdks/python/apache_beam/io/gcp/bigtableio.py @@ -40,13 +40,38 @@ import logging import apache_beam as beam +from apache_beam.internal.metrics.metric import ServiceCallMetric +from apache_beam.io.gcp import resource_identifiers from apache_beam.metrics import Metrics +from apache_beam.metrics import monitoring_infos from apache_beam.transforms.display import DisplayDataItem _LOGGER = logging.getLogger(__name__) try: from google.cloud.bigtable import Client + from google.cloud.bigtable.batcher import MutationsBatcher + + FLUSH_COUNT = 1000 + MAX_ROW_BYTES = 5242880 # 5MB + + class _MutationsBatcher(MutationsBatcher): + def __init__( + self, table, flush_count=FLUSH_COUNT, max_row_bytes=MAX_ROW_BYTES): + super(_MutationsBatcher, self).__init__(table, flush_count, max_row_bytes) + self.rows = [] + + def set_flush_callback(self, callback_fn): + self.callback_fn = callback_fn + + def flush(self): + if len(self.rows) != 0: + rows = self.table.mutate_rows(self.rows) + self.callback_fn(rows) + self.total_mutation_count = 0 + self.total_size = 0 + self.rows = [] + except ImportError: _LOGGER.warning( 'ImportError: from google.cloud.bigtable import Client', exc_info=True) @@ -78,6 +103,7 @@ def __init__(self, project_id, instance_id, table_id): } self.table = None self.batcher = None + self.service_call_metric = None self.written = Metrics.counter(self.__class__, 'Written Row') def __getstate__(self): @@ -87,14 +113,44 @@ def __setstate__(self, options): self.beam_options = options self.table = None self.batcher = None + self.service_call_metric = None self.written = Metrics.counter(self.__class__, 'Written Row') + def write_mutate_metrics(self, rows): + for status in rows: + grpc_status_string = ( + ServiceCallMetric.bigtable_error_code_to_grpc_status_string( + status.code)) + self.service_call_metric.call(grpc_status_string) + + def start_service_call_metrics(self, project_id, instance_id, table_id): + resource = resource_identifiers.BigtableTable( + project_id, instance_id, table_id) + labels = { + monitoring_infos.SERVICE_LABEL: 'BigTable', + # TODO(JIRA-11985): Add Ptransform label. 
+ monitoring_infos.METHOD_LABEL: 'google.bigtable.v2.MutateRows', + monitoring_infos.RESOURCE_LABEL: resource, + monitoring_infos.BIGTABLE_PROJECT_ID_LABEL: ( + self.beam_options['project_id']), + monitoring_infos.INSTANCE_ID_LABEL: self.beam_options['instance_id'], + monitoring_infos.TABLE_ID_LABEL: self.beam_options['table_id'] + } + return ServiceCallMetric( + request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN, + base_labels=labels) + def start_bundle(self): if self.table is None: client = Client(project=self.beam_options['project_id']) instance = client.instance(self.beam_options['instance_id']) self.table = instance.table(self.beam_options['table_id']) - self.batcher = self.table.mutations_batcher() + self.service_call_metric = self.start_service_call_metrics( + self.beam_options['project_id'], + self.beam_options['instance_id'], + self.beam_options['table_id']) + self.batcher = _MutationsBatcher(self.table) + self.batcher.set_flush_callback(self.write_mutate_metrics) def process(self, row): self.written.inc() diff --git a/sdks/python/apache_beam/io/gcp/bigtableio_test.py b/sdks/python/apache_beam/io/gcp/bigtableio_test.py new file mode 100644 index 000000000000..29703ea3df5d --- /dev/null +++ b/sdks/python/apache_beam/io/gcp/bigtableio_test.py @@ -0,0 +1,137 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Unit tests for BigTable service.""" + +# pytype: skip-file +import datetime +import string +import unittest +import uuid +from random import choice + +from mock import MagicMock +from mock import patch + +from apache_beam.internal.metrics.metric import ServiceCallMetric +from apache_beam.io.gcp import bigtableio +from apache_beam.io.gcp import resource_identifiers +from apache_beam.metrics import monitoring_infos +from apache_beam.metrics.execution import MetricsEnvironment + +# Protect against environments where bigtable library is not available. 
+try: + from google.cloud.bigtable import client, row + from google.cloud.bigtable.instance import Instance + from google.cloud.bigtable.table import Table + from google.rpc.code_pb2 import OK, ALREADY_EXISTS + from google.rpc.status_pb2 import Status +except ImportError as e: + client = None + + +@unittest.skipIf(client is None, 'Bigtable dependencies are not installed') +class TestWriteBigTable(unittest.TestCase): + TABLE_PREFIX = "python-test" + _PROJECT_ID = TABLE_PREFIX + "-" + str(uuid.uuid4())[:8] + _INSTANCE_ID = TABLE_PREFIX + "-" + str(uuid.uuid4())[:8] + _TABLE_ID = TABLE_PREFIX + "-" + str(uuid.uuid4())[:8] + + def setUp(self): + client = MagicMock() + instance = Instance(self._INSTANCE_ID, client) + self.table = Table(self._TABLE_ID, instance) + + def test_write_metrics(self): + MetricsEnvironment.process_wide_container().reset() + write_fn = bigtableio._BigTableWriteFn( + self._PROJECT_ID, self._INSTANCE_ID, self._TABLE_ID) + write_fn.table = self.table + write_fn.start_bundle() + number_of_rows = 2 + error = Status() + error.message = 'Entity already exists.' + error.code = ALREADY_EXISTS + success = Status() + success.message = 'Success' + success.code = OK + rows_response = [error, success] * number_of_rows + with patch.object(Table, 'mutate_rows', return_value=rows_response): + direct_rows = [self.generate_row(i) for i in range(number_of_rows * 2)] + for direct_row in direct_rows: + write_fn.process(direct_row) + write_fn.finish_bundle() + self.verify_write_call_metric( + self._PROJECT_ID, + self._INSTANCE_ID, + self._TABLE_ID, + ServiceCallMetric.bigtable_error_code_to_grpc_status_string( + ALREADY_EXISTS), + 2) + self.verify_write_call_metric( + self._PROJECT_ID, + self._INSTANCE_ID, + self._TABLE_ID, + ServiceCallMetric.bigtable_error_code_to_grpc_status_string(OK), + 2) + + def generate_row(self, index=0): + rand = choice(string.ascii_letters + string.digits) + value = ''.join(rand for i in range(100)) + column_family_id = 'cf1' + key = "beam_key%s" % ('{0:07}'.format(index)) + direct_row = row.DirectRow(row_key=key) + for column_id in range(10): + direct_row.set_cell( + column_family_id, ('field%s' % column_id).encode('utf-8'), + value, + datetime.datetime.now()) + return direct_row + + def verify_write_call_metric( + self, project_id, instance_id, table_id, status, count): + """Check if a metric was recorded for the Datastore IO write API call.""" + process_wide_monitoring_infos = list( + MetricsEnvironment.process_wide_container(). 
+ to_runner_api_monitoring_infos(None).values()) + resource = resource_identifiers.BigtableTable( + project_id, instance_id, table_id) + labels = { + monitoring_infos.SERVICE_LABEL: 'BigTable', + monitoring_infos.METHOD_LABEL: 'google.bigtable.v2.MutateRows', + monitoring_infos.RESOURCE_LABEL: resource, + monitoring_infos.BIGTABLE_PROJECT_ID_LABEL: project_id, + monitoring_infos.INSTANCE_ID_LABEL: instance_id, + monitoring_infos.TABLE_ID_LABEL: table_id, + monitoring_infos.STATUS_LABEL: status + } + expected_mi = monitoring_infos.int64_counter( + monitoring_infos.API_REQUEST_COUNT_URN, count, labels=labels) + expected_mi.ClearField("start_time") + + found = False + for actual_mi in process_wide_monitoring_infos: + actual_mi.ClearField("start_time") + if expected_mi == actual_mi: + found = True + break + self.assertTrue( + found, "Did not find write call metric with status: %s" % status) + + +if __name__ == '__main__': + unittest.main() diff --git a/sdks/python/apache_beam/io/gcp/resource_identifiers.py b/sdks/python/apache_beam/io/gcp/resource_identifiers.py index a89a9e17a324..573d8ac5ed1b 100644 --- a/sdks/python/apache_beam/io/gcp/resource_identifiers.py +++ b/sdks/python/apache_beam/io/gcp/resource_identifiers.py @@ -42,3 +42,8 @@ def GoogleCloudStorageBucket(bucket_id): def DatastoreNamespace(project_id, namespace_id): return '//bigtable.googleapis.com/projects/%s/namespaces/%s' % ( project_id, namespace_id) + + +def BigtableTable(project_id, instance_id, table_id): + return '//bigtable.googleapis.com/projects/%s/instances/%s/tables/%s' % ( + project_id, instance_id, table_id) diff --git a/sdks/python/apache_beam/metrics/monitoring_infos.py b/sdks/python/apache_beam/metrics/monitoring_infos.py index 96d817a67286..ba3100c84f31 100644 --- a/sdks/python/apache_beam/metrics/monitoring_infos.py +++ b/sdks/python/apache_beam/metrics/monitoring_infos.py @@ -103,6 +103,11 @@ common_urns.monitoring_info_labels.DATASTORE_PROJECT.label_props.name) DATASTORE_NAMESPACE_LABEL = ( common_urns.monitoring_info_labels.DATASTORE_NAMESPACE.label_props.name) +BIGTABLE_PROJECT_ID_LABEL = ( + common_urns.monitoring_info_labels.BIGTABLE_PROJECT_ID.label_props.name) +INSTANCE_ID_LABEL = ( + common_urns.monitoring_info_labels.INSTANCE_ID.label_props.name) +TABLE_ID_LABEL = (common_urns.monitoring_info_labels.TABLE_ID.label_props.name) def extract_counter_value(monitoring_info_proto): diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py index 5ee0c049a9e6..3cf0a5162f2f 100644 --- a/sdks/python/apache_beam/pipeline.py +++ b/sdks/python/apache_beam/pipeline.py @@ -394,6 +394,12 @@ def visit_transform(self, transform_node): self.visit(TransformUpdater(self)) + # Ensure no type information is lost. + for old, new in output_map.items(): + if new.element_type == typehints.Any: + # TODO(robertwb): Perhaps take the intersection? + new.element_type = old.element_type + # Adjusting inputs and outputs class InputOutputUpdater(PipelineVisitor): # pylint: disable=used-before-assignment """"A visitor that records input and output values to be replaced. 
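Editor's note on the `pipeline.py` hunk above: when a `PTransformOverride` supplies a replacement transform that declares no output type hints, the replacement output previously came back typed as `typehints.Any`, silently dropping the original element type. The added loop copies the old element type in that case, which is what the `pipeline_test.py` expectation below (`int` instead of `typehints.Any`) now asserts. The following is a minimal standalone sketch of that rule only; `ReplacedOutput` is a hypothetical stand-in for a PCollection, and `typing.Any` stands in for `typehints.Any`.

```python
# Sketch of the "don't lose known type information" rule, under the
# assumption that only the element_type attribute matters.
from typing import Any


class ReplacedOutput:
    def __init__(self, element_type):
        self.element_type = element_type


old = ReplacedOutput(int)   # output of the transform being overridden
new = ReplacedOutput(Any)   # replacement declared no output type hints

# If the replacement lost the type information (Any), fall back to the
# original output's element type.
if new.element_type is Any:
    new.element_type = old.element_type

assert new.element_type is int
```

As the TODO in the hunk notes, a possible refinement would be to take the intersection of the old and new hints rather than simply copying the old one.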
diff --git a/sdks/python/apache_beam/pipeline_test.py b/sdks/python/apache_beam/pipeline_test.py index 8a30537ac920..8f8f44b78ad5 100644 --- a/sdks/python/apache_beam/pipeline_test.py +++ b/sdks/python/apache_beam/pipeline_test.py @@ -430,7 +430,7 @@ def get_replacement_transform_for_applied_ptransform( self, applied_ptransform): return ToStringParDo().with_input_types(int).with_output_types(str) - for override, expected_type in [(NoTypeHintOverride(), typehints.Any), + for override, expected_type in [(NoTypeHintOverride(), int), (WithTypeHintOverride(), str)]: p = TestPipeline() pcoll = ( @@ -1016,7 +1016,7 @@ def display_data(self): # type: () -> dict 'dd_string_value', label='dd_string_label') parent_dd['dd_string_2'] = DisplayDataItem('dd_string_value_2') parent_dd['dd_bool'] = DisplayDataItem(False, label='dd_bool_label') - parent_dd['dd_int'] = DisplayDataItem(1.1, label='dd_int_label') + parent_dd['dd_double'] = DisplayDataItem(1.1, label='dd_double_label') return parent_dd p = beam.Pipeline() @@ -1037,41 +1037,57 @@ def display_data(self): # type: () -> dict urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( label='p_dd_string_label', + key='p_dd_string', + namespace='apache_beam.pipeline_test.MyPTransform', string_value='p_dd_string_value').SerializeToString()), beam_runner_api_pb2.DisplayData( urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( label='p_dd_string_2', + key='p_dd_string_2', + namespace='apache_beam.pipeline_test.MyPTransform', string_value='p_dd_string_value_2').SerializeToString()), beam_runner_api_pb2.DisplayData( urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( label='p_dd_bool_label', + key='p_dd_bool', + namespace='apache_beam.pipeline_test.MyPTransform', bool_value=True).SerializeToString()), beam_runner_api_pb2.DisplayData( urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( label='p_dd_int_label', - double_value=1).SerializeToString()), + key='p_dd_int', + namespace='apache_beam.pipeline_test.MyPTransform', + int_value=1).SerializeToString()), beam_runner_api_pb2.DisplayData( urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( label='dd_string_label', + key='dd_string', + namespace='apache_beam.pipeline_test.MyPTransform', string_value='dd_string_value').SerializeToString()), beam_runner_api_pb2.DisplayData( urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( label='dd_string_2', + key='dd_string_2', + namespace='apache_beam.pipeline_test.MyPTransform', string_value='dd_string_value_2').SerializeToString()), beam_runner_api_pb2.DisplayData( urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( label='dd_bool_label', + key='dd_bool', + namespace='apache_beam.pipeline_test.MyPTransform', bool_value=False).SerializeToString()), beam_runner_api_pb2.DisplayData( urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn, payload=beam_runner_api_pb2.LabelledPayload( - label='dd_int_label', + label='dd_double_label', + key='dd_double', + namespace='apache_beam.pipeline_test.MyPTransform', double_value=1.1).SerializeToString()), ]) diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py index 
e47dc81ec686..1e2d1ec5bf1f 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py @@ -508,13 +508,6 @@ def run_pipeline(self, pipeline, options): # in the proto representation of the graph. pipeline.replace_all(DataflowRunner._NON_PORTABLE_PTRANSFORM_OVERRIDES) - # Always upload graph out-of-band when explicitly using runner v2 with - # use_portable_job_submission to avoid irrelevant large graph limits. - if (apiclient._use_unified_worker(debug_options) and - debug_options.lookup_experiment('use_portable_job_submission') and - not debug_options.lookup_experiment('upload_graph')): - debug_options.add_experiment("upload_graph") - # Add setup_options for all the BeamPlugin imports setup_options = options.view_as(SetupOptions) plugins = BeamPlugin.get_all_plugin_paths() diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py index f6a2a6c70246..7e2ba13714ab 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py @@ -405,7 +405,7 @@ def shortstrings_registerer(encoding_name): # further modify it to not output too-long strings, aimed at the # 10,000+ character hex-encoded "serialized_fn" values. return json.dumps( - json.loads(encoding.MessageToJson(self.proto), encoding='shortstrings'), + json.loads(encoding.MessageToJson(self.proto)), indent=2, sort_keys=True) diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py index 837989ae4c5a..7101a1e6297f 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/names.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -36,10 +36,10 @@ # Update this version to the next version whenever there is a change that will # require changes to legacy Dataflow worker execution environment. -BEAM_CONTAINER_VERSION = 'beam-master-20210809' +BEAM_CONTAINER_VERSION = 'beam-master-20210920' # Update this version to the next version whenever there is a change that # requires changes to SDK harness container or SDK harness launcher. -BEAM_FNAPI_CONTAINER_VERSION = 'beam-master-20210809' +BEAM_FNAPI_CONTAINER_VERSION = 'beam-master-20210920' DATAFLOW_CONTAINER_IMAGE_REPOSITORY = 'gcr.io/cloud-dataflow/v1beta3' diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py b/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py index a6b5ac4fb1f3..91c9b515bab0 100644 --- a/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py +++ b/sdks/python/apache_beam/runners/interactive/pipeline_fragment.py @@ -190,6 +190,18 @@ def _mark_necessary_transforms_and_pcolls(self, runner_pcolls_to_user_pcolls): break # Mark the AppliedPTransform as necessary. necessary_transforms.add(producer) + + # Also mark composites that are not the root transform. If the root + # transform is added, then all transforms are incorrectly marked as + # necessary. If composites are not handled, then there will be + # orphaned PCollections. + if producer.parent is not None: + necessary_transforms.update(producer.parts) + + # This will recursively add all the PCollections in this composite. + for part in producer.parts: + updated_all_inputs.update(part.outputs.values()) + # Record all necessary input and side input PCollections. 
updated_all_inputs.update(producer.inputs) # pylint: disable=bad-option-value diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_fragment_test.py b/sdks/python/apache_beam/runners/interactive/pipeline_fragment_test.py index 7439e0e22869..cbd4e9b64b98 100644 --- a/sdks/python/apache_beam/runners/interactive/pipeline_fragment_test.py +++ b/sdks/python/apache_beam/runners/interactive/pipeline_fragment_test.py @@ -129,6 +129,41 @@ def test_fragment_does_not_prune_teststream(self): # resulting graph is invalid and the following call will raise an exception. fragment.to_runner_api() + @patch('IPython.get_ipython', new_callable=mock_get_ipython) + def test_pipeline_composites(self, cell): + """Tests that composites are supported. + """ + with cell: # Cell 1 + p = beam.Pipeline(ir.InteractiveRunner()) + ib.watch({'p': p}) + + with cell: # Cell 2 + # pylint: disable=range-builtin-not-iterating + init = p | 'Init' >> beam.Create(range(5)) + + with cell: # Cell 3 + # Have a composite within a composite to test that all transforms under a + # composite are added. + + @beam.ptransform_fn + def Bar(pcoll): + return pcoll | beam.Map(lambda n: n) + + @beam.ptransform_fn + def Foo(pcoll): + p1 = pcoll | beam.Map(lambda n: n) + p2 = pcoll | beam.Map(str) + bar = pcoll | Bar() + return {'pc1': p1, 'pc2': p2, 'bar': bar} + + res = init | Foo() + + ib.watch(locals()) + pc = res['pc1'] + + result = pf.PipelineFragment([pc]).run() + self.assertEqual([0, 1, 2, 3, 4], list(result.get(pc))) + if __name__ == '__main__': unittest.main() diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index dc536d22ec43..03b4f7e1e9e3 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -24,7 +24,9 @@ import functools import itertools import logging +import operator from typing import Callable +from typing import Collection from typing import Container from typing import DefaultDict from typing import Dict @@ -36,6 +38,7 @@ from typing import Set from typing import Tuple from typing import TypeVar +from typing import Union from apache_beam import coders from apache_beam.internal import pickler @@ -357,12 +360,29 @@ def wrapper(self, *args): class TransformContext(object): - _KNOWN_CODER_URNS = set( + _COMMON_CODER_URNS = set( value.urn for (key, value) in common_urns.coders.__dict__.items() if not key.startswith('_') # Length prefix Rows rather than re-coding them. ) - set([common_urns.coders.ROW.urn]) + _REQUIRED_CODER_URNS = set([ + common_urns.coders.WINDOWED_VALUE.urn, + # For impulse. + common_urns.coders.BYTES.urn, + common_urns.coders.GLOBAL_WINDOW.urn, + # For GBK. + common_urns.coders.KV.urn, + common_urns.coders.ITERABLE.urn, + # For SDF. + common_urns.coders.DOUBLE.urn, + # For timers. + common_urns.coders.TIMER.urn, + # For everything else. + common_urns.coders.LENGTH_PREFIX.urn, + common_urns.coders.CUSTOM_WINDOW.urn, + ]) + def __init__( self, components, # type: beam_runner_api_pb2.Components @@ -373,6 +393,14 @@ def __init__( self.known_runner_urns = known_runner_urns self.runner_only_urns = known_runner_urns - frozenset( [common_urns.primitives.FLATTEN.urn]) + self._known_coder_urns = set.union( + # Those which are required. + self._REQUIRED_CODER_URNS, + # Those common coders which are understood by all environments. 
+ self._COMMON_CODER_URNS.intersection( + *( + set(env.capabilities) + for env in self.components.environments.values()))) self.use_state_iterables = use_state_iterables self.is_drain = is_drain # ok to pass None for context because BytesCoder has no components @@ -456,7 +484,7 @@ def maybe_length_prefixed_and_safe_coder(self, coder_id): coder = self.components.coders[coder_id] if coder.spec.urn == common_urns.coders.LENGTH_PREFIX.urn: return coder_id, self.bytes_coder_id - elif coder.spec.urn in self._KNOWN_CODER_URNS: + elif coder.spec.urn in self._known_coder_urns: new_component_ids = [ self.maybe_length_prefixed_coder(c) for c in coder.component_coder_ids ] @@ -765,6 +793,27 @@ def _group_stages_by_key(stages, get_stage_key): return (grouped_stages, stages_with_none_key) +def _group_stages_with_limit(stages, get_limit): + # type: (Iterable[Stage], Callable[[str], int]) -> Iterable[Collection[Stage]] + stages_with_limit = [(stage, get_limit(stage.name)) for stage in stages] + group: List[Stage] = [] + group_limit = 0 + for stage, limit in sorted(stages_with_limit, key=operator.itemgetter(1)): + if limit < 1: + raise Exception( + 'expected get_limit to return an integer >= 1, ' + 'instead got: %d for stage: %s' % (limit, stage)) + if not group: + group_limit = limit + assert len(group) < group_limit + group.append(stage) + if len(group) >= group_limit: + yield group + group = [] + if group: + yield group + + def _remap_input_pcolls(transform, pcoll_id_remap): for input_key in list(transform.inputs.keys()): if transform.inputs[input_key] in pcoll_id_remap: @@ -803,7 +852,7 @@ def _make_pack_name(names): def _eliminate_common_key_with_none(stages, context, can_pack=lambda s: True): - # type: (Iterable[Stage], TransformContext, Callable[[str], bool]) -> Iterable[Stage] + # type: (Iterable[Stage], TransformContext, Callable[[str], Union[bool, int]]) -> Iterable[Stage] """Runs common subexpression elimination for sibling KeyWithNone stages. @@ -866,8 +915,11 @@ def get_stage_key(stage): yield stage +_DEFAULT_PACK_COMBINERS_LIMIT = 128 + + def pack_per_key_combiners(stages, context, can_pack=lambda s: True): - # type: (Iterable[Stage], TransformContext, Callable[[str], bool]) -> Iterator[Stage] + # type: (Iterable[Stage], TransformContext, Callable[[str], Union[bool, int]]) -> Iterator[Stage] """Packs sibling CombinePerKey stages into a single CombinePerKey. @@ -919,6 +971,13 @@ def _try_fuse_stages(a, b): else: raise ValueError + def _get_limit(stage_name): + result = can_pack(stage_name) + if result is True: + return _DEFAULT_PACK_COMBINERS_LIMIT + else: + return int(result) + # Partition stages by whether they are eligible for CombinePerKey packing # and group eligible CombinePerKey stages by parent and environment. 
def get_stage_key(stage): @@ -939,7 +998,12 @@ def get_stage_key(stage): for stage in ineligible_stages: yield stage - for stage_key, packable_stages in grouped_eligible_stages.items(): + grouped_packable_stages = [(stage_key, subgrouped_stages) for stage_key, + grouped_stages in grouped_eligible_stages.items() + for subgrouped_stages in _group_stages_with_limit( + grouped_stages, _get_limit)] + + for stage_key, packable_stages in grouped_packable_stages: input_pcoll_id, _ = stage_key try: if not len(packable_stages) > 1: @@ -1063,18 +1127,22 @@ def get_stage_key(stage): def pack_combiners(stages, context, can_pack=None): - # type: (Iterable[Stage], TransformContext, Optional[Callable[[str], bool]]) -> Iterator[Stage] + # type: (Iterable[Stage], TransformContext, Optional[Callable[[str], Union[bool, int]]]) -> Iterator[Stage] if can_pack is None: - can_pack_names = {} # type: Dict[str, bool] + can_pack_names = {} # type: Dict[str, Union[bool, int]] parents = context.parents_map() - def can_pack_fn(name: str) -> bool: + def can_pack_fn(name: str) -> Union[bool, int]: if name in can_pack_names: return can_pack_names[name] else: transform = context.components.transforms[name] if python_urns.APPLY_COMBINER_PACKING in transform.annotations: - result = True + try: + result = int( + transform.annotations[python_urns.APPLY_COMBINER_PACKING]) + except ValueError: + result = True elif name in parents: result = can_pack_fn(parents[name]) else: diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py index 37882dc31ffa..144f067900f3 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py @@ -307,6 +307,83 @@ def expand(self, pcoll): vals = [6, 3, 1, -1, 9, 1, 5, 2, 0, 6] _ = pipeline | Create(vals) | 'multiple-combines' >> MultipleCombines() + @pytest.mark.it_validatesrunner + def test_run_packable_combine_limit(self): + class MultipleLargeCombines(beam.PTransform): + def annotations(self): + # Limit to at most 2 combiners per packed combiner. + return {python_urns.APPLY_COMBINER_PACKING: b'2'} + + def expand(self, pcoll): + assert_that( + pcoll | 'min-1-globally' >> core.CombineGlobally(min), + equal_to([-1]), + label='assert-min-1-globally') + assert_that( + pcoll | 'min-2-globally' >> core.CombineGlobally(min), + equal_to([-1]), + label='assert-min-2-globally') + assert_that( + pcoll | 'min-3-globally' >> core.CombineGlobally(min), + equal_to([-1]), + label='assert-min-3-globally') + + class MultipleSmallCombines(beam.PTransform): + def annotations(self): + # Limit to at most 4 combiners per packed combiner. 
+ return {python_urns.APPLY_COMBINER_PACKING: b'4'} + + def expand(self, pcoll): + assert_that( + pcoll | 'min-4-globally' >> core.CombineGlobally(min), + equal_to([-1]), + label='assert-min-4-globally') + assert_that( + pcoll | 'min-5-globally' >> core.CombineGlobally(min), + equal_to([-1]), + label='assert-min-5-globally') + + with TestPipeline() as pipeline: + vals = [6, 3, 1, -1, 9, 1, 5, 2, 0, 6] + pcoll = pipeline | Create(vals) + _ = pcoll | 'multiple-large-combines' >> MultipleLargeCombines() + _ = pcoll | 'multiple-small-combines' >> MultipleSmallCombines() + + proto = pipeline.to_runner_api( + default_environment=environments.EmbeddedPythonEnvironment( + capabilities=environments.python_sdk_capabilities())) + optimized = translations.optimize_pipeline( + proto, + phases=[translations.pack_combiners], + known_runner_urns=frozenset(), + partial=True) + optimized_stage_names = [ + t.unique_name for t in optimized.components.transforms.values() + ] + self.assertIn( + 'multiple-large-combines/Packed[min-1-globally_CombinePerKey, ' + 'min-2-globally_CombinePerKey]/Pack', + optimized_stage_names) + self.assertIn( + 'Packed[multiple-large-combines_min-3-globally_CombinePerKey, ' + 'multiple-small-combines_min-4-globally_CombinePerKey]/Pack', + optimized_stage_names) + self.assertIn( + 'multiple-small-combines/min-5-globally/CombinePerKey', + optimized_stage_names) + self.assertNotIn( + 'multiple-large-combines/min-1-globally/CombinePerKey', + optimized_stage_names) + self.assertNotIn( + 'multiple-large-combines/min-2-globally/CombinePerKey', + optimized_stage_names) + self.assertNotIn( + 'multiple-large-combines/min-3-globally/CombinePerKey', + optimized_stage_names) + self.assertNotIn( + 'multiple-small-combines/min-4-globally/CombinePerKey', + optimized_stage_names) + def test_conditionally_packed_combiners(self): class RecursiveCombine(beam.PTransform): def __init__(self, labels): diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py index 449e45ab1cc5..cd9de00be246 100644 --- a/sdks/python/apache_beam/transforms/display.py +++ b/sdks/python/apache_beam/transforms/display.py @@ -149,13 +149,28 @@ def create_payload(dd): value = display_data_dict['value'] if isinstance(value, str): return beam_runner_api_pb2.LabelledPayload( - label=label, string_value=value) + label=label, + string_value=value, + key=display_data_dict['key'], + namespace=display_data_dict.get('namespace', '')) elif isinstance(value, bool): return beam_runner_api_pb2.LabelledPayload( - label=label, bool_value=value) - elif isinstance(value, (int, float, complex)): + label=label, + bool_value=value, + key=display_data_dict['key'], + namespace=display_data_dict.get('namespace', '')) + elif isinstance(value, int): return beam_runner_api_pb2.LabelledPayload( - label=label, double_value=value) + label=label, + int_value=value, + key=display_data_dict['key'], + namespace=display_data_dict.get('namespace', '')) + elif isinstance(value, (float, complex)): + return beam_runner_api_pb2.LabelledPayload( + label=label, + double_value=value, + key=display_data_dict['key'], + namespace=display_data_dict.get('namespace', '')) else: raise ValueError( 'Unsupported type %s for value of display data %s' % diff --git a/sdks/python/apache_beam/transforms/external.py b/sdks/python/apache_beam/transforms/external.py index 9025aea7b5f6..7b3964a52c35 100644 --- a/sdks/python/apache_beam/transforms/external.py +++ b/sdks/python/apache_beam/transforms/external.py @@ -236,8 +236,7 @@ def 
with_constructor(self, *args, **kwargs): :param args: parameter values of the constructor. :param kwargs: parameter names and values of the constructor. """ - if (self._constructor_method or self._constructor_param_args or - self._constructor_param_kwargs): + if self._has_constructor(): raise ValueError( 'Constructor or constructor method can only be specified once') @@ -254,8 +253,7 @@ def with_constructor_method(self, method_name, *args, **kwargs): :param args: parameter values of the constructor method. :param kwargs: parameter names and values of the constructor method. """ - if (self._constructor_method or self._constructor_param_args or - self._constructor_param_kwargs): + if self._has_constructor(): raise ValueError( 'Constructor or constructor method can only be specified once') @@ -276,6 +274,46 @@ def add_builder_method(self, method_name, *args, **kwargs): """ self._builder_methods_and_params[method_name] = (args, kwargs) + def _has_constructor(self): + return ( + self._constructor_method or self._constructor_param_args or + self._constructor_param_kwargs) + + +class JavaExternalTransform(ptransform.PTransform): + """A proxy for Java-implemented external transforms. + + One builds these transforms just as one would in Java. + """ + def __init__(self, class_name, expansion_service=None): + self._payload_builder = JavaClassLookupPayloadBuilder(class_name) + self._expansion_service = expansion_service + + def __call__(self, *args, **kwargs): + self._payload_builder.with_constructor(*args, **kwargs) + return self + + def __getattr__(self, name): + # Don't try to emulate special methods. + if name.startswith('__') and name.endswith('__'): + return super().__getattr__(name) + + def construct(*args, **kwargs): + if self._payload_builder._has_constructor(): + builder_method = self._payload_builder.add_builder_method + else: + builder_method = self._payload_builder.with_constructor_method + builder_method(name, *args, **kwargs) + return self + + return construct + + def expand(self, pcolls): + return pcolls | ExternalTransform( + common_urns.java_class_lookup, + self._payload_builder.build(), + self._expansion_service) + class AnnotationBasedPayloadBuilder(SchemaBasedPayloadBuilder): """ diff --git a/sdks/python/apache_beam/transforms/external_test.py b/sdks/python/apache_beam/transforms/external_test.py index d0a53ef934b6..83e72461139b 100644 --- a/sdks/python/apache_beam/transforms/external_test.py +++ b/sdks/python/apache_beam/transforms/external_test.py @@ -39,6 +39,7 @@ from apache_beam.transforms.external import AnnotationBasedPayloadBuilder from apache_beam.transforms.external import ImplicitSchemaPayloadBuilder from apache_beam.transforms.external import JavaClassLookupPayloadBuilder +from apache_beam.transforms.external import JavaExternalTransform from apache_beam.transforms.external import NamedTupleBasedPayloadBuilder from apache_beam.typehints import typehints from apache_beam.typehints.native_type_compatibility import convert_to_beam_type @@ -509,6 +510,42 @@ def test_build_payload_with_constructor_twice_fails(self): with self.assertRaises(ValueError): payload_builder.with_constructor('def') + def test_implicit_builder_with_constructor(self): + constructor_transform = ( + JavaExternalTransform('org.pkg.MyTransform')('abc').withIntProperty(5)) + + payload_bytes = constructor_transform._payload_builder.payload() + payload_from_bytes = proto_utils.parse_Bytes( + payload_bytes, JavaClassLookupPayload) + self.assertEqual('org.pkg.MyTransform', payload_from_bytes.class_name) + 
self._verify_row( + payload_from_bytes.constructor_schema, + payload_from_bytes.constructor_payload, {'ignore0': 'abc'}) + builder_method = payload_from_bytes.builder_methods[0] + self.assertEqual('withIntProperty', builder_method.name) + self._verify_row( + builder_method.schema, builder_method.payload, {'ignore0': 5}) + + def test_implicit_builder_with_constructor_method(self): + constructor_transform = JavaExternalTransform('org.pkg.MyTransform').of( + str_field='abc').withProperty(int_field=1234).build() + + payload_bytes = constructor_transform._payload_builder.payload() + payload_from_bytes = proto_utils.parse_Bytes( + payload_bytes, JavaClassLookupPayload) + self.assertEqual('of', payload_from_bytes.constructor_method) + self._verify_row( + payload_from_bytes.constructor_schema, + payload_from_bytes.constructor_payload, {'str_field': 'abc'}) + with_property_method = payload_from_bytes.builder_methods[0] + self.assertEqual('withProperty', with_property_method.name) + self._verify_row( + with_property_method.schema, + with_property_method.payload, {'int_field': 1234}) + build_method = payload_from_bytes.builder_methods[1] + self.assertEqual('build', build_method.name) + self._verify_row(build_method.schema, build_method.payload, {}) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) diff --git a/sdks/python/apache_beam/transforms/trigger.py b/sdks/python/apache_beam/transforms/trigger.py index ba9cc5eb6fa1..d7a17aca7796 100644 --- a/sdks/python/apache_beam/transforms/trigger.py +++ b/sdks/python/apache_beam/transforms/trigger.py @@ -724,15 +724,8 @@ def reset(self, window, context): self.underlying.reset(window, context) def may_lose_data(self, windowing): - """Repeatedly may only lose data if the underlying trigger may not have - its condition met. - - For underlying triggers that may finish, Repeatedly overrides that - behavior. 
- """ - return ( - self.underlying.may_lose_data(windowing) - & DataLossReason.CONDITION_NOT_GUARANTEED) + """Repeatedly will run in a loop and pick up whatever is left at GC.""" + return DataLossReason.NO_POTENTIAL_LOSS @staticmethod def from_runner_api(proto, context): diff --git a/sdks/python/apache_beam/transforms/trigger_test.py b/sdks/python/apache_beam/transforms/trigger_test.py index ed43094088c1..f0f4902901d7 100644 --- a/sdks/python/apache_beam/transforms/trigger_test.py +++ b/sdks/python/apache_beam/transforms/trigger_test.py @@ -496,7 +496,7 @@ def test_after_watermark_condition_late(self): self._test( AfterWatermark(late=AfterCount(5)), 60, - DataLossReason.CONDITION_NOT_GUARANTEED) + DataLossReason.NO_POTENTIAL_LOSS) def test_after_count_one(self): self._test(AfterCount(1), 0, DataLossReason.MAY_FINISH) @@ -515,8 +515,7 @@ def test_repeatedly_may_finish_underlying(self): self._test(Repeatedly(AfterCount(1)), 0, DataLossReason.NO_POTENTIAL_LOSS) def test_repeatedly_condition_underlying(self): - self._test( - Repeatedly(AfterCount(2)), 0, DataLossReason.CONDITION_NOT_GUARANTEED) + self._test(Repeatedly(AfterCount(2)), 0, DataLossReason.NO_POTENTIAL_LOSS) def test_after_any_some_unsafe(self): self._test( @@ -532,7 +531,7 @@ def test_after_any_same_reason(self): def test_after_any_different_reasons(self): self._test( - AfterAny(Repeatedly(AfterCount(2)), AfterProcessingTime()), + AfterAny(AfterCount(2), AfterProcessingTime()), 0, DataLossReason.MAY_FINISH | DataLossReason.CONDITION_NOT_GUARANTEED) diff --git a/sdks/python/container/base_image_requirements.txt b/sdks/python/container/base_image_requirements.txt index 6c158cd672f5..b98d530b389a 100644 --- a/sdks/python/container/base_image_requirements.txt +++ b/sdks/python/container/base_image_requirements.txt @@ -29,11 +29,11 @@ fastavro==1.0.0.post1 crcmod==1.7 dill==0.3.1.1 future==0.18.2 -grpcio==1.34.0 +grpcio==1.40.0 hdfs==2.5.8 httplib2==0.19.1 oauth2client==4.1.3 -protobuf==3.12.2 +protobuf==3.17.3 pyarrow==3.0.0 pydot==1.4.1 pymongo==3.10.1 @@ -78,10 +78,10 @@ numpy==1.19.5 scipy==1.4.1 scikit-learn==0.24.1 pandas==1.1.5 ; python_version<"3.7" -pandas==1.2.4 ; python_version>="3.7" +pandas==1.3.3 ; python_version>="3.7" protorpc==0.12.0 python-gflags==3.1.2 -tensorflow==2.5.1 +tensorflow==2.6.0 nltk==3.5.0 # Packages needed for testing. 
diff --git a/sdks/python/container/license_scripts/dep_urls_py.yaml b/sdks/python/container/license_scripts/dep_urls_py.yaml index cdc45a8ff5d0..ce2c7f63b49c 100644 --- a/sdks/python/container/license_scripts/dep_urls_py.yaml +++ b/sdks/python/container/license_scripts/dep_urls_py.yaml @@ -48,6 +48,8 @@ pip_dependencies: license: "https://raw.githubusercontent.com/chardet/chardet/master/LICENSE" certifi: license: "https://raw.githubusercontent.com/certifi/python-certifi/master/LICENSE" + clang: + license: "https://raw.githubusercontent.com/llvm/llvm-project/main/clang/LICENSE.TXT" cython: license: "https://raw.githubusercontent.com/cython/cython/master/LICENSE.txt" dataclasses: diff --git a/sdks/python/setup.py b/sdks/python/setup.py index c224f7935fce..e61dff1d8060 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -165,7 +165,7 @@ def get_version(): REQUIRED_TEST_PACKAGES = [ 'freezegun>=0.3.12', 'mock>=1.0.1,<3.0.0', - 'pandas>=1.0,<1.3.0', + 'pandas<2.0.0', 'parameterized>=0.7.1,<0.8.0', 'pyhamcrest>=1.9,!=1.10.0,<2.0.0', 'pyyaml>=3.12,<6.0.0', @@ -215,7 +215,7 @@ def get_version(): INTERACTIVE_BEAM_TEST = [ # notebok utils 'nbformat>=5.0.5,<6', - 'nbconvert>=5.6.1,<6', + 'nbconvert>=6.2.0,<7', # headless chrome based integration tests 'selenium>=3.141.0,<4', 'needle>=0.5.0,<1', @@ -305,7 +305,8 @@ def run(self): 'interactive': INTERACTIVE_BEAM, 'interactive_test': INTERACTIVE_BEAM_TEST, 'aws': AWS_REQUIREMENTS, - 'azure': AZURE_REQUIREMENTS + 'azure': AZURE_REQUIREMENTS, + 'dataframe': ['pandas>=1.0,<1.4'] }, zip_safe=False, # PyPI package information. diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 559f4155f81d..67dd2d640353 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -30,7 +30,7 @@ select = E3 # allow apps that support color to use it. passenv=TERM # Set [] options for pip installation of apache-beam tarball. -extras = test +extras = test,dataframe # Don't warn that these commands aren't installed. whitelist_externals = false @@ -88,7 +88,7 @@ commands = {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" [testenv:py{36,37,38}-cloud] -extras = test,gcp,interactive,aws,azure +extras = test,gcp,interactive,dataframe,aws,azure commands = {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" @@ -98,7 +98,7 @@ deps = codecov pytest-cov==2.9.0 passenv = GIT_* BUILD_* ghprb* CHANGE_ID BRANCH_NAME JENKINS_* CODECOV_* -extras = test,gcp,interactive,aws +extras = test,gcp,interactive,dataframe,aws commands = -rm .coverage {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" "--cov-report=xml --cov=. --cov-append" @@ -138,7 +138,7 @@ commands = python setup.py mypy [testenv:py38-docs] -extras = test,gcp,docs,interactive +extras = test,gcp,docs,interactive,dataframe deps = Sphinx==1.8.5 sphinx_rtd_theme==0.4.3 @@ -197,7 +197,7 @@ commands = # pulls in the latest docutils. 
Uncomment this line once botocore does not # conflict with Sphinx: # extras = docs,test,gcp,aws,interactive,interactive_test -extras = test,gcp,aws,interactive,interactive_test +extras = test,gcp,aws,dataframe,interactive,interactive_test passenv = WORKSPACE commands = time {toxinidir}/scripts/run_dependency_check.sh diff --git a/website/www/site/content/en/documentation/dsls/dataframes/overview.md b/website/www/site/content/en/documentation/dsls/dataframes/overview.md index 8d2c0a8d21fa..0620da417bc3 100644 --- a/website/www/site/content/en/documentation/dsls/dataframes/overview.md +++ b/website/www/site/content/en/documentation/dsls/dataframes/overview.md @@ -30,9 +30,18 @@ The Beam DataFrame API is intended to provide access to a familiar programming i If you’re new to pandas DataFrames, you can get started by reading [10 minutes to pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html), which shows you how to import and work with the `pandas` package. pandas is an open-source Python library for data manipulation and analysis. It provides data structures that simplify working with relational or labeled data. One of these data structures is the [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html), which contains two-dimensional tabular data and provides labeled rows and columns for the data. -## Using DataFrames +## Pre-requisites + +To use Beam DataFrames, you need to install Beam python version 2.26.0 or higher (for complete setup instructions, see the [Apache Beam Python SDK Quickstart](https://beam.apache.org/get-started/quickstart-py/)) and a supported `pandas` version. In Beam 2.34.0 and newer the easiest way to do this is with the "dataframe" extra: + +``` +pip install apache_beam[dataframe] +``` -To use Beam DataFrames, you need to install Apache Beam version 2.26.0 or higher (for complete setup instructions, see the [Apache Beam Python SDK Quickstart](https://beam.apache.org/get-started/quickstart-py/)) and pandas version 1.0 or higher. You can use DataFrames as shown in the following example, which reads New York City taxi data from a CSV file, performs a grouped aggregation, and writes the output back to CSV: +Note that the _same_ `pandas` version should be installed on workers when executing DataFrame API pipelines on distributed runners. Reference [`base_image_requirements.txt`](https://github.com/apache/beam/blob/master/sdks/python/container/base_image_requirements.txt) for the Beam release you are using to see what version of `pandas` will be used by default on workers. + +## Using DataFrames +You can use DataFrames as shown in the following example, which reads New York City taxi data from a CSV file, performs a grouped aggregation, and writes the output back to CSV: {{< highlight py >}} from apache_beam.dataframe.io import read_csv diff --git a/website/www/site/content/en/documentation/glossary.md b/website/www/site/content/en/documentation/glossary.md index acca8a33b570..a4f6d0cce138 100644 --- a/website/www/site/content/en/documentation/glossary.md +++ b/website/www/site/content/en/documentation/glossary.md @@ -19,16 +19,16 @@ limitations under the License. ## Aggregation -A transform pattern for computing a value from multiple input elements. Aggregation is similar to the reduce operation in the [MapReduce](https://en.wikipedia.org/wiki/MapReduce) model. 
Aggregation transforms include Count (computes the count of all elements in the aggregation), Max (computes the maximum element in the aggregation), and Sum (computes the sum of all elements in the aggregation). +A transform pattern for computing a value from multiple input elements. Aggregation is similar to the reduce operation in the [MapReduce](https://en.wikipedia.org/wiki/MapReduce) model. Aggregation transforms include Combine (applies a user-defined function to all elements in the aggregation), Count (computes the count of all elements in the aggregation), Max (computes the maximum element in the aggregation), and Sum (computes the sum of all elements in the aggregation). -For a complete list of aggregation transforms, see: +For a list of built-in aggregation transforms, see: * [Java Transform catalog](/documentation/transforms/java/overview/#aggregation) * [Python Transform catalog](/documentation/transforms/python/overview/#aggregation) ## Apply -A method for invoking a transform on a PCollection. Each transform in the Beam SDKs has a generic `apply` method (or pipe operator `|`). Invoking multiple Beam transforms is similar to method chaining, but with a difference: You apply the transform to the input PCollection, passing the transform itself as an argument, and the operation returns the output PCollection. Because of Beam’s deferred execution model, applying a transform does not immediately execute that transform. +A method for invoking a transform on an input PCollection (or set of PCollections) to produce one or more output PCollections. The `apply` method is attached to the PCollection (or value). Invoking multiple Beam transforms is similar to method chaining, but with a difference: You apply the transform to the input PCollection, passing the transform itself as an argument, and the operation returns the output PCollection. Because of Beam’s deferred execution model, applying a transform does not immediately execute that transform. To learn more, see: @@ -44,7 +44,7 @@ To learn more, see: ## Bounded data -A dataset of a known, fixed size. A PCollection can be bounded or unbounded, depending on the source of the data that it represents. Reading from a batch data source, such as a file or a database, creates a bounded PCollection. Beam also supports reading a bounded amount of data from an unbounded source. +A dataset of a known, fixed size (alternatively, a dataset that is not growing over time). A PCollection can be bounded or unbounded, depending on the source of the data that it represents. Reading from a batch data source, such as a file or a database, creates a bounded PCollection. Beam also supports reading a bounded amount of data from an unbounded source. To learn more, see: @@ -52,7 +52,7 @@ To learn more, see: ## Bundle -The processing unit for elements in a PCollection. Instead of processing all elements in a PCollection simultaneously, Beam processes the elements in bundles. The runner handles the division of the collection into bundles, and in doing so it may optimize the bundle size for the use case. For example, a streaming runner might process smaller bundles than a batch runner. +The processing and commit/retry unit for elements in a PCollection. Instead of processing all elements in a PCollection simultaneously, Beam processes the elements in bundles. The runner handles the division of the collection into bundles, and in doing so it may optimize the bundle size for the use case. 
For example, a streaming runner might process smaller bundles than a batch runner. To learn more, see: @@ -94,7 +94,7 @@ To learn more, see: ## Composite transform -A PTransform that expands into many PTransforms. Composite transforms have a nested structure, in which a complex transform applies one or more simpler transforms. These simpler transforms could be existing Beam operations like ParDo, Combine, or GroupByKey, or they could be other composite transforms. Nesting multiple transforms inside a single composite transform can make your pipeline more modular and easier to understand. +A PTransform that expands into many PTransforms. Composite transforms have a nested structure, in which a complex transform applies one or more simpler transforms. These simpler transforms could be existing Beam operations like ParDo, Combine, or GroupByKey, or they could be other composite transforms. Nesting multiple transforms inside a single composite transform can make your pipeline more modular and easier to understand. Many of the built-in transforms are composite transforms. To learn more, see: @@ -118,7 +118,7 @@ To learn more, see: ## Deferred execution -A feature of the Beam execution model. Beam operations are deferred, meaning that the result of a given operation may not be available for control flow. Deferred execution allows the Beam API to support parallel processing of data. +A feature of the Beam execution model. Beam operations are deferred, meaning that the result of a given operation may not be available for control flow. Deferred execution allows the Beam API to support parallel processing of data and perform pipeline-level optimizations. ## Distribution (metric) @@ -130,7 +130,7 @@ To learn more, see: ## DoFn -A function object used by ParDo (or some other transform) to process the elements of a PCollection. A DoFn is a user-defined function, meaning that it contains custom code that defines a data processing task in your pipeline. The Beam system invokes a DoFn one or more times to process some arbitrary bundle of elements, but Beam doesn’t guarantee an exact number of invocations. +A function object used by ParDo (or some other transform) to process the elements of a PCollection, often producing elements for an output PCollection. A DoFn is a user-defined function, meaning that it contains custom code that defines a data processing task in your pipeline. The Beam system invokes a DoFn one or more times to process some arbitrary bundle of elements, but Beam doesn’t guarantee an exact number of invocations. To learn more, see: @@ -167,7 +167,7 @@ A data-processing system, such as Dataflow, Spark, or Flink. A Beam runner for a ## Event time -The time a data event occurs, determined by a timestamp on an element. This is in contrast to processing time, which is when an element is processed in a pipeline. An event could be, for example, a user interaction or a write to an error log. There’s no guarantee that events will appear in a pipeline in order of event time. +The time a data event occurs, determined by a timestamp on an element. This is in contrast to processing time, which is when an element is processed in a pipeline. An event could be, for example, a user interaction or a write to an error log. There’s no guarantee that events will appear in a pipeline in order of event time, but windowing and timers let you reason correctly about event time. 
To learn more, see: @@ -176,7 +176,7 @@ To learn more, see: ## Expansion Service -A service that enables a pipeline to apply (expand) cross-language transforms defined in other SDKs. For example, by connecting to a Java expansion service, the Python SDK can apply transforms implemented in Java. Currently SDKs define expansion services as local processes, but in the future Beam may support long-running expansion services. The development of expansion services is part of the ongoing effort to support multi-language pipelines. +A service that enables a pipeline to apply (expand) cross-language transforms defined in other SDKs. For example, by connecting to a Java expansion service, the Python SDK can apply transforms implemented in Java. Currently, SDKs typically start up expansion services as local processes, but in the future Beam may support long-running expansion services. The development of expansion services is part of the ongoing effort to support multi-language pipelines. ## Flatten One of the core PTransforms. Flatten merges multiple PCollections into a single logical PCollection. @@ -187,9 +187,13 @@ To learn more, see: * [Flatten (Java)](/documentation/transforms/java/other/flatten/) * [Flatten (Python)](/documentation/transforms/python/other/flatten/) +## Fn API + +An interface that lets a runner invoke SDK-specific user-defined functions. The Fn API, together with the Runner API, supports the ability to mix and match SDKs and runners. Used together, the Fn and Runner APIs let new SDKs run on every runner, and let new runners run pipelines from every SDK. + ## Fusion -An optimization that Beam runners can apply before running a pipeline. When one transform outputs a PCollection that’s consumed by another transform, or when two or more transforms take the same PCollection as input, a runner may be able to fuse the transforms together into a single processing unit (a *stage* in Dataflow). Fusion can make pipeline execution more efficient by preventing I/O operations. +An optimization that Beam runners can apply before running a pipeline. When one transform outputs a PCollection that’s consumed by another transform, or when two or more transforms take the same PCollection as input, a runner may be able to fuse the transforms together into a single processing unit (a *stage* in Dataflow). The consuming DoFn processes elements as they are emitted by the producing DoFn, rather than waiting for the entire intermediate PCollection to be computed. Fusion can make pipeline execution more efficient by preventing I/O operations. ## Gauge (metric) @@ -220,7 +224,7 @@ To learn more, see: ## Map -An element-wise PTransform that applies a user-defined function (UDF) to each element in a PCollection. Using Map, you can transform each individual element, but you can't change the number of elements. +An element-wise PTransform that applies a user-defined function (UDF) to each element in a PCollection. Using Map, you can transform each individual element into a new element, but you can't change the number of elements. To learn more, see: @@ -245,7 +249,7 @@ To learn more, see: ## ParDo -The lowest-level element-wise PTransform. For each element in an input PCollection, ParDo applies a function and emits zero, one, or multiple elements to an output PCollection. “ParDo” is short for “Parallel Do.” It’s similar to the map operation in a [MapReduce](https://en.wikipedia.org/wiki/MapReduce) algorithm, the `apply` method from a DataFrame, or the `UPDATE` keyword from SQL. 
+The lowest-level element-wise PTransform. For each element in an input PCollection, ParDo applies a function and emits zero, one, or multiple elements to an output PCollection. “ParDo” is short for “Parallel Do.” It’s similar to the map operation in a [MapReduce](https://en.wikipedia.org/wiki/MapReduce) algorithm and the reduce operation when following a GroupByKey. ParDo is also comparable to the `apply` method from a DataFrame, or the `UPDATE` keyword from SQL. To learn more, see: @@ -255,7 +259,7 @@ To learn more, see: ## Partition -An element-wise PTransform that splits a single PCollection into a fixed number of smaller PCollections. Partition requires a user-defined function (UDF) to determine how to split up the elements of the input collection into the resulting output collections. The number of partitions must be determined at graph construction time, meaning that you can’t determine the number of partitions using data calculated by the running pipeline. +An element-wise PTransform that splits a single PCollection into a fixed number of smaller, disjoint PCollections. Partition requires a user-defined function (UDF) to determine how to split up the elements of the input collection into the resulting output collections. The number of partitions must be determined at graph construction time, meaning that you can’t determine the number of partitions using data calculated by the running pipeline. To learn more, see: @@ -273,7 +277,7 @@ To learn more, see: ## Pipe operator (`|`) -Delimits a step in a Python pipeline. For example: `[Final Output PCollection] = ([Initial Input PCollection] | [First Transform] | [Second Transform] | [Third Transform])`. The output of each transform is passed from left to right as input to the next transform. The pipe operator in Python is equivalent to the `apply` method in Java (in other words, the pipe applies a transform to a PCollection). +Delimits a step in a Python pipeline. For example: `[Final Output PCollection] = ([Initial Input PCollection] | [First Transform] | [Second Transform] | [Third Transform])`. The output of each transform is passed from left to right as input to the next transform. The pipe operator in Python is equivalent to the `apply` method in Java (in other words, the pipe applies a transform to a PCollection), and usage is similar to the pipe operator in shell scripts, which lets you pass the output of one program into the input of another. To learn more, see: @@ -281,7 +285,7 @@ To learn more, see: ## Pipeline -An encapsulation of your entire data processing task, including reading input data from a source, transforming that data, and writing output data to a sink. You can think of a pipeline as a Beam program that uses PTransforms to process PCollections. The transforms in a pipeline can be represented as a directed acyclic graph (DAG). All Beam driver programs must create a pipeline. +An encapsulation of your entire data processing task, including reading input data from a source, transforming that data, and writing output data to a sink. You can think of a pipeline as a Beam program that uses PTransforms to process PCollections. (Alternatively, you can think of it as a single, executable composite PTransform with no inputs or outputs.) The transforms in a pipeline can be represented as a directed acyclic graph (DAG). All Beam driver programs must create a pipeline. To learn more, see: @@ -292,7 +296,7 @@ To learn more, see: ## Processing time -The time at which an element is processed at some stage in a pipeline. 
Processing time is not the same as event time, which is the time at which a data event occurs. Processing time is determined by the clock on the system processing the element. There’s no guarantee that elements will be processed in order of event time. +The real-world time at which an element is processed at some stage in a pipeline. Processing time is not the same as event time, which is the time at which a data event occurs. Processing time is determined by the clock on the system processing the element. There’s no guarantee that elements will be processed in order of event time. To learn more, see: @@ -327,7 +331,7 @@ To learn more, see: ## Schema -A language-independent type definition for a PCollection. The schema for a PCollection defines elements of that PCollection as an ordered list of named fields. Each field has a name, a type, and possibly a set of user options. Schemas provide a way to reason about types across different programming-language APIs. +A language-independent type definition for the elements of a PCollection. The schema for a PCollection defines elements of that PCollection as an ordered list of named fields. Each field has a name, a type, and possibly a set of user options. Schemas provide a way to reason about types across different programming-language APIs. They also let you describe data transformations more succinctly and at a higher level. To learn more, see: @@ -336,7 +340,7 @@ To learn more, see: ## Session -A time interval for grouping data events. A session is defined by some minimum gap duration between events. For example, a data stream representing user mouse activity may have periods with high concentrations of clicks followed by periods of inactivity. A session can represent such a pattern of activity followed by inactivity. +A time interval for grouping data events. A session is defined by some minimum gap duration between events. For example, a data stream representing user mouse activity may have periods with high concentrations of clicks followed by periods of inactivity. A session can represent such a pattern of activity delimited by inactivity. To learn more, see: @@ -345,7 +349,7 @@ To learn more, see: ## Side input -Additional input to a PTransform. Side input is input that you provide in addition to the main input PCollection. A DoFn can access side input each time it processes an element in the PCollection. Side inputs are useful if your transform needs to inject additional data at runtime. +Additional input to a PTransform that is provided in its entirety, rather than element-by-element. Side input is input that you provide in addition to the main input PCollection. A DoFn can access side input each time it processes an element in the PCollection. To learn more, see: @@ -380,9 +384,13 @@ To learn more, see: * [Splittable DoFns](/documentation/programming-guide/#splittable-dofns) * [Splittable DoFn in Apache Beam is Ready to Use](/blog/splittable-do-fn-is-available/) +## Stage + +The unit of fused transforms in a pipeline. Runners can perform fusion optimization to make pipeline execution more efficient. In Dataflow, the pipeline is conceptualized as a graph of fused stages. + ## State -Persistent values that a PTransform can access. The state API lets you augment element-wise operations (for example, ParDo or Map) with mutable state. Using the state API, you can read from, and write to, state as you process each element of a PCollection. 
You can use the state API together with the timer API to create processing tasks that give you fine-grained control over the workflow. +Persistent values that a PTransform can access. The state API lets you augment element-wise operations (for example, ParDo or Map) with mutable state. Using the state API, you can read from, and write to, state as you process each element of a PCollection. You can use the state API together with the timer API to create processing tasks that give you fine-grained control over the workflow. State is always local to a key and window. To learn more, see: @@ -410,7 +418,7 @@ To learn more, see: ## Timestamp -A point in time associated with an element in a PCollection and used to assign a window to the element. The source that creates the PCollection assigns each element an initial timestamp, often corresponding to when the element was read or added. But you can also manually assign timestamps. This can be useful if elements have an inherent timestamp, but the timestamp is somewhere in the structure of the element itself (for example, a time field in a server log entry). +A point in event time associated with an element in a PCollection and used to assign a window to the element. The source that creates the PCollection assigns each element an initial timestamp, often corresponding to when the element was read or added. But you can also manually assign timestamps. This can be useful if elements have an inherent timestamp, but the timestamp is somewhere in the structure of the element itself (for example, a time field in a server log entry). To learn more, see: @@ -431,7 +439,7 @@ To learn more, see: ## Unbounded data -A dataset of unlimited size. A PCollection can be bounded or unbounded, depending on the source of the data that it represents. Reading from a streaming or continuously-updating data source, such as Pub/Sub or Kafka, typically creates an unbounded PCollection. +A dataset that grows over time, with elements processed as they arrive. A PCollection can be bounded or unbounded, depending on the source of the data that it represents. Reading from a streaming or continuously-updating data source, such as Pub/Sub or Kafka, typically creates an unbounded PCollection. To learn more, see: @@ -449,7 +457,7 @@ To learn more, see: ## Watermark -The point in event time when all data in a window can be expected to have arrived in the pipeline. Watermarks provide a way to estimate the completeness of input data. Every PCollection has an associated watermark. Once the watermark progresses past the end of a window, any element that arrives with a timestamp in that window is considered late data. +An estimate on the lower bound of the timestamps that will be seen (in the future) at this point of the pipeline. Watermarks provide a way to estimate the completeness of input data. Every PCollection has an associated watermark. Once the watermark progresses past the end of a window, any element that arrives with a timestamp in that window is considered late data. To learn more, see: @@ -465,7 +473,7 @@ To learn more, see: ## Worker -A container, process, or virtual machine (VM) that handles some part of the parallel processing of a pipeline. The Beam model doesn’t support synchronizing shared state across worker machines. Instead, each worker node has its own independent copy of state. A Beam runner might serialize elements between machines for communication purposes and for other reasons such as persistence. 
+A container, process, or virtual machine (VM) that handles some part of the parallel processing of a pipeline. Each worker node has its own independent copy of state. A Beam runner might serialize elements between machines for communication purposes and for other reasons such as persistence. To learn more, see: diff --git a/website/www/site/content/en/documentation/programming-guide.md b/website/www/site/content/en/documentation/programming-guide.md index 61b6b97e9247..5d31fe389f5b 100644 --- a/website/www/site/content/en/documentation/programming-guide.md +++ b/website/www/site/content/en/documentation/programming-guide.md @@ -4553,7 +4553,8 @@ trigger emits the contents of a window after the timestamps attached to the data elements. The watermark is a global progress metric, and is Beam's notion of input completeness within your pipeline at any given point. `AfterWatermark.pastEndOfWindow()` -`AfterWatermark` *only* fires when the +`AfterWatermark` +`window.AfterEndOfWindow` *only* fires when the watermark passes the end of the window. In addition, you can configure triggers that fire if your pipeline receives data @@ -4578,6 +4579,10 @@ firings: {{< code_sample "sdks/python/apache_beam/examples/snippets/snippets_test.py" model_early_late_triggers >}} {{< /highlight >}} +{{< highlight go >}} + {{< code_sample "sdks/go/examples/snippets/09triggers.go" after_window_trigger >}} +{{< /highlight >}} + #### 9.1.1. Default trigger {#default-trigger} The default trigger for a `PCollection` is based on event time, and emits the @@ -4594,7 +4599,8 @@ modifying this behavior. The `AfterProcessingTime` trigger operates on *processing time*. For example, the `AfterProcessingTime.pastFirstElementInPane()` -`AfterProcessingTime` trigger emits a window +`AfterProcessingTime` +`window.TriggerAfterProcessingTime()` trigger emits a window after a certain amount of processing time has passed since data was received. The processing time is determined by the system clock, rather than the data element's timestamp. @@ -4607,14 +4613,16 @@ window. Beam provides one data-driven trigger, `AfterPane.elementCountAtLeast()` -`AfterCount`. This trigger works on an element +`AfterCount` +`window.TriggerAfterCount()`. This trigger works on an element count; it fires after the current pane has collected at least *N* elements. This allows a window to emit early results (before all the data has accumulated), which can be particularly useful if you are using a single global window. It is important to note that if, for example, you specify `.elementCountAtLeast(50)` -AfterCount(50) and only 32 elements arrive, +AfterCount(50) +`window.TriggerAfterCount(50)` and only 32 elements arrive, those 32 elements sit around forever. If the 32 elements are important to you, consider using [composite triggers](#composite-triggers) to combine multiple conditions. This allows you to specify multiple firing conditions such as "fire @@ -4623,7 +4631,7 @@ either when I receive 50 elements, or every 1 second". ### 9.4. Setting a trigger {#setting-a-trigger} When you set a windowing function for a `PCollection` by using the -`Window``WindowInto` +`Window``WindowInto``beam.WindowInto` transform, you can also specify a trigger. {{< paragraph class="language-java" >}} @@ -4643,6 +4651,13 @@ element in that window has been processed. The `accumulation_mode` parameter sets the window's **accumulation mode**. 
{{< /paragraph >}} +{{< paragraph class="language-go" >}} +You set the trigger(s) for a `PCollection` by passing in the `beam.Trigger` parameter +when you use the `beam.WindowInto` transform. This code sample sets an Always +trigger for a `PCollection`, which emits results every time an element in that window has been processed. The `beam.AccumulationMode` parameter +sets the window's **accumulation mode**. +{{< /paragraph >}} + {{< highlight java >}} PCollection pc = ...; pc.apply(Window.into(FixedWindows.of(1, TimeUnit.MINUTES)) @@ -4658,6 +4673,10 @@ sets the window's **accumulation mode**. accumulation_mode=AccumulationMode.DISCARDING) {{< /highlight >}} +{{< highlight go >}} + {{< code_sample "sdks/go/examples/snippets/09triggers.go" always_trigger >}} +{{< /highlight >}} + #### 9.4.1. Window accumulation modes {#window-accumulation-modes} When you specify a trigger, you must also set the the window's **accumulation @@ -4679,6 +4698,13 @@ trigger. To set a window to discard fired panes, set `accumulation_mode` to `DISCARDING`. {{< /paragraph >}} +{{< paragraph class="language-go" >}} +To set a window to accumulate the panes that are produced when the trigger +fires, set the `beam.AccumulationMode` parameter to `beam.PanesAccumulate()` when you set the +trigger. To set a window to discard fired panes, set `beam.AccumulationMode` to +`beam.PanesDiscard()`. +{{< /paragraph >}} + Let's look an example that uses a `PCollection` with fixed-time windowing and a data-based trigger. This is something you might do if, for example, each window represented a ten-minute running average, but you wanted to display the current @@ -6350,6 +6376,11 @@ $ python -m apache_beam.runners.portability.expansion_service_test -p $PORT_FOR_ Currently Python external transforms are limited to dependencies available in core Beam SDK Harness. +#### 13.1.3. Creating cross-language Go transforms + +Go currently does not support creating cross-language transforms, only using cross-language +transforms from other languages; see more at [BEAM-9923](https://issues.apache.org/jira/browse/BEAM-9923). + ### 13.2. Using cross-language transforms {#use-x-lang-transforms} Depending on the SDK language of the pipeline, you can use a high-level SDK-wrapper class, or a low-level transform class to access a cross-language transform. @@ -6369,7 +6400,7 @@ Currently, to access cross-language transforms from the Java SDK, you have to us 3. Include [External.of(...)](https://github.com/apache/beam/blob/master/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/External.java) when instantiating your pipeline. Reference the URN, payload, and expansion service. For examples, see the [cross-language transform test suite](https://github.com/apache/beam/blob/master/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ValidateRunnerXlangTest.java). 4. After the job has been submitted to the Beam runner, shutdown the expansion service by terminating the expansion service process. -#### 13.2.2 Using cross-language transforms in a Python pipeline +#### 13.2.2. Using cross-language transforms in a Python pipeline If a Python-specific wrapper for a cross-language transform is available, use that; otherwise, you have to use the lower-level [ExternalTransform](https://github.com/apache/beam/blob/master/sdks/python/apache_beam/transforms/external.py) class to access the transform. @@ -6416,6 +6447,68 @@ with pipeline as p: 4. 
After the job has been submitted to the Beam runner, shutdown the expansion service by terminating the expansion service process. +#### 13.2.3. Using cross-language transforms in a Go pipeline + +If a Go-specific wrapper for a cross-language is available, use that; otherwise, you have to use the +lower-level [CrossLanguage](https://pkg.go.dev/github.com/apache/beam/sdks/go/pkg/beam#CrossLanguage) +function to access the transform. + +**Expansion Services** + +The Go SDK does not yet support automatically starting an expansion service. In order to use +cross-language transforms, you must manually start any necessary expansion services on your local +machine and ensure they are accessible to your code during pipeline construction; see more at +[BEAM-12862](https://issues.apache.org/jira/browse/BEAM-12862). + +**Using an SDK wrapper** + +To use a cross-language transform through an SDK wrapper, import the package for the SDK wrapper +and call it from your pipeline as shown in the example: + +{{< highlight >}} +import ( + "github.com/apache/beam/sdks/v2/go/pkg/beam/io/xlang/kafkaio" +) + +// Kafka Read using previously defined values. +kafkaRecords := kafkaio.Read( + s, + expansionAddr, // Address of expansion service. + bootstrapAddr, + []string{topicName}, + kafkaio.MaxNumRecords(numRecords), + kafkaio.ConsumerConfigs(map[string]string{"auto.offset.reset": "earliest"})) +{{< /highlight >}} + +**Using the CrossLanguage function** + +When an SDK-specific wrapper isn't available, you will have to access the cross-language transform through the `beam.CrossLanguage` function. + +1. Make sure you have the appropriate expansion service running. See the expansion service section for details. +2. Make sure the transform you're trying to use is available and can be used by the expansion service. + Refer to [Creating cross-language transforms](#create-x-lang-transforms) for details. +3. Use the `beam.CrossLanguage` function in your pipeline as appropriate. Reference the URN, Payload, + expansion service address, and define inputs and outputs. You can use the + [beam.CrossLanguagePayload](https://pkg.go.dev/github.com/apache/beam/sdks/go/pkg/beam#CrossLanguagePayload) + function as a helper for encoding a payload. You can use the + [beam.UnnamedInput](https://pkg.go.dev/github.com/apache/beam/sdks/go/pkg/beam#UnnamedInput) and + [beam.UnnamedOutput](https://pkg.go.dev/github.com/apache/beam/sdks/go/pkg/beam#UnnamedOutput) + functions as shortcuts for single, unnamed inputs/outputs or define a map for named ones. + + {{< highlight >}} +type prefixPayload struct { + Data string `beam:"data"` +} +urn := "beam:transforms:xlang:test:prefix" +payload := beam.CrossLanguagePayload(prefixPayload{Data: prefix}) +expansionAddr := "localhost:8097" +outT := beam.UnnamedOutput(typex.New(reflectx.String)) +res := beam.CrossLanguage(s, urn, payload, expansionAddr, beam.UnnamedInput(inputPCol), outT) + {{< /highlight >}} + +4. After the job has been submitted to the Beam runner, shutdown the expansion service by + terminating the expansion service process. + ### 13.3. Runner Support {#x-lang-transform-runner-support} Currently, portable runners such as Flink, Spark, and the Direct runner can be used with multi-language pipelines.
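The `JavaExternalTransform` class added to `sdks/python/apache_beam/transforms/external.py` earlier in this patch gives Python pipelines a more fluent way to configure such Java transforms than assembling a `JavaClassLookupPayloadBuilder` by hand. A hedged sketch — the Java class name, its builder method, and the expansion service address are all hypothetical, and a Java expansion service able to load the class must already be running:

```python
import apache_beam as beam
from apache_beam.transforms.external import JavaExternalTransform

# Hypothetical Java transform exposed through a local expansion service.
# Calling the object invokes the Java constructor; attribute calls become
# builder-method invocations on the Java side.
xform = JavaExternalTransform(
    'org.example.MyJavaTransform',
    expansion_service='localhost:8097')('constructor_arg').withProperty(42)

with beam.Pipeline() as p:
  _ = p | beam.Create(['a', 'b', 'c']) | xform
```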