From f1c10f176e385d61f51204240471e4c276748168 Mon Sep 17 00:00:00 2001 From: Nicholas Cook Date: Fri, 4 Feb 2022 10:47:25 -0800 Subject: [PATCH] feat: add samples and tests for adding captions to a job (#131) * feat: add samples and tests for adding captions to a job * remove space * remove extraneous explanation --- .../google-cloud-video-transcoder/README.rst | 2 +- .../create_job_with_embedded_captions.py | 195 ++++++++++++++++++ .../create_job_with_standalone_captions.py | 192 +++++++++++++++++ .../samples/snippets/job_test.py | 113 +++++++++- 4 files changed, 495 insertions(+), 7 deletions(-) create mode 100644 packages/google-cloud-video-transcoder/samples/snippets/create_job_with_embedded_captions.py create mode 100644 packages/google-cloud-video-transcoder/samples/snippets/create_job_with_standalone_captions.py diff --git a/packages/google-cloud-video-transcoder/README.rst b/packages/google-cloud-video-transcoder/README.rst index 0345e829a1e7..87242e1536f8 100644 --- a/packages/google-cloud-video-transcoder/README.rst +++ b/packages/google-cloud-video-transcoder/README.rst @@ -81,5 +81,5 @@ Next Steps APIs that we cover. .. _Samples: https://github.com/googleapis/python-video-transcoder/blob/main/samples/snippets/README.md -.. _Transcoder API Product documentation: https://cloud.google.com/transcoder/docs +.. _Transcoder API Product documentation: https://cloud.google.com/transcoder/docs .. 
def create_job_with_embedded_captions(
    project_id,
    location,
    input_video_uri,
    input_captions_uri,
    output_uri,
):
    """Creates a job based on an ad-hoc job configuration that embeds captions in the output video.

    Args:
        project_id (str): The GCP project ID.
        location (str): The location to start the job in.
        input_video_uri (str): Uri of the input video in the Cloud Storage
            bucket.
        input_captions_uri (str): Uri of the input captions file in the Cloud
            Storage bucket.
        output_uri (str): Uri of the video output folder in the Cloud Storage
            bucket.

    Returns:
        The job resource created by the Transcoder API.
    """
    types = transcoder_v1.types

    # The job consumes two inputs: the source video and the captions file.
    job_inputs = [
        types.Input(key="input0", uri=input_video_uri),
        types.Input(key="caption-input0", uri=input_captions_uri),
    ]
    atoms = [
        types.EditAtom(key="atom0", inputs=["input0", "caption-input0"]),
    ]

    video = types.ElementaryStream(
        key="video-stream0",
        video_stream=types.VideoStream(
            h264=types.VideoStream.H264CodecSettings(
                height_pixels=360,
                width_pixels=640,
                bitrate_bps=550000,
                frame_rate=60,
            ),
        ),
    )
    audio = types.ElementaryStream(
        key="audio-stream0",
        audio_stream=types.AudioStream(codec="aac", bitrate_bps=64000),
    )
    # "mapping" is a reserved argument name in the GCP python client
    # libraries, so transcoder_v1.types.TextStream(mapping=[...]) cannot be
    # constructed with keyword arguments; a plain dict is the documented
    # workaround.
    captions = types.ElementaryStream(
        key="cea-stream0",
        text_stream={
            "codec": "cea608",
            "mapping": [
                {
                    "atom_key": "atom0",
                    "input_key": "caption-input0",
                    "input_track": 0,
                }
            ],
        },
    )

    mux_streams = [
        types.MuxStream(
            key="sd",
            container="mp4",
            elementary_streams=["video-stream0", "audio-stream0"],
        ),
        types.MuxStream(
            key="sd-hls",
            container="ts",
            elementary_streams=["video-stream0", "audio-stream0"],
        ),
        types.MuxStream(
            key="sd-dash",
            container="fmp4",
            elementary_streams=["video-stream0"],
        ),
        types.MuxStream(
            key="audio-dash",
            container="fmp4",
            elementary_streams=["audio-stream0"],
        ),
    ]
    manifests = [
        types.Manifest(
            file_name="manifest.m3u8",
            type_="HLS",
            mux_streams=["sd-hls"],
        ),
        types.Manifest(
            file_name="manifest.mpd",
            type_="DASH",
            mux_streams=["sd-dash", "audio-dash"],
        ),
    ]

    job = types.Job()
    job.output_uri = output_uri
    job.config = types.JobConfig(
        inputs=job_inputs,
        edit_list=atoms,
        elementary_streams=[video, audio, captions],
        mux_streams=mux_streams,
        manifests=manifests,
    )

    client = TranscoderServiceClient()
    parent = f"projects/{project_id}/locations/{location}"
    response = client.create_job(parent=parent, job=job)
    print(f"Job: {response.name}")
    return response
" + + "Must end in '/'.", + required=True, + ) + args = parser.parse_args() + create_job_with_embedded_captions( + args.project_id, + args.location, + args.input_video_uri, + args.input_captions_uri, + args.output_uri, + ) diff --git a/packages/google-cloud-video-transcoder/samples/snippets/create_job_with_standalone_captions.py b/packages/google-cloud-video-transcoder/samples/snippets/create_job_with_standalone_captions.py new file mode 100644 index 000000000000..5ed546763c9f --- /dev/null +++ b/packages/google-cloud-video-transcoder/samples/snippets/create_job_with_standalone_captions.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python + +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Google Cloud Transcoder sample for creating a job that can use captions from a standalone file. + +Example usage: + python create_job_with_standalone_captions.py --project_id --location \ + --input_video_uri --input_captions_uri --output_uri +""" + +# [START transcoder_create_job_with_standalone_captions] + +import argparse + +from google.cloud.video import transcoder_v1 +from google.cloud.video.transcoder_v1.services.transcoder_service import ( + TranscoderServiceClient, +) +from google.protobuf import duration_pb2 as duration + + +def create_job_with_standalone_captions( + project_id, + location, + input_video_uri, + input_captions_uri, + output_uri, +): + """Creates a job based on an ad-hoc job configuration that can use captions from a standalone file. 
+ + Args: + project_id (str): The GCP project ID. + location (str): The location to start the job in. + input_video_uri (str): Uri of the input video in the Cloud Storage + bucket. + input_captions_uri (str): Uri of the input captions file in the Cloud + Storage bucket. + output_uri (str): Uri of the video output folder in the Cloud Storage + bucket.""" + + client = TranscoderServiceClient() + + parent = f"projects/{project_id}/locations/{location}" + job = transcoder_v1.types.Job() + job.output_uri = output_uri + job.config = transcoder_v1.types.JobConfig( + inputs=[ + transcoder_v1.types.Input( + key="input0", + uri=input_video_uri, + ), + transcoder_v1.types.Input( + key="caption-input0", + uri=input_captions_uri, + ), + ], + edit_list=[ + transcoder_v1.types.EditAtom( + key="atom0", + inputs=["input0", "caption-input0"], + ), + ], + elementary_streams=[ + transcoder_v1.types.ElementaryStream( + key="video-stream0", + video_stream=transcoder_v1.types.VideoStream( + h264=transcoder_v1.types.VideoStream.H264CodecSettings( + height_pixels=360, + width_pixels=640, + bitrate_bps=550000, + frame_rate=60, + ), + ), + ), + transcoder_v1.types.ElementaryStream( + key="audio-stream0", + audio_stream=transcoder_v1.types.AudioStream( + codec="aac", bitrate_bps=64000 + ), + ), + transcoder_v1.types.ElementaryStream( + key="vtt-stream0", + # The following doesn't work because "mapping" is a reserved + # argument name in GCP python client libraries (see + # https://github.com/googleapis/proto-plus-python/blob/main/proto/message.py#L447): + # + # text_stream=transcoder_v1.types.TextStream( + # codec="webvtt", + # mapping=[ + # transcoder_v1.types.TextStream.TextMapping( + # atom_key="atom0", + # input_key="caption-input0", + # input_track=0, + # ), + # ], + # ), + # Use a python dictionary as a workaround: + text_stream={ + "codec": "webvtt", + "mapping": [ + { + "atom_key": "atom0", + "input_key": "caption-input0", + "input_track": 0, + } + ], + }, + ), + ], + 
mux_streams=[ + transcoder_v1.types.MuxStream( + key="sd-hls-fmp4", + container="fmp4", + elementary_streams=["video-stream0"], + ), + transcoder_v1.types.MuxStream( + key="audio-hls-fmp4", + container="fmp4", + elementary_streams=["audio-stream0"], + ), + transcoder_v1.types.MuxStream( + key="text-vtt", + container="vtt", + elementary_streams=["vtt-stream0"], + segment_settings=transcoder_v1.types.SegmentSettings( + segment_duration=duration.Duration( + seconds=6, + ), + individual_segments=True, + ), + ), + ], + manifests=[ + transcoder_v1.types.Manifest( + file_name="manifest.m3u8", + type_="HLS", + mux_streams=["sd-hls-fmp4", "audio-hls-fmp4", "text-vtt"], + ), + ], + ) + response = client.create_job(parent=parent, job=job) + print(f"Job: {response.name}") + return response + + +# [END transcoder_create_job_with_standalone_captions] + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--project_id", help="Your Cloud project ID.", required=True) + parser.add_argument( + "--location", + help="The location to start this job in.", + default="us-central1", + ) + parser.add_argument( + "--input_video_uri", + help="Uri of the input video in the Cloud Storage bucket.", + required=True, + ) + parser.add_argument( + "--input_captions_uri", + help="Uri of the input captions file in the Cloud Storage bucket.", + required=True, + ) + parser.add_argument( + "--output_uri", + help="Uri of the video output folder in the Cloud Storage bucket. 
" + + "Must end in '/'.", + required=True, + ) + args = parser.parse_args() + create_job_with_standalone_captions( + args.project_id, + args.location, + args.input_video_uri, + args.input_captions_uri, + args.output_uri, + ) diff --git a/packages/google-cloud-video-transcoder/samples/snippets/job_test.py b/packages/google-cloud-video-transcoder/samples/snippets/job_test.py index 2d9a6bab3eae..fa9a93e0f89c 100644 --- a/packages/google-cloud-video-transcoder/samples/snippets/job_test.py +++ b/packages/google-cloud-video-transcoder/samples/snippets/job_test.py @@ -27,8 +27,10 @@ import create_job_template import create_job_with_animated_overlay import create_job_with_concatenated_inputs +import create_job_with_embedded_captions import create_job_with_periodic_images_spritesheet import create_job_with_set_number_images_spritesheet +import create_job_with_standalone_captions import create_job_with_static_overlay import delete_job import delete_job_template @@ -47,11 +49,13 @@ test_overlay_image_file_name = "overlay.jpg" test_concat1_file_name = "ForBiggerEscapes.mp4" test_concat2_file_name = "ForBiggerJoyrides.mp4" +test_captions_file_name = "caption.srt" input_uri = f"gs://{input_bucket_name}{test_video_file_name}" overlay_image_uri = f"gs://{input_bucket_name}{test_overlay_image_file_name}" concat1_uri = f"gs://{input_bucket_name}{test_concat1_file_name}" concat2_uri = f"gs://{input_bucket_name}{test_concat2_file_name}" +captions_uri = f"gs://{input_bucket_name}{test_captions_file_name}" output_uri_for_preset = f"gs://{output_bucket_name}/test-output-preset/" output_uri_for_template = f"gs://{output_bucket_name}/test-output-template/" output_uri_for_adhoc = f"gs://{output_bucket_name}/test-output-adhoc/" @@ -59,6 +63,9 @@ output_uri_for_animated_overlay = ( f"gs://{output_bucket_name}/test-output-animated-overlay/" ) +output_uri_for_embedded_captions = f"gs://{output_bucket_name}/test-output-embedded-captions/" +output_uri_for_standalone_captions = 
f"gs://{output_bucket_name}/test-output-standalone-captions/" + small_spritesheet_file_prefix = "small-sprite-sheet" large_spritesheet_file_prefix = "large-sprite-sheet" spritesheet_file_suffix = "0000000000.jpeg" @@ -75,6 +82,7 @@ preset = "preset/web-hd" job_succeeded_state = "ProcessingState.SUCCEEDED" +job_running_state = "ProcessingState.RUNNING" @pytest.fixture(scope="module") @@ -105,7 +113,7 @@ def test_create_job_from_preset(capsys, test_bucket): time.sleep(30) - _assert_job_state_succeeded(capsys, job_id) + _assert_job_state_succeeded_or_running(capsys, job_id) list_jobs.list_jobs(project_id, location) out, _ = capsys.readouterr() @@ -144,7 +152,7 @@ def test_create_job_from_template(capsys, test_bucket): time.sleep(30) - _assert_job_state_succeeded(capsys, job_id) + _assert_job_state_succeeded_or_running(capsys, job_id) list_jobs.list_jobs(project_id, location) out, _ = capsys.readouterr() @@ -178,7 +186,7 @@ def test_create_job_from_ad_hoc(capsys, test_bucket): time.sleep(30) - _assert_job_state_succeeded(capsys, job_id) + _assert_job_state_succeeded_or_running(capsys, job_id) list_jobs.list_jobs(project_id, location) out, _ = capsys.readouterr() @@ -259,7 +267,10 @@ def test_create_job_with_animated_overlay(capsys, test_bucket): def test_create_job_with_set_number_spritesheet(capsys, test_bucket): create_job_with_set_number_images_spritesheet.create_job_with_set_number_images_spritesheet( - project_id, location, input_uri, output_uri_for_set_number_spritesheet, + project_id, + location, + input_uri, + output_uri_for_set_number_spritesheet, ) out, _ = capsys.readouterr() job_name_prefix = f"projects/{project_number}/locations/{location}/jobs/" @@ -307,7 +318,10 @@ def test_create_job_with_set_number_spritesheet(capsys, test_bucket): def test_create_job_with_periodic_spritesheet(capsys, test_bucket): create_job_with_periodic_images_spritesheet.create_job_with_periodic_images_spritesheet( - project_id, location, input_uri, 
def _run_captions_job_test(capsys, create_job_fn, output_uri):
    """Shared driver for the two captions samples.

    Runs a create-job sample function, verifies the job can be fetched and
    listed once it succeeds, then deletes it. Extracted because the embedded
    and standalone caption tests previously duplicated this entire flow.

    Args:
        capsys: pytest capture fixture used to read the samples' stdout.
        create_job_fn: sample entry point with signature
            (project_id, location, input_uri, captions_uri, output_uri).
        output_uri: Cloud Storage folder for the job output.
    """
    create_job_fn(
        project_id,
        location,
        input_uri,
        captions_uri,
        output_uri,
    )
    out, _ = capsys.readouterr()
    job_name_prefix = f"projects/{project_number}/locations/{location}/jobs/"
    assert job_name_prefix in out

    # The sample prints "Job: <name>"; the job ID is the last path segment.
    job_id = out.split("/")[-1].rstrip("\n")
    job_name = f"projects/{project_number}/locations/{location}/jobs/{job_id}"
    assert job_name in out

    get_job.get_job(project_id, location, job_id)
    out, _ = capsys.readouterr()
    assert job_name in out

    time.sleep(
        30
    )  # Transcoding jobs need time to complete. Once the job completes, check the job state.

    _assert_job_state_succeeded(capsys, job_id)

    list_jobs.list_jobs(project_id, location)
    out, _ = capsys.readouterr()
    assert job_name in out

    delete_job.delete_job(project_id, location, job_id)
    out, _ = capsys.readouterr()
    assert "Deleted job" in out


def test_create_job_with_embedded_captions(capsys, test_bucket):
    # test_bucket fixture provisions the input/output buckets.
    _run_captions_job_test(
        capsys,
        create_job_with_embedded_captions.create_job_with_embedded_captions,
        output_uri_for_embedded_captions,
    )


def test_create_job_with_standalone_captions(capsys, test_bucket):
    # test_bucket fixture provisions the input/output buckets.
    _run_captions_job_test(
        capsys,
        create_job_with_standalone_captions.create_job_with_standalone_captions,
        output_uri_for_standalone_captions,
    )
@backoff.on_exception(backoff.expo, AssertionError, max_time=600)
def _assert_job_state_succeeded_or_running(capsys, job_id):
    """Assert the job is RUNNING or SUCCEEDED, retrying up to 10 minutes.

    Either state signals the API is functioning: a running or completed job
    can be listed or deleted with no ill effects. AssertionError triggers a
    backoff retry.
    """
    try:
        get_job_state.get_job_state(project_id, location, job_id)
    except HttpError as err:
        # Chain the HttpError so a final failure shows the root cause.
        raise AssertionError(f"Could not get job state: {err.resp.status}") from err

    out, _ = capsys.readouterr()
    assert (job_succeeded_state in out) or (job_running_state in out)


def _assert_file_in_bucket(capsys, test_bucket, directory_and_filename):
    """Assert that the named object exists in the test output bucket."""
    blob = test_bucket.blob(directory_and_filename)
    assert blob.exists()