diff --git a/docs/source/cloud_storage.rst b/docs/source/cloud_storage.rst
new file mode 100644
index 0000000..7b07d7e
--- /dev/null
+++ b/docs/source/cloud_storage.rst
@@ -0,0 +1,97 @@
+..
+  Copyright 2024 Google LLC
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+Cloud Storage
+=============
+
+Shaka Streamer can output to an HTTP/HTTPS server or to cloud storage.
+
+HTTP or HTTPS URLs are passed directly to Shaka Packager, which makes PUT
+requests to the HTTP/HTTPS server to write output files. The URL you pass is
+used as a base for the URLs Packager writes to. For example, if you pass
+https://localhost:8080/foo/bar/, Packager would make a PUT request to
+https://localhost:8080/foo/bar/dash.mpd to write the manifest (with default
+settings).
+
+Cloud storage URLs can be either Google Cloud Storage URLs (beginning with
+gs://) or Amazon S3 URLs (beginning with s3://). As with the HTTP support
+described above, these are treated as base URLs. If you ask for output to
+gs://foo/bar/, Streamer will write to gs://foo/bar/dash.mpd (with default
+settings).
+
+Cloud storage output uses the storage provider's Python libraries. Find more
+details on setup and authentication below.
+
+
+Google Cloud Storage Setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Install the Python module if you haven't yet:
+
+.. code:: sh
+
+   python3 -m pip install google-cloud-storage
+
+To use the default authentication, you will need application default
+credentials installed. On Linux, these live in
+``~/.config/gcloud/application_default_credentials.json``.
+
+The easiest way to install default credentials is through the Google Cloud SDK.
+See https://cloud.google.com/sdk/docs/install-sdk to install the SDK. Then run:
+
+.. code:: sh
+
+   gcloud init
+   gcloud auth application-default login
+
+Follow the instructions given to you by gcloud to initialize the environment
+and log in.
+
+Example command-line for live streaming to Google Cloud Storage:
+
+.. code:: sh
+
+   python3 shaka-streamer \
+     -i config_files/input_looped_file_config.yaml \
+     -p config_files/pipeline_live_config.yaml \
+     -o gs://my_gcs_bucket/folder/
+
+
+Amazon S3 Setup
+~~~~~~~~~~~~~~~
+
+Install the Python module if you haven't yet:
+
+.. code:: sh
+
+   python3 -m pip install boto3
+
+To authenticate to Amazon S3, you can either add credentials to your `boto
+config file`_ or log in interactively using the `AWS CLI`_:
+
+.. code:: sh
+
+   aws configure
+
+Example command-line for live streaming to Amazon S3:
+
+.. code:: sh
+
+   python3 shaka-streamer \
+     -i config_files/input_looped_file_config.yaml \
+     -p config_files/pipeline_live_config.yaml \
+     -o s3://my_s3_bucket/folder/
+
+
+.. _boto config file: http://boto.cloudhackers.com/en/latest/boto_config_tut.html
+.. _AWS CLI: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
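Once either module is installed and credentials are in place, a quick way to
verify the setup is to upload a small test object with the same client
libraries Streamer uses. The sketch below is illustrative only and is not part
of Streamer; the bucket names ``my_gcs_bucket`` and ``my_s3_bucket`` are
placeholders:

.. code:: python

   # Sanity-check sketch: upload one small object to each provider.
   # Requires `pip install google-cloud-storage boto3` plus the credentials
   # configured as described above. Bucket names are placeholders.
   import boto3
   from google.cloud import storage

   # Google Cloud Storage, using application default credentials:
   gcs = storage.Client()
   gcs.bucket('my_gcs_bucket').blob('folder/test.txt').upload_from_string('ok')

   # Amazon S3, using credentials from `aws configure` or the boto config:
   s3 = boto3.client('s3')
   s3.put_object(Bucket='my_s3_bucket', Key='folder/test.txt', Body=b'ok')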
diff --git a/docs/source/index.rst b/docs/source/index.rst
index b622129..6fe04de 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -31,6 +31,7 @@ Shaka Streamer documentation

    overview
    prerequisites
+   cloud_storage
    hardware_encoding
    configuration_fields
    module_api
diff --git a/docs/source/overview.rst b/docs/source/overview.rst
index 344e44d..b945faf 100644
--- a/docs/source/overview.rst
+++ b/docs/source/overview.rst
@@ -64,25 +64,6 @@ downloaded individually over HTTPS or all at once through gsutil:

    gsutil -m cp gs://shaka-streamer-assets/sample-inputs/* .

-Example command-line for live streaming to Google Cloud Storage:
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. code:: sh
-
-   python3 shaka-streamer \
-     -i config_files/input_looped_file_config.yaml \
-     -p config_files/pipeline_live_config.yaml \
-     -c gs://my_gcs_bucket/folder/
-
-Example command-line for live streaming to Amazon S3:
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. code:: sh
-
-   python3 shaka-streamer \
-     -i config_files/input_looped_file_config.yaml \
-     -p config_files/pipeline_live_config.yaml \
-     -c s3://my_s3_bucket/folder/


 Features
@@ -95,6 +76,8 @@ Features
   * VOD multi-period DASH (and equivalent HLS output)
   * Clear or encrypted output
   * Hardware encoding (if available from the platform)
+  * Output to an HTTP/HTTPS server or a cloud storage provider (see
+    :doc:`cloud_storage`)

 * Lots of options for input

@@ -154,7 +137,7 @@ All input types are read directly by ``TranscoderNode``.
 If the input type is ``looped_file``, then ``TranscoderNode`` will add
 additional FFmpeg options to loop that input file indefinitely.

-If the ``-c`` option is given with a Google Cloud Storage URL, then an
+If the ``-o`` option is given with a Google Cloud Storage URL, then an
 additional node called ``ProxyNode`` is added after ``PackagerNode``. It runs
 a local webserver which takes the output of packager and pushes to cloud
 storage.
diff --git a/docs/source/prerequisites.rst b/docs/source/prerequisites.rst
index 963bf89..5f128fb 100644
--- a/docs/source/prerequisites.rst
+++ b/docs/source/prerequisites.rst
@@ -154,47 +154,7 @@ Cloud Storage (optional)

 Shaka Streamer can push content directly to a Google Cloud Storage or Amazon S3
 bucket. To use this feature, additional Python modules are required.
-
-Google Cloud Storage
-~~~~~~~~~~~~~~~~~~~~
-
-First install the Python module if you haven't yet:
-
-.. code:: sh
-
-   python3 -m pip install google-cloud-storage
-
-To use the default authentication, you will need default application
-credentials installed. On Linux, these live in
-``~/.config/gcloud/application_default_credentials.json``.
-
-The easiest way to install default credentials is through the Google Cloud SDK.
-See https://cloud.google.com/sdk/docs/install-sdk to install the SDK. Then run:
-
-.. code:: sh
-
-   gcloud init
-   gcloud auth application-default login
-
-Follow the instructions given to you by gcloud to initialize the environment
-and login.
-
-
-Amazon S3
-~~~~~~~~~
-
-First install the Python module if you haven't yet:
-
-.. code:: sh
-
-   python3 -m pip install boto3
-
-To authenticate to Amazon S3, you can either add credentials to your `boto
-config file`_ or login interactively using the `AWS CLI`_.
-
-.. code:: sh
-
-   aws configure
+See :doc:`cloud_storage` for details.


 Test Dependencies (optional)
@@ -213,8 +173,7 @@ To install Node.js and NPM on any other platform, you can try one of these:

  * https://github.com/nodesource/distributions
  * https://nodejs.org/en/download/

+
 .. _Shaka Packager: https://github.com/shaka-project/shaka-packager
 .. _FFmpeg: https://ffmpeg.org/
 .. _Homebrew: https://brew.sh/
-.. _boto config file: http://boto.cloudhackers.com/en/latest/boto_config_tut.html
-.. _AWS CLI: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
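For orientation, the ``ProxyNode`` described in overview.rst is conceptually a
small local HTTP server: Packager PUTs each output file to it, and it forwards
the bytes to cloud storage. The toy sketch below illustrates only that data
flow; it is not ProxyNode's actual implementation (see
``streamer/proxy_node.py``), and it ignores details such as chunked transfer
encoding and error handling:

.. code:: python

   # Toy sketch of the proxy idea: accept local HTTP PUTs, forward to storage.
   # NOT ProxyNode's real code; streaming and error handling are omitted.
   from http.server import BaseHTTPRequestHandler, HTTPServer

   class UploadHandler(BaseHTTPRequestHandler):
     def do_PUT(self):
       length = int(self.headers.get('Content-Length', 0))
       body = self.rfile.read(length)
       # A real implementation would push `body` to gs:// or s3:// here, e.g.
       # bucket.blob(self.path.lstrip('/')).upload_from_string(body)
       self.send_response(201)
       self.end_headers()

   HTTPServer(('localhost', 8080), UploadHandler).serve_forever()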
diff --git a/shaka-streamer b/shaka-streamer
index 1cc45c0..25d2111 100755
--- a/shaka-streamer
+++ b/shaka-streamer
@@ -48,7 +48,12 @@ def main():
   description = __doc__.format(version=streamer.__version__)

   parser = argparse.ArgumentParser(description=description,
-                                   formatter_class=CustomArgParseFormatter)
+                                   formatter_class=CustomArgParseFormatter,
+                                   epilog="""
+The output location can be a local filesystem folder. It will be created if it
+does not exist. It can also be an HTTP or HTTPS URL, or a cloud storage URL.
+See docs: https://shaka-project.github.io/shaka-streamer/cloud_storage.html
+""")
   parser.add_argument('-i', '--input-config',
                       required=True,
@@ -64,11 +69,11 @@ def main():
   parser.add_argument('-c', '--cloud-url',
                       default=None,
                       help='The Google Cloud Storage or Amazon S3 URL to ' +
-                           'upload to. (Starts with gs:// or s3://)')
+                           'upload to. (Starts with gs:// or s3://) (DEPRECATED, use -o)')
   parser.add_argument('-o', '--output',
                       default='output_files',
-                      help='The output folder to write files to, or an HTTP ' +
-                           'or HTTPS URL where files will be PUT.')
+                      help='The output folder or URL to write files to. See ' +
+                           'below for details.')
   parser.add_argument('--skip-deps-check',
                       action='store_true',
                       help='Skip checks for dependencies and their versions. ' +
@@ -96,8 +101,13 @@ def main():
     bitrate_config_dict = yaml.safe_load(f)

   try:
+    if args.cloud_url:
+      print('Warning: -c/--cloud-url is deprecated; use -o/--output instead',
+            file=sys.stderr)
+      args.output = args.cloud_url
+
     with controller.start(args.output, input_config_dict, pipeline_config_dict,
-                          bitrate_config_dict, args.cloud_url,
+                          bitrate_config_dict,
                           not args.skip_deps_check,
                           not args.use_system_binaries):
       # Sleep so long as the pipeline is still running.
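The ``-c``/``-o`` handling above is a standard argparse deprecation shim: keep
the old flag, warn on stderr, and copy its value onto the new flag before it is
used. Distilled into a standalone sketch (hypothetical, for illustration only):

.. code:: python

   # Standalone sketch of the deprecation shim pattern used in shaka-streamer.
   import argparse
   import sys

   parser = argparse.ArgumentParser()
   parser.add_argument('-c', '--cloud-url', default=None,
                       help='(DEPRECATED, use -o)')
   parser.add_argument('-o', '--output', default='output_files')
   args = parser.parse_args()

   if args.cloud_url:
     # Warn on stderr so piped stdout output stays clean.
     print('Warning: -c/--cloud-url is deprecated; use -o/--output instead',
           file=sys.stderr)
     args.output = args.cloud_url  # the old flag wins over the new default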
diff --git a/streamer/controller_node.py b/streamer/controller_node.py
index f390739..bdcc76e 100644
--- a/streamer/controller_node.py
+++ b/streamer/controller_node.py
@@ -43,7 +43,7 @@ from streamer.periodconcat_node import PeriodConcatNode
 from streamer.proxy_node import ProxyNode
 import streamer.subprocessWindowsPatch  # side-effects only
-from streamer.util import is_url
+from streamer.util import is_http_url, is_url
 from streamer.pipe import Pipe

@@ -75,7 +75,6 @@ def start(self,
           output_location: str,
           input_config_dict: Dict[str, Any],
           pipeline_config_dict: Dict[str, Any],
           bitrate_config_dict: Dict[Any, Any] = {},
-          bucket_url: Union[str, None] = None,
           check_deps: bool = True,
           use_hermetic: bool = True) -> 'ControllerNode':
     """Create and start all other nodes.
@@ -166,24 +165,28 @@ def next_short_version(version: str) -> str:
     self._input_config = InputConfig(input_config_dict)
     self._pipeline_config = PipelineConfig(pipeline_config_dict)

-    if bucket_url is not None:
-      # Check some restrictions and other details on HTTP output.
-      if not ProxyNode.is_understood(bucket_url):
+    if is_http_url(output_location):
+      if not self._pipeline_config.segment_per_file:
+        raise RuntimeError(
+            'For HTTP PUT uploads, the pipeline segment_per_file setting ' +
+            'must be set to True!')
+    elif is_url(output_location):
+      if not ProxyNode.is_understood(output_location):
         url_prefixes = [
             protocol + '://' for protocol in ProxyNode.ALL_SUPPORTED_PROTOCOLS]
         raise RuntimeError(
             'Invalid cloud URL! Only these are supported: ' +
             ', '.join(url_prefixes))
-      if not ProxyNode.is_supported(bucket_url):
-        raise RuntimeError('Missing libraries for cloud URL: ' + bucket_url)
+      if not ProxyNode.is_supported(output_location):
+        raise RuntimeError('Missing libraries for cloud URL: ' + output_location)
       if not self._pipeline_config.segment_per_file:
         raise RuntimeError(
-            'For HTTP PUT uploads, the pipeline segment_per_file setting ' +
+            'For cloud uploads, the pipeline segment_per_file setting ' +
             'must be set to True!')
-      upload_proxy = ProxyNode.create(bucket_url)
+      upload_proxy = ProxyNode.create(output_location)
       upload_proxy.start()

       # All the outputs now should be sent to the proxy server instead.
@@ -213,9 +216,9 @@ def next_short_version(version: str) -> str:
                                           output_location)
     else:
       # InputConfig contains multiperiod_inputs_list only.
-      if bucket_url:
+      if is_url(output_location):
         raise RuntimeError(
-            'Direct cloud upload is incompatible with multiperiod support.')
+            'Direct cloud/HTTP upload is incompatible with multiperiod support.')

       # Create one Transcoder node and one Packager node for each period.
       for i, singleperiod in enumerate(self._input_config.multiperiod_inputs_list):
@@ -315,7 +318,7 @@ def _append_nodes_for_inputs_list(self, inputs: List[Input],
     # If the inputs list was a period in multiperiod_inputs_list, create a nested directory
     # and put that period in it.
-    if period_dir:
+    if period_dir and not is_url(output_location):
       output_location = os.path.join(output_location, period_dir)
       os.mkdir(output_location)
diff --git a/streamer/util.py b/streamer/util.py
index 7250df9..162dec7 100644
--- a/streamer/util.py
+++ b/streamer/util.py
@@ -14,7 +14,13 @@

 """Utility functions used by multiple modules."""

+import urllib.parse
+
 def is_url(output_location: str) -> bool:
   """Returns True if the output location is a URL."""
-  return (output_location.startswith('http:') or
-          output_location.startswith('https:'))
+  return urllib.parse.urlparse(output_location).scheme != ''
+
+def is_http_url(output_location: str) -> bool:
+  """Returns True if the output location is an HTTP/HTTPS URL."""
+  scheme = urllib.parse.urlparse(output_location).scheme
+  return scheme in ['http', 'https']
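With the rewritten helpers, any output location that parses with a URL scheme
(``http``, ``https``, ``gs``, ``s3``, and so on) now counts as a URL, and plain
folder paths do not. A quick illustration of the underlying ``urlparse``
behavior these functions rely on:

.. code:: python

   # Illustration of the scheme-based checks in streamer/util.py.
   from urllib.parse import urlparse

   for location in ['output_files',                # local folder
                    'https://localhost:8080/',     # HTTP(S) output
                    'gs://my_gcs_bucket/folder/',  # Google Cloud Storage
                    's3://my_s3_bucket/folder/']:  # Amazon S3
     scheme = urlparse(location).scheme
     print(location, '->', scheme or '(no scheme: treated as a local folder)')

One caveat worth noting in review: on Windows, a drive-letter path such as
``C:\media`` also parses with a non-empty scheme (``c``), so scheme-based URL
detection may need a special case for such paths.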