From e685b5caa65f1cac1d5e88dc5dfd4d5f3375f776 Mon Sep 17 00:00:00 2001 From: "Oriol Ricart Vilarrubias." <45894267+uriii3@users.noreply.github.com> Date: Fri, 11 Oct 2024 18:15:40 +0200 Subject: [PATCH] chore: compression level as only option (#162) There is no longer any need to specify `--netcdf-compression-enabled` in order to compress the output. `--netcdf-compression-level` can also be passed without a value, as a flag, to compress at level 1. --- .../catalogue_parser/request_structure.py | 3 +-- .../command_line_interface/group_subset.py | 18 ++++-------------- copernicusmarine/core_functions/subset.py | 13 +------------ .../download_functions/common_download.py | 19 +++++++------------ .../download_arco_series.py | 5 +---- copernicusmarine/python_interface/subset.py | 7 +------ doc/usage/subset-usage.rst | 6 +++--- .../test_help_command_interface.ambr | 3 --- tests/test_command_line_interface.py | 9 ++------- tests/test_python_interface.py | 2 +- 10 files changed, 21 insertions(+), 64 deletions(-) diff --git a/copernicusmarine/catalogue_parser/request_structure.py b/copernicusmarine/catalogue_parser/request_structure.py index ec8cba09..308bb64e 100644 --- a/copernicusmarine/catalogue_parser/request_structure.py +++ b/copernicusmarine/catalogue_parser/request_structure.py @@ -75,8 +75,7 @@ class SubsetRequest: output_directory: pathlib.Path = pathlib.Path(".") force_download: bool = False overwrite_output_data: bool = False - netcdf_compression_enabled: bool = False - netcdf_compression_level: Optional[int] = None + netcdf_compression_level: int = 0 netcdf3_compatible: bool = False dry_run: bool = False diff --git a/copernicusmarine/command_line_interface/group_subset.py b/copernicusmarine/command_line_interface/group_subset.py index 425021c2..639f4f8e 100644 --- a/copernicusmarine/command_line_interface/group_subset.py +++ b/copernicusmarine/command_line_interface/group_subset.py @@ -296,20 +296,12 @@ def cli_subset() -> None: is_flag=True, hidden=True, ) -@click.option( - "--netcdf-compression-enabled", - type=bool, - default=False, - 
is_flag=True, - help=( - "Enable compression level 1 to the NetCDF output file. " - "Use --netcdf-compression-level option to customize the compression " - "level" - ), -) @click.option( "--netcdf-compression-level", type=click.IntRange(0, 9), + is_flag=False, + flag_value=1, + default=0, help=( "Specify a compression level to apply on the NetCDF output file. " "A value of 0 means no compression, and 9 is the highest level of " @@ -343,8 +335,7 @@ def subset( coordinates_selection_method: CoordinatesSelectionMethod, output_filename: Optional[str], file_format: FileFormat, - netcdf_compression_enabled: bool, - netcdf_compression_level: Optional[int], + netcdf_compression_level: int, netcdf3_compatible: bool, service: Optional[str], create_template: bool, @@ -406,7 +397,6 @@ def subset( dry_run=dry_run, disable_progress_bar=disable_progress_bar, staging=staging, - netcdf_compression_enabled=netcdf_compression_enabled, netcdf_compression_level=netcdf_compression_level, netcdf3_compatible=netcdf3_compatible, ) diff --git a/copernicusmarine/core_functions/subset.py b/copernicusmarine/core_functions/subset.py index 6e3a5f33..8085594c 100644 --- a/copernicusmarine/core_functions/subset.py +++ b/copernicusmarine/core_functions/subset.py @@ -69,8 +69,7 @@ def subset_function( dry_run: bool, disable_progress_bar: bool, staging: bool, - netcdf_compression_enabled: bool, - netcdf_compression_level: Optional[int], + netcdf_compression_level: int, netcdf3_compatible: bool, ) -> ResponseSubset: VersionVerifier.check_version_subset(staging) @@ -80,15 +79,6 @@ def subset_function( "Data will come from the staging environment." 
) - if ( - netcdf_compression_level is not None - and netcdf_compression_enabled is False - ): - raise ValueError( - "You must provide --netcdf-compression-enabled if you want to use " - "--netcdf-compression-level option" - ) - subset_request = SubsetRequest(dataset_id=dataset_id or "") if request_file: subset_request.from_file(request_file) @@ -115,7 +105,6 @@ def subset_function( "file_format": file_format, "force_service": force_service, "output_directory": output_directory, - "netcdf_compression_enabled": netcdf_compression_enabled, "netcdf_compression_level": netcdf_compression_level, "netcdf3_compatible": netcdf3_compatible, "dry_run": dry_run, diff --git a/copernicusmarine/download_functions/common_download.py b/copernicusmarine/download_functions/common_download.py index 16d36ce2..3d61a778 100644 --- a/copernicusmarine/download_functions/common_download.py +++ b/copernicusmarine/download_functions/common_download.py @@ -1,6 +1,5 @@ import logging import pathlib -from typing import Optional import xarray import zarr @@ -17,12 +16,11 @@ def get_delayed_download( dataset: xarray.Dataset, output_path: pathlib.Path, - netcdf_compression_enabled: bool, - netcdf_compression_level: Optional[int], + netcdf_compression_level: int, netcdf3_compatible: bool, ): if output_path.suffix == ".zarr": - if netcdf_compression_enabled: + if netcdf_compression_level > 0: raise NetCDFCompressionNotAvailable( "--netcdf-compression-enabled option cannot be used when " "writing to ZARR" @@ -32,7 +30,6 @@ def get_delayed_download( delayed = _prepare_download_dataset_as_netcdf( dataset, output_path, - netcdf_compression_enabled, netcdf_compression_level, netcdf3_compatible, ) @@ -52,21 +49,19 @@ def download_delayed_dataset( def _prepare_download_dataset_as_netcdf( dataset: xarray.Dataset, output_path: pathlib.Path, - netcdf_compression_enabled: bool, - netcdf_compression_level: Optional[int], + netcdf_compression_level: int, netcdf3_compatible: bool, ): logger.debug("Writing dataset 
to NetCDF") for coord in dataset.coords: dataset[coord].encoding["_FillValue"] = None - if netcdf_compression_enabled: - complevel = ( - 1 if netcdf_compression_level is None else netcdf_compression_level + if netcdf_compression_level > 0: + logger.info( + f"NetCDF compression enabled with level {netcdf_compression_level}" ) - logger.info(f"NetCDF compression enabled with level {complevel}") comp = dict( zlib=True, - complevel=complevel, + complevel=netcdf_compression_level, contiguous=False, shuffle=True, ) diff --git a/copernicusmarine/download_functions/download_arco_series.py b/copernicusmarine/download_functions/download_arco_series.py index c3e0c247..066f4bae 100644 --- a/copernicusmarine/download_functions/download_arco_series.py +++ b/copernicusmarine/download_functions/download_arco_series.py @@ -79,8 +79,7 @@ def download_dataset( file_format: FileFormat, variables: Optional[list[str]], disable_progress_bar: bool, - netcdf_compression_enabled: bool, - netcdf_compression_level: Optional[int], + netcdf_compression_level: int, netcdf3_compatible: bool, service: CopernicusMarineService, dry_run: bool, @@ -145,7 +144,6 @@ def download_dataset( delayed = get_delayed_download( dataset, output_path, - netcdf_compression_enabled, netcdf_compression_level, netcdf3_compatible, ) @@ -219,7 +217,6 @@ def download_zarr( disable_progress_bar=disable_progress_bar, force_download=force_download, overwrite_output_data=subset_request.overwrite_output_data, - netcdf_compression_enabled=subset_request.netcdf_compression_enabled, netcdf_compression_level=subset_request.netcdf_compression_level, netcdf3_compatible=subset_request.netcdf3_compatible, dry_run=subset_request.dry_run, diff --git a/copernicusmarine/python_interface/subset.py b/copernicusmarine/python_interface/subset.py index f58cc3d9..442d3435 100644 --- a/copernicusmarine/python_interface/subset.py +++ b/copernicusmarine/python_interface/subset.py @@ -55,8 +55,7 @@ def subset( dry_run: bool = False, 
disable_progress_bar: bool = False, staging: bool = False, - netcdf_compression_enabled: bool = False, - netcdf_compression_level: Optional[int] = None, + netcdf_compression_level: int = 0, netcdf3_compatible: bool = False, ) -> ResponseSubset: """ @@ -117,9 +116,6 @@ def subset( MOTU API request string. dry_run : bool, optional If True, runs query without downloading data. - netcdf_compression_enabled : bool, optional - Enable compression level 1 to the NetCDF output file. Use 'netcdf_compression_level' option to customize the - compression level. netcdf_compression_level : int, optional Specify a compression level to apply on the NetCDF output file. A value of 0 means no compression, and 9 is the highest level of compression available. @@ -171,7 +167,6 @@ def subset( dry_run, disable_progress_bar, staging=staging, - netcdf_compression_enabled=netcdf_compression_enabled, netcdf_compression_level=netcdf_compression_level, netcdf3_compatible=netcdf3_compatible, ) diff --git a/doc/usage/subset-usage.rst b/doc/usage/subset-usage.rst index 4c58d955..6b4fb27a 100644 --- a/doc/usage/subset-usage.rst +++ b/doc/usage/subset-usage.rst @@ -52,10 +52,10 @@ The ``--minimum-longitude`` and ``--maximum-longitude`` options work as follows: Note that any longitudes can be requested. The system applies a modulus operation to bring the result between -180° and 360°. For example, a request for [530, 560] will return data for longitudes [170, 200]. -About ``--netcdf-compression-enabled`` and ``--netcdf-compression-level`` options +About ``--netcdf-compression-level`` options """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -If writing data to a NetCDF file (the default format), the ``--netcdf-compression-enabled`` option can be provided to compress the downloaded file. This reduces file size but increases writing time. Without this option, the file is written faster but with a larger size. 
For Zarr format (`.zarr` extension), the default compression of the Copernicus Marine Data Store is applied, making the download fast and compressed without using ``--netcdf-compression-enabled``. +If writing data to a NetCDF file (the default format), the ``--netcdf-compression-level`` option can be set to compress the downloaded file. This reduces file size but increases writing time. Without this option, the file is written faster but with a larger size. For Zarr format (`.zarr` extension), the default compression of the Copernicus Marine Data Store is applied, making the download fast and compressed without using ``--netcdf-compression-level``. Default NetCDF compression settings for xarray: @@ -63,7 +63,7 @@ Default NetCDF compression settings for xarray: {'zlib': True, 'complevel': 1, 'contiguous': False, 'shuffle': True} -Additionally, you can use the ``--netcdf-compression-level`` option to set a custom compression level between 0 (no compression) and 9 (maximum compression). +Set ``--netcdf-compression-level`` to a value between 0 (no compression, the default) and 9 (maximum compression). Passing the option without a value applies compression level 1. About ``--netcdf3-compatible`` option """""""""""""""""""""""""""""""""""""""" diff --git a/tests/__snapshots__/test_help_command_interface.ambr b/tests/__snapshots__/test_help_command_interface.ambr index 7b90c7d4..de6ed345 100644 --- a/tests/__snapshots__/test_help_command_interface.ambr +++ b/tests/__snapshots__/test_help_command_interface.ambr @@ -335,9 +335,6 @@ ' --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]', ' Set the details printed to console by the', ' command (based on standard logging library).', - ' --netcdf-compression-enabled Enable compression level 1 to the NetCDF', - ' output file. Use --netcdf-compression-level', - ' option to customize the compression level', ' --netcdf-compression-level INTEGER RANGE', ' Specify a compression level to apply on the', ' NetCDF output file.
A value of 0 means no', diff --git a/tests/test_command_line_interface.py b/tests/test_command_line_interface.py index 1a93c186..38ad44a0 100644 --- a/tests/test_command_line_interface.py +++ b/tests/test_command_line_interface.py @@ -1480,7 +1480,7 @@ def test_netcdf_compression_option(self, tmp_path): filename_zarr_without_option = "filename_without_option.zarr" filename_zarr_with_option = "filename_with_option.zarr" - netcdf_compression_option = "--netcdf-compression-enabled" + netcdf_compression_option = "--netcdf-compression-level" base_command = [ "copernicusmarine", @@ -1616,7 +1616,6 @@ def test_subset_dataset_part_option(self, tmp_path): assert self.output.returncode == 0 def test_netcdf_compression_level(self, tmp_path): - netcdf_compression_enabled_option = "--netcdf-compression-enabled" forced_comp_level = 4 base_command = [ @@ -1649,14 +1648,10 @@ def test_netcdf_compression_level(self, tmp_path): f"{forced_comp_level}", ] - output_without_netcdf_compression_enabled = execute_in_terminal( - base_command - ) output_with_netcdf_compression_enabled = execute_in_terminal( - base_command + [netcdf_compression_enabled_option] + base_command ) - assert output_without_netcdf_compression_enabled.returncode != 0 assert output_with_netcdf_compression_enabled.returncode == 0 filepath = Path(tmp_path / "data.nc") diff --git a/tests/test_python_interface.py b/tests/test_python_interface.py index bcf459c4..7115c8fd 100644 --- a/tests/test_python_interface.py +++ b/tests/test_python_interface.py @@ -263,7 +263,7 @@ def test_subset_keeps_fillvalue_empty_w_compression(self, tmp_path): force_download=True, output_directory=tmp_path, output_filename="netcdf_fillval_compressed.nc", - netcdf_compression_enabled=True, + netcdf_compression_level=1, overwrite_output_data=True, )