Skip to content

Commit

Permalink
chore: compression level as only option (#162)
Browse files Browse the repository at this point in the history
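`--netcdf-compression-enabled` is no longer needed to compress the output: `--netcdf-compression-level` alone now controls compression. Passed without a value, `--netcdf-compression-level` acts as a flag and compresses at level 1.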
  • Loading branch information
uriii3 authored and renaudjester committed Oct 28, 2024
1 parent 2c564ac commit e685b5c
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 64 deletions.
3 changes: 1 addition & 2 deletions copernicusmarine/catalogue_parser/request_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ class SubsetRequest:
output_directory: pathlib.Path = pathlib.Path(".")
force_download: bool = False
overwrite_output_data: bool = False
netcdf_compression_enabled: bool = False
netcdf_compression_level: Optional[int] = None
netcdf_compression_level: int = 0
netcdf3_compatible: bool = False
dry_run: bool = False

Expand Down
18 changes: 4 additions & 14 deletions copernicusmarine/command_line_interface/group_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,20 +296,12 @@ def cli_subset() -> None:
is_flag=True,
hidden=True,
)
@click.option(
"--netcdf-compression-enabled",
type=bool,
default=False,
is_flag=True,
help=(
"Enable compression level 1 to the NetCDF output file. "
"Use --netcdf-compression-level option to customize the compression "
"level"
),
)
@click.option(
"--netcdf-compression-level",
type=click.IntRange(0, 9),
is_flag=False,
flag_value=1,
default=0,
help=(
"Specify a compression level to apply on the NetCDF output file. "
"A value of 0 means no compression, and 9 is the highest level of "
Expand Down Expand Up @@ -343,8 +335,7 @@ def subset(
coordinates_selection_method: CoordinatesSelectionMethod,
output_filename: Optional[str],
file_format: FileFormat,
netcdf_compression_enabled: bool,
netcdf_compression_level: Optional[int],
netcdf_compression_level: int,
netcdf3_compatible: bool,
service: Optional[str],
create_template: bool,
Expand Down Expand Up @@ -406,7 +397,6 @@ def subset(
dry_run=dry_run,
disable_progress_bar=disable_progress_bar,
staging=staging,
netcdf_compression_enabled=netcdf_compression_enabled,
netcdf_compression_level=netcdf_compression_level,
netcdf3_compatible=netcdf3_compatible,
)
Expand Down
13 changes: 1 addition & 12 deletions copernicusmarine/core_functions/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ def subset_function(
dry_run: bool,
disable_progress_bar: bool,
staging: bool,
netcdf_compression_enabled: bool,
netcdf_compression_level: Optional[int],
netcdf_compression_level: int,
netcdf3_compatible: bool,
) -> ResponseSubset:
VersionVerifier.check_version_subset(staging)
Expand All @@ -80,15 +79,6 @@ def subset_function(
"Data will come from the staging environment."
)

if (
netcdf_compression_level is not None
and netcdf_compression_enabled is False
):
raise ValueError(
"You must provide --netcdf-compression-enabled if you want to use "
"--netcdf-compression-level option"
)

subset_request = SubsetRequest(dataset_id=dataset_id or "")
if request_file:
subset_request.from_file(request_file)
Expand All @@ -115,7 +105,6 @@ def subset_function(
"file_format": file_format,
"force_service": force_service,
"output_directory": output_directory,
"netcdf_compression_enabled": netcdf_compression_enabled,
"netcdf_compression_level": netcdf_compression_level,
"netcdf3_compatible": netcdf3_compatible,
"dry_run": dry_run,
Expand Down
19 changes: 7 additions & 12 deletions copernicusmarine/download_functions/common_download.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
import pathlib
from typing import Optional

import xarray
import zarr
Expand All @@ -17,12 +16,11 @@
def get_delayed_download(
dataset: xarray.Dataset,
output_path: pathlib.Path,
netcdf_compression_enabled: bool,
netcdf_compression_level: Optional[int],
netcdf_compression_level: int,
netcdf3_compatible: bool,
):
if output_path.suffix == ".zarr":
if netcdf_compression_enabled:
if netcdf_compression_level > 0:
raise NetCDFCompressionNotAvailable(
"--netcdf-compression-enabled option cannot be used when "
"writing to ZARR"
Expand All @@ -32,7 +30,6 @@ def get_delayed_download(
delayed = _prepare_download_dataset_as_netcdf(
dataset,
output_path,
netcdf_compression_enabled,
netcdf_compression_level,
netcdf3_compatible,
)
Expand All @@ -52,21 +49,19 @@ def download_delayed_dataset(
def _prepare_download_dataset_as_netcdf(
dataset: xarray.Dataset,
output_path: pathlib.Path,
netcdf_compression_enabled: bool,
netcdf_compression_level: Optional[int],
netcdf_compression_level: int,
netcdf3_compatible: bool,
):
logger.debug("Writing dataset to NetCDF")
for coord in dataset.coords:
dataset[coord].encoding["_FillValue"] = None
if netcdf_compression_enabled:
complevel = (
1 if netcdf_compression_level is None else netcdf_compression_level
if netcdf_compression_level > 0:
logger.info(
f"NetCDF compression enabled with level {netcdf_compression_level}"
)
logger.info(f"NetCDF compression enabled with level {complevel}")
comp = dict(
zlib=True,
complevel=complevel,
complevel=netcdf_compression_level,
contiguous=False,
shuffle=True,
)
Expand Down
5 changes: 1 addition & 4 deletions copernicusmarine/download_functions/download_arco_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ def download_dataset(
file_format: FileFormat,
variables: Optional[list[str]],
disable_progress_bar: bool,
netcdf_compression_enabled: bool,
netcdf_compression_level: Optional[int],
netcdf_compression_level: int,
netcdf3_compatible: bool,
service: CopernicusMarineService,
dry_run: bool,
Expand Down Expand Up @@ -145,7 +144,6 @@ def download_dataset(
delayed = get_delayed_download(
dataset,
output_path,
netcdf_compression_enabled,
netcdf_compression_level,
netcdf3_compatible,
)
Expand Down Expand Up @@ -219,7 +217,6 @@ def download_zarr(
disable_progress_bar=disable_progress_bar,
force_download=force_download,
overwrite_output_data=subset_request.overwrite_output_data,
netcdf_compression_enabled=subset_request.netcdf_compression_enabled,
netcdf_compression_level=subset_request.netcdf_compression_level,
netcdf3_compatible=subset_request.netcdf3_compatible,
dry_run=subset_request.dry_run,
Expand Down
7 changes: 1 addition & 6 deletions copernicusmarine/python_interface/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ def subset(
dry_run: bool = False,
disable_progress_bar: bool = False,
staging: bool = False,
netcdf_compression_enabled: bool = False,
netcdf_compression_level: Optional[int] = None,
netcdf_compression_level: int = 0,
netcdf3_compatible: bool = False,
) -> ResponseSubset:
"""
Expand Down Expand Up @@ -117,9 +116,6 @@ def subset(
MOTU API request string.
dry_run : bool, optional
If True, runs query without downloading data.
netcdf_compression_enabled : bool, optional
Enable compression level 1 to the NetCDF output file. Use 'netcdf_compression_level' option to customize the
compression level.
netcdf_compression_level : int, optional
Specify a compression level to apply on the NetCDF output file. A value of 0 means no compression, and 9 is the
highest level of compression available.
Expand Down Expand Up @@ -171,7 +167,6 @@ def subset(
dry_run,
disable_progress_bar,
staging=staging,
netcdf_compression_enabled=netcdf_compression_enabled,
netcdf_compression_level=netcdf_compression_level,
netcdf3_compatible=netcdf3_compatible,
)
6 changes: 3 additions & 3 deletions doc/usage/subset-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,18 @@ The ``--minimum-longitude`` and ``--maximum-longitude`` options work as follows:

Note that any longitudes can be requested. The system applies a modulus operation to bring the result between -180° and 360°. For example, a request for [530, 560] will return data for longitudes [170, 200].

About ``--netcdf-compression-enabled`` and ``--netcdf-compression-level`` options
About ``--netcdf-compression-level`` option
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

If writing data to a NetCDF file (the default format), the ``--netcdf-compression-enabled`` option can be provided to compress the downloaded file. This reduces file size but increases writing time. Without this option, the file is written faster but with a larger size. For Zarr format (`.zarr` extension), the default compression of the Copernicus Marine Data Store is applied, making the download fast and compressed without using ``--netcdf-compression-enabled``.
If writing data to a NetCDF file (the default format), the ``--netcdf-compression-level`` option can be set to compress the downloaded file. This reduces file size but increases writing time. Without this option, the file is written faster but with a larger size. For Zarr format (``.zarr`` extension), the default compression of the Copernicus Marine Data Store is applied, making the download fast and compressed without using ``--netcdf-compression-level``; requesting a compression level greater than 0 when writing to Zarr raises an error.

NetCDF compression settings passed to xarray at compression level 1:

.. code-block:: text
{'zlib': True, 'complevel': 1, 'contiguous': False, 'shuffle': True}
Additionally, you can use the ``--netcdf-compression-level`` option to set a custom compression level between 0 (no compression) and 9 (maximum compression).
Set ``--netcdf-compression-level`` to a custom compression level between 0 (no compression, the default) and 9 (maximum compression). Passing the option without a value applies compression level 1.

About ``--netcdf3-compatible`` option
""""""""""""""""""""""""""""""""""""""""
Expand Down
3 changes: 0 additions & 3 deletions tests/__snapshots__/test_help_command_interface.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -335,9 +335,6 @@
' --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]',
' Set the details printed to console by the',
' command (based on standard logging library).',
' --netcdf-compression-enabled Enable compression level 1 to the NetCDF',
' output file. Use --netcdf-compression-level',
' option to customize the compression level',
' --netcdf-compression-level INTEGER RANGE',
' Specify a compression level to apply on the',
' NetCDF output file. A value of 0 means no',
Expand Down
9 changes: 2 additions & 7 deletions tests/test_command_line_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ def test_netcdf_compression_option(self, tmp_path):
filename_zarr_without_option = "filename_without_option.zarr"
filename_zarr_with_option = "filename_with_option.zarr"

netcdf_compression_option = "--netcdf-compression-enabled"
netcdf_compression_option = "--netcdf-compression-level"

base_command = [
"copernicusmarine",
Expand Down Expand Up @@ -1616,7 +1616,6 @@ def test_subset_dataset_part_option(self, tmp_path):
assert self.output.returncode == 0

def test_netcdf_compression_level(self, tmp_path):
netcdf_compression_enabled_option = "--netcdf-compression-enabled"
forced_comp_level = 4

base_command = [
Expand Down Expand Up @@ -1649,14 +1648,10 @@ def test_netcdf_compression_level(self, tmp_path):
f"{forced_comp_level}",
]

output_without_netcdf_compression_enabled = execute_in_terminal(
base_command
)
output_with_netcdf_compression_enabled = execute_in_terminal(
base_command + [netcdf_compression_enabled_option]
base_command
)

assert output_without_netcdf_compression_enabled.returncode != 0
assert output_with_netcdf_compression_enabled.returncode == 0

filepath = Path(tmp_path / "data.nc")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_python_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def test_subset_keeps_fillvalue_empty_w_compression(self, tmp_path):
force_download=True,
output_directory=tmp_path,
output_filename="netcdf_fillval_compressed.nc",
netcdf_compression_enabled=True,
netcdf_compression_level=1,
overwrite_output_data=True,
)

Expand Down

0 comments on commit e685b5c

Please sign in to comment.