Skip to content

Commit

Permalink
Add compression and documentation options to netCDF exporter (#352)
Browse files Browse the repository at this point in the history
* add scale, offset, fill_value, datatype and descriptions as option to netCDF exporter

* fix: revert realization to ens_number in createvariable for tests

* add tests using new function argument options
  • Loading branch information
RubenImhoff authored Apr 4, 2024
1 parent bdfa575 commit 1525178
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 24.3.0
hooks:
- id: black
language_version: python3
57 changes: 52 additions & 5 deletions pysteps/io/exporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,11 @@ def initialize_forecast_exporter_netcdf(
shape,
metadata,
n_ens_members=1,
datatype=np.float32,
incremental=None,
fill_value=None,
scale_factor=None,
offset=None,
**kwargs,
):
"""
Expand Down Expand Up @@ -401,12 +405,35 @@ def initialize_forecast_exporter_netcdf(
n_ens_members: int
Number of ensemble members in the forecast. This argument is ignored if
incremental is set to 'member'.
datatype: np.dtype, optional
The datatype of the output values. Defaults to np.float32.
incremental: {None,'timestep','member'}, optional
Allow incremental writing of datasets into the netCDF files.\n
The available options are: 'timestep' = write a forecast or a forecast
ensemble for a given time step; 'member' = write a forecast sequence
for a given ensemble member. If set to None, incremental writing is
disabled.
fill_value: int, optional
Fill_value for missing data. Defaults to None, which means that the
standard netCDF4 fill_value is used.
scale_factor: float, optional
The scale factor used to pack the data as: store_value =
(precipitation_value - offset) / scale_factor. Defaults to None. The
scale_factor can be used to reduce data storage.
offset: float, optional
The offset used to pack the data as: store_value =
(precipitation_value - offset) / scale_factor. Defaults to None.
Other Parameters
----------------
institution: str
The institute, company or community that has created the nowcast.
Default: the pySTEPS community (https://pysteps.github.io)
references: str
Any references to be included in the netCDF file. Defaults to "".
comment: str
Any comments about the data or storage protocol that should be
included in the netCDF file. Defaults to "".
Returns
-------
Expand Down Expand Up @@ -448,18 +475,25 @@ def initialize_forecast_exporter_netcdf(
if n_ens_members > 1:
n_ens_gt_one = True

# Kwargs to be used as description strings in the netCDF
institution = kwargs.get(
"institution", "the pySTEPS community (https://pysteps.github.io)"
)
references = kwargs.get("references", "")
comment = kwargs.get("comment", "")

exporter = {}

outfn = os.path.join(outpath, outfnprefix + ".nc")
ncf = netCDF4.Dataset(outfn, "w", format="NETCDF4")

ncf.Conventions = "CF-1.7"
ncf.title = "pysteps-generated nowcast"
ncf.institution = "the pySTEPS community (https://pysteps.github.io)"
ncf.institution = institution
ncf.source = "pysteps" # TODO(exporters): Add pySTEPS version here
ncf.history = ""
ncf.references = ""
ncf.comment = ""
ncf.references = references
ncf.comment = comment

h, w = shape

Expand Down Expand Up @@ -559,14 +593,22 @@ def initialize_forecast_exporter_netcdf(
if incremental == "member" or n_ens_gt_one:
var_f = ncf.createVariable(
var_name,
np.float32,
datatype=datatype,
dimensions=("ens_number", "time", "y", "x"),
compression="zlib",
zlib=True,
complevel=9,
fill_value=fill_value,
)
else:
var_f = ncf.createVariable(
var_name, np.float32, dimensions=("time", "y", "x"), zlib=True, complevel=9
var_name,
datatype=datatype,
dimensions=("time", "y", "x"),
compression="zlib",
zlib=True,
complevel=9,
fill_value=fill_value,
)

if var_standard_name is not None:
Expand All @@ -576,6 +618,11 @@ def initialize_forecast_exporter_netcdf(
var_f.units = var_unit
if grid_mapping_var_name is not None:
var_f.grid_mapping = grid_mapping_var_name
# Add gain and offset
if scale_factor is not None:
var_f.scale_factor = scale_factor
if offset is not None:
var_f.add_offset = offset

exporter["method"] = "netcdf"
exporter["ncfile"] = ncf
Expand Down
27 changes: 20 additions & 7 deletions pysteps/tests/test_exporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,21 @@
from pysteps.tests.helpers import get_precipitation_fields, get_invalid_mask

# Test arguments
exporter_arg_names = ("n_ens_members", "incremental")
exporter_arg_names = (
"n_ens_members",
"incremental",
"datatype",
"fill_value",
"scale_factor",
"offset",
)

exporter_arg_values = [
(1, None),
(1, "timestep"),
(2, None),
(2, "timestep"),
(2, "member"),
(1, None, np.float32, None, None, None),
(1, "timestep", np.float32, 65535, None, None),
(2, None, np.float32, 65535, None, None),
(2, "timestep", np.float32, None, None, None),
(2, "member", np.float64, None, 0.01, 1.0),
]


Expand All @@ -46,7 +53,9 @@ def test_get_geotiff_filename():


@pytest.mark.parametrize(exporter_arg_names, exporter_arg_values)
def test_io_export_netcdf_one_member_one_time_step(n_ens_members, incremental):
def test_io_export_netcdf_one_member_one_time_step(
n_ens_members, incremental, datatype, fill_value, scale_factor, offset
):
"""
Test the export netcdf.
Also, test that the exported file can be read by the importer.
Expand Down Expand Up @@ -78,7 +87,11 @@ def test_io_export_netcdf_one_member_one_time_step(n_ens_members, incremental):
shape,
metadata,
n_ens_members=n_ens_members,
datatype=datatype,
incremental=incremental,
fill_value=fill_value,
scale_factor=scale_factor,
offset=offset,
)

if n_ens_members > 1:
Expand Down

0 comments on commit 1525178

Please sign in to comment.