Skip to content

Commit

Permalink
chore: updating the attributes for the variables, coordinates and dataset (#93)
Browse files Browse the repository at this point in the history
  • Loading branch information
uriii3 authored and renaudjester committed Aug 20, 2024
1 parent 1b98eb3 commit ed29bf8
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 17 deletions.
1 change: 1 addition & 0 deletions copernicusmarine/core_functions/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def describe_function(
disable_progress_bar: bool,
staging: bool,
) -> str:

VersionVerifier.check_version_describe(staging)
if staging:
logger.warning(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ def download_dataset(
output_path = pathlib.Path(output_directory, filename)
if not output_directory.is_dir():
pathlib.Path.mkdir(output_directory, parents=True)

if not force_download:
logger.info(dataset)
logger.info(
Expand Down
72 changes: 67 additions & 5 deletions copernicusmarine/download_functions/subset_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,36 @@
"depth": ["depth", "deptht", "elevation"],
}

# Whitelists of attribute names kept when writing output, per the NetCDF
# conventions the project targets (per the constant names — the exact
# convention document is not visible here).
# NOTE(review): callers mutate copies of these via .copy()/.remove()
# (see the coordinate-attribute update function), so they must remain
# lists, not tuples.

# Attributes preserved on data variables.
NETCDF_CONVENTION_VARIABLE_ATTRIBUTES = [
    "standard_name",
    "long_name",
    "units",
    "unit_long",
    "valid_min",
    "valid_max",
]
# Attributes preserved on coordinates (variables list plus "axis").
NETCDF_CONVENTION_COORDINATE_ATTRIBUTES = [
    "standard_name",
    "long_name",
    "units",
    "unit_long",
    "axis",
    "valid_min",
    "valid_max",
]
# Global (dataset-level) attributes preserved.
NETCDF_CONVENTION_DATASET_ATTRIBUTES = [
    "title",
    "institution",
    "source",
    "history",
    "references",
    "comment",
    "Conventions",
    "producer",
    "credit",
    "contact",
]


def _dataset_custom_sel(
dataset: xarray.Dataset,
Expand Down Expand Up @@ -280,6 +310,16 @@ def _get_variable_name_from_standard_name(
return None


def _update_variables_attributes(
    dataset: xarray.Dataset, variables: List[str]
) -> xarray.Dataset:
    """Trim each listed variable's attributes down to the NetCDF
    convention whitelist.

    Mutates the variables' ``attrs`` in place and returns the same
    dataset for call chaining.
    """
    for variable_name in variables:
        data_array = dataset[variable_name]
        data_array.attrs = _filter_attributes(
            data_array.attrs, NETCDF_CONVENTION_VARIABLE_ATTRIBUTES
        )
    return dataset


def _variables_subset(
dataset: xarray.Dataset, variables: List[str]
) -> xarray.Dataset:
Expand All @@ -298,17 +338,26 @@ def _variables_subset(
)
else:
raise VariableDoesNotExistInTheDataset(variable)
return dataset[numpy.array(dataset_variables_filter)]
dataset = dataset[numpy.array(dataset_variables_filter)]
return _update_variables_attributes(dataset, dataset_variables_filter)


def _filter_attributes(attributes: dict, attributes_to_keep: List[str]):
attributes_that_exist = set(attributes).intersection(attributes_to_keep)
return {key: attributes[key] for key in attributes_that_exist}


def _update_dataset_coordinate_valid_minmax_attributes(
def _update_dataset_coordinate_attributes(
dataset: xarray.Dataset,
) -> xarray.Dataset:
for coordinate_label in COORDINATES_LABEL:
for coordinate_alias in COORDINATES_LABEL[coordinate_label]:
if coordinate_alias in dataset.sizes:
coord = dataset[coordinate_alias]
attrs = coord.attrs
coordinate_attributes = (
NETCDF_CONVENTION_COORDINATE_ATTRIBUTES.copy()
)
if "time" in coordinate_label:
min_time_dimension = coord.values.min()
max_time_dimension = coord.values.max()
Expand All @@ -319,13 +368,26 @@ def _update_dataset_coordinate_valid_minmax_attributes(
valid_max = convert_datetime64_to_netcdf_timestamp(
max_time_dimension, netcdf_unit
)
attrs["standard_name"] = "time"
attrs["long_name"] = "Time"
attrs["valid_min"] = valid_min
attrs["valid_max"] = valid_max
else:
attrs["axis"] = "T"
attrs["unit_long"] = (
coord.encoding["units"].replace("_", " ").title()
)
coordinate_attributes.remove("units")
elif coordinate_label in ["latitude", "depth", "elevation"]:
attrs["valid_min"] = coord.values.min()
attrs["valid_max"] = coord.values.max()
elif coordinate_label == "longitude":
coordinate_attributes.remove("valid_min")
coordinate_attributes.remove("valid_max")
coord.attrs = _filter_attributes(attrs, coordinate_attributes)

coord.attrs = attrs
dataset.attrs = _filter_attributes(
dataset.attrs, NETCDF_CONVENTION_DATASET_ATTRIBUTES
)

return dataset

Expand All @@ -351,7 +413,7 @@ def subset(

dataset = _depth_subset(dataset, depth_parameters)

dataset = _update_dataset_coordinate_valid_minmax_attributes(dataset)
dataset = _update_dataset_coordinate_attributes(dataset)

return dataset

Expand Down
12 changes: 4 additions & 8 deletions tests/test_command_line_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,6 @@ def test_if_dataset_coordinate_valid_minmax_attributes_are_setted(

assert dataset.latitude.attrs["valid_min"] >= 0
assert dataset.latitude.attrs["valid_max"] <= 0.1
assert dataset.longitude.attrs["valid_min"] >= 0.2
assert dataset.longitude.attrs["valid_max"] <= 0.3
assert dataset.depth.attrs["valid_min"] >= 0
assert dataset.depth.attrs["valid_max"] <= 5
assert dataset.time.attrs["valid_min"] == 648672
Expand Down Expand Up @@ -1093,26 +1091,24 @@ def then_I_have_correct_sign_for_depth_coordinates_values(
assert dataset.elevation.min() >= -10
assert dataset.elevation.max() <= 0

def then_I_have_correct_attribute_value(
self, output_path, dimention_name, attribute_value
):
def then_I_have_correct_attribute_value(self, output_path, dimention_name):
    """Open the written zarr store and check that the given dimension's
    ``standard_name`` attribute equals the dimension name itself."""
    # NOTE(review): "dimention" is a typo, kept for call-site compatibility.
    zarr_store = pathlib.Path(output_path, "data.zarr")
    opened = xarray.open_dataset(zarr_store, engine="zarr")
    standard_name = opened[dimention_name].attrs["standard_name"]
    assert standard_name == dimention_name

def test_conversion_between_elevation_and_depth(self, tmp_path):
    """With elevation-to-depth conversion enabled, the subset must have
    positive-signed depth values and a correctly named depth coordinate."""
    # True -> request the conversion in the subset command.
    self.when_I_request_subset_dataset_with_zarr_service(tmp_path, True)
    self.then_I_have_correct_sign_for_depth_coordinates_values(
        tmp_path, "positive"
    )
    self.then_I_have_correct_attribute_value(tmp_path, "depth")

def test_force_no_conversion_between_elevation_and_depth(self, tmp_path):
    """With conversion disabled, the subset keeps the native elevation
    coordinate: negative-signed values and an "elevation" standard name."""
    # False -> keep the dataset's native elevation coordinate.
    self.when_I_request_subset_dataset_with_zarr_service(tmp_path, False)
    self.then_I_have_correct_sign_for_depth_coordinates_values(
        tmp_path, "negative"
    )
    self.then_I_have_correct_attribute_value(tmp_path, "elevation")

def when_I_run_copernicus_marine_command_using_no_directories_option(
self, tmp_path, force_service: GetServiceToTest, output_directory=None
Expand Down
7 changes: 4 additions & 3 deletions tests/test_python_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ def test_subset_modify_attr_for_depth(self):
dataset = open_dataset(
dataset_id="cmems_mod_arc_phy_anfc_6km_detided_P1D-m"
)
assert dataset.depth.attrs["positive"] == "down"
assert dataset.depth.attrs["standard_name"] == "depth"
assert dataset.depth.attrs["long_name"] == "Depth"

Expand Down Expand Up @@ -235,7 +234,10 @@ def test_subset_keeps_fillvalue_empty(self, tmp_path):
assert "_FillValue" not in subsetdata.time.attrs
assert "_FillValue" not in subsetdata.latitude.attrs
assert "_FillValue" not in subsetdata.depth.attrs
assert "valid_max" in subsetdata.longitude.attrs
assert "valid_max" not in subsetdata.longitude.attrs
assert "valid_min" not in subsetdata.longitude.attrs
assert "valid_max" in subsetdata.latitude.attrs
assert "valid_max" in subsetdata.latitude.attrs
assert subsetdata.time.attrs["calendar"] == "gregorian"
assert subsetdata.time.attrs["units"] == "hours since 1950-01-01"

Expand Down Expand Up @@ -265,7 +267,6 @@ def test_subset_keeps_fillvalue_empty_w_compression(self, tmp_path):
assert "_FillValue" not in subsetdata.time.attrs
assert "_FillValue" not in subsetdata.latitude.attrs
assert "_FillValue" not in subsetdata.depth.attrs
assert "valid_max" in subsetdata.longitude.attrs
assert subsetdata.time.attrs["calendar"] == "gregorian"
assert subsetdata.time.attrs["units"] == "hours since 1950-01-01"

Expand Down

0 comments on commit ed29bf8

Please sign in to comment.