CF checker (#127)

Co-authored-by: Renaud <[email protected]> Add a compliance checker to make sure that the NetCDF output files are as expected.
mercator-ocean · Oct 28, 2024 · 0cd9b5e · 0cd9b5e
1 parent 4787a37
commit 0cd9b5e
Show file tree

Hide file tree

Showing 6 changed files with 112 additions and 2 deletions.
diff --git a/ARCO_smaller_area_subset_method_default.nc b/ARCO_smaller_area_subset_method_default.nc
diff --git a/conda_environment_test.yaml b/conda_environment_test.yaml
@@ -7,6 +7,7 @@ dependencies:
   - tox==4.11.4
   - netcdf4==1.6.5
   - syrupy==4.6.1
+  - compliance-checker==5.1.1
   - pip:
     - pytest-order==1.2.1
     - freezegun==1.5.1
diff --git a/copernicusmarine/download_functions/subset_xarray.py b/copernicusmarine/download_functions/subset_xarray.py
@@ -434,10 +434,12 @@ def _adequate_dtypes_of_valid_minmax(
     dataset: xarray.Dataset, variable: str
 ) -> xarray.Dataset:
     dataset[variable].attrs["valid_min"] = numpy.array(
-        [dataset[variable].attrs["valid_min"]], dtype=dataset[variable].dtype
+        [dataset[variable].attrs["valid_min"]],
+        dtype=dataset[variable].encoding["dtype"],
     )[0]
     dataset[variable].attrs["valid_max"] = numpy.array(
-        [dataset[variable].attrs["valid_max"]], dtype=dataset[variable].dtype
+        [dataset[variable].attrs["valid_max"]],
+        dtype=dataset[variable].encoding["dtype"],
     )[0]
     return dataset
 

diff --git a/example.py b/example.py
diff --git a/tests/__snapshots__/test_cf_compliance.ambr b/tests/__snapshots__/test_cf_compliance.ambr
@@ -0,0 +1,31 @@
+# serializer version: 1
+# name: TestCFCompliance.test_subset_open
+  'cmems_mod_nws_bgc-pft_my_7km-3D-pico_P1M-m'
+# ---
+# name: TestCFCompliance.test_subset_open.1
+  160
+# ---
+# name: TestCFCompliance.test_subset_open.2
+  160
+# ---
+# name: TestCFCompliance.test_subset_open.3
+  list([
+  ])
+# ---
+# name: TestCFCompliance.test_subset_with_warns
+  'cmems_obs-sst_med_phy-sst_nrt_diurnal-oi-0.0625deg_PT1H-m'
+# ---
+# name: TestCFCompliance.test_subset_with_warns.1
+  135
+# ---
+# name: TestCFCompliance.test_subset_with_warns.2
+  136
+# ---
+# name: TestCFCompliance.test_subset_with_warns.3
+  list([
+    '§2.6 Attributes',
+    list([
+      '§2.6.1 Conventions global attribute does not contain "CF-1.6"',
+    ]),
+  ])
+# ---
diff --git a/tests/test_cf_compliance.py b/tests/test_cf_compliance.py
@@ -0,0 +1,76 @@
+import json
+
+import xarray
+
+from copernicusmarine import subset
+from tests.test_utils import execute_in_terminal
+
+
+class TestCFCompliance:
+    """Check that files written by ``subset`` pass the CF compliance checker.
+
+    Each test downloads a small subset, runs the ``compliance-checker``
+    command on the resulting NetCDF file and compares the score and the
+    reported messages against the stored snapshot.
+    """
+
+    def test_subset_open(self, tmp_path, snapshot):
+        """Subset the ``pico`` variable; the snapshot expects no messages."""
+        dataset_id = "cmems_mod_nws_bgc-pft_my_7km-3D-pico_P1M-m"
+        self.if_I_subset_a_dataset(dataset_id, tmp_path, "output_1.nc", "pico")
+        self.then_it_is_cf_compliant(
+            dataset_id, tmp_path, snapshot, "output_1"
+        )
+
+    def test_subset_with_warns(self, tmp_path, snapshot):
+        """Subset ``analysed_sst``; the snapshot records one §2.6.1 message."""
+        dataset_id = (
+            "cmems_obs-sst_med_phy-sst_nrt_diurnal-oi-0.0625deg_PT1H-m"
+        )
+        self.if_I_subset_a_dataset(
+            dataset_id,
+            tmp_path,
+            "output_2.nc",
+            "analysed_sst",
+        )
+        self.then_it_is_cf_compliant(
+            dataset_id, tmp_path, snapshot, "output_2"
+        )
+
+    def if_I_subset_a_dataset(
+        self, dataset_id, tmp_path, output_filename, variable
+    ):
+        """Download ``variable`` of ``dataset_id`` into ``tmp_path``.
+
+        The time window is fixed to 2022-01-01 .. 2022-01-05. Asserts that
+        ``tmp_path / output_filename`` exists once ``subset`` returns.
+        """
+        subset(
+            dataset_id=dataset_id,
+            variables=[variable],
+            output_directory=tmp_path,
+            output_filename=output_filename,
+            start_datetime="2022-01-01T00:00:00",
+            end_datetime="2022-01-05T00:00:00",
+            force_download=True,
+        )
+        assert (tmp_path / output_filename).exists()
+
+    def then_it_is_cf_compliant(
+        self, dataset_id, tmp_path, snapshot, output_filename
+    ):
+        """Run the CF checker on the output file and compare with snapshot.
+
+        ``output_filename`` is given without the ``.nc`` extension. The four
+        snapshot assertions are made in a fixed order (dataset id, scored
+        points, possible points, messages); keep that order, as the stored
+        snapshot entries are numbered accordingly.
+        """
+        netcdf_file = f"{tmp_path}/{output_filename}.nc"
+        report_file = f"{tmp_path}/{output_filename}_checked.json"
+
+        # Only the global attribute is needed, so release the file right
+        # away instead of keeping it open for the rest of the test.
+        with xarray.open_dataset(netcdf_file) as dataset:
+            cf_version = dataset.attrs["Conventions"][-3:]
+        # NOTE(review): this compares the last three characters as strings,
+        # so e.g. "CF-1.10" yields ".10" and falls back to 1.6 -- confirm
+        # that this is the intended behaviour before relying on it.
+        if cf_version < "1.6":
+            cf_version = "1.6"
+
+        command = [
+            "compliance-checker",
+            f"--test=cf:{cf_version}",
+            netcdf_file,
+            "-f",
+            "json",
+            "-o",
+            report_file,
+        ]
+        execute_in_terminal(command)
+
+        # Context manager guarantees the report file handle is closed.
+        with open(report_file) as report:
+            results = json.load(report)[f"cf:{cf_version}"]
+
+        # Flat list alternating check name and its messages, keeping only
+        # the checks that actually reported something.
+        list_msgs = []
+        for check in results["all_priorities"]:
+            if check["msgs"]:
+                list_msgs.extend([check["name"], check["msgs"]])
+
+        assert dataset_id == snapshot
+        assert results["scored_points"] == snapshot
+        assert results["possible_points"] == snapshot
+        assert list_msgs == snapshot