From 1683d1626ad5ad4381117227bd79804598b69a80 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:08:16 +0100 Subject: [PATCH 01/27] add DANRA projection information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit created with ``` globe=ccrs.Globe(semimajor_axis=6367470,semiminor_axis=6367470) 4:41 ccrs.LambertConformal(central_longitude=proj_inf[‘LoV’]/1e6,standard_parallels=(proj_inf[‘Latin1’]/1e6,proj_inf[‘Latin2’]/1e6), central_latitude=((proj_inf[‘Latin1’]/1e6+proj_inf[‘Latin2’]/1e6))/2 ,false_easting=0, false_northing=0, globe=globe).to_cf() ``` --- example.danra.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/example.danra.yaml b/example.danra.yaml index 73aa0df..f7b61c1 100644 --- a/example.danra.yaml +++ b/example.danra.yaml @@ -35,13 +35,37 @@ inputs: dims: [time, x, y, altitude] variables: u: + attrs: + grid_mapping: crs altitude: values: [100,] units: m v: + attrs: + grid_mapping: crs altitude: values: [100, ] units: m + projections: + crs: + dims: [x, y] + attributes: + crs_wkt: 'PROJCRS["DMI HARMONIE DANRA lambert projection",BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS",DATUM["DMI HARMONIE DANRA lambert datum",ELLIPSOID["Sphere",6367470,0,LENGTHUNIT["metre",1,ID["EPSG",9001]]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8901]]],CONVERSION["unknown",METHOD["Lambert Conic Conformal (2SP)",ID["EPSG",9802]],PARAMETER["Latitude of false origin",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],PARAMETER["Longitude of false origin",0.025,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],PARAMETER["Latitude of 1st standard parallel",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],PARAMETER["Latitude of 2nd standard parallel",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],PARAMETER["Easting at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8826]],PARAMETER["Northing at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],AXIS["(E)",east,ORDER[1],LENGTHUNIT["metre",1,ID["EPSG",9001]]],AXIS["(N)",north,ORDER[2],LENGTHUNIT["metre",1,ID["EPSG",9001]]]]' + semi_major_axis: 6367470.0 + semi_minor_axis: 6367470.0 + inverse_flattening: 0.0 + reference_ellipsoid_name: 'Sphere' + longitude_of_prime_meridian: 0.0 + prime_meridian_name: 'Greenwich' + geographic_crs_name: 'DMI HARMONIE DANRA lambert CRS' + horizontal_datum_name: 'DMI HARMONIE DANRA lambert datum' + projected_crs_name: 'DMI HARMONIE DANRA lambert projection' + grid_mapping_name: 'lambert_conformal_conic' + standard_parallel: [0.0567, 0.0567] + latitude_of_projection_origin: 0.0567 + longitude_of_central_meridian: 0.025 + false_easting: 0.0 + false_northing: 0.0 dim_mapping: time: method: rename From aa87576c4109409a7413f0710693fea4a3d3a248 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 13 Nov 2024 17:51:29 +0100 Subject: [PATCH 02/27] add projection handlers --- mllam_data_prep/ops/projection.py | 231 ++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 mllam_data_prep/ops/projection.py diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py new file mode 100644 index 0000000..efd59bf --- /dev/null +++ b/mllam_data_prep/ops/projection.py @@ -0,0 +1,231 @@ +"""Collection of operators handling projection information.""" + +from typing import Any, Dict, List, Tuple, Union + +import pyproj +import xarray as xr +from pyproj import CRS + + +class ProjectionInconsistencyWarning(Warning): + pass + + +def _extract_grid_mapping_names(grid_mapping_attr: str) -> List[str]: + """Extract grid mapping names from the grid_mapping attribute. + + Parameters + ---------- + grid_mapping_attr : str + The grid_mapping attribute. + + Returns + ------- + list + List of grid mapping names. + + Examples + -------- + >>> m1 = "crsOSGB: x y crsWGS84: lat lon" + >>> _extract_grid_mapping_names(m1) + ['crsOSGB', 'crsWGS84'] + >>> m2 = "crsOSGB" + >>> _extract_grid_mapping_names(m2) + ['crsOSGB'] + + Note: merge of https://github.com/pydata/xarray/pull/9765 of will allow xarray to handle this directly. + """ + if ":" not in grid_mapping_attr: + return grid_mapping_attr.split(" ") + else: + return [ + key.split(":")[0].strip() + for key in grid_mapping_attr.split(" ") + if ":" in key + ] + + +def _get_projection_mappings(dataset: xr.Dataset) -> Dict[str, Any]: + """Get projections referenced by variables. + + This function extracts the projection variables from a dataset + by evaluating the grid_mapping attribute of each variable following + the CF-Conventions. + + Parameters + ---------- + dataset : xr.Dataset + Dataset to get the projection information from + + Returns + ------- + Dict[str, str] + Dictionary with the variable names as keys and the projection + variable names as values + """ + projections = {} + for var in dataset.variables: + if "grid_mapping" in dataset[var].attrs: + gm = _extract_grid_mapping_names(dataset[var].attrs["grid_mapping"]) + projections[var] = gm + return projections + + +def validate_projection_consistency( + projections: List[Dict[str, Union[str, dict]]] +) -> None: + """Validate the consistency of the projection information. + + Examples + -------- + >>> crs_wkt = ( + ... 'GEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble",' + ... 'MEMBER["World Geodetic System 1984 (Transit)"],' + ... 'MEMBER["World Geodetic System 1984 (G730)"],' + ... 'MEMBER["World Geodetic System 1984 (G873)"],' + ... 'MEMBER["World Geodetic System 1984 (G1150)"],' + ... 'MEMBER["World Geodetic System 1984 (G1674)"],' + ... 'MEMBER["World Geodetic System 1984 (G1762)"],' + ... 'MEMBER["World Geodetic System 1984 (G2139)"],' + ... 'MEMBER["World Geodetic System 1984 (G2296)"],' + ... 'ELLIPSOID["WGS 84",6378137,298.257223563,' + ... 'LENGTHUNIT["metre",1]],ENSEMBLEACCURACY[2.0]],' + ... 'PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],' + ... 'CS[ellipsoidal,2],AXIS["geodetic latitude (Lat)",north,' + ... 'ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],' + ... 'AXIS["geodetic longitude (Lon)",east,ORDER[2],' + ... 'ANGLEUNIT["degree",0.0174532925199433]],' + ... 'USAGE[SCOPE["Horizontal component of 3D system."],' + ... 'AREA["World."],BBOX[-90,-180,90,180]],ID["EPSG",4326]]' + ... ) + >>> proj1 = {'crs_wkt': crs_wkt} + >>> proj2 = {"crs_wkt": "EPSG:4326"} + >>> proj3 = { + ... "crs_wkt": crs_wkt, + ... 'semi_major_axis': 6378137.0, + ... 'semi_minor_axis': 6356752.314245179, + ... 'inverse_flattening': 298.257223563, + ... 'reference_ellipsoid_name': 'WGS 84', + ... 'longitude_of_prime_meridian': 0.0, + ... 'prime_meridian_name': 'Greenwich', + ... 'geographic_crs_name': 'WGS 84', + ... 'horizontal_datum_name': 'World Geodetic System 1984 ensemble', + ... 'grid_mapping_name': 'latitude_longitude' + ... } + >>> proj4 = {"crs_wkt": "AGD84"} + >>> validate_projection_consistency([proj1, proj2, proj3]) + Traceback (most recent call last): + ... + mllam_data_prep.ops.projection.ProjectionInconsistencyWarning: ["Key 'semi_major_axis' is missing ... + >>> validate_projection_consistency([proj2, proj4]) + Traceback (most recent call last): + ... + ValueError: Multiple projections found in the dataset.Currently only one projection is supported. + """ + proj_obs = [pyproj.CRS.from_cf(proj) for proj in projections] + + # Check that all projections are the same + if len(set(proj_obs)) > 1: + raise ValueError( + "Multiple projections found in the dataset." + "Currently only one projection is supported." + ) + + # Check that conversion to CF is consistent with input + # and all projection information is given + cf_output = set(proj_obs).pop().to_cf() + issues = [] + for proj in projections: + cf_wkt_set = "crs_wkt" in cf_output + cf_indv_attrs_set = set(cf_output.keys()) - {"crs_wkt"} + for key, value in cf_output.items(): + if key == "crs_wkt" and cf_wkt_set and value != proj["crs_wkt"]: + issues.append(f"'{key}' differs: {proj[key]} != {value}") + elif key != "crs_wkt" and cf_indv_attrs_set and key not in proj: + issues.append(f"Key '{key}' is missing in the projection information.") + elif key == "crs_wkt" and cf_indv_attrs_set and value != proj[key]: + issues.append(f"Value for key '{key}' differs: {proj[key]} != {value}") + for key in proj: + if key not in cf_output: + issues.append( + f"Key '{key}' is not expected in the projection information." + ) + if issues: + raise ProjectionInconsistencyWarning(issues) + + return + + +def get_projection(ds: xr.Dataset) -> Dict[str, Any]: + """Get the projection information from the dataset. + + Parameters + ---------- + ds : xr.Dataset + CF-conform dataset to extract projection information from. + `grid_mapping` attribute must be set for each variable that + references a projection variable. + Projection variables must have either a `crs_wkt` attribute + or CF-conform single-value attributes with individual projection + information. + + Returns + ------- + Dict[str, Any] + Projection information as a dictionary with the projection + variable names as keys and the projection objects as values + """ + vars_w_proj = _get_projection_mappings(ds) + proj_vars = set([proj for proj in vars_w_proj.values()]) + vars_wo_proj = set(ds.variables) - set(vars_w_proj.keys()) - proj_vars + + if len(proj_vars) > 1: + raise ValueError( + f"Multiple referenced projections found in the dataset {ds.encoding.get('source',None)}: {proj_vars}. " + "Currently only one projection is supported." + ) + + if len(vars_wo_proj) > 0: + raise ValueError( + f"Variables {vars_wo_proj} do not have a projection defined in the dataset {ds.encoding.get('source',None)}" + ) + + proj_objs = {} + for proj in proj_vars: + proj_objs[proj] = pyproj.CRS.from_cf(ds[proj].attrs) + + return proj_objs + + +def get_latitude_longitude_from_projection( + proj: CRS, coords: Tuple[Any, Any], output_proj: CRS = None +) -> Tuple[Any, Any]: + """Get the latitude and longitude from a projection object. + + Parameters + ---------- + proj : CRS + Projection object. + coords : Tuple[array-like, array-like] + Coordinates to convert. + + Returns + ------- + Tuple[array-like, array-like] + Latitude and longitude in degrees. + + Examples + -------- + >>> proj = pyproj.CRS.from_cf({"crs_wkt":"EPSG:3857"}) + >>> coords = (500000, 4649776) + >>> get_latitude_longitude_from_projection(proj, coords) + (38.496303121059576, 4.491576420597607) + >>> coords = ([400000, 500000], [3649776, 4649776]) + >>> get_latitude_longitude_from_projection(proj, coords) + ([31.13101769562677, 38.496303121059576], [3.5932611364780858, 4.491576420597607]) + """ + if output_proj is None: + output_proj = pyproj.CRS("EPSG:4326") + + transformer = pyproj.Transformer.from_crs(proj, output_proj) + return transformer.transform(coords[0], coords[1]) From 4826e20d2b6820b22e5995a0ae36e5e3e4b647d3 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:34:10 +0100 Subject: [PATCH 03/27] add doctest CI --- .github/workflows/python-package-pip.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/python-package-pip.yml b/.github/workflows/python-package-pip.yml index 0a1c5bb..0a72d83 100644 --- a/.github/workflows/python-package-pip.yml +++ b/.github/workflows/python-package-pip.yml @@ -24,6 +24,10 @@ jobs: python -m pip install . python -m pip install pytest + - name: Run doctests + run: | + python -m python --doctest-modules mllam_data_prep/ + - name: Run tests env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} From b67998d46cc49ba92953556ea5e9e3142167c657 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:36:38 +0100 Subject: [PATCH 04/27] add pyproj dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ad8c668..9da20f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dependencies = [ "rich>=13.7.1", "dask>=2024.2.1", "psutil>=5.7.2", + "pyproj>=3.7.0", ] requires-python = ">=3.9" readme = "README.md" From ba4caa7988de12bc0412656140252f3f77865ae1 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 13 Nov 2024 19:07:50 +0100 Subject: [PATCH 05/27] save proj source instead of object --- mllam_data_prep/ops/projection.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index efd59bf..682ef92 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -156,7 +156,7 @@ def validate_projection_consistency( return -def get_projection(ds: xr.Dataset) -> Dict[str, Any]: +def get_projection_crs(ds: xr.Dataset) -> Dict[str, Any]: """Get the projection information from the dataset. Parameters @@ -190,11 +190,11 @@ def get_projection(ds: xr.Dataset) -> Dict[str, Any]: f"Variables {vars_wo_proj} do not have a projection defined in the dataset {ds.encoding.get('source',None)}" ) - proj_objs = {} - for proj in proj_vars: - proj_objs[proj] = pyproj.CRS.from_cf(ds[proj].attrs) + crss = {} + for crs in crss: + crss[crs] = ds[crs].attrs # pyproj.CRS.from_cf(ds[proj].attrs) - return proj_objs + return crss def get_latitude_longitude_from_projection( From 9244fdf983a254090fc30f279df9c5defd1aaca2 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 13 Nov 2024 19:09:43 +0100 Subject: [PATCH 06/27] add projection info to output --- mllam_data_prep/create_dataset.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py index ad14704..272ac88 100644 --- a/mllam_data_prep/create_dataset.py +++ b/mllam_data_prep/create_dataset.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +import pyproj import xarray as xr from loguru import logger from numcodecs import Blosc @@ -12,6 +13,11 @@ from .config import Config, InvalidConfigException from .ops.loading import load_and_subset_dataset from .ops.mapping import map_dims_and_variables +from .ops.projection import ( + ProjectionInconsistencyWarning, + get_projection_crs, + validate_projection_consistency, +) from .ops.selection import select_by_kwargs from .ops.statistics import calc_stats @@ -106,6 +112,7 @@ def create_dataset(config: Config): output_coord_ranges = output_config.coord_ranges dataarrays_by_target = defaultdict(list) + projections = [] for dataset_name, input_config in config.inputs.items(): path = input_config.path @@ -158,6 +165,10 @@ def create_dataset(config: Config): da_target.attrs["source_dataset"] = dataset_name + # get the projection information from the dataset and update it with the projection + # information given in the input config + projections.append(get_projection_crs(ds)) + # only need to do selection for the coordinates that the input dataset actually has if output_coord_ranges is not None: selection_kwargs = {} @@ -168,6 +179,17 @@ def create_dataset(config: Config): dataarrays_by_target[target_output_var].append(da_target) + # validate projections across input datasets + try: + validate_projection_consistency(projections) + except ProjectionInconsistencyWarning as e: + logger.warning(f"Projection information might be ambiguous: {e}") + projection = pyproj.CRS.from_cf(set(projections).pop()) + + # TODO: generalize the retrieval of x and y coords + # coords = (dataarrays_by_target[target_output_var]['x'], dataarrays_by_target[target_output_var]['y']) + # lat, lon = get_latitude_longitude_from_projection(projection, coords) + ds = _merge_dataarrays_by_target(dataarrays_by_target=dataarrays_by_target) # need to drop the encoding so that we can write to zarr with new chunksizes @@ -214,6 +236,11 @@ def create_dataset(config: Config): ) ds["splits"] = da_splits + # add the projection information to the dataset + ds["crs"] = projection.to_cf() + for var in ds.data_vars: + ds[var].attrs["grid_mapping"] = "crs" + ds.attrs = {} ds.attrs["schema_version"] = config.schema_version ds.attrs["dataset_version"] = config.dataset_version From 8dd485ad244721a432aa5d9523cb82763836f7d2 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 13 Nov 2024 19:27:06 +0100 Subject: [PATCH 07/27] ignore coordinates for projection evaluation --- mllam_data_prep/ops/projection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index 682ef92..5c23849 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -177,7 +177,7 @@ def get_projection_crs(ds: xr.Dataset) -> Dict[str, Any]: """ vars_w_proj = _get_projection_mappings(ds) proj_vars = set([proj for proj in vars_w_proj.values()]) - vars_wo_proj = set(ds.variables) - set(vars_w_proj.keys()) - proj_vars + vars_wo_proj = set(ds.data_vars) - set(vars_w_proj.keys()) - proj_vars if len(proj_vars) > 1: raise ValueError( From a8eacc4197e29c0418886f3c389c9d360b4e254d Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Wed, 13 Nov 2024 21:59:51 +0100 Subject: [PATCH 08/27] fix doctest --- .github/workflows/python-package-pip.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package-pip.yml b/.github/workflows/python-package-pip.yml index 0a72d83..cfccb7c 100644 --- a/.github/workflows/python-package-pip.yml +++ b/.github/workflows/python-package-pip.yml @@ -26,7 +26,7 @@ jobs: - name: Run doctests run: | - python -m python --doctest-modules mllam_data_prep/ + python -m pytest --doctest-modules mllam_data_prep/ - name: Run tests env: From db737aba488f7eda47fea379ef2938ed01e84b28 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 09:01:23 +0100 Subject: [PATCH 09/27] allow slight rounding deviations in doctest --- mllam_data_prep/ops/projection.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index 5c23849..eaad945 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -216,13 +216,15 @@ def get_latitude_longitude_from_projection( Examples -------- + >>> import numpy as np >>> proj = pyproj.CRS.from_cf({"crs_wkt":"EPSG:3857"}) >>> coords = (500000, 4649776) - >>> get_latitude_longitude_from_projection(proj, coords) - (38.496303121059576, 4.491576420597607) + >>> np.round(get_latitude_longitude_from_projection(proj, coords),3) + array([38.496, 4.492]) >>> coords = ([400000, 500000], [3649776, 4649776]) - >>> get_latitude_longitude_from_projection(proj, coords) - ([31.13101769562677, 38.496303121059576], [3.5932611364780858, 4.491576420597607]) + >>> np.round(get_latitude_longitude_from_projection(proj, coords),3) + array([[31.131, 38.496], + [ 3.593, 4.492]]) """ if output_proj is None: output_proj = pyproj.CRS("EPSG:4326") From 44dff49e315edbbf49f23c3b5abab0adf0688282 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 10:44:19 +0100 Subject: [PATCH 10/27] make projections optional --- mllam_data_prep/create_dataset.py | 32 +++++++++++++++++++------------ mllam_data_prep/ops/projection.py | 5 +++-- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py index 272ac88..ebbdac2 100644 --- a/mllam_data_prep/create_dataset.py +++ b/mllam_data_prep/create_dataset.py @@ -167,7 +167,13 @@ def create_dataset(config: Config): # get the projection information from the dataset and update it with the projection # information given in the input config - projections.append(get_projection_crs(ds)) + # TODO: remove this once grid_mapping is set in config file + for var in ds.data_vars: + ds[var].attrs["grid_mapping"] = "crs" + # TODO: read the projection information from the config + projection_crs = get_projection_crs(ds) + if projection_crs is not None: + projections.append(projection_crs) # only need to do selection for the coordinates that the input dataset actually has if output_coord_ranges is not None: @@ -180,15 +186,16 @@ def create_dataset(config: Config): dataarrays_by_target[target_output_var].append(da_target) # validate projections across input datasets - try: - validate_projection_consistency(projections) - except ProjectionInconsistencyWarning as e: - logger.warning(f"Projection information might be ambiguous: {e}") - projection = pyproj.CRS.from_cf(set(projections).pop()) + if projections: + try: + validate_projection_consistency(projections) + except ProjectionInconsistencyWarning as e: + logger.warning(f"Projection information might be ambiguous: {e}") + projection = pyproj.CRS.from_cf(set(projections).pop()) - # TODO: generalize the retrieval of x and y coords - # coords = (dataarrays_by_target[target_output_var]['x'], dataarrays_by_target[target_output_var]['y']) - # lat, lon = get_latitude_longitude_from_projection(projection, coords) + # TODO: generalize the retrieval of x and y coords + # coords = (dataarrays_by_target[target_output_var]['x'], dataarrays_by_target[target_output_var]['y']) + # lat, lon = get_latitude_longitude_from_projection(projection, coords) ds = _merge_dataarrays_by_target(dataarrays_by_target=dataarrays_by_target) @@ -237,9 +244,10 @@ def create_dataset(config: Config): ds["splits"] = da_splits # add the projection information to the dataset - ds["crs"] = projection.to_cf() - for var in ds.data_vars: - ds[var].attrs["grid_mapping"] = "crs" + if projections: + ds["crs"] = projection.to_cf() + for var in ds.data_vars: + ds[var].attrs["grid_mapping"] = "crs" ds.attrs = {} ds.attrs["schema_version"] = config.schema_version diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index eaad945..aa2f75f 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -176,7 +176,7 @@ def get_projection_crs(ds: xr.Dataset) -> Dict[str, Any]: variable names as keys and the projection objects as values """ vars_w_proj = _get_projection_mappings(ds) - proj_vars = set([proj for proj in vars_w_proj.values()]) + proj_vars = set(proj for sublist in vars_w_proj.values() for proj in sublist) vars_wo_proj = set(ds.data_vars) - set(vars_w_proj.keys()) - proj_vars if len(proj_vars) > 1: @@ -194,7 +194,8 @@ def get_projection_crs(ds: xr.Dataset) -> Dict[str, Any]: for crs in crss: crss[crs] = ds[crs].attrs # pyproj.CRS.from_cf(ds[proj].attrs) - return crss + if crss: + return crss def get_latitude_longitude_from_projection( From fdb984ba7cd900e2922af73b328da8858d855b62 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 11:11:36 +0100 Subject: [PATCH 11/27] fix VALID_EXAMPLE_CONFIG_YAML to work with current codebase --- tests/test_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 5459db6..a0da682 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -67,7 +67,7 @@ def test_get_config_issues(): inputs: danra_height_levels: - path: ~/Desktop/mldev/height_levels.zarr + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/height_levels.zarr dims: [time, x, y, altitude] variables: u: @@ -87,12 +87,12 @@ def test_get_config_issues(): dims: [altitude] name_format: f"{var_name}{altitude}m" grid_index: - method: flatten + method: stack dims: [x, y] target_output_variable: state danra_surface: - path: ~/Desktop/mldev/single_levels.zarr + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/single_levels.zarr dims: [time, x, y] variables: - pres_seasurface @@ -101,7 +101,7 @@ def test_get_config_issues(): method: rename dim: time grid_index: - method: flatten + method: stack dims: [x, y] forcing_feature: method: stack_variables_by_var_name From 91ddfc29f52fc2d1ad2b7b7c25ad4b379564a62b Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 11:18:25 +0100 Subject: [PATCH 12/27] add optional projection key to config schema --- mllam_data_prep/config.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/mllam_data_prep/config.py b/mllam_data_prep/config.py index 6112a0c..46c4a0e 100644 --- a/mllam_data_prep/config.py +++ b/mllam_data_prep/config.py @@ -122,6 +122,26 @@ class DimMapping: name_format: str = field(default=None) +@dataclass +class Projection: + """ + Definition of a single coordinate reference system (CRS) projection that + variables in the dataset refer to. + + Attributes + ---------- + dims : List[str] + The dimensions of the projection, usually ["x", "y"]. + attributes : Dict[str, Any] + The attributes that define the projection. The attributes need to adhere to the CF-conventions + and contain either a `crs_wkt` attribute or individual attributes that are specific to the used projection. + If both are given, then the `crs_wkt` attribute will be used to create the projection object. + """ + + dims: List[str] + attributes: Dict[str, Any] + + @dataclass class InputDataset: """ @@ -167,10 +187,15 @@ class InputDataset: path: str dims: List[str] - variables: Union[List[str], Dict[str, Dict[str, ValueSelection]]] + variables: Union[ + List[str], + Dict[str, Dict[str, ValueSelection]], + Dict[str, Dict[str, Dict[str, str]]], + ] dim_mapping: Dict[str, DimMapping] target_output_variable: str attributes: Dict[str, Any] = None + projections: Optional[Dict[str, Projection]] = None @dataclass From ac0792c4bfae795f045c067678e2003fc9080bd1 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 13:47:42 +0100 Subject: [PATCH 13/27] first working implementation --- example.danra.yaml | 4 --- mllam_data_prep/create_dataset.py | 29 ++++++++++----- mllam_data_prep/ops/projection.py | 17 +++++---- tests/test_from_config.py | 60 +++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 18 deletions(-) diff --git a/example.danra.yaml b/example.danra.yaml index f7b61c1..0087b56 100644 --- a/example.danra.yaml +++ b/example.danra.yaml @@ -35,14 +35,10 @@ inputs: dims: [time, x, y, altitude] variables: u: - attrs: - grid_mapping: crs altitude: values: [100,] units: m v: - attrs: - grid_mapping: crs altitude: values: [100, ] units: m diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py index ebbdac2..67ceda1 100644 --- a/mllam_data_prep/create_dataset.py +++ b/mllam_data_prep/create_dataset.py @@ -120,6 +120,7 @@ def create_dataset(config: Config): target_output_var = input_config.target_output_variable expected_input_attributes = input_config.attributes or {} expected_input_var_dims = input_config.dims + dataset_projections = input_config.projections or {} output_dims = output_config.variables[target_output_var] @@ -134,6 +135,10 @@ def create_dataset(config: Config): dataset_name=dataset_name, ) + # TODO: remove this once grid_mapping is set in config file + for var in ds.data_vars: + ds[var].attrs["grid_mapping"] = "crs" + dim_mapping = input_config.dim_mapping # check that there is an entry for each arch dimension @@ -167,13 +172,21 @@ def create_dataset(config: Config): # get the projection information from the dataset and update it with the projection # information given in the input config - # TODO: remove this once grid_mapping is set in config file - for var in ds.data_vars: - ds[var].attrs["grid_mapping"] = "crs" # TODO: read the projection information from the config projection_crs = get_projection_crs(ds) + if projection_crs is None and dataset_projections: + logger.warning( + f"Projection information not found in dataset {dataset_name}, using projection information from config." + ) + projection_crs = { + crs_var: p.attributes for crs_var, p in dataset_projections.items() + } + elif projection_crs is None and not dataset_projections: + logger.warning( + f"No projection information found neither in dataset {dataset_name}, nor in config." + ) if projection_crs is not None: - projections.append(projection_crs) + projections.extend(projection_crs.values()) # only need to do selection for the coordinates that the input dataset actually has if output_coord_ranges is not None: @@ -191,7 +204,7 @@ def create_dataset(config: Config): validate_projection_consistency(projections) except ProjectionInconsistencyWarning as e: logger.warning(f"Projection information might be ambiguous: {e}") - projection = pyproj.CRS.from_cf(set(projections).pop()) + projection = pyproj.CRS.from_cf(projections[0]) # TODO: generalize the retrieval of x and y coords # coords = (dataarrays_by_target[target_output_var]['x'], dataarrays_by_target[target_output_var]['y']) @@ -245,9 +258,9 @@ def create_dataset(config: Config): # add the projection information to the dataset if projections: - ds["crs"] = projection.to_cf() - for var in ds.data_vars: - ds[var].attrs["grid_mapping"] = "crs" + ds["crs"] = xr.DataArray(0, attrs=projection.to_cf()).astype("int16") + # for var in ["VARIABLES_WITH_PROKJECTION"]: + # ds[var].attrs["grid_mapping"] = "crs" ds.attrs = {} ds.attrs["schema_version"] = config.schema_version diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index aa2f75f..3cb0d17 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -98,7 +98,7 @@ def validate_projection_consistency( ... 'USAGE[SCOPE["Horizontal component of 3D system."],' ... 'AREA["World."],BBOX[-90,-180,90,180]],ID["EPSG",4326]]' ... ) - >>> proj1 = {'crs_wkt': crs_wkt} + >>> proj1 = {"crs_wkt": crs_wkt} >>> proj2 = {"crs_wkt": "EPSG:4326"} >>> proj3 = { ... "crs_wkt": crs_wkt, @@ -115,8 +115,8 @@ def validate_projection_consistency( >>> proj4 = {"crs_wkt": "AGD84"} >>> validate_projection_consistency([proj1, proj2, proj3]) Traceback (most recent call last): - ... - mllam_data_prep.ops.projection.ProjectionInconsistencyWarning: ["Key 'semi_major_axis' is missing ... + ... + mllam_data_prep.ops.projection.ProjectionInconsistencyWarning: ['\\'crs_wkt\\' differs: EPSG:4326 != GEOGCRS... >>> validate_projection_consistency([proj2, proj4]) Traceback (most recent call last): ... @@ -136,14 +136,19 @@ def validate_projection_consistency( cf_output = set(proj_obs).pop().to_cf() issues = [] for proj in projections: - cf_wkt_set = "crs_wkt" in cf_output - cf_indv_attrs_set = set(cf_output.keys()) - {"crs_wkt"} + cf_wkt_set = "crs_wkt" in proj + cf_indv_attrs_set = set(proj.keys()) - {"crs_wkt"} for key, value in cf_output.items(): if key == "crs_wkt" and cf_wkt_set and value != proj["crs_wkt"]: issues.append(f"'{key}' differs: {proj[key]} != {value}") elif key != "crs_wkt" and cf_indv_attrs_set and key not in proj: issues.append(f"Key '{key}' is missing in the projection information.") - elif key == "crs_wkt" and cf_indv_attrs_set and value != proj[key]: + elif ( + key == "crs_wkt" + and cf_indv_attrs_set + and cf_wkt_set + and value != proj[key] + ): issues.append(f"Value for key '{key}' differs: {proj[key]} != {value}") for key in proj: if key not in cf_output: diff --git a/tests/test_from_config.py b/tests/test_from_config.py index 5eb66ba..4fff6ff 100644 --- a/tests/test_from_config.py +++ b/tests/test_from_config.py @@ -1,12 +1,15 @@ import tempfile from pathlib import Path +from typing import Dict import isodate +import pyproj import pytest import yaml import mllam_data_prep as mdp import tests.data as testdata +import tests.test_config as testconfig def test_gen_data(): @@ -276,6 +279,63 @@ def test_feature_collision(use_common_feature_var_name): mdp.create_dataset_zarr(fp_config=fp_config) +@pytest.mark.parametrize( + "projection", + [ + { + "crs": { + "dims": "[x y]", + "attributes": { + "crs_wkt": 'GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]]],CS[ellipsoidal,2],AXIS["latitude",north,ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],AXIS["longitude",east,ORDER[2],ANGLEUNIT["degree",0.0174532925199433]],ID["EPSG",4326]]' + }, + } + }, + { + "crs": { + "dims": "[x y]", + "attributes": { + "grid_mapping_name": "rotated_latitude_longitude", + "grid_north_pole_latitude": 10, + "grid_north_pole_longitude": 50, + }, + } + }, + ], +) +def test_projection_from_config(projection: Dict): + """ + Test parsing of projection information from config + and check if it is written to the output file. + """ + # Adding projection config to the example config + config = yaml.safe_load(testconfig.VALID_EXAMPLE_CONFIG_YAML) + config["inputs"]["danra_surface"]["projections"] = projection + config["inputs"]["danra_height_levels"]["projections"] = projection + config_yaml = yaml.dump(config) + + config = mdp.Config.from_yaml(config_yaml) + + ds = mdp.create_dataset(config=config) + # Test CF-conform projection variable attributes + # for var in set(ds.data_vars).intersection(VARIABLES_ON_PROJECTION): + # assert "grid_mapping" in ds[var].attrs + # assert ds[var].attrs["grid_mapping"] in projection.keys() + # for var in set(ds.data_vars).difference(VARIABLES_ON_PROJECTION): + # assert "grid_mapping" not in ds[var].attrs + + for proj in projection.keys(): + assert proj in ds, "Projection variable not found in dataset" + if "crs_wkt" in projection[proj]["attributes"]: + assert pyproj.CRS.from_wkt( + ds[proj].attrs["crs_wkt"] + ) == pyproj.CRS.from_wkt(projection[proj]["attributes"]["crs_wkt"]) + else: + ds[proj].attrs.pop("crs_wkt") + assert pyproj.CRS.from_cf(ds[proj].attrs) == pyproj.CRS.from_cf( + projection[proj]["attributes"] + ) + + def test_danra_example(): fp_config = Path(__file__).parent.parent / "example.danra.yaml" with tempfile.TemporaryDirectory(suffix=".zarr") as tmpdir: From d8950941f69b45c5c91e8b4f220caecbb300d7b3 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:09:52 +0100 Subject: [PATCH 14/27] improve type annotation and docu --- mllam_data_prep/ops/projection.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index 3cb0d17..43a6691 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -212,13 +212,15 @@ def get_latitude_longitude_from_projection( ---------- proj : CRS Projection object. - coords : Tuple[array-like, array-like] - Coordinates to convert. + coords : Tuple[Any, Any] + Coordinates to convert. The first element is the x-coordinate (easting), the second element is the y-coordinate (northing). + output_proj : CRS, optional + Output projection object. By default this is set to EPSG:4326 (WGS84). Returns ------- - Tuple[array-like, array-like] - Latitude and longitude in degrees. + Tuple[Any, Any] + By default longitude and latitude in degrees depending on the output projection. Examples -------- @@ -235,5 +237,5 @@ def get_latitude_longitude_from_projection( if output_proj is None: output_proj = pyproj.CRS("EPSG:4326") - transformer = pyproj.Transformer.from_crs(proj, output_proj) + transformer = pyproj.Transformer.from_crs(proj, output_proj, always_xy=True) return transformer.transform(coords[0], coords[1]) From ae35d39c4b4d63c9ca38b7cd1f2dc4abd51288f2 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:10:20 +0100 Subject: [PATCH 15/27] fix example for lat,lon retrieval --- mllam_data_prep/create_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py index 67ceda1..27cf07f 100644 --- a/mllam_data_prep/create_dataset.py +++ b/mllam_data_prep/create_dataset.py @@ -207,8 +207,8 @@ def create_dataset(config: Config): projection = pyproj.CRS.from_cf(projections[0]) # TODO: generalize the retrieval of x and y coords - # coords = (dataarrays_by_target[target_output_var]['x'], dataarrays_by_target[target_output_var]['y']) - # lat, lon = get_latitude_longitude_from_projection(projection, coords) + # coords = (dataarrays_by_target[target_output_var][0]['x'], dataarrays_by_target[target_output_var][0]['y']) + # lon, lat = get_latitude_longitude_from_projection(projection, coords) ds = _merge_dataarrays_by_target(dataarrays_by_target=dataarrays_by_target) From 05c3921b6568a3935038e0f53718091a8a2af28c Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:13:38 +0100 Subject: [PATCH 16/27] fix doctest --- mllam_data_prep/ops/projection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index 43a6691..45de82f 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -228,11 +228,11 @@ def get_latitude_longitude_from_projection( >>> proj = pyproj.CRS.from_cf({"crs_wkt":"EPSG:3857"}) >>> coords = (500000, 4649776) >>> np.round(get_latitude_longitude_from_projection(proj, coords),3) - array([38.496, 4.492]) + array([ 4.492, 38.496]) >>> coords = ([400000, 500000], [3649776, 4649776]) >>> np.round(get_latitude_longitude_from_projection(proj, coords),3) - array([[31.131, 38.496], - [ 3.593, 4.492]]) + array([[ 3.593, 4.492], + [31.131, 38.496]]) """ if output_proj is None: output_proj = pyproj.CRS("EPSG:4326") From 350cf9258408687cca354ab76c28f7b2ed71496d Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 14:43:35 +0100 Subject: [PATCH 17/27] document changes --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6a8ae7..007f4a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- add support for reading, setting and outputting CF-Conventions complient projection information. ![\#38](https://github.com/mllam/mllam-data-prep/pull/38) - add optional output path argument to parser. ![\#26](https://github.com/mllam/mllam-data-prep/pull/26) ### Changed From 18980b3cfab3328629c2952457bb3fce278865b6 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:19:41 +0100 Subject: [PATCH 18/27] fix docstring --- mllam_data_prep/ops/projection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index 45de82f..922c4bd 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -178,7 +178,7 @@ def get_projection_crs(ds: xr.Dataset) -> Dict[str, Any]: ------- Dict[str, Any] Projection information as a dictionary with the projection - variable names as keys and the projection objects as values + variable names as keys and the projection attributes as values """ vars_w_proj = _get_projection_mappings(ds) proj_vars = set(proj for sublist in vars_w_proj.values() for proj in sublist) From 5da27df3afea710d33436826d45cc15abf07f5fb Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Mon, 18 Nov 2024 10:46:00 +0100 Subject: [PATCH 19/27] change standard CRS variable name --- example.danra.yaml | 2 +- mllam_data_prep/create_dataset.py | 6 +++--- tests/test_from_config.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/example.danra.yaml b/example.danra.yaml index 0087b56..a2e1166 100644 --- a/example.danra.yaml +++ b/example.danra.yaml @@ -43,7 +43,7 @@ inputs: values: [100, ] units: m projections: - crs: + __common__: dims: [x, y] attributes: crs_wkt: 'PROJCRS["DMI HARMONIE DANRA lambert projection",BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS",DATUM["DMI HARMONIE DANRA lambert datum",ELLIPSOID["Sphere",6367470,0,LENGTHUNIT["metre",1,ID["EPSG",9001]]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8901]]],CONVERSION["unknown",METHOD["Lambert Conic Conformal (2SP)",ID["EPSG",9802]],PARAMETER["Latitude of false origin",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],PARAMETER["Longitude of false origin",0.025,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],PARAMETER["Latitude of 1st standard parallel",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],PARAMETER["Latitude of 2nd standard parallel",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],PARAMETER["Easting at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8826]],PARAMETER["Northing at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],AXIS["(E)",east,ORDER[1],LENGTHUNIT["metre",1,ID["EPSG",9001]]],AXIS["(N)",north,ORDER[2],LENGTHUNIT["metre",1,ID["EPSG",9001]]]]' diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py index 27cf07f..7c4724c 100644 --- a/mllam_data_prep/create_dataset.py +++ b/mllam_data_prep/create_dataset.py @@ -137,7 +137,7 @@ def create_dataset(config: Config): # TODO: remove this once grid_mapping is set in config file for var in ds.data_vars: - ds[var].attrs["grid_mapping"] = "crs" + ds[var].attrs["grid_mapping"] = "__common__" dim_mapping = input_config.dim_mapping @@ -258,9 +258,9 @@ def create_dataset(config: Config): # add the projection information to the dataset if projections: - ds["crs"] = xr.DataArray(0, attrs=projection.to_cf()).astype("int16") + ds["__common__"] = xr.DataArray(0, attrs=projection.to_cf()).astype("int16") # for var in ["VARIABLES_WITH_PROKJECTION"]: - # ds[var].attrs["grid_mapping"] = "crs" + # ds[var].attrs["grid_mapping"] = "__common__" ds.attrs = {} ds.attrs["schema_version"] = config.schema_version diff --git a/tests/test_from_config.py b/tests/test_from_config.py index 4fff6ff..d0ba834 100644 --- a/tests/test_from_config.py +++ b/tests/test_from_config.py @@ -283,7 +283,7 @@ def test_feature_collision(use_common_feature_var_name): "projection", [ { - "crs": { + "__common__": { "dims": "[x y]", "attributes": { "crs_wkt": 'GEOGCRS["WGS 84",DATUM["World Geodetic System 1984",ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]]],CS[ellipsoidal,2],AXIS["latitude",north,ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],AXIS["longitude",east,ORDER[2],ANGLEUNIT["degree",0.0174532925199433]],ID["EPSG",4326]]' @@ -291,7 +291,7 @@ def test_feature_collision(use_common_feature_var_name): } }, { - "crs": { + "__common__": { "dims": "[x y]", "attributes": { "grid_mapping_name": "rotated_latitude_longitude", From 0677415fbaa9a2b692ab7fc3cb45cf93c71664f8 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:46:20 +0100 Subject: [PATCH 20/27] use cartopy plotting compatible WKT --- example.danra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example.danra.yaml b/example.danra.yaml index a2e1166..db6bc3c 100644 --- a/example.danra.yaml +++ b/example.danra.yaml @@ -46,7 +46,7 @@ inputs: __common__: dims: [x, y] attributes: - crs_wkt: 'PROJCRS["DMI HARMONIE DANRA lambert projection",BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS",DATUM["DMI HARMONIE DANRA lambert datum",ELLIPSOID["Sphere",6367470,0,LENGTHUNIT["metre",1,ID["EPSG",9001]]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8901]]],CONVERSION["unknown",METHOD["Lambert Conic Conformal (2SP)",ID["EPSG",9802]],PARAMETER["Latitude of false origin",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],PARAMETER["Longitude of false origin",0.025,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],PARAMETER["Latitude of 1st standard parallel",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],PARAMETER["Latitude of 2nd standard parallel",0.0567,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],PARAMETER["Easting at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8826]],PARAMETER["Northing at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],AXIS["(E)",east,ORDER[1],LENGTHUNIT["metre",1,ID["EPSG",9001]]],AXIS["(N)",north,ORDER[2],LENGTHUNIT["metre",1,ID["EPSG",9001]]]]' + crs_wkt: 'PROJCRS["DMI HARMONIE DANRA lambert projection", BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS", DATUM["DMI HARMONIE DANRA lambert datum", ELLIPSOID["Sphere", 6367470, 0, LENGTHUNIT["metre", 1, ID["EPSG", 9001]]]], PRIMEM["Greenwich", 0, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8901]]], ID["EPSG",4035]], CONVERSION["Lambert Conic Conformal (2SP)", METHOD["Lambert Conic Conformal (2SP)", ID["EPSG", 9802]], PARAMETER["Latitude of false origin", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8821]]], PARAMETER["Longitude of false origin", 25, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8822]]], PARAMETER["Latitude of 1st standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8823]]], PARAMETER["Latitude of 2nd standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8824]]], PARAMETER["Easting at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8826]]], PARAMETER["Northing at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8827]]]], CS[Cartesian, 2], AXIS["(E)", east, ORDER[1], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], AXIS["(N)", north, ORDER[2], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], USAGE[AREA["Denmark and surrounding regions"], BBOX[47, -3, 65, 25], SCOPE["Danra reanalysis projection"]]]' semi_major_axis: 6367470.0 semi_minor_axis: 6367470.0 inverse_flattening: 0.0 From 967e9bd3fd99a841e88bda37ad8b229ce45ffd54 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:48:11 +0100 Subject: [PATCH 21/27] fix typo Co-authored-by: SimonKamuk <43374850+SimonKamuk@users.noreply.github.com> --- mllam_data_prep/ops/projection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py index 922c4bd..c7ed4a5 100644 --- a/mllam_data_prep/ops/projection.py +++ b/mllam_data_prep/ops/projection.py @@ -33,7 +33,7 @@ def _extract_grid_mapping_names(grid_mapping_attr: str) -> List[str]: >>> _extract_grid_mapping_names(m2) ['crsOSGB'] - Note: merge of https://github.com/pydata/xarray/pull/9765 of will allow xarray to handle this directly. + Note: merge of https://github.com/pydata/xarray/pull/9765 will allow xarray to handle this directly. """ if ":" not in grid_mapping_attr: return grid_mapping_attr.split(" ") From b830f9510a69ad0476e6d737792d720f44c9d824 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:53:46 +0100 Subject: [PATCH 22/27] add details to changes --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 007f4a8..c39bea8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- add support for reading, setting and outputting CF-Conventions complient projection information. ![\#38](https://github.com/mllam/mllam-data-prep/pull/38) +- add support for reading, setting (via `projections["__common__"]` section in config) and outputting CF-Conventions complient projection information. ![\#38](https://github.com/mllam/mllam-data-prep/pull/38) +- add `doctest-modules` to CI. ![\#38](https://github.com/mllam/mllam-data-prep/pull/38) - add optional output path argument to parser. ![\#26](https://github.com/mllam/mllam-data-prep/pull/26) ### Changed From ed7250aba40f59072e336c19c5692d4349e19738 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:02:29 +0100 Subject: [PATCH 23/27] add warning for overwriting of dataset projection info --- mllam_data_prep/create_dataset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py index 52a3b55..37c291d 100644 --- a/mllam_data_prep/create_dataset.py +++ b/mllam_data_prep/create_dataset.py @@ -201,6 +201,10 @@ def create_dataset(config: Config): logger.warning( f"No projection information found neither in dataset {dataset_name}, nor in config." ) + elif projection_crs is not None and dataset_projections: + logger.warning( + f"Projection information given in {dataset_name} is overwritten by that given in config." + ) if projection_crs is not None: projections.extend(projection_crs.values()) From a6005f6bb78afcd42a920ee9ac91f4f9fc015e60 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:21:01 +0100 Subject: [PATCH 24/27] use latest schema version for VALID_EXAMPLE_CONFIG_YAML --- tests/test_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index a0da682..60b7619 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,6 +2,7 @@ from dataclass_wizard.errors import MissingFields, UnknownJSONKey import mllam_data_prep as mdp +import tests.data as testdata INVALID_EXTRA_FIELDS_CONFIG_YAML = """ schema_version: v0.1.0 @@ -36,7 +37,7 @@ def test_get_config_issues(): VALID_EXAMPLE_CONFIG_YAML = """ -schema_version: v0.1.0 +schema_version: {schema_version} dataset_version: v0.1.0 output: @@ -107,7 +108,7 @@ def test_get_config_issues(): method: stack_variables_by_var_name name_format: f"{var_name}" target_output_variable: forcing -""" +""".format(schema_version=testdata.SCHEMA_VERSION) def test_get_config_nested(): From a63502d4acf26078cd11577f6f68dad9b0a8f6f3 Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:42:47 +0100 Subject: [PATCH 25/27] fix linting --- tests/test_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 60b7619..9fa486f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -108,8 +108,9 @@ def test_get_config_issues(): method: stack_variables_by_var_name name_format: f"{var_name}" target_output_variable: forcing -""".format(schema_version=testdata.SCHEMA_VERSION) - +""".format( + schema_version=testdata.SCHEMA_VERSION +) def test_get_config_nested(): config = mdp.Config.from_yaml(VALID_EXAMPLE_CONFIG_YAML) From d4480c031de10e2bd2388fed5ef9bbcf537a451c Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:49:01 +0100 Subject: [PATCH 26/27] fix early evaluation of var_name, altitude --- tests/test_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 9fa486f..cb523c5 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -86,7 +86,7 @@ def test_get_config_issues(): state_feature: method: stack_variables_by_var_name dims: [altitude] - name_format: f"{var_name}{altitude}m" + name_format: f"{{var_name}}{{altitude}}m" grid_index: method: stack dims: [x, y] @@ -106,7 +106,7 @@ def test_get_config_issues(): dims: [x, y] forcing_feature: method: stack_variables_by_var_name - name_format: f"{var_name}" + name_format: f"{{var_name}}" target_output_variable: forcing """.format( schema_version=testdata.SCHEMA_VERSION From 7e5c79caa52a158bfb2eab7c11597e1ebcf0349d Mon Sep 17 00:00:00 2001 From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:51:39 +0100 Subject: [PATCH 27/27] fix linting --- tests/test_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_config.py b/tests/test_config.py index cb523c5..eff5285 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -112,6 +112,7 @@ def test_get_config_issues(): schema_version=testdata.SCHEMA_VERSION ) + def test_get_config_nested(): config = mdp.Config.from_yaml(VALID_EXAMPLE_CONFIG_YAML)