diff --git a/.github/workflows/python-package-pip.yml b/.github/workflows/python-package-pip.yml index 0a1c5bb..cfccb7c 100644 --- a/.github/workflows/python-package-pip.yml +++ b/.github/workflows/python-package-pip.yml @@ -24,6 +24,10 @@ jobs: python -m pip install . python -m pip install pytest + - name: Run doctests + run: | + python -m pytest --doctest-modules mllam_data_prep/ + - name: Run tests env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} diff --git a/CHANGELOG.md b/CHANGELOG.md index d6a7576..527151c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,8 @@ interface and addresses bugs around optional dependencies for ### Added +- add support for reading, setting (via `projections["__common__"]` section in config) and outputting CF-Conventions complient projection information. ![\#38](https://github.com/mllam/mllam-data-prep/pull/38) +- add `doctest-modules` to CI. ![\#38](https://github.com/mllam/mllam-data-prep/pull/38) - add optional output path argument to parser. ![\#26](https://github.com/mllam/mllam-data-prep/pull/26) ### Changed diff --git a/example.danra.yaml b/example.danra.yaml index 3edf126..962e25e 100644 --- a/example.danra.yaml +++ b/example.danra.yaml @@ -42,6 +42,10 @@ inputs: altitude: values: [100, ] units: m + projections: + danra_projection: + dims: [x, y] + crs_wkt: 'PROJCRS["DMI HARMONIE DANRA lambert projection", BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS", DATUM["DMI HARMONIE DANRA lambert datum", ELLIPSOID["Sphere", 6367470, 0, LENGTHUNIT["metre", 1, ID["EPSG", 9001]]]], PRIMEM["Greenwich", 0, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8901]]], ID["EPSG",4035]], CONVERSION["Lambert Conic Conformal (2SP)", METHOD["Lambert Conic Conformal (2SP)", ID["EPSG", 9802]], PARAMETER["Latitude of false origin", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8821]]], PARAMETER["Longitude of false origin", 25, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8822]]], PARAMETER["Latitude of 1st standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8823]]], PARAMETER["Latitude of 2nd standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8824]]], PARAMETER["Easting at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8826]]], PARAMETER["Northing at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8827]]]], CS[Cartesian, 2], AXIS["(E)", east, ORDER[1], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], AXIS["(N)", north, ORDER[2], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], USAGE[AREA["Denmark and surrounding regions"], BBOX[47, -3, 65, 25], SCOPE["Danra reanalysis projection"]]]' dim_mapping: time: method: rename @@ -61,6 +65,10 @@ inputs: variables: # use surface incoming shortwave radiation as forcing - swavr0m + projections: + danra_projection: + dims: [x, y] + crs_wkt: 'PROJCRS["DMI HARMONIE DANRA lambert projection", BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS", DATUM["DMI HARMONIE DANRA lambert datum", ELLIPSOID["Sphere", 6367470, 0, LENGTHUNIT["metre", 1, ID["EPSG", 9001]]]], PRIMEM["Greenwich", 0, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8901]]], ID["EPSG",4035]], CONVERSION["Lambert Conic Conformal (2SP)", METHOD["Lambert Conic Conformal (2SP)", ID["EPSG", 9802]], PARAMETER["Latitude of false origin", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8821]]], PARAMETER["Longitude of false origin", 25, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8822]]], PARAMETER["Latitude of 1st standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8823]]], PARAMETER["Latitude of 2nd standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8824]]], PARAMETER["Easting at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8826]]], PARAMETER["Northing at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8827]]]], CS[Cartesian, 2], AXIS["(E)", east, ORDER[1], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], AXIS["(N)", north, ORDER[2], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], USAGE[AREA["Denmark and surrounding regions"], BBOX[47, -3, 65, 25], SCOPE["Danra reanalysis projection"]]]' dim_mapping: time: method: rename @@ -78,6 +86,10 @@ inputs: dims: [x, y] variables: - lsm + projections: + danra_projection: + dims: [x, y] + crs_wkt: 'PROJCRS["DMI HARMONIE DANRA lambert projection", BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS", DATUM["DMI HARMONIE DANRA lambert datum", ELLIPSOID["Sphere", 6367470, 0, LENGTHUNIT["metre", 1, ID["EPSG", 9001]]]], PRIMEM["Greenwich", 0, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8901]]], ID["EPSG",4035]], CONVERSION["Lambert Conic Conformal (2SP)", METHOD["Lambert Conic Conformal (2SP)", ID["EPSG", 9802]], PARAMETER["Latitude of false origin", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8821]]], PARAMETER["Longitude of false origin", 25, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8822]]], PARAMETER["Latitude of 1st standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8823]]], PARAMETER["Latitude of 2nd standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8824]]], PARAMETER["Easting at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8826]]], PARAMETER["Northing at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8827]]]], CS[Cartesian, 2], AXIS["(E)", east, ORDER[1], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], AXIS["(N)", north, ORDER[2], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], USAGE[AREA["Denmark and surrounding regions"], BBOX[47, -3, 65, 25], SCOPE["Danra reanalysis projection"]]]' dim_mapping: grid_index: method: stack diff --git a/mllam_data_prep/config.py b/mllam_data_prep/config.py index 14e80ef..0d84013 100644 --- a/mllam_data_prep/config.py +++ b/mllam_data_prep/config.py @@ -110,6 +110,27 @@ class DimMapping: name_format: str = field(default=None) +@dataclass +class Projection: + """ + Definition of a single coordinate reference system (CRS) projection that + variables in the dataset refer to. + + Attributes + ---------- + dims : List[str] + The dimensions of the projection, usually ["x", "y"]. + crs_wkt : str + The well-known text (WKT) representation of the CRS. For compatibility with cartopy the WKT string + needs to contain a BBOX parameter, e.g. `crs_wkt = "PROJCS[...], BBOX[...]"`. + Registered CRSs at e.g. EPSG can be used as well, e.g. `crs_wkt = "EPSG:4326"`. These often will resolve to + a WKT string that includes the BBOX parameter (see https://epsg.io/4326). + """ + + dims: List[str] + crs_wkt: str + + @dataclass class InputDataset: """ @@ -155,9 +176,14 @@ class InputDataset: path: str dims: List[str] - variables: Union[List[str], Dict[str, Dict[str, ValueSelection]]] + variables: Union[ + List[str], + Dict[str, Dict[str, ValueSelection]], + Dict[str, Dict[str, Dict[str, str]]], + ] dim_mapping: Dict[str, DimMapping] target_output_variable: str + projections: Dict[str, Projection] attributes: Dict[str, Any] = None diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py index 73996cf..36d8357 100644 --- a/mllam_data_prep/create_dataset.py +++ b/mllam_data_prep/create_dataset.py @@ -4,6 +4,7 @@ from pathlib import Path import numpy as np +import pyproj import xarray as xr from loguru import logger from numcodecs import Blosc @@ -12,6 +13,13 @@ from .config import Config, InvalidConfigException from .ops.loading import load_and_subset_dataset from .ops.mapping import map_dims_and_variables +from .ops.projection import ( + ProjectionCompatibilityWarning, + ProjectionInconsistencyWarning, + check_projection_compatibility, + get_projection_crs, + validate_projection_consistency, +) from .ops.selection import select_by_kwargs from .ops.statistics import calc_stats @@ -122,6 +130,7 @@ def create_dataset(config: Config): output_coord_ranges = output_config.coord_ranges dataarrays_by_target = defaultdict(list) + projections = {} for dataset_name, input_config in config.inputs.items(): path = input_config.path @@ -129,6 +138,7 @@ def create_dataset(config: Config): target_output_var = input_config.target_output_variable expected_input_attributes = input_config.attributes or {} expected_input_var_dims = input_config.dims + dataset_projections = input_config.projections or {} output_dims = output_config.variables[target_output_var] @@ -143,6 +153,40 @@ def create_dataset(config: Config): dataset_name=dataset_name, ) + # get the projection information from the dataset and update it with the projection + # information given in the input config + # TODO: read the projection information from the config + try: + projection_crs = get_projection_crs(ds) + except ValueError: + projection_crs = None + if projection_crs is None and dataset_projections: + logger.warning( + f"Projection information not found in dataset {dataset_name}, using projection information from config." + ) + projection_crs = { + crs_var: {"crs_wkt": p.crs_wkt} + for crs_var, p in dataset_projections.items() + } + elif projection_crs is None and not dataset_projections: + logger.warning( + f"No projection information found neither in dataset {dataset_name}, nor in config." + ) + elif projection_crs is not None and dataset_projections: + logger.warning( + f"Projection information given in {dataset_name} is overwritten by that given in config." + ) + projection_crs = { + crs_var: {"crs_wkt": p.crs_wkt} + for crs_var, p in dataset_projections.items() + } + if projection_crs is not None: + projections.update(projection_crs) + + # TODO: remove this once grid_mapping is set in config file + for var in ds.data_vars: + ds[var].attrs["grid_mapping"] = list(projection_crs.keys())[0] + dim_mapping = input_config.dim_mapping # check that there is an entry for each arch dimension @@ -184,6 +228,24 @@ def create_dataset(config: Config): dataarrays_by_target[target_output_var].append(da_target) + # validate projections across input datasets + if projections: + try: + validate_projection_consistency(projections.values()) + except ProjectionInconsistencyWarning as e: + logger.warning(f"Projection information might be ambiguous: {e}") + projection = pyproj.CRS.from_cf(list(projections.values())[0]) + try: + check_projection_compatibility(list(projections.values())[0]) + except ProjectionCompatibilityWarning: + logger.warning( + "WKT string should include a BBOX definition to be compatible with e.g. cartopy." + ) + + # TODO: generalize the retrieval of x and y coords + # coords = (dataarrays_by_target[target_output_var][0]['x'], dataarrays_by_target[target_output_var][0]['y']) + # lon, lat = get_latitude_longitude_from_projection(projection, coords) + ds = _merge_dataarrays_by_target(dataarrays_by_target=dataarrays_by_target) # need to drop the encoding so that we can write to zarr with new chunksizes @@ -230,6 +292,14 @@ def create_dataset(config: Config): ) ds["splits"] = da_splits + # add the projection information to the dataset + if projections: + ds[projections.keys()] = xr.DataArray(0, attrs=projection.to_cf()).astype( + "int16" + ) + # for var in ["VARIABLES_WITH_PROKJECTION"]: + # ds[var].attrs["grid_mapping"] = "__common__" + ds.attrs = {} ds.attrs["schema_version"] = config.schema_version ds.attrs["dataset_version"] = config.dataset_version diff --git a/mllam_data_prep/ops/projection.py b/mllam_data_prep/ops/projection.py new file mode 100644 index 0000000..3bcf160 --- /dev/null +++ b/mllam_data_prep/ops/projection.py @@ -0,0 +1,312 @@ +"""Collection of operators handling projection information.""" + +from typing import Any, Dict, List, Tuple, Union + +import pyproj +import xarray as xr +from pyproj import CRS + + +class ProjectionInconsistencyWarning(Warning): + pass + + +class ProjectionCompatibilityWarning(Warning): + pass + + +def _extract_grid_mapping_names(grid_mapping_attr: str) -> List[str]: + """Extract grid mapping names from the grid_mapping attribute. + + Parameters + ---------- + grid_mapping_attr : str + The grid_mapping attribute. + + Returns + ------- + list + List of grid mapping names. + + Examples + -------- + >>> m1 = "crsOSGB: x y crsWGS84: lat lon" + >>> _extract_grid_mapping_names(m1) + ['crsOSGB', 'crsWGS84'] + >>> m2 = "crsOSGB" + >>> _extract_grid_mapping_names(m2) + ['crsOSGB'] + + Note: merge of https://github.com/pydata/xarray/pull/9765 will allow xarray to handle this directly. + """ + if ":" not in grid_mapping_attr: + return grid_mapping_attr.split(" ") + else: + return [ + key.split(":")[0].strip() + for key in grid_mapping_attr.split(" ") + if ":" in key + ] + + +def _get_projection_mappings(dataset: xr.Dataset) -> Dict[str, Any]: + """Get projections referenced by variables. + + This function extracts the projection variables from a dataset + by evaluating the grid_mapping attribute of each variable following + the CF-Conventions. + + Parameters + ---------- + dataset : xr.Dataset + Dataset to get the projection information from + + Returns + ------- + Dict[str, str] + Dictionary with the variable names as keys and the projection + variable names as values + """ + projections = {} + for var in dataset.variables: + if "grid_mapping" in dataset[var].attrs: + gm = _extract_grid_mapping_names(dataset[var].attrs["grid_mapping"]) + projections[var] = gm + return projections + + +def check_projection_compatibility(proj: Dict[str, Union[str, dict]]) -> None: + """Check if the projection is compatible with mllam software. + + Checks: + - if information is given as WKT string + - if the WKT string includes a BBOX definition + + Parameters + ---------- + proj : Dict[str, Union[str, dict]] + Projection information as a dictionary with the projection + variable names as keys and the projection attributes as values + + Examples + -------- + >>> proj1 = {"crs_wkt": "EPSG:4326"} + >>> check_projection_compatibility(proj1) + >>> crs_wkt = ( + ... 'GEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble",' + ... 'MEMBER["World Geodetic System 1984 (Transit)"],' + ... 'MEMBER["World Geodetic System 1984 (G730)"],' + ... 'MEMBER["World Geodetic System 1984 (G873)"],' + ... 'MEMBER["World Geodetic System 1984 (G1150)"],' + ... 'MEMBER["World Geodetic System 1984 (G1674)"],' + ... 'MEMBER["World Geodetic System 1984 (G1762)"],' + ... 'MEMBER["World Geodetic System 1984 (G2139)"],' + ... 'MEMBER["World Geodetic System 1984 (G2296)"],' + ... 'ELLIPSOID["WGS 84",6378137,298.257223563,' + ... 'LENGTHUNIT["metre",1]],ENSEMBLEACCURACY[2.0]],' + ... 'PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],' + ... 'CS[ellipsoidal,2],AXIS["geodetic latitude (Lat)",north,' + ... 'ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],' + ... 'AXIS["geodetic longitude (Lon)",east,ORDER[2],' + ... 'ANGLEUNIT["degree",0.0174532925199433]],' + ... 'USAGE[SCOPE["Horizontal component of 3D system."],' + ... 'AREA["World."],],ID["EPSG",4326]]' + ... ) + >>> proj2 = {"crs_wkt": crs_wkt} + >>> check_projection_compatibility(proj2) + Traceback (most recent call last): + ... + mllam_data_prep.ops.projection.ProjectionCompatibilityWarning: WKT string should include a BBOX definition to be compatible with e.g. cartopy. + >>> proj3 = { + ... "semi_major_axis": 6367470.0, + ... "semi_minor_axis": 6367470.0, + ... "reference_ellipsoid_name": "Sphere", + ... "grid_mapping_name": "lambert_conformal_conic", + ... "standard_parallel": [0.0567, 0.0567], + ... "latitude_of_projection_origin": 0.0567, + ... "longitude_of_central_meridian": 0.025, + ... } + >>> check_projection_compatibility(proj3) + Traceback (most recent call last): + ... + KeyError: 'Projection information must contain a WKT string.' + """ + if "crs_wkt" not in proj: + raise KeyError("Projection information must contain a WKT string.") + + if "BBOX" not in pyproj.CRS.from_cf(proj).to_wkt(): + raise ProjectionCompatibilityWarning( + "WKT string should include a BBOX definition to be compatible with e.g. cartopy." + ) + + return + + +def validate_projection_consistency( + projections: List[Dict[str, Union[str, dict]]] +) -> None: + """Validate the consistency of the projection information. + + Examples + -------- + >>> crs_wkt = ( + ... 'GEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble",' + ... 'MEMBER["World Geodetic System 1984 (Transit)"],' + ... 'MEMBER["World Geodetic System 1984 (G730)"],' + ... 'MEMBER["World Geodetic System 1984 (G873)"],' + ... 'MEMBER["World Geodetic System 1984 (G1150)"],' + ... 'MEMBER["World Geodetic System 1984 (G1674)"],' + ... 'MEMBER["World Geodetic System 1984 (G1762)"],' + ... 'MEMBER["World Geodetic System 1984 (G2139)"],' + ... 'MEMBER["World Geodetic System 1984 (G2296)"],' + ... 'ELLIPSOID["WGS 84",6378137,298.257223563,' + ... 'LENGTHUNIT["metre",1]],ENSEMBLEACCURACY[2.0]],' + ... 'PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],' + ... 'CS[ellipsoidal,2],AXIS["geodetic latitude (Lat)",north,' + ... 'ORDER[1],ANGLEUNIT["degree",0.0174532925199433]],' + ... 'AXIS["geodetic longitude (Lon)",east,ORDER[2],' + ... 'ANGLEUNIT["degree",0.0174532925199433]],' + ... 'USAGE[SCOPE["Horizontal component of 3D system."],' + ... 'AREA["World."],BBOX[-90,-180,90,180]],ID["EPSG",4326]]' + ... ) + >>> proj1 = {"crs_wkt": crs_wkt} + >>> proj2 = {"crs_wkt": "EPSG:4326"} + >>> proj3 = { + ... "crs_wkt": crs_wkt, + ... 'semi_major_axis': 6378137.0, + ... 'semi_minor_axis': 6356752.314245179, + ... 'inverse_flattening': 298.257223563, + ... 'reference_ellipsoid_name': 'WGS 84', + ... 'longitude_of_prime_meridian': 0.0, + ... 'prime_meridian_name': 'Greenwich', + ... 'geographic_crs_name': 'WGS 84', + ... 'horizontal_datum_name': 'World Geodetic System 1984 ensemble', + ... 'grid_mapping_name': 'latitude_longitude' + ... } + >>> proj4 = {"crs_wkt": "AGD84"} + >>> validate_projection_consistency([proj1, proj2, proj3]) + Traceback (most recent call last): + ... + mllam_data_prep.ops.projection.ProjectionInconsistencyWarning: ['\\'crs_wkt\\' differs: EPSG:4326 != GEOGCRS... + >>> validate_projection_consistency([proj2, proj4]) + Traceback (most recent call last): + ... + NotImplementedError: Multiple projections found in the dataset. Currently only one projection is supported. + """ + proj_obs = [pyproj.CRS.from_cf(proj) for proj in projections] + + # Check that all projections are the same + if len(set(proj_obs)) > 1: + raise NotImplementedError( + "Multiple projections found in the dataset. " + "Currently only one projection is supported." + ) + + # Check that conversion to CF is consistent with input + # and all projection information is given + cf_output = set(proj_obs).pop().to_cf() + issues = [] + for proj in projections: + cf_wkt_set = "crs_wkt" in proj + cf_indv_attrs_set = set(proj.keys()) - {"crs_wkt"} + for key, value in cf_output.items(): + if key == "crs_wkt" and cf_wkt_set and value != proj["crs_wkt"]: + issues.append(f"'{key}' differs: {proj[key]} != {value}") + elif key != "crs_wkt" and cf_indv_attrs_set and key not in proj: + issues.append(f"Key '{key}' is missing in the projection information.") + elif ( + key == "crs_wkt" + and cf_indv_attrs_set + and cf_wkt_set + and value != proj[key] + ): + issues.append(f"Value for key '{key}' differs: {proj[key]} != {value}") + for key in proj: + if key not in cf_output: + issues.append( + f"Key '{key}' is not expected in the projection information." + ) + if issues: + raise ProjectionInconsistencyWarning(issues) + + return + + +def get_projection_crs(ds: xr.Dataset) -> Dict[str, Any]: + """Get the projection information from the dataset. + + Parameters + ---------- + ds : xr.Dataset + CF-conform dataset to extract projection information from. + `grid_mapping` attribute must be set for each variable that + references a projection variable. + Projection variables must have either a `crs_wkt` attribute + or CF-conform single-value attributes with individual projection + information. + + Returns + ------- + Dict[str, Any] + Projection information as a dictionary with the projection + variable names as keys and the projection attributes as values + """ + vars_w_proj = _get_projection_mappings(ds) + proj_vars = set(proj for sublist in vars_w_proj.values() for proj in sublist) + vars_wo_proj = set(ds.data_vars) - set(vars_w_proj.keys()) - proj_vars + + if len(proj_vars) > 1: + raise NotImplementedError( + f"Multiple referenced projections found in the dataset {ds.encoding.get('source',None)}: {proj_vars}. " + "Currently only one projection is supported." + ) + + if len(vars_wo_proj) > 0: + raise ValueError( + f"Variables {vars_wo_proj} do not have a projection defined in the dataset {ds.encoding.get('source',None)}" + ) + + crss = {} + for crs in crss: + crss[crs] = ds[crs].attrs # pyproj.CRS.from_cf(ds[proj].attrs) + + if crss: + return crss + + +def get_latitude_longitude_from_projection( + proj: CRS, coords: Tuple[Any, Any], output_proj: CRS = None +) -> Tuple[Any, Any]: + """Get the latitude and longitude from a projection object. + + Parameters + ---------- + proj : CRS + Projection object. + coords : Tuple[Any, Any] + Coordinates to convert. The first element is the x-coordinate (easting), the second element is the y-coordinate (northing). + output_proj : CRS, optional + Output projection object. By default this is set to EPSG:4326 (WGS84). + + Returns + ------- + Tuple[Any, Any] + By default longitude and latitude in degrees depending on the output projection. + + Examples + -------- + >>> import numpy as np + >>> proj = pyproj.CRS.from_cf({"crs_wkt":"EPSG:3857"}) + >>> coords = (500000, 4649776) + >>> np.round(get_latitude_longitude_from_projection(proj, coords),3) + array([ 4.492, 38.496]) + >>> coords = ([400000, 500000], [3649776, 4649776]) + >>> np.round(get_latitude_longitude_from_projection(proj, coords),3) + array([[ 3.593, 4.492], + [31.131, 38.496]]) + """ + if output_proj is None: + output_proj = pyproj.CRS("EPSG:4326") + + transformer = pyproj.Transformer.from_crs(proj, output_proj, always_xy=True) + return transformer.transform(coords[0], coords[1]) diff --git a/pyproject.toml b/pyproject.toml index 9c65473..c376bd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,8 +10,8 @@ authors = [ {name = "Joel Oskarsson", email = "joel.oskarsson@liu.se"}, ] dependencies = [ - "xarray>=2024.2.0", - "zarr>=2.17.0", + "xarray==2024.2.0", + "zarr==2.17.0", "pyyaml>=6.0.1", "loguru>=0.7.2", "isodate>=0.6.1", @@ -22,8 +22,9 @@ dependencies = [ "rich>=13.7.1", "dask>=2024.2.1", "psutil>=5.7.2", + "pyproj>=3.7.0", ] -requires-python = ">=3.9" +requires-python = ">=3.10" readme = "README.md" license = {text = "MIT"} diff --git a/tests/data.py b/tests/data.py index 78739ee..9bb2a3e 100644 --- a/tests/data.py +++ b/tests/data.py @@ -28,6 +28,7 @@ "forecast_on_levels", "static", ] +DANRA_CRS_WKT = """PROJCRS["DMI HARMONIE DANRA lambert projection", BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS", DATUM["DMI HARMONIE DANRA lambert datum", ELLIPSOID["Sphere", 6367470, 0, LENGTHUNIT["metre", 1, ID["EPSG", 9001]]]], PRIMEM["Greenwich", 0, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8901]]], ID["EPSG",4035]], CONVERSION["Lambert Conic Conformal (2SP)", METHOD["Lambert Conic Conformal (2SP)", ID["EPSG", 9802]], PARAMETER["Latitude of false origin", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8821]]], PARAMETER["Longitude of false origin", 25, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8822]]], PARAMETER["Latitude of 1st standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8823]]], PARAMETER["Latitude of 2nd standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8824]]], PARAMETER["Easting at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8826]]], PARAMETER["Northing at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8827]]]], CS[Cartesian, 2], AXIS["(E)", east, ORDER[1], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], AXIS["(N)", north, ORDER[2], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], USAGE[AREA["Denmark and surrounding regions"], BBOX[47, -3, 65, 25], SCOPE["Danra reanalysis projection"]]]""" def create_surface_forecast_dataset( diff --git a/tests/test_config.py b/tests/test_config.py index 5459db6..85b2bcf 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,6 +2,7 @@ from dataclass_wizard.errors import MissingFields, UnknownJSONKey import mllam_data_prep as mdp +import tests.data as testdata INVALID_EXTRA_FIELDS_CONFIG_YAML = """ schema_version: v0.1.0 @@ -36,7 +37,7 @@ def test_get_config_issues(): VALID_EXAMPLE_CONFIG_YAML = """ -schema_version: v0.1.0 +schema_version: {schema_version} dataset_version: v0.1.0 output: @@ -67,7 +68,7 @@ def test_get_config_issues(): inputs: danra_height_levels: - path: ~/Desktop/mldev/height_levels.zarr + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/height_levels.zarr dims: [time, x, y, altitude] variables: u: @@ -78,6 +79,10 @@ def test_get_config_issues(): altitude: values: [100, ] units: m + projections: + danra_projection: + dims: [x, y] + crs_wkt: {crs_wkt} dim_mapping: time: method: rename @@ -85,29 +90,36 @@ def test_get_config_issues(): state_feature: method: stack_variables_by_var_name dims: [altitude] - name_format: f"{var_name}{altitude}m" + name_format: f"{{var_name}}{{altitude}}m" grid_index: - method: flatten + method: stack dims: [x, y] target_output_variable: state danra_surface: - path: ~/Desktop/mldev/single_levels.zarr + path: https://mllam-test-data.s3.eu-north-1.amazonaws.com/single_levels.zarr dims: [time, x, y] variables: - pres_seasurface + projections: + danra_projection: + dims: [x, y] + crs_wkt: {crs_wkt} dim_mapping: time: method: rename dim: time grid_index: - method: flatten + method: stack dims: [x, y] forcing_feature: method: stack_variables_by_var_name - name_format: f"{var_name}" + name_format: f"{{var_name}}" target_output_variable: forcing -""" +""".format( + schema_version=testdata.SCHEMA_VERSION, + crs_wkt=testdata.DANRA_CRS_WKT, +) def test_get_config_nested(): diff --git a/tests/test_from_config.py b/tests/test_from_config.py index 06cf640..e20a59f 100644 --- a/tests/test_from_config.py +++ b/tests/test_from_config.py @@ -1,13 +1,16 @@ import shutil import tempfile from pathlib import Path +from typing import Dict import isodate +import pyproj import pytest import yaml import mllam_data_prep as mdp import tests.data as testdata +import tests.test_config as testconfig def test_gen_data(): @@ -68,6 +71,12 @@ def test_merging_static_and_surface_analysis(): path=datasets["surface_analysis"], dims=["analysis_time", "x", "y"], variables=testdata.DEFAULT_SURFACE_ANALYSIS_VARS, + projections=dict( + danra_projection=dict( + dims=["x", "y"], + crs_wkt=testdata.DANRA_CRS_WKT, + ) + ), dim_mapping=dict( time=dict( method="rename", @@ -88,6 +97,12 @@ def test_merging_static_and_surface_analysis(): path=datasets["static"], dims=["x", "y"], variables=testdata.DEFAULT_STATIC_VARS, + projections=dict( + danra_projection=dict( + dims=["x", "y"], + crs_wkt=testdata.DANRA_CRS_WKT, + ) + ), dim_mapping=dict( grid_index=dict( method="stack", @@ -161,6 +176,12 @@ def test_time_selection(source_data_contains_time_range, time_stepsize): path=datasets["surface_analysis"], dims=["analysis_time", "x", "y"], variables=testdata.DEFAULT_SURFACE_ANALYSIS_VARS, + projections=dict( + danra_projection=dict( + dims=["x", "y"], + crs_wkt=testdata.DANRA_CRS_WKT, + ) + ), dim_mapping=dict( time=dict( method="rename", @@ -229,6 +250,12 @@ def test_feature_collision(use_common_feature_var_name): path=datasets["surface_analysis"], dims=["analysis_time", "x", "y"], variables=testdata.DEFAULT_SURFACE_ANALYSIS_VARS, + projections=dict( + danra_projection=dict( + dims=["x", "y"], + crs_wkt=testdata.DANRA_CRS_WKT, + ) + ), dim_mapping={ "time": dict( method="rename", @@ -249,6 +276,12 @@ def test_feature_collision(use_common_feature_var_name): path=datasets["static"], dims=["x", "y"], variables=testdata.DEFAULT_STATIC_VARS, + projections=dict( + danra_projection=dict( + dims=["x", "y"], + crs_wkt=testdata.DANRA_CRS_WKT, + ) + ), dim_mapping={ "grid_index": dict( dims=["x", "y"], @@ -277,6 +310,64 @@ def test_feature_collision(use_common_feature_var_name): mdp.create_dataset_zarr(fp_config=fp_config) +@pytest.mark.parametrize( + "projection", + [ + { + "proj1": { + "dims": "[x y]", + "crs_wkt": """PROJCRS["DMI HARMONIE DANRA lambert projection", BASEGEOGCRS["DMI HARMONIE DANRA lambert CRS", DATUM["DMI HARMONIE DANRA lambert datum", ELLIPSOID["Sphere", 6367470, 0, LENGTHUNIT["metre", 1, ID["EPSG", 9001]]]], PRIMEM["Greenwich", 0, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8901]]], ID["EPSG",4035]], CONVERSION["Lambert Conic Conformal (2SP)", METHOD["Lambert Conic Conformal (2SP)", ID["EPSG", 9802]], PARAMETER["Latitude of false origin", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8821]]], PARAMETER["Longitude of false origin", 25, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8822]]], PARAMETER["Latitude of 1st standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8823]]], PARAMETER["Latitude of 2nd standard parallel", 56.7, ANGLEUNIT["degree", 0.0174532925199433, ID["EPSG", 8824]]], PARAMETER["Easting at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8826]]], PARAMETER["Northing at false origin", 0, LENGTHUNIT["metre", 1, ID["EPSG", 8827]]]], CS[Cartesian, 2], AXIS["(E)", east, ORDER[1], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], AXIS["(N)", north, ORDER[2], LENGTHUNIT["metre", 1, ID["EPSG", 9001]]], USAGE[AREA["Denmark and surrounding regions"], BBOX[47, -3, 65, 25], SCOPE["Danra reanalysis projection"]]]""", + } + }, + {"proj2": {"dims": "[x y]", "crs_wkt": "EPSG:4326"}}, + ], +) +def test_projection_from_config(projection: Dict): + """ + Test parsing of projection information from config + and check if it is written to the output file. + """ + # Adding projection config to the example config + config = yaml.safe_load(testconfig.VALID_EXAMPLE_CONFIG_YAML) + config["inputs"]["danra_surface"]["projections"] = projection + config["inputs"]["danra_height_levels"]["projections"] = projection + config_yaml = yaml.dump(config) + + config = mdp.Config.from_yaml(config_yaml) + + ds = mdp.create_dataset(config=config) + + for proj in projection.keys(): + assert proj in ds, "Projection variable not found in dataset" + assert pyproj.CRS.from_cf( + {"crs_wkt": ds[proj].attrs["crs_wkt"]} + ) == pyproj.CRS.from_cf( + {"crs_wkt": projection[proj]["crs_wkt"]} + ), "CRS mismatch" + + +@pytest.mark.parametrize( + "projection", + [ + {"proj2": {"dims": "[x y]", "crs_wkt": "EPSG:4326"}}, + ], +) +def test_requirement_of_single_projection(projection: Dict): + """ + Test that assertion is raised when projections + are missing from input datasets. + """ + # Adding projection config to the example config + config = yaml.safe_load(testconfig.VALID_EXAMPLE_CONFIG_YAML) + config["inputs"]["danra_surface"]["projections"] = projection + config_yaml = yaml.dump(config) + + config = mdp.Config.from_yaml(config_yaml) + + with pytest.raises(NotImplementedError): + mdp.create_dataset(config=config) + + def test_danra_example(): fp_config = Path(__file__).parent.parent / "example.danra.yaml" with tempfile.TemporaryDirectory(suffix=".zarr") as tmpdir: @@ -307,6 +398,12 @@ def test_optional_extra_section(extra_content): path=datasets["static"], dims=["x", "y"], variables=testdata.DEFAULT_STATIC_VARS, + projections=dict( + danra_projection=dict( + dims=["x", "y"], + crs_wkt=testdata.DANRA_CRS_WKT, + ) + ), dim_mapping=dict( grid_index=dict( method="stack",