diff --git a/nctotdb/__init__.py b/nctotdb/__init__.py index ae93c1b..99d1eb9 100644 --- a/nctotdb/__init__.py +++ b/nctotdb/__init__.py @@ -1,3 +1,4 @@ from .data_model import NCDataModel +from .grid_mappings import GridMapping, store_grid_mapping from .readers import TDBReader, ZarrReader from .writers import MultiAttrTDBWriter, TDBWriter, ZarrWriter \ No newline at end of file diff --git a/nctotdb/grid_mappings.py b/nctotdb/grid_mappings.py new file mode 100644 index 0000000..5937a49 --- /dev/null +++ b/nctotdb/grid_mappings.py @@ -0,0 +1,146 @@ +import json + +import iris.coord_systems + + +def store_grid_mapping(grid_mapping_variable): + """Store the metadata for a NetCDF grid mapping variable as a JSON string.""" + ncattrs = grid_mapping_variable.ncattrs() + return json.dumps({k: grid_mapping_variable.getncattr(k) for k in ncattrs}) + + +# Taken from https://github.com/SciTools/iris/blob/master/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb. +class GridMapping(object): + """ + Convert a NetCDF grid mapping variable (expressed as a JSON string) into an + Iris coordinate system. + + """ + # Grid mapping names. + GRID_MAPPING_ALBERS = 'albers_conical_equal_area' + GRID_MAPPING_AZIMUTHAL = 'azimuthal_equidistant' + GRID_MAPPING_LAMBERT_AZIMUTHAL = 'lambert_azimuthal_equal_area' + GRID_MAPPING_LAMBERT_CONFORMAL = 'lambert_conformal_conic' + GRID_MAPPING_LAMBERT_CYLINDRICAL = 'lambert_cylindrical_equal_area' + GRID_MAPPING_LAT_LON = 'latitude_longitude' + GRID_MAPPING_MERCATOR = 'mercator' + GRID_MAPPING_ORTHO = 'orthographic' + GRID_MAPPING_POLAR = 'polar_stereographic' + GRID_MAPPING_ROTATED_LAT_LON = 'rotated_latitude_longitude' + GRID_MAPPING_STEREO = 'stereographic' + GRID_MAPPING_TRANSVERSE = 'transverse_mercator' + GRID_MAPPING_VERTICAL = 'vertical_perspective' + + # Earth grid attributes. + SEMI_MAJOR_AXIS = 'semi_major_axis' + SEMI_MINOR_AXIS = 'semi_minor_axis' + INVERSE_FLATTENING = 'inverse_flattening' + EARTH_RADIUS = 'earth_radius' + LON_OF_PROJ_ORIGIN = 'longitude_of_projection_origin' + NORTH_POLE_LAT = 'grid_north_pole_latitude' + NORTH_POLE_LON = 'grid_north_pole_longitude' + NORTH_POLE_GRID_LON = 'north_pole_grid_longitude' + + def __init__(self, grid_mapping_string): + self.grid_mapping_string = grid_mapping_string + self.grid_mapping = json.loads(self.grid_mapping_string) + + def _get_ellipsoid(self): + """Return the ellipsoid definition.""" + major = self.grid_mapping.get(self.SEMI_MAJOR_AXIS, None) + minor = self.grid_mapping.get(self.SEMI_MINOR_AXIS, None) + inverse_flattening = self.grid_mapping.get(self.INVERSE_FLATTENING, None) + + if major is not None and minor is not None: + inverse_flattening = None + if major is None and minor is None and inverse_flattening is None: + major = self.grid_mapping.get(self.EARTH_RADIUS, None) + + return major, minor, inverse_flattening + + def build_coordinate_system(self): + """Create a coordinate system from the grid mapping variable.""" + major, minor, inverse_flattening = self._get_ellipsoid() + return iris.coord_systems.GeogCS(major, minor, inverse_flattening) + + def build_rotated_coordinate_system(self): + """Create a rotated coordinate system from the grid mapping variable.""" + major, minor, inverse_flattening = self._get_ellipsoid() + + north_pole_latitude = self.grid_mapping.get(self.NORTH_POLE_LAT, 90.0) + north_pole_longitude = self.grid_mapping.get(self.NORTH_POLE_LON, 0.0) + north_pole_grid_lon = self.grid_mapping.get(self.NORTH_POLE_GRID_LON, 0.0) + + ellipsoid = None + if major is not None or minor is not None or inverse_flattening is not None: + ellipsoid = iris.coord_systems.GeogCS(major, minor, inverse_flattening) + + return iris.coord_systems.RotatedGeogCS(north_pole_latitude, north_pole_longitude, + north_pole_grid_lon, ellipsoid) + + def build_mercator_coordinate_system(self): + """Create a Mercator coordinate system from the grid mapping variable.""" + major, minor, inverse_flattening = self._get_ellipsoid() + longitude_of_projection_origin = self.grid_mapping.get(self.LON_OF_PROJ_ORIGIN, None) + + ellipsoid = None + if major is not None or minor is not None or inverse_flattening is not None: + ellipsoid = iris.coord_systems.GeogCS(major, minor, inverse_flattening) + + return iris.coord_systems.Mercator(longitude_of_projection_origin, ellipsoid=ellipsoid) + + def get_grid_mapping(self): + """ + Determine the type of grid mapping variable we have and call the + appropriate converter. + + Note that we do not support translation of all grid mapping variables. + Only the following are supported: + * latitude_longitude + * mercator + * rotated_latitude_longitude + + """ + grid_mapping_name = self.grid_mapping.get('grid_mapping_name').lower() + + if grid_mapping_name == self.GRID_MAPPING_LAT_LON: + result = self.build_coordinate_system() + elif grid_mapping_name == self.GRID_MAPPING_ROTATED_LAT_LON: + result = self.build_rotated_coordinate_system() + elif grid_mapping_name == self.GRID_MAPPING_MERCATOR: + result = self.build_mercator_coordinate_system() + elif grid_mapping_name == self.GRID_MAPPING_ALBERS: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_AZIMUTHAL: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_LAMBERT_AZIMUTHAL: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_LAMBERT_CONFORMAL: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_LAMBERT_CYLINDRICAL: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_ORTHO: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_POLAR: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_STEREO: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_TRANSVERSE: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + elif grid_mapping_name == self.GRID_MAPPING_VERTICAL: + # Grid mapping type not handled yet. + raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.') + else: + # Not a valid grid mapping name. + raise ValueError(f'{grid_mapping_name!r} is not a valid grid mapping name.') + + return result diff --git a/nctotdb/readers.py b/nctotdb/readers.py index f146e51..a1a7f93 100644 --- a/nctotdb/readers.py +++ b/nctotdb/readers.py @@ -1,5 +1,6 @@ from itertools import chain import os +import warnings import cf_units import dask.array as da @@ -10,6 +11,8 @@ import xarray as xr import zarr +from .grid_mappings import GridMapping + # Ref Iris: https://github.com/SciTools/iris/blob/master/lib/iris/_cube_coord_common.py#L75 IRIS_FORBIDDEN_KEYS = set([ @@ -70,6 +73,12 @@ class Reader(object): TODO replace all os usages with tiledb ls'. """ + horizontal_coord_names = ['latitude', + 'longitude', + 'grid_latitude', + 'grid_longitude', + 'projection_x_coordinate', + 'projection_y_coordinate'] def __init__(self, input_filepath): self.input_filepath = input_filepath @@ -303,11 +312,11 @@ def _from_tdb_array(self, array_path, naming_key, array_name=None, to_dask=False points = A[array_inds][array_name] return metadata, points - def _load_dim(self, dim_path): + def _load_dim(self, dim_path, grid_mapping): """ Create an Iris DimCoord from a TileDB array describing a dimension. - # TODO not handled here: circular, coord_system. + # TODO not handled here: circular. """ metadata, points = self._from_tdb_array(dim_path, 'coord') @@ -317,6 +326,13 @@ def _load_dim(self, dim_path): long_name = metadata.pop('long_name', None) var_name = metadata.pop('var_name', None) + # Check if we've a known horizontal coord name in order to write the + # grid mapping as it's coord system. + if standard_name in self.horizontal_coord_names: + coord_system = grid_mapping + else: + coord_system = None + units = metadata.pop('units') if coord_name == 'time': # Handle special-case complicated time coords. @@ -328,18 +344,19 @@ def _load_dim(self, dim_path): standard_name=standard_name, long_name=long_name, units=units, - attributes=safe_attrs) + attributes=safe_attrs, + coord_system=coord_system) return coord_name, coord - def _load_group_dims(self, group_dim_paths): + def _load_group_dims(self, group_dim_paths, grid_mapping): """Load all dimension-describing (coordinate) arrays in the group.""" group_dims = {} for dim_path in group_dim_paths: - name, coord = self._load_dim(dim_path) + name, coord = self._load_dim(dim_path, grid_mapping) group_dims[name] = coord return group_dims - def _load_data(self, array_path, group_dims, + def _load_data(self, array_path, group_dims, grid_mapping, attr_name=None, separator='__'): """ Create an Iris cube from a TileDB array describing a data variable and @@ -375,14 +392,14 @@ def _load_data(self, array_path, group_dims, # Dim Coords And Dims (mapping of coords to cube axes). dcad = [(group_dims[name], i) for i, name in enumerate(dim_names)] safe_attrs = self._handle_attributes(metadata, - exclude_keys=['dataset', 'multiattr']) + exclude_keys=['dataset', 'multiattr', 'grid_mapping']) std_name = metadata.pop('standard_name', None) long_name = metadata.pop('long_name', None) var_name = metadata.pop('var_name', None) if all(itm is None for itm in [std_name, long_name, var_name]): long_name = attr_name - return Cube(lazy_data, + cube = Cube(lazy_data, standard_name=std_name, long_name=long_name, var_name=var_name, @@ -390,16 +407,18 @@ def _load_data(self, array_path, group_dims, dim_coords_and_dims=dcad, cell_methods=cell_methods, attributes=safe_attrs) + cube.coord_system = grid_mapping + return cube - def _load_group_arrays(self, data_paths, group_dims): + def _load_group_arrays(self, data_paths, group_dims, grid_mapping): """Load all data-describing (cube) arrays in the group.""" cubes = [] for data_path in data_paths: - cube = self._load_data(data_path, group_dims) + cube = self._load_data(data_path, group_dims, grid_mapping) cubes.append(cube) return cubes - def _load_multiattr_arrays(self, data_paths, group_dims, attr_names=None): + def _load_multiattr_arrays(self, data_paths, group_dims, grid_mapping, attr_names=None): """Load all data-describing (cube) attrs from a multi-attr array.""" if isinstance(attr_names, str): attr_names = [attr_names] @@ -410,10 +429,36 @@ def _load_multiattr_arrays(self, data_paths, group_dims, attr_names=None): with tiledb.open(data_path, 'r') as A: attr_names = A.meta['dataset'].split(',') for attr_name in attr_names: - cube = self._load_data(data_path, group_dims, attr_name=attr_name) + cube = self._load_data(data_path, group_dims, grid_mapping, + attr_name=attr_name) cubes.append(cube) return cubes + def _get_grid_mapping(self, data_array_path): + """ + Get the grid mapping (Iris coord_system) from the data array metadata. + Grid mapping is stored as a JSON string in the array meta, + which is translated by `.grid_mappings.GridMapping`. + + """ + grid_mapping = None + with tiledb.open(data_array_path, 'r') as A: + try: + grid_mapping_str = A.meta['grid_mapping'] + except KeyError: + grid_mapping_str = None + print(grid_mapping_str) + if grid_mapping_str is not None and grid_mapping_str != 'none': + # Cannot write NoneType into TileDB array meta, so `'none'` is a + # stand-in that must be caught. + translator = GridMapping(grid_mapping_str) + try: + grid_mapping = translator.get_grid_mapping() + except Exception as e: + exception_type = e.__class__.__name__ + warnings.warn(f'Re-raised as warning: {exception_type}: {e}.\nGrid mapping will be None.') + return grid_mapping + def _get_arrays_and_dims(self, group_array_paths): """ Sort the contents of a TileDB group into data arrays and dimension arrays @@ -459,12 +504,15 @@ def to_iris(self, name=None): cubes = [] for group_path, group_array_paths in iter_groups.items(): dim_paths, data_paths = self._get_arrays_and_dims(group_array_paths) - group_coords = self._load_group_dims(dim_paths) + grid_mapping = self._get_grid_mapping(data_paths[0]) + group_coords = self._load_group_dims(dim_paths, grid_mapping) if self.data_array_name is not None: - group_cubes = self._load_multiattr_arrays(data_paths, group_coords, + group_cubes = self._load_multiattr_arrays(data_paths, + group_coords, + grid_mapping, attr_names=name) else: - group_cubes = self._load_group_arrays(data_paths, group_coords) + group_cubes = self._load_group_arrays(data_paths, group_coords, grid_mapping) cubes.extend(group_cubes) self.artifact = cubes[0] if len(cubes) == 1 else CubeList(cubes) diff --git a/nctotdb/writers.py b/nctotdb/writers.py index 3436c1b..759bd9c 100644 --- a/nctotdb/writers.py +++ b/nctotdb/writers.py @@ -8,6 +8,7 @@ import zarr from .data_model import NCDataModel +from .grid_mappings import store_grid_mapping class Writer(object): @@ -182,6 +183,21 @@ def _array_indices(self, shape, start_index): """Set the array indices to write the array data into.""" return _array_indices(shape, start_index) + def _get_grid_mapping(self, data_var): + """ + Get the data variable's grid mapping variable, encode it as a JSON string + for easy storage in the TileDB array's meta and return it for storage as + array metadata. + + """ + grid_mapping_name = data_var.getncattr("grid_mapping") + result = 'none' # TileDB probably won't support `NoneType` in array meta. + if grid_mapping_name is not None: + assert grid_mapping_name in self.data_model.grid_mapping + grid_mapping_var = self.data_model.variables[grid_mapping_name] + result = store_grid_mapping(grid_mapping_var) + return result + def populate_array(self, var_name, data_var, group_dirname, start_index=None, write_meta=True): """Write the contents of a netcdf data variable into a tiledb array.""" @@ -203,6 +219,9 @@ def populate_array(self, var_name, data_var, group_dirname, A.meta['dataset'] = var_name # Define this as not being a multi-attr array. A.meta['multiattr'] = False + # Add grid mapping metadata as a JSON string. + grid_mapping_string = self._get_grid_mapping(data_var) + A.meta['grid_mapping'] = grid_mapping_string # XXX: can't add list or tuple as values to metadata dictionary... dim_coord_names = self.data_model.variables[var_name].dimensions A.meta['dimensions'] = ','.join(n for n in dim_coord_names) @@ -419,6 +438,9 @@ def populate_multiattr_array(self, data_array_name, data_var_names, group_dirnam A.meta['dataset'] = ','.join(data_var_names) # Define this as being a multi-attr array. A.meta['multiattr'] = True + # Add grid mapping metadata as a JSON string. + grid_mapping_string = self._get_grid_mapping(data_vars[0]) + A.meta['grid_mapping'] = grid_mapping_string # XXX: can't add list or tuple as values to metadata dictionary... dim_coord_names = self.data_model.variables[data_var_names[0]].dimensions A.meta['dimensions'] = ','.join(n for n in dim_coord_names)