Skip to content

Commit

Permalink
Pass grid mapping metadata (#24)
Browse files Browse the repository at this point in the history
* Add grid mapping translation

* Add grid mapping metadata to tiledb array meta

* Add to init

* Add grid_mapping on TileDB array read to Iris

* Usage fixes
  • Loading branch information
DPeterK authored Mar 5, 2020
1 parent f953112 commit 660fe9d
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 15 deletions.
1 change: 1 addition & 0 deletions nctotdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .data_model import NCDataModel
from .grid_mappings import GridMapping, store_grid_mapping
from .readers import TDBReader, ZarrReader
from .writers import MultiAttrTDBWriter, TDBWriter, ZarrWriter
146 changes: 146 additions & 0 deletions nctotdb/grid_mappings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import json

import iris.coord_systems


def store_grid_mapping(grid_mapping_variable):
"""Store the metadata for a NetCDF grid mapping variable as a JSON string."""
ncattrs = grid_mapping_variable.ncattrs()
return json.dumps({k: grid_mapping_variable.getncattr(k) for k in ncattrs})


# Taken from https://github.com/SciTools/iris/blob/master/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb.
class GridMapping(object):
"""
Convert a NetCDF grid mapping variable (expressed as a JSON string) into an
Iris coordinate system.
"""
# Grid mapping names.
GRID_MAPPING_ALBERS = 'albers_conical_equal_area'
GRID_MAPPING_AZIMUTHAL = 'azimuthal_equidistant'
GRID_MAPPING_LAMBERT_AZIMUTHAL = 'lambert_azimuthal_equal_area'
GRID_MAPPING_LAMBERT_CONFORMAL = 'lambert_conformal_conic'
GRID_MAPPING_LAMBERT_CYLINDRICAL = 'lambert_cylindrical_equal_area'
GRID_MAPPING_LAT_LON = 'latitude_longitude'
GRID_MAPPING_MERCATOR = 'mercator'
GRID_MAPPING_ORTHO = 'orthographic'
GRID_MAPPING_POLAR = 'polar_stereographic'
GRID_MAPPING_ROTATED_LAT_LON = 'rotated_latitude_longitude'
GRID_MAPPING_STEREO = 'stereographic'
GRID_MAPPING_TRANSVERSE = 'transverse_mercator'
GRID_MAPPING_VERTICAL = 'vertical_perspective'

# Earth grid attributes.
SEMI_MAJOR_AXIS = 'semi_major_axis'
SEMI_MINOR_AXIS = 'semi_minor_axis'
INVERSE_FLATTENING = 'inverse_flattening'
EARTH_RADIUS = 'earth_radius'
LON_OF_PROJ_ORIGIN = 'longitude_of_projection_origin'
NORTH_POLE_LAT = 'grid_north_pole_latitude'
NORTH_POLE_LON = 'grid_north_pole_longitude'
NORTH_POLE_GRID_LON = 'north_pole_grid_longitude'

def __init__(self, grid_mapping_string):
self.grid_mapping_string = grid_mapping_string
self.grid_mapping = json.loads(self.grid_mapping_string)

def _get_ellipsoid(self):
"""Return the ellipsoid definition."""
major = self.grid_mapping.get(self.SEMI_MAJOR_AXIS, None)
minor = self.grid_mapping.get(self.SEMI_MINOR_AXIS, None)
inverse_flattening = self.grid_mapping.get(self.INVERSE_FLATTENING, None)

if major is not None and minor is not None:
inverse_flattening = None
if major is None and minor is None and inverse_flattening is None:
major = self.grid_mapping.get(self.EARTH_RADIUS, None)

return major, minor, inverse_flattening

def build_coordinate_system(self):
"""Create a coordinate system from the grid mapping variable."""
major, minor, inverse_flattening = self._get_ellipsoid()
return iris.coord_systems.GeogCS(major, minor, inverse_flattening)

def build_rotated_coordinate_system(self):
"""Create a rotated coordinate system from the grid mapping variable."""
major, minor, inverse_flattening = self._get_ellipsoid()

north_pole_latitude = self.grid_mapping.get(self.NORTH_POLE_LAT, 90.0)
north_pole_longitude = self.grid_mapping.get(self.NORTH_POLE_LON, 0.0)
north_pole_grid_lon = self.grid_mapping.get(self.NORTH_POLE_GRID_LON, 0.0)

ellipsoid = None
if major is not None or minor is not None or inverse_flattening is not None:
ellipsoid = iris.coord_systems.GeogCS(major, minor, inverse_flattening)

return iris.coord_systems.RotatedGeogCS(north_pole_latitude, north_pole_longitude,
north_pole_grid_lon, ellipsoid)

def build_mercator_coordinate_system(self):
"""Create a Mercator coordinate system from the grid mapping variable."""
major, minor, inverse_flattening = self._get_ellipsoid()
longitude_of_projection_origin = self.grid_mapping.get(self.LON_OF_PROJ_ORIGIN, None)

ellipsoid = None
if major is not None or minor is not None or inverse_flattening is not None:
ellipsoid = iris.coord_systems.GeogCS(major, minor, inverse_flattening)

return iris.coord_systems.Mercator(longitude_of_projection_origin, ellipsoid=ellipsoid)

def get_grid_mapping(self):
"""
Determine the type of grid mapping variable we have and call the
appropriate converter.
Note that we do not support translation of all grid mapping variables.
Only the following are supported:
* latitude_longitude
* mercator
* rotated_latitude_longitude
"""
grid_mapping_name = self.grid_mapping.get('grid_mapping_name').lower()

if grid_mapping_name == self.GRID_MAPPING_LAT_LON:
result = self.build_coordinate_system()
elif grid_mapping_name == self.GRID_MAPPING_ROTATED_LAT_LON:
result = self.build_rotated_coordinate_system()
elif grid_mapping_name == self.GRID_MAPPING_MERCATOR:
result = self.build_mercator_coordinate_system()
elif grid_mapping_name == self.GRID_MAPPING_ALBERS:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_AZIMUTHAL:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_LAMBERT_AZIMUTHAL:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_LAMBERT_CONFORMAL:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_LAMBERT_CYLINDRICAL:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_ORTHO:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_POLAR:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_STEREO:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_TRANSVERSE:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
elif grid_mapping_name == self.GRID_MAPPING_VERTICAL:
# Grid mapping type not handled yet.
raise NotImplementedError(f'Grid mapping name {grid_mapping_name} is not currently supported.')
else:
# Not a valid grid mapping name.
raise ValueError(f'{grid_mapping_name!r} is not a valid grid mapping name.')

return result
78 changes: 63 additions & 15 deletions nctotdb/readers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from itertools import chain
import os
import warnings

import cf_units
import dask.array as da
Expand All @@ -10,6 +11,8 @@
import xarray as xr
import zarr

from .grid_mappings import GridMapping


# Ref Iris: https://github.com/SciTools/iris/blob/master/lib/iris/_cube_coord_common.py#L75
IRIS_FORBIDDEN_KEYS = set([
Expand Down Expand Up @@ -70,6 +73,12 @@ class Reader(object):
TODO replace all os usages with tiledb ls'.
"""
horizontal_coord_names = ['latitude',
'longitude',
'grid_latitude',
'grid_longitude',
'projection_x_coordinate',
'projection_y_coordinate']
def __init__(self, input_filepath):
self.input_filepath = input_filepath

Expand Down Expand Up @@ -303,11 +312,11 @@ def _from_tdb_array(self, array_path, naming_key, array_name=None, to_dask=False
points = A[array_inds][array_name]
return metadata, points

def _load_dim(self, dim_path):
def _load_dim(self, dim_path, grid_mapping):
"""
Create an Iris DimCoord from a TileDB array describing a dimension.
# TODO not handled here: circular, coord_system.
# TODO not handled here: circular.
"""
metadata, points = self._from_tdb_array(dim_path, 'coord')
Expand All @@ -317,6 +326,13 @@ def _load_dim(self, dim_path):
long_name = metadata.pop('long_name', None)
var_name = metadata.pop('var_name', None)

# Check if we've a known horizontal coord name in order to write the
# grid mapping as it's coord system.
if standard_name in self.horizontal_coord_names:
coord_system = grid_mapping
else:
coord_system = None

units = metadata.pop('units')
if coord_name == 'time':
# Handle special-case complicated time coords.
Expand All @@ -328,18 +344,19 @@ def _load_dim(self, dim_path):
standard_name=standard_name,
long_name=long_name,
units=units,
attributes=safe_attrs)
attributes=safe_attrs,
coord_system=coord_system)
return coord_name, coord

def _load_group_dims(self, group_dim_paths):
def _load_group_dims(self, group_dim_paths, grid_mapping):
"""Load all dimension-describing (coordinate) arrays in the group."""
group_dims = {}
for dim_path in group_dim_paths:
name, coord = self._load_dim(dim_path)
name, coord = self._load_dim(dim_path, grid_mapping)
group_dims[name] = coord
return group_dims

def _load_data(self, array_path, group_dims,
def _load_data(self, array_path, group_dims, grid_mapping,
attr_name=None, separator='__'):
"""
Create an Iris cube from a TileDB array describing a data variable and
Expand Down Expand Up @@ -375,31 +392,33 @@ def _load_data(self, array_path, group_dims,
# Dim Coords And Dims (mapping of coords to cube axes).
dcad = [(group_dims[name], i) for i, name in enumerate(dim_names)]
safe_attrs = self._handle_attributes(metadata,
exclude_keys=['dataset', 'multiattr'])
exclude_keys=['dataset', 'multiattr', 'grid_mapping'])
std_name = metadata.pop('standard_name', None)
long_name = metadata.pop('long_name', None)
var_name = metadata.pop('var_name', None)
if all(itm is None for itm in [std_name, long_name, var_name]):
long_name = attr_name

return Cube(lazy_data,
cube = Cube(lazy_data,
standard_name=std_name,
long_name=long_name,
var_name=var_name,
units=metadata.pop('units', '1'),
dim_coords_and_dims=dcad,
cell_methods=cell_methods,
attributes=safe_attrs)
cube.coord_system = grid_mapping
return cube

def _load_group_arrays(self, data_paths, group_dims):
def _load_group_arrays(self, data_paths, group_dims, grid_mapping):
"""Load all data-describing (cube) arrays in the group."""
cubes = []
for data_path in data_paths:
cube = self._load_data(data_path, group_dims)
cube = self._load_data(data_path, group_dims, grid_mapping)
cubes.append(cube)
return cubes

def _load_multiattr_arrays(self, data_paths, group_dims, attr_names=None):
def _load_multiattr_arrays(self, data_paths, group_dims, grid_mapping, attr_names=None):
"""Load all data-describing (cube) attrs from a multi-attr array."""
if isinstance(attr_names, str):
attr_names = [attr_names]
Expand All @@ -410,10 +429,36 @@ def _load_multiattr_arrays(self, data_paths, group_dims, attr_names=None):
with tiledb.open(data_path, 'r') as A:
attr_names = A.meta['dataset'].split(',')
for attr_name in attr_names:
cube = self._load_data(data_path, group_dims, attr_name=attr_name)
cube = self._load_data(data_path, group_dims, grid_mapping,
attr_name=attr_name)
cubes.append(cube)
return cubes

def _get_grid_mapping(self, data_array_path):
"""
Get the grid mapping (Iris coord_system) from the data array metadata.
Grid mapping is stored as a JSON string in the array meta,
which is translated by `.grid_mappings.GridMapping`.
"""
grid_mapping = None
with tiledb.open(data_array_path, 'r') as A:
try:
grid_mapping_str = A.meta['grid_mapping']
except KeyError:
grid_mapping_str = None
print(grid_mapping_str)
if grid_mapping_str is not None and grid_mapping_str != 'none':
# Cannot write NoneType into TileDB array meta, so `'none'` is a
# stand-in that must be caught.
translator = GridMapping(grid_mapping_str)
try:
grid_mapping = translator.get_grid_mapping()
except Exception as e:
exception_type = e.__class__.__name__
warnings.warn(f'Re-raised as warning: {exception_type}: {e}.\nGrid mapping will be None.')
return grid_mapping

def _get_arrays_and_dims(self, group_array_paths):
"""
Sort the contents of a TileDB group into data arrays and dimension arrays
Expand Down Expand Up @@ -459,12 +504,15 @@ def to_iris(self, name=None):
cubes = []
for group_path, group_array_paths in iter_groups.items():
dim_paths, data_paths = self._get_arrays_and_dims(group_array_paths)
group_coords = self._load_group_dims(dim_paths)
grid_mapping = self._get_grid_mapping(data_paths[0])
group_coords = self._load_group_dims(dim_paths, grid_mapping)
if self.data_array_name is not None:
group_cubes = self._load_multiattr_arrays(data_paths, group_coords,
group_cubes = self._load_multiattr_arrays(data_paths,
group_coords,
grid_mapping,
attr_names=name)
else:
group_cubes = self._load_group_arrays(data_paths, group_coords)
group_cubes = self._load_group_arrays(data_paths, group_coords, grid_mapping)
cubes.extend(group_cubes)

self.artifact = cubes[0] if len(cubes) == 1 else CubeList(cubes)
Expand Down
22 changes: 22 additions & 0 deletions nctotdb/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import zarr

from .data_model import NCDataModel
from .grid_mappings import store_grid_mapping


class Writer(object):
Expand Down Expand Up @@ -182,6 +183,21 @@ def _array_indices(self, shape, start_index):
"""Set the array indices to write the array data into."""
return _array_indices(shape, start_index)

def _get_grid_mapping(self, data_var):
"""
Get the data variable's grid mapping variable, encode it as a JSON string
for easy storage in the TileDB array's meta and return it for storage as
array metadata.
"""
grid_mapping_name = data_var.getncattr("grid_mapping")
result = 'none' # TileDB probably won't support `NoneType` in array meta.
if grid_mapping_name is not None:
assert grid_mapping_name in self.data_model.grid_mapping
grid_mapping_var = self.data_model.variables[grid_mapping_name]
result = store_grid_mapping(grid_mapping_var)
return result

def populate_array(self, var_name, data_var, group_dirname,
start_index=None, write_meta=True):
"""Write the contents of a netcdf data variable into a tiledb array."""
Expand All @@ -203,6 +219,9 @@ def populate_array(self, var_name, data_var, group_dirname,
A.meta['dataset'] = var_name
# Define this as not being a multi-attr array.
A.meta['multiattr'] = False
# Add grid mapping metadata as a JSON string.
grid_mapping_string = self._get_grid_mapping(data_var)
A.meta['grid_mapping'] = grid_mapping_string
# XXX: can't add list or tuple as values to metadata dictionary...
dim_coord_names = self.data_model.variables[var_name].dimensions
A.meta['dimensions'] = ','.join(n for n in dim_coord_names)
Expand Down Expand Up @@ -419,6 +438,9 @@ def populate_multiattr_array(self, data_array_name, data_var_names, group_dirnam
A.meta['dataset'] = ','.join(data_var_names)
# Define this as being a multi-attr array.
A.meta['multiattr'] = True
# Add grid mapping metadata as a JSON string.
grid_mapping_string = self._get_grid_mapping(data_vars[0])
A.meta['grid_mapping'] = grid_mapping_string
# XXX: can't add list or tuple as values to metadata dictionary...
dim_coord_names = self.data_model.variables[data_var_names[0]].dimensions
A.meta['dimensions'] = ','.join(n for n in dim_coord_names)
Expand Down

0 comments on commit 660fe9d

Please sign in to comment.