Skip to content

Commit

Permalink
Change .geobox to look for CRS via CF convention first
Browse files Browse the repository at this point in the history
Order of resolution is something like:

- x.coords[x.grid_mapping].spatial_ref
- x.coords[<looks like crs coord>].spatial_ref
- x.{x,y,latitude,longitude}.attrs['crs']
- x.attrs['crs']
  • Loading branch information
Kirill888 committed Apr 2, 2020
1 parent 597dc72 commit 6f94f22
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 14 deletions.
98 changes: 84 additions & 14 deletions datacube/utils/xarray_geoextensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,17 @@ def _norm_crs(crs):
raise ValueError('Can not interpret {} as CRS'.format(type(crs)))


def _get_crs(obj):
if not isinstance(obj, xarray.Dataset) and not isinstance(obj, xarray.DataArray):
raise ValueError('Can not get crs from {}'.format(type(obj)))

def _get_crs_from_attrs(obj):
""" Looks for attribute named `crs` containing CRS string
1. Checks spatials coords attrs
2. Checks data variable attrs
3. Checks dataset attrs
Returns
=======
Content for `.attrs[crs]` usually it's a string
None if not present in any of the places listed above
"""
if isinstance(obj, xarray.Dataset):
if len(obj.data_vars) > 0:
data_array = next(iter(obj.data_vars.values()))
Expand All @@ -39,22 +46,69 @@ def _get_crs(obj):
data_array = obj

sdims = spatial_dims(data_array, relaxed=True)
if sdims is None:
return None

crs_set = set(data_array[d].attrs.get('crs', None) for d in sdims)
crs = None
if len(crs_set) > 1:
raise ValueError('Spatial dimensions have different crs.')
elif len(crs_set) == 1:
crs = crs_set.pop()
if sdims is not None:
crs_set = set(data_array[d].attrs.get('crs', None) for d in sdims)
crs = None
if len(crs_set) > 1:
raise ValueError('Spatial dimensions have different crs.')
elif len(crs_set) == 1:
crs = crs_set.pop()
else:
crs = None

if crs is None:
# fall back option
crs = data_array.attrs.get('crs', None) or obj.attrs.get('crs', None)
return crs


def _get_crs_from_coord(obj, mode='strict'):
""" Looks for dimensionless coordinate with `spatial_ref` attribute.
obj: Dataset | DataArray
mode: strict|any|all
strict -- raise Error if multiple candidates
any -- return first one
all -- return a list of all found CRSs
Returns
=======
None - if none found
crs:str - if found one
crs:str - if found several but mode is any
(crs: str, crs: str) - if found several and mode=all
"""
grid_mapping = obj.attrs.get('grid_mapping', None)

# First check CF convention "pointer"
if grid_mapping is not None and grid_mapping in obj.coords:
coord = obj.coords[grid_mapping]
spatial_ref = coord.attrs.get('spatial_ref', None)
if spatial_ref is not None:
return spatial_ref
else:
raise ValueError(f"Coordinate '{grid_mapping}' has no `spatial_ref` attribute")

# No explicit `grid_mapping` find some "CRS" coordinate
candidates = tuple(coord.attrs['spatial_ref'] for coord in obj.coords.values()
if coord.ndim == 0 and 'spatial_ref' in coord.attrs)

if len(candidates) == 0:
return None
if len(candidates) == 1:
return candidates[0]

if mode == 'strict':
raise ValueError("Too many candidates when looking for CRS")
elif mode == 'all':
return candidates
elif mode == 'any':
return candidates[0]
else:
raise ValueError(f"Mode needs to be: strict|any|all got {mode}")


def _xarray_affine(obj):
sdims = spatial_dims(obj, relaxed=True)
if sdims is None:
Expand All @@ -72,10 +126,26 @@ def _xarray_extent(obj):


def _xarray_geobox(obj):
crs = _norm_crs(_get_crs(obj))
crs = None
try:
crs = _get_crs_from_coord(obj)
except ValueError:
pass

if crs is None:
try:
crs = _get_crs_from_attrs(obj)
except ValueError:
pass

if crs is None:
return None

try:
crs = _norm_crs(crs)
except ValueError:
return None

dims = crs.dimensions
return geometry.GeoBox(obj[dims[1]].size, obj[dims[0]].size, obj.affine, crs)

Expand Down
83 changes: 83 additions & 0 deletions tests/test_xarray_extension.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import pytest
import xarray as xr
from datacube.testutils.geom import epsg4326, epsg3857
from datacube.testutils import mk_sample_xr_dataset, remove_crs
from datacube.utils.xarray_geoextensions import (
_norm_crs,
_xarray_affine,
_xarray_geobox,
_xarray_extent,
_get_crs_from_attrs,
_get_crs_from_coord,
)


Expand Down Expand Up @@ -49,6 +52,7 @@ def test_xr_geobox():

assert ds.band[:, :2, :2].affine*(0, 0) == xy
assert ds.band[:, :2, :2].affine*(1, 1) == tuple(a+b for a, b in zip(xy, rxy))
assert ds.band.isel(time=0, x=0).affine is None

xx = ds.band + 1000
assert xx.geobox is not None
Expand All @@ -59,3 +63,82 @@ def test_xr_geobox():

assert mk_sample_xr_dataset(crs=None).geobox is None
assert mk_sample_xr_dataset(crs=None).affine is not None


def test_xr_geobox_unhappy():
xx = mk_sample_xr_dataset(crs=None)

# test that exceptions from get_crs_from_{coord,attrs} are caught
xx.band.attrs.update(grid_mapping='x') # force exception in coord
xx.x.attrs.update(crs='EPSG:4326') # force exception in attr
xx.y.attrs.update(crs='EPSG:3857')
assert xx.band.geobox is None

# test _norm_crs exception is caught
xx = mk_sample_xr_dataset(crs=None)
xx.attrs['crs'] = ['this will not parse']
assert xx.geobox is None


def test_crs_from_coord():
xx_none = mk_sample_xr_dataset(crs=None)
xx_3857 = mk_sample_xr_dataset(crs=epsg3857)
xx_4326 = mk_sample_xr_dataset(crs=epsg4326)
cc_4326 = xx_4326.geobox.xr_coords(with_crs='epsg_4326')['epsg_4326']
cc_3857 = xx_3857.geobox.xr_coords(with_crs='epsg_3857')['epsg_3857']

assert _get_crs_from_coord(xx_none.band) is None
assert _get_crs_from_coord(xx_none) is None
assert _get_crs_from_coord(xx_3857.band) == epsg3857
assert _get_crs_from_coord(xx_3857) == epsg3857
assert _get_crs_from_coord(xx_4326.band) == epsg4326
assert _get_crs_from_coord(xx_4326) == epsg4326

xx = xx_none.band.assign_attrs(grid_mapping='x')
with pytest.raises(ValueError):
_get_crs_from_coord(xx)
xx = xx_none.band.assign_attrs(grid_mapping='no_such_coord')
assert _get_crs_from_coord(xx) is None

xx_2crs = xx_none.assign_coords(cc_4326=cc_4326, cc_3857=cc_3857)
assert xx_2crs.geobox is None

# two coords, no grid mapping, strict mode
with pytest.raises(ValueError):
_get_crs_from_coord(xx_2crs)
with pytest.raises(ValueError):
_get_crs_from_coord(xx_2crs.band)

# any should just return "first" guess, we not sure which one
crs = _get_crs_from_coord(xx_2crs, 'any')
assert epsg4326 == crs or epsg3857 == crs

# all should return a list of length 2
crss = _get_crs_from_coord(xx_2crs, 'all')
assert len(crss) == 2
assert any(crs == epsg3857 for crs in crss)
assert any(crs == epsg4326 for crs in crss)

with pytest.raises(ValueError):
_get_crs_from_coord(xx_2crs, 'no-such-mode')


def test_crs_from_attrs():
xx_none = mk_sample_xr_dataset(crs=None)
xx_3857 = mk_sample_xr_dataset(crs=epsg3857)
xx_4326 = mk_sample_xr_dataset(crs=epsg4326)

assert _get_crs_from_attrs(xx_none) is None
assert _get_crs_from_attrs(xx_none.band) is None
assert _get_crs_from_attrs(xx_3857.band) == epsg3857
assert _get_crs_from_attrs(xx_3857) == epsg3857
assert _get_crs_from_attrs(xx_4326.band) == epsg4326
assert _get_crs_from_attrs(xx_4326) == epsg4326

assert _get_crs_from_attrs(xr.Dataset()) is None

# check inconsistent CRSs
xx = xx_3857.copy()
xx.x.attrs['crs'] = xx_4326.attrs['crs']
with pytest.raises(ValueError):
_get_crs_from_attrs(xx)

0 comments on commit 6f94f22

Please sign in to comment.