From ed7c67c238c4873ef82b9e58212e9697a8d204ec Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 11 Dec 2019 03:12:14 +0000 Subject: [PATCH 1/3] Added 'crs' to the spatial dimension. 'crs' is now a string in the spatial dimensions and attributes. Tests adjusted to suit. --- datacube/api/core.py | 6 ++++-- datacube/utils/xarray_geoextensions.py | 30 ++++++++++++++++++++++++-- tests/test_utils.py | 2 ++ tests/test_utils_cog.py | 2 ++ tests/test_xarray_extension.py | 2 ++ 5 files changed, 38 insertions(+), 4 deletions(-) diff --git a/datacube/api/core.py b/datacube/api/core.py index 6e1c2f1784..97482b52c4 100644 --- a/datacube/api/core.py +++ b/datacube/api/core.py @@ -447,12 +447,14 @@ def empty_func(measurement_): for name, coord in coords.items(): result[name] = coord for name, coord in geobox.coordinates.items(): - result[name] = (name, coord.values, {'units': coord.units, 'resolution': coord.resolution}) + result[name] = (name, coord.values, {'units': coord.units, + 'resolution': coord.resolution, + 'crs': str(geobox.crs)}) for measurement in measurements: data = data_func(measurement) attrs = measurement.dataarray_attrs() - attrs['crs'] = geobox.crs + attrs['crs'] = str(geobox.crs) dims = tuple(coords.keys()) + tuple(geobox.dimensions) result[measurement.name] = (dims, data, attrs) diff --git a/datacube/utils/xarray_geoextensions.py b/datacube/utils/xarray_geoextensions.py index e2486e12b3..3f9da1e5a3 100644 --- a/datacube/utils/xarray_geoextensions.py +++ b/datacube/utils/xarray_geoextensions.py @@ -25,8 +25,34 @@ def _norm_crs(crs): raise ValueError('Can not interpret {} as CRS'.format(type(crs))) +def _get_crs(obj): + if not isinstance(obj, xarray.Dataset) and not isinstance(obj, xarray.DataArray): + raise ValueError('Can not get crs from {}'.format(type(obj))) + + if isinstance(obj, xarray.Dataset): + if len(obj.data_vars) > 0: + data_array = next(iter(obj.data_vars.values())) + else: + # fall back option + return obj.crs + else: + data_array = obj + + # Assumption: spatial dimensions are always the last 2) + spatial_dims = data_array.dims[-2:] + + if 'crs' in data_array[data_array.dims[-1]].attrs: + if data_array[spatial_dims[0]].crs != data_array[spatial_dims[1]].crs: + raise ValueError('Spatial dimensions have different crs.') + crs = data_array[data_array.dims[-1]].crs + else: + # fall back option + crs = obj.crs + return crs + + def _xarray_affine(obj): - crs = _norm_crs(obj.crs) + crs = _norm_crs(_get_crs(obj)) if crs is None: return None @@ -51,7 +77,7 @@ def _xarray_extent(obj): def _xarray_geobox(obj): - crs = _norm_crs(obj.crs) + crs = _norm_crs(_get_crs(obj)) if crs is None: return None diff --git a/tests/test_utils.py b/tests/test_utils.py index cef6c4a759..c7f05ec259 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -259,6 +259,8 @@ def test_write_geotiff_str_crs(tmpdir, odc_style_xr_dataset): assert (written_data == odc_style_xr_dataset['B10']).all() del odc_style_xr_dataset.attrs['crs'] + for dim in odc_style_xr_dataset.B10.dims: + del odc_style_xr_dataset[dim].attrs['crs'] with pytest.raises(ValueError): write_geotiff(filename, odc_style_xr_dataset) diff --git a/tests/test_utils_cog.py b/tests/test_utils_cog.py index 030c954372..4249d3a2dc 100644 --- a/tests/test_utils_cog.py +++ b/tests/test_utils_cog.py @@ -147,6 +147,8 @@ def test_cog_no_crs(tmpdir, with_dask): xx, ds = gen_test_data(pp, dask=with_dask) del xx.attrs['crs'] + for dim in xx.dims: + del xx[dim].attrs['crs'] with pytest.raises(ValueError): write_cog(xx, ":mem:") diff --git a/tests/test_xarray_extension.py b/tests/test_xarray_extension.py index 80f3a34641..893a49dbd5 100644 --- a/tests/test_xarray_extension.py +++ b/tests/test_xarray_extension.py @@ -22,6 +22,8 @@ def test_xr_extension(odc_style_xr_dataset): assert (zz0, zz1) == (0, 0) odc_style_xr_dataset.attrs['crs'] = None + for dim in odc_style_xr_dataset.B10.dims: + odc_style_xr_dataset[dim].attrs['crs'] = None assert _xarray_affine(odc_style_xr_dataset) is None assert _xarray_geobox(odc_style_xr_dataset) is None assert _xarray_extent(odc_style_xr_dataset) is None From a8000775ef8621846b353e867d929b0127afb037 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 11 Dec 2019 05:13:44 +0000 Subject: [PATCH 2/3] Address PR comments + fixed one missed str(geobox.crs). --- datacube/api/core.py | 6 +++--- datacube/drivers/netcdf/_write.py | 9 ++++++--- datacube/utils/xarray_geoextensions.py | 12 ++++++------ datacube_apps/stacker/fixer.py | 8 +++++++- datacube_apps/stacker/stacker.py | 8 +++++++- tests/storage/test_netcdfwriter.py | 2 +- tests/test_utils.py | 1 + tests/test_xarray_extension.py | 1 + 8 files changed, 32 insertions(+), 15 deletions(-) diff --git a/datacube/api/core.py b/datacube/api/core.py index 97482b52c4..60fc3bb670 100644 --- a/datacube/api/core.py +++ b/datacube/api/core.py @@ -443,18 +443,18 @@ def empty_func(measurement_): data_func = data_func or empty_func - result = xarray.Dataset(attrs={'crs': geobox.crs}) + result = xarray.Dataset(attrs={'crs': str(geobox.crs)}) for name, coord in coords.items(): result[name] = coord for name, coord in geobox.coordinates.items(): result[name] = (name, coord.values, {'units': coord.units, 'resolution': coord.resolution, - 'crs': str(geobox.crs)}) + 'crs': result.crs}) for measurement in measurements: data = data_func(measurement) attrs = measurement.dataarray_attrs() - attrs['crs'] = str(geobox.crs) + attrs['crs'] = result.crs dims = tuple(coords.keys()) + tuple(geobox.dimensions) result[measurement.name] = (dims, data, attrs) diff --git a/datacube/drivers/netcdf/_write.py b/datacube/drivers/netcdf/_write.py index e731dc72db..34330fb7c0 100644 --- a/datacube/drivers/netcdf/_write.py +++ b/datacube/drivers/netcdf/_write.py @@ -79,11 +79,14 @@ def write_dataset_to_netcdf(dataset, filename, global_attributes=None, variable_ if not dataset.data_vars.keys(): raise DatacubeException('Cannot save empty dataset to disk.') - if not hasattr(dataset, 'crs'): - raise DatacubeException('Dataset does not contain CRS, cannot write to NetCDF file.') + if dataset.geobox is None: + raise DatacubeException('Dataset geobox property is None, cannot write to NetCDF file.') + + if dataset.geobox.crs is None: + raise DatacubeException('Dataset geobox.crs property is None, cannot write to NetCDF file.') nco = create_netcdf_storage_unit(filename, - dataset.crs, + dataset.geobox.crs, dataset.coords, dataset.data_vars, variable_params, diff --git a/datacube/utils/xarray_geoextensions.py b/datacube/utils/xarray_geoextensions.py index 3f9da1e5a3..772c55705a 100644 --- a/datacube/utils/xarray_geoextensions.py +++ b/datacube/utils/xarray_geoextensions.py @@ -34,20 +34,20 @@ def _get_crs(obj): data_array = next(iter(obj.data_vars.values())) else: # fall back option - return obj.crs + return obj.attrs.get('crs', None) else: data_array = obj # Assumption: spatial dimensions are always the last 2) spatial_dims = data_array.dims[-2:] - - if 'crs' in data_array[data_array.dims[-1]].attrs: - if data_array[spatial_dims[0]].crs != data_array[spatial_dims[1]].crs: - raise ValueError('Spatial dimensions have different crs.') + crs_set = set(data_array[d].attrs.get('crs', None) for d in spatial_dims) + if len(crs_set) > 1: + raise ValueError('Spatial dimensions have different crs.') + elif len(crs_set) == 1 and None not in crs_set: crs = data_array[data_array.dims[-1]].crs else: # fall back option - crs = obj.crs + crs = data_array.attrs.get('crs', None) or obj.attrs.get('crs', None) return crs diff --git a/datacube_apps/stacker/fixer.py b/datacube_apps/stacker/fixer.py index b01619c560..99fe8116b0 100644 --- a/datacube_apps/stacker/fixer.py +++ b/datacube_apps/stacker/fixer.py @@ -181,8 +181,14 @@ def do_fixer_task(config, task): data['dataset'] = datasets_to_doc(unwrapped_datasets) try: + if data.geobox is None: + raise DatacubeException('Dataset geobox property is None, cannot write to NetCDF file.') + + if data.geobox.crs is None: + raise DatacubeException('Dataset geobox.crs property is None, cannot write to NetCDF file.') + nco = create_netcdf_storage_unit(temp_filename, - data.crs, + data.geobox.crs, data.coords, data.data_vars, variable_params, diff --git a/datacube_apps/stacker/stacker.py b/datacube_apps/stacker/stacker.py index f499e956a5..dc13f67154 100644 --- a/datacube_apps/stacker/stacker.py +++ b/datacube_apps/stacker/stacker.py @@ -158,8 +158,14 @@ def do_stack_task(config, task): data['dataset'] = datasets_to_doc(unwrapped_datasets) try: + if data.geobox is None: + raise DatacubeException('Dataset geobox property is None, cannot write to NetCDF file.') + + if data.geobox.crs is None: + raise DatacubeException('Dataset geobox.crs property is None, cannot write to NetCDF file.') + nco = create_netcdf_storage_unit(temp_filename, - data.crs, + data.geobox.crs, data.coords, data.data_vars, variable_params, diff --git a/tests/storage/test_netcdfwriter.py b/tests/storage/test_netcdfwriter.py index 1b8520922a..371a09b5d1 100644 --- a/tests/storage/test_netcdfwriter.py +++ b/tests/storage/test_netcdfwriter.py @@ -284,4 +284,4 @@ def test_useful_error_on_write_empty_dataset(tmpnetcdf_filename): with pytest.raises(DatacubeException) as excinfo: ds = xr.Dataset(data_vars={'blue': (('time',), numpy.array([0, 1, 2]))}) write_dataset_to_netcdf(ds, tmpnetcdf_filename) - assert 'CRS' in str(excinfo.value) + assert 'geobox' in str(excinfo.value) diff --git a/tests/test_utils.py b/tests/test_utils.py index c7f05ec259..424e35eb75 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -259,6 +259,7 @@ def test_write_geotiff_str_crs(tmpdir, odc_style_xr_dataset): assert (written_data == odc_style_xr_dataset['B10']).all() del odc_style_xr_dataset.attrs['crs'] + del odc_style_xr_dataset.B10.attrs['crs'] for dim in odc_style_xr_dataset.B10.dims: del odc_style_xr_dataset[dim].attrs['crs'] with pytest.raises(ValueError): diff --git a/tests/test_xarray_extension.py b/tests/test_xarray_extension.py index 893a49dbd5..63858f9650 100644 --- a/tests/test_xarray_extension.py +++ b/tests/test_xarray_extension.py @@ -22,6 +22,7 @@ def test_xr_extension(odc_style_xr_dataset): assert (zz0, zz1) == (0, 0) odc_style_xr_dataset.attrs['crs'] = None + odc_style_xr_dataset.B10.attrs['crs'] = None for dim in odc_style_xr_dataset.B10.dims: odc_style_xr_dataset[dim].attrs['crs'] = None assert _xarray_affine(odc_style_xr_dataset) is None From 18388e323f5b337474ddab3f626d8113292e9d79 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 11 Dec 2019 05:45:24 +0000 Subject: [PATCH 3/3] Address PR comment. --- datacube/utils/xarray_geoextensions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datacube/utils/xarray_geoextensions.py b/datacube/utils/xarray_geoextensions.py index 772c55705a..a3a8ec750a 100644 --- a/datacube/utils/xarray_geoextensions.py +++ b/datacube/utils/xarray_geoextensions.py @@ -41,11 +41,13 @@ def _get_crs(obj): # Assumption: spatial dimensions are always the last 2) spatial_dims = data_array.dims[-2:] crs_set = set(data_array[d].attrs.get('crs', None) for d in spatial_dims) + crs = None if len(crs_set) > 1: raise ValueError('Spatial dimensions have different crs.') - elif len(crs_set) == 1 and None not in crs_set: - crs = data_array[data_array.dims[-1]].crs - else: + elif len(crs_set) == 1: + crs = crs_set.pop() + + if crs is None: # fall back option crs = data_array.attrs.get('crs', None) or obj.attrs.get('crs', None) return crs