diff --git a/CHANGELOG.md b/CHANGELOG.md index 550bda2c6..13c0bbb0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,11 @@ updated: ### Added -- `climada.engine.impact.Impact` objects have new methods `from_hdf5` and `write_hdf5` for reading their data from, and writing it to, H5 files [#606](https://github.com/CLIMADA-project/climada_python/pull/606). +- `climada.hazard.Hazard.from_xarray_raster(_file)` class methods for reading `Hazard` objects from an `xarray.Dataset`, or from a file that can be read by `xarray`. +[#507](https://github.com/CLIMADA-project/climada_python/pull/507), +[#589](https://github.com/CLIMADA-project/climada_python/pull/589), +[#652](https://github.com/CLIMADA-project/climada_python/pull/652). +- `climada.engine.impact.Impact` objects have new methods `from_hdf5` and `write_hdf5` for reading their data from, and writing it to, H5 files [#606](https://github.com/CLIMADA-project/climada_python/pull/606). - `climada.engine.impact.Impact` objects has a new class method `concat` for concatenation of impacts based on the same exposures [#529](https://github.com/CLIMADA-project/climada_python/pull/529). - `climada.engine.impact_calc`: this module was separated from `climada.engine.impact` and contains the code that dealing with impact _calculation_ while the latter focuses on impact _data_ [#560](https://github.com/CLIMADA-project/climada_python/pull/560). - The classes `Hazard`, `Impact` and `ImpactFreqCurve` have a novel attribute `frequency_unit`. Before it was implicitly set to annual, now it can be specified and accordingly displayed in plots. 
diff --git a/climada/hazard/base.py b/climada/hazard/base.py index 22b2e4ac9..52990a230 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -410,9 +410,52 @@ def set_vector(self, *args, **kwargs): self.__dict__ = Hazard.from_vector(*args, **kwargs).__dict__ @classmethod - def from_raster_xarray( + def from_xarray_raster_file( + cls, filepath: Union[pathlib.Path, str], *args, **kwargs + ): + """Read raster-like data from a file that can be loaded with xarray + + This wraps :py:meth:`~Hazard.from_xarray_raster` by first opening the target file + as xarray dataset and then passing it to that classmethod. Use this wrapper as a + simple alternative to opening the file yourself. The signature is exactly the + same, except for the first argument, which is replaced by a file path here. + + Additional (keyword) arguments are passed to + :py:meth:`~Hazard.from_xarray_raster`. + + Parameters + ---------- + filepath : Path or str + Path of the file to read with xarray. May be any file type supported by + xarray. See https://docs.xarray.dev/en/stable/user-guide/io.html + + Returns + ------- + hazard : climada.Hazard + A hazard object created from the input data + + Examples + -------- + + >>> hazard = Hazard.from_xarray_raster_file("path/to/file.nc", "", "") + + Notes + ----- + + If you have specific requirements for opening a data file, prefer opening it + yourself and using :py:meth:`~Hazard.from_xarray_raster`, following this pattern: + + >>> open_kwargs = dict(engine="h5netcdf", chunks=dict(x=-1, y="auto")) + >>> with xarray.open_dataset("path/to/file.nc", **open_kwargs) as dset: + ... 
hazard = Hazard.from_xarray_raster(dset, "", "") + """ + with xr.open_dataset(filepath, chunks="auto") as dset: + return cls.from_xarray_raster(dset, *args, **kwargs) + + @classmethod + def from_xarray_raster( cls, - data: Union[xr.Dataset, str, pathlib.Path], + data: xr.Dataset, hazard_type: str, intensity_unit: str, *, @@ -422,7 +465,7 @@ def from_raster_xarray( crs: str = DEF_CRS, rechunk: bool = False, ): - """Read raster-like data from an xarray Dataset or a raster data file + """Read raster-like data from an xarray Dataset This method reads data that can be interpreted using three coordinates for event, latitude, and longitude. The data and the coordinates themselves may be organized @@ -443,12 +486,13 @@ def from_raster_xarray( meaning that the object can be used in all CLIMADA operations without throwing an error due to missing data or faulty data types. + Use :py:meth:`~Hazard.from_xarray_raster_file` to open a file on disk + and load the resulting dataset with this method in one step. + Parameters ---------- - data : xarray.Dataset or str - The data to read from. May be an opened dataset or a path to a raster data - file, in which case the file is opened first. Works with any file format - supported by ``xarray``. + data : xarray.Dataset + The dataset to read from. hazard_type : str The type identifier of the hazard. Will be stored directly in the hazard object. @@ -499,6 +543,11 @@ def from_raster_xarray( hazard : climada.Hazard A hazard object created from the input data + See Also + -------- + :py:meth:`~Hazard.from_xarray_raster_file` + Use this method if you want CLIMADA to open and read a file on disk for you. + Notes ----- * Single-valued coordinates given by ``coordinate_vars``, that are not proper @@ -534,7 +583,7 @@ def from_raster_xarray( ... longitude=[0, 1, 2], ... ), ... 
) - >>> hazard = Hazard.from_raster_xarray(dset, "", "") + >>> hazard = Hazard.from_xarray_raster(dset, "", "") For non-default coordinate names, use the ``coordinate_vars`` argument. @@ -551,7 +600,7 @@ def from_raster_xarray( ... longitude=[0, 1, 2], ... ), ... ) - >>> hazard = Hazard.from_raster_xarray( + >>> hazard = Hazard.from_xarray_raster( ... dset, "", "", coordinate_vars=dict(event="day", latitude="lat") ... ) @@ -568,7 +617,7 @@ def from_raster_xarray( ... latitude=(["y", "x"], [[0.0, 0.0, 0.0], [0.1, 0.1, 0.1]]), ... ), ... ) - >>> hazard = Hazard.from_raster_xarray(dset, "", "") + >>> hazard = Hazard.from_xarray_raster(dset, "", "") Optional data is read from the dataset if the default keys are found. Users can specify custom variables in the data, or that the default keys should be ignored, @@ -593,7 +642,7 @@ def from_raster_xarray( ... longitude=[0, 1, 2], ... ), ... ) - >>> hazard = Hazard.from_raster_xarray( + >>> hazard = Hazard.from_xarray_raster( ... dset, ... "", ... "", @@ -627,7 +676,7 @@ def from_raster_xarray( ... longitude=[0, 1, 2], ... ), ... ) - >>> hazard = Hazard.from_raster_xarray(dset, "", "") # Same as first example + >>> hazard = Hazard.from_xarray_raster(dset, "", "") # Same as first example If one coordinate is missing altogehter, you must add it or expand the dimensions before loading the dataset: @@ -645,14 +694,15 @@ def from_raster_xarray( ... ), ... 
) >>> dset = dset.expand_dims(time=[numpy.datetime64("2000-01-01")]) - >>> hazard = Hazard.from_raster_xarray(dset, "", "") + >>> hazard = Hazard.from_xarray_raster(dset, "", "") """ - # If the data is a string, open the respective file + # Check data type for better error message if not isinstance(data, xr.Dataset): - LOGGER.info("Loading Hazard from file: %s", data) - data: xr.Dataset = xr.open_dataset(data, chunks="auto") - else: - LOGGER.info("Loading Hazard from xarray Dataset") + if isinstance(data, (pathlib.Path, str)): + raise TypeError("Passing a path to this classmethod is not supported. " + "Use Hazard.from_xarray_raster_file instead.") + + raise TypeError("This method only supports xarray.Dataset as input data") # Initialize Hazard object hazard_kwargs = dict(haz_type=hazard_type, units=intensity_unit) diff --git a/climada/hazard/test/test_base_xarray.py b/climada/hazard/test/test_base_xarray.py index f2299e82f..5f6147da1 100644 --- a/climada/hazard/test/test_base_xarray.py +++ b/climada/hazard/test/test_base_xarray.py @@ -23,6 +23,7 @@ from unittest.mock import patch import datetime as dt from pathlib import Path +from tempfile import TemporaryDirectory import numpy as np from scipy.sparse import csr_matrix @@ -37,24 +38,27 @@ class TestReadDefaultNetCDF(unittest.TestCase): """Test reading a NetCDF file where the coordinates to read match the dimensions""" - def setUp(self): + @classmethod + def setUpClass(cls): """Write a simple NetCDF file to read""" - self.netcdf_path = Path.cwd() / "default.nc" - self.intensity = np.array([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]) - self.time = np.array([dt.datetime(1999, 1, 1), dt.datetime(2000, 1, 1)]) - self.latitude = np.array([0, 1]) - self.longitude = np.array([0, 1, 2]) + cls.tempdir = TemporaryDirectory() + cls.netcdf_path = Path(cls.tempdir.name) / "default.nc" + cls.intensity = np.array([[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]) + cls.time = np.array([dt.datetime(1999, 1, 1), 
dt.datetime(2000, 1, 1)]) + cls.latitude = np.array([0, 1]) + cls.longitude = np.array([0, 1, 2]) dset = xr.Dataset( { - "intensity": (["time", "latitude", "longitude"], self.intensity), + "intensity": (["time", "latitude", "longitude"], cls.intensity), }, - dict(time=self.time, latitude=self.latitude, longitude=self.longitude), + dict(time=cls.time, latitude=cls.latitude, longitude=cls.longitude), ) - dset.to_netcdf(self.netcdf_path) + dset.to_netcdf(cls.netcdf_path) - def tearDown(self): + @classmethod + def tearDownClass(cls): """Delete the NetCDF file""" - self.netcdf_path.unlink() + cls.tempdir.cleanup() def _assert_default(self, hazard): """Assertions for the default hazard to be loaded""" @@ -105,32 +109,53 @@ def _assert_default_types(self, hazard): def test_load_path(self): """Load the data with path as argument""" - hazard = Hazard.from_raster_xarray(self.netcdf_path, "", "") + hazard = Hazard.from_xarray_raster_file(self.netcdf_path, "", "") self._assert_default(hazard) # Check wrong paths with self.assertRaises(FileNotFoundError) as cm: - Hazard.from_raster_xarray("file-does-not-exist.nc", "", "") + Hazard.from_xarray_raster_file("file-does-not-exist.nc", "", "") self.assertIn("file-does-not-exist.nc", str(cm.exception)) with self.assertRaises(KeyError) as cm: - Hazard.from_raster_xarray( + Hazard.from_xarray_raster_file( self.netcdf_path, "", "", intensity="wrong-intensity-path" ) self.assertIn("wrong-intensity-path", str(cm.exception)) def test_load_dataset(self): """Load the data from an opened dataset as argument""" + def _load_and_assert(chunks): - dataset = xr.open_dataset(self.netcdf_path, chunks=chunks) - hazard = Hazard.from_raster_xarray(dataset, "", "") - self._assert_default(hazard) + with xr.open_dataset(self.netcdf_path, chunks=chunks) as dataset: + hazard = Hazard.from_xarray_raster(dataset, "", "") + self._assert_default(hazard) _load_and_assert(chunks=None) _load_and_assert(chunks=dict(latitude=1, longitude=1, time=1)) + def 
test_type_error(self): + """Calling 'from_xarray_raster' with wrong data type should throw""" + # Passing a path + with self.assertRaises(TypeError) as cm: + Hazard.from_xarray_raster(self.netcdf_path, "", "") + self.assertIn( + "Use Hazard.from_xarray_raster_file instead", + str(cm.exception), + ) + + # Passing a DataArray + with xr.open_dataset(self.netcdf_path) as dset, self.assertRaises( + TypeError + ) as cm: + Hazard.from_xarray_raster(dset["intensity"], "", "") + self.assertIn( + "This method only supports xarray.Dataset as input data", + str(cm.exception), + ) + def test_type_and_unit(self): """Test passing a custom type and unit""" - hazard = Hazard.from_raster_xarray( + hazard = Hazard.from_xarray_raster_file( self.netcdf_path, hazard_type="TC", intensity_unit="m/s" ) self._assert_default_types(hazard) @@ -139,143 +164,148 @@ def test_type_and_unit(self): def test_data_vars(self): """Check handling of data variables""" - dataset = xr.open_dataset(self.netcdf_path) - size = dataset.sizes["time"] - - # Set optionals in the dataset - frequency = np.ones(size) * 1.5 - event_id = np.array(range(size), dtype=np.int64) + 3 - event_name = ["bla"] * size - date = np.array(range(size)) + 100 - dataset["event_id"] = event_id - dataset["event_name"] = event_name - dataset["date"] = date - - # Assign a proper coordinate for a change - dataset = dataset.assign_coords(dict(frequency=("time", frequency))) - - # Assign fraction - frac = xr.DataArray( - np.array([[[0, 0, 0], [0, 0, 0]], [[1, 1, 1], [1, 1, 1]]]), - dims=["time", "latitude", "longitude"], - coords=dict( - time=self.time, latitude=self.latitude, longitude=self.longitude - ), - ) - dataset["fraction"] = frac + with xr.open_dataset(self.netcdf_path) as dataset: + size = dataset.sizes["time"] + + # Set optionals in the dataset + frequency = np.ones(size) * 1.5 + event_id = np.array(range(size), dtype=np.int64) + 3 + event_name = ["bla"] * size + date = np.array(range(size)) + 100 + dataset["event_id"] = event_id 
+ dataset["event_name"] = event_name + dataset["date"] = date + + # Assign a proper coordinate for a change + dataset = dataset.assign_coords(dict(frequency=("time", frequency))) + + # Assign fraction + frac = xr.DataArray( + np.array([[[0, 0, 0], [0, 0, 0]], [[1, 1, 1], [1, 1, 1]]]), + dims=["time", "latitude", "longitude"], + coords=dict( + time=self.time, latitude=self.latitude, longitude=self.longitude + ), + ) + dataset["fraction"] = frac + + # Optionals should be read automatically + hazard = Hazard.from_xarray_raster(dataset, "", "") + self._assert_default_types(hazard) + np.testing.assert_array_equal(hazard.frequency, frequency) + np.testing.assert_array_equal(hazard.event_id, event_id) + np.testing.assert_array_equal(hazard.event_name, event_name) + np.testing.assert_array_equal(hazard.date, date) + np.testing.assert_array_equal( + hazard.fraction.toarray(), [[0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1]] + ) - # Optionals should be read automatically - hazard = Hazard.from_raster_xarray(dataset, "", "") - self._assert_default_types(hazard) - np.testing.assert_array_equal(hazard.frequency, frequency) - np.testing.assert_array_equal(hazard.event_id, event_id) - np.testing.assert_array_equal(hazard.event_name, event_name) - np.testing.assert_array_equal(hazard.date, date) - np.testing.assert_array_equal( - hazard.fraction.toarray(), [[0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1]] - ) + # Ignore keys (should be default values) + hazard = Hazard.from_xarray_raster( + dataset, + "", + "", + data_vars=dict( + frequency="", event_id="", event_name="", date="", fraction="" + ), + ) + self._assert_default(hazard) - # Ignore keys (should be default values) - hazard = Hazard.from_raster_xarray( - dataset, - "", - "", - data_vars=dict( - frequency="", event_id="", event_name="", date="", fraction="" - ), - ) - self._assert_default(hazard) + # Wrong key + with self.assertRaises(ValueError) as cm: + Hazard.from_xarray_raster( + dataset, "", "", data_vars=dict(wrong_key="stuff") + ) + 
self.assertIn( + "Unknown data variables passed: '['wrong_key']'.", str(cm.exception) + ) - # Wrong key - with self.assertRaises(ValueError) as cm: - Hazard.from_raster_xarray( - dataset, "", "", data_vars=dict(wrong_key="stuff") + # Non-existent identifier + with self.assertRaises(KeyError) as cm: + Hazard.from_xarray_raster( + dataset, "", "", data_vars=dict(frequency="freqqqqq") + ) + self.assertIn("freqqqqq", str(cm.exception)) + + # Wrong data length + # NOTE: This also implicitly checks that 'frequency' is not read! + dataset["freq"] = np.array(range(size + 1), dtype=np.float64) + with self.assertRaises(RuntimeError) as cm: + Hazard.from_xarray_raster( + dataset, "", "", data_vars=dict(frequency="freq") + ) + self.assertIn( + f"'freq' must have shape ({size},), but shape is ({size + 1},)", + str(cm.exception), ) - self.assertIn( - "Unknown data variables passed: '['wrong_key']'.", str(cm.exception) - ) - # Non-existent identifier - with self.assertRaises(KeyError) as cm: - Hazard.from_raster_xarray( - dataset, "", "", data_vars=dict(frequency="freqqqqq") + # Integer data assertions + dset = dataset.copy(deep=True) + dset["event_id"] = np.array(range(size), dtype=np.float64) + 3.5 + with self.assertRaises(TypeError) as cm: + Hazard.from_xarray_raster(dset, "", "") + self.assertIn("'event_id' data array must be integers", str(cm.exception)) + dset["event_id"] = np.linspace(0, 10, size, dtype=np.int64) + with self.assertRaises(ValueError) as cm: + Hazard.from_xarray_raster(dset, "", "") + self.assertIn("'event_id' data must be larger than zero", str(cm.exception)) + + # Date as datetime + date_str = [f"2000-01-{i:02}" for i in range(1, size + 1)] + dataset["date"] = date_str + hazard = Hazard.from_xarray_raster(dataset, "", "") + np.testing.assert_array_equal( + hazard.date, + [dt.datetime(2000, 1, i).toordinal() for i in range(1, size + 1)], ) - self.assertIn("freqqqqq", str(cm.exception)) - # Wrong data length - # NOTE: This also implicitly checks that 
'frequency' is not read! - dataset["freq"] = np.array(range(size + 1), dtype=np.float64) - with self.assertRaises(RuntimeError) as cm: - Hazard.from_raster_xarray(dataset, "", "", data_vars=dict(frequency="freq")) - self.assertIn( - f"'freq' must have shape ({size},), but shape is ({size + 1},)", - str(cm.exception), - ) + def test_data_vars_repeat(self): + """Test if suitable data vars are repeated as expected""" + with xr.open_dataset(self.netcdf_path) as dataset: + size = dataset.sizes["time"] - # Integer data assertions - dset = dataset.copy(deep=True) - dset["event_id"] = np.array(range(size), dtype=np.float64) + 3.5 - with self.assertRaises(TypeError) as cm: - Hazard.from_raster_xarray(dset, "", "") - self.assertIn("'event_id' data array must be integers", str(cm.exception)) - dset["event_id"] = np.linspace(0, 10, size, dtype=np.int64) - with self.assertRaises(ValueError) as cm: - Hazard.from_raster_xarray(dset, "", "") - self.assertIn("'event_id' data must be larger than zero", str(cm.exception)) + # Set optionals in the dataset + frequency = [1.5] + event_name = ["bla"] + date = 1 + dataset["event_name"] = event_name + dataset["date"] = date + dataset["frequency"] = frequency - # Date as datetime - date_str = [f"2000-01-{i:02}" for i in range(1, size + 1)] - dataset["date"] = date_str - hazard = Hazard.from_raster_xarray(dataset, "", "") - np.testing.assert_array_equal( - hazard.date, - [dt.datetime(2000, 1, i).toordinal() for i in range(1, size + 1)], - ) + # Check if single-valued arrays are repeated + hazard = Hazard.from_xarray_raster(dataset, "", "") - def test_data_vars_repeat(self): - """Test if suitable data vars are repeated as expected""" - dataset = xr.open_dataset(self.netcdf_path) - size = dataset.sizes["time"] - - # Set optionals in the dataset - frequency = [1.5] - event_name = ["bla"] - date = 1 - dataset["event_name"] = event_name - dataset["date"] = date - dataset["frequency"] = frequency - - # Check if single-valued arrays are repeated - 
hazard = Hazard.from_raster_xarray(dataset, "", "") np.testing.assert_array_equal(hazard.date, [date] * size) np.testing.assert_array_equal(hazard.event_name, event_name * size) np.testing.assert_array_equal(hazard.frequency, frequency * size) def test_nan(self): """Check handling of NaNs in original data""" - dataset = xr.open_dataset(self.netcdf_path) - intensity = xr.DataArray( - np.array([[[0, np.nan, 2], [3, 4, 5]], [[6, np.nan, 8], [9, 10, 11]]]), - dims=["time", "latitude", "longitude"], - coords=dict( - time=self.time, latitude=self.latitude, longitude=self.longitude - ), - ) - dataset["intensity"] = intensity - fraction = xr.DataArray( - np.array([[[0, 0, 0], [0, 0, 0]], [[1, np.nan, 1], [np.nan, 1, 1]]]), - dims=["time", "latitude", "longitude"], - coords=dict( - time=self.time, latitude=self.latitude, longitude=self.longitude - ), - ) - dataset["fraction"] = fraction - frequency = np.ones(dataset.sizes["time"]) - frequency[0] = np.nan - dataset["frequency"] = frequency + with xr.open_dataset(self.netcdf_path) as dataset: + intensity = xr.DataArray( + np.array([[[0, np.nan, 2], [3, 4, 5]], [[6, np.nan, 8], [9, 10, 11]]]), + dims=["time", "latitude", "longitude"], + coords=dict( + time=self.time, latitude=self.latitude, longitude=self.longitude + ), + ) + dataset["intensity"] = intensity + fraction = xr.DataArray( + np.array([[[0, 0, 0], [0, 0, 0]], [[1, np.nan, 1], [np.nan, 1, 1]]]), + dims=["time", "latitude", "longitude"], + coords=dict( + time=self.time, latitude=self.latitude, longitude=self.longitude + ), + ) + dataset["fraction"] = fraction + frequency = np.ones(dataset.sizes["time"]) + frequency[0] = np.nan + dataset["frequency"] = frequency + + # Load hazard + hazard = Hazard.from_xarray_raster(dataset, "", "") - # Load hazard - hazard = Hazard.from_raster_xarray(dataset, "", "") + # Check types self._assert_default_types(hazard) # NaNs are set to zero in sparse data @@ -296,7 +326,9 @@ def test_crs(self): def test_crs_from_input(crs_input): crs = 
CRS.from_user_input(crs_input) - hazard = Hazard.from_raster_xarray(self.netcdf_path, "", "", crs=crs_input) + hazard = Hazard.from_xarray_raster_file( + self.netcdf_path, "", "", crs=crs_input + ) self.assertEqual(hazard.centroids.geometry.crs, crs) test_crs_from_input("EPSG:3857") @@ -306,74 +338,86 @@ def test_crs_from_input(crs_input): def test_missing_dims(self): """Test if missing coordinates are expanded and correct errors are thrown""" # Drop time as dimension, but not as coordinate! - ds = xr.open_dataset(self.netcdf_path).isel(time=0).squeeze() - hazard = Hazard.from_raster_xarray(ds, "", "") - self._assert_default_types(hazard) - np.testing.assert_array_equal(hazard.event_name, [np.datetime64(self.time[0])]) - np.testing.assert_array_equal(hazard.date, [self.time[0].toordinal()]) - np.testing.assert_array_equal(hazard.centroids.lat, [0, 0, 0, 1, 1, 1]) - np.testing.assert_array_equal(hazard.centroids.lon, [0, 1, 2, 0, 1, 2]) - self.assertEqual(hazard.centroids.geometry.crs, DEF_CRS) - np.testing.assert_array_equal(hazard.intensity.toarray(), [[0, 1, 2, 3, 4, 5]]) - self.assertEqual(hazard.fraction.nnz, 0) - np.testing.assert_array_equal(hazard.fraction.toarray(), [[0, 0, 0, 0, 0, 0]]) + with xr.open_dataset(self.netcdf_path) as ds: + ds = ds.isel(time=0).squeeze() + hazard = Hazard.from_xarray_raster(ds, "", "") + self._assert_default_types(hazard) + np.testing.assert_array_equal( + hazard.event_name, [np.datetime64(self.time[0])] + ) + np.testing.assert_array_equal(hazard.date, [self.time[0].toordinal()]) + np.testing.assert_array_equal(hazard.centroids.lat, [0, 0, 0, 1, 1, 1]) + np.testing.assert_array_equal(hazard.centroids.lon, [0, 1, 2, 0, 1, 2]) + self.assertEqual(hazard.centroids.geometry.crs, DEF_CRS) + np.testing.assert_array_equal( + hazard.intensity.toarray(), [[0, 1, 2, 3, 4, 5]] + ) + self.assertEqual(hazard.fraction.nnz, 0) + np.testing.assert_array_equal( + hazard.fraction.toarray(), [[0, 0, 0, 0, 0, 0]] + ) - # Now drop variable 
altogether, should raise an error - ds = ds.drop_vars("time") - with self.assertRaises(RuntimeError) as cm: - Hazard.from_raster_xarray(ds, "", "") - self.assertIn( - "Dataset is missing dimension/coordinate: time", str(cm.exception) - ) + # Now drop variable altogether, should raise an error + ds = ds.drop_vars("time") + with self.assertRaises(RuntimeError) as cm: + Hazard.from_xarray_raster(ds, "", "") + self.assertIn( + "Dataset is missing dimension/coordinate: time", str(cm.exception) + ) - # Expand time again - ds = ds.expand_dims(time=[np.datetime64("2022-01-01")]) - hazard = Hazard.from_raster_xarray(ds, "", "") - self._assert_default_types(hazard) - np.testing.assert_array_equal(hazard.event_name, [np.datetime64("2022-01-01")]) - np.testing.assert_array_equal( - hazard.date, [dt.datetime(2022, 1, 1).toordinal()] - ) + # Expand time again + ds = ds.expand_dims(time=[np.datetime64("2022-01-01")]) + hazard = Hazard.from_xarray_raster(ds, "", "") + self._assert_default_types(hazard) + np.testing.assert_array_equal( + hazard.event_name, [np.datetime64("2022-01-01")] + ) + np.testing.assert_array_equal( + hazard.date, [dt.datetime(2022, 1, 1).toordinal()] + ) class TestReadDimsCoordsNetCDF(unittest.TestCase): """Checks for dimensions and coordinates with different names and shapes""" - def setUp(self): + @classmethod + def setUpClass(cls): """Write a NetCDF file with many coordinates""" - self.netcdf_path = Path.cwd() / "coords.nc" - self.intensity = np.array([[[0, 1, 2], [3, 4, 5]]]) - self.fraction = np.array([[[0, 0, 0], [1, 1, 1]]]) - self.time = np.array([dt.datetime(2000, 1, 1)]) - self.x = np.array([0, 1, 2]) - self.y = np.array([0, 1]) - self.lon = np.array([1, 2, 3]) - self.lat = np.array([1, 2]) - self.years = np.array([dt.datetime(1999, 2, 2)]) - self.longitude = np.array([[10, 11, 12], [10, 11, 12]]) - self.latitude = np.array([[100, 100, 100], [200, 200, 200]]) + cls.tempdir = TemporaryDirectory() + cls.netcdf_path = Path(cls.tempdir.name) / 
"coords.nc" + cls.intensity = np.array([[[0, 1, 2], [3, 4, 5]]]) + cls.fraction = np.array([[[0, 0, 0], [1, 1, 1]]]) + cls.time = np.array([dt.datetime(2000, 1, 1)]) + cls.x = np.array([0, 1, 2]) + cls.y = np.array([0, 1]) + cls.lon = np.array([1, 2, 3]) + cls.lat = np.array([1, 2]) + cls.years = np.array([dt.datetime(1999, 2, 2)]) + cls.longitude = np.array([[10, 11, 12], [10, 11, 12]]) + cls.latitude = np.array([[100, 100, 100], [200, 200, 200]]) dset = xr.Dataset( { - "intensity": (["time", "y", "x"], self.intensity), - "fraction": (["time", "y", "x"], self.fraction), + "intensity": (["time", "y", "x"], cls.intensity), + "fraction": (["time", "y", "x"], cls.fraction), }, { - "time": self.time, - "x": self.x, - "y": self.y, - "lon": (["x"], self.lon), - "lat": (["y"], self.lat), - "years": (["time"], self.years), - "latitude": (["y", "x"], self.latitude), - "longitude": (["y", "x"], self.longitude), + "time": cls.time, + "x": cls.x, + "y": cls.y, + "lon": (["x"], cls.lon), + "lat": (["y"], cls.lat), + "years": (["time"], cls.years), + "latitude": (["y", "x"], cls.latitude), + "longitude": (["y", "x"], cls.longitude), }, ) - dset.to_netcdf(self.netcdf_path) + dset.to_netcdf(cls.netcdf_path) - def tearDown(self): + @classmethod + def tearDownClass(cls): """Delete the NetCDF file""" - self.netcdf_path.unlink() + cls.tempdir.cleanup() def _assert_intensity_fraction(self, hazard): """Check if intensity and fraction data are read correctly""" @@ -382,7 +426,7 @@ def _assert_intensity_fraction(self, hazard): def test_dimension_naming(self): """Test if dimensions with different names can be read""" - hazard = Hazard.from_raster_xarray( + hazard = Hazard.from_xarray_raster_file( self.netcdf_path, "", "", @@ -397,7 +441,7 @@ def test_dimension_naming(self): def test_coordinate_naming(self): """Test if coordinates with different names than dimensions can be read""" - hazard = Hazard.from_raster_xarray( + hazard = Hazard.from_xarray_raster_file( self.netcdf_path, "", "", @@ 
-412,7 +456,7 @@ def test_coordinate_naming(self): def test_2D_coordinates(self): """Test if read method correctly handles 2D coordinates""" - hazard = Hazard.from_raster_xarray( + hazard = Hazard.from_xarray_raster_file( self.netcdf_path, "", "", @@ -426,14 +470,15 @@ def test_2D_coordinates(self): def test_load_dataset_rechunk(self): """Load the data from an opened dataset and force rechunking""" - dataset = xr.open_dataset(self.netcdf_path) - hazard = Hazard.from_raster_xarray( - dataset, - "", - "", - coordinate_vars=dict(latitude="latitude", longitude="longitude"), - rechunk=True, - ) + with xr.open_dataset(self.netcdf_path) as dataset: + hazard = Hazard.from_xarray_raster( + dataset, + "", + "", + coordinate_vars=dict(latitude="latitude", longitude="longitude"), + rechunk=True, + ) + np.testing.assert_array_equal( hazard.centroids.lat, [100, 100, 100, 200, 200, 200] ) @@ -442,26 +487,30 @@ def test_load_dataset_rechunk(self): # Assert that .chunk is called the right way with patch("xarray.Dataset.chunk") as mock: - mock.return_value = dataset - dataset = xr.open_dataset(self.netcdf_path) - Hazard.from_raster_xarray( - dataset, - "", - "", - coordinate_vars=dict(latitude="latitude", longitude="longitude"), - rechunk=True, - ) + with xr.open_dataset(self.netcdf_path) as dataset: + mock.return_value = dataset + Hazard.from_xarray_raster( + dataset, + "", + "", + coordinate_vars=dict(latitude="latitude", longitude="longitude"), + rechunk=True, + ) + # First latitude dim, then longitude dim, then event dim mock.assert_called_once_with(chunks=dict(y=-1, x=-1, time="auto")) # Should not be called by default mock.reset_mock() - Hazard.from_raster_xarray( - dataset, - "", - "", - coordinate_vars=dict(latitude="latitude", longitude="longitude"), - ) + with xr.open_dataset(self.netcdf_path) as dataset: + mock.return_value = dataset + Hazard.from_xarray_raster( + dataset, + "", + "", + coordinate_vars=dict(latitude="latitude", longitude="longitude"), + ) + 
mock.assert_not_called() def test_2D_time(self): @@ -491,7 +540,7 @@ def test_2D_time(self): "time": (["year", "month"], time), }, ) - hazard = Hazard.from_raster_xarray(ds, "", "") + hazard = Hazard.from_xarray_raster(ds, "", "") np.testing.assert_array_equal(hazard.intensity.toarray(), [[1], [2], [3], [4]]) np.testing.assert_array_equal( @@ -507,7 +556,7 @@ def test_errors(self): """Check if expected errors are thrown""" # Wrong coordinate key with self.assertRaises(ValueError) as cm: - Hazard.from_raster_xarray( + Hazard.from_xarray_raster_file( self.netcdf_path, "", "", @@ -517,7 +566,7 @@ def test_errors(self): # Correctly specified, but the custom dimension does not exist with self.assertRaises(RuntimeError) as cm: - Hazard.from_raster_xarray( + Hazard.from_xarray_raster_file( self.netcdf_path, "", "",