From 4f6952e5eeba6be154b316795cae02790b5574a9 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:10:21 +0200 Subject: [PATCH 01/14] Ensure that csr_matrices are in canonical format * Add getter/setter attributes for csr_matrices in Hazard. * Check format and sum duplicates when assigning matrices. * Update unit tests. --- climada/hazard/base.py | 46 +++++++++++++++++++++++++++----- climada/hazard/io.py | 21 ++++++++++++--- climada/hazard/test/test_base.py | 31 ++++++++++++++++----- 3 files changed, 81 insertions(+), 17 deletions(-) diff --git a/climada/hazard/base.py b/climada/hazard/base.py index daa4c3869d..def2826340 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -90,8 +90,8 @@ class Hazard(HazardIO, HazardPlot): 'centroids', 'event_id', 'frequency', - 'intensity', - 'fraction' + '_intensity', + '_fraction' } """Name of the variables needed to compute the impact. Types: scalar, str, list, 1dim np.array of size num_events, scipy.sparse matrix of shape @@ -181,16 +181,48 @@ def __init__(self, self.event_name = event_name if event_name is not None else list() self.date = date if date is not None else np.array([], int) self.orig = orig if orig is not None else np.array([], bool) - # following values are defined for each event and centroid - self.intensity = intensity if intensity is not None else sparse.csr_matrix( - np.empty((0, 0))) # events x centroids - self.fraction = fraction if fraction is not None else sparse.csr_matrix( - self.intensity.shape) # events x centroids + + # The following values are defined for each event and centroid + # Shape: ( events x centroids ) + self._intensity = intensity + self._fraction = fraction self.pool = pool if self.pool: LOGGER.info('Using %s CPUs.', self.pool.ncpus) + @property + def intensity(self) -> sparse.csr_matrix: + """Hazard intensity matrix""" + if self._intensity is None: + return sparse.csr_matrix(np.empty((0, 0))) + + return self._intensity + + @intensity.setter + def intensity(self, value: sparse.csr_matrix): + """Set intensity matrix to new value""" + self._intensity = value + self._intensity.check_format() + self._intensity.sort_indices() + self._intensity.sum_duplicates() + + @property + def fraction(self): + """Hazard fraction matrix""" + if self._fraction is None: + return sparse.csr_matrix(self.intensity.shape) + + return self._fraction + + @fraction.setter + def fraction(self, value: sparse.csr_matrix): + """Set fraction matrix to new value""" + self._fraction = value + self._fraction.check_format() + self._fraction.sort_indices() + self._fraction.sum_duplicates() + @classmethod def get_default(cls, attribute): """Get the Hazard type default for a given attribute. diff --git a/climada/hazard/io.py b/climada/hazard/io.py index d50dcfc1d9..80696dd177 100644 --- a/climada/hazard/io.py +++ b/climada/hazard/io.py @@ -1004,12 +1004,18 @@ def write_hdf5(self, file_name, todense=False): LOGGER.info('Writing %s', file_name) with h5py.File(file_name, 'w') as hf_data: str_dt = h5py.special_dtype(vlen=str) - for (var_name, var_val) in self.__dict__.items(): + for var_name in self.__dict__: if var_name == 'centroids': # Centroids have their own write_hdf5 method, # which is invoked at the end of this method (s.b.) - pass - elif isinstance(var_val, sparse.csr_matrix): + continue + # Prune private attributes + if var_name in self.vars_oblig: + var_name = var_name.lstrip("_") + + var_val = getattr(self, var_name) # Also works with properties + + if isinstance(var_val, sparse.csr_matrix): if todense: hf_data.create_dataset(var_name, data=var_val.toarray()) else: @@ -1065,11 +1071,18 @@ def from_hdf5(cls, file_name): haz = cls() hazard_kwargs = dict() with h5py.File(file_name, 'r') as hf_data: - for (var_name, var_val) in haz.__dict__.items(): + for var_name in haz.__dict__: + # Prune private attributes + if var_name in haz.vars_oblig: + var_name = var_name.lstrip("_") + if var_name not in hf_data.keys(): continue if var_name == 'centroids': continue + + var_val = getattr(haz, var_name) # Also works with properties + if isinstance(var_val, np.ndarray) and var_val.ndim == 1: hazard_kwargs[var_name] = np.array(hf_data.get(var_name)) elif isinstance(var_val, sparse.csr_matrix): diff --git a/climada/hazard/test/test_base.py b/climada/hazard/test/test_base.py index 5df525fbfc..6943ef19c1 100644 --- a/climada/hazard/test/test_base.py +++ b/climada/hazard/test/test_base.py @@ -124,18 +124,18 @@ def test_check_wrongFreq_fail(self): def test_check_wrongInten_fail(self): """Wrong hazard definition""" self.hazard.intensity = sparse.csr_matrix([[1, 2], [1, 2]]) - - with self.assertRaises(ValueError) as cm: + with self.assertRaisesRegex( + ValueError, "Invalid Hazard._intensity row size: 3 != 2." + ): self.hazard.check() - self.assertIn('Invalid Hazard.intensity row size: 3 != 2.', str(cm.exception)) def test_check_wrongFrac_fail(self): """Wrong hazard definition""" self.hazard.fraction = sparse.csr_matrix([[1], [1], [1]]) - - with self.assertRaises(ValueError) as cm: + with self.assertRaisesRegex( + ValueError, "Invalid Hazard._fraction column size: 2 != 1." + ): self.hazard.check() - self.assertIn('Invalid Hazard.fraction column size: 2 != 1.', str(cm.exception)) def test_check_wrongEvName_fail(self): """Wrong hazard definition""" @@ -212,6 +212,25 @@ def test_get_date_strings_pass(self): self.assertEqual(haz.get_event_date()[560], u_dt.date_to_str(haz.date[560])) + def test_matrix_consistency(self): + """Check that the csr_matrix is brought in canonical format""" + # Non-canonical: All data points will be summed onto the first matrix entry + data = [0, 1, 2] + indices = [0, 0, 0] + indptr = [0, 3, 3, 3] + matrix = sparse.csr_matrix((data, indices, indptr), shape=(3, 2)) + self.assertEqual(matrix.data.max(), 2) + self.assertEqual(matrix[0, 0], 3) + self.assertFalse(matrix.has_canonical_format) + + # Check conversion to canonical format when assigning + for attr in ("intensity", "fraction"): + setattr(self.hazard, attr, matrix.copy()) + hazard_matrix = getattr(self.hazard, attr) + self.assertTrue(hazard_matrix.has_canonical_format) + self.assertEqual(hazard_matrix[0, 0], 3) + np.testing.assert_array_equal(hazard_matrix.data, [3]) + class TestRemoveDupl(unittest.TestCase): """Test remove_duplicates method.""" From 91c52e9b237596e4415074c739a90744e3de15e9 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:15:50 +0200 Subject: [PATCH 02/14] Ensure canonical format in init and streamline checks --- climada/hazard/base.py | 18 +++++------------- climada/hazard/test/test_base.py | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/climada/hazard/base.py b/climada/hazard/base.py index def2826340..6fb981ffff 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -181,11 +181,11 @@ def __init__(self, self.event_name = event_name if event_name is not None else list() self.date = date if date is not None else np.array([], int) self.orig = orig if orig is not None else np.array([], bool) - - # The following values are defined for each event and centroid - # Shape: ( events x centroids ) - self._intensity = intensity - self._fraction = fraction + # following values are defined for each event and centroid + self.intensity = intensity if intensity is not None else sparse.csr_matrix( + np.empty((0, 0))) # events x centroids + self.fraction = fraction if fraction is not None else sparse.csr_matrix( + self.intensity.shape) # events x centroids self.pool = pool if self.pool: @@ -194,9 +194,6 @@ def __init__(self, @property def intensity(self) -> sparse.csr_matrix: """Hazard intensity matrix""" - if self._intensity is None: - return sparse.csr_matrix(np.empty((0, 0))) - return self._intensity @intensity.setter @@ -204,15 +201,11 @@ def intensity(self, value: sparse.csr_matrix): """Set intensity matrix to new value""" self._intensity = value self._intensity.check_format() - self._intensity.sort_indices() self._intensity.sum_duplicates() @property def fraction(self): """Hazard fraction matrix""" - if self._fraction is None: - return sparse.csr_matrix(self.intensity.shape) - return self._fraction @fraction.setter @@ -220,7 +213,6 @@ def fraction(self, value: sparse.csr_matrix): """Set fraction matrix to new value""" self._fraction = value self._fraction.check_format() - self._fraction.sort_indices() self._fraction.sum_duplicates() @classmethod diff --git a/climada/hazard/test/test_base.py b/climada/hazard/test/test_base.py index 6943ef19c1..acd0cd0868 100644 --- a/climada/hazard/test/test_base.py +++ b/climada/hazard/test/test_base.py @@ -223,13 +223,21 @@ def test_matrix_consistency(self): self.assertEqual(matrix[0, 0], 3) self.assertFalse(matrix.has_canonical_format) + def check_canonical_matrix(mat): + self.assertTrue(mat.has_canonical_format) + self.assertEqual(mat[0, 0], 3) + np.testing.assert_array_equal(mat.data, [3]) + + # Check canonical format when initializing + hazard_new = Hazard("TC", intensity=matrix.copy(), fraction=matrix.copy()) + matrix_attrs = ("intensity", "fraction") + for attr in matrix_attrs: + check_canonical_matrix(getattr(hazard_new, attr)) + # Check conversion to canonical format when assigning for attr in ("intensity", "fraction"): setattr(self.hazard, attr, matrix.copy()) - hazard_matrix = getattr(self.hazard, attr) - self.assertTrue(hazard_matrix.has_canonical_format) - self.assertEqual(hazard_matrix[0, 0], 3) - np.testing.assert_array_equal(hazard_matrix.data, [3]) + check_canonical_matrix(getattr(self.hazard, attr)) class TestRemoveDupl(unittest.TestCase): """Test remove_duplicates method.""" From 3e27ac8b0f1311712c5a069858c2889da5e104d9 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:04:45 +0200 Subject: [PATCH 03/14] Explicitly remove zeros from csr matrices when assigning --- climada/hazard/base.py | 4 +++- climada/hazard/test/test_base.py | 15 +++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/climada/hazard/base.py b/climada/hazard/base.py index 6fb981ffff..567cf14dde 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -201,10 +201,11 @@ def intensity(self, value: sparse.csr_matrix): """Set intensity matrix to new value""" self._intensity = value self._intensity.check_format() + self._intensity.eliminate_zeros() self._intensity.sum_duplicates() @property - def fraction(self): + def fraction(self) -> sparse.csr_matrix: """Hazard fraction matrix""" return self._fraction @@ -213,6 +214,7 @@ def fraction(self, value: sparse.csr_matrix): """Set fraction matrix to new value""" self._fraction = value self._fraction.check_format() + self._fraction.eliminate_zeros() self._fraction.sum_duplicates() @classmethod diff --git a/climada/hazard/test/test_base.py b/climada/hazard/test/test_base.py index acd0cd0868..6aa8fb1222 100644 --- a/climada/hazard/test/test_base.py +++ b/climada/hazard/test/test_base.py @@ -214,19 +214,22 @@ def test_get_date_strings_pass(self): def test_matrix_consistency(self): """Check that the csr_matrix is brought in canonical format""" - # Non-canonical: All data points will be summed onto the first matrix entry - data = [0, 1, 2] - indices = [0, 0, 0] - indptr = [0, 3, 3, 3] + # Non-canonical: First three data points will be summed onto the first matrix + # entry, forth will be an explicit zero entry + data = [0, 1, 2, 0] + indices = [0, 0, 0, 1] + indptr = [0, 4, 4, 4] matrix = sparse.csr_matrix((data, indices, indptr), shape=(3, 2)) - self.assertEqual(matrix.data.max(), 2) - self.assertEqual(matrix[0, 0], 3) + np.testing.assert_array_equal(matrix.data, data) + np.testing.assert_array_equal(matrix[0, [0, 1]].toarray(), [[3, 0]]) + self.assertEqual(matrix.nnz, 4) self.assertFalse(matrix.has_canonical_format) def check_canonical_matrix(mat): self.assertTrue(mat.has_canonical_format) self.assertEqual(mat[0, 0], 3) np.testing.assert_array_equal(mat.data, [3]) + self.assertEqual(mat.nnz, 1) # Check canonical format when initializing hazard_new = Hazard("TC", intensity=matrix.copy(), fraction=matrix.copy()) From f539dec675fece465d236fee4d3bdac15d9cd1a0 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:55:15 +0200 Subject: [PATCH 04/14] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78543cf491..3bcf18f7b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ CLIMADA tutorials. [#872](https://github.com/CLIMADA-project/climada_python/pull - Centroids complete overhaul. Most function should be backward compatible. Internal data is stored in a geodataframe attribute. Raster are now stored as points, and the meta attribute is removed. Several methds were deprecated or removed. [#787](https://github.com/CLIMADA-project/climada_python/pull/787) - Improved error messages produced by `ImpactCalc.impact()` in case impact function in the exposures is not found in impf_set [#863](https://github.com/CLIMADA-project/climada_python/pull/863) - Changed module structure: `climada.hazard.Hazard` has been split into the modules `base`, `io` and `plot` [#871](https://github.com/CLIMADA-project/climada_python/pull/871) +- Ensure `csr_matrix` stored in `climada.hazard.Hazard` have consistent data format and store no explicit zeros [#893](https://github.com/CLIMADA-project/climada_python/pull/893) ### Fixed From 4ced09c627b005afb36325fa590d9475d61f4a5d Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 17 Jun 2024 11:28:10 +0200 Subject: [PATCH 05/14] Add explicit check for updating matrices --- climada/hazard/test/test_base.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/climada/hazard/test/test_base.py b/climada/hazard/test/test_base.py index 6aa8fb1222..19fa746de5 100644 --- a/climada/hazard/test/test_base.py +++ b/climada/hazard/test/test_base.py @@ -212,6 +212,17 @@ def test_get_date_strings_pass(self): self.assertEqual(haz.get_event_date()[560], u_dt.date_to_str(haz.date[560])) + def test_matrix_update(self): + """Check that the csr_matrix can be updated with element access""" + self.hazard.intensity[[0, 2], 1] = 10 + np.testing.assert_array_equal( + self.hazard.intensity.toarray(), [[1, 10], [1, 2], [1, 10]] + ) + self.hazard.fraction[:, 1] = 0 + np.testing.assert_array_equal( + self.hazard.fraction.toarray(), [[1, 0], [1, 0], [1, 0]] + ) + def test_matrix_consistency(self): """Check that the csr_matrix is brought in canonical format""" # Non-canonical: First three data points will be summed onto the first matrix From de06f2ecd381f64ea4e2eaa346d3c487cb47d5ea Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 17 Jun 2024 15:40:29 +0200 Subject: [PATCH 06/14] Add util function for pruning csr_matrices Update hazard docstring --- climada/hazard/base.py | 44 +++++++++++++++++++++++++++++++++++------ climada/util/checker.py | 36 ++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/climada/hazard/base.py b/climada/hazard/base.py index 567cf14dde..d29107e1ad 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -49,6 +49,22 @@ class Hazard(HazardIO, HazardPlot): Contains events of some hazard type defined at centroids. Loads from files with format defined in FILE_EXT. + Attention + --------- + This class uses instances of + `scipy.sparse.csr_matrix + `_ + to store :py:attr:`intensity` and :py:attr:`fraction`. This data types comes with + its particular pitfalls. Depending on how the objects are instantiated and modified, + a matrix might end up in a "non-canonical" state. In this state, its ``.data`` + attribute does not necessarily represent the values apparent in the final matrix. + In particular, a "non-canonical" matrix may store "duplicates", i.e. multiple values + that map to the same matrix position. This is supported, and the default behavior is + to sum up these values. To avoid any inconsistencies, call :py:meth:`check_matrices` + once you inserted all data. + This class will call :py:func:`climada.util.checker.prune_csr_matrix` whenever a + csr_matrix is assigned to one of the aforementioned attributes. + Attributes ---------- haz_type : str @@ -200,9 +216,7 @@ def intensity(self) -> sparse.csr_matrix: def intensity(self, value: sparse.csr_matrix): """Set intensity matrix to new value""" self._intensity = value - self._intensity.check_format() - self._intensity.eliminate_zeros() - self._intensity.sum_duplicates() + u_check.prune_csr_matrix(self._intensity) @property def fraction(self) -> sparse.csr_matrix: @@ -213,9 +227,27 @@ def fraction(self) -> sparse.csr_matrix: def fraction(self, value: sparse.csr_matrix): """Set fraction matrix to new value""" self._fraction = value - self._fraction.check_format() - self._fraction.eliminate_zeros() - self._fraction.sum_duplicates() + u_check.prune_csr_matrix(self._fraction) + + def check_matrices(self): + """Ensure that matrices are consistently shaped and stored + + See Also + -------- + :py:func:`climada.util.checker.prune_csr_matrix` + + Raises + ------ + ValueError + If matrices are ill-formed or ill-shaped + """ + u_check.prune_csr_matrix(self.intensity) + u_check.prune_csr_matrix(self.fraction) + if self.intensity.shape != self.fraction.shape: + if self.fraction.nnz > 0: + raise ValueError( + "Intensity and fraction matrices must have the same shape" + ) @classmethod def get_default(cls, attribute): diff --git a/climada/util/checker.py b/climada/util/checker.py index 2bcbbef057..5d9725489b 100644 --- a/climada/util/checker.py +++ b/climada/util/checker.py @@ -23,7 +23,8 @@ 'size', 'shape', 'array_optional', - 'array_default' + 'array_default', + 'prune_csr_matrix', ] import logging @@ -162,3 +163,36 @@ def array_default(exp_len, var, var_name, def_val): else: size(exp_len, var, var_name) return res + +def prune_csr_matrix(matrix: sparse.csr_matrix): + """Ensure that the matrix is in the "canonical format". + + Depending on how the matrix was instantiated or modified, it might be in a + "non-canonical" state. This only relates to its internal storage. In this state, + multiple values might be stored for a single "apparent" value in the matrix. + Also, the matrix might store zeros explicitly, which could be removed. + Calling this function makes sure that the matrix is in the "canonical state", and + brings it into this state, if possible. + + See Also + -------- + `csr_matrix.has_canonical_format + `_ + + Parameters + ---------- + matrix : csr_matrix + The matrix to check. It will be modified *inplace*. No apparent matrix values + will change. + + Raises + ------ + ValueError + If + `csr_matrix.check_format + `_ + fails + """ + matrix.check_format() + matrix.eliminate_zeros() + matrix.sum_duplicates() From ebb2cc8f9d36067a4cdf99b7a0195719a5e09744 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 17 Jun 2024 15:40:59 +0200 Subject: [PATCH 07/14] Check matrices when instantiating ImpactCalc --- climada/engine/impact_calc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/climada/engine/impact_calc.py b/climada/engine/impact_calc.py index 315a02c8a1..71d19f0871 100644 --- a/climada/engine/impact_calc.py +++ b/climada/engine/impact_calc.py @@ -61,6 +61,8 @@ def __init__(self, self.exposures = exposures self.impfset = impfset self.hazard = hazard + self.hazard.check_matrices() + # exposures index to use for matrix reconstruction self._orig_exp_idx = np.arange(self.exposures.gdf.shape[0]) From c58b48dcb8cad21bc45cb6f9b619eaee98f3f3f5 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 21 Jun 2024 12:24:43 +0200 Subject: [PATCH 08/14] Update climada/hazard/base.py Co-authored-by: Chahan M. Kropf --- climada/hazard/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/climada/hazard/base.py b/climada/hazard/base.py index d29107e1ad..e30fccf8fd 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -61,7 +61,7 @@ class Hazard(HazardIO, HazardPlot): In particular, a "non-canonical" matrix may store "duplicates", i.e. multiple values that map to the same matrix position. This is supported, and the default behavior is to sum up these values. To avoid any inconsistencies, call :py:meth:`check_matrices` - once you inserted all data. + once you inserted all data. This will explicitly sum all values at the same matrix position and eliminate explicit zeros. This class will call :py:func:`climada.util.checker.prune_csr_matrix` whenever a csr_matrix is assigned to one of the aforementioned attributes. From e0ebdefb22280a0d24353181fdaea65f3e984e69 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 21 Jun 2024 12:25:18 +0200 Subject: [PATCH 09/14] Format docstring suggestion from review --- climada/hazard/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/climada/hazard/base.py b/climada/hazard/base.py index e30fccf8fd..391aeaefa8 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -61,9 +61,10 @@ class Hazard(HazardIO, HazardPlot): In particular, a "non-canonical" matrix may store "duplicates", i.e. multiple values that map to the same matrix position. This is supported, and the default behavior is to sum up these values. To avoid any inconsistencies, call :py:meth:`check_matrices` - once you inserted all data. This will explicitly sum all values at the same matrix position and eliminate explicit zeros. - This class will call :py:func:`climada.util.checker.prune_csr_matrix` whenever a - csr_matrix is assigned to one of the aforementioned attributes. + once you inserted all data. This will explicitly sum all values at the same matrix + position and eliminate explicit zeros. This class will call + :py:func:`climada.util.checker.prune_csr_matrix` whenever a csr_matrix is assigned + to one of the aforementioned attributes. Attributes ---------- From 71ce00e2ab57c9518499162b50e69a3d75da2fbb Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:42:54 +0200 Subject: [PATCH 10/14] Update unit tests for matrix pruning and checks --- climada/engine/test/test_impact_calc.py | 10 +++++ climada/hazard/base.py | 11 ++++-- climada/hazard/test/test_base.py | 52 ++++++++++++++++++++----- climada/util/test/test_checker.py | 22 +++++++++++ 4 files changed, 82 insertions(+), 13 deletions(-) diff --git a/climada/engine/test/test_impact_calc.py b/climada/engine/test/test_impact_calc.py index 471706988b..c4ee5e26b4 100644 --- a/climada/engine/test/test_impact_calc.py +++ b/climada/engine/test/test_impact_calc.py @@ -70,6 +70,16 @@ def test_init(self): np.testing.assert_array_equal(HAZ.event_id, icalc.hazard.event_id) np.testing.assert_array_equal(HAZ.event_name, icalc.hazard.event_name) + # Test check matrices + hazard = deepcopy(HAZ) + hazard.intensity[0, hazard.intensity.indices[0]] = 0 + hazard.fraction = sparse.csr_matrix(np.ones((1, 1))) + with self.assertRaisesRegex( + ValueError, "Intensity and fraction matrices must have the same shape" + ): + ImpactCalc(ENT.exposures, ENT.impact_funcs, hazard) + self.assertEqual(hazard.intensity.nnz, HAZ.intensity.nnz - 1) # was pruned + def test_metrics(self): """Test methods to get impact metrics""" mat = sparse.csr_matrix(np.array( diff --git a/climada/hazard/base.py b/climada/hazard/base.py index 391aeaefa8..593023ada9 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -203,6 +203,7 @@ def __init__(self, np.empty((0, 0))) # events x centroids self.fraction = fraction if fraction is not None else sparse.csr_matrix( self.intensity.shape) # events x centroids + self.check_matrices() self.pool = pool if self.pool: @@ -237,15 +238,19 @@ def check_matrices(self): -------- :py:func:`climada.util.checker.prune_csr_matrix` + Todo + ----- + * Check consistency with centroids + Raises ------ ValueError - If matrices are ill-formed or ill-shaped + If matrices are ill-formed or ill-shaped in relation to each other """ u_check.prune_csr_matrix(self.intensity) u_check.prune_csr_matrix(self.fraction) - if self.intensity.shape != self.fraction.shape: - if self.fraction.nnz > 0: + if self.fraction.nnz > 0: + if self.intensity.shape != self.fraction.shape: raise ValueError( "Intensity and fraction matrices must have the same shape" ) diff --git a/climada/hazard/test/test_base.py b/climada/hazard/test/test_base.py index 19fa746de5..5ab5a83961 100644 --- a/climada/hazard/test/test_base.py +++ b/climada/hazard/test/test_base.py @@ -224,34 +224,66 @@ def test_matrix_update(self): ) def test_matrix_consistency(self): - """Check that the csr_matrix is brought in canonical format""" + """Check that the csr_matrix is brought into canonical format""" # Non-canonical: First three data points will be summed onto the first matrix # entry, forth will be an explicit zero entry data = [0, 1, 2, 0] indices = [0, 0, 0, 1] indptr = [0, 4, 4, 4] matrix = sparse.csr_matrix((data, indices, indptr), shape=(3, 2)) - np.testing.assert_array_equal(matrix.data, data) - np.testing.assert_array_equal(matrix[0, [0, 1]].toarray(), [[3, 0]]) - self.assertEqual(matrix.nnz, 4) - self.assertFalse(matrix.has_canonical_format) def check_canonical_matrix(mat): self.assertTrue(mat.has_canonical_format) - self.assertEqual(mat[0, 0], 3) - np.testing.assert_array_equal(mat.data, [3]) self.assertEqual(mat.nnz, 1) # Check canonical format when initializing hazard_new = Hazard("TC", intensity=matrix.copy(), fraction=matrix.copy()) matrix_attrs = ("intensity", "fraction") for attr in matrix_attrs: - check_canonical_matrix(getattr(hazard_new, attr)) + with self.subTest(matrix=attr): + check_canonical_matrix(getattr(hazard_new, attr)) # Check conversion to canonical format when assigning + for attr in matrix_attrs: + with self.subTest(matrix=attr): + setattr(self.hazard, attr, matrix.copy()) + check_canonical_matrix(getattr(self.hazard, attr)) + + def test_check_matrices(self): + """Test the check_matrices method""" + # Check shapes + with self.assertRaisesRegex( + ValueError, "Intensity and fraction matrices must have the same shape" + ): + Hazard( + "TC", + intensity=sparse.csr_matrix(np.ones((2, 2))), + fraction=sparse.csr_matrix(np.ones((2, 3))), + ) + + hazard = Hazard("TC") + hazard.fraction = sparse.csr_matrix(np.zeros((2, 2))) + hazard.check_matrices() # No error, fraction.nnz = 0 + hazard.fraction = sparse.csr_matrix(np.ones((2, 2))) + with self.assertRaisesRegex( + ValueError, "Intensity and fraction matrices must have the same shape" + ): + hazard.check_matrices() + hazard.intensity = sparse.csr_matrix(np.ones((2, 3))) + with self.assertRaisesRegex( + ValueError, "Intensity and fraction matrices must have the same shape" + ): + hazard.check_matrices() + + # Check that matrices are pruned + hazard.intensity[:] = 0 + hazard.fraction = sparse.csr_matrix(([0], [0], [0, 1, 1]), shape=(2, 3)) + hazard.check_matrices() for attr in ("intensity", "fraction"): - setattr(self.hazard, attr, matrix.copy()) - check_canonical_matrix(getattr(self.hazard, attr)) + with self.subTest(matrix=attr): + matrix = getattr(hazard, attr) + self.assertEqual(matrix.nnz, 0) + self.assertTrue(matrix.has_canonical_format) class TestRemoveDupl(unittest.TestCase): """Test remove_duplicates method.""" diff --git a/climada/util/test/test_checker.py b/climada/util/test/test_checker.py index c645b2a51f..75ba630312 100644 --- a/climada/util/test/test_checker.py +++ b/climada/util/test/test_checker.py @@ -87,6 +87,28 @@ def test_check_optionals_fail(self): u_check.check_optionals(dummy.__dict__, dummy.vars_opt, "DummyClass.", dummy.id.size) self.assertIn('Invalid DummyClass.list size: 25 != 3.', str(cm.exception)) + def test_prune_csr_matrix(self): + """Check that csr matrices are brought into canonical format""" + # Non-canonical: First three data points will be summed onto the first matrix + # entry, forth will be an explicit zero entry + data = [0, 1, 2, 0] + indices = [0, 0, 0, 1] + indptr = [0, 4, 4, 4] + matrix = sparse.csr_matrix((data, indices, indptr), shape=(3, 2)) + + # These checks just make sure that we understand how csr_matrix works + np.testing.assert_array_equal(matrix.data, data) + np.testing.assert_array_equal(matrix[0, [0, 1]].toarray(), [[3, 0]]) + self.assertEqual(matrix.nnz, 4) + self.assertFalse(matrix.has_canonical_format) + + # Now test our function + u_check.prune_csr_matrix(matrix) + self.assertTrue(matrix.has_canonical_format) + self.assertEqual(matrix[0, 0], 3) + np.testing.assert_array_equal(matrix.data, [3]) + self.assertEqual(matrix.nnz, 1) + # Execute Tests if __name__ == "__main__": TESTS = unittest.TestLoader().loadTestsFromTestCase(TestChecks) From fa0821e97bdc894e02f36dc86494413969d01deb Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:51:10 +0200 Subject: [PATCH 11/14] Update climada/util/test/test_checker.py Co-authored-by: Chahan M. Kropf --- climada/util/test/test_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/climada/util/test/test_checker.py b/climada/util/test/test_checker.py index 75ba630312..da0bb631cf 100644 --- a/climada/util/test/test_checker.py +++ b/climada/util/test/test_checker.py @@ -90,7 +90,7 @@ def test_check_optionals_fail(self): def test_prune_csr_matrix(self): """Check that csr matrices are brought into canonical format""" # Non-canonical: First three data points will be summed onto the first matrix - # entry, forth will be an explicit zero entry + # entry, fourth will be an explicit zero entry data = [0, 1, 2, 0] indices = [0, 0, 0, 1] indptr = [0, 4, 4, 4] From 6ad7ab4e9943c6b57421d71b5b4a592d8681d904 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Thu, 4 Jul 2024 17:06:05 +0200 Subject: [PATCH 12/14] Apply suggestions from code review --- climada/util/checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/climada/util/checker.py b/climada/util/checker.py index 5d9725489b..9b706bbbb4 100644 --- a/climada/util/checker.py +++ b/climada/util/checker.py @@ -182,8 +182,8 @@ def prune_csr_matrix(matrix: sparse.csr_matrix): Parameters ---------- matrix : csr_matrix - The matrix to check. It will be modified *inplace*. No apparent matrix values - will change. + The matrix to check. It will be modified *inplace*. Its ``.data`` attribute + might change, but apparent matrix values will stay the same. Raises ------ From 79ab66f3fe370711b65425122c5ee845d8711d2a Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:10:05 +0200 Subject: [PATCH 13/14] Revert changes to attribute structure of Hazard * Only call `Hazard.check_matrices` from `ImpactCalc.__init__`. * Update tests and docstrings accordingly. --- climada/engine/impact_calc.py | 2 ++ climada/hazard/base.py | 36 +++++----------------- climada/hazard/io.py | 19 ++---------- climada/hazard/test/test_base.py | 51 ++------------------------------ 4 files changed, 14 insertions(+), 94 deletions(-) diff --git a/climada/engine/impact_calc.py b/climada/engine/impact_calc.py index 71d19f0871..f050607c50 100644 --- a/climada/engine/impact_calc.py +++ b/climada/engine/impact_calc.py @@ -47,6 +47,8 @@ def __init__(self, The dimension of the imp_mat variable must be compatible with the exposures and hazard objects. + This will call :py:meth:`climada.hazard.base.Hazard.check_matrices`. + Parameters ---------- exposures : climada.entity.Exposures diff --git a/climada/hazard/base.py b/climada/hazard/base.py index 593023ada9..e15e34fd2c 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -61,10 +61,8 @@ class Hazard(HazardIO, HazardPlot): In particular, a "non-canonical" matrix may store "duplicates", i.e. multiple values that map to the same matrix position. This is supported, and the default behavior is to sum up these values. To avoid any inconsistencies, call :py:meth:`check_matrices` - once you inserted all data. This will explicitly sum all values at the same matrix - position and eliminate explicit zeros. This class will call - :py:func:`climada.util.checker.prune_csr_matrix` whenever a csr_matrix is assigned - to one of the aforementioned attributes. + before accessing the ``data`` attribute of either matrix. This will explicitly sum + all values at the same matrix position and eliminate explicit zeros. Attributes ---------- @@ -107,8 +105,8 @@ class Hazard(HazardIO, HazardPlot): 'centroids', 'event_id', 'frequency', - '_intensity', - '_fraction' + 'intensity', + 'fraction' } """Name of the variables needed to compute the impact. Types: scalar, str, list, 1dim np.array of size num_events, scipy.sparse matrix of shape @@ -203,37 +201,17 @@ def __init__(self, np.empty((0, 0))) # events x centroids self.fraction = fraction if fraction is not None else sparse.csr_matrix( self.intensity.shape) # events x centroids - self.check_matrices() self.pool = pool if self.pool: LOGGER.info('Using %s CPUs.', self.pool.ncpus) - @property - def intensity(self) -> sparse.csr_matrix: - """Hazard intensity matrix""" - return self._intensity - - @intensity.setter - def intensity(self, value: sparse.csr_matrix): - """Set intensity matrix to new value""" - self._intensity = value - u_check.prune_csr_matrix(self._intensity) - - @property - def fraction(self) -> sparse.csr_matrix: - """Hazard fraction matrix""" - return self._fraction - - @fraction.setter - def fraction(self, value: sparse.csr_matrix): - """Set fraction matrix to new value""" - self._fraction = value - u_check.prune_csr_matrix(self._fraction) - def check_matrices(self): """Ensure that matrices are consistently shaped and stored + It is good practice to call this method before accessing the ``data`` attribute + of either :py:attr:`intensity` or :py:attr:`fraction`. + See Also -------- :py:func:`climada.util.checker.prune_csr_matrix` diff --git a/climada/hazard/io.py b/climada/hazard/io.py index 80696dd177..5248d45790 100644 --- a/climada/hazard/io.py +++ b/climada/hazard/io.py @@ -1004,18 +1004,12 @@ def write_hdf5(self, file_name, todense=False): LOGGER.info('Writing %s', file_name) with h5py.File(file_name, 'w') as hf_data: str_dt = h5py.special_dtype(vlen=str) - for var_name in self.__dict__: + for (var_name, var_val) in self.__dict__.items(): if var_name == 'centroids': # Centroids have their own write_hdf5 method, # which is invoked at the end of this method (s.b.) continue - # Prune private attributes - if var_name in self.vars_oblig: - var_name = var_name.lstrip("_") - - var_val = getattr(self, var_name) # Also works with properties - - if isinstance(var_val, sparse.csr_matrix): + elif isinstance(var_val, sparse.csr_matrix): if todense: hf_data.create_dataset(var_name, data=var_val.toarray()) else: @@ -1071,18 +1065,11 @@ def from_hdf5(cls, file_name): haz = cls() hazard_kwargs = dict() with h5py.File(file_name, 'r') as hf_data: - for var_name in haz.__dict__: - # Prune private attributes - if var_name in haz.vars_oblig: - var_name = var_name.lstrip("_") - + for (var_name, var_val) in haz.__dict__.items(): if var_name not in hf_data.keys(): continue if var_name == 'centroids': continue - - var_val = getattr(haz, var_name) # Also works with properties - if isinstance(var_val, np.ndarray) and var_val.ndim == 1: hazard_kwargs[var_name] = np.array(hf_data.get(var_name)) elif isinstance(var_val, sparse.csr_matrix): diff --git a/climada/hazard/test/test_base.py b/climada/hazard/test/test_base.py index 5ab5a83961..37500c0644 100644 --- a/climada/hazard/test/test_base.py +++ b/climada/hazard/test/test_base.py @@ -125,7 +125,7 @@ def test_check_wrongInten_fail(self): """Wrong hazard definition""" self.hazard.intensity = sparse.csr_matrix([[1, 2], [1, 2]]) with self.assertRaisesRegex( - ValueError, "Invalid Hazard._intensity row size: 3 != 2." + ValueError, "Invalid Hazard.intensity row size: 3 != 2." ): self.hazard.check() @@ -133,7 +133,7 @@ def test_check_wrongFrac_fail(self): """Wrong hazard definition""" self.hazard.fraction = sparse.csr_matrix([[1], [1], [1]]) with self.assertRaisesRegex( - ValueError, "Invalid Hazard._fraction column size: 2 != 1." + ValueError, "Invalid Hazard.fraction column size: 2 != 1." ): self.hazard.check() @@ -212,55 +212,8 @@ def test_get_date_strings_pass(self): self.assertEqual(haz.get_event_date()[560], u_dt.date_to_str(haz.date[560])) - def test_matrix_update(self): - """Check that the csr_matrix can be updated with element access""" - self.hazard.intensity[[0, 2], 1] = 10 - np.testing.assert_array_equal( - self.hazard.intensity.toarray(), [[1, 10], [1, 2], [1, 10]] - ) - self.hazard.fraction[:, 1] = 0 - np.testing.assert_array_equal( - self.hazard.fraction.toarray(), [[1, 0], [1, 0], [1, 0]] - ) - - def test_matrix_consistency(self): - """Check that the csr_matrix is brought into canonical format""" - # Non-canonical: First three data points will be summed onto the first matrix - # entry, forth will be an explicit zero entry - data = [0, 1, 2, 0] - indices = [0, 0, 0, 1] - indptr = [0, 4, 4, 4] - matrix = sparse.csr_matrix((data, indices, indptr), shape=(3, 2)) - - def check_canonical_matrix(mat): - self.assertTrue(mat.has_canonical_format) - self.assertEqual(mat.nnz, 1) - - # Check canonical format when initializing - hazard_new = Hazard("TC", intensity=matrix.copy(), fraction=matrix.copy()) - matrix_attrs = ("intensity", "fraction") - for attr in matrix_attrs: - with self.subTest(matrix=attr): - check_canonical_matrix(getattr(hazard_new, attr)) - - # Check conversion to canonical format when assigning - for attr in matrix_attrs: - with self.subTest(matrix=attr): - setattr(self.hazard, attr, matrix.copy()) - check_canonical_matrix(getattr(self.hazard, attr)) - def test_check_matrices(self): """Test the check_matrices method""" - # Check shapes - with self.assertRaisesRegex( - ValueError, "Intensity and fraction matrices must have the same shape" - ): - Hazard( - "TC", - intensity=sparse.csr_matrix(np.ones((2, 2))), - fraction=sparse.csr_matrix(np.ones((2, 3))), - ) - hazard = Hazard("TC") hazard.fraction = sparse.csr_matrix(np.zeros((2, 2))) hazard.check_matrices() # No error, fraction.nnz = 0 From 9e0a58c251144afb774f77c17ef0f9aa1b9d0e17 Mon Sep 17 00:00:00 2001 From: Lukas Riedel <34276446+peanutfun@users.noreply.github.com> Date: Wed, 17 Jul 2024 11:11:17 +0200 Subject: [PATCH 14/14] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bcf18f7b6..4be883bc6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,7 @@ CLIMADA tutorials. [#872](https://github.com/CLIMADA-project/climada_python/pull - Centroids complete overhaul. Most function should be backward compatible. Internal data is stored in a geodataframe attribute. Raster are now stored as points, and the meta attribute is removed. Several methds were deprecated or removed. [#787](https://github.com/CLIMADA-project/climada_python/pull/787) - Improved error messages produced by `ImpactCalc.impact()` in case impact function in the exposures is not found in impf_set [#863](https://github.com/CLIMADA-project/climada_python/pull/863) - Changed module structure: `climada.hazard.Hazard` has been split into the modules `base`, `io` and `plot` [#871](https://github.com/CLIMADA-project/climada_python/pull/871) -- Ensure `csr_matrix` stored in `climada.hazard.Hazard` have consistent data format and store no explicit zeros [#893](https://github.com/CLIMADA-project/climada_python/pull/893) +- Ensure `csr_matrix` stored in `climada.hazard.Hazard` have consistent data format and store no explicit zeros when initializing `ImpactCalc` [#893](https://github.com/CLIMADA-project/climada_python/pull/893) ### Fixed @@ -31,6 +31,7 @@ CLIMADA tutorials. [#872](https://github.com/CLIMADA-project/climada_python/pull ### Added +- Method `Hazard.check_matrices` for bringing the stored CSR matrices into "canonical format" [#893](https://github.com/CLIMADA-project/climada_python/pull/893) - Generic s-shaped impact function via `ImpactFunc.from_poly_s_shape` [#878](https://github.com/CLIMADA-project/climada_python/pull/878) - climada.hazard.centroids.centr.Centroids.get_area_pixel - climada.hazard.centroids.centr.Centroids.get_dist_coast