diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 8bcfb3f33..5ae894795 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -90,6 +90,7 @@ def forecast( conditional=False, probmatching_method="cdf", mask_method="incremental", + resample_distribution=True, smooth_radar_mask_range=0, callback=None, return_output=True, @@ -205,25 +206,32 @@ def forecast( If set to True, compute the statistics of the precipitation field conditionally by excluding pixels where the values are below the threshold precip_thr. + probmatching_method: {'cdf','mean',None}, optional + Method for matching the statistics of the forecast field with those of + the most recently observed one. 'cdf'=map the forecast CDF to the observed + one, 'mean'=adjust only the conditional mean value of the forecast field + in precipitation areas, None=no matching applied. Using 'mean' requires + that mask_method is not None. mask_method: {'obs','incremental',None}, optional The method to use for masking no precipitation areas in the forecast field. The masked pixels are set to the minimum value of the observations. 'obs' = apply precip_thr to the most recently observed precipitation intensity field, 'incremental' = iteratively buffer the mask with a certain rate (currently it is 1 km/min), None=no masking. + resample_distribution: bool, optional + Method to resample the distribution from the extrapolation and NWP cascade as input + for the probability matching. Not resampling these distributions may lead to losing + some extremes when the weight of both the extrapolation and NWP cascade is similar. + Defaults to True. smooth_radar_mask_range: int, Default is 0. Method to smooth the transition between the radar-NWP-noise blend and the NWP-noise blend near the edge of the radar domain (radar mask), where the radar data is either - not present anymore or is not reliable. If set to 0 (grid cells), this generates a normal forecast without smoothing. To create a smooth mask, this range - should be a positive value, representing a buffer band of a number of pixels - by which the mask is cropped and smoothed. The smooth radar mask removes - the hard edges between NWP and radar in the final blended product. Typically, a value between 50 and 100 km can be used. 80 km generally gives good results. - probmatching_method: {'cdf','mean',None}, optional - Method for matching the statistics of the forecast field with those of - the most recently observed one. 'cdf'=map the forecast CDF to the observed - one, 'mean'=adjust only the conditional mean value of the forecast field - in precipitation areas, None=no matching applied. Using 'mean' requires - that mask_method is not None. + not present anymore or is not reliable. If set to 0 (grid cells), this generates a + normal forecast without smoothing. To create a smooth mask, this range should be a + positive value, representing a buffer band of a number of pixels by which the mask + is cropped and smoothed. The smooth radar mask removes the hard edges between NWP + and radar in the final blended product. Typically, a value between 50 and 100 km + can be used. 80 km generally gives good results. callback: function, optional Optional function that is called after computation of each time step of the nowcast. The function takes one argument: a three-dimensional array @@ -1404,7 +1412,6 @@ def worker(j): # latest extrapolated radar rainfall field blended with the # nwp model(s) rainfall forecast fields as 'benchmark'. - # TODO: Check probability matching method # 8.7.1 first blend the extrapolated rainfall field (the field # that is only used for post-processing steps) with the NWP # rainfall forecast for this time step using the weights @@ -1538,19 +1545,39 @@ def worker(j): # Set to min value outside of mask R_f_new[~MASK_prec_] = R_cmin + # If probmatching_method is not None, resample the distribution from + # both the extrapolation cascade and the model (NWP) cascade and use + # that for the probability matching + if probmatching_method is not None and resample_distribution: + # deal with missing values + arr1 = R_pm_ep[t_index] + arr2 = precip_models_pm_temp[j] + arr2 = np.where(np.isnan(arr2), np.nanmin(arr2), arr2) + arr1 = np.where(np.isnan(arr1), arr2, arr1) + # resample weights based on cascade level 2 + R_pm_resampled = probmatching.resample_distributions( + first_array=arr1, + second_array=arr2, + probability_first_array=weights_pm_normalized[0], + ) + else: + R_pm_resampled = R_pm_blended.copy() + if probmatching_method == "cdf": # Adjust the CDF of the forecast to match the most recent # benchmark rainfall field (R_pm_blended). If the forecast if np.any(np.isfinite(R_f_new)): R_f_new = probmatching.nonparam_match_empirical_cdf( - R_f_new, R_pm_blended + R_f_new, R_pm_resampled ) + R_pm_resampled = None elif probmatching_method == "mean": # Use R_pm_blended as benchmark field and - mu_0 = np.mean(R_pm_blended[R_pm_blended >= precip_thr]) + mu_0 = np.mean(R_pm_resampled[R_pm_resampled >= precip_thr]) MASK = R_f_new >= precip_thr mu_fct = np.mean(R_f_new[MASK]) R_f_new[MASK] = R_f_new[MASK] - mu_fct + mu_0 + R_pm_resampled = None R_f_out.append(R_f_new) diff --git a/pysteps/postprocessing/probmatching.py b/pysteps/postprocessing/probmatching.py index e84935714..7c96149e9 100644 --- a/pysteps/postprocessing/probmatching.py +++ b/pysteps/postprocessing/probmatching.py @@ -13,6 +13,7 @@ pmm_init pmm_compute shift_scale + resample_distributions """ import numpy as np @@ -259,6 +260,56 @@ def _get_error(scale): return shift, scale, R.reshape(shape) +def resample_distributions(first_array, second_array, probability_first_array): + """ + Merges two distributions (e.g., from the extrapolation nowcast and NWP in the blending module) + to effectively combine two distributions for probability matching without losing extremes. + + Parameters + ---------- + first_array: array_like + One of the two arrays from which the distribution should be sampled (e.g., the extrapolation + cascade). It must be of the same shape as `second_array`. Input must not contain NaNs. + second_array: array_like + One of the two arrays from which the distribution should be sampled (e.g., the NWP (model) + cascade). It must be of the same shape as `first_array`.. Input must not contain NaNs. + probability_first_array: float + The weight that `first_array` should get (a value between 0 and 1). This determines the + likelihood of selecting elements from `first_array` over `second_array`. + + Returns + ------- + csort: array_like + The combined output distribution. This is an array of the same shape as the input arrays, + where each element is chosen from either `first_array` or `second_array` based on the specified + probability, and then sorted in descending order. + + Raises + ------ + ValueError + If `first_array` and `second_array` do not have the same shape or if inputs contain NaNs. + """ + + # Valide inputs + if first_array.shape != second_array.shape: + raise ValueError("first_array and second_array must have the same shape") + if np.isnan(first_array).any() or np.isnan(second_array).any(): + raise ValueError("Input arrays must not contain NaNs") + probability_first_array = np.clip(probability_first_array, 0.0, 1.0) + + # Flatten and sort the arrays + asort = np.sort(first_array, axis=None)[::-1] + bsort = np.sort(second_array, axis=None)[::-1] + n = asort.shape[0] + + # Resample the distributions + idxsamples = np.random.binomial(1, probability_first_array, n).astype(bool) + csort = np.where(idxsamples, asort, bsort) + csort = np.sort(csort)[::-1] + + return csort + + def _invfunc(y, fx, fy): if len(y) == 0: return np.array([]) diff --git a/pysteps/tests/test_blending_steps.py b/pysteps/tests/test_blending_steps.py index ea20eaf25..1b813e08b 100644 --- a/pysteps/tests/test_blending_steps.py +++ b/pysteps/tests/test_blending_steps.py @@ -8,37 +8,40 @@ steps_arg_values = [ - (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0), - (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0), - (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0), - (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0), - (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0), - (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0), - (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0), - (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0), - (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0), - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0), + (1, 3, 4, 8, None, None, False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, "obs", None, False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, "incremental", None, False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, None, "mean", False, "spn", True, 4, False, False, 0, True), + (1, 3, 4, 8, None, "cdf", False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 8, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", True, 4, False, False, 0, False), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, False), + (1, 3, 4, 6, "incremental", "cdf", False, "bps", False, 4, False, False, 0, True), + (1, 3, 4, 9, "incremental", "cdf", False, "spn", True, 4, False, False, 0, False), + (2, 3, 10, 8, "incremental", "cdf", False, "spn", True, 10, False, False, 0, False), + (5, 3, 5, 8, "incremental", "cdf", False, "spn", True, 5, False, False, 0, False), + (1, 10, 1, 8, "incremental", "cdf", False, "spn", True, 1, False, False, 0, False), + (2, 3, 2, 8, "incremental", "cdf", True, "spn", True, 2, False, False, 0, False), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 0, False), # Test the case where the radar image contains no rain. - (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0), + (1, 3, 6, 8, None, None, False, "spn", True, 6, True, False, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 0, True), # Test the case where the NWP fields contain no rain. - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, True, 0, True), # Test the case where both the radar image and the NWP fields contain no rain. - (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0), - (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0), + (1, 3, 6, 8, None, None, False, "spn", True, 6, True, True, 0, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, True, 0, False), + (5, 3, 5, 6, "obs", "mean", True, "spn", True, 5, True, True, 0, False), # Test for smooth radar mask - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80), - (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80), - (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80), - (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80), - (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, False, 80, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, False, False, 80, False), + (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, False, False, 80, False), + (1, 3, 6, 8, None, None, False, "spn", True, 6, False, True, 80, False), + (5, 3, 5, 6, "incremental", "cdf", False, "spn", False, 5, True, False, 80, True), + (5, 3, 5, 6, "obs", "mean", False, "spn", False, 5, True, True, 80, False), ] steps_arg_names = ( @@ -55,6 +58,7 @@ "zero_radar", "zero_nwp", "smooth_radar_mask_range", + "resample_distribution", ) @@ -73,6 +77,7 @@ def test_steps_blending( zero_radar, zero_nwp, smooth_radar_mask_range, + resample_distribution, ): pytest.importorskip("cv2") diff --git a/pysteps/tests/test_postprocessing_probmatching.py b/pysteps/tests/test_postprocessing_probmatching.py new file mode 100644 index 000000000..fa27cdf39 --- /dev/null +++ b/pysteps/tests/test_postprocessing_probmatching.py @@ -0,0 +1,57 @@ +import numpy as np +import pytest +from pysteps.postprocessing.probmatching import resample_distributions + + +class TestResampleDistributions: + + @pytest.fixture(autouse=True) + def setup(self): + # Set the seed for reproducibility + np.random.seed(42) + + def test_valid_inputs(self): + first_array = np.array([1, 3, 5, 7, 9]) + second_array = np.array([2, 4, 6, 8, 10]) + probability_first_array = 0.6 + result = resample_distributions( + first_array, second_array, probability_first_array + ) + expected_result = np.array([9, 8, 6, 3, 1]) # Expected result based on the seed + assert result.shape == first_array.shape + assert np.array_equal(result, expected_result) + + def test_probability_zero(self): + first_array = np.array([1, 3, 5, 7, 9]) + second_array = np.array([2, 4, 6, 8, 10]) + probability_first_array = 0.0 + result = resample_distributions( + first_array, second_array, probability_first_array + ) + assert np.array_equal(result, np.sort(second_array)[::-1]) + + def test_probability_one(self): + first_array = np.array([1, 3, 5, 7, 9]) + second_array = np.array([2, 4, 6, 8, 10]) + probability_first_array = 1.0 + result = resample_distributions( + first_array, second_array, probability_first_array + ) + assert np.array_equal(result, np.sort(first_array)[::-1]) + + def test_nan_in_inputs(self): + array_with_nan = np.array([1, 3, np.nan, 7, 9]) + array_without_nan = np.array([2, 4, 6, 8, 10]) + probability_first_array = 0.6 + with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): + resample_distributions( + array_with_nan, array_without_nan, probability_first_array + ) + with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): + resample_distributions( + array_without_nan, array_with_nan, probability_first_array + ) + with pytest.raises(ValueError, match="Input arrays must not contain NaNs"): + resample_distributions( + array_with_nan, array_with_nan, probability_first_array + )