From ac469a1448dda8d2590c31401dc4a590b00b79aa Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Wed, 27 Mar 2024 10:20:09 -0700 Subject: [PATCH 01/24] renamed Lionel's functions to uar1 CI tests pass --- pyleoclim/tests/test_utils_tsmodel.py | 6 +++--- pyleoclim/utils/tsmodel.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 5342ac9a..54107fee 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -34,7 +34,7 @@ def test_time_increments_2(): assert all(np.cumsum(delta_t)>0) @pytest.mark.parametrize('evenly_spaced', [True, False]) -def test_ar1fit_ml(evenly_spaced): +def test_uar1_fit(evenly_spaced): ''' Tests whether this method works well on an AR(1) process with known parameters @@ -45,7 +45,7 @@ def test_ar1fit_ml(evenly_spaced): sigma_2 = 1 # create p=50 time series - y_sim, t_sim = tsmodel.ar1_sim_geneva(n=200, tau_0=tau, sigma_2_0=sigma_2, + y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=evenly_spaced, p = 10) # Create an empty matrix to store estimated parameters @@ -53,7 +53,7 @@ def test_ar1fit_ml(evenly_spaced): # estimate parameters for each time series for j in range(y_sim.shape[1]): - theta_hat_matrix[j,:] = tsmodel.ar1_fit_ml(y_sim[:, j], t_sim[:, j]) + theta_hat_matrix[j,:] = tsmodel.uar1_fit(y_sim[:, j], t_sim[:, j]) # compute mean of estimated param for each simulate ts theta_hat_bar = np.mean(theta_hat_matrix, axis=0) diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index e3f28a3f..a09f2895 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -19,9 +19,9 @@ __all__ = [ 'ar1_fit', - 'ar1_fit_ml', + 'uar1_fit', + 'uar1_sim', 'ar1_sim', - 'ar1_sim_geneva', 'colored_noise', 'colored_noise_2regimes', 'gen_ar1_evenly', @@ -650,7 +650,7 @@ def n_ll_unevenly_spaced_ar1(theta, y, t): nll = np.log(2*np.pi) + np.log(sigma_2) + term_5 + term_4 return(nll) -def ar1_fit_ml(y, t): +def uar1_fit(y, t): ''' Maximum Likelihood Estimation of parameters tau_0 and sigma_2_0 @@ -678,7 +678,7 @@ def ar1_fit_ml(y, t): return theta_hat -def ar1_sim_geneva(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, +def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, delta_t_dist = "exponential", param = 1): """ From 4b1a19b251870088a984ec87781b4f2404bb24a1 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Wed, 27 Mar 2024 10:54:59 -0700 Subject: [PATCH 02/24] stub out uar1 surrogates TODO list in comments --- pyleoclim/core/series.py | 47 ++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index aef54763..076ff176 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3675,40 +3675,55 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings -------- pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator - pyleoclim.utils.tsutils.phaseran : phase randomization + pyleoclim.utils.tsutils.phaseran2 : phase randomization ''' settings = {} if settings is None else settings.copy() - args = {} - time = self.time - args['ar1sim'] = {'t': time} - args['phaseran'] = {} - args[method].update(settings) + # args = {} + # args['ar1sim'] = {'t': self.time} + # args['phaseran'] = {} + + # args[method].update(settings) if seed is not None: np.random.seed(seed) if method == 'ar1sim': - surr_res = tsmodel.ar1_sim(self.value, number, **args[method]) + y_surr = tsmodel.ar1_sim(self.value, number, self.time) + times = np.tile(self.time, number) # TURN THIS INTO A MATRIX elif method == 'phaseran': if self.is_evenly_spaced(): - surr_res = tsutils.phaseran2(self.value, number, **args[method]) + y_surr = tsutils.phaseran2(self.value, number) + times = np.tile(self.time, number) # TURN THIS INTO A MATRIX else: raise ValueError("Phase-randomization presently requires evenly-spaced series.") - - # elif method == 'uar1': - # # TODO : implement Lionel's ML method + elif method == 'uar1': + theta_hat = tsmodel.uar1_fit(self.value, self.time) + # 3 choices: 1) emulate self.time (use resolution()) TODO + # 2) generate unevenly at random + # 3) generate evenly spaced + # grab the parameters from the settings dictionary (do exception handling on dictionary keys) + + # ys, ts = tsmodel.uar1_sim(len(self.value), tau_0=theta_hat[0], + # sigma_2_0=theta_hat[1], + # seed=seed, p=number, evenly_spaced = False, + # delta_t_dist = "exponential", param = 1) + y_surr, times = tsmodel.uar1_sim(len(self.value), tau_0=theta_hat[0], + sigma_2_0=theta_hat[1], + seed=seed, p=number, **settings) + + # TODO : implement Lionel's ML method # elif method == 'power-law': # # TODO : implement Stochastic # elif method == 'fBm': # # TODO : implement Stochastic - if len(np.shape(surr_res)) == 1: - surr_res = surr_res[:, np.newaxis] + if len(np.shape(y_surr)) == 1: + y_surr = y_surr[:, np.newaxis] s_list = [] - for i, s in enumerate(surr_res.T): - s_tmp = Series(time=time, value=s, # will need reformation after uar1 pull + for i, s in enumerate(y_surr.T): + s_tmp = Series(time=times[:,i], value=s, # will need reformation after uar1 pull time_name=self.time_name, time_unit=self.time_unit, value_name=self.value_name, @@ -3720,7 +3735,7 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings surr = SurrogateSeries(series_list=s_list, label = self.label, surrogate_method=method, - surrogate_args=args[method]) + surrogate_args=settings) return surr From 967b3c963416e5286aaa13365288d07a7288e316 Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Tue, 2 Apr 2024 11:56:18 +0200 Subject: [PATCH 03/24] add option for the distribution of the delta_t in fct time_increments --- pyleoclim/utils/tsmodel.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index a09f2895..230aaa0f 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -19,9 +19,9 @@ __all__ = [ 'ar1_fit', + 'ar1_sim', 'uar1_fit', 'uar1_sim', - 'ar1_sim', 'colored_noise', 'colored_noise_2regimes', 'gen_ar1_evenly', @@ -759,6 +759,8 @@ def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, y_sim[:, j] = y return y_sim, t_sim +def inverse_cumsum(arr): + return np.diff(np.concatenate(([0], arr))) def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): ''' @@ -774,7 +776,7 @@ def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): delta_t_dist: str the probability distribution of delta_t - possible choices include 'exponential', 'poisson', 'pareto', or 'random_choice' + possible choices include 'exponential', 'poisson', 'pareto', 'random_choice' or `empirical` if 'exponential', `param` is expected to be a single scale parameter (traditionally denoted \lambda) if 'poisson', `param` is expected to be a single parameter (rate) @@ -785,6 +787,7 @@ def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): prob_random_choice: probabilities associated with each entry value_random_choice (e.g. [.95,.05]) (These two arrays must be of the same size) + if 'empirical', expect an array containing the time index of the original time series Returns: ------- @@ -793,9 +796,9 @@ def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): ''' # check for a valid distribution - valid_distributions = ["exponential", "poisson", "pareto", "random_choice"] + valid_distributions = ["exponential", "poisson", "pareto", "random_choice", "empirical"] if delta_t_dist not in valid_distributions: - raise ValueError("delta_t_dist must be one of: 'exponential', 'poisson', 'pareto', or 'random_choice'.") + raise ValueError("delta_t_dist must be one of: 'exponential', 'poisson', 'pareto', 'random_choice' or 'empirical'.") np.random.seed(seed) param = np.array(param) # coerce array type @@ -813,9 +816,17 @@ def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): if len(param)<2 or len(param[0]) != len(param[1]): raise ValueError("value_random_choice and prob_random_choice must have the same size.") delta_t = np.random.choice(param[0], size=n, p=param[1]) - + elif delta_t_dist == "empirical": + if len(param[0])!= n: + raise ValueError("Number of time index provided not equal to the n value provided") + delta_t = inverse_cumsum(param[0]) return delta_t + + +time_index = np.array([1, 2,3,4,5,6,7]) +time_increments(n=10, param= , delta_t_dist = "empirical") + # def fBMsim(N=128, H=0.25): # '''Simple method to generate fractional Brownian Motion From 96de6ff8f1c1a1cccecff06a62f67d481464dc9e Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Wed, 3 Apr 2024 12:46:59 +0200 Subject: [PATCH 04/24] added delta_t_dist 'empirical' in fct time_increments, added support for uar1 in surrogates, try to develop test --- pyleoclim/core/series.py | 40 +++++++++++++++++++++++-- pyleoclim/tests/test_utils_tsmodel.py | 43 ++++++++++++++++++++++++--- pyleoclim/utils/tsmodel.py | 14 ++++++--- 3 files changed, 86 insertions(+), 11 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 076ff176..4c117477 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3698,7 +3698,40 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings else: raise ValueError("Phase-randomization presently requires evenly-spaced series.") elif method == 'uar1': + # Check if 'evenly_spaced' in the settings dictionary + required_keys = ['evenly_spaced'] # should alway be indicated + missing_keys = [key for key in required_keys if key not in settings] + if missing_keys: + raise ValueError("The following required key is missing from the settings dictionary: {missing_keys}") + # check now that if evenly spaced is set to False, the key delta_t_dist is provided + if settings["evenly_spaced"] == False: + required_keys = ['delta_t_dist'] + missing_keys = [key for key in required_keys if key not in settings] + if missing_keys: + raise ValueError(f"The following required keys are missing from the settings dictionary: {missing_keys}") + # check that param if evenly spaced is false and key delta t dist is not empirical, then param are provided + if settings["delta_t_dist"] != "empirical": + required_keys = ['param'] + missing_keys = [key for key in required_keys if key not in settings] + if missing_keys: + raise ValueError(f"The following required keys are missing from the settings dictionary: {missing_keys}") + + + # estimate theta with MLE theta_hat = tsmodel.uar1_fit(self.value, self.time) + + # the method `empirical` needs the time value provided as parameters + if settings['delta_t_dist'] == "empirical": + y_surr, times = tsmodel.uar1_sim(n = len(self.value), tau_0=theta_hat[0], + sigma_2_0=theta_hat[1], + seed=seed, p=number, delta_t_dist = "empirical", param = [[self.time]]) + # for other delta_t_dist, then the parameters are provided + else: + y_surr, times = tsmodel.uar1_sim(n = len(self.value), tau_0=theta_hat[0], + sigma_2_0=theta_hat[1], + seed=seed, p=number, **settings) + + # 3 choices: 1) emulate self.time (use resolution()) TODO # 2) generate unevenly at random # 3) generate evenly spaced @@ -3708,9 +3741,10 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings # sigma_2_0=theta_hat[1], # seed=seed, p=number, evenly_spaced = False, # delta_t_dist = "exponential", param = 1) - y_surr, times = tsmodel.uar1_sim(len(self.value), tau_0=theta_hat[0], - sigma_2_0=theta_hat[1], - seed=seed, p=number, **settings) + + #y_surr, times = tsmodel.uar1_sim(len(self.value), tau_0=theta_hat[0], + # sigma_2_0=theta_hat[1], + # seed=seed, p=number, **settings) # TODO : implement Lionel's ML method # elif method == 'power-law': diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 54107fee..7f2cdfab 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -9,13 +9,20 @@ import pytest import numpy as np from pyleoclim.utils import tsmodel +import pyleoclim as pyleo + + + + + + @pytest.mark.parametrize('model', ["exponential", "poisson"]) def test_time_increments_0(model): ''' Generate time increments with 1-parameter models ''' - delta_t = tsmodel.time_increments(n=20, param=1, delta_t_dist = model) + delta_t = tsmodel.time_increments(n=20, param=[1], delta_t_dist = model) assert all(np.cumsum(delta_t)>0) def test_time_increments_1(): @@ -32,6 +39,18 @@ def test_time_increments_2(): delta_t = tsmodel.time_increments(n=20, delta_t_dist = "random_choice", param=[[1,2],[.95,.05]] ) assert all(np.cumsum(delta_t)>0) + + +def test_time_increments_3(): + ''' + Generate time increments with provided time vector + ''' + delta_t = tsmodel.time_increments(n=20, delta_t_dist = "empirical", param=[range(1,21)] ) + assert all(delta_t==1) + + + + @pytest.mark.parametrize('evenly_spaced', [True, False]) def test_uar1_fit(evenly_spaced): @@ -45,8 +64,14 @@ def test_uar1_fit(evenly_spaced): sigma_2 = 1 # create p=50 time series - y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, - evenly_spaced=evenly_spaced, p = 10) + if evenly_spaced==True: + y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, + evenly_spaced=evenly_spaced, p = 10) + else: + y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, + evenly_spaced=evenly_spaced, p = 10, delta_t_dist = "exponential", + param=[1]) + # Create an empty matrix to store estimated parameters theta_hat_matrix = np.empty((y_sim.shape[1], 2)) @@ -64,6 +89,16 @@ def test_uar1_fit(evenly_spaced): assert np.abs(theta_hat_bar[1]-sigma_2) < tol +def test_surrogates_1(): + tau = 2 + sigma_2 = 1 + #y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + #pyleo.utils.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + ts = pyleo.Series(time = t_sim, value= y_sim) + # surr = ts.surrogates(method = 'uar1') + surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) + # nothing is working ! need to talk with julien! + return(len(surr)) - \ No newline at end of file diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index 230aaa0f..11085f07 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -27,7 +27,8 @@ 'gen_ar1_evenly', 'gen_ts', 'tau_estimation', - 'parametric_surrogates' + 'parametric_surrogates', + 'time_increments' ] @@ -737,7 +738,7 @@ def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, if evenly_spaced: delta_t = [1]*n # for now we assume delta_t = 1 if evenly sampled, potentially to improve with a parameter that specify the time spacing else: - delta_t = time_increments(n, param, delta_t_dist = "exponential", seed = seed+j) + delta_t = time_increments(n, param, delta_t_dist = delta_t_dist, seed = seed+j) # obtain the 0-based time index from the delta_t distribution t = np.cumsum(delta_t)-1 @@ -804,8 +805,14 @@ def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): param = np.array(param) # coerce array type if delta_t_dist == "exponential": + # make sure that param is of len 1 + if len(param) != 1: + raise ValueError('The Exponential law takes a single scale parameter.') delta_t = np.random.exponential(scale = param, size=n) + elif delta_t_dist == "poisson": + if len(param) != 1: + raise ValueError('The Poisson law takes a single parameter.') delta_t = np.random.poisson(lam = param, size = n) + 1 elif delta_t_dist == "pareto": if len(param) != 2: @@ -824,8 +831,7 @@ def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): -time_index = np.array([1, 2,3,4,5,6,7]) -time_increments(n=10, param= , delta_t_dist = "empirical") + # def fBMsim(N=128, H=0.25): # '''Simple method to generate fractional Brownian Motion From d3feebe40c40abccefe44ba7620ccaedc034b5ef Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Wed, 3 Apr 2024 14:13:41 +0200 Subject: [PATCH 05/24] minor --- pyleoclim/tests/test_utils_tsmodel.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 7f2cdfab..73c9291c 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -12,11 +12,6 @@ import pyleoclim as pyleo - - - - - @pytest.mark.parametrize('model', ["exponential", "poisson"]) def test_time_increments_0(model): ''' @@ -40,13 +35,7 @@ def test_time_increments_2(): param=[[1,2],[.95,.05]] ) assert all(np.cumsum(delta_t)>0) - -def test_time_increments_3(): - ''' - Generate time increments with provided time vector - ''' - delta_t = tsmodel.time_increments(n=20, delta_t_dist = "empirical", param=[range(1,21)] ) - assert all(delta_t==1) + @@ -89,6 +78,15 @@ def test_uar1_fit(evenly_spaced): assert np.abs(theta_hat_bar[1]-sigma_2) < tol + +def test_time_increments_3(): + ''' + Generate time increments with provided time vector + ''' + delta_t = tsmodel.time_increments(n=20, delta_t_dist = "empirical", param=[range(1,21)] ) + assert all(delta_t==1) + + def test_surrogates_1(): tau = 2 sigma_2 = 1 @@ -98,7 +96,6 @@ def test_surrogates_1(): ts = pyleo.Series(time = t_sim, value= y_sim) # surr = ts.surrogates(method = 'uar1') surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) - # nothing is working ! need to talk with julien! return(len(surr)) From f047dcddefa85fb1db07b6096961a1e6c6ed928c Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Wed, 3 Apr 2024 14:27:37 +0200 Subject: [PATCH 06/24] minor --- pyleoclim/core/series.py | 2 ++ pyleoclim/tests/test_utils_tsmodel.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 4c117477..e9f603d9 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3691,12 +3691,14 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings if method == 'ar1sim': y_surr = tsmodel.ar1_sim(self.value, number, self.time) times = np.tile(self.time, number) # TURN THIS INTO A MATRIX + elif method == 'phaseran': if self.is_evenly_spaced(): y_surr = tsutils.phaseran2(self.value, number) times = np.tile(self.time, number) # TURN THIS INTO A MATRIX else: raise ValueError("Phase-randomization presently requires evenly-spaced series.") + elif method == 'uar1': # Check if 'evenly_spaced' in the settings dictionary required_keys = ['evenly_spaced'] # should alway be indicated diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 73c9291c..6371f0e7 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -90,8 +90,12 @@ def test_time_increments_3(): def test_surrogates_1(): tau = 2 sigma_2 = 1 - #y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + + + #pyleo.utils.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) ts = pyleo.Series(time = t_sim, value= y_sim) # surr = ts.surrogates(method = 'uar1') From c568a45fb3a5be0e7361d350b60a6e9d98a1effb Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Wed, 3 Apr 2024 15:01:48 +0200 Subject: [PATCH 07/24] trying to understand how to load current pkg with new fcts --- pyleoclim/tests/test_utils_tsmodel.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 6371f0e7..02731ef9 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -10,6 +10,7 @@ import numpy as np from pyleoclim.utils import tsmodel import pyleoclim as pyleo +from pyleoclim.utils.tsmodel import ar1_sim @pytest.mark.parametrize('model', ["exponential", "poisson"]) @@ -98,8 +99,10 @@ def test_surrogates_1(): #pyleo.utils.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) ts = pyleo.Series(time = t_sim, value= y_sim) - # surr = ts.surrogates(method = 'uar1') - surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) - return(len(surr)) + # surr = ts.surrogates(method = 'uar1') + surr = ts.surrogates(method = 'ar1sim', settings={}) + #surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) + + assert(1==1) From 177328f5fa90059e341ab0c61e46a075aa7e4122 Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Wed, 3 Apr 2024 15:10:02 +0200 Subject: [PATCH 08/24] more weird stuff --- pyleoclim/tests/test_utils_tsmodel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 02731ef9..f56a11e6 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -100,9 +100,8 @@ def test_surrogates_1(): #pyleo.utils.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) ts = pyleo.Series(time = t_sim, value= y_sim) # surr = ts.surrogates(method = 'uar1') - surr = ts.surrogates(method = 'ar1sim', settings={}) + surr = ts.surrogates(method = 'ar1sim', number = 10) #surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) - assert(1==1) From a355ed22ec18536019e2438c371daa3ad61671a3 Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Wed, 3 Apr 2024 17:11:07 +0200 Subject: [PATCH 09/24] better exception handling --- pyleoclim/core/series.py | 22 +++++++++------------- pyleoclim/tests/test_utils_tsmodel.py | 8 +++----- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index e9f603d9..a92609f6 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3689,34 +3689,30 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings np.random.seed(seed) if method == 'ar1sim': - y_surr = tsmodel.ar1_sim(self.value, number, self.time) - times = np.tile(self.time, number) # TURN THIS INTO A MATRIX + y_surr = tsmodel.ar1_sim(self.value, number, self.time) + times = np.tile(self.time, (number, 1)).T elif method == 'phaseran': if self.is_evenly_spaced(): y_surr = tsutils.phaseran2(self.value, number) - times = np.tile(self.time, number) # TURN THIS INTO A MATRIX + times = np.tile(self.time, (number, 1)).T else: raise ValueError("Phase-randomization presently requires evenly-spaced series.") elif method == 'uar1': # Check if 'evenly_spaced' in the settings dictionary - required_keys = ['evenly_spaced'] # should alway be indicated - missing_keys = [key for key in required_keys if key not in settings] - if missing_keys: - raise ValueError("The following required key is missing from the settings dictionary: {missing_keys}") + if "evenly_spaced" not in settings: + raise ValueError("The following required key is missing from the settings dictionary: evenly_spaced") # check now that if evenly spaced is set to False, the key delta_t_dist is provided if settings["evenly_spaced"] == False: - required_keys = ['delta_t_dist'] - missing_keys = [key for key in required_keys if key not in settings] - if missing_keys: - raise ValueError(f"The following required keys are missing from the settings dictionary: {missing_keys}") + if "delta_t_dist" not in settings: + raise ValueError("The following required keys are missing from the settings dictionary: delta_t_dist") # check that param if evenly spaced is false and key delta t dist is not empirical, then param are provided - if settings["delta_t_dist"] != "empirical": + if "delta_t_dist" in settings and settings["delta_t_dist"] != "empirical": required_keys = ['param'] missing_keys = [key for key in required_keys if key not in settings] if missing_keys: - raise ValueError(f"The following required keys are missing from the settings dictionary: {missing_keys}") + raise ValueError(f"The following required keys are missing from the settings dictionary: param") # estimate theta with MLE diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index f56a11e6..b0a50644 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -95,13 +95,11 @@ def test_surrogates_1(): y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) - - #pyleo.utils.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) ts = pyleo.Series(time = t_sim, value= y_sim) - # surr = ts.surrogates(method = 'uar1') - surr = ts.surrogates(method = 'ar1sim', number = 10) - #surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) + surr = ts.surrogates(method = 'uar1', number = 2, settings={"evenly_spaced" :True}) + #surr = ts.surrogates(method = 'ar1sim', number = 10) + # surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) assert(1==1) From 14e2d7a831aa0521fe768cdc122f33278a621d74 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Wed, 3 Apr 2024 09:55:41 -0700 Subject: [PATCH 10/24] stub out surrogate time_pattern --- pyleoclim/core/series.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index a92609f6..1720c2af 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3641,7 +3641,8 @@ def causality(self, target_series, method='liang', timespan=None, settings=None, causal_res = spec_func[method](value1, value2, **args[method]) return causal_res - def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings=None): + def surrogates(self, method='ar1sim', number=1, time_pattern='match', + length=None, seed=None, settings=None): ''' Generate surrogates of the Series object according to "method" For now, assumes uniform spacing and increasing time axis @@ -3649,7 +3650,7 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings Parameters ---------- - method : {ar1sim, phaseran} + method : {ar1sim, phaseran, uar1} The method used to generate surrogates of the timeseries Note that phaseran assumes an odd number of samples. If the series @@ -3657,7 +3658,13 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings number : int The number of surrogates to generate - + + time_pattern : str {match, even, uneven} + The pattern used to generate the surrogate time axes + 'match' uses the same pattern as the original Series + 'even' uses an evenly-spaced time with spacing delta_t specified in settings (will return error if not specified) + 'uneven' uses random_time_increments() with specified distribution and parameters (default: 'exponential' with parameter 1) + length : int Length of the series @@ -3675,7 +3682,9 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings -------- pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator + pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator pyleoclim.utils.tsutils.phaseran2 : phase randomization + pyleoclim.utils.tsutils.random_time_increments : ''' settings = {} if settings is None else settings.copy() @@ -3712,7 +3721,7 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings required_keys = ['param'] missing_keys = [key for key in required_keys if key not in settings] if missing_keys: - raise ValueError(f"The following required keys are missing from the settings dictionary: param") + raise ValueError("The following required keys are missing from the settings dictionary: param") # estimate theta with MLE From c1f65ab45883ef66b159fccd06d0725f490f7bde Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Wed, 3 Apr 2024 09:59:28 -0700 Subject: [PATCH 11/24] renamed time_increments to random_time_increments --- pyleoclim/utils/tsmodel.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index 11085f07..ee57bc7e 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -28,7 +28,7 @@ 'gen_ts', 'tau_estimation', 'parametric_surrogates', - 'time_increments' + 'random_time_increments' ] @@ -725,7 +725,7 @@ def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, pyleoclim.utils.tsmodel.ar1_fit_ml : Maximumum likelihood estimate of AR(1) parameters - pyleoclim.utils.tsmodel.time_increments : Generate time increment vector according to a specific probability model + pyleoclim.utils.tsmodel.random_time_increments : Generate time increment vector according to a specific probability model """ @@ -738,7 +738,7 @@ def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, if evenly_spaced: delta_t = [1]*n # for now we assume delta_t = 1 if evenly sampled, potentially to improve with a parameter that specify the time spacing else: - delta_t = time_increments(n, param, delta_t_dist = delta_t_dist, seed = seed+j) + delta_t = random_time_increments(n, param, delta_t_dist = delta_t_dist, seed = seed+j) # obtain the 0-based time index from the delta_t distribution t = np.cumsum(delta_t)-1 @@ -763,7 +763,7 @@ def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, def inverse_cumsum(arr): return np.diff(np.concatenate(([0], arr))) -def time_increments(n, param, delta_t_dist = "exponential", seed = 12345): +def random_time_increments(n, param, delta_t_dist = "exponential", seed = 12345): ''' Generate a time increment vector according to a specific probability model From 756fb97b38516c76b4eaa37ab3ffe701aaeb5d27 Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Wed, 3 Apr 2024 21:28:26 +0200 Subject: [PATCH 12/24] minor --- pyleoclim/core/series.py | 4 ++-- pyleoclim/tests/test_utils_tsmodel.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index a92609f6..644e9671 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3712,14 +3712,14 @@ def surrogates(self, method='ar1sim', number=1, length=None, seed=None, settings required_keys = ['param'] missing_keys = [key for key in required_keys if key not in settings] if missing_keys: - raise ValueError(f"The following required keys are missing from the settings dictionary: param") + raise ValueError("The following required keys are missing from the settings dictionary: param") # estimate theta with MLE theta_hat = tsmodel.uar1_fit(self.value, self.time) # the method `empirical` needs the time value provided as parameters - if settings['delta_t_dist'] == "empirical": + if "delta_t_dist" in settings and settings['delta_t_dist'] == "empirical": y_surr, times = tsmodel.uar1_sim(n = len(self.value), tau_0=theta_hat[0], sigma_2_0=theta_hat[1], seed=seed, p=number, delta_t_dist = "empirical", param = [[self.time]]) diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index b0a50644..62cc5abf 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -11,6 +11,9 @@ from pyleoclim.utils import tsmodel import pyleoclim as pyleo from pyleoclim.utils.tsmodel import ar1_sim +from pyleoclim.utils.tsmodel import uar1_sim + + @pytest.mark.parametrize('model', ["exponential", "poisson"]) @@ -97,8 +100,12 @@ def test_surrogates_1(): #pyleo.utils.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) ts = pyleo.Series(time = t_sim, value= y_sim) + surr = ts.surrogates(method = 'uar1', number = 2, settings={"evenly_spaced" :True}) - #surr = ts.surrogates(method = 'ar1sim', number = 10) + surr = ts.surrogates(method = 'ar1sim', number = 10) + surr.series_list[0].time + surr.series_list[0].value + surr # surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) assert(1==1) From c2329cfcdfd0f2367ffb0830208347718f15a35f Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Thu, 4 Apr 2024 17:33:59 +0200 Subject: [PATCH 13/24] modified uar1_sim to accept time index as an argument, change fct random_time_increment to random_time_index,modified tests --- pyleoclim/tests/test_utils_tsmodel.py | 90 +++++++++++++-------------- pyleoclim/utils/tsmodel.py | 82 +++++++++++------------- 2 files changed, 79 insertions(+), 93 deletions(-) diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 62cc5abf..4281e25c 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -10,86 +10,84 @@ import numpy as np from pyleoclim.utils import tsmodel import pyleoclim as pyleo -from pyleoclim.utils.tsmodel import ar1_sim -from pyleoclim.utils.tsmodel import uar1_sim - @pytest.mark.parametrize('model', ["exponential", "poisson"]) -def test_time_increments_0(model): +def test_time_index_0(model): ''' Generate time increments with 1-parameter models ''' - delta_t = tsmodel.time_increments(n=20, param=[1], delta_t_dist = model) + delta_t = tsmodel.random_time_index(n=20, param=[1], delta_t_dist = model) assert all(np.cumsum(delta_t)>0) -def test_time_increments_1(): +def test_time_index_1(): ''' Generate time increments with Pareto ''' - delta_t = tsmodel.time_increments(n=20, param=[4.2,2.5], delta_t_dist = "pareto") + delta_t = tsmodel.random_time_index(n=20, param=[4.2,2.5], delta_t_dist = "pareto") assert all(np.cumsum(delta_t)>0) -def test_time_increments_2(): +def test_time_index_2(): ''' Generate time increments with random choice ''' - delta_t = tsmodel.time_increments(n=20, delta_t_dist = "random_choice", + delta_t = tsmodel.random_time_index(n=20, delta_t_dist = "random_choice", param=[[1,2],[.95,.05]] ) assert all(np.cumsum(delta_t)>0) - - - -@pytest.mark.parametrize('evenly_spaced', [True, False]) -def test_uar1_fit(evenly_spaced): +@pytest.mark.parametrize(('p', 'evenly_spaced'), [(1, True), (50, True), (1, False), (50, False)]) +def test_uar1_fit(p, evenly_spaced): ''' - Tests whether this method works well on an AR(1) process with known parameters + Tests whether this method works well on an AR(1) process with known parameters and evenly spaced time points ''' # define tolerance tol = .4 tau = 2 sigma_2 = 1 + n = 500 - # create p=50 time series - if evenly_spaced==True: - y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, - evenly_spaced=evenly_spaced, p = 10) - else: - y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, - evenly_spaced=evenly_spaced, p = 10, delta_t_dist = "exponential", - param=[1]) - + # create an array of evenly spaced points + if p==1: + if evenly_spaced: + t_arr = np.arange(1, 500) + else: + t_arr= tsmodel.random_time_index(n = n, param=[1]) + y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = tau, sigma_2_0=sigma_2) + theta_hat = tsmodel.uar1_fit(y_sim, t_sim) + assert np.abs(theta_hat[0]-tau) < tol + assert np.abs(theta_hat[1]-sigma_2) < tol + elif p>1: + if evenly_spaced: + t_arr = np.tile(range(1,n), (p, 1)).T + else: + t_arr = np.zeros((n, p)) # Initialize matrix to store time increments + for i in range(p): + # Generate random time increment + t_arr[:, i] = tsmodel.random_time_index(n=n, param=[1]) + y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = tau, sigma_2_0=sigma_2) + # Create an empty matrix to store estimated parameters + theta_hat_matrix = np.empty((y_sim.shape[1], 2)) + # estimate parameters for each time series + for j in range(y_sim.shape[1]): + theta_hat_matrix[j,:] = tsmodel.uar1_fit(y_sim[:, j], t_sim[:, j]) + # compute mean of estimated param for each simulate ts + theta_hat_bar = np.mean(theta_hat_matrix, axis=0) + + # test that + assert np.abs(theta_hat_bar[0]-tau) < tol + assert np.abs(theta_hat_bar[1]-sigma_2) < tol - # Create an empty matrix to store estimated parameters - theta_hat_matrix = np.empty((y_sim.shape[1], 2)) - - # estimate parameters for each time series - for j in range(y_sim.shape[1]): - theta_hat_matrix[j,:] = tsmodel.uar1_fit(y_sim[:, j], t_sim[:, j]) - - # compute mean of estimated param for each simulate ts - theta_hat_bar = np.mean(theta_hat_matrix, axis=0) - - # test that - assert np.abs(theta_hat_bar[0]-tau) < tol - assert np.abs(theta_hat_bar[1]-sigma_2) < tol - - - -def test_time_increments_3(): - ''' - Generate time increments with provided time vector - ''' - delta_t = tsmodel.time_increments(n=20, delta_t_dist = "empirical", param=[range(1,21)] ) - assert all(delta_t==1) + + + + def test_surrogates_1(): tau = 2 diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index ee57bc7e..8ecc6f36 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -28,7 +28,7 @@ 'gen_ts', 'tau_estimation', 'parametric_surrogates', - 'random_time_increments' + 'random_time_index' ] @@ -668,6 +668,7 @@ def uar1_fit(y, t): None. ''' + # obtain initial value for tau_0 tau_initial_value = tau_estimation(y= y, t = t) # obtain initial value for sifma_2_0 @@ -679,38 +680,21 @@ def uar1_fit(y, t): return theta_hat -def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, - delta_t_dist = "exponential", param = 1): +def uar1_sim(t_arr, tau_0=5, sigma_2_0=2): """ Generate a time series of length n from an autoregressive process of order 1 with evenly/unevenly spaced time points. Parameters ---------- - n : integer - The length of the time series + t: array + The vector of time indices of the signal, if multiple time series to be generated, an array of n*p where n is the length of the signal and p the number of surrogate to generate. tau_0 : float Time decay parameter of the AR(1) model ($\phi = e^{-\tau}$) sigma_2_0 : float - Variance of the innovations - - seed : integer - Random seed for reproducible results. - - p : integer - Parameter specifying the number of time series to generate - - evenly_spaced : boolean - if True, delta_t (spacing between time points) is a vector of 1, - if False, delta_t is generated from various distribution (exponential, pareto, poisson and random choice). - - delta_t_dist : str - the distribution that generates the delta_t - possible choices include 'exponential', 'poisson', 'pareto', or 'random_choice' - - param : distributional parameter(s) + Variance of the innovations Returns ------- @@ -725,29 +709,35 @@ def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, pyleoclim.utils.tsmodel.ar1_fit_ml : Maximumum likelihood estimate of AR(1) parameters - pyleoclim.utils.tsmodel.random_time_increments : Generate time increment vector according to a specific probability model + pyleoclim.utils.tsmodel.random_time_index : Generate time increment vector according to a specific probability model """ + # get value n and p from the object t + if t_arr.ndim == 1 or t_arr.shape[1] == 1: + p= 1 + n= t_arr.shape[0] + elif t_arr.shape[1]!= 1: + n = t_arr.shape[0] + p = t_arr.shape[1] + # declare two array to save the values and the time index y_sim = np.empty(shape=(n, p)) t_sim = np.empty(shape=(n, p)) # generate p time series for j in np.arange(p): - if evenly_spaced: - delta_t = [1]*n # for now we assume delta_t = 1 if evenly sampled, potentially to improve with a parameter that specify the time spacing - else: - delta_t = random_time_increments(n, param, delta_t_dist = delta_t_dist, seed = seed+j) - # obtain the 0-based time index from the delta_t distribution - t = np.cumsum(delta_t)-1 + # obtain the vector t from the provided t_arr + if p==1: + t = t_arr + elif p>1: + t = t_arr[:,j] # create empty vector y = np.empty(n) # generate unevenly spaced AR(1) - np.random.seed(seed+j) z = np.random.normal(loc=0, scale=1, size=n) y[0] = z[0] for i in range(1,n): @@ -758,26 +748,28 @@ def uar1_sim(n, tau_0=5, sigma_2_0=2, seed=123, p=1, evenly_spaced = False, y[i] = phi_i * y[i-1] + sigma_i * z[i] t_sim[:, j] = t y_sim[:, j] = y + + # reshape if p == 1 + if p==1: + y_sim = y_sim.reshape(y_sim.shape[0]) + t_sim = t_sim.reshape(t_sim.shape[0]) return y_sim, t_sim def inverse_cumsum(arr): return np.diff(np.concatenate(([0], arr))) -def random_time_increments(n, param, delta_t_dist = "exponential", seed = 12345): +def random_time_index(n, delta_t_dist = "exponential", param = 1): ''' - Generate a time increment vector according to a specific probability model + Generate a random time index vector according to a specific probability model Parameters ---------- n: integer The length of the time series - seed: integer - Random seed for reproducible results. - delta_t_dist: str - the probability distribution of delta_t - possible choices include 'exponential', 'poisson', 'pareto', 'random_choice' or `empirical` + the probability distribution of the random time increments. + possible choices include 'exponential', 'poisson', 'pareto', or 'random_choice'. if 'exponential', `param` is expected to be a single scale parameter (traditionally denoted \lambda) if 'poisson', `param` is expected to be a single parameter (rate) @@ -788,20 +780,20 @@ def random_time_increments(n, param, delta_t_dist = "exponential", seed = 12345) prob_random_choice: probabilities associated with each entry value_random_choice (e.g. [.95,.05]) (These two arrays must be of the same size) - if 'empirical', expect an array containing the time index of the original time series + Returns: ------- - delta_t : 1D array of time increments, length n + t : 1D array of random time index obtained by taking the cumulative sum of the sampled random time increments, length n ''' # check for a valid distribution - valid_distributions = ["exponential", "poisson", "pareto", "random_choice", "empirical"] + valid_distributions = ["exponential", "poisson", "pareto", "random_choice"] if delta_t_dist not in valid_distributions: - raise ValueError("delta_t_dist must be one of: 'exponential', 'poisson', 'pareto', 'random_choice' or 'empirical'.") + raise ValueError("delta_t_dist must be one of: 'exponential', 'poisson', 'pareto', 'random_choice'.") - np.random.seed(seed) + param = np.array(param) # coerce array type if delta_t_dist == "exponential": @@ -823,11 +815,7 @@ def random_time_increments(n, param, delta_t_dist = "exponential", seed = 12345) if len(param)<2 or len(param[0]) != len(param[1]): raise ValueError("value_random_choice and prob_random_choice must have the same size.") delta_t = np.random.choice(param[0], size=n, p=param[1]) - elif delta_t_dist == "empirical": - if len(param[0])!= n: - raise ValueError("Number of time index provided not equal to the n value provided") - delta_t = inverse_cumsum(param[0]) - return delta_t + return np.cumsum(delta_t) From cdecd0999108c1819020ceafa3cf1a8842316477 Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Fri, 5 Apr 2024 11:58:28 +0200 Subject: [PATCH 14/24] develop time_pattern match in surrogates --- pyleoclim/core/series.py | 42 +++++++++++---------------- pyleoclim/core/surrogateseries.py | 6 ++-- pyleoclim/tests/test_utils_tsmodel.py | 36 ++++++++++------------- 3 files changed, 37 insertions(+), 47 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 28298e57..84aebc39 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3709,36 +3709,25 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', raise ValueError("Phase-randomization presently requires evenly-spaced series.") elif method == 'uar1': - # Check if 'evenly_spaced' in the settings dictionary - if "evenly_spaced" not in settings: - raise ValueError("The following required key is missing from the settings dictionary: evenly_spaced") - # check now that if evenly spaced is set to False, the key delta_t_dist is provided - if settings["evenly_spaced"] == False: - if "delta_t_dist" not in settings: - raise ValueError("The following required keys are missing from the settings dictionary: delta_t_dist") - # check that param if evenly spaced is false and key delta t dist is not empirical, then param are provided - if "delta_t_dist" in settings and settings["delta_t_dist"] != "empirical": - required_keys = ['param'] - missing_keys = [key for key in required_keys if key not in settings] - if missing_keys: - raise ValueError("The following required keys are missing from the settings dictionary: param") + if time_pattern == "match": + # estimate theta with MLE + theta_hat = tsmodel.uar1_fit(self.value, self.time) + # create time index + if number >1: + t_arr = np.tile(self.time, (number, 1)).T + y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + else: + y_surr, times = tsmodel.uar1_sim(t_arr = self.time, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + + - # estimate theta with MLE - theta_hat = tsmodel.uar1_fit(self.value, self.time) - # the method `empirical` needs the time value provided as parameters - if "delta_t_dist" in settings and settings['delta_t_dist'] == "empirical": - y_surr, times = tsmodel.uar1_sim(n = len(self.value), tau_0=theta_hat[0], - sigma_2_0=theta_hat[1], - seed=seed, p=number, delta_t_dist = "empirical", param = [[self.time]]) - # for other delta_t_dist, then the parameters are provided - else: - y_surr, times = tsmodel.uar1_sim(n = len(self.value), tau_0=theta_hat[0], - sigma_2_0=theta_hat[1], - seed=seed, p=number, **settings) + + + # 3 choices: 1) emulate self.time (use resolution()) TODO # 2) generate unevenly at random # 3) generate evenly spaced @@ -3761,6 +3750,9 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', if len(np.shape(y_surr)) == 1: y_surr = y_surr[:, np.newaxis] + + if len(np.shape(times)) == 1: + times = times[:, np.newaxis] s_list = [] for i, s in enumerate(y_surr.T): diff --git a/pyleoclim/core/surrogateseries.py b/pyleoclim/core/surrogateseries.py index 20102ecb..1fe4c32c 100644 --- a/pyleoclim/core/surrogateseries.py +++ b/pyleoclim/core/surrogateseries.py @@ -6,7 +6,7 @@ from ..core.multipleseries import MultipleSeries -supported_surrogates = frozenset(['ar1sim','phaseran']) # broadcast all supported surrogates as global variable, for exception handling +supported_surrogates = frozenset(['ar1sim','phaseran', 'uar1']) # broadcast all supported surrogates as global variable, for exception handling class SurrogateSeries(MultipleSeries): ''' Object containing surrogate timeseries, usually obtained through recursive modeling (e.g., AR(1)) @@ -24,8 +24,10 @@ def __init__(self, series_list, label, surrogate_method=None, surrogate_args=Non self.label = str(label or "series") + " surrogates [AR(1)]" elif surrogate_method == 'phaseran': self.label = str(label or "series") + " surrogates [phase-randomized]" + elif surrogate_method == 'uar1': + self.label = str(label or "series") + " surrogates [uar1]" else: - raise ValueError('Surrogate method should either be "ar1sim" or "phaseran"') + raise ValueError('Surrogate method should either be "ar1sim", "phaseran" or "uar1"') diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 4281e25c..fa3b673a 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -82,29 +82,25 @@ def test_uar1_fit(p, evenly_spaced): assert np.abs(theta_hat_bar[0]-tau) < tol assert np.abs(theta_hat_bar[1]-sigma_2) < tol - - - - - - - -def test_surrogates_1(): + +@pytest.mark.parametrize('p', [1, 50]) +def test_surrogates_uar1_match(p): tau = 2 sigma_2 = 1 # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) - y_sim, t_sim = tsmodel.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) - # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + n = 500 + # generate time index + t_arr = np.arange(1,(n+1)) + # create time series + y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) + ts = pyleo.Series(time = t_sim, value=y_sim) + # generate surrogates + surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="match") + if p ==1: + assert(all(surr.series_list[0].time == t_sim)) + if p> 1: + for i in range(p): + assert(all(surr.series_list[i].time == t_sim)) - #pyleo.utils.uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) - ts = pyleo.Series(time = t_sim, value= y_sim) - - surr = ts.surrogates(method = 'uar1', number = 2, settings={"evenly_spaced" :True}) - surr = ts.surrogates(method = 'ar1sim', number = 10) - surr.series_list[0].time - surr.series_list[0].value - surr - # surr = ts.surrogates(method = 'uar1', settings={'delta_t_dist' :"empirical", 'evenly_spaced':False}) - assert(1==1) From ae2049b31f110f1aa95e924c105995e9c894072e Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Fri, 5 Apr 2024 13:46:10 +0200 Subject: [PATCH 15/24] develop time_pattern even in surrogates --- pyleoclim/core/series.py | 21 +++++++++++++++++++-- pyleoclim/tests/test_utils_tsmodel.py | 21 +++++++++++++++++++++ pyleoclim/utils/tsmodel.py | 3 ++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 84aebc39..b6a47cf4 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3721,8 +3721,25 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', y_surr, times = tsmodel.uar1_sim(t_arr = self.time, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - - + if time_pattern == "even": + if "time_increment" not in settings: + print("The parameter 'time_increment' is not present in the dictionary, default value is set to '1'.") + time_increment = 1 + else: + time_increment = settings["time_increment"] + theta_hat = tsmodel.uar1_fit(self.value, self.time) + if length is None: + delta_t = [time_increment]*len(self.value) + else: + delta_t = [time_increment]*length + t = np.cumsum(delta_t) + if number >1: + t_arr = np.tile(t , (number, 1)).T + y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + else: + y_surr, times = tsmodel.uar1_sim(t_arr = t, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + + diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index fa3b673a..17c1cbed 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -104,3 +104,24 @@ def test_surrogates_uar1_match(p): +@pytest.mark.parametrize('p', [1, 50]) +def test_surrogates_uar1_even(p): + tau = 2 + sigma_2 = 1 + time_incr = 2 + # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + n = 500 + # generate time index + t_arr = np.arange(1,(n+1)) + # create time series + y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) + ts = pyleo.Series(time = t_sim, value=y_sim) + # generate surrogates + surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr}) + if p ==1: + assert(all(tsmodel.inverse_cumsum(surr.series_list[0].time) == time_incr)) + if p> 1: + for i in range(p): + assert(all(tsmodel.inverse_cumsum(surr.series_list[i].time) == time_incr)) + + diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index 8ecc6f36..025c5913 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -28,7 +28,8 @@ 'gen_ts', 'tau_estimation', 'parametric_surrogates', - 'random_time_index' + 'random_time_index', + 'inverse_cumsum' ] From 56372aa0e317af2f84bf4643013e5a7967093b3e Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Fri, 5 Apr 2024 14:26:35 +0200 Subject: [PATCH 16/24] implement time_pattern uneven in surrogate when method is uar1 --- pyleoclim/core/series.py | 19 +++++++++++++++++++ pyleoclim/tests/test_utils_tsmodel.py | 18 ++++++++++++++++++ pyleoclim/utils/tsmodel.py | 2 +- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index b6a47cf4..91d4ae26 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3740,6 +3740,25 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', y_surr, times = tsmodel.uar1_sim(t_arr = t, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + if time_pattern == "uneven": + if length is None: + n = len(self.value) + else: + n = length + # estimate paremeter on original time serie + theta_hat = tsmodel.uar1_fit(self.value, self.time) + if number == 1: + t_arr = tsmodel.random_time_index(n = n, **settings) + y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + elif number > 1: + # create time array + t_arr = np.zeros((n, number)) + # fill time array using random time index + for col in range(number): + t_arr[:, col] = tsmodel.random_time_index(n = n, **settings) + # create surrogates + y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 17c1cbed..e80631b5 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -125,3 +125,21 @@ def test_surrogates_uar1_even(p): assert(all(tsmodel.inverse_cumsum(surr.series_list[i].time) == time_incr)) +@pytest.mark.parametrize('p', [1, 50]) +def test_surrogates_uar1_uneven(p): + tau = 2 + sigma_2 = 1 + n = 500 + # generate time index + t_arr = np.arange(1,(n+1)) + # create time series + y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) + ts = pyleo.Series(time = t_sim, value=y_sim) + # generate surrogates + surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven") + #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) + if p ==1: + assert(len(surr.series_list[0].time)==n) + if p> 1: + for i in range(p): + assert(len(surr.series_list[i].time)==n) \ No newline at end of file diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index 025c5913..abd3746f 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -759,7 +759,7 @@ def uar1_sim(t_arr, tau_0=5, sigma_2_0=2): def inverse_cumsum(arr): return np.diff(np.concatenate(([0], arr))) -def random_time_index(n, delta_t_dist = "exponential", param = 1): +def random_time_index(n, delta_t_dist = "exponential", param = [1]): ''' Generate a random time index vector according to a specific probability model From a6ee33dab540760183a28580d0e3055cd8c62b0e Mon Sep 17 00:00:00 2001 From: Lionel Voirol Date: Sat, 6 Apr 2024 01:03:11 +0200 Subject: [PATCH 17/24] better test for surrogate with uar1 uneven sampling --- pyleoclim/tests/test_utils_tsmodel.py | 45 ++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index e80631b5..96521cac 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -6,11 +6,12 @@ @author: julieneg """ + import pytest import numpy as np from pyleoclim.utils import tsmodel import pyleoclim as pyleo - +from scipy.stats import expon @pytest.mark.parametrize('model', ["exponential", "poisson"]) @@ -127,6 +128,7 @@ def test_surrogates_uar1_even(p): @pytest.mark.parametrize('p', [1, 50]) def test_surrogates_uar1_uneven(p): + tol = 0.5 tau = 2 sigma_2 = 1 n = 500 @@ -135,11 +137,46 @@ def test_surrogates_uar1_uneven(p): # create time series y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) ts = pyleo.Series(time = t_sim, value=y_sim) - # generate surrogates + # generate surrogates default is exponential with parameter value 1 surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven") #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) if p ==1: - assert(len(surr.series_list[0].time)==n) + delta_t = tsmodel.inverse_cumsum(surr.series_list[0].time) + # Compute the empirical cumulative distribution function (CDF) of the generated data + empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) + empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) + + # Compute the theoretical CDF of the Exponential distribution + theoretical_cdf = expon.cdf(bins[1:], scale=1) + + # Trim theoretical_cdf to match the size of empirical_cdf + theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] + + # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs + l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) + + assert(l2_norm 1: for i in range(p): - assert(len(surr.series_list[i].time)==n) \ No newline at end of file + delta_t = tsmodel.inverse_cumsum(surr.series_list[i].time) + # Compute the empirical cumulative distribution function (CDF) of the generated data + empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) + empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) + + # Compute the theoretical CDF of the Exponential distribution + theoretical_cdf = expon.cdf(bins[1:], scale=1) + + # Trim theoretical_cdf to match the size of empirical_cdf + theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] + + # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs + l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) + + assert(l2_norm Date: Mon, 8 Apr 2024 11:03:07 +0200 Subject: [PATCH 18/24] minor --- pyleoclim/core/series.py | 30 +++++++-------------------- pyleoclim/tests/test_utils_tsmodel.py | 8 +------ 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 91d4ae26..e98c24df 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3710,6 +3710,8 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', elif method == 'uar1': + + # time pattern match if time_pattern == "match": # estimate theta with MLE theta_hat = tsmodel.uar1_fit(self.value, self.time) @@ -3720,13 +3722,14 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', else: y_surr, times = tsmodel.uar1_sim(t_arr = self.time, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - + # time pattern even if time_pattern == "even": if "time_increment" not in settings: print("The parameter 'time_increment' is not present in the dictionary, default value is set to '1'.") time_increment = 1 else: time_increment = settings["time_increment"] + # estimate parameters theta_hat = tsmodel.uar1_fit(self.value, self.time) if length is None: delta_t = [time_increment]*len(self.value) @@ -3759,31 +3762,14 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', # create surrogates y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - - - - - - # 3 choices: 1) emulate self.time (use resolution()) TODO - # 2) generate unevenly at random - # 3) generate evenly spaced - # grab the parameters from the settings dictionary (do exception handling on dictionary keys) - - # ys, ts = tsmodel.uar1_sim(len(self.value), tau_0=theta_hat[0], - # sigma_2_0=theta_hat[1], - # seed=seed, p=number, evenly_spaced = False, - # delta_t_dist = "exponential", param = 1) - - #y_surr, times = tsmodel.uar1_sim(len(self.value), tau_0=theta_hat[0], - # sigma_2_0=theta_hat[1], - # seed=seed, p=number, **settings) - - # TODO : implement Lionel's ML method + # TODO : implement Lionel's ML method # elif method == 'power-law': # # TODO : implement Stochastic # elif method == 'fBm': # # TODO : implement Stochastic - + + + # reshape if len(np.shape(y_surr)) == 1: y_surr = y_surr[:, np.newaxis] diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 96521cac..9efbc8f7 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -12,7 +12,7 @@ from pyleoclim.utils import tsmodel import pyleoclim as pyleo from scipy.stats import expon - +from scipy.stats import poisson @pytest.mark.parametrize('model', ["exponential", "poisson"]) def test_time_index_0(model): @@ -174,9 +174,3 @@ def test_surrogates_uar1_uneven(p): assert(l2_norm Date: Mon, 8 Apr 2024 17:59:52 -0700 Subject: [PATCH 19/24] Redesigned uar1_sim made it univariate, since trying to return a matrix was surprisingly buggy. --- pyleoclim/core/series.py | 118 ++++++++++---------------- pyleoclim/tests/test_utils_tsmodel.py | 80 ++++++++--------- pyleoclim/utils/tsmodel.py | 104 ++++++++--------------- 3 files changed, 122 insertions(+), 180 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index e98c24df..ec9cc83f 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3659,11 +3659,11 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', number : int The number of surrogates to generate - time_pattern : str {match, even, uneven} + time_pattern : str {match, even, random} The pattern used to generate the surrogate time axes 'match' uses the same pattern as the original Series 'even' uses an evenly-spaced time with spacing delta_t specified in settings (will return error if not specified) - 'uneven' uses random_time_increments() with specified distribution and parameters (default: 'exponential' with parameter 1) + 'random' uses random_time_increments() with specified distribution and parameters (default: 'exponential' with parameter 1) length : int Length of the series @@ -3688,96 +3688,70 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', ''' settings = {} if settings is None else settings.copy() - # args = {} - # args['ar1sim'] = {'t': self.time} - # args['phaseran'] = {} - - # args[method].update(settings) if seed is not None: np.random.seed(seed) - - if method == 'ar1sim': - y_surr = tsmodel.ar1_sim(self.value, number, self.time) - times = np.tile(self.time, (number, 1)).T + if number < 2: + raise ValueError('Too few surrogates. At least 2 are needed for testing; many more for accurate uncertiany quantification') + if length is None: + n = len(self.value) + else: + n = length + + # generate time axes according to provided pattern + if time_pattern == "match": + times = np.tile(self.time, (number, 1)).T + elif time_pattern == "even": + if "time_increment" not in settings: + warnings.warn("'time_increment' not found in the dictionary, default set to 1.",stacklevel=2) + time_increment = 1 + else: + time_increment = settings["time_increment"] + + t = np.cumsum([time_increment]*n) + times = np.tile(t, (number, 1)).T + elif time_pattern == "random": + times = np.zeros((n, number)) + for i in range(number): + times[:, i] = tsmodel.random_time_index(n = n, **settings) + else: + raise ValueError(f"Unknown time pattern: {time_pattern}") + + # apply method + if method == 'ar1sim': + y_surr = tsmodel.ar1_sim(self.value, number, self.time) # CHECK: how does this handle the new time? + elif method == 'phaseran': - if self.is_evenly_spaced(): + if self.is_evenly_spaced() and time_pattern != "random": y_surr = tsutils.phaseran2(self.value, number) - times = np.tile(self.time, (number, 1)).T else: raise ValueError("Phase-randomization presently requires evenly-spaced series.") elif method == 'uar1': - - - # time pattern match - if time_pattern == "match": - # estimate theta with MLE - theta_hat = tsmodel.uar1_fit(self.value, self.time) - # create time index - if number >1: - t_arr = np.tile(self.time, (number, 1)).T - y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - else: - y_surr, times = tsmodel.uar1_sim(t_arr = self.time, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) + # estimate theta with MLE + theta_hat = tsmodel.uar1_fit(self.value, self.time) + y_surr = np.empty_like(times) + for j in range(number): + y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], **theta_hat) + - # time pattern even - if time_pattern == "even": - if "time_increment" not in settings: - print("The parameter 'time_increment' is not present in the dictionary, default value is set to '1'.") - time_increment = 1 - else: - time_increment = settings["time_increment"] - # estimate parameters - theta_hat = tsmodel.uar1_fit(self.value, self.time) - if length is None: - delta_t = [time_increment]*len(self.value) - else: - delta_t = [time_increment]*length - t = np.cumsum(delta_t) - if number >1: - t_arr = np.tile(t , (number, 1)).T - y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - else: - y_surr, times = tsmodel.uar1_sim(t_arr = t, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - - - if time_pattern == "uneven": - if length is None: - n = len(self.value) - else: - n = length - # estimate paremeter on original time serie - theta_hat = tsmodel.uar1_fit(self.value, self.time) - if number == 1: - t_arr = tsmodel.random_time_index(n = n, **settings) - y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - elif number > 1: - # create time array - t_arr = np.zeros((n, number)) - # fill time array using random time index - for col in range(number): - t_arr[:, col] = tsmodel.random_time_index(n = n, **settings) - # create surrogates - y_surr, times = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = theta_hat[0], sigma_2_0=theta_hat[1]) - - # TODO : implement Lionel's ML method # elif method == 'power-law': # # TODO : implement Stochastic # elif method == 'fBm': # # TODO : implement Stochastic - # reshape - if len(np.shape(y_surr)) == 1: - y_surr = y_surr[:, np.newaxis] + # THIS SHOULD BE UNNECESSARY + # # reshape + # if len(np.shape(y_surr)) == 1: + # y_surr = y_surr[:, np.newaxis] - if len(np.shape(times)) == 1: - times = times[:, np.newaxis] + # if len(np.shape(times)) == 1: + # times = times[:, np.newaxis] s_list = [] - for i, s in enumerate(y_surr.T): + for i, (t, y) in enumerate(zip(times.T,y_surr.T)): s_tmp = Series(time=times[:,i], value=s, # will need reformation after uar1 pull time_name=self.time_name, time_unit=self.time_unit, diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 9efbc8f7..7e65549c 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -12,7 +12,7 @@ from pyleoclim.utils import tsmodel import pyleoclim as pyleo from scipy.stats import expon -from scipy.stats import poisson +#from scipy.stats import poisson @pytest.mark.parametrize('model', ["exponential", "poisson"]) def test_time_index_0(model): @@ -40,7 +40,7 @@ def test_time_index_2(): -@pytest.mark.parametrize(('p', 'evenly_spaced'), [(1, True), (50, True), (1, False), (50, False)]) +@pytest.mark.parametrize(('p', 'evenly_spaced'), [(1, True), (10, True), (1, False), (10, False)]) def test_uar1_fit(p, evenly_spaced): ''' Tests whether this method works well on an AR(1) process with known parameters and evenly spaced time points @@ -53,48 +53,49 @@ def test_uar1_fit(p, evenly_spaced): n = 500 # create an array of evenly spaced points - if p==1: - if evenly_spaced: - t_arr = np.arange(1, 500) - else: - t_arr= tsmodel.random_time_index(n = n, param=[1]) - y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = tau, sigma_2_0=sigma_2) - theta_hat = tsmodel.uar1_fit(y_sim, t_sim) - assert np.abs(theta_hat[0]-tau) < tol - assert np.abs(theta_hat[1]-sigma_2) < tol - elif p>1: - if evenly_spaced: - t_arr = np.tile(range(1,n), (p, 1)).T - else: - t_arr = np.zeros((n, p)) # Initialize matrix to store time increments - for i in range(p): - # Generate random time increment - t_arr[:, i] = tsmodel.random_time_index(n=n, param=[1]) - y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0 = tau, sigma_2_0=sigma_2) - # Create an empty matrix to store estimated parameters - theta_hat_matrix = np.empty((y_sim.shape[1], 2)) - # estimate parameters for each time series - for j in range(y_sim.shape[1]): - theta_hat_matrix[j,:] = tsmodel.uar1_fit(y_sim[:, j], t_sim[:, j]) - # compute mean of estimated param for each simulate ts - theta_hat_bar = np.mean(theta_hat_matrix, axis=0) - - # test that - assert np.abs(theta_hat_bar[0]-tau) < tol - assert np.abs(theta_hat_bar[1]-sigma_2) < tol + # if p==1: + # if evenly_spaced: + # t = np.arange(n) + # else: + # t = tsmodel.random_time_index(n = n, param=[1]) + # ys = tsmodel.uar1_sim(t, tau_0 = tau, sigma_2_0=sigma_2) + # theta_hat = tsmodel.uar1_fit(ys, t) + # assert np.abs(theta_hat[0]-tau) < tol + # assert np.abs(theta_hat[1]-sigma_2) < tol + # elif p>1: + if evenly_spaced: + t_arr = np.tile(range(1,n), (p, 1)).T + else: + t_arr = np.zeros((n, p)) # Initialize matrix to store time increments + for j in range(p): + # Generate random time increment + t_arr[:, j] = tsmodel.random_time_index(n=n, param=[1]) + + # Create an empty matrix to store estimated parameters + theta_hat_matrix = np.empty((p, 2)) + # estimate parameters for each time series + for j in range(p): + ys = tsmodel.uar1_sim(t = t_arr[:, j], tau_0 = tau, sigma_2_0=sigma_2) + theta_hat_matrix[j,:] = tsmodel.uar1_fit(ys, t_arr[:, j]) + # compute mean of estimated param for each simulate ts + theta_hat_bar = np.mean(theta_hat_matrix, axis=0) + + # test that + assert np.abs(theta_hat_bar[0]-tau) < tol + assert np.abs(theta_hat_bar[1]-sigma_2) < tol @pytest.mark.parametrize('p', [1, 50]) def test_surrogates_uar1_match(p): tau = 2 sigma_2 = 1 - # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) + # ys, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) n = 500 # generate time index t_arr = np.arange(1,(n+1)) # create time series - y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t_sim, value=y_sim) + ys, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) + ts = pyleo.Series(time = t_sim, value=ys) # generate surrogates surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="match") if p ==1: @@ -110,13 +111,12 @@ def test_surrogates_uar1_even(p): tau = 2 sigma_2 = 1 time_incr = 2 - # y_sim, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) n = 500 # generate time index - t_arr = np.arange(1,(n+1)) + t = np.arange(1,(n+1)) # create time series - y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t_sim, value=y_sim) + ys = tsmodel.uar1_sim(t, tau_0=tau, sigma_2_0=sigma_2) + ts = pyleo.Series(time = t, value=ys) # generate surrogates surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr}) if p ==1: @@ -135,8 +135,8 @@ def test_surrogates_uar1_uneven(p): # generate time index t_arr = np.arange(1,(n+1)) # create time series - y_sim, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t_sim, value=y_sim) + ys, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) + ts = pyleo.Series(time = t_sim, value=ys) # generate surrogates default is exponential with parameter value 1 surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven") #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) diff --git a/pyleoclim/utils/tsmodel.py b/pyleoclim/utils/tsmodel.py index abd3746f..d003483d 100644 --- a/pyleoclim/utils/tsmodel.py +++ b/pyleoclim/utils/tsmodel.py @@ -681,80 +681,48 @@ def uar1_fit(y, t): return theta_hat -def uar1_sim(t_arr, tau_0=5, sigma_2_0=2): +def uar1_sim(t, tau_0=5, sigma_2_0=2): - """ - Generate a time series of length n from an autoregressive process of order 1 with evenly/unevenly spaced time points. - - Parameters - ---------- - t: array - The vector of time indices of the signal, if multiple time series to be generated, an array of n*p where n is the length of the signal and p the number of surrogate to generate. - - tau_0 : float - Time decay parameter of the AR(1) model ($\phi = e^{-\tau}$) - - sigma_2_0 : float - Variance of the innovations - - Returns - ------- - A tuple of 2 arrays - y_sim : n x p NumPy array - matrix of simulated AR(1) vectors - t_sim : n x p NumPy array - matrix of corresponding time axes + """ + Generate a time series of length n from an autoregressive process of order 1 with evenly/unevenly spaced time points. + + Parameters + ---------- + t : array + Time axis - See also - -------- - - pyleoclim.utils.tsmodel.ar1_fit_ml : Maximumum likelihood estimate of AR(1) parameters + tau_0 : float + Time decay parameter of the AR(1) model ($\phi = e^{-\tau}$) + + sigma_2_0 : float + Variance of the innovations - pyleoclim.utils.tsmodel.random_time_index : Generate time increment vector according to a specific probability model + Returns + ------- + ys : n + matrix of simulated AR(1) vector - """ + + See also + -------- - # get value n and p from the object t - if t_arr.ndim == 1 or t_arr.shape[1] == 1: - p= 1 - n= t_arr.shape[0] - elif t_arr.shape[1]!= 1: - n = t_arr.shape[0] - p = t_arr.shape[1] - - # declare two array to save the values and the time index - y_sim = np.empty(shape=(n, p)) - t_sim = np.empty(shape=(n, p)) - - # generate p time series - for j in np.arange(p): - - # obtain the vector t from the provided t_arr - if p==1: - t = t_arr - elif p>1: - t = t_arr[:,j] + pyleoclim.utils.tsmodel.uar1_fit : Maximumum likelihood estimate of AR(1) parameters + pyleoclim.utils.tsmodel.random_time_index : Generate time increment vector according to a specific probability model - # create empty vector - y = np.empty(n) - - # generate unevenly spaced AR(1) - z = np.random.normal(loc=0, scale=1, size=n) - y[0] = z[0] - for i in range(1,n): - delta_i = t[i] - t[i-1] - phi_i = np.exp(-delta_i / tau_0) - sigma_2_i = sigma_2_0 * (1-pow(phi_i, 2)) - sigma_i = np.sqrt(sigma_2_i) - y[i] = phi_i * y[i-1] + sigma_i * z[i] - t_sim[:, j] = t - y_sim[:, j] = y - - # reshape if p == 1 - if p==1: - y_sim = y_sim.reshape(y_sim.shape[0]) - t_sim = t_sim.reshape(t_sim.shape[0]) - return y_sim, t_sim + """ + n = len(t) + ys = np.zeros(n) # create empty vector + # generate unevenly spaced AR(1) + z = np.random.normal(loc=0, scale=1, size=n) + ys[0] = z[0] + for i in range(n-1): + delta_i = t[i+1] - t[i] + phi_i = np.exp(-delta_i / tau_0) + sigma_2_i = sigma_2_0 * (1-pow(phi_i, 2)) + sigma_i = np.sqrt(sigma_2_i) + ys[i+1] = phi_i * ys[i] + sigma_i * z[i] + + return ys def inverse_cumsum(arr): return np.diff(np.concatenate(([0], arr))) From 13438977f6e6470dc2cb7c3ad5998bfe7c377472 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Mon, 8 Apr 2024 18:35:57 -0700 Subject: [PATCH 20/24] Most tests moved to core.Series and fixed --- pyleoclim/core/series.py | 23 +++--- pyleoclim/tests/test_core_Series.py | 68 +++++++++++++---- pyleoclim/tests/test_utils_tsmodel.py | 102 -------------------------- 3 files changed, 67 insertions(+), 126 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index ec9cc83f..6afaee2d 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3715,14 +3715,17 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', times = np.zeros((n, number)) for i in range(number): times[:, i] = tsmodel.random_time_index(n = n, **settings) - else: - raise ValueError(f"Unknown time pattern: {time_pattern}") + else: + raise ValueError(f"Unknown time pattern: {time_pattern}") - # apply method + # apply surrogate method if method == 'ar1sim': - y_surr = tsmodel.ar1_sim(self.value, number, self.time) # CHECK: how does this handle the new time? + if time_pattern != 'match': + raise ValueError('Only a matching time pattern is supported with this method') + else: + y_surr = tsmodel.ar1_sim(self.value, number, self.time) # CHECK: how does this handle the new time? - elif method == 'phaseran': + elif method == 'phaseran': if self.is_evenly_spaced() and time_pattern != "random": y_surr = tsutils.phaseran2(self.value, number) else: @@ -3731,11 +3734,12 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', elif method == 'uar1': # estimate theta with MLE theta_hat = tsmodel.uar1_fit(self.value, self.time) + # generate surrogates y_surr = np.empty_like(times) for j in range(number): - y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], **theta_hat) + y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], + tau_0=theta_hat[0],sigma_2_0=theta_hat[1]) - # elif method == 'power-law': # # TODO : implement Stochastic # elif method == 'fBm': @@ -3750,16 +3754,17 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', # if len(np.shape(times)) == 1: # times = times[:, np.newaxis] + # wrap it all up with a bow s_list = [] for i, (t, y) in enumerate(zip(times.T,y_surr.T)): - s_tmp = Series(time=times[:,i], value=s, # will need reformation after uar1 pull + ts = Series(time=t, value=y, # will need reformation after uar1 pull time_name=self.time_name, time_unit=self.time_unit, value_name=self.value_name, value_unit=self.value_unit, label = str(self.label or '') + " surr #" + str(i+1), verbose=False, auto_time_params=True) - s_list.append(s_tmp) + s_list.append(ts) surr = SurrogateSeries(series_list=s_list, label = self.label, diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index 7b5a374c..dcc080ca 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -28,10 +28,12 @@ #from urllib.request import urlopen import pyleoclim as pyleo -from pyleoclim.utils.tsmodel import ar1_fit +import pyleoclim.utils.tsmodel as tsmodel import pyleoclim.utils.tsbase as tsbase from statsmodels.tsa.arima_process import arma_generate_sample +from scipy.stats import expon + # a collection of useful functions @@ -538,7 +540,7 @@ def test_invalid(self): class TestUISeriesSurrogates: ''' Test Series.surrogates() ''' - def test_surrogates_t0(self, eps=0.2): + def test_surrogates_t0(self, p=2, eps=0.2): ''' Generate AR(1) surrogates based on a AR(1) series with certain parameters, and then evaluate and assert the parameters of the surrogates are correct ''' @@ -549,10 +551,57 @@ def test_surrogates_t0(self, eps=0.2): ar1 = arma_generate_sample(ar, ma, nsample=n, scale=1) ts = pyleo.Series(time=np.arange(1000), value=ar1) - ts_surrs = ts.surrogates(number=1) + ts_surrs = ts.surrogates(number=p) for ts_surr in ts_surrs.series_list: - g_surr = ar1_fit(ts_surr.value) + g_surr = tsmodel.ar1_fit(ts_surr.value) assert np.abs(g_surr-g) < eps + + def test_surrogates_uar1_match(self, p=5): + ts = gen_ts(nt=550, alpha=1.0) + # generate surrogates + surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="match") + for i in range(p): + assert(np.allclose(surr.series_list[i].time, ts.time)) + + def test_surrogates_uar1_even(self, p=5): + ts = gen_ts(nt=550, alpha=1.0) + time_incr = np.median(np.diff(ts.time)) + # generate surrogates + surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr}) + for i in range(p): + assert(np.allclose(tsmodel.inverse_cumsum(surr.series_list[i].time),time_incr)) + + def test_surrogates_uar1_random(self, p=5): + tol = 0.5 + tau = 2 + sigma_2 = 1 + n = 500 + # generate time index + t = np.arange(1,(n+1)) + # create time series + ys = tsmodel.uar1_sim(t, tau_0=tau, sigma_2_0=sigma_2) + ts = pyleo.Series(time = t, value=ys, auto_time_params=True,verbose=False) + # generate surrogates default is exponential with parameter value 1 + surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="random") + #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) + + for i in range(p): + delta_t = tsmodel.inverse_cumsum(surr.series_list[i].time) + # Compute the empirical cumulative distribution function (CDF) of the generated data + empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) + empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) + + # Compute the theoretical CDF of the Exponential distribution + theoretical_cdf = expon.cdf(bins[1:], scale=1) + + # Trim theoretical_cdf to match the size of empirical_cdf + theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] + + # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs + l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) + + assert(l2_norm= 0) assert np.square(miss_ssa.RCseries.value - soi.value).mean() < 0.3 - - - # def test_ssa_t5(self): - # '''Test Series.ssa() on Allen&Smith dataset - # ''' - # df = pd.read_csv('https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/Development/example_data/mratest.txt',delim_whitespace=True,names=['Total','Signal','Noise']) - # mra = pyleo.Series(time=df.index, value=df['Total'], value_name='Allen&Smith test data', time_name='Time', time_unit='yr') - # mraSsa = mra.ssa(nMC=10) - # fig, ax = mraSsa.screeplot() - # pyleo.closefig(fig) - class TestUISeriesPlot: '''Test for Series.plot() diff --git a/pyleoclim/tests/test_utils_tsmodel.py b/pyleoclim/tests/test_utils_tsmodel.py index 7e65549c..68eb64b1 100644 --- a/pyleoclim/tests/test_utils_tsmodel.py +++ b/pyleoclim/tests/test_utils_tsmodel.py @@ -10,8 +10,6 @@ import pytest import numpy as np from pyleoclim.utils import tsmodel -import pyleoclim as pyleo -from scipy.stats import expon #from scipy.stats import poisson @pytest.mark.parametrize('model', ["exponential", "poisson"]) @@ -52,17 +50,6 @@ def test_uar1_fit(p, evenly_spaced): sigma_2 = 1 n = 500 - # create an array of evenly spaced points - # if p==1: - # if evenly_spaced: - # t = np.arange(n) - # else: - # t = tsmodel.random_time_index(n = n, param=[1]) - # ys = tsmodel.uar1_sim(t, tau_0 = tau, sigma_2_0=sigma_2) - # theta_hat = tsmodel.uar1_fit(ys, t) - # assert np.abs(theta_hat[0]-tau) < tol - # assert np.abs(theta_hat[1]-sigma_2) < tol - # elif p>1: if evenly_spaced: t_arr = np.tile(range(1,n), (p, 1)).T else: @@ -84,93 +71,4 @@ def test_uar1_fit(p, evenly_spaced): assert np.abs(theta_hat_bar[0]-tau) < tol assert np.abs(theta_hat_bar[1]-sigma_2) < tol - -@pytest.mark.parametrize('p', [1, 50]) -def test_surrogates_uar1_match(p): - tau = 2 - sigma_2 = 1 - # ys, t_sim = uar1_sim(n=200, tau_0=tau, sigma_2_0=sigma_2, evenly_spaced=True, p = 1) - n = 500 - # generate time index - t_arr = np.arange(1,(n+1)) - # create time series - ys, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t_sim, value=ys) - # generate surrogates - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="match") - if p ==1: - assert(all(surr.series_list[0].time == t_sim)) - if p> 1: - for i in range(p): - assert(all(surr.series_list[i].time == t_sim)) - - - -@pytest.mark.parametrize('p', [1, 50]) -def test_surrogates_uar1_even(p): - tau = 2 - sigma_2 = 1 - time_incr = 2 - n = 500 - # generate time index - t = np.arange(1,(n+1)) - # create time series - ys = tsmodel.uar1_sim(t, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t, value=ys) - # generate surrogates - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr}) - if p ==1: - assert(all(tsmodel.inverse_cumsum(surr.series_list[0].time) == time_incr)) - if p> 1: - for i in range(p): - assert(all(tsmodel.inverse_cumsum(surr.series_list[i].time) == time_incr)) - - -@pytest.mark.parametrize('p', [1, 50]) -def test_surrogates_uar1_uneven(p): - tol = 0.5 - tau = 2 - sigma_2 = 1 - n = 500 - # generate time index - t_arr = np.arange(1,(n+1)) - # create time series - ys, t_sim = tsmodel.uar1_sim(t_arr = t_arr, tau_0=tau, sigma_2_0=sigma_2) - ts = pyleo.Series(time = t_sim, value=ys) - # generate surrogates default is exponential with parameter value 1 - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven") - #surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} ) - if p ==1: - delta_t = tsmodel.inverse_cumsum(surr.series_list[0].time) - # Compute the empirical cumulative distribution function (CDF) of the generated data - empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) - empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) - - # Compute the theoretical CDF of the Exponential distribution - theoretical_cdf = expon.cdf(bins[1:], scale=1) - - # Trim theoretical_cdf to match the size of empirical_cdf - theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] - - # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs - l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) - - assert(l2_norm 1: - for i in range(p): - delta_t = tsmodel.inverse_cumsum(surr.series_list[i].time) - # Compute the empirical cumulative distribution function (CDF) of the generated data - empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) - empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) - - # Compute the theoretical CDF of the Exponential distribution - theoretical_cdf = expon.cdf(bins[1:], scale=1) - - # Trim theoretical_cdf to match the size of empirical_cdf - theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] - - # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs - l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) - assert(l2_norm Date: Mon, 8 Apr 2024 21:06:27 -0700 Subject: [PATCH 21/24] minor changes to Series.correlation() (cf Pyleo dvlpt call on 04/05/2024) --- pyleoclim/core/series.py | 173 +++++++++++++++++---------------- pyleoclim/utils/correlation.py | 2 +- 2 files changed, 88 insertions(+), 87 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index 6afaee2d..e99c090b 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -105,14 +105,14 @@ class Series: source of the dataset. If it came from a LiPD file, this could be the datasetID property archiveType : string - climate archive, one of 'Borehole', 'Coral', 'FluvialSediment', 'GlacierIce', 'GroundIce', 'LakeSediment', 'MarineSediment', 'Midden', 'MolluskShell', 'Peat', 'Sclerosponge', 'Shoreline', 'Speleothem', 'TerrestrialSediment', 'Wood' + climate archive, one of 'Borehole', 'Coral', 'FluvialSediment', 'GlacierIce', 'GroundIce', 'LakeSediment', 'MarineSediment', 'Midden', 'MolluskShell', 'Peat', 'Sclerosponge', 'Shoreline', 'Speleothem', 'TerrestrialSediment', 'Wood' Reference: https://lipdverse.org/vocabulary/archivetype/ - + control_archiveType : [True, False] - Whether to standardize the name of the archiveType agains the vocabulary from: https://lipdverse.org/vocabulary/paleodata_proxy/. - If set to True, will only allow for these terms and automatically convert known synonyms to the standardized name. Only standardized variable names will be automatically assigned a color scheme. - Default is False. - + Whether to standardize the name of the archiveType agains the vocabulary from: https://lipdverse.org/vocabulary/paleodata_proxy/. + If set to True, will only allow for these terms and automatically convert known synonyms to the standardized name. Only standardized variable names will be automatically assigned a color scheme. + Default is False. + dropna : bool Whether to drop NaNs from the series to prevent downstream functions from choking on them defaults to True @@ -128,10 +128,10 @@ class Series: set to True to remove the NaNs and make time axis strictly prograde with duplicated timestamps reduced by averaging the values Default is None (marked for deprecation) - auto_time_params : bool, - If True, uses tsbase.disambiguate_time_metadata to ensure that time_name and time_unit are usable by Pyleoclim. This may override the provided metadata. + auto_time_params : bool, + If True, uses tsbase.disambiguate_time_metadata to ensure that time_name and time_unit are usable by Pyleoclim. This may override the provided metadata. If False, the provided time_name and time_unit are used. This may break some functionalities (e.g. common_time and convert_time_unit), so use at your own risk. - If not provided, code will set to True for internal consistency. + If not provided, code will set to True for internal consistency. Examples -------- @@ -265,16 +265,16 @@ def __init__(self, time, value, time_unit=None, time_name=None, self.control_archiveType = control_archiveType if archiveType is not None: #Deal with archiveType - + #Get the possible list of archiveTyp - - + + if control_archiveType == True: - + res = lipdutils.get_archive_type() std_var = list(res.keys()) std_var_lower = [key.lower() for key in res.keys()] - + data = [] for key, values in res.items(): if values: # Check if the list is not empty @@ -288,10 +288,10 @@ def __init__(self, time, value, time_unit=None, time_name=None, synonym = df[df['Value'].isin([archiveType.lower().replace(" ", "")])]['Key'] if synonym.empty == True: synonym = None - else: + else: synonym = synonym.to_string(header=False, index=False) - - + + if archiveType.lower().replace(" ", "") in std_var_lower: index = std_var_lower.index(archiveType.lower().replace(" ", "")) self.archiveType = std_var[index] @@ -561,7 +561,7 @@ def from_json(cls, path): b = jsonutils.iterate_through_dict(a, 'Series') return cls(**b) - + def pandas_method(self, method): ser = self.to_pandas() result = method(ser) @@ -3239,19 +3239,19 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, As with wavelet analysis, both CWT and WWZ admit optional arguments through `settings`. For instance, one can adjust the resolution of the time axis on which coherence is evaluated: - + .. jupyter-execute:: coh_wwz = ts_air.wavelet_coherence(ts_nino, method = 'wwz', settings = {'ntau':20}) coh_wwz.plot() - + The frequency (scale) axis can also be customized, e.g. to focus on scales from 1 to 20y, with 24 scales: - + .. jupyter-execute:: - + coh = ts_air.wavelet_coherence(ts_nino, freq_kwargs={'fmin':1/20,'fmax':1,'nf':24}) coh.plot() - + Significance is assessed similarly to PSD or Scalogram objects: .. jupyter-execute:: @@ -3270,8 +3270,8 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, cwt_sig.dashboard() Note: this design balances many considerations, and is not easily customizable. - - + + ''' if not verbose: warnings.simplefilter('ignore') @@ -3290,7 +3290,7 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, args = {} args['wwz'] = {'freq': freq, 'verbose': verbose} args['cwt'] = {'freq': freq} - + # put on same time axes if necessary if method == 'cwt' and not np.array_equal(self.time, target_series.time): @@ -3326,7 +3326,7 @@ def wavelet_coherence(self, target_series, method='cwt', settings=None, tau = np.linspace(lb, ub, ntau) settings.update({'tau': tau}) - + args[method].update(settings) # Apply WTC method @@ -3358,8 +3358,11 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = The significance of the correlation is assessed using one of the following methods: 1) 'ttest': T-test adjusted for effective sample size. - 2) 'isopersistent': AR(1) modeling of x and y. - 3) 'isospectral': phase randomization of original inputs. (default) + 2) 'ar1sim': AR(1) modeling of x and y. + 3) 'phaseran': phase randomization of original inputs. (default) + 4) 'built-in': uses built-in method + + Note: Up to version v0.14.0. ar1sim was called "isopersistent", phaseran was called "isospectral" The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances. The others are non-parametric, but their computational requirements scale with the number of simulations. @@ -3371,27 +3374,25 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = target_series : Series A pyleoclim Series object - + alpha : float The significance level (default: 0.05) - - statistic : str + + statistic : str statistic being evaluated. Can use any of the SciPy-supported ones: https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'] - Default: 'pearsonr'. - + Default: 'pearsonr'. + method : str, {'ttest','built-in','ar1sim','phaseran'} method for significance testing. Default is 'phaseran' 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1] - 'built-in' uses the p-value that ships with the statistic. - The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning. - Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case. - + 'built-in' uses the p-value that ships with the SciPy function. + The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning. + Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case. + timespan : tuple The time interval over which to perform the calculation - - settings : dict Parameters for the correlation function, including: @@ -3408,7 +3409,7 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = seed : float or int random seed for isopersistent and isospectral methods - + mute_pbar : bool, optional Mute the progress bar. The default is False. @@ -3432,13 +3433,13 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = -------- pyleoclim.utils.correlation.corr_sig : Correlation function (marked for deprecation) - + pyleoclim.utils.correlation.association : SciPy measures of association between variables - + pyleoclim.series.surrogates : parametric and non-parametric surrogates of any Series object - + pyleoclim.multipleseries.common_time : Aligning time axes - + References ---------- [1] Hu, J., J. Emile-Geay, and J. Partin (2017), Correlation-based interpretations of paleoclimate data – where statistics meet past climates, Earth and Planetary Science Letters, 459, 362–371, doi:10.1016/j.epsl.2016.11.048. @@ -3458,7 +3459,7 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = # set an arbitrary random seed to fix the result corr_res = ts_nino.correlation(ts_air, settings={'nsim': 20}, seed=2333) print(corr_res) - + # changing the statistic corr_res = ts_nino.correlation(ts_air, statistic='kendalltau') print(corr_res) @@ -3482,7 +3483,7 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = warnings.warn("isopersistent is deprecated and was replaced by 'ar1sim'", DeprecationWarning, stacklevel=2) method = 'ar1sim' - + settings = {} if settings is None else settings.copy() corr_args = {'alpha': alpha, 'method': method} corr_args.update(settings) @@ -3514,19 +3515,19 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = res = corrutils.association(ts0.value,ts1.value,statistic) stat = res[0] pval = res.pvalue if len(res) > 1 else np.nan - signif = pval <= alpha - elif method in supported_surrogates: + signif = pval <= alpha + elif method in supported_surrogates: number = corr_args['nsim'] if 'nsim' in corr_args.keys() else 1000 seed = corr_args['seed'] if 'seed' in corr_args.keys() else None - method = corr_args['method'] if 'method' in corr_args.keys() else None + #method = corr_args['method'] if 'method' in corr_args.keys() else None surr_settings = corr_args['surr_settings'] if 'surr_settings' in corr_args.keys() else None - + # compute correlation statistic stat = corrutils.association(ts0.value,ts1.value,statistic)[0] - - ts0_surr = ts0.surrogates(number=number, seed=seed, + + ts0_surr = ts0.surrogates(number=number, seed=seed, method=method, settings=surr_settings) - ts1_surr = ts1.surrogates(number=number, seed=seed, + ts1_surr = ts1.surrogates(number=number, seed=seed, method=method, settings=surr_settings) stat_surr = np.empty((number)) for i in tqdm(range(number), desc='Evaluating association on surrogate pairs', total=number, disable=mute_pbar): @@ -3535,7 +3536,7 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method = # obtain p-value pval = np.sum(np.abs(stat_surr) >= np.abs(stat)) / number # establish significance - signif = pval <= alpha + signif = pval <= alpha else: raise ValueError(f'Unknown method: {method}. Look up documentation for a wiser choice.') @@ -3644,7 +3645,7 @@ def causality(self, target_series, method='liang', timespan=None, settings=None, def surrogates(self, method='ar1sim', number=1, time_pattern='match', length=None, seed=None, settings=None): ''' Generate surrogates of the Series object according to "method" - + For now, assumes uniform spacing and increasing time axis Parameters @@ -3652,19 +3653,19 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', method : {ar1sim, phaseran, uar1} The method used to generate surrogates of the timeseries - - Note that phaseran assumes an odd number of samples. If the series + + Note that phaseran assumes an odd number of samples. If the series has even length, the last point is dropped to satisfy this requirement number : int The number of surrogates to generate - + time_pattern : str {match, even, random} The pattern used to generate the surrogate time axes 'match' uses the same pattern as the original Series 'even' uses an evenly-spaced time with spacing delta_t specified in settings (will return error if not specified) - 'random' uses random_time_increments() with specified distribution and parameters (default: 'exponential' with parameter 1) - + 'random' uses random_time_index() with specified distribution and parameters (default: 'exponential' with parameter 1) + length : int Length of the series @@ -3684,21 +3685,21 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', pyleoclim.utils.tsmodel.ar1_sim : AR(1) simulator pyleoclim.utils.tsmodel.uar1_sim : maximum likelihood AR(1) simulator pyleoclim.utils.tsutils.phaseran2 : phase randomization - pyleoclim.utils.tsutils.random_time_increments : + pyleoclim.utils.tsutils.random_time_index : random time index vector according to a specific probability model ''' settings = {} if settings is None else settings.copy() if seed is not None: np.random.seed(seed) - + if number < 2: raise ValueError('Too few surrogates. At least 2 are needed for testing; many more for accurate uncertiany quantification') if length is None: n = len(self.value) else: n = length - + # generate time axes according to provided pattern if time_pattern == "match": times = np.tile(self.time, (number, 1)).T @@ -3708,49 +3709,49 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', time_increment = 1 else: time_increment = settings["time_increment"] - + t = np.cumsum([time_increment]*n) times = np.tile(t, (number, 1)).T elif time_pattern == "random": - times = np.zeros((n, number)) + times = np.zeros((n, number)) for i in range(number): times[:, i] = tsmodel.random_time_index(n = n, **settings) else: raise ValueError(f"Unknown time pattern: {time_pattern}") - - # apply surrogate method + + # apply surrogate method if method == 'ar1sim': if time_pattern != 'match': raise ValueError('Only a matching time pattern is supported with this method') else: - y_surr = tsmodel.ar1_sim(self.value, number, self.time) # CHECK: how does this handle the new time? - - elif method == 'phaseran': + y_surr = tsmodel.ar1_sim(self.value, number, self.time) # CHECK: how does this handle the new time? + + elif method == 'phaseran': if self.is_evenly_spaced() and time_pattern != "random": y_surr = tsutils.phaseran2(self.value, number) else: raise ValueError("Phase-randomization presently requires evenly-spaced series.") - + elif method == 'uar1': # estimate theta with MLE theta_hat = tsmodel.uar1_fit(self.value, self.time) # generate surrogates - y_surr = np.empty_like(times) + y_surr = np.empty_like(times) for j in range(number): - y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], - tau_0=theta_hat[0],sigma_2_0=theta_hat[1]) - + y_surr[:,j] = tsmodel.uar1_sim(t = times[:,j], + tau_0=theta_hat[0],sigma_2_0=theta_hat[1]) + # elif method == 'power-law': # # TODO : implement Stochastic # elif method == 'fBm': # # TODO : implement Stochastic - - + + # THIS SHOULD BE UNNECESSARY # # reshape # if len(np.shape(y_surr)) == 1: # y_surr = y_surr[:, np.newaxis] - + # if len(np.shape(times)) == 1: # times = times[:, np.newaxis] @@ -3758,17 +3759,17 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', s_list = [] for i, (t, y) in enumerate(zip(times.T,y_surr.T)): ts = Series(time=t, value=y, # will need reformation after uar1 pull - time_name=self.time_name, - time_unit=self.time_unit, - value_name=self.value_name, - value_unit=self.value_unit, + time_name=self.time_name, + time_unit=self.time_unit, + value_name=self.value_name, + value_unit=self.value_unit, label = str(self.label or '') + " surr #" + str(i+1), verbose=False, auto_time_params=True) s_list.append(ts) - surr = SurrogateSeries(series_list=s_list, + surr = SurrogateSeries(series_list=s_list, label = self.label, - surrogate_method=method, + surrogate_method=method, surrogate_args=settings) return surr @@ -4222,7 +4223,7 @@ def resolution(self): See Also -------- - + pyleoclim.core.resolutions.Resolution Examples diff --git a/pyleoclim/utils/correlation.py b/pyleoclim/utils/correlation.py index 1217d0af..cdf045f4 100644 --- a/pyleoclim/utils/correlation.py +++ b/pyleoclim/utils/correlation.py @@ -711,7 +711,7 @@ def association(y1, y2, statistic='pearsonr',settings=None): func = getattr(stats, statistic) res = func(y1,y2,**args) else: - raise ValueError(f'Wrong statistic: {statistic}; acceptble choices are {acceptable_methods}') + raise ValueError(f'Wrong statistic: {statistic}; acceptable choices are {acceptable_methods}') return res From 1857e16b591c17f77cdbc46488670e92a8b33840 Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Tue, 9 Apr 2024 09:52:11 -0700 Subject: [PATCH 22/24] minor reformatting --- pyleoclim/core/series.py | 6 ++---- pyleoclim/tests/test_core_Series.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py index e99c090b..3ddd3953 100644 --- a/pyleoclim/core/series.py +++ b/pyleoclim/core/series.py @@ -3693,8 +3693,6 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', if seed is not None: np.random.seed(seed) - if number < 2: - raise ValueError('Too few surrogates. At least 2 are needed for testing; many more for accurate uncertiany quantification') if length is None: n = len(self.value) else: @@ -3715,7 +3713,7 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', elif time_pattern == "random": times = np.zeros((n, number)) for i in range(number): - times[:, i] = tsmodel.random_time_index(n = n, **settings) + times[:, i] = tsmodel.random_time_index(n = n, **settings) # TODO: check that this does not break when unexpected keywords are passed in `settings` else: raise ValueError(f"Unknown time pattern: {time_pattern}") @@ -3758,7 +3756,7 @@ def surrogates(self, method='ar1sim', number=1, time_pattern='match', # wrap it all up with a bow s_list = [] for i, (t, y) in enumerate(zip(times.T,y_surr.T)): - ts = Series(time=t, value=y, # will need reformation after uar1 pull + ts = Series(time=t, value=y, time_name=self.time_name, time_unit=self.time_unit, value_name=self.value_name, diff --git a/pyleoclim/tests/test_core_Series.py b/pyleoclim/tests/test_core_Series.py index dcc080ca..a3c858b7 100644 --- a/pyleoclim/tests/test_core_Series.py +++ b/pyleoclim/tests/test_core_Series.py @@ -556,13 +556,14 @@ def test_surrogates_t0(self, p=2, eps=0.2): g_surr = tsmodel.ar1_fit(ts_surr.value) assert np.abs(g_surr-g) < eps - def test_surrogates_uar1_match(self, p=5): + @pytest.mark.parametrize('number',[1,5]) + def test_surrogates_uar1_match(self, number): ts = gen_ts(nt=550, alpha=1.0) # generate surrogates - surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="match") - for i in range(p): + surr = ts.surrogates(method = 'uar1', number = number, time_pattern ="match") + for i in range(number): assert(np.allclose(surr.series_list[i].time, ts.time)) - + def test_surrogates_uar1_even(self, p=5): ts = gen_ts(nt=550, alpha=1.0) time_incr = np.median(np.diff(ts.time)) @@ -571,8 +572,7 @@ def test_surrogates_uar1_even(self, p=5): for i in range(p): assert(np.allclose(tsmodel.inverse_cumsum(surr.series_list[i].time),time_incr)) - def test_surrogates_uar1_random(self, p=5): - tol = 0.5 + def test_surrogates_uar1_random(self, p=5, tol = 0.5): tau = 2 sigma_2 = 1 n = 500 @@ -590,16 +590,12 @@ def test_surrogates_uar1_random(self, p=5): # Compute the empirical cumulative distribution function (CDF) of the generated data empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True) empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins) - # Compute the theoretical CDF of the Exponential distribution theoretical_cdf = expon.cdf(bins[1:], scale=1) - # Trim theoretical_cdf to match the size of empirical_cdf theoretical_cdf = theoretical_cdf[:len(empirical_cdf)] - # Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf) - assert(l2_norm Date: Tue, 9 Apr 2024 17:01:28 -0700 Subject: [PATCH 23/24] enabled spectral analysis tests with uar1 --- pyleoclim/core/psds.py | 7 ++++--- pyleoclim/tests/test_core_PSD.py | 22 ++++++---------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/pyleoclim/core/psds.py b/pyleoclim/core/psds.py index e7bcafaf..273c8451 100644 --- a/pyleoclim/core/psds.py +++ b/pyleoclim/core/psds.py @@ -280,10 +280,10 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], if self.spec_method == 'lomb_scargle' and method == 'ar1asym': raise ValueError('Asymptotic solution is not supported for the Lomb-Scargle method') - if method not in ['ar1sim', 'ar1asym']: + if method not in ['ar1sim', 'uar1','ar1asym']: raise ValueError("The available methods are 'ar1sim' and 'ar1asym'") - if method in ['ar1sim', 'ar1old']: # fix later in Series.surrogates() + if method in ['ar1sim', 'uar1']: signif_scals = None if scalogram: try: @@ -760,7 +760,8 @@ def plot(self, in_loglog=True, in_period=True, label=None, xlabel=None, ylabel=' # plot significance levels if self.signif_qs is not None: signif_method_label = { - 'ar1sim': 'AR(1) simulations', + 'ar1sim': 'AR(1) simulations (MoM)', + 'uar1': 'AR(1) simulations (MLE)', 'ar1asym': 'AR(1) asymptotic solution' } nqs = np.size(self.signif_qs.psd_list) diff --git a/pyleoclim/tests/test_core_PSD.py b/pyleoclim/tests/test_core_PSD.py index eab313c4..8cc4cd64 100644 --- a/pyleoclim/tests/test_core_PSD.py +++ b/pyleoclim/tests/test_core_PSD.py @@ -12,7 +12,6 @@ 4. after `pip install pytest-xdist`, one may execute "pytest -n 4" to test in parallel with number of workers specified by `-n` 5. for more details, see https://docs.pytest.org/en/stable/usage.html ''' -import numpy as np import pytest import pyleoclim as pyleo @@ -20,17 +19,7 @@ def gen_ts(model='colored_noise',alpha=1, nt=100, f0=None, m=None, seed=None): 'wrapper for gen_ts in pyleoclim' t,v = pyleo.utils.gen_ts(model=model,alpha=alpha, nt=nt, f0=f0, m=m, seed=seed) - ts=pyleo.Series(t,v) - return ts - -# a collection of useful functions - -def gen_normal(loc=0, scale=1, nt=100): - ''' Generate random data with a Gaussian distribution - ''' - t = np.arange(nt) - v = np.random.normal(loc=loc, scale=scale, size=nt) - ts = pyleo.Series(t,v) + ts=pyleo.Series(t,v, auto_time_params=True,verbose=False) return ts @@ -50,12 +39,13 @@ def test_plot_t0(self): class TestUiPsdSignifTest: ''' Tests for PSD.signif_test() ''' - - def test_signif_test_t0(self): + @pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym']) + def test_signif_test_t0(self,method): ''' Test PSD.signif_test() with default parameters ''' ts = gen_ts(nt=500) psd = ts.spectral(method='mtm') - psd_signif = psd.signif_test(number=10) + psd_signif = psd.signif_test(number=10,method=method) fig, ax = psd_signif.plot() - pyleo.closefig(fig) \ No newline at end of file + pyleo.closefig(fig) + \ No newline at end of file From 494d6c72fcd338925e99544b46299502c7189a1d Mon Sep 17 00:00:00 2001 From: CommonClimate Date: Tue, 9 Apr 2024 17:12:30 -0700 Subject: [PATCH 24/24] enabled scalograms --- pyleoclim/core/psds.py | 2 +- pyleoclim/core/scalograms.py | 9 +++++---- pyleoclim/tests/test_core_Scalogram.py | 20 ++++++++++---------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pyleoclim/core/psds.py b/pyleoclim/core/psds.py index 273c8451..c572bedd 100644 --- a/pyleoclim/core/psds.py +++ b/pyleoclim/core/psds.py @@ -179,7 +179,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], Number of surrogate series to generate for significance testing. The default is None. - method : str; {'ar1asym','ar1sim'} + method : str; {'ar1asym','ar1sim','uar1'} Method to generate surrogates. AR1sim uses simulated timeseries with similar persistence. AR1asymp represents the closed form solution. The default is AR1sim diff --git a/pyleoclim/core/scalograms.py b/pyleoclim/core/scalograms.py index 08787d71..0661ad34 100644 --- a/pyleoclim/core/scalograms.py +++ b/pyleoclim/core/scalograms.py @@ -413,7 +413,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], Parameters ---------- - method : {'ar1asym', 'ar1sim'} + method : {'ar1asym', 'ar1sim','uar1'} Method to use to generate the surrogates. ar1sim uses simulated timeseries with similar persistence. ar1asym represents the theoretical, closed-form solution. The default is ar1sim @@ -485,10 +485,11 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95], if self.wave_method == 'wwz' and method == 'ar1asym': raise ValueError('Asymptotic solution is not supported for the wwz method') - if method not in ['ar1sim', 'ar1asym']: - raise ValueError("The available methods are ar1sim'and 'ar1asym'") + acceptable_methods = ['ar1sim', 'ar1asym', 'uar1'] + if method not in acceptable_methods: + raise ValueError(f"The available methods are: {acceptable_methods}") - if method == 'ar1sim': + if method in ['ar1sim','uar1'] : if hasattr(self,'signif_scals'): signif_scals = self.signif_scals diff --git a/pyleoclim/tests/test_core_Scalogram.py b/pyleoclim/tests/test_core_Scalogram.py index 041483ce..32eb3327 100644 --- a/pyleoclim/tests/test_core_Scalogram.py +++ b/pyleoclim/tests/test_core_Scalogram.py @@ -20,8 +20,7 @@ def gen_ts(model='colored_noise',alpha=1, nt=100, f0=None, m=None, seed=None): 'wrapper for gen_ts in pyleoclim' t,v = pyleo.utils.gen_ts(model=model,alpha=alpha, nt=nt, f0=f0, m=m, seed=seed) - ts=pyleo.Series(t,v) - + ts=pyleo.Series(t,v, auto_time_params=True,verbose=False) return ts # Tests below @@ -31,19 +30,20 @@ class TestUiScalogramSignifTest: @pytest.mark.parametrize('wave_method',['wwz','cwt']) def test_signif_test_t0(self, wave_method): - ''' Test scalogram.signif_test() with default parameters + ''' Test scalogram.signif_test() with differennt methods ''' - ts = gen_ts(model='colored_noise',nt=500) + ts = gen_ts(model='colored_noise') scal = ts.wavelet(method=wave_method) scal_signif = scal.signif_test(number=5, qs = [0.8, 0.9, .95]) fig,ax = scal_signif.plot(signif_thresh=0.99) pyleo.closefig(fig) - - @pytest.mark.parametrize('ar1_method',['ar1asym', 'ar1sim']) - def test_signif_test_t1(self,ar1_method): - ''' Test scalogram.signif_test() with default parameters + @pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym']) + def test_signif_test_t1(self,method): + ''' Test scalogram.signif_test() with various surrogates ''' - ts = gen_ts(model='colored_noise',nt=500) + ts = gen_ts(model='colored_noise') scal = ts.wavelet(method='cwt') - scal_signif = scal.signif_test(method=ar1_method,number=1) + scal_signif = scal.signif_test(method=method,number=2) + fig,ax = scal_signif.plot(signif_thresh=0.99) + pyleo.closefig(fig)