Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

uar1 surrogates #534

Merged
merged 25 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ac469a1
renamed Lionel's functions to uar1
CommonClimate Mar 27, 2024
4b1a19b
stub out uar1 surrogates
CommonClimate Mar 27, 2024
967b3c9
add option for the distribution of the delta_t in fct time_increments
lionelvoirol Apr 2, 2024
96de6ff
added delta_t_dist 'empirical' in fct time_increments, added support …
lionelvoirol Apr 3, 2024
d3feebe
minor
lionelvoirol Apr 3, 2024
f047dcd
minor
lionelvoirol Apr 3, 2024
c568a45
trying to understand how to load current pkg with new fcts
lionelvoirol Apr 3, 2024
177328f
more weird stuff
lionelvoirol Apr 3, 2024
a355ed2
better exception handling
lionelvoirol Apr 3, 2024
14e2d7a
stub out surrogate time_pattern
CommonClimate Apr 3, 2024
c1f65ab
renamed time_increments to random_time_increments
CommonClimate Apr 3, 2024
756fb97
minor
lionelvoirol Apr 3, 2024
f76489e
Merge branch 'uar1_surrogates' of github.com:LinkedEarth/Pyleoclim_ut…
lionelvoirol Apr 3, 2024
c2329cf
modified uar1_sim to accept time index as an argument, change fct ran…
lionelvoirol Apr 4, 2024
cdecd09
develop time_pattern match in surrogates
lionelvoirol Apr 5, 2024
ae2049b
develop time_pattern even in surrogates
lionelvoirol Apr 5, 2024
56372aa
implement time_pattern uneven in surrogate when method is uar1
lionelvoirol Apr 5, 2024
a6ee33d
better test for surrogate with uar1 uneven sampling
lionelvoirol Apr 5, 2024
5394ae7
minor
lionelvoirol Apr 8, 2024
d5244f9
Redesigned uar1_sim
CommonClimate Apr 9, 2024
1343897
Most tests moved to core.Series
CommonClimate Apr 9, 2024
5b7362e
minor changes to Series.correlation()
CommonClimate Apr 9, 2024
1857e16
minor reformatting
CommonClimate Apr 9, 2024
ec52d0d
enabled spectral analysis tests with uar1
CommonClimate Apr 10, 2024
494d6c7
enabled scalograms
CommonClimate Apr 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions pyleoclim/core/psds.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],

Number of surrogate series to generate for significance testing. The default is None.

method : str; {'ar1asym','ar1sim'}
method : str; {'ar1asym','ar1sim','uar1'}

Method to generate surrogates. AR1sim uses simulated timeseries with similar persistence. AR1asymp represents the closed form solution. The default is AR1sim

Expand Down Expand Up @@ -280,10 +280,10 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],
if self.spec_method == 'lomb_scargle' and method == 'ar1asym':
raise ValueError('Asymptotic solution is not supported for the Lomb-Scargle method')

if method not in ['ar1sim', 'ar1asym']:
if method not in ['ar1sim', 'uar1','ar1asym']:
raise ValueError("The available methods are 'ar1sim' and 'ar1asym'")

if method in ['ar1sim', 'ar1old']: # fix later in Series.surrogates()
if method in ['ar1sim', 'uar1']:
signif_scals = None
if scalogram:
try:
Expand Down Expand Up @@ -760,7 +760,8 @@ def plot(self, in_loglog=True, in_period=True, label=None, xlabel=None, ylabel='
# plot significance levels
if self.signif_qs is not None:
signif_method_label = {
'ar1sim': 'AR(1) simulations',
'ar1sim': 'AR(1) simulations (MoM)',
'uar1': 'AR(1) simulations (MLE)',
'ar1asym': 'AR(1) asymptotic solution'
}
nqs = np.size(self.signif_qs.psd_list)
Expand Down
9 changes: 5 additions & 4 deletions pyleoclim/core/scalograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],
Parameters
----------

method : {'ar1asym', 'ar1sim'}
method : {'ar1asym', 'ar1sim','uar1'}

Method to use to generate the surrogates. ar1sim uses simulated timeseries with similar persistence.
ar1asym represents the theoretical, closed-form solution. The default is ar1sim
Expand Down Expand Up @@ -485,10 +485,11 @@ def signif_test(self, method='ar1sim', number=None, seed=None, qs=[0.95],
if self.wave_method == 'wwz' and method == 'ar1asym':
raise ValueError('Asymptotic solution is not supported for the wwz method')

if method not in ['ar1sim', 'ar1asym']:
raise ValueError("The available methods are ar1sim'and 'ar1asym'")
acceptable_methods = ['ar1sim', 'ar1asym', 'uar1']
if method not in acceptable_methods:
raise ValueError(f"The available methods are: {acceptable_methods}")

if method == 'ar1sim':
if method in ['ar1sim','uar1'] :

if hasattr(self,'signif_scals'):
signif_scals = self.signif_scals
Expand Down
218 changes: 133 additions & 85 deletions pyleoclim/core/series.py

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions pyleoclim/core/surrogateseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from ..core.multipleseries import MultipleSeries

supported_surrogates = frozenset(['ar1sim','phaseran']) # broadcast all supported surrogates as global variable, for exception handling
supported_surrogates = frozenset(['ar1sim','phaseran', 'uar1']) # broadcast all supported surrogates as global variable, for exception handling

class SurrogateSeries(MultipleSeries):
''' Object containing surrogate timeseries, usually obtained through recursive modeling (e.g., AR(1))
Expand All @@ -24,8 +24,10 @@ def __init__(self, series_list, label, surrogate_method=None, surrogate_args=Non
self.label = str(label or "series") + " surrogates [AR(1)]"
elif surrogate_method == 'phaseran':
self.label = str(label or "series") + " surrogates [phase-randomized]"
elif surrogate_method == 'uar1':
self.label = str(label or "series") + " surrogates [uar1]"
else:
raise ValueError('Surrogate method should either be "ar1sim" or "phaseran"')
raise ValueError('Surrogate method should either be "ar1sim", "phaseran" or "uar1"')



Expand Down
22 changes: 6 additions & 16 deletions pyleoclim/tests/test_core_PSD.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,14 @@
4. after `pip install pytest-xdist`, one may execute "pytest -n 4" to test in parallel with number of workers specified by `-n`
5. for more details, see https://docs.pytest.org/en/stable/usage.html
'''
import numpy as np
import pytest
import pyleoclim as pyleo

def gen_ts(model='colored_noise',alpha=1, nt=100, f0=None, m=None, seed=None):
'wrapper for gen_ts in pyleoclim'

t,v = pyleo.utils.gen_ts(model=model,alpha=alpha, nt=nt, f0=f0, m=m, seed=seed)
ts=pyleo.Series(t,v)
return ts

# a collection of useful functions

def gen_normal(loc=0, scale=1, nt=100):
''' Generate random data with a Gaussian distribution
'''
t = np.arange(nt)
v = np.random.normal(loc=loc, scale=scale, size=nt)
ts = pyleo.Series(t,v)
ts=pyleo.Series(t,v, auto_time_params=True,verbose=False)
return ts


Expand All @@ -50,12 +39,13 @@ def test_plot_t0(self):
class TestUiPsdSignifTest:
''' Tests for PSD.signif_test()
'''

def test_signif_test_t0(self):
@pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym'])
def test_signif_test_t0(self,method):
''' Test PSD.signif_test() with default parameters
'''
ts = gen_ts(nt=500)
psd = ts.spectral(method='mtm')
psd_signif = psd.signif_test(number=10)
psd_signif = psd.signif_test(number=10,method=method)
fig, ax = psd_signif.plot()
pyleo.closefig(fig)
pyleo.closefig(fig)

20 changes: 10 additions & 10 deletions pyleoclim/tests/test_core_Scalogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ def gen_ts(model='colored_noise',alpha=1, nt=100, f0=None, m=None, seed=None):
'wrapper for gen_ts in pyleoclim'

t,v = pyleo.utils.gen_ts(model=model,alpha=alpha, nt=nt, f0=f0, m=m, seed=seed)
ts=pyleo.Series(t,v)

ts=pyleo.Series(t,v, auto_time_params=True,verbose=False)
return ts

# Tests below
Expand All @@ -31,19 +30,20 @@ class TestUiScalogramSignifTest:

@pytest.mark.parametrize('wave_method',['wwz','cwt'])
def test_signif_test_t0(self, wave_method):
''' Test scalogram.signif_test() with default parameters
''' Test scalogram.signif_test() with differennt methods
'''
ts = gen_ts(model='colored_noise',nt=500)
ts = gen_ts(model='colored_noise')
scal = ts.wavelet(method=wave_method)
scal_signif = scal.signif_test(number=5, qs = [0.8, 0.9, .95])
fig,ax = scal_signif.plot(signif_thresh=0.99)
pyleo.closefig(fig)


@pytest.mark.parametrize('ar1_method',['ar1asym', 'ar1sim'])
def test_signif_test_t1(self,ar1_method):
''' Test scalogram.signif_test() with default parameters
@pytest.mark.parametrize('method',['ar1sim','uar1','ar1asym'])
def test_signif_test_t1(self,method):
''' Test scalogram.signif_test() with various surrogates
'''
ts = gen_ts(model='colored_noise',nt=500)
ts = gen_ts(model='colored_noise')
scal = ts.wavelet(method='cwt')
scal_signif = scal.signif_test(method=ar1_method,number=1)
scal_signif = scal.signif_test(method=method,number=2)
fig,ax = scal_signif.plot(signif_thresh=0.99)
pyleo.closefig(fig)
64 changes: 49 additions & 15 deletions pyleoclim/tests/test_core_Series.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
#from urllib.request import urlopen

import pyleoclim as pyleo
from pyleoclim.utils.tsmodel import ar1_fit
import pyleoclim.utils.tsmodel as tsmodel
import pyleoclim.utils.tsbase as tsbase

from statsmodels.tsa.arima_process import arma_generate_sample
from scipy.stats import expon


# a collection of useful functions

Expand Down Expand Up @@ -538,7 +540,7 @@ def test_invalid(self):
class TestUISeriesSurrogates:
''' Test Series.surrogates()
'''
def test_surrogates_t0(self, eps=0.2):
def test_surrogates_t0(self, p=2, eps=0.2):
''' Generate AR(1) surrogates based on a AR(1) series with certain parameters,
and then evaluate and assert the parameters of the surrogates are correct
'''
Expand All @@ -549,10 +551,53 @@ def test_surrogates_t0(self, eps=0.2):
ar1 = arma_generate_sample(ar, ma, nsample=n, scale=1)
ts = pyleo.Series(time=np.arange(1000), value=ar1)

ts_surrs = ts.surrogates(number=1)
ts_surrs = ts.surrogates(number=p)
for ts_surr in ts_surrs.series_list:
g_surr = ar1_fit(ts_surr.value)
g_surr = tsmodel.ar1_fit(ts_surr.value)
assert np.abs(g_surr-g) < eps

@pytest.mark.parametrize('number',[1,5])
def test_surrogates_uar1_match(self, number):
ts = gen_ts(nt=550, alpha=1.0)
# generate surrogates
surr = ts.surrogates(method = 'uar1', number = number, time_pattern ="match")
for i in range(number):
assert(np.allclose(surr.series_list[i].time, ts.time))

def test_surrogates_uar1_even(self, p=5):
ts = gen_ts(nt=550, alpha=1.0)
time_incr = np.median(np.diff(ts.time))
# generate surrogates
surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="even", settings={"time_increment" :time_incr})
for i in range(p):
assert(np.allclose(tsmodel.inverse_cumsum(surr.series_list[i].time),time_incr))

def test_surrogates_uar1_random(self, p=5, tol = 0.5):
tau = 2
sigma_2 = 1
n = 500
# generate time index
t = np.arange(1,(n+1))
# create time series
ys = tsmodel.uar1_sim(t, tau_0=tau, sigma_2_0=sigma_2)
ts = pyleo.Series(time = t, value=ys, auto_time_params=True,verbose=False)
# generate surrogates default is exponential with parameter value 1
surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="random")
#surr = ts.surrogates(method = 'uar1', number = p, time_pattern ="uneven",settings={"delta_t_dist" :"poisson","param":[1]} )

for i in range(p):
delta_t = tsmodel.inverse_cumsum(surr.series_list[i].time)
# Compute the empirical cumulative distribution function (CDF) of the generated data
empirical_cdf, bins = np.histogram(delta_t, bins=100, density=True)
empirical_cdf = np.cumsum(empirical_cdf) * np.diff(bins)
# Compute the theoretical CDF of the Exponential distribution
theoretical_cdf = expon.cdf(bins[1:], scale=1)
# Trim theoretical_cdf to match the size of empirical_cdf
theoretical_cdf = theoretical_cdf[:len(empirical_cdf)]
# Compute the L2 norm (Euclidean distance) between empirical and theoretical CDFs
l2_norm = np.linalg.norm(empirical_cdf - theoretical_cdf)
assert(l2_norm<tol)


class TestUISeriesSummaryPlot:
''' Test Series.summary_plot()
Expand Down Expand Up @@ -1117,17 +1162,6 @@ def test_ssa_t4(self):
assert all(miss_ssa.eigvals >= 0)
assert np.square(miss_ssa.RCseries.value - soi.value).mean() < 0.3



# def test_ssa_t5(self):
# '''Test Series.ssa() on Allen&Smith dataset
# '''
# df = pd.read_csv('https://raw.githubusercontent.com/LinkedEarth/Pyleoclim_util/Development/example_data/mratest.txt',delim_whitespace=True,names=['Total','Signal','Noise'])
# mra = pyleo.Series(time=df.index, value=df['Total'], value_name='Allen&Smith test data', time_name='Time', time_unit='yr')
# mraSsa = mra.ssa(nMC=10)
# fig, ax = mraSsa.screeplot()
# pyleo.closefig(fig)

class TestUISeriesPlot:
'''Test for Series.plot()

Expand Down
51 changes: 28 additions & 23 deletions pyleoclim/tests/test_utils_tsmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,64 +6,69 @@
@author: julieneg
"""


import pytest
import numpy as np
from pyleoclim.utils import tsmodel
#from scipy.stats import poisson

@pytest.mark.parametrize('model', ["exponential", "poisson"])
def test_time_increments_0(model):
def test_time_index_0(model):
'''
Generate time increments with 1-parameter models
'''
delta_t = tsmodel.time_increments(n=20, param=1, delta_t_dist = model)
delta_t = tsmodel.random_time_index(n=20, param=[1], delta_t_dist = model)
assert all(np.cumsum(delta_t)>0)

def test_time_increments_1():
def test_time_index_1():
'''
Generate time increments with Pareto
'''
delta_t = tsmodel.time_increments(n=20, param=[4.2,2.5], delta_t_dist = "pareto")
delta_t = tsmodel.random_time_index(n=20, param=[4.2,2.5], delta_t_dist = "pareto")
assert all(np.cumsum(delta_t)>0)

def test_time_increments_2():
def test_time_index_2():
'''
Generate time increments with random choice
'''
delta_t = tsmodel.time_increments(n=20, delta_t_dist = "random_choice",
delta_t = tsmodel.random_time_index(n=20, delta_t_dist = "random_choice",
param=[[1,2],[.95,.05]] )
assert all(np.cumsum(delta_t)>0)




@pytest.mark.parametrize('evenly_spaced', [True, False])
def test_ar1fit_ml(evenly_spaced):
@pytest.mark.parametrize(('p', 'evenly_spaced'), [(1, True), (10, True), (1, False), (10, False)])
def test_uar1_fit(p, evenly_spaced):
'''
Tests whether this method works well on an AR(1) process with known parameters
Tests whether this method works well on an AR(1) process with known parameters and evenly spaced time points

'''
# define tolerance
tol = .4
tau = 2
sigma_2 = 1
n = 500

# create p=50 time series
y_sim, t_sim = tsmodel.ar1_sim_geneva(n=200, tau_0=tau, sigma_2_0=sigma_2,
evenly_spaced=evenly_spaced, p = 10)

if evenly_spaced:
t_arr = np.tile(range(1,n), (p, 1)).T
else:
t_arr = np.zeros((n, p)) # Initialize matrix to store time increments
for j in range(p):
# Generate random time increment
t_arr[:, j] = tsmodel.random_time_index(n=n, param=[1])

# Create an empty matrix to store estimated parameters
theta_hat_matrix = np.empty((y_sim.shape[1], 2))

theta_hat_matrix = np.empty((p, 2))
# estimate parameters for each time series
for j in range(y_sim.shape[1]):
theta_hat_matrix[j,:] = tsmodel.ar1_fit_ml(y_sim[:, j], t_sim[:, j])

for j in range(p):
ys = tsmodel.uar1_sim(t = t_arr[:, j], tau_0 = tau, sigma_2_0=sigma_2)
theta_hat_matrix[j,:] = tsmodel.uar1_fit(ys, t_arr[:, j])
# compute mean of estimated param for each simulate ts
theta_hat_bar = np.mean(theta_hat_matrix, axis=0)

# test that

assert np.abs(theta_hat_bar[0]-tau) < tol
assert np.abs(theta_hat_bar[1]-sigma_2) < tol





2 changes: 1 addition & 1 deletion pyleoclim/utils/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ def association(y1, y2, statistic='pearsonr',settings=None):
func = getattr(stats, statistic)
res = func(y1,y2,**args)
else:
raise ValueError(f'Wrong statistic: {statistic}; acceptble choices are {acceptable_methods}')
raise ValueError(f'Wrong statistic: {statistic}; acceptable choices are {acceptable_methods}')

return res

Expand Down
Loading
Loading