Skip to content

Commit

Permalink
Doc fixes (#617)
Browse files Browse the repository at this point in the history
* series.correlation()

also, replaced plot_legend keyword in EnsemlbleSeries.plot_traces() doc

* fixed ms.correlation() docstring and made unit tests
  • Loading branch information
CommonClimate authored Oct 10, 2024
1 parent dce9c80 commit a384df9
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 38 deletions.
2 changes: 1 addition & 1 deletion pyleoclim/core/ensembleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num
Matplotlib axis on which to return the plot. The default is None.
plot_legend : bool; {True,False}, optional
legend : bool; {True,False}, optional
Whether to plot the legend. The default is True.
Expand Down
45 changes: 37 additions & 8 deletions pyleoclim/core/multipleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,10 +730,27 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s

return ms

def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, method='phaseran', number=1000,
def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearsonr',
method='phaseran', number=1000, settings=None,
fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None):
''' Calculate the correlation between a MultipleSeries and a target Series
The significance of the correlation is assessed using one of the following methods:
1. 'ttest': T-test adjusted for effective sample size, see [1]
2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
5. 'built-in': uses built-in method from scipy (function of the statistic used)
Note: Up to version v0.14.0. ar1sim was called "isopersistent", phaseran was called "isospectral"
The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances.
The others are non-parametric, but their computational requirements scale with the number of simulations.
The choise of significance test and associated number of Monte-Carlo simulations are passed through the `settings` parameter.
Parameters
----------
target : pyleoclim.Series, optional
Expand All @@ -748,17 +765,29 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met
The significance level (0.05 by default)
statistic : str
statistic being evaluated. Can use any of the SciPy-supported ones:
https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests
Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']
Default: 'pearsonr'.
method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
method for significance testing. Default is 'phaseran'
- 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
- 'built-in' uses the p-value that ships with the SciPy function.
- 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals
- 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals
- 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning.
Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case.
settings : dict
Parameters for the correlation function (per scipy)
number : int
the number of simulations (default: 1000)
method : str, {'ttest', 'ar1sim', 'phaseran' (default)}
method for significance testing
the number of simulations (default: 1000)
fdr_kwargs : dict
Expand Down Expand Up @@ -840,7 +869,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met
print("Looping over "+ str(len(self.series_list)) +" Series in collection")
for idx, ts in tqdm(enumerate(self.series_list), total=len(self.series_list), disable=mute_pbar):
corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
method=method, number=number,
method=method, number=number, statistic=statistic,
common_time_kwargs=common_time_kwargs, seed=seed)
r_list.append(corr_res.r)
signif_list.append(corr_res.signif)
Expand Down
21 changes: 11 additions & 10 deletions pyleoclim/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3534,11 +3534,11 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =
The significance of the correlation is assessed using one of the following methods:
1) 'ttest': T-test adjusted for effective sample size, see [1]
2) 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
3) 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
3) 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
4) 'built-in': uses built-in method
1. 'ttest': T-test adjusted for effective sample size, see [1]
2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
5. 'built-in': uses built-in method from scipy (function of the statistic used)
Note: Up to version v0.14.0. ar1sim was called "isopersistent", phaseran was called "isospectral"
Expand All @@ -3561,12 +3561,13 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =
Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']
Default: 'pearsonr'.
method : str, {'ttest','built-in','ar1sim','phaseran'}
method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
method for significance testing. Default is 'phaseran'
* 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
* 'built-in' uses the p-value that ships with the SciPy function.
* 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) seires fitted to the originals
* 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
- 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
- 'built-in' uses the p-value that ships with the SciPy function.
- 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals
- 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals
- 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning.
Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case.
Expand Down
54 changes: 35 additions & 19 deletions pyleoclim/tests/test_core_MultipleSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def gen_colored_noise(alpha=1, nt=100, std=1.0, f0=None, m=None, seed=None):
v = colored_noise(alpha=alpha, t=t, std=std, f0=f0, m=m, seed=seed)
return t, v

def load_data():
def load_ms():
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
Expand Down Expand Up @@ -450,34 +450,56 @@ class TestMultipleSeriesStackPlot():
@pytest.mark.parametrize('labels', [None, 'auto', ['soi','nino']])
def test_StackPlot_t0(self, labels):

ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(labels=labels)
pyleo.closefig(fig)

@pytest.mark.parametrize('plot_kwargs', [{'marker':'o'},[{'marker':'o'},{'marker':'^'}]])
def test_StackPlot_t1(self, plot_kwargs):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(plot_kwargs=plot_kwargs)
pyleo.closefig(fig)

@pytest.mark.parametrize('ylims', ['spacious', 'auto'])
def test_StackPlot_t2(self, ylims):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(ylims=ylims)
pyleo.closefig(fig)

@pytest.mark.parametrize('yticks_minor', [True, False])
def test_StackPlot_t3(self, yticks_minor):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(yticks_minor=yticks_minor)
pyleo.closefig(fig)

@pytest.mark.parametrize('xticks_minor', [True, False])
def test_StackPlot_t4(self, xticks_minor):
ms = load_data()
ms = load_ms()
fig, ax = ms.stackplot(xticks_minor=xticks_minor)
pyleo.closefig(fig)


class TestMultipleSeriesCorrelation():
''' Test for MultipleSeries.spectral
'''
@pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran','CN'])
@pytest.mark.parametrize('number', [2,5])
def test_correlation_t0(self, sig_method, number):
''' Test the various significance methods
'''
ms = load_ms()
corr = ms.correlation(method=sig_method,number=number)

@pytest.mark.parametrize('stat', ['linregress','pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'])
def test_correlation_t1(self, stat, eps=0.2):
''' Test the various statistics
'''
ms = load_ms()
if stat == 'weightedtau':
corr = ms.correlation(statistic=stat)
else:
corr = ms.correlation(statistic=stat,method='built-in')

class TestMultipleSeriesSpectral():
''' Test for MultipleSeries.spectral
'''
Expand All @@ -486,33 +508,27 @@ def test_spectral_t0(self,spec_method):
'''Test the spectral function with pre-generated scalogram objects
'''

ms = load_data()
ms = load_ms()
if spec_method == 'cwt':
ms = ms.interp()
scals = ms.wavelet(method=spec_method)
ms.spectral(method=spec_method,scalogram_list=scals)

class TestToCSV:
def test_to_csv_default(self):
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
ms = load_ms()
ms.to_csv()
os.unlink('MultipleSeries.csv')
os.unlink('MultipleSeries.csv') # clean up after yourself!
def test_to_csv_label(self):
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
ms = load_ms()
ms.label='enso series'
ms.to_csv()
os.unlink('enso_series.csv') # this check that the file does exist
os.unlink('enso_series.csv') # clean up after yourself!
def test_to_csv_label_path(self):
soi = pyleo.utils.load_dataset('SOI')
nino = pyleo.utils.load_dataset('NINO3')
ms = soi & nino
ms = load_ms()
ms.label='enso wah wah'
ms.to_csv(path='./enso.csv')
os.unlink('enso.csv') # this check that the file does exist
os.unlink('enso.csv') # clean up after yourself!

class TestRemove:
def test_remove(self):
Expand Down

0 comments on commit a384df9

Please sign in to comment.