Doc fixes (#617)

* series.correlation() also, replaced plot_legend keyword in EnsemlbleSeries.plot_traces() doc * fixed ms.correlation() docstring and made unit tests
LinkedEarth · Oct 10, 2024 · a384df9 · a384df9
1 parent dce9c80
commit a384df9
Show file tree

Hide file tree

Showing 4 changed files with 84 additions and 38 deletions.
diff --git a/pyleoclim/core/ensembleseries.py b/pyleoclim/core/ensembleseries.py
@@ -728,7 +728,7 @@ def plot_traces(self, figsize=[10, 4], xlabel=None, ylabel=None, title=None, num
 
             Matplotlib axis on which to return the plot. The default is None.
 
-        plot_legend : bool; {True,False}, optional
+        legend : bool; {True,False}, optional
 
             Whether to plot the legend. The default is True.
 

diff --git a/pyleoclim/core/multipleseries.py b/pyleoclim/core/multipleseries.py
@@ -730,10 +730,27 @@ def common_time(self, method='interp', step = None, start = None, stop = None, s
 
         return ms
 
-    def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, method='phaseran', number=1000,
+    def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearsonr', 
+                    method='phaseran', number=1000, settings=None,
                     fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None):
         ''' Calculate the correlation between a MultipleSeries and a target Series
 
+        The significance of the correlation is assessed using one of the following methods:
+
+        1. 'ttest': T-test adjusted for effective sample size, see [1]
+        2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
+        3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
+        4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
+        5. 'built-in': uses built-in method from scipy (function of the statistic used)
+
+        Note: Up to version v0.14.0. ar1sim was called "isopersistent",  phaseran was called "isospectral"
+
+        The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances.
+        The others are non-parametric, but their computational requirements scale with the number of simulations.
+
+        The choise of significance test and associated number of Monte-Carlo simulations are passed through the `settings` parameter.
+
+        
         Parameters
         ----------
         target : pyleoclim.Series, optional
@@ -748,17 +765,29 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met
         
             The significance level (0.05 by default)
 
+        statistic : str
+            statistic being evaluated. Can use any of the SciPy-supported ones:
+                https://docs.scipy.org/doc/scipy/reference/stats.html#association-correlation-tests
+                Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']
+            Default: 'pearsonr'.
+
+        method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
+            method for significance testing. Default is 'phaseran'
+            - 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
+            - 'built-in' uses the p-value that ships with the SciPy function.
+            - 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals  
+            - 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals
+            - 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
+            The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning.
+            Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case.
+
         settings : dict
-        
+       
             Parameters for the correlation function (per scipy)
 
         number : int
 
-                the number of simulations (default: 1000)
-
-        method : str, {'ttest', 'ar1sim', 'phaseran' (default)}
-
-                method for significance testing
+            the number of simulations (default: 1000)  
 
         fdr_kwargs : dict
         
@@ -840,7 +869,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, settings=None, met
         print("Looping over "+ str(len(self.series_list)) +" Series in collection")
         for idx, ts in tqdm(enumerate(self.series_list),  total=len(self.series_list), disable=mute_pbar):
             corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
-                                      method=method, number=number,
+                                      method=method, number=number, statistic=statistic,
                                       common_time_kwargs=common_time_kwargs, seed=seed)
             r_list.append(corr_res.r)
             signif_list.append(corr_res.signif)

diff --git a/pyleoclim/core/series.py b/pyleoclim/core/series.py
@@ -3534,11 +3534,11 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =
 
         The significance of the correlation is assessed using one of the following methods:
 
-        1) 'ttest': T-test adjusted for effective sample size, see [1]
-        2) 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
-        3) 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
-        3) 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
-        4) 'built-in': uses built-in method
+        1. 'ttest': T-test adjusted for effective sample size, see [1]
+        2. 'ar1sim': AR(1) modeling of x and y (Monte Carlo method)
+        3. 'CN': colored noise (power-law spectrum) modeling of x and y (Monte Carlo method)
+        4. 'phaseran': phase randomization of original inputs. (Monte Carlo method, default), see [2]
+        5. 'built-in': uses built-in method from scipy (function of the statistic used)
 
         Note: Up to version v0.14.0. ar1sim was called "isopersistent",  phaseran was called "isospectral"
 
@@ -3561,12 +3561,13 @@ def correlation(self, target_series, alpha=0.05, statistic='pearsonr', method =
                 Currently supported: ['pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau']
             Default: 'pearsonr'.
 
-        method : str, {'ttest','built-in','ar1sim','phaseran'}
+        method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
             method for significance testing. Default is 'phaseran'
-            * 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
-            * 'built-in' uses the p-value that ships with the SciPy function.
-            * 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) seires fitted to the originals  
-            * 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
+            - 'ttest' implements the T-test with degrees of freedom adjusted for autocorrelation, as done in [1]
+            - 'built-in' uses the p-value that ships with the SciPy function.
+            - 'ar1sim' (formerly 'isopersistent') tests against an ensemble of AR(1) series fitted to the originals  
+            - 'CN' tests against an ensemble of colored noise series (power-law spectra) fitted to the originals
+            - 'phaseran' (formerly 'isospectral') tests against phase-randomized surrogates (aka the method of Ebisuzaki [2])
             The old options 'isopersistent' and 'isospectral' still work, but trigger a deprecation warning.
             Note that 'weightedtau' does not have a known distribution, so the 'built-in' method returns an error in that case.
 

diff --git a/pyleoclim/tests/test_core_MultipleSeries.py b/pyleoclim/tests/test_core_MultipleSeries.py
@@ -45,7 +45,7 @@ def gen_colored_noise(alpha=1, nt=100, std=1.0, f0=None, m=None, seed=None):
     v = colored_noise(alpha=alpha, t=t, std=std, f0=f0, m=m, seed=seed)
     return t, v
 
-def load_data():
+def load_ms():
     soi = pyleo.utils.load_dataset('SOI')
     nino = pyleo.utils.load_dataset('NINO3')
     ms = soi & nino
@@ -450,34 +450,56 @@ class TestMultipleSeriesStackPlot():
     @pytest.mark.parametrize('labels', [None, 'auto', ['soi','nino']])
     def test_StackPlot_t0(self, labels):
 
-        ms = load_data()
+        ms = load_ms()
         fig, ax = ms.stackplot(labels=labels)
         pyleo.closefig(fig)
 
     @pytest.mark.parametrize('plot_kwargs', [{'marker':'o'},[{'marker':'o'},{'marker':'^'}]])
     def test_StackPlot_t1(self, plot_kwargs):
-        ms = load_data()
+        ms = load_ms()
         fig, ax = ms.stackplot(plot_kwargs=plot_kwargs)
         pyleo.closefig(fig)
 
     @pytest.mark.parametrize('ylims', ['spacious', 'auto'])
     def test_StackPlot_t2(self, ylims):
-        ms = load_data()
+        ms = load_ms()
         fig, ax = ms.stackplot(ylims=ylims)
         pyleo.closefig(fig)
 
     @pytest.mark.parametrize('yticks_minor', [True, False])
     def test_StackPlot_t3(self, yticks_minor):
-        ms = load_data()
+        ms = load_ms()
         fig, ax = ms.stackplot(yticks_minor=yticks_minor)
         pyleo.closefig(fig)
 
     @pytest.mark.parametrize('xticks_minor', [True, False])
     def test_StackPlot_t4(self, xticks_minor):
-        ms = load_data()
+        ms = load_ms()
         fig, ax = ms.stackplot(xticks_minor=xticks_minor)
         pyleo.closefig(fig)
 
+
+class TestMultipleSeriesCorrelation():
+    ''' Test for MultipleSeries.spectral
+    '''
+    @pytest.mark.parametrize('sig_method', ['ttest','built-in','ar1sim','phaseran','CN'])
+    @pytest.mark.parametrize('number', [2,5])
+    def test_correlation_t0(self, sig_method, number):
+        ''' Test the various significance methods
+        '''
+        ms = load_ms()   
+        corr = ms.correlation(method=sig_method,number=number)
+
+    @pytest.mark.parametrize('stat', ['linregress','pearsonr','spearmanr','pointbiserialr','kendalltau','weightedtau'])
+    def test_correlation_t1(self, stat, eps=0.2):
+        ''' Test the various statistics
+        '''
+        ms = load_ms() 
+        if stat == 'weightedtau':
+            corr = ms.correlation(statistic=stat)
+        else:
+            corr = ms.correlation(statistic=stat,method='built-in')
+
 class TestMultipleSeriesSpectral():
     ''' Test for MultipleSeries.spectral
     '''
@@ -486,33 +508,27 @@ def test_spectral_t0(self,spec_method):
         '''Test the spectral function with pre-generated scalogram objects
         '''
 
-        ms = load_data()
+        ms = load_ms()
         if spec_method == 'cwt':
             ms = ms.interp()
         scals = ms.wavelet(method=spec_method)
         ms.spectral(method=spec_method,scalogram_list=scals)
 
 class TestToCSV:
     def test_to_csv_default(self):
-        soi = pyleo.utils.load_dataset('SOI')
-        nino = pyleo.utils.load_dataset('NINO3')
-        ms = soi & nino
+        ms = load_ms()
         ms.to_csv()
-        os.unlink('MultipleSeries.csv')
+        os.unlink('MultipleSeries.csv') # clean up after yourself!
     def test_to_csv_label(self):
-        soi = pyleo.utils.load_dataset('SOI')
-        nino = pyleo.utils.load_dataset('NINO3')
-        ms = soi & nino
+        ms = load_ms()
         ms.label='enso series'
         ms.to_csv()
-        os.unlink('enso_series.csv') # this check that the file does exist
+        os.unlink('enso_series.csv') # clean up after yourself!
     def test_to_csv_label_path(self):
-        soi = pyleo.utils.load_dataset('SOI')
-        nino = pyleo.utils.load_dataset('NINO3')
-        ms = soi & nino
+        ms = load_ms()
         ms.label='enso wah wah'
         ms.to_csv(path='./enso.csv')
-        os.unlink('enso.csv') # this check that the file does exist
+        os.unlink('enso.csv') # clean up after yourself!
 
 class TestRemove:
     def test_remove(self):