From 216b334a4960de7bb042b035a02a9ccc8a0b84be Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Mon, 22 May 2023 17:39:23 -0400 Subject: [PATCH 1/7] More logging and setting subsample --- mapca/mapca.py | 62 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index 3de0b3e..b771d64 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -128,7 +128,7 @@ def __init__(self, criterion="mdl", normalize=True): self.criterion = criterion self.normalize = normalize - def _fit(self, img, mask): + def _fit(self, img, mask, IIDsubsample=None): LGR.info( "Performing dimensionality reduction based on GIFT " "(https://trendscenter.org/software/gift/) and Li, Y. O., Adali, T., " @@ -209,6 +209,20 @@ def _fit(self, img, mask): dim_n = x_single.ndim sub_iid_sp_median = int(np.round(np.median(sub_iid_sp))) + LGR.info(f"Esimated subsampling depth for effective i.i.i samples: {sub_iid_sp_median}") + + # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median + # and use that instead + calculated_sub_iid_sp_median = sub_iid_sp_median + if IIDsubsample: + if (isinstance(IIDsubsample, int) or (isinstance(IIDsubsample, float) and IIDsubsample == int(IIDsubsample))) and (1 <= IIDsubsample <= n_samples): + sub_iid_sp_median = IIDsubsample + else: + raise ValueError(f"IIDsubsample must be an integer between 1 and the number of samples. It is {IIDsubsample}") + + + + if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) @@ -236,7 +250,7 @@ def _fit(self, img, mask): LGR.info("SVD done on subsampled i.i.d. data") eigenvalues = eigenvalues[::-1] - LGR.info("Effective number of i.i.d. samples %d" % N) + LGR.info("Effective number of i.i.d. samples %d from %d total voxels" % (N, n_samples)) # Make eigen spectrum adjustment LGR.info("Perform eigen spectrum adjustment ...") @@ -344,6 +358,12 @@ def _fit(self, img, mask): "n_components": ppca.n_components_, "explained_variance_total": cumsum_varexp, } + self.subsampling_ = { + "calculated_IID_subsample_depth": calculated_sub_iid_sp_median, + "used_IID_subsample_depth": sub_iid_sp_median, + "effective_num_IID_samples": N, + "total_num_samples": n_samples, + } # Assign attributes from model self.components_ = ppca.components_[:n_components, :] @@ -365,7 +385,7 @@ def _fit(self, img, mask): self.u_ = component_maps self.u_nii_ = nib.Nifti1Image(component_maps_3d, img.affine, img.header) - def fit(self, img, mask): + def fit(self, img, mask, IIDsubsample=None): """Fit the model with X. Parameters @@ -374,16 +394,21 @@ def fit(self, img, mask): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. + IIDsubsample : int + The subsampling value so that the voxels are assumed to be + independent and identically distributed (IID). + Default=None (use estimated value) + Returns ------- self : object Returns the instance itself. """ - self._fit(img, mask) + self._fit(img, mask, IIDsubsample=IIDsubsample) return self - def fit_transform(self, img, mask): + def fit_transform(self, img, mask, IIDsubsample=None): """Fit the model with X and apply the dimensionality reduction on X. Parameters @@ -392,6 +417,12 @@ def fit_transform(self, img, mask): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. 
+ IIDsubsample : int + The subsampling value so that the voxels are assumed to be independent + and identically distributed (IID) + 2 would mean using every other voxel in 3D space would mean the + remaining voxels are considered IID. 3 would mean every 3rd voxel. + Default=None (use estimated value) Returns ------- @@ -402,8 +433,17 @@ def fit_transform(self, img, mask): ----- The transformation step is different from scikit-learn's approach, which ignores explained variance. + + IIDsubsample is always calculated automatically, but it should be consistent + across a dataset with the sample acquisition parameters. In practice, it sometimes + gives a different value and causes problems. That is, for a dataset with 100 runs, + it is 2 in most runs, but when it is 3 substantially fewer components are estimated + and when it is 1, there is almost no dimensionality reduction. This has been added + as an option user provided parameter to use with caution. If mapca seems to be having + periodic mis-estimates, then this parameter should make it possible to set the IID + subsample size to be consistent across a dataset. """ - self._fit(img, mask) + self._fit(img, mask, IIDsubsample=IIDsubsample) return self.transform(img) def transform(self, img): @@ -471,7 +511,7 @@ def inverse_transform(self, img, mask): return img_orig -def ma_pca(img, mask, criterion="mdl", normalize=False): +def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): """Perform moving average-based PCA on imaging data. Run Singular Value Decomposition (SVD) on input data, @@ -493,6 +533,12 @@ def ma_pca(img, mask, criterion="mdl", normalize=False): ``kic`` refers to the Kullback-Leibler Information Criterion, which is the middle option. normalize : bool, optional Whether to normalize (zero mean and unit standard deviation) or not. Default is False. + IIDsubsample : int, optional + The subsampling value so that the voxels are assumed to be independent + and identically distributed (IID). + 2 would mean using every other voxel in 3D space would mean the + remaining voxels are considered IID. 3 would mean every 3rd voxel. + Default=None (use estimated value) Returns ------- @@ -506,7 +552,7 @@ def ma_pca(img, mask, criterion="mdl", normalize=False): Component timeseries. """ pca = MovingAveragePCA(criterion=criterion, normalize=normalize) - _ = pca.fit_transform(img, mask) + _ = pca.fit_transform(img, mask, IIDsubsample=IIDsubsample) u = pca.u_ s = pca.explained_variance_ varex_norm = pca.explained_variance_ratio_ From d3a37e7a5504387ea45ecc4207f0a469459d9203 Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Tue, 23 May 2023 16:47:12 -0400 Subject: [PATCH 2/7] More logging --- mapca/mapca.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index b771d64..189cdd1 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -209,7 +209,18 @@ def _fit(self, img, mask, IIDsubsample=None): dim_n = x_single.ndim sub_iid_sp_median = int(np.round(np.median(sub_iid_sp))) - LGR.info(f"Esimated subsampling depth for effective i.i.i samples: {sub_iid_sp_median}") + # Will log the mean value to check if the differences in median within a dataset + # represent very small changes in the mean. It seems like this is the closest + # to a non-discrete value to store to compare across runs. 
+ sub_iid_sp_mean = np.round(np.mean(sub_iid_sp),3) + + + if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: + LGR.info(f"Subsampling IID depth estimate too high. Subsampling depth will " + "be defined by number of datapoints rather than IID estimates.") + sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) + + LGR.info(f"Estimated subsampling depth for effective i.i.d samples: {sub_iid_sp_median}") # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median # and use that instead @@ -221,10 +232,6 @@ def _fit(self, img, mask, IIDsubsample=None): raise ValueError(f"IIDsubsample must be an integer between 1 and the number of samples. It is {IIDsubsample}") - - - if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: - sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) if sub_iid_sp_median != 1: @@ -360,6 +367,8 @@ def _fit(self, img, mask, IIDsubsample=None): } self.subsampling_ = { "calculated_IID_subsample_depth": calculated_sub_iid_sp_median, + "calculated_IID_subsample_mean": sub_iid_sp_mean, + "IID_subsample_input": sub_iid_sp, "used_IID_subsample_depth": sub_iid_sp_median, "effective_num_IID_samples": N, "total_num_samples": n_samples, From c30c4e3d4f3d655d03c243c9621438395463e77b Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 16:44:57 -0400 Subject: [PATCH 3/7] Cleaned up code and doc and fixed bug in testing --- .gitignore | 3 ++ mapca/mapca.py | 70 +++++++++++++++++++++------------------ mapca/tests/conftest.py | 2 +- mapca/tests/test_mapca.py | 6 ++++ 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index b6e4761..8648d5e 100644 --- a/.gitignore +++ b/.gitignore @@ -81,6 +81,9 @@ target/ profile_default/ ipython_config.py +# VSCode +.vscode/ + # pyenv .python-version diff --git a/mapca/mapca.py b/mapca/mapca.py index 189cdd1..f0766cf 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -128,7 +128,7 @@ def __init__(self, criterion="mdl", normalize=True): self.criterion = criterion self.normalize = normalize - def _fit(self, img, mask, IIDsubsample=None): + def _fit(self, img, mask, subsample_depth=None): LGR.info( "Performing dimensionality reduction based on GIFT " "(https://trendscenter.org/software/gift/) and Li, Y. O., Adali, T., " @@ -225,11 +225,14 @@ def _fit(self, img, mask, IIDsubsample=None): # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median # and use that instead calculated_sub_iid_sp_median = sub_iid_sp_median - if IIDsubsample: - if (isinstance(IIDsubsample, int) or (isinstance(IIDsubsample, float) and IIDsubsample == int(IIDsubsample))) and (1 <= IIDsubsample <= n_samples): - sub_iid_sp_median = IIDsubsample + if subsample_depth: + if (isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) and subsample_depth == int(subsample_depth))) and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100): + sub_iid_sp_median = subsample_depth else: - raise ValueError(f"IIDsubsample must be an integer between 1 and the number of samples. 
It is {IIDsubsample}") + # The logic of the upper bound is subsample_depth^3 is the fraction of samples that removed and it would be good to have at least 100 sampling remaining to have a useful analysis + # Given a masked volume is going to result in fewer samples remaining in 3D space, this is likely a very liberal upper bound, but + # probably good to at least include an upper bound. + raise ValueError(f"subsample_depth must be an integer > 1 and will retain at least 100 samples after subsampling. It is {subsample_depth}") N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) @@ -394,7 +397,7 @@ def _fit(self, img, mask, IIDsubsample=None): self.u_ = component_maps self.u_nii_ = nib.Nifti1Image(component_maps_3d, img.affine, img.header) - def fit(self, img, mask, IIDsubsample=None): + def fit(self, img, mask, subsample_depth=None): """Fit the model with X. Parameters @@ -403,7 +406,11 @@ def fit(self, img, mask, IIDsubsample=None): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. - IIDsubsample : int + subsample_depth : int, optional + Dimensionality reduction is calculated on a subset of voxels defined by + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining + voxels independent and identically distributed (IID) The subsampling value so that the voxels are assumed to be independent and identically distributed (IID). Default=None (use estimated value) @@ -414,10 +421,10 @@ def fit(self, img, mask, IIDsubsample=None): self : object Returns the instance itself. """ - self._fit(img, mask, IIDsubsample=IIDsubsample) + self._fit(img, mask, subsample_depth=subsample_depth) return self - def fit_transform(self, img, mask, IIDsubsample=None): + def fit_transform(self, img, mask, subsample_depth=None): """Fit the model with X and apply the dimensionality reduction on X. Parameters @@ -426,12 +433,11 @@ def fit_transform(self, img, mask, IIDsubsample=None): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. - IIDsubsample : int - The subsampling value so that the voxels are assumed to be independent - and identically distributed (IID) - 2 would mean using every other voxel in 3D space would mean the - remaining voxels are considered IID. 3 would mean every 3rd voxel. - Default=None (use estimated value) + subsample_depth : int, optional + Dimensionality reduction is calculated on a subset of voxels defined by + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining + voxels independent and identically distributed (IID) Returns ------- @@ -443,16 +449,17 @@ def fit_transform(self, img, mask, IIDsubsample=None): The transformation step is different from scikit-learn's approach, which ignores explained variance. - IIDsubsample is always calculated automatically, but it should be consistent - across a dataset with the sample acquisition parameters. In practice, it sometimes - gives a different value and causes problems. That is, for a dataset with 100 runs, - it is 2 in most runs, but when it is 3 substantially fewer components are estimated - and when it is 1, there is almost no dimensionality reduction. This has been added - as an option user provided parameter to use with caution. If mapca seems to be having - periodic mis-estimates, then this parameter should make it possible to set the IID - subsample size to be consistent across a dataset. 
+ subsample_depth is always calculated automatically, but it should be consistent + across a dataset with the same acquisition parameters, since spatial dependence + should be similar. In practice, it sometimes gives a different value and causes + problems. That is, for a dataset with 100 runs, it is 2 in most runs, but when + it is 3, substantially fewer components are estimated and when it is 1, there is + almost no dimensionality reduction. This has been added as an optional user provided + parameter. If mapca seems to be having periodic mis-estimates, then this parameter + should make it possible to set the IID subsample depth to be consistent across a + dataset. """ - self._fit(img, mask, IIDsubsample=IIDsubsample) + self._fit(img, mask, subsample_depth=subsample_depth) return self.transform(img) def transform(self, img): @@ -520,7 +527,7 @@ def inverse_transform(self, img, mask): return img_orig -def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): +def ma_pca(img, mask, criterion="mdl", normalize=False, subsample_depth=None): """Perform moving average-based PCA on imaging data. Run Singular Value Decomposition (SVD) on input data, @@ -542,12 +549,11 @@ def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): ``kic`` refers to the Kullback-Leibler Information Criterion, which is the middle option. normalize : bool, optional Whether to normalize (zero mean and unit standard deviation) or not. Default is False. - IIDsubsample : int, optional - The subsampling value so that the voxels are assumed to be independent - and identically distributed (IID). - 2 would mean using every other voxel in 3D space would mean the - remaining voxels are considered IID. 3 would mean every 3rd voxel. - Default=None (use estimated value) + subsample_depth : int, optional + Dimensionality reduction is calculated on a subset of voxels defined by + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining + voxels independent and identically distributed (IID) Returns ------- @@ -561,7 +567,7 @@ def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): Component timeseries. 
""" pca = MovingAveragePCA(criterion=criterion, normalize=normalize) - _ = pca.fit_transform(img, mask, IIDsubsample=IIDsubsample) + _ = pca.fit_transform(img, mask, subsample_depth=subsample_depth) u = pca.u_ s = pca.explained_variance_ varex_norm = pca.explained_variance_ratio_ diff --git a/mapca/tests/conftest.py b/mapca/tests/conftest.py index 8ddfe21..21f6819 100644 --- a/mapca/tests/conftest.py +++ b/mapca/tests/conftest.py @@ -53,7 +53,7 @@ def test_mask(testpath): @pytest.fixture def test_ts(testpath): return fetch_file('gz2hb', testpath, - 'compt_ts.npy') + 'comp_ts.npy') @pytest.fixture diff --git a/mapca/tests/test_mapca.py b/mapca/tests/test_mapca.py index 38bb499..aefdf01 100644 --- a/mapca/tests/test_mapca.py +++ b/mapca/tests/test_mapca.py @@ -95,3 +95,9 @@ def test_MovingAveragePCA(): test_data_est = pca2.inverse_transform(u2, test_mask_img) assert test_data_est.shape == test_img.shape + + # Testing setting inputting a pre-defined subsampling depth + pca3 = MovingAveragePCA(criterion="mdl", normalize=False) + pca3.fit(test_img, test_mask_img, subsample_depth=2) + assert pca3.subsampling_['calculated_IID_subsample_depth'] == 1 + assert pca3.subsampling_['used_IID_subsample_depth'] == 2 From e799f3e834432c92b0d9385876c596a696c0cd2c Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 16:55:40 -0400 Subject: [PATCH 4/7] style fixes --- mapca/mapca.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index f0766cf..b068e4d 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -212,27 +212,31 @@ def _fit(self, img, mask, subsample_depth=None): # Will log the mean value to check if the differences in median within a dataset # represent very small changes in the mean. It seems like this is the closest # to a non-discrete value to store to compare across runs. - sub_iid_sp_mean = np.round(np.mean(sub_iid_sp),3) - + sub_iid_sp_mean = np.round(np.mean(sub_iid_sp), 3) if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: - LGR.info(f"Subsampling IID depth estimate too high. Subsampling depth will " + LGR.info("Subsampling IID depth estimate too high. 
Subsampling depth will " "be defined by number of datapoints rather than IID estimates.") sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) - LGR.info(f"Estimated subsampling depth for effective i.i.d samples: {sub_iid_sp_median}") + LGR.info("Estimated subsampling depth for effective i.i.d samples: %d" % sub_iid_sp_median) - # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median - # and use that instead + # Always save the calculated IID subsample value, but, if there is a user provide value, + # assign that to sub_iid_sp_median and use that instead calculated_sub_iid_sp_median = sub_iid_sp_median if subsample_depth: - if (isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) and subsample_depth == int(subsample_depth))) and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100): + if ((isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) + and subsample_depth == int(subsample_depth))) + and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100)): sub_iid_sp_median = subsample_depth else: - # The logic of the upper bound is subsample_depth^3 is the fraction of samples that removed and it would be good to have at least 100 sampling remaining to have a useful analysis - # Given a masked volume is going to result in fewer samples remaining in 3D space, this is likely a very liberal upper bound, but + # The logic of the upper bound is subsample_depth^3 is the fraction of samples + # that removed and it would be good to have at least 100 sampling remaining to + # have a useful analysis. Given a masked volume is going to result in fewer + # samples remaining in 3D space, this is likely a very liberal upper bound, but # probably good to at least include an upper bound. - raise ValueError(f"subsample_depth must be an integer > 1 and will retain at least 100 samples after subsampling. It is {subsample_depth}") + raise ValueError("subsample_depth must be an integer > 1 and will retain >100 " + "samples after subsampling. It is %d" % subsample_depth) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) @@ -408,14 +412,13 @@ def fit(self, img, mask, subsample_depth=None): Mask to apply on ``img``. subsample_depth : int, optional Dimensionality reduction is calculated on a subset of voxels defined by - this depth. 2 would mean using every other voxel in 3D space and 3 would - mean every 3rd voxel. Default=None (estimated depth to make remaining + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining voxels independent and identically distributed (IID) The subsampling value so that the voxels are assumed to be independent and identically distributed (IID). Default=None (use estimated value) - Returns ------- self : object @@ -435,8 +438,8 @@ def fit_transform(self, img, mask, subsample_depth=None): Mask to apply on ``img``. subsample_depth : int, optional Dimensionality reduction is calculated on a subset of voxels defined by - this depth. 2 would mean using every other voxel in 3D space and 3 would - mean every 3rd voxel. Default=None (estimated depth to make remaining + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. 
Default=None (estimated depth to make remaining
             voxels independent and identically distributed (IID)
 
         Returns
         -------
@@ -551,8 +554,8 @@ def ma_pca(img, mask, criterion="mdl", normalize=False, subsample_depth=None):
         Whether to normalize (zero mean and unit standard deviation) or not. Default is False.
     subsample_depth : int, optional
         Dimensionality reduction is calculated on a subset of voxels defined by
-        this depth. 2 would mean using every other voxel in 3D space and 3 would
-        mean every 3rd voxel. Default=None (estimated depth to make remaining
+        this depth. 2 would mean using every other voxel in 3D space and 3 would
+        mean every 3rd voxel. Default=None (estimated depth to make remaining
        voxels independent and identically distributed (IID)
 
     Returns

From 45f46f44dd9da94165af0377f6ebfab758c69a1c Mon Sep 17 00:00:00 2001
From: handwerkerd
Date: Wed, 24 May 2023 17:10:26 -0400
Subject: [PATCH 5/7] more style fixes

---
 mapca/mapca.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mapca/mapca.py b/mapca/mapca.py
index b068e4d..b9633ab 100644
--- a/mapca/mapca.py
+++ b/mapca/mapca.py
@@ -221,13 +221,14 @@ def _fit(self, img, mask, subsample_depth=None):
         LGR.info("Estimated subsampling depth for effective i.i.d. samples: %d" % sub_iid_sp_median)
 
-        # Always save the calculated IID subsample value, but, if there is a user provided value, 
+        # Always save the calculated IID subsample value, but, if there is a user provided value,
         # assign that to sub_iid_sp_median and use that instead
         calculated_sub_iid_sp_median = sub_iid_sp_median
         if subsample_depth:
-            if ((isinstance(subsample_depth, int) or (isinstance(subsample_depth, float)
-                and subsample_depth == int(subsample_depth)))
-                and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100)):
+            if ((isinstance(subsample_depth, int)
+                 or (isinstance(subsample_depth, float)
+                     and subsample_depth == int(subsample_depth)))
+                and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)):
                 sub_iid_sp_median = subsample_depth
             else:
                 # The logic of the upper bound is that subsample_depth^3 is the factor by which
                 # samples are reduced and it would be good to have at least 100 samples remaining to
                 # have a useful analysis. Given a masked volume is going to result in fewer
                 # samples remaining in 3D space, this is likely a very liberal upper bound, but
                 # probably good to at least include an upper bound.
                 raise ValueError("subsample_depth must be an integer > 1 and will retain >100 "
                                  "samples after subsampling. 
It is %d" % subsample_depth) - N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) if sub_iid_sp_median != 1: From 92b1c41e7d7beb30900b4eb540456abbb051b3fb Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 17:22:13 -0400 Subject: [PATCH 6/7] style fix --- mapca/mapca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index b9633ab..28e6a28 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -228,7 +228,7 @@ def _fit(self, img, mask, subsample_depth=None): if ((isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) and subsample_depth == int(subsample_depth))) - and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)): + and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)): sub_iid_sp_median = subsample_depth else: # The logic of the upper bound is subsample_depth^3 is the fraction of samples From 8abb34d8e172db31ec387fdc0e85c1e148806398 Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 17:34:27 -0400 Subject: [PATCH 7/7] black formatting --- mapca/mapca.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index 28e6a28..246540b 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -215,8 +215,10 @@ def _fit(self, img, mask, subsample_depth=None): sub_iid_sp_mean = np.round(np.mean(sub_iid_sp), 3) if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: - LGR.info("Subsampling IID depth estimate too high. Subsampling depth will " - "be defined by number of datapoints rather than IID estimates.") + LGR.info( + "Subsampling IID depth estimate too high. Subsampling depth will " + "be defined by number of datapoints rather than IID estimates." + ) sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) LGR.info("Estimated subsampling depth for effective i.i.d samples: %d" % sub_iid_sp_median) @@ -225,19 +227,29 @@ def _fit(self, img, mask, subsample_depth=None): # assign that to sub_iid_sp_median and use that instead calculated_sub_iid_sp_median = sub_iid_sp_median if subsample_depth: - if ((isinstance(subsample_depth, int) - or (isinstance(subsample_depth, float) - and subsample_depth == int(subsample_depth))) - and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)): + if ( + ( + isinstance(subsample_depth, int) + or ( + isinstance(subsample_depth, float) + and subsample_depth == int(subsample_depth) + ) + ) + and (1 <= subsample_depth) + and ((n_samples / (subsample_depth**3)) >= 100) + ): sub_iid_sp_median = subsample_depth + else: # The logic of the upper bound is subsample_depth^3 is the fraction of samples # that removed and it would be good to have at least 100 sampling remaining to # have a useful analysis. Given a masked volume is going to result in fewer # samples remaining in 3D space, this is likely a very liberal upper bound, but # probably good to at least include an upper bound. - raise ValueError("subsample_depth must be an integer > 1 and will retain >100 " - "samples after subsampling. It is %d" % subsample_depth) + raise ValueError( + "subsample_depth must be an integer > 1 and will retain >100 " + "samples after subsampling. It is %d" % subsample_depth + ) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n))