From 216b334a4960de7bb042b035a02a9ccc8a0b84be Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Mon, 22 May 2023 17:39:23 -0400 Subject: [PATCH 1/7] More logging and setting subsample --- mapca/mapca.py | 62 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index 3de0b3e..b771d64 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -128,7 +128,7 @@ def __init__(self, criterion="mdl", normalize=True): self.criterion = criterion self.normalize = normalize - def _fit(self, img, mask): + def _fit(self, img, mask, IIDsubsample=None): LGR.info( "Performing dimensionality reduction based on GIFT " "(https://trendscenter.org/software/gift/) and Li, Y. O., Adali, T., " @@ -209,6 +209,20 @@ def _fit(self, img, mask): dim_n = x_single.ndim sub_iid_sp_median = int(np.round(np.median(sub_iid_sp))) + LGR.info(f"Esimated subsampling depth for effective i.i.i samples: {sub_iid_sp_median}") + + # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median + # and use that instead + calculated_sub_iid_sp_median = sub_iid_sp_median + if IIDsubsample: + if (isinstance(IIDsubsample, int) or (isinstance(IIDsubsample, float) and IIDsubsample == int(IIDsubsample))) and (1 <= IIDsubsample <= n_samples): + sub_iid_sp_median = IIDsubsample + else: + raise ValueError(f"IIDsubsample must be an integer between 1 and the number of samples. It is {IIDsubsample}") + + + + if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) @@ -236,7 +250,7 @@ def _fit(self, img, mask): LGR.info("SVD done on subsampled i.i.d. data") eigenvalues = eigenvalues[::-1] - LGR.info("Effective number of i.i.d. samples %d" % N) + LGR.info("Effective number of i.i.d. samples %d from %d total voxels" % (N, n_samples)) # Make eigen spectrum adjustment LGR.info("Perform eigen spectrum adjustment ...") @@ -344,6 +358,12 @@ def _fit(self, img, mask): "n_components": ppca.n_components_, "explained_variance_total": cumsum_varexp, } + self.subsampling_ = { + "calculated_IID_subsample_depth": calculated_sub_iid_sp_median, + "used_IID_subsample_depth": sub_iid_sp_median, + "effective_num_IID_samples": N, + "total_num_samples": n_samples, + } # Assign attributes from model self.components_ = ppca.components_[:n_components, :] @@ -365,7 +385,7 @@ def _fit(self, img, mask): self.u_ = component_maps self.u_nii_ = nib.Nifti1Image(component_maps_3d, img.affine, img.header) - def fit(self, img, mask): + def fit(self, img, mask, IIDsubsample=None): """Fit the model with X. Parameters @@ -374,16 +394,21 @@ def fit(self, img, mask): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. + IIDsubsample : int + The subsampling value so that the voxels are assumed to be + independent and identically distributed (IID). + Default=None (use estimated value) + Returns ------- self : object Returns the instance itself. """ - self._fit(img, mask) + self._fit(img, mask, IIDsubsample=IIDsubsample) return self - def fit_transform(self, img, mask): + def fit_transform(self, img, mask, IIDsubsample=None): """Fit the model with X and apply the dimensionality reduction on X. Parameters @@ -392,6 +417,12 @@ def fit_transform(self, img, mask): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. 
+ IIDsubsample : int + The subsampling value so that the voxels are assumed to be independent + and identically distributed (IID) + 2 would mean using every other voxel in 3D space would mean the + remaining voxels are considered IID. 3 would mean every 3rd voxel. + Default=None (use estimated value) Returns ------- @@ -402,8 +433,17 @@ def fit_transform(self, img, mask): ----- The transformation step is different from scikit-learn's approach, which ignores explained variance. + + IIDsubsample is always calculated automatically, but it should be consistent + across a dataset with the sample acquisition parameters. In practice, it sometimes + gives a different value and causes problems. That is, for a dataset with 100 runs, + it is 2 in most runs, but when it is 3 substantially fewer components are estimated + and when it is 1, there is almost no dimensionality reduction. This has been added + as an option user provided parameter to use with caution. If mapca seems to be having + periodic mis-estimates, then this parameter should make it possible to set the IID + subsample size to be consistent across a dataset. """ - self._fit(img, mask) + self._fit(img, mask, IIDsubsample=IIDsubsample) return self.transform(img) def transform(self, img): @@ -471,7 +511,7 @@ def inverse_transform(self, img, mask): return img_orig -def ma_pca(img, mask, criterion="mdl", normalize=False): +def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): """Perform moving average-based PCA on imaging data. Run Singular Value Decomposition (SVD) on input data, @@ -493,6 +533,12 @@ def ma_pca(img, mask, criterion="mdl", normalize=False): ``kic`` refers to the Kullback-Leibler Information Criterion, which is the middle option. normalize : bool, optional Whether to normalize (zero mean and unit standard deviation) or not. Default is False. + IIDsubsample : int, optional + The subsampling value so that the voxels are assumed to be independent + and identically distributed (IID). + 2 would mean using every other voxel in 3D space would mean the + remaining voxels are considered IID. 3 would mean every 3rd voxel. + Default=None (use estimated value) Returns ------- @@ -506,7 +552,7 @@ def ma_pca(img, mask, criterion="mdl", normalize=False): Component timeseries. """ pca = MovingAveragePCA(criterion=criterion, normalize=normalize) - _ = pca.fit_transform(img, mask) + _ = pca.fit_transform(img, mask, IIDsubsample=IIDsubsample) u = pca.u_ s = pca.explained_variance_ varex_norm = pca.explained_variance_ratio_ From d3a37e7a5504387ea45ecc4207f0a469459d9203 Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Tue, 23 May 2023 16:47:12 -0400 Subject: [PATCH 2/7] More logging --- mapca/mapca.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index b771d64..189cdd1 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -209,7 +209,18 @@ def _fit(self, img, mask, IIDsubsample=None): dim_n = x_single.ndim sub_iid_sp_median = int(np.round(np.median(sub_iid_sp))) - LGR.info(f"Esimated subsampling depth for effective i.i.i samples: {sub_iid_sp_median}") + # Will log the mean value to check if the differences in median within a dataset + # represent very small changes in the mean. It seems like this is the closest + # to a non-discrete value to store to compare across runs. 
+ sub_iid_sp_mean = np.round(np.mean(sub_iid_sp),3) + + + if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: + LGR.info(f"Subsampling IID depth estimate too high. Subsampling depth will " + "be defined by number of datapoints rather than IID estimates.") + sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) + + LGR.info(f"Estimated subsampling depth for effective i.i.d samples: {sub_iid_sp_median}") # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median # and use that instead @@ -221,10 +232,6 @@ def _fit(self, img, mask, IIDsubsample=None): raise ValueError(f"IIDsubsample must be an integer between 1 and the number of samples. It is {IIDsubsample}") - - - if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: - sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) if sub_iid_sp_median != 1: @@ -360,6 +367,8 @@ def _fit(self, img, mask, IIDsubsample=None): } self.subsampling_ = { "calculated_IID_subsample_depth": calculated_sub_iid_sp_median, + "calculated_IID_subsample_mean": sub_iid_sp_mean, + "IID_subsample_input": sub_iid_sp, "used_IID_subsample_depth": sub_iid_sp_median, "effective_num_IID_samples": N, "total_num_samples": n_samples, From c30c4e3d4f3d655d03c243c9621438395463e77b Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 16:44:57 -0400 Subject: [PATCH 3/7] Cleaned up code and doc and fixed bug in testing --- .gitignore | 3 ++ mapca/mapca.py | 70 +++++++++++++++++++++------------------ mapca/tests/conftest.py | 2 +- mapca/tests/test_mapca.py | 6 ++++ 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index b6e4761..8648d5e 100644 --- a/.gitignore +++ b/.gitignore @@ -81,6 +81,9 @@ target/ profile_default/ ipython_config.py +# VSCode +.vscode/ + # pyenv .python-version diff --git a/mapca/mapca.py b/mapca/mapca.py index 189cdd1..f0766cf 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -128,7 +128,7 @@ def __init__(self, criterion="mdl", normalize=True): self.criterion = criterion self.normalize = normalize - def _fit(self, img, mask, IIDsubsample=None): + def _fit(self, img, mask, subsample_depth=None): LGR.info( "Performing dimensionality reduction based on GIFT " "(https://trendscenter.org/software/gift/) and Li, Y. O., Adali, T., " @@ -225,11 +225,14 @@ def _fit(self, img, mask, IIDsubsample=None): # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median # and use that instead calculated_sub_iid_sp_median = sub_iid_sp_median - if IIDsubsample: - if (isinstance(IIDsubsample, int) or (isinstance(IIDsubsample, float) and IIDsubsample == int(IIDsubsample))) and (1 <= IIDsubsample <= n_samples): - sub_iid_sp_median = IIDsubsample + if subsample_depth: + if (isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) and subsample_depth == int(subsample_depth))) and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100): + sub_iid_sp_median = subsample_depth else: - raise ValueError(f"IIDsubsample must be an integer between 1 and the number of samples. 
It is {IIDsubsample}") + # The logic of the upper bound is subsample_depth^3 is the fraction of samples that removed and it would be good to have at least 100 sampling remaining to have a useful analysis + # Given a masked volume is going to result in fewer samples remaining in 3D space, this is likely a very liberal upper bound, but + # probably good to at least include an upper bound. + raise ValueError(f"subsample_depth must be an integer > 1 and will retain at least 100 samples after subsampling. It is {subsample_depth}") N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) @@ -394,7 +397,7 @@ def _fit(self, img, mask, IIDsubsample=None): self.u_ = component_maps self.u_nii_ = nib.Nifti1Image(component_maps_3d, img.affine, img.header) - def fit(self, img, mask, IIDsubsample=None): + def fit(self, img, mask, subsample_depth=None): """Fit the model with X. Parameters @@ -403,7 +406,11 @@ def fit(self, img, mask, IIDsubsample=None): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. - IIDsubsample : int + subsample_depth : int, optional + Dimensionality reduction is calculated on a subset of voxels defined by + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining + voxels independent and identically distributed (IID) The subsampling value so that the voxels are assumed to be independent and identically distributed (IID). Default=None (use estimated value) @@ -414,10 +421,10 @@ def fit(self, img, mask, IIDsubsample=None): self : object Returns the instance itself. """ - self._fit(img, mask, IIDsubsample=IIDsubsample) + self._fit(img, mask, subsample_depth=subsample_depth) return self - def fit_transform(self, img, mask, IIDsubsample=None): + def fit_transform(self, img, mask, subsample_depth=None): """Fit the model with X and apply the dimensionality reduction on X. Parameters @@ -426,12 +433,11 @@ def fit_transform(self, img, mask, IIDsubsample=None): Data on which to apply PCA. mask : 3D niimg_like Mask to apply on ``img``. - IIDsubsample : int - The subsampling value so that the voxels are assumed to be independent - and identically distributed (IID) - 2 would mean using every other voxel in 3D space would mean the - remaining voxels are considered IID. 3 would mean every 3rd voxel. - Default=None (use estimated value) + subsample_depth : int, optional + Dimensionality reduction is calculated on a subset of voxels defined by + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining + voxels independent and identically distributed (IID) Returns ------- @@ -443,16 +449,17 @@ def fit_transform(self, img, mask, IIDsubsample=None): The transformation step is different from scikit-learn's approach, which ignores explained variance. - IIDsubsample is always calculated automatically, but it should be consistent - across a dataset with the sample acquisition parameters. In practice, it sometimes - gives a different value and causes problems. That is, for a dataset with 100 runs, - it is 2 in most runs, but when it is 3 substantially fewer components are estimated - and when it is 1, there is almost no dimensionality reduction. This has been added - as an option user provided parameter to use with caution. If mapca seems to be having - periodic mis-estimates, then this parameter should make it possible to set the IID - subsample size to be consistent across a dataset. 
+ subsample_depth is always calculated automatically, but it should be consistent + across a dataset with the same acquisition parameters, since spatial dependence + should be similar. In practice, it sometimes gives a different value and causes + problems. That is, for a dataset with 100 runs, it is 2 in most runs, but when + it is 3, substantially fewer components are estimated and when it is 1, there is + almost no dimensionality reduction. This has been added as an optional user provided + parameter. If mapca seems to be having periodic mis-estimates, then this parameter + should make it possible to set the IID subsample depth to be consistent across a + dataset. """ - self._fit(img, mask, IIDsubsample=IIDsubsample) + self._fit(img, mask, subsample_depth=subsample_depth) return self.transform(img) def transform(self, img): @@ -520,7 +527,7 @@ def inverse_transform(self, img, mask): return img_orig -def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): +def ma_pca(img, mask, criterion="mdl", normalize=False, subsample_depth=None): """Perform moving average-based PCA on imaging data. Run Singular Value Decomposition (SVD) on input data, @@ -542,12 +549,11 @@ def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): ``kic`` refers to the Kullback-Leibler Information Criterion, which is the middle option. normalize : bool, optional Whether to normalize (zero mean and unit standard deviation) or not. Default is False. - IIDsubsample : int, optional - The subsampling value so that the voxels are assumed to be independent - and identically distributed (IID). - 2 would mean using every other voxel in 3D space would mean the - remaining voxels are considered IID. 3 would mean every 3rd voxel. - Default=None (use estimated value) + subsample_depth : int, optional + Dimensionality reduction is calculated on a subset of voxels defined by + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining + voxels independent and identically distributed (IID) Returns ------- @@ -561,7 +567,7 @@ def ma_pca(img, mask, criterion="mdl", normalize=False, IIDsubsample=None): Component timeseries. 
""" pca = MovingAveragePCA(criterion=criterion, normalize=normalize) - _ = pca.fit_transform(img, mask, IIDsubsample=IIDsubsample) + _ = pca.fit_transform(img, mask, subsample_depth=subsample_depth) u = pca.u_ s = pca.explained_variance_ varex_norm = pca.explained_variance_ratio_ diff --git a/mapca/tests/conftest.py b/mapca/tests/conftest.py index 8ddfe21..21f6819 100644 --- a/mapca/tests/conftest.py +++ b/mapca/tests/conftest.py @@ -53,7 +53,7 @@ def test_mask(testpath): @pytest.fixture def test_ts(testpath): return fetch_file('gz2hb', testpath, - 'compt_ts.npy') + 'comp_ts.npy') @pytest.fixture diff --git a/mapca/tests/test_mapca.py b/mapca/tests/test_mapca.py index 38bb499..aefdf01 100644 --- a/mapca/tests/test_mapca.py +++ b/mapca/tests/test_mapca.py @@ -95,3 +95,9 @@ def test_MovingAveragePCA(): test_data_est = pca2.inverse_transform(u2, test_mask_img) assert test_data_est.shape == test_img.shape + + # Testing setting inputting a pre-defined subsampling depth + pca3 = MovingAveragePCA(criterion="mdl", normalize=False) + pca3.fit(test_img, test_mask_img, subsample_depth=2) + assert pca3.subsampling_['calculated_IID_subsample_depth'] == 1 + assert pca3.subsampling_['used_IID_subsample_depth'] == 2 From e799f3e834432c92b0d9385876c596a696c0cd2c Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 16:55:40 -0400 Subject: [PATCH 4/7] style fixes --- mapca/mapca.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index f0766cf..b068e4d 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -212,27 +212,31 @@ def _fit(self, img, mask, subsample_depth=None): # Will log the mean value to check if the differences in median within a dataset # represent very small changes in the mean. It seems like this is the closest # to a non-discrete value to store to compare across runs. - sub_iid_sp_mean = np.round(np.mean(sub_iid_sp),3) - + sub_iid_sp_mean = np.round(np.mean(sub_iid_sp), 3) if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: - LGR.info(f"Subsampling IID depth estimate too high. Subsampling depth will " + LGR.info("Subsampling IID depth estimate too high. 
Subsampling depth will " "be defined by number of datapoints rather than IID estimates.") sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) - LGR.info(f"Estimated subsampling depth for effective i.i.d samples: {sub_iid_sp_median}") + LGR.info("Estimated subsampling depth for effective i.i.d samples: %d" % sub_iid_sp_median) - # Always save the calculated IID subsample value, but, if there is a user provide value, assign that to sub_iid_sp_median - # and use that instead + # Always save the calculated IID subsample value, but, if there is a user provide value, + # assign that to sub_iid_sp_median and use that instead calculated_sub_iid_sp_median = sub_iid_sp_median if subsample_depth: - if (isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) and subsample_depth == int(subsample_depth))) and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100): + if ((isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) + and subsample_depth == int(subsample_depth))) + and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100)): sub_iid_sp_median = subsample_depth else: - # The logic of the upper bound is subsample_depth^3 is the fraction of samples that removed and it would be good to have at least 100 sampling remaining to have a useful analysis - # Given a masked volume is going to result in fewer samples remaining in 3D space, this is likely a very liberal upper bound, but + # The logic of the upper bound is subsample_depth^3 is the fraction of samples + # that removed and it would be good to have at least 100 sampling remaining to + # have a useful analysis. Given a masked volume is going to result in fewer + # samples remaining in 3D space, this is likely a very liberal upper bound, but # probably good to at least include an upper bound. - raise ValueError(f"subsample_depth must be an integer > 1 and will retain at least 100 samples after subsampling. It is {subsample_depth}") + raise ValueError("subsample_depth must be an integer > 1 and will retain >100 " + "samples after subsampling. It is %d" % subsample_depth) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) @@ -408,14 +412,13 @@ def fit(self, img, mask, subsample_depth=None): Mask to apply on ``img``. subsample_depth : int, optional Dimensionality reduction is calculated on a subset of voxels defined by - this depth. 2 would mean using every other voxel in 3D space and 3 would - mean every 3rd voxel. Default=None (estimated depth to make remaining + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. Default=None (estimated depth to make remaining voxels independent and identically distributed (IID) The subsampling value so that the voxels are assumed to be independent and identically distributed (IID). Default=None (use estimated value) - Returns ------- self : object @@ -435,8 +438,8 @@ def fit_transform(self, img, mask, subsample_depth=None): Mask to apply on ``img``. subsample_depth : int, optional Dimensionality reduction is calculated on a subset of voxels defined by - this depth. 2 would mean using every other voxel in 3D space and 3 would - mean every 3rd voxel. Default=None (estimated depth to make remaining + this depth. 2 would mean using every other voxel in 3D space and 3 would + mean every 3rd voxel. 
Default=None (estimated depth to make remaining
             voxels independent and identically distributed (IID)
 
         Returns
         -------
@@ -551,8 +554,8 @@ def ma_pca(img, mask, criterion="mdl", normalize=False, subsample_depth=None):
         Whether to normalize (zero mean and unit standard deviation) or not. Default is False.
     subsample_depth : int, optional
         Dimensionality reduction is calculated on a subset of voxels defined by
-        this depth. 2 would mean using every other voxel in 3D space and 3 would
-        mean every 3rd voxel. Default=None (estimated depth to make remaining
+        this depth. 2 would mean using every other voxel in 3D space and 3 would
+        mean every 3rd voxel. Default=None (estimated depth to make remaining
        voxels independent and identically distributed (IID)
 
     Returns

From 45f46f44dd9da94165af0377f6ebfab758c69a1c Mon Sep 17 00:00:00 2001
From: handwerkerd
Date: Wed, 24 May 2023 17:10:26 -0400
Subject: [PATCH 5/7] more style fixes

---
 mapca/mapca.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mapca/mapca.py b/mapca/mapca.py
index b068e4d..b9633ab 100644
--- a/mapca/mapca.py
+++ b/mapca/mapca.py
@@ -221,13 +221,14 @@ def _fit(self, img, mask, subsample_depth=None):
         LGR.info("Estimated subsampling depth for effective i.i.d. samples: %d" % sub_iid_sp_median)
 
-        # Always save the calculated IID subsample value, but, if there is a user provided value, 
+        # Always save the calculated IID subsample value, but, if there is a user provided value,
         # assign that to sub_iid_sp_median and use that instead
         calculated_sub_iid_sp_median = sub_iid_sp_median
         if subsample_depth:
-            if ((isinstance(subsample_depth, int) or (isinstance(subsample_depth, float)
-                and subsample_depth == int(subsample_depth)))
-                and (1 <= subsample_depth) and ((n_samples/(subsample_depth ** 3)) >= 100)):
+            if ((isinstance(subsample_depth, int)
+                 or (isinstance(subsample_depth, float)
+                     and subsample_depth == int(subsample_depth)))
+                and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)):
                 sub_iid_sp_median = subsample_depth
             else:
                 # The logic of the upper bound is that subsample_depth^3 is the factor by which
                 # samples are reduced and it would be good to have at least 100 samples remaining to
                 # have a useful analysis. Given a masked volume is going to result in fewer
                 # samples remaining in 3D space, this is likely a very liberal upper bound, but
                 # probably good to at least include an upper bound.
                 raise ValueError("subsample_depth must be an integer > 1 and will retain >100 "
                                  "samples after subsampling. 
It is %d" % subsample_depth) - N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n)) if sub_iid_sp_median != 1: From 92b1c41e7d7beb30900b4eb540456abbb051b3fb Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 17:22:13 -0400 Subject: [PATCH 6/7] style fix --- mapca/mapca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index b9633ab..28e6a28 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -228,7 +228,7 @@ def _fit(self, img, mask, subsample_depth=None): if ((isinstance(subsample_depth, int) or (isinstance(subsample_depth, float) and subsample_depth == int(subsample_depth))) - and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)): + and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)): sub_iid_sp_median = subsample_depth else: # The logic of the upper bound is subsample_depth^3 is the fraction of samples From 8abb34d8e172db31ec387fdc0e85c1e148806398 Mon Sep 17 00:00:00 2001 From: handwerkerd Date: Wed, 24 May 2023 17:34:27 -0400 Subject: [PATCH 7/7] black formatting --- mapca/mapca.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/mapca/mapca.py b/mapca/mapca.py index 28e6a28..246540b 100644 --- a/mapca/mapca.py +++ b/mapca/mapca.py @@ -215,8 +215,10 @@ def _fit(self, img, mask, subsample_depth=None): sub_iid_sp_mean = np.round(np.mean(sub_iid_sp), 3) if np.floor(np.power(n_samples / n_timepoints, 1 / dim_n)) < sub_iid_sp_median: - LGR.info("Subsampling IID depth estimate too high. Subsampling depth will " - "be defined by number of datapoints rather than IID estimates.") + LGR.info( + "Subsampling IID depth estimate too high. Subsampling depth will " + "be defined by number of datapoints rather than IID estimates." + ) sub_iid_sp_median = int(np.floor(np.power(n_samples / n_timepoints, 1 / dim_n))) LGR.info("Estimated subsampling depth for effective i.i.d samples: %d" % sub_iid_sp_median) @@ -225,19 +227,29 @@ def _fit(self, img, mask, subsample_depth=None): # assign that to sub_iid_sp_median and use that instead calculated_sub_iid_sp_median = sub_iid_sp_median if subsample_depth: - if ((isinstance(subsample_depth, int) - or (isinstance(subsample_depth, float) - and subsample_depth == int(subsample_depth))) - and (1 <= subsample_depth) and ((n_samples / (subsample_depth ** 3)) >= 100)): + if ( + ( + isinstance(subsample_depth, int) + or ( + isinstance(subsample_depth, float) + and subsample_depth == int(subsample_depth) + ) + ) + and (1 <= subsample_depth) + and ((n_samples / (subsample_depth**3)) >= 100) + ): sub_iid_sp_median = subsample_depth + else: # The logic of the upper bound is subsample_depth^3 is the fraction of samples # that removed and it would be good to have at least 100 sampling remaining to # have a useful analysis. Given a masked volume is going to result in fewer # samples remaining in 3D space, this is likely a very liberal upper bound, but # probably good to at least include an upper bound. - raise ValueError("subsample_depth must be an integer > 1 and will retain >100 " - "samples after subsampling. It is %d" % subsample_depth) + raise ValueError( + "subsample_depth must be an integer > 1 and will retain >100 " + "samples after subsampling. It is %d" % subsample_depth + ) N = np.round(n_samples / np.power(sub_iid_sp_median, dim_n))