From 8da8db11dfe21c8c6aa062cb5b1b3b97c1869b35 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Thu, 4 Jan 2024 16:18:25 -0700 Subject: [PATCH 1/7] BUG: expose emperor's ignore_missing_samples parameter in core metrics --- q2_diversity/_core_metrics.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/q2_diversity/_core_metrics.py b/q2_diversity/_core_metrics.py index 0c8393a..fdf3b87 100644 --- a/q2_diversity/_core_metrics.py +++ b/q2_diversity/_core_metrics.py @@ -8,7 +8,7 @@ def core_metrics(ctx, table, sampling_depth, metadata, with_replacement=False, - n_jobs=1): + n_jobs=1, ignore_missing_samples=False): rarefy = ctx.get_action('feature_table', 'rarefy') observed_features = ctx.get_action('diversity_lib', 'observed_features') pielou_e = ctx.get_action('diversity_lib', 'pielou_evenness') @@ -39,13 +39,15 @@ def core_metrics(ctx, table, sampling_depth, metadata, with_replacement=False, pcoas += pcoa_results for pcoa in pcoas: - results += emperor_plot(pcoa=pcoa, metadata=metadata) + results += emperor_plot(pcoa=pcoa, metadata=metadata, + ignore_missing_samples=ignore_missing_samples) return tuple(results) def core_metrics_phylogenetic(ctx, table, phylogeny, sampling_depth, metadata, - with_replacement=False, n_jobs_or_threads=1): + with_replacement=False, n_jobs_or_threads=1, + ignore_missing_samples=False): faith_pd = ctx.get_action('diversity_lib', 'faith_pd') unweighted_unifrac = ctx.get_action('diversity_lib', 'unweighted_unifrac') weighted_unifrac = ctx.get_action( @@ -75,7 +77,8 @@ def core_metrics_phylogenetic(ctx, table, phylogeny, sampling_depth, metadata, plots = [] for pcoa in pcoas: - plots += emperor_plot(pcoa=pcoa, metadata=metadata) + plots += emperor_plot(pcoa=pcoa, metadata=metadata, + ignore_missing_samples=ignore_missing_samples) return ( cr.rarefied_table, faith_pd_vector, cr.observed_features_vector, From a9f4ee49b4d456814e49bb08bf05143dad7373f1 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Thu, 4 Jan 2024 17:11:06 -0700 
Subject: [PATCH 2/7] added qiime type registration for parameter --- q2_diversity/__init__.py | 2 +- q2_diversity/plugin_setup.py | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/q2_diversity/__init__.py b/q2_diversity/__init__.py index 2e8adbc..15295d9 100644 --- a/q2_diversity/__init__.py +++ b/q2_diversity/__init__.py @@ -27,5 +27,5 @@ 'core_metrics_phylogenetic', 'core_metrics', 'filter_distance_matrix', 'mantel', 'alpha_rarefaction', 'beta_rarefaction', 'procrustes_analysis', 'beta_correlation', - 'adonis', 'partial_procrustes' + 'adonis', 'partial_procrustes', 'ignore_missing_samples' ] diff --git a/q2_diversity/plugin_setup.py b/q2_diversity/plugin_setup.py index 815601c..37d09f3 100644 --- a/q2_diversity/plugin_setup.py +++ b/q2_diversity/plugin_setup.py @@ -401,6 +401,7 @@ 'metadata': Metadata, 'with_replacement': Bool, 'n_jobs_or_threads': Int % Range(1, None) | Str % Choices(['auto']), + 'ignore_missing_samples': Bool }, outputs=[ ('rarefied_table', FeatureTable[Frequency]), @@ -436,7 +437,15 @@ 'metadata': 'The sample metadata to use in the emperor plots.', 'with_replacement': with_replacement_description, 'n_jobs_or_threads': '[beta/beta-phylogenetic methods only] - %s' - % n_jobs_or_threads_description + % n_jobs_or_threads_description, + 'ignore_missing_samples': 'If set to `True` samples and features ' + 'without metadata are included by ' + 'setting all metadata values to: ' + '"This element has no metadata". By ' + 'default an exception will be raised if ' + 'missing elements are encountered. Note, ' + 'this flag only takes effect if there is at ' + 'least one overlapping element.' 
}, output_descriptions={ 'rarefied_table': 'The resulting rarefied feature table.', @@ -490,6 +499,7 @@ 'metadata': Metadata, 'with_replacement': Bool, 'n_jobs': Int % Range(1, None) | Str % Choices(['auto']), + 'ignore_missing_samples': Bool }, outputs=[ ('rarefied_table', FeatureTable[Frequency]), @@ -512,7 +522,15 @@ 'rarefied to prior to computing diversity metrics.', 'metadata': 'The sample metadata to use in the emperor plots.', 'with_replacement': with_replacement_description, - 'n_jobs': '[beta methods only] - %s' % n_jobs_description + 'n_jobs': '[beta methods only] - %s' % n_jobs_description, + 'ignore_missing_samples': 'If set to `True` samples and features ' + 'without metadata are included by ' + 'setting all metadata values to: ' + '"This element has no metadata". By ' + 'default an exception will be raised if ' + 'missing elements are encountered. Note, ' + 'this flag only takes effect if there is at ' + 'least one overlapping element.' }, output_descriptions={ 'rarefied_table': 'The resulting rarefied feature table.', From a39698c4c9ae3f15424562441eb1cd991e290521 Mon Sep 17 00:00:00 2001 From: Hannah Hagen <125509369+hagenjp@users.noreply.github.com> Date: Mon, 8 Jan 2024 12:49:21 -0700 Subject: [PATCH 3/7] Update q2_diversity/__init__.py Co-authored-by: Liz Gehret <54517601+lizgehret@users.noreply.github.com> --- q2_diversity/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q2_diversity/__init__.py b/q2_diversity/__init__.py index 15295d9..2e8adbc 100644 --- a/q2_diversity/__init__.py +++ b/q2_diversity/__init__.py @@ -27,5 +27,5 @@ 'core_metrics_phylogenetic', 'core_metrics', 'filter_distance_matrix', 'mantel', 'alpha_rarefaction', 'beta_rarefaction', 'procrustes_analysis', 'beta_correlation', - 'adonis', 'partial_procrustes', 'ignore_missing_samples' + 'adonis', 'partial_procrustes' ] From 38cf32033a428d358f01b6f76816fe5ba29366ba Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Wed, 10 Jan 2024 14:49:43 -0700 
Subject: [PATCH 4/7] pass ignore_missing_samples through to core_metrics call in core-metrics phylogenetic --- q2_diversity/_core_metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/q2_diversity/_core_metrics.py b/q2_diversity/_core_metrics.py index fdf3b87..d039bfd 100644 --- a/q2_diversity/_core_metrics.py +++ b/q2_diversity/_core_metrics.py @@ -59,7 +59,8 @@ def core_metrics_phylogenetic(ctx, table, phylogeny, sampling_depth, metadata, cr = core_metrics(table=table, sampling_depth=sampling_depth, metadata=metadata, with_replacement=with_replacement, - n_jobs=n_jobs_or_threads) + n_jobs=n_jobs_or_threads, + ignore_missing_samples=ignore_missing_samples) faith_pd_vector, = faith_pd(table=cr.rarefied_table, phylogeny=phylogeny) From 4357831f33886374d40d474b451b4a0269db37d7 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Thu, 11 Jan 2024 14:55:18 -0700 Subject: [PATCH 5/7] adds tests for 'ignore_missing_samples' parameter --- q2_diversity/tests/test_core_metrics.py | 79 +++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/q2_diversity/tests/test_core_metrics.py b/q2_diversity/tests/test_core_metrics.py index 5b9ab42..f50b181 100644 --- a/q2_diversity/tests/test_core_metrics.py +++ b/q2_diversity/tests/test_core_metrics.py @@ -18,6 +18,7 @@ from qiime2.plugin.testing import TestPluginBase from qiime2 import Artifact, Metadata +import warnings class CoreMetricsTests(TestPluginBase): package = 'q2_diversity' @@ -144,6 +145,84 @@ def test_core_metrics(self): pdt.assert_series_equal(results[1].view(pd.Series), obs_feat_exp) pdt.assert_series_equal(results[2].view(pd.Series), shannon_exp) + def test_core_metrics_ignore_missing_samples_false(self): + table = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]), + ['O1', 'O2'], + ['S1', 'S2', 'S3']) + table = Artifact.import_data('FeatureTable[Frequency]', table) + + metadata = Metadata( + pd.DataFrame({'foo': ['1', '2']}, + index=pd.Index(['S1', 'S2'], name='id'))) + + with 
self.assertRaisesRegex(KeyError, 'samples not included'): + self.core_metrics(table=table, sampling_depth=200, + metadata=metadata, + ignore_missing_samples=False) + + def test_core_metrics_ignore_missing_samples_true(self): + table = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]), + ['O1', 'O2'], + ['S1', 'S2', 'S3']) + table = Artifact.import_data('FeatureTable[Frequency]', table) + + metadata = Metadata( + pd.DataFrame({'foo': ['1', '2']}, + index=pd.Index(['S1', 'S2'], name='id'))) + + results = self.core_metrics(table=table, sampling_depth=200, + metadata=metadata, + ignore_missing_samples=True) + + self.assertEqual(len(results), 10) + self.assertEqual(repr(results.bray_curtis_distance_matrix.type), + 'DistanceMatrix') + self.assertEqual(repr(results.jaccard_emperor.type), 'Visualization') + + def test_core_metrics_phylogenetic_ignore_missing_samples_false(self): + table = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]), + ['O1', 'O2'], + ['S1', 'S2', 'S3']) + table = Artifact.import_data('FeatureTable[Frequency]', table) + + tree = skbio.TreeNode.read(io.StringIO( + '((O1:0.25, O2:0.50):0.25, O3:0.75)root;')) + tree = Artifact.import_data('Phylogeny[Rooted]', tree) + + metadata = Metadata( + pd.DataFrame({'foo': ['1', '2']}, + index=pd.Index(['S1', 'S2'], name='id'))) + + with self.assertRaisesRegex(KeyError, 'samples not included'): + self.core_metrics_phylogenetic(table=table, phylogeny=tree, + sampling_depth=200, + metadata=metadata, + ignore_missing_samples=False) + + def test_core_metrics_phylogenetic_ignore_missing_samples_true(self): + table = biom.Table(np.array([[150, 100, 100], [50, 100, 100]]), + ['O1', 'O2'], + ['S1', 'S2', 'S3']) + table = Artifact.import_data('FeatureTable[Frequency]', table) + + tree = skbio.TreeNode.read(io.StringIO( + '((O1:0.25, O2:0.50):0.25, O3:0.75)root;')) + tree = Artifact.import_data('Phylogeny[Rooted]', tree) + + metadata = Metadata( + pd.DataFrame({'foo': ['1', '2']}, + index=pd.Index(['S1', 'S2'], 
name='id'))) + + results = self.core_metrics_phylogenetic(table=table, phylogeny=tree, + sampling_depth=200, + metadata=metadata, + ignore_missing_samples=True) + + self.assertEqual(len(results), 17) + self.assertEqual(repr(results.bray_curtis_distance_matrix.type), + 'DistanceMatrix') + self.assertEqual(repr(results.jaccard_emperor.type), 'Visualization') + if __name__ == '__main__': unittest.main() From 5ced420e9c43d6700848acb7365fcf0169c69c00 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Thu, 11 Jan 2024 15:10:10 -0700 Subject: [PATCH 6/7] linting --- q2_diversity/tests/test_core_metrics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/q2_diversity/tests/test_core_metrics.py b/q2_diversity/tests/test_core_metrics.py index f50b181..0bf3b39 100644 --- a/q2_diversity/tests/test_core_metrics.py +++ b/q2_diversity/tests/test_core_metrics.py @@ -18,7 +18,6 @@ from qiime2.plugin.testing import TestPluginBase from qiime2 import Artifact, Metadata -import warnings class CoreMetricsTests(TestPluginBase): package = 'q2_diversity' From e26aa36bce1f3e05e8580d50f1faed4c6e0971a7 Mon Sep 17 00:00:00 2001 From: Johannah Hagen Date: Tue, 16 Jan 2024 13:33:00 -0700 Subject: [PATCH 7/7] Minor changes in ignore_missing_samples tests --- q2_diversity/tests/test_core_metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/q2_diversity/tests/test_core_metrics.py b/q2_diversity/tests/test_core_metrics.py index 0bf3b39..9785ec2 100644 --- a/q2_diversity/tests/test_core_metrics.py +++ b/q2_diversity/tests/test_core_metrics.py @@ -154,7 +154,7 @@ def test_core_metrics_ignore_missing_samples_false(self): pd.DataFrame({'foo': ['1', '2']}, index=pd.Index(['S1', 'S2'], name='id'))) - with self.assertRaisesRegex(KeyError, 'samples not included'): + with self.assertRaisesRegex(KeyError, 'Offending samples: S3'): self.core_metrics(table=table, sampling_depth=200, metadata=metadata, ignore_missing_samples=False) @@ -192,7 +192,7 @@ def 
test_core_metrics_phylogenetic_ignore_missing_samples_false(self): pd.DataFrame({'foo': ['1', '2']}, index=pd.Index(['S1', 'S2'], name='id'))) - with self.assertRaisesRegex(KeyError, 'samples not included'): + with self.assertRaisesRegex(KeyError, 'Offending samples: S3'): self.core_metrics_phylogenetic(table=table, phylogeny=tree, sampling_depth=200, metadata=metadata,