Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Exposes 'ignore_missing_samples' parameter in core metrics from emperor #348

Merged
merged 8 commits into from
Jan 16, 2024
14 changes: 9 additions & 5 deletions q2_diversity/_core_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def core_metrics(ctx, table, sampling_depth, metadata, with_replacement=False,
n_jobs=1):
n_jobs=1, ignore_missing_samples=False):
rarefy = ctx.get_action('feature_table', 'rarefy')
observed_features = ctx.get_action('diversity_lib', 'observed_features')
pielou_e = ctx.get_action('diversity_lib', 'pielou_evenness')
Expand Down Expand Up @@ -39,13 +39,15 @@ def core_metrics(ctx, table, sampling_depth, metadata, with_replacement=False,
pcoas += pcoa_results

for pcoa in pcoas:
results += emperor_plot(pcoa=pcoa, metadata=metadata)
results += emperor_plot(pcoa=pcoa, metadata=metadata,
ignore_missing_samples=ignore_missing_samples)

return tuple(results)


def core_metrics_phylogenetic(ctx, table, phylogeny, sampling_depth, metadata,
with_replacement=False, n_jobs_or_threads=1):
with_replacement=False, n_jobs_or_threads=1,
ignore_missing_samples=False):
faith_pd = ctx.get_action('diversity_lib', 'faith_pd')
unweighted_unifrac = ctx.get_action('diversity_lib', 'unweighted_unifrac')
weighted_unifrac = ctx.get_action(
Expand All @@ -57,7 +59,8 @@ def core_metrics_phylogenetic(ctx, table, phylogeny, sampling_depth, metadata,

cr = core_metrics(table=table, sampling_depth=sampling_depth,
metadata=metadata, with_replacement=with_replacement,
n_jobs=n_jobs_or_threads)
n_jobs=n_jobs_or_threads,
ignore_missing_samples=ignore_missing_samples)
hagenjp marked this conversation as resolved.
Show resolved Hide resolved

faith_pd_vector, = faith_pd(table=cr.rarefied_table,
phylogeny=phylogeny)
Expand All @@ -75,7 +78,8 @@ def core_metrics_phylogenetic(ctx, table, phylogeny, sampling_depth, metadata,

plots = []
for pcoa in pcoas:
plots += emperor_plot(pcoa=pcoa, metadata=metadata)
plots += emperor_plot(pcoa=pcoa, metadata=metadata,
ignore_missing_samples=ignore_missing_samples)

return (
cr.rarefied_table, faith_pd_vector, cr.observed_features_vector,
Expand Down
22 changes: 20 additions & 2 deletions q2_diversity/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@
'metadata': Metadata,
'with_replacement': Bool,
'n_jobs_or_threads': Int % Range(1, None) | Str % Choices(['auto']),
'ignore_missing_samples': Bool
},
outputs=[
('rarefied_table', FeatureTable[Frequency]),
Expand Down Expand Up @@ -436,7 +437,15 @@
'metadata': 'The sample metadata to use in the emperor plots.',
'with_replacement': with_replacement_description,
'n_jobs_or_threads': '[beta/beta-phylogenetic methods only] - %s'
% n_jobs_or_threads_description
% n_jobs_or_threads_description,
'ignore_missing_samples': 'If set to `True` samples and features '
'without metadata are included by '
'setting all metadata values to: '
'"This element has no metadata". By '
'default an exception will be raised if '
'missing elements are encountered. Note, '
'this flag only takes effect if there is at '
'least one overlapping element.'
},
output_descriptions={
'rarefied_table': 'The resulting rarefied feature table.',
Expand Down Expand Up @@ -490,6 +499,7 @@
'metadata': Metadata,
'with_replacement': Bool,
'n_jobs': Int % Range(1, None) | Str % Choices(['auto']),
'ignore_missing_samples': Bool
},
outputs=[
('rarefied_table', FeatureTable[Frequency]),
Expand All @@ -512,7 +522,15 @@
'rarefied to prior to computing diversity metrics.',
'metadata': 'The sample metadata to use in the emperor plots.',
'with_replacement': with_replacement_description,
'n_jobs': '[beta methods only] - %s' % n_jobs_description
'n_jobs': '[beta methods only] - %s' % n_jobs_description,
'ignore_missing_samples': 'If set to `True` samples and features '
'without metadata are included by '
'setting all metadata values to: '
'"This element has no metadata". By '
'default an exception will be raised if '
'missing elements are encountered. Note, '
'this flag only takes effect if there is at '
'least one overlapping element.'
},
output_descriptions={
'rarefied_table': 'The resulting rarefied feature table.',
Expand Down
78 changes: 78 additions & 0 deletions q2_diversity/tests/test_core_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,84 @@ def test_core_metrics(self):
pdt.assert_series_equal(results[1].view(pd.Series), obs_feat_exp)
pdt.assert_series_equal(results[2].view(pd.Series), shannon_exp)

def test_core_metrics_ignore_missing_samples_false(self):
    # Sample S3 is present in the feature table but has no row in the
    # metadata, so with the flag off a KeyError naming S3 is expected.
    counts = np.array([[150, 100, 100], [50, 100, 100]])
    feature_table = Artifact.import_data(
        'FeatureTable[Frequency]',
        biom.Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3']))

    md_index = pd.Index(['S1', 'S2'], name='id')
    sample_md = Metadata(pd.DataFrame({'foo': ['1', '2']}, index=md_index))

    with self.assertRaisesRegex(KeyError, 'Offending samples: S3'):
        self.core_metrics(
            table=feature_table,
            sampling_depth=200,
            metadata=sample_md,
            ignore_missing_samples=False,
        )

def test_core_metrics_ignore_missing_samples_true(self):
    # Sample S3 is present in the feature table but has no row in the
    # metadata; with the flag on the call is expected to succeed.
    counts = np.array([[150, 100, 100], [50, 100, 100]])
    feature_table = Artifact.import_data(
        'FeatureTable[Frequency]',
        biom.Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3']))

    md_index = pd.Index(['S1', 'S2'], name='id')
    sample_md = Metadata(pd.DataFrame({'foo': ['1', '2']}, index=md_index))

    outputs = self.core_metrics(
        table=feature_table,
        sampling_depth=200,
        metadata=sample_md,
        ignore_missing_samples=True,
    )

    # All ten outputs are returned; spot-check one matrix and one plot.
    self.assertEqual(len(outputs), 10)
    bray_curtis_type = repr(outputs.bray_curtis_distance_matrix.type)
    self.assertEqual(bray_curtis_type, 'DistanceMatrix')
    jaccard_plot_type = repr(outputs.jaccard_emperor.type)
    self.assertEqual(jaccard_plot_type, 'Visualization')

def test_core_metrics_phylogenetic_ignore_missing_samples_false(self):
    # Sample S3 is present in the feature table but has no row in the
    # metadata, so with the flag off a KeyError naming S3 is expected.
    counts = np.array([[150, 100, 100], [50, 100, 100]])
    feature_table = Artifact.import_data(
        'FeatureTable[Frequency]',
        biom.Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3']))

    newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
    rooted_tree = Artifact.import_data(
        'Phylogeny[Rooted]', skbio.TreeNode.read(newick))

    md_index = pd.Index(['S1', 'S2'], name='id')
    sample_md = Metadata(pd.DataFrame({'foo': ['1', '2']}, index=md_index))

    with self.assertRaisesRegex(KeyError, 'Offending samples: S3'):
        self.core_metrics_phylogenetic(
            table=feature_table,
            phylogeny=rooted_tree,
            sampling_depth=200,
            metadata=sample_md,
            ignore_missing_samples=False,
        )

def test_core_metrics_phylogenetic_ignore_missing_samples_true(self):
    # Sample S3 is present in the feature table but has no row in the
    # metadata; with the flag on the call is expected to succeed.
    counts = np.array([[150, 100, 100], [50, 100, 100]])
    feature_table = Artifact.import_data(
        'FeatureTable[Frequency]',
        biom.Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3']))

    newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
    rooted_tree = Artifact.import_data(
        'Phylogeny[Rooted]', skbio.TreeNode.read(newick))

    md_index = pd.Index(['S1', 'S2'], name='id')
    sample_md = Metadata(pd.DataFrame({'foo': ['1', '2']}, index=md_index))

    outputs = self.core_metrics_phylogenetic(
        table=feature_table,
        phylogeny=rooted_tree,
        sampling_depth=200,
        metadata=sample_md,
        ignore_missing_samples=True,
    )

    # All seventeen outputs are returned; spot-check one matrix and one
    # plot.
    self.assertEqual(len(outputs), 17)
    bray_curtis_type = repr(outputs.bray_curtis_distance_matrix.type)
    self.assertEqual(bray_curtis_type, 'DistanceMatrix')
    jaccard_plot_type = repr(outputs.jaccard_emperor.type)
    self.assertEqual(jaccard_plot_type, 'Visualization')


# Run this module's tests via unittest when the file is executed directly.
if __name__ == '__main__':
unittest.main()