Skip to content

Commit

Permalink
Resolves depth too low errors
Browse files Browse the repository at this point in the history
  • Loading branch information
wdwvt1 committed Aug 24, 2016
1 parent ae6aa05 commit e551825
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 4 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,8 @@ strategy**
`sourcetracker2 gibbs -i otu_table.biom -m map.txt -o example4/ --burnin 100`

**Calculate the proportion of each source in each sink, using a sink
rarefaction depth of 2500**
`sourcetracker2 gibbs -i otu_table.biom -m map.txt -o example5/ --sink_rarefaction_depth 2500`
rarefaction depth of 1700 and source rarefaction depth of 2000**
`sourcetracker2 gibbs -i otu_table.biom -m map.txt -o example5/ --sink_rarefaction_depth 1700 --source_rarefaction_depth 2000`

**Calculate the proportion of each source in each sink, using ipyparallel to run in parallel with 5 jobs**
`sourcetracker2 gibbs -i otu_table.biom -m map.txt -o example6/ --jobs 5`
Expand Down
27 changes: 25 additions & 2 deletions sourcetracker/_cli/gibbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,35 @@ def gibbs(table_fp, mapping_fp, output_dir, loo, jobs, alpha1, alpha2, beta,

# Rarify collapsed source data if requested.
if source_rarefaction_depth > 0:
csources = subsample_dataframe(csources, source_rarefaction_depth)
d = (csources.sum(1) >= source_rarefaction_depth)
if not d.all():
print(csources.sum(1))
too_shallow = (~d).sum()
shallowest = csources.sum(1).min()
raise ValueError(('You requested rarefaction of source samples at '
'%s, but there are %s collapsed source samples '
'that have less sequences than that. The '
'shallowest of these is %s sequences.') %
(source_rarefaction_depth, too_shallow,
shallowest))
else:
csources = subsample_dataframe(csources, source_rarefaction_depth)

# Prepare sink data and rarify if requested.
sinks = feature_table.loc[sink_samples, :]
if sink_rarefaction_depth > 0:
sinks = subsample_dataframe(sinks, sink_rarefaction_depth)
d = (sinks.sum(1) >= sink_rarefaction_depth)
if not d.all():
too_shallow = (~d).sum()
shallowest = sinks.sum(1).min()
raise ValueError(('You requested rarefaction of sink samples at '
'%s, but there are %s sink samples that have '
'less sequences than that. The shallowest of '
'these is %s sequences.') %
(sink_rarefaction_depth, too_shallow,
shallowest))
else:
sinks = subsample_dataframe(sinks, sink_rarefaction_depth)

# If we've been asked to do multiple jobs, we need to spin up a cluster.
if jobs > 1:
Expand Down

0 comments on commit e551825

Please sign in to comment.