Skip to content

Commit

Permalink
DataFrame formatting, ConfInts
Browse files Browse the repository at this point in the history
  • Loading branch information
bede committed Jul 5, 2017
1 parent eb8c567 commit ff466a5
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
25 changes: 14 additions & 11 deletions kindel/kindel.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,33 +403,36 @@ def binomial_ci(count, nobs, alpha=0.01):
'''Returns lower, upper bounds of the Jeffrey binomial proportion confidence interval'''
lower_ci, upper_ci = scipy.stats.beta.interval(1-alpha, count+0.5, nobs-count+0.5)
return lower_ci, upper_ci

refs_alns = parse_bam(bam_path)
weights_fmt = []
for ref, aln in refs_alns.items():
weights_fmt.extend([dict(w, ref=ref, pos=i) for i, w in enumerate(aln.weights, start=1)])

weights_df = pd.DataFrame(weights_fmt, columns=['ref','pos','A','C','G','T','N'])
weights_df['depth'] = weights_df[['A','C','G','T','N']].sum(axis=1)
consensus_depths_df = weights_df[['A','C','G','T','N']].max(axis=1)
weights_df['consensus'] = consensus_depths_df.divide(weights_df.depth)

rel_weights_df = pd.DataFrame()
for nt in ['A','C','G','T','N']:
rel_weights_df[[nt]] = weights_df[[nt]].divide(weights_df.depth, axis=0)

rel_weights_df = rel_weights_df.round(dict(A=3, C=3, G=3, T=3, N=3))

weights_df['shannon'] = [scipy.stats.entropy(x)
for x in rel_weights_df[['A','C','G','T']].as_matrix()]
for x in rel_weights_df[['A','C','G','T']].values]

if not no_confidence:
weights_df['lower_ci'] = [binomial_ci(c, t)[0]
for c, t, in zip(consensus_depths_df, weights_df['depth'])]

conf_ints = [binomial_ci(c, t) for c, t, in zip(consensus_depths_df,
weights_df['depth'])]
weights_df['lower_ci'] = [ci[0] for ci in conf_ints]
weights_df['upper_ci'] = [ci[1] for ci in conf_ints]

if relative:
for nt in ['A','C','G','T','N']:
weights_df[[nt]] = rel_weights_df[[nt]]
return weights_df.round(dict(consensus=3, lower_ci=3, shannon=3))

return weights_df.round(dict(consensus=3, lower_ci=3, upper_ci=3, shannon=3))


def features(bam_path: 'path to SAM/BAM file'):
Expand Down
5 changes: 5 additions & 0 deletions tests/test_kindel.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ def test_bam_to_consensus_realign_bwa():
for fn in bwa_fns:
assert kindel.bam_to_consensus(fn, realign=True)

def test_weights():
kindel.weights(bwa_fns[0], relative=True)


# CLI




# SAMPLE-SPECIFIC FUNCTIONAL REGRESSION

0 comments on commit ff466a5

Please sign in to comment.