Skip to content

Commit

Permalink
Merge pull request #163 from mvdbeek/py3_lumpy
Browse files Browse the repository at this point in the history
Make all lumpy python scripts python2.7 and python3 compatible
  • Loading branch information
ryanlayer authored Jun 14, 2018
2 parents 6f679dd + 5eb03e9 commit f466f61
Show file tree
Hide file tree
Showing 13 changed files with 130 additions and 135 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
language: python
python:
- "2.7"
script:
- "3.5"
script:
- source activate travis
- ./scripts/lumpyexpress -h
- python scripts/cnvanator_to_bedpes.py --cnvkit -b 100 --del_o delo2 --dup_o dupo2 -c data/example.cns
Expand Down
2 changes: 1 addition & 1 deletion scripts/bedpe_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@
for c in order:
if c in B:
for l in sorted(B[c], key=itemgetter(0)):
print l[1]
print(l[1])
14 changes: 7 additions & 7 deletions scripts/check_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
import numpy as np

if len(sys.argv) < 2:
print 'usage:' + sys.argv[0] + ' <bam 1> <bam 2> <..>'
print('usage:' + sys.argv[0] + ' <bam 1> <bam 2> <..>')
exit(1)

order = []


for i in range(1,len(sys.argv)):
bam_file = sys.argv[i]
print bam_file
print(bam_file)

p = subprocess.Popen(\
['samtools', 'view', '-H', bam_file], \
Expand Down Expand Up @@ -48,19 +48,19 @@
curr_chrom_index = order.index(chrom)
curr_pos = -1
elif order.index(chrom) < curr_chrom_index:
print 'out of order:\t' + l + '\toccurred after\t' + \
order[curr_chrom_index] + '\t' + str(curr_pos)
print('out of order:\t' + l + '\toccurred after\t' + \
order[curr_chrom_index] + '\t' + str(curr_pos))
broke = True
break

if pos > curr_pos:
curr_pos = pos
elif pos < curr_pos:
print 'out of order:\t' + l + '\toccurred after\t' + \
order[curr_chrom_index] + '\t' + str(curr_pos)
print('out of order:\t' + l + '\toccurred after\t' + \
order[curr_chrom_index] + '\t' + str(curr_pos))
broke = True
break
if not broke:
print "in order"
print("in order")


2 changes: 1 addition & 1 deletion scripts/extract-sites.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import print_function

import sys
import gzip
import collections
Expand Down
40 changes: 20 additions & 20 deletions scripts/extractSplitReads_BwaMem
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
for line in data:
split = 0
if line[0] == '@':
print line.strip()
print(line.strip())
continue
samList = line.strip().split('\t')
sam = SAM(samList)
Expand All @@ -29,10 +29,10 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
mateFlag = int(0)
if mate[2]=="-": mateFlag = int(16)
if split:
read1 = sam.flag & 64
read1 = sam.flag & 64
if read1 == 64: tag = "_1"
else: tag="_2"
samList[0] = sam.query + tag
samList[0] = sam.query + tag
readCigar = sam.cigar
readCigarOps = extractCigarOps(readCigar,sam.flag)
readQueryPos = calcQueryPosFromCigar(readCigarOps)
Expand All @@ -43,7 +43,7 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
nonOverlap2 = 1 + mateQueryPos.qePos - mateQueryPos.qsPos - overlap
mno = min(nonOverlap1, nonOverlap2)
if mno >= minNonOverlap:
print "\t".join(samList)
print("\t".join(samList))

#--------------------------------------------------------------------------------------------------
# functions
Expand Down Expand Up @@ -82,7 +82,7 @@ class SAM (object):
return int(tagParts[2],16);
return tagParts[2];
return None;

#-----------------------------------------------
cigarPattern = '([0-9]+[MIDNSHP])'
cigarSearch = re.compile(cigarPattern)
Expand Down Expand Up @@ -121,9 +121,9 @@ def calcQueryPosFromCigar(cigarOps):
qsPos = 0
qePos = 0
qLen = 0
# if first op is a H, need to shift start position
# the opPosition counter sees if the for loop is looking at the first index of the cigar object
opPosition = 0
# if first op is a H, need to shift start position
# the opPosition counter sees if the for loop is looking at the first index of the cigar object
opPosition = 0
for cigar in cigarOps:
if opPosition == 0 and (cigar.op == 'H' or cigar.op == 'S'):
qsPos += cigar.length
Expand Down Expand Up @@ -164,40 +164,40 @@ def calcQueryOverlap(s1,e1,s2,e2):

class Usage(Exception):
    """Exception that carries a message in its ``msg`` attribute.

    Args:
        msg: The message to store on the exception.
    """

    def __init__(self, msg):
        # Forward msg to Exception so str(exc), exc.args and tracebacks show
        # it; storing it only on self.msg leaves them empty.
        super(Usage, self).__init__(msg)
        # Kept for callers that read the message through the attribute.
        self.msg = msg

def main():
    """Parse command-line options and run the split-read extraction.

    When no input file is supplied (-i/--inFile), the option help is printed
    followed by a blank line. Otherwise the parsed options are handed to
    extractSplitsFromBwaMem. An IOError raised during extraction is written
    to stderr instead of being propagated.

    Returns:
        None in every case, so ``sys.exit(main())`` exits with status 0.
    """
    usage = """%prog -i <file>
extractSplitReads_BwaMem v0.1.0
Author: Ira Hall
Description: Get split-read alignments from bwa-mem in lumpy compatible format. Ignores reads marked as duplicates.
Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.
"""
    parser = OptionParser(usage)

    parser.add_option("-i", "--inFile", dest="inFile",
        help="A SAM file or standard input (-i stdin).",
        metavar="FILE")
    parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int",
        help="The maximum number of split-read mappings to allow per read. Reads with more are excluded. Default=2",
        metavar="INT")
    parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0,
        help="Include alignments marked as duplicates. Default=False")
    parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int",
        help="minimum non-overlap between split alignments on the query (default=20)",
        metavar="INT")
    (opts, args) = parser.parse_args()
    if opts.inFile is None:
        parser.print_help()
        # A bare print() emits "()" under the Python 2 print statement unless
        # print_function is imported (the top of the file is not visible
        # here); print("") yields a blank line on both Python 2 and 3.
        print("")
    else:
        try:
            extractSplitsFromBwaMem(opts.inFile, opts.numSplits, opts.includeDups, opts.minNonOverlap)
        except IOError as err:
            # Report the I/O failure on stderr and fall through to a normal
            # return rather than raising.
            sys.stderr.write("IOError " + str(err) + "\n")
            return
if __name__ == "__main__":
sys.exit(main())
sys.exit(main())

8 changes: 4 additions & 4 deletions scripts/get_coverages.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np

if len(sys.argv) < 2:
print 'usage:' + sys.argv[0] + ' <in bam 1> <in bam 2> <..>'
print('usage:' + sys.argv[0] + ' <in bam 1> <in bam 2> <..>')
exit(1)

for i in range(1,len(sys.argv)):
Expand Down Expand Up @@ -54,14 +54,14 @@
for l in f:
a = l.rstrip().split('\t')
if float(a[3]) > 0:
C.append(float(a[3]))
C.append(float(a[3]))
W.append((float(a[2])-float(a[1]))/total_len)
min_c = min(C)
max_c = max(C)
mean_c = np.average(C,weights=W)
stdev_c = np.std(C)
print coverage_file + \
print(coverage_file + \
'\tmin:' + str(min_c) + \
'\tmax:' + str(max_c) + \
'\tmean(non-zero):' + str(mean_c)
'\tmean(non-zero):' + str(mean_c))
f.close()
4 changes: 2 additions & 2 deletions scripts/get_exclude_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np

if len(sys.argv) < 3:
print 'usage:' + sys.argv[0] + ' <max> <out file> <in bam 1> <in bam 2> <..>'
print('usage:' + sys.argv[0] + ' <max> <out file> <in bam 1> <in bam 2> <..>')
exit(1)

max_c = int(sys.argv[1])
Expand All @@ -24,7 +24,7 @@
o.write(l)
f.close()
o.close()

p = subprocess.Popen(\
'cat .exclude.tmp | ' \
'sort -S 20G -k1,1 -k2,2n | ' \
Expand Down
41 changes: 20 additions & 21 deletions scripts/l_bp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import sys
from sets import Set
import re

def find_all(a_str, sub):
Expand Down Expand Up @@ -34,7 +33,7 @@ def parse_vcf(vcf_file_name, vcf_lines, vcf_headers, add_sname=True):
A[7] += ';' + 'SNAME=' + ','.join(samples)
l = '\t'.join(A)


if 'SVTYPE=BND' in A[7]:
m = re.search(r"(\[|\])(.*)(\[|\])",A[4])
o_chr,o_pos = m.group(2).split(':')
Expand All @@ -44,13 +43,13 @@ def parse_vcf(vcf_file_name, vcf_lines, vcf_headers, add_sname=True):
pos_s = A[7].find('++:')

if neg_s > 0:
neg_e = neg_s + A[7][neg_s:].find(';')
neg_e = neg_s + A[7][neg_s:].find(';')
pre=A[7][:neg_s]
mid=A[7][neg_s:neg_e]
post=A[7][neg_e:]
A[7] = pre + '++:0,' + mid + post
else:
pos_e = pos_s + A[7][pos_s:].find(';')
pos_e = pos_s + A[7][pos_s:].find(';')
pre=A[7][:pos_s]
mid=A[7][pos_s:pos_e]
post=A[7][pos_e:]
Expand Down Expand Up @@ -91,7 +90,7 @@ def split_v(l):

start_r = pos_r + int(m['CIEND'].split(',')[0])
end_r = pos_r + int(m['CIEND'].split(',')[1])

strands = m['STRANDS']

return [m['SVTYPE'],chr_l,chr_r,strands,start_l,end_l,start_r,end_r,m]
Expand Down Expand Up @@ -131,7 +130,7 @@ def header_line_cmp(l1, l2):
return -1

if l2[:12] == '##fileformat':
return 1
return 1

# make sure #CHROM ... is last
if l1[1] != '#':
Expand All @@ -140,14 +139,14 @@ def header_line_cmp(l1, l2):
return -1

if l1.find('=') == -1:
return -1
return -1
if l2.find('=') == -1:
return 1

h1 = l1[:l1.find('=')]
h2 = l2[:l2.find('=')]
if h1 not in order:
return -1
return -1
if h2 not in order:
return 1
return cmp(order.index(h1),order.index(h2))
Expand All @@ -166,10 +165,10 @@ class breakpoint:
sv_type = ''

strands = ''

l = ''

def __init__(self,
def __init__(self,
l,
percent_slop=0,
fixed_slop=0):
Expand All @@ -182,7 +181,7 @@ def __init__(self,
self.start_l,\
self.end_l,\
self.start_r, \
self.end_r,
self.end_r,
m] = split_v(l)

self.p_l = [float(x) for x in m['PRPOS'].split(',')]
Expand Down Expand Up @@ -218,7 +217,7 @@ def __init__(self,
self.p_r = [float(x)/sum_p_r for x in new_p_r]

# old_l = float(self.end_l - self.start_l + 1)

# self.start_l = max(0,self.start_l-l_slop)
# self.end_l = self.end_l+l_slop

Expand Down Expand Up @@ -253,7 +252,7 @@ def __str__(self):
self.end_l,\
self.chr_r,\
self.start_r, \
self.end_r,
self.end_r,
self.sv_type,\
self.strands,\
self.p_l,
Expand Down Expand Up @@ -304,7 +303,7 @@ def trim(A):
if A[i] == 0:
clip_end += 1
else:
break
break
return [clip_start, clip_end]


Expand Down Expand Up @@ -338,11 +337,11 @@ def align_intervals(I):
new_i = [0]*n + new_i

if i[END] < end:
n = end - i[END]
n = end - i[END]
new_i = new_i + [0]*n

new_I.append(new_i)

return [start, end, new_I]


Expand Down Expand Up @@ -386,12 +385,12 @@ def bron_kerbosch(G, R, P, X):
if (len(P) == 0) and (len(X) == 0):
yield R
for v in P:
V = Set([v])
N = Set([g[0] for g in G[v].edges])
V = set([v])
N = set([g[0] for g in G[v].edges])

for r in bron_kerbosch(G, \
R.union(V), \
P.intersection(N),
P.intersection(N),
X.intersection(N)):
yield r

Expand Down
Loading

0 comments on commit f466f61

Please sign in to comment.