Merge pull request #163 from mvdbeek/py3_lumpy

Make all lumpy python scripts python2.7 and python3 compatible
arq5x · Jun 14, 2018 · f466f61
2 parents 6f679dd + 5eb03e9
commit f466f61
Show file tree

Hide file tree

Showing 13 changed files with 130 additions and 135 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,7 +1,8 @@
 language: python
 python:
   - "2.7"
-script: 
+  - "3.5"
+script:
     - source activate travis
     - ./scripts/lumpyexpress -h
     - python scripts/cnvanator_to_bedpes.py --cnvkit  -b 100 --del_o delo2 --dup_o dupo2 -c data/example.cns

diff --git a/scripts/bedpe_sort.py b/scripts/bedpe_sort.py
@@ -54,4 +54,4 @@
 for c in order:
     if c in B:
         for l in sorted(B[c], key=itemgetter(0)):
-            print l[1]
+            print(l[1])
diff --git a/scripts/check_sorting.py b/scripts/check_sorting.py
@@ -5,15 +5,15 @@
 import numpy as np
 
 if len(sys.argv) < 2:
-    print 'usage:' + sys.argv[0] + ' <bam 1> <bam 2> <..>'
+    print('usage:' + sys.argv[0] + ' <bam 1> <bam 2> <..>')
     exit(1)
 
 order = []
 
 
 for i in range(1,len(sys.argv)):
     bam_file = sys.argv[i]
-    print bam_file
+    print(bam_file)
 
     p = subprocess.Popen(\
             ['samtools', 'view', '-H', bam_file], \
@@ -48,19 +48,19 @@
                 curr_chrom_index = order.index(chrom)
                 curr_pos = -1
             elif order.index(chrom) < curr_chrom_index:
-                print 'out of order:\t' + l + '\toccurred after\t' + \
-                        order[curr_chrom_index] + '\t' + str(curr_pos)
+                print('out of order:\t' + l + '\toccurred after\t' + \
+                        order[curr_chrom_index] + '\t' + str(curr_pos))
                 broke = True
                 break
 
             if pos > curr_pos:
                 curr_pos = pos
             elif pos < curr_pos:
-                print 'out of order:\t' + l + '\toccurred after\t' + \
-                        order[curr_chrom_index] + '\t' + str(curr_pos)
+                print('out of order:\t' + l + '\toccurred after\t' + \
+                        order[curr_chrom_index] + '\t' + str(curr_pos))
                 broke = True
                 break
     if not broke:
-        print "in order"
+        print("in order")
 
 
diff --git a/scripts/extract-sites.py b/scripts/extract-sites.py
@@ -1,4 +1,4 @@
-from __future__ import print_function
+
 import sys
 import gzip
 import collections

diff --git a/scripts/extractSplitReads_BwaMem b/scripts/extractSplitReads_BwaMem
@@ -14,7 +14,7 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
 	for line in data:
 		split = 0
 		if line[0] == '@':
-			print line.strip()
+			print(line.strip())
 			continue
 		samList = line.strip().split('\t')
 		sam = SAM(samList)
@@ -29,10 +29,10 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
 					mateFlag = int(0)
 					if mate[2]=="-": mateFlag = int(16)
 		if split:
-			read1 = sam.flag & 64 
+			read1 = sam.flag & 64
 			if read1 == 64: tag = "_1"
 			else: tag="_2"
-			samList[0] = sam.query + tag 
+			samList[0] = sam.query + tag
 			readCigar = sam.cigar
 			readCigarOps = extractCigarOps(readCigar,sam.flag)
 			readQueryPos = calcQueryPosFromCigar(readCigarOps)
@@ -43,7 +43,7 @@ def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
 			nonOverlap2 = 1 + mateQueryPos.qePos - mateQueryPos.qsPos - overlap
 			mno = min(nonOverlap1, nonOverlap2)
 			if mno >= minNonOverlap:
-				print "\t".join(samList)
+				print("\t".join(samList))
 
 #--------------------------------------------------------------------------------------------------
 # functions
@@ -82,7 +82,7 @@ class SAM (object):
 					return int(tagParts[2],16);
 				return tagParts[2];
 		return None;
-	
+
 #-----------------------------------------------
 cigarPattern = '([0-9]+[MIDNSHP])'
 cigarSearch = re.compile(cigarPattern)
@@ -121,9 +121,9 @@ def calcQueryPosFromCigar(cigarOps):
 	qsPos = 0
 	qePos = 0
 	qLen  = 0
-	# if first op is a H, need to shift start position 
-	# the opPosition counter sees if the for loop is looking at the first index of the cigar object    
-	opPosition = 0  
+	# if the first op is an H or an S, the query start position must be shifted by its length
+	# opPosition tracks whether the for loop is still looking at the first op of the cigar list
+	opPosition = 0
 	for cigar in cigarOps:
 		if opPosition == 0 and (cigar.op == 'H' or cigar.op == 'S'):
 			qsPos += cigar.length
@@ -164,40 +164,40 @@ def calcQueryOverlap(s1,e1,s2,e2):
 
 class Usage(Exception):
 	def __init__(self, msg):
-		self.msg = msg		
+		self.msg = msg
 
 def main():
-	
+
 	usage = """%prog -i <file>
 
 extractSplitReads_BwaMem v0.1.0
-Author: Ira Hall	
+Author: Ira Hall
 Description: Get split-read alignments from bwa-mem in lumpy compatible format. Ignores reads marked as duplicates.
-Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405. 
+Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.
 	"""
 	parser = OptionParser(usage)
-	
-	parser.add_option("-i", "--inFile", dest="inFile", 
+
+	parser.add_option("-i", "--inFile", dest="inFile",
 		help="A SAM file or standard input (-i stdin).",
 		metavar="FILE")
-	parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int", 
+	parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int",
 		help="The maximum number of split-read mappings to allow per read. Reads with more are excluded. Default=2",
 		metavar="INT")
-	parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0, 
+	parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0,
 		help="Include alignments marked as duplicates. Default=False")
-	parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int", 
+	parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int",
 		help="minimum non-overlap between split alignments on the query (default=20)",
 		metavar="INT")
 	(opts, args) = parser.parse_args()
 	if opts.inFile is None:
 		parser.print_help()
-		print
+		print()
 	else:
 		try:
 			extractSplitsFromBwaMem(opts.inFile, opts.numSplits, opts.includeDups, opts.minNonOverlap)
 		except IOError as err:
 			sys.stderr.write("IOError " + str(err) + "\n");
 			return
 if __name__ == "__main__":
-	sys.exit(main()) 
-	
+	sys.exit(main())
+
diff --git a/scripts/get_coverages.py b/scripts/get_coverages.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 if len(sys.argv) < 2:
-    print 'usage:' + sys.argv[0] + ' <in bam 1> <in bam 2> <..>'
+    print('usage:' + sys.argv[0] + ' <in bam 1> <in bam 2> <..>')
     exit(1)
 
 for i in range(1,len(sys.argv)):
@@ -54,14 +54,14 @@
     for l in f:
         a = l.rstrip().split('\t')
         if float(a[3]) > 0:
-            C.append(float(a[3])) 
+            C.append(float(a[3]))
             W.append((float(a[2])-float(a[1]))/total_len)
     min_c = min(C)
     max_c = max(C)
     mean_c = np.average(C,weights=W)
     stdev_c = np.std(C)
-    print coverage_file + \
+    print(coverage_file + \
             '\tmin:' + str(min_c) + \
             '\tmax:' + str(max_c) + \
-            '\tmean(non-zero):' + str(mean_c) 
+            '\tmean(non-zero):' + str(mean_c))
     f.close()
diff --git a/scripts/get_exclude_regions.py b/scripts/get_exclude_regions.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 if len(sys.argv) < 3:
-    print 'usage:' + sys.argv[0] + ' <max> <out file> <in bam 1> <in bam 2> <..>'
+    print('usage:' + sys.argv[0] + ' <max> <out file> <in bam 1> <in bam 2> <..>')
     exit(1)
 
 max_c = int(sys.argv[1])
@@ -24,7 +24,7 @@
             o.write(l)
     f.close()
 o.close()
-            
+
 p = subprocess.Popen(\
         'cat .exclude.tmp | ' \
         'sort -S 20G -k1,1 -k2,2n | ' \

diff --git a/scripts/l_bp.py b/scripts/l_bp.py
@@ -1,5 +1,4 @@
 import sys
-from sets import Set
 import re
 
 def find_all(a_str, sub):
@@ -34,7 +33,7 @@ def parse_vcf(vcf_file_name, vcf_lines, vcf_headers, add_sname=True):
                     A[7] += ';' + 'SNAME=' + ','.join(samples)
                     l = '\t'.join(A)
 
-                
+
                 if 'SVTYPE=BND' in A[7]:
                     m = re.search(r"(\[|\])(.*)(\[|\])",A[4])
                     o_chr,o_pos = m.group(2).split(':')
@@ -44,13 +43,13 @@ def parse_vcf(vcf_file_name, vcf_lines, vcf_headers, add_sname=True):
                         pos_s = A[7].find('++:')
 
                         if neg_s > 0:
-                            neg_e = neg_s + A[7][neg_s:].find(';') 
+                            neg_e = neg_s + A[7][neg_s:].find(';')
                             pre=A[7][:neg_s]
                             mid=A[7][neg_s:neg_e]
                             post=A[7][neg_e:]
                             A[7] = pre + '++:0,' + mid + post
                         else:
-                            pos_e = pos_s + A[7][pos_s:].find(';') 
+                            pos_e = pos_s + A[7][pos_s:].find(';')
                             pre=A[7][:pos_s]
                             mid=A[7][pos_s:pos_e]
                             post=A[7][pos_e:]
@@ -91,7 +90,7 @@ def split_v(l):
 
     start_r = pos_r + int(m['CIEND'].split(',')[0])
     end_r = pos_r + int(m['CIEND'].split(',')[1])
-        
+
     strands = m['STRANDS']
 
     return [m['SVTYPE'],chr_l,chr_r,strands,start_l,end_l,start_r,end_r,m]
@@ -131,7 +130,7 @@ def header_line_cmp(l1, l2):
         return -1
 
     if l2[:12] == '##fileformat':
-        return 1 
+        return 1
 
     # make sure #CHROM ... is last
     if l1[1] != '#':
@@ -140,14 +139,14 @@ def header_line_cmp(l1, l2):
         return -1
 
     if l1.find('=') == -1:
-        return -1 
+        return -1
     if l2.find('=') == -1:
         return 1
 
     h1 = l1[:l1.find('=')]
     h2 = l2[:l2.find('=')]
     if h1 not in order:
-        return -1 
+        return -1
     if h2 not in order:
         return 1
     return cmp(order.index(h1),order.index(h2))
@@ -166,10 +165,10 @@ class breakpoint:
     sv_type = ''
 
     strands = ''
-    
+
     l = ''
 
-    def __init__(self, 
+    def __init__(self,
                  l,
                  percent_slop=0,
                  fixed_slop=0):
@@ -182,7 +181,7 @@ def __init__(self,
         self.start_l,\
         self.end_l,\
         self.start_r, \
-        self.end_r, 
+        self.end_r,
         m] = split_v(l)
 
         self.p_l = [float(x) for x in m['PRPOS'].split(',')]
@@ -218,7 +217,7 @@ def __init__(self,
             self.p_r = [float(x)/sum_p_r for x in new_p_r]
 
             # old_l = float(self.end_l - self.start_l + 1)
-            
+
             # self.start_l = max(0,self.start_l-l_slop)
             # self.end_l = self.end_l+l_slop
 
@@ -253,7 +252,7 @@ def __str__(self):
                                            self.end_l,\
                                            self.chr_r,\
                                            self.start_r, \
-                                           self.end_r, 
+                                           self.end_r,
                                            self.sv_type,\
                                            self.strands,\
                                            self.p_l,
@@ -304,7 +303,7 @@ def trim(A):
         if A[i] == 0:
             clip_end += 1
         else:
-            break               
+            break
     return [clip_start, clip_end]
 
 
@@ -338,11 +337,11 @@ def align_intervals(I):
             new_i = [0]*n + new_i
 
         if i[END] < end:
-            n = end - i[END] 
+            n = end - i[END]
             new_i = new_i + [0]*n
-        
+
         new_I.append(new_i)
-        
+
     return [start, end, new_I]
 
 
@@ -386,12 +385,12 @@ def bron_kerbosch(G, R, P, X):
     if (len(P) == 0) and (len(X) == 0):
         yield R
     for v in P:
-        V = Set([v])
-        N = Set([g[0] for g in G[v].edges])
-    
+        V = set([v])
+        N = set([g[0] for g in G[v].edges])
+
         for r in bron_kerbosch(G, \
                                R.union(V), \
-                               P.intersection(N), 
+                               P.intersection(N),
                                X.intersection(N)):
             yield r