-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_phase_block_bed.py
44 lines (35 loc) · 1.31 KB
/
extract_phase_block_bed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""
Extract phase blocks from VCF
"""
import vcf
import sys
import argparse
from collections import defaultdict
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('invcf', type=argparse.FileType('r'))
parser.add_argument('outbed', type=argparse.FileType('w'))
parser.add_argument('--startPos', default=118989614)
parser.add_argument('--endPos', default=150469354)
parser.add_argument('--chrom', default='chr1')
parser.add_argument('--genome', default=None, help='add ID trackline')
return parser.parse_args()
def get_phase_blocks(vcf_handle, chrom, start, end):
rec_map = defaultdict(list)
for rec in vcf_handle.fetch(chrom, start, end):
rec_map[rec.samples[0]['PS']].append(rec)
phase_blocks = []
for ps, recs in rec_map.iteritems():
if len(recs) > 1:
phase_blocks.append(map(str, [chrom, recs[0].POS, recs[-1].POS, ps]))
return phase_blocks
def main():
args = parse_args()
vcf_handle = vcf.Reader(args.invcf)
phase_blocks = get_phase_blocks(vcf_handle, args.chrom, args.startPos, args.endPos)
if args.genome is not None:
args.outbed.write('track name="{} Phase Blocks"\n'.format(args.genome))
for rec in phase_blocks:
args.outbed.write('\t'.join(rec) + '\n')
if __name__ == '__main__':
main()