-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsv_positions.py
185 lines (142 loc) · 5.27 KB
/
sv_positions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
# =============================================================================
# FileName: sv_positions.py
# Desc: Records structural-variant (SV) positions per chromosome
# Author: Chu Yanshuo
# Email: [email protected]
# HomePage: http://yanshuo.name
# Version: 0.0.1
# LastChange: 2017-08-03 11:10:33
# History:
# =============================================================================
'''
from variant_info.SV import CNV, INSERTION, DELETION, INVERTION, TRANSLOCATION,\
TANDEMDUP
from collections import Counter
import random
class SVP(object):
    """A structural-variant record: a position, a type tag and a payload.

    Attributes:
        position: location of the variant; -1 until it is assigned.
        sv_type: name of the variant kind as a string; "" until assigned.
        sv: the variant object itself; None until assigned.  It must offer
            ``info_str_title()`` and ``info_str()`` before the two
            formatting methods below are called.
    """

    def __init__(self):
        # Start from "unset" placeholders; whoever creates the record is
        # expected to fill the three fields in afterwards.
        self.position, self.sv_type, self.sv = -1, "", None

    def info_str_title(self):
        """Return the tab-separated header line, terminated by a newline.

        The columns contributed by the payload come from
        ``self.sv.info_str_title()``.
        """
        payload_title = self.sv.info_str_title()
        return "# position\tsv_type\t{}\n".format(payload_title)

    def info_str(self):
        """Return this record as one tab-separated line ending in a newline.

        The line holds the position, the type tag and whatever
        ``self.sv.info_str()`` returns, in that order.
        """
        payload_text = self.sv.info_str()
        return "{0}\t{1}\t{2}\n".format(
            self.position, self.sv_type, payload_text)
class SV_positions:
    """Registry of structural-variant records grouped by chromosome.

    ``svp_dict`` maps a chromosome key to the list of ``SVP`` records that
    were added for it.  Records stay in insertion order until ``sorted()``
    is called.
    """

    def __init__(self):
        self.svp_dict = {}

    def _new_svp(self, sv_type, position, sv_class):
        """Return a fresh ``SVP`` tagged ``sv_type`` located at ``position``.

        ``sv_class`` is called with no arguments to build the payload that
        is stored in ``SVP.sv``; the caller then fills in the payload's
        variant-specific attributes.
        """
        svp = SVP()
        svp.sv_type = sv_type
        svp.position = position
        svp.sv = sv_class()
        return svp

    def add_posi_CNV(self, chrom, position, length, copy_number, genotype,
                     ploidy_status):
        """Record a CNV at ``position`` on ``chrom``.

        ``ploidy_status`` is indexed by ``chrom``; that entry is handed to
        ``_getHaplRemain`` together with ``genotype`` to fill the CNV's
        ``hapl_remain`` attribute.  Raises whatever
        ``ploidy_status[chrom]`` raises when ``chrom`` is missing.
        """
        svp = self._new_svp("CNV", position, CNV)
        svp.sv.length = length
        svp.sv.copy_number = copy_number
        svp.sv.genotype = genotype
        svp.sv.hapl_remain = self._getHaplRemain(
            genotype, ploidy_status[chrom])
        self._add_svp(chrom, svp)

    def _getHaplRemain(self, genotype, ploidy_status):
        """Choose, per haplotype symbol, which existing copies remain.

        Both arguments are tallied element by element with ``Counter``
        (presumably strings of haplotype symbols -- confirm with callers).
        For every symbol present in both, ``min(count in genotype, count in
        ploidy_status)`` distinct indices are drawn at random from
        ``range(count in ploidy_status)``.

        Returns:
            dict mapping each shared symbol to its list of drawn indices;
            an empty dict when ``genotype`` is the string "NONE".
        """
        if genotype == "NONE":
            return {}

        wanted = Counter(genotype)
        available = Counter(ploidy_status)
        remain = {}
        for hapl_type in set(wanted) & set(available):
            # Never ask for more copies than the chromosome currently has,
            # otherwise random.sample would raise ValueError.
            keep = min(wanted[hapl_type], available[hapl_type])
            remain[hapl_type] = random.sample(
                range(available[hapl_type]), keep)
        return remain

    def add_posi_INSERTION(self, chrom, hapl_type, hapl_idx, position,
                           length):
        """Record an INSERTION of ``length`` on the given haplotype copy."""
        svp = self._new_svp("INSERTION", position, INSERTION)
        svp.sv.hapl_type = hapl_type
        svp.sv.hapl_idx = hapl_idx
        svp.sv.length = length
        self._add_svp(chrom, svp)

    def add_posi_DELETION(self, chrom, hapl_type, hapl_idx, position,
                          length):
        """Record a DELETION of ``length`` on the given haplotype copy."""
        svp = self._new_svp("DELETION", position, DELETION)
        svp.sv.hapl_type = hapl_type
        svp.sv.hapl_idx = hapl_idx
        svp.sv.length = length
        self._add_svp(chrom, svp)

    def add_posi_INVERTION(self, chrom, hapl_type, hapl_idx, position,
                           length):
        """Record an INVERTION of ``length`` on the given haplotype copy."""
        svp = self._new_svp("INVERTION", position, INVERTION)
        svp.sv.hapl_type = hapl_type
        svp.sv.hapl_idx = hapl_idx
        svp.sv.length = length
        self._add_svp(chrom, svp)

    def add_posi_TANDEMDUP(self, chrom, hapl_type, hapl_idx, position,
                           length, times):
        """Record a TANDEMDUP of ``length``; ``times`` is stored as given."""
        svp = self._new_svp("TANDEMDUP", position, TANDEMDUP)
        svp.sv.hapl_type = hapl_type
        svp.sv.hapl_idx = hapl_idx
        svp.sv.length = length
        svp.sv.times = times
        self._add_svp(chrom, svp)

    def add_posi_TRANSLOCATION(
            self,
            chrom_from,
            position_from,
            hapl_type_from,
            hapl_idx_from,
            chrom_to,
            hapl_type_to,
            hapl_idx_to,
            length):
        """Record a TRANSLOCATION, filed under its source chromosome.

        The record's position is ``position_from`` and it is stored under
        ``chrom_from``; the destination chromosome and haplotype are kept
        on the payload only.
        """
        svp = self._new_svp("TRANSLOCATION", position_from, TRANSLOCATION)
        svp.sv.chrom_to = chrom_to
        svp.sv.hapl_type_from = hapl_type_from
        svp.sv.hapl_type_to = hapl_type_to
        svp.sv.hapl_idx_from = hapl_idx_from
        svp.sv.hapl_idx_to = hapl_idx_to
        svp.sv.length = length
        self._add_svp(chrom_from, svp)

    # NOTE (translated from the original author's comment): an
    # add_ploidy(self, chrom, hapl, number) method does not seem to be
    # needed here.

    def _add_svp(self, chrom, svp):
        """Append ``svp`` to the list for ``chrom``, creating it on first use."""
        # setdefault does a single hash lookup; the previous
        # ``chrom not in self.svp_dict.keys()`` test scans a list on Python 2.
        self.svp_dict.setdefault(chrom, []).append(svp)

    def sorted(self):
        """Order every chromosome's records by position, highest first.

        Each list is replaced by a new sorted list.  Records that share a
        position keep their relative insertion order.  Returns None.
        """
        for chrom in self.svp_dict:
            self.svp_dict[chrom] = sorted(
                self.svp_dict[chrom],
                key=lambda d: d.position, reverse=True)