-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathplot_admixture_fixedPopSize.py~
143 lines (112 loc) · 5.37 KB
/
plot_admixture_fixedPopSize.py~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import matplotlib
matplotlib.use('Agg')
import numpy
import sys
from scipy.stats import rv_discrete
import os.path
import numpy as np
import matplotlib.pyplot as plt
#rho_in=float(sys.argv[1])
#adaptive_theta=float(sys.argv[2])
#selection=sys.argv[3] # boolean indicating if we are similating with selection
#locusLength=int(sys.argv[4])
rho_in='5e-9'
adaptive_theta='0'
selection='False'
#locus_length=350000
summary_statistics={} # store the summary statistics for all the models here. Key = [m_NA][m_NE][H12,S,Pi,avg_dist]
#NAms=[158, 2500, 61659, 1110000, 15984500, 28000000]
NAms=[2500, 61659, 1110000, 15984500, 28000000]
#EUrs=[3801, 16982, 67608, 39000, 3122470, 9550000]
#EUrs=[16982, 39000, 67608, 100000, 200000, 500000, 1000000, 2000000, 3122470, 9550000]
EUrs=[16982, 39000, 67608, 100000, 200000, 500000, 1000000, 2000000]
for NAm in NAms:
summary_statistics[NAm]={}
for EUr in EUrs:
summary_statistics[NAm][EUr]={}
summary_statistics[NAm][EUr]['H12']=[]
summary_statistics[NAm][EUr]['H2H1']=[]
summary_statistics[NAm][EUr]['S']=[]
summary_statistics[NAm][EUr]['Pi']=[]
summary_statistics[NAm][EUr]['avg_dist']=[]
summary_statistics[NAm][EUr]['TajD']=[]
for NAm in NAms:
for EUr in EUrs:
# H12
inFile=open(os.path.expanduser('~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm%s_EUr%s_rho_%s_theta_%s_selection_%s_MS_snps.txt') %(NAm, EUr,rho_in, adaptive_theta, selection),'r')
# iterate through the file
# check if there are 17 lines
# store H12 for this parameter configuration in a dictionary
for line in inFile:
items=line.split('\t')
if len(items)==17:
H12=float(items[8])
H2H1=float(items[9])
summary_statistics[NAm][EUr]['H12'].append(H12)
summary_statistics[NAm][EUr]['H2H1'].append(H2H1)
inFile.close()
# Pi, S, TajD
inFile=open(os.path.expanduser('~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm%s_EUr%s_rho_%s_theta_%s_selection_%s_MS_Pi_S_TajD_perBp.txt') %(NAm, EUr,rho_in, adaptive_theta, selection),'r')
for line in inFile:
items=line.strip('\n').split('\t')
if len(items)==4:
if items[0] !='':
S=float(items[0])
Pi=float(items[1])
avg_dist=float(items[2])
TajD=float(items[3])
summary_statistics[NAm][EUr]['S'].append(S)
summary_statistics[NAm][EUr]['Pi'].append(Pi)
summary_statistics[NAm][EUr]['avg_dist'].append(avg_dist)
summary_statistics[NAm][EUr]['TajD'].append(TajD)
inFile.close()
print('data is loaded...')
###########
# mean_H12 in the data:
data_means={}
data_means['H12']=0.01767854
data_means['H2H1']=0.8301393
data_means['TajD']=0.3835
data_means['S']=0.0578
data_means['Pi']=0.0118
# data medians
data_medians={}
data_medians['H12']=0.01545779
data_medians['H2H1']=0.8651685
data_medians['TajD']=0.3835
data_medians['S']=0.0578
data_medians['Pi']=0.0118
##########
print('plotting...')
# make boxplots of the distributions
matplotlib.rcParams.update({'font.size': 7})
NAm_mode=15984500
fig_size = plt.rcParams["figure.figsize"]
fig_size[0] = 12
fig_size[1] = 9
for EUr_mode in EUrs:
print('EUr %s' %EUr_mode)
plot_no=1
for summary_statistic in ['H12', 'H2H1', 'Pi', 'S', 'TajD']:
plt.subplot(2, 3, plot_no)
simulations=[]
plt.boxplot(summary_statistics[2500][EUr_mode][summary_statistic],summary_statistics[61659][EUr_mode][summary_statistic],summary_statistics[1110000][EUr_mode][summary_statistic],summary_statistics[15984500][EUr_mode][summary_statistic], summary_statistics[28000000][EUr_mode][summary_statistic]], labels=['2500' ,'61659' ,'1110000', '15984500' ,'28000000'])
plt.axhline(y=data_means[summary_statistic],color='r', label='Mean value in data')
if plot_no==1:
plt.ylim(0,1)
plt.title('Europe Ne = %s' %EUr_mode)
elif plot_no==2:
plt.ylim(0,1)
elif plot_no==3:
plt.ylim(0,0.2)
elif plot_no==4:
plt.ylim(0,0.4)
elif plot_no==5:
plt.ylim(-3,3)
#plt.ylim(min(data_means[summary_statistic], min(summary_statistics[158][EUr_mode][summary_statistic]), min(summary_statistics[2500][EUr_mode][summary_statistic]), min(summary_statistics[61659][EUr_mode][summary_statistic]),min(summary_statistics[1110000][EUr_mode][summary_statistic]), min(summary_statistics[15984500][EUr_mode][summary_statistic]), min(summary_statistics[28000000][EUr_mode][summary_statistic]))-0.01, max(data_means[summary_statistic], max(summary_statistics[158][EUr_mode][summary_statistic]), max(summary_statistics[2500][EUr_mode][summary_statistic]), max(summary_statistics[61659][EUr_mode][summary_statistic]),max(summary_statistics[1110000][EUr_mode][summary_statistic]), max(summary_statistics[15984500][EUr_mode][summary_statistic]), max(summary_statistics[28000000][EUr_mode][summary_statistic]))+0.01)
plt.xlabel('North American Population Size')
plt.ylabel(summary_statistic)
plot_no+=1
plt.tight_layout()
plt.savefig(os.path.expanduser('~/Jensen_response/analysis/admixture_fixedPopSize_boxplot_EUr%s.png' %EUr_mode))
plt.close()