forked from garudlab/Harris_etal_response
-
Notifications
You must be signed in to change notification settings - Fork 0
/
documentation_Jensen_response_publication.doc~
230 lines (148 loc) · 9.96 KB
/
documentation_Jensen_response_publication.doc~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#####################################################################################
# Measure H12 in 10KB, 400SNP windows under neutrality for ALL the different demographic models
####################################################################################
qsub ~/Jensen_response/scripts/qsub_constNe10e6_theta0
qsub ~/Jensen_response/scripts/qsub_constNe2.7e6_theta0
qsub ~/Jensen_response/scripts/qsub_dadi1_theta0
qsub ~/Jensen_response/scripts/qsub_dadi2_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_bottleneck_mode_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_posterior_95CI_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_posterior_95CI_Harris_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_posterior_95CI_Harris_theta0_rho5e-7
qsub ~/Jensen_response/scripts/qsub_admixture_mode_Garud_2015_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_bot_mode_Garud_2015_theta0
qsub ~/Jensen_response/scripts/qsub_Arguello
##########################################
# Compute long range LD for simulations #
##########################################
# Check if there are 8 entries
for model in admixture_bot_mode admixture_mode admixture_posterior constNe10e6 constNe2.7e6 dadi1 dadi2 admixture_posterior_Harris admixture_mode_Garud2015 admixture_bot_mode_Garud2015 Arguello; do
echo $model
# check that 8 fields were printed
python ~/Jensen_response/scripts/count_fields_any.py ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95.txt tmp.txt 8
# average the simulation results for LD:
cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95_sorted.txt
python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95_averaged.txt
done
###########################
# Long range LD DGRP data #
###########################
cat ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr2R.txt ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr2L.txt ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr3R.txt ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr3L.txt | sort -k2,2 -g > alldataSorted
python ~/Jensen_response/scripts/averageLD_realData.py alldataSorted alldata_averaged
cat alldata_averaged | sort -k1,1 -g > alldata_averaged_sorted
#############################################################
# compute Pi, S, TajD statistics from short introns in DGRP #
#############################################################
python ~/Jensen_response/scripts/DaDi_compute_S_Pi_TajD.py
#####################################################################
# plot S, Pi, TajD, H12, and H2/H1 for simulations vs the real data #
#####################################################################
python ~/Jensen_response/scripts/plot_original_models.py
# note that this script depends on the experiments done below
##########################################################
# experiments to find a well-fitting demographic model #
##########################################################
#############
# migration #
#############
qsub_admixture_mode_theta0_migration_400SNPs
#plot_admixture_migration.py
for m_NA in 0 0.1 0.25 0.5 0.75; do
file=~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95.txt
python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8
# average the simulation results for LD:
cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95_sorted.txt
python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95_averaged.txt
done
##############################
# vary admixture proportions #
##############################
qsub_admixture_mode_theta0_diffProps_400SNPs
#plot_admixture_diffProps.py
for prop in 0 0.1 0.25 0.4 0.5 0.75 0.85 0.9; do
file=~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95.txt
python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8
# average the simulation results for LD:
cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt
python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_averaged.txt
done
##########################
# fixed population sizes #
##########################
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0_fixedPopSize_400SNPs_MS
#plot_admixture_fixedPopSize.py
# also test hard vs soft sweeps for fixed sizes (make BF plots)
# scenarios we want:
NAm=1110000
EUr=700000
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta10_fixedPopSize_400SNPs_MS
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0.01_fixedPopSize_400SNPs_MS
# what do the trajectories look like for adaptive mutations
bash run_admixture_mode_fixedPopSize_400SNPs_MS_trajectory.sh
#plot the trajectories
python plot_trajectories.py
#try conditoining on the ending frequency of a sweep
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta10_fixedPopSize_400SNPs_MS_conditionEndFreq
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0.01_fixedPopSize_400SNPs_MS_conditionEndFreq
#num_sims=20000
#for NAm in 61659 1110000 15984500; do
# for EUr in 500000 2000000; do
num_sims=100000
for NAm in 1110000 15984500; do
for EUr in 700000; do
hard_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0.01_selection_True_MS_snps.txt
soft_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_10_selection_True_MS_snps.txt
outFile=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_BFs_numSim_${num_sims}.txt
python ~/Jensen_response/scripts/compute_Bayes_factors.py $hard_in $soft_in $outFile $num_sims
done
done
for NAm in 2500 61659 1110000 15984500 28000000; do
echo 'NAm'
echo $NAm
for EUr in 16982 67608 700000 2000000 9550000; do
echo 'EUr'
echo $EUr
file=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95.txt
python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8
# average the simulation results for LD:
cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt
python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_averaged.txt
done
done
#####################
# vary growth rates #
#####################
#qsub_admixture_mode_theta0_varyGrowth_400SNPs_MS
qsub_admixture_mode_theta0_varyGrowth_400SNPs_MS_for_supp
plot_admixture_varyGrowth.py
for NAm_anc in 2500 61659; do
for NAm_cur in 1110000 15984500 28000000; do
for EUr_anc in 16982 67608; do
for EUr_cur in 700000 2000000 9550000; do
file=~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95.txt
python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8
# average the simulation results for LD:
cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt
python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_averaged.txt
done
done
done
done
#########################
# Bayes factor analysis #
#########################
# simulate hard and soft sweeps under inferred admixture models with fixed pop sizes for north am. and eur.
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0_fixedPopSize_400SNPs_MS
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta10_fixedPopSize_400SNPs_MS
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0.01_fixedPopSize_400SNPs_MS
# compute Bayes factors.
num_sims=100000
for NAm in 1110000 15984500; do
for EUr in 700000; do
hard_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0.01_selection_True_MS_snps.txt
soft_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_10_selection_True_MS_snps.txt
outFile=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_BFs_numSim_${num_sims}.txt
python ~/Jensen_response/scripts/compute_Bayes_factors.py $hard_in $soft_in $outFile $num_sims
done
done