documentation_Jensen_response_publication.doc~

#####################################################################################
# Measure H12 in 10KB, 400SNP  windows under neutrality for ALL the different demographic models
####################################################################################
qsub ~/Jensen_response/scripts/qsub_constNe10e6_theta0
qsub ~/Jensen_response/scripts/qsub_constNe2.7e6_theta0
qsub ~/Jensen_response/scripts/qsub_dadi1_theta0
qsub ~/Jensen_response/scripts/qsub_dadi2_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_bottleneck_mode_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_posterior_95CI_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_posterior_95CI_Harris_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_posterior_95CI_Harris_theta0_rho5e-7
qsub ~/Jensen_response/scripts/qsub_admixture_mode_Garud_2015_theta0
qsub ~/Jensen_response/scripts/qsub_admixture_bot_mode_Garud_2015_theta0
qsub ~/Jensen_response/scripts/qsub_Arguello


##########################################
# Compute long range LD  for simulations #
##########################################

# Check if there are 8 entries
for model in admixture_bot_mode admixture_mode admixture_posterior constNe10e6 constNe2.7e6 dadi1 dadi2 admixture_posterior_Harris admixture_mode_Garud2015 admixture_bot_mode_Garud2015 Arguello; do 
    
    echo $model

    # check that 8 fields were printed
    python ~/Jensen_response/scripts/count_fields_any.py ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95.txt tmp.txt 8

    # average the simulation results for LD:
    cat tmp.txt | head -10000000 | sort -k4,4 -g >  ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95_sorted.txt

    python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/${model}_neutrality_locusLen11000_LD_0.05_0.95_averaged.txt

done 


###########################
# Long range LD DGRP data #
###########################
cat ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr2R.txt ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr2L.txt ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr3R.txt ~/Jensen_response/analysis/DGRP_LD/DGRP_LD_1000_50_2_2_0_chr3L.txt | sort -k2,2 -g > alldataSorted    

python ~/Jensen_response/scripts/averageLD_realData.py alldataSorted alldata_averaged    

cat alldata_averaged | sort -k1,1 -g > alldata_averaged_sorted 


#############################################################
# compute Pi, S, TajD statistics from short introns in DGRP #
#############################################################
python ~/Jensen_response/scripts/DaDi_compute_S_Pi_TajD.py


#####################################################################
# plot S, Pi, TajD, H12, and H2/H1 for simulations vs the real data #
#####################################################################
python ~/Jensen_response/scripts/plot_original_models.py
# note that this script depends on the experiments done below

########################################################## 
# experiments to find a well-fitting demographic model   #
##########################################################

#############
# migration #
#############

qsub_admixture_mode_theta0_migration_400SNPs

#plot_admixture_migration.py

for m_NA in 0 0.1 0.25 0.5 0.75; do
    
    file=~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95.txt

    python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8

    # average the simulation results for LD:
    cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95_sorted.txt

    python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_migration_mNA${m_NA}_mNE${m_NA}_rho_5e-9_theta_0_selection_False_LD_0.05_0.95_averaged.txt

done


##############################
# vary admixture proportions #
##############################

qsub_admixture_mode_theta0_diffProps_400SNPs

#plot_admixture_diffProps.py

for prop in 0 0.1 0.25 0.4 0.5 0.75 0.85 0.9; do
    
    file=~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95.txt

    python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8

    # average the simulation results for LD:
    cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt

    python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_migration_diffProps${prop}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_averaged.txt
    
done


########################## 
# fixed population sizes #
##########################

qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0_fixedPopSize_400SNPs_MS

#plot_admixture_fixedPopSize.py

# also test hard vs soft sweeps for fixed sizes (make BF plots)

# scenarios we want: 
NAm=1110000
EUr=700000

qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta10_fixedPopSize_400SNPs_MS 
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0.01_fixedPopSize_400SNPs_MS 


# what do the trajectories look like for adaptive mutations
bash run_admixture_mode_fixedPopSize_400SNPs_MS_trajectory.sh

#plot the trajectories
python plot_trajectories.py

#try conditoining on the ending frequency of a sweep
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta10_fixedPopSize_400SNPs_MS_conditionEndFreq 
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0.01_fixedPopSize_400SNPs_MS_conditionEndFreq


#num_sims=20000
#for NAm in 61659 1110000 15984500; do
#    for EUr in 500000 2000000; do 

num_sims=100000
for NAm in 1110000 15984500; do
    for EUr in 700000; do 

    hard_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0.01_selection_True_MS_snps.txt
    soft_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_10_selection_True_MS_snps.txt
    outFile=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_BFs_numSim_${num_sims}.txt
    python ~/Jensen_response/scripts/compute_Bayes_factors.py $hard_in $soft_in $outFile $num_sims
    done
done


for NAm in 2500 61659 1110000 15984500 28000000; do
    echo 'NAm'
    echo $NAm
    for EUr in 16982 67608 700000 2000000 9550000; do 
	echo 'EUr'
	echo $EUr

    file=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95.txt

    python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8

    # average the simulation results for LD:
    cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt

    python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_averaged.txt

    done 
done


#####################
# vary growth rates #
#####################

#qsub_admixture_mode_theta0_varyGrowth_400SNPs_MS
qsub_admixture_mode_theta0_varyGrowth_400SNPs_MS_for_supp

plot_admixture_varyGrowth.py

for NAm_anc in 2500 61659; do
    for NAm_cur in 1110000 15984500 28000000; do
	for EUr_anc in 16982 67608; do
	    for EUr_cur in 700000 2000000 9550000; do
    
    file=~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95.txt

    python ~/Jensen_response/scripts/count_fields_any.py $file tmp.txt 8

    # average the simulation results for LD:
    cat tmp.txt | head -10000000 | sort -k4,4 -g > ~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt

    python ~/Jensen_response/scripts/averageLD_realData2.py ~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_sorted.txt ~/Jensen_response/analysis/Admixture_mode_varyGrowth_NAm_anc${NAm_anc}_NAm_cur${NAm_cur}_EUr_anc${EUr_anc}_EUr_cur${EUr_cur}_rho_5e-9_theta_0_selection_False_MS_LD_0.05_0.95_averaged.txt
    
done
done
done
done


#########################
# Bayes factor analysis #
#########################

# simulate hard and soft sweeps under inferred admixture models with fixed pop sizes for north am. and eur. 
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0_fixedPopSize_400SNPs_MS
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta10_fixedPopSize_400SNPs_MS 
qsub ~/Jensen_response/scripts/qsub_admixture_mode_theta0.01_fixedPopSize_400SNPs_MS 

# compute Bayes factors. 

num_sims=100000
for NAm in 1110000 15984500; do
    for EUr in 700000; do 

    hard_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_0.01_selection_True_MS_snps.txt
    soft_in=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_theta_10_selection_True_MS_snps.txt
    outFile=~/Jensen_response/analysis/Admixture_mode_fixedPopSize_NAm${NAm}_EUr${EUr}_rho_5e-9_BFs_numSim_${num_sims}.txt
    python ~/Jensen_response/scripts/compute_Bayes_factors.py $hard_in $soft_in $outFile $num_sims
    done
done