# A simple pipeline to calculate LD decay (linkage disequilibrium decay).
# Path of the PLINK binary that the pruning and LD commands below invoke.
# As written, this line runs PLINK with no arguments.
/usr/share/plink_linux_x86_64_20181202/plink
# Keep only the individuals listed in keepsichuan.txt.
# --recode is required for vcftools to actually write the filtered VCF; with
# --out 4kin_sichuan.vcf the result is named 4kin_sichuan.vcf.recode.vcf.
# NOTE(review): the next command reads that file from ../vcffile/, so this
# command is presumably run inside ../vcffile — confirm the working directory.
vcftools --vcf 8indv_Kin_het05_27Nov2018.vcf --keep keepsichuan.txt --recode --out 4kin_sichuan.vcf

# Build the chromosome map later passed to `vcftools --plink --chrom-map`:
# two tab-separated columns, each chromosome/scaffold name mapped to itself.
#   !/^#/        skip header lines only (anchored, so a '#' inside a record
#                does not drop that record)
#   !seen[$1]++  emit each chromosome name once instead of once per variant
awk -F '\t' '!/^#/ && !seen[$1]++ { print $1 "\t" $1 }' \
  ../vcffile/4kin_sichuan.vcf.recode.vcf > 8kinscid.txt
# However, because the input file is usually large (~1 GB), unless LD is being
# computed separately for many populations, this step can be done after
# pruning instead.
# If it is not clear what this refers to, just skip this step.
# NOTE(review): "input" and "output" look like placeholder file names, and
# del_scaffolds.sh is not shown here — presumably it strips scaffold records
# from the copied file; confirm against the helper script.
cp input output
./del_scaffolds.sh input output
# Convert the 8-individual VCF to PLINK text format, writing files with the
# prefix 8indv_Kin and using 8kinscid.txt as the chromosome-name map.
vcftools \
  --vcf ../vcffile/8indv_Kin_het05_27Nov2018.vcf \
  --plink \
  --chrom-map 8kinscid.txt \
  --out 8indv_Kin
# LD pruning with PLINK (--indep-pairwise <window> <step> <r^2 threshold>).
# Two alternative invocations had been collapsed onto one line, each ending in
# a dangling --out; as a single command that made the PLINK binary path the
# output prefix and repeated --file/--indep-pairwise. They are separated here.
# The output prefix is set to "plink" so the kept-variant list is written to
# plink.prune.in, the file name the following steps read.
#
# Option A (active): window of 50 variants, step of 5 variants, r^2 > 0.5.
/usr/share/plink_linux_x86_64_20181202/plink --file test3 --allow-extra-chr --indep-pairwise 50 5 0.5 --out plink

# Option B (alternative): 1 kb window, step of 1 variant, r^2 > 0.5.
# Left commented out because it writes the same plink.prune.* files and would
# overwrite Option A's result; enable only one of the two.
# /usr/share/plink_linux_x86_64_20181202/plink --file test3 --allow-extra-chr --indep-pairwise 1 kb 1 0.5 --out plink
# This step takes a long time (possibly ~6 h).
# NOTE(review): the original note does not say whether the 6 h refers to the
# pruning run above or to del_scaffolds.sh below — confirm.
#
# Copy the pruned-variant list and run del_scaffolds.sh on it to produce
# plink.prune.in.del, which the following --extract step reads. These two
# commands used to sit on the comment line itself, so they never executed.
cp plink.prune.in plink.prune.in.del
./del_scaffolds.sh plink.prune.in plink.prune.in.del
# Restrict the test3 dataset to the variants listed in plink.prune.in.del and
# write the result as a binary fileset with the prefix prunedtest3data.
/usr/share/plink_linux_x86_64_20181202/plink \
  --file test3 \
  --extract plink.prune.in.del \
  --make-bed \
  --out prunedtest3data \
  --allow-extra-chr
# Compute pairwise r^2 on the pruned binary fileset; output prefix test3ld_out.
# This command used to follow the option notes on the same comment line, so it
# never executed.
#   --maf   minor allele frequency threshold
#   --geno  SNP missing-rate threshold
# NOTE(review): --maf 0.5 only lets through variants whose minor allele
# frequency is exactly 0.5; a smaller value such as 0.05 may have been
# intended — confirm before relying on the output.
# NOTE(review): the sorting step that follows reads test1216pruneddata.ld, not
# test3ld_out.ld — the file names look like they come from different runs.
/usr/share/plink_linux_x86_64_20181202/plink --allow-extra-chr --bfile prunedtest3data --allow-no-sex --maf 0.5 --geno 0.01 --r2 --ld-window-kb 500 --ld-window 500000 --ld-window-r2 0 --out test3ld_out
# Reduce the LD table to "<column 5 minus column 2> TAB <column 7>" per row,
# skipping any line that contains the header token CHR_A, then sort
# numerically on the first field.
# (assumes PLINK's .ld column layout, i.e. BP_B - BP_A and R2 — TODO confirm)
awk '!/CHR_A/ { print ($5-$2) "\t" $7 }' test1216pruneddata.ld \
  | sort -n -k 1 \
  > test1216pruneddata.ld.sort

# Hand the sorted table to compress_ld.py: input path first, output path second.
python compress_ld.py \
  test1216pruneddata.ld.sort \
  test1216pruneddata.ld.sort.cp