-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvcf2phase.r
66 lines (44 loc) · 1.81 KB
/
vcf2phase.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
## Convert VCF format to PHASE format
# numindivs
# numloci
# space-delim loci positions
# genotype in 0 or 1
###################################################
################# IMPORTANT NOTE ##################
###################################################
# The multiVCF that we feed this script
# must have "#CHROM" replaced with "CHROM".
# So run this: sed 's/#CHROM/CHROM/' in.vcf > out.vcf
###################################################
################# LOAD LIBARIES ###################
###################################################
library(stringr)
###################################################
################ DEFINE FUNCTIONS #################
###################################################
## Given a multiVCF, get the genotypes
genoGet <- function(dataset) {
## REMOVE FIRST NINE COLUMNS FROM THE MULTIVCFs
data <- dataset[-c(1:9)]
geno <- as.data.frame(sapply(data, function(x) str_extract(x, "[0123456789]")))
}
## Given a list, print each element to new line of file
listWriter <- function(list, filename) {
length(list)
for (i in 1:length(list)) {
write.table(megalist[[i]], filename, append = TRUE, quote = FALSE, row.names = FALSE, col.names = FALSE)
}
}
###################################################
################## READ IN DATA ###################
###################################################
## READ IN THE Pf AND Pv MULTIVCFs
pf <- read.table("../cambodiaWGS/pv/variants/our_goods_UG.pass.vcf", comment.char="#", header=FALSE)
chr06 <- pf[pf$V1 == "Pv_Sal1_chr06",]
genos <- genoGet(chr06)
megalist <- list()
megalist$nsam <- ncol(genos)
megalist$ngenos <- nrow(genos)
megalist$pos <- paste("P", paste(chr06$V2, collapse = " "))
megalist <- c(megalist, as.list(apply(genos, 2, paste, collapse = "")))
listWriter(megalist, "chr06.txt")