Skip to content

Commit

Permalink
fix a bug related to tidyr v.0.3.1
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrygosselin committed Sep 11, 2015
1 parent 9ac1742 commit 7d86ec9
Show file tree
Hide file tree
Showing 12 changed files with 49 additions and 61 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,5 @@ import(readr)
import(reshape2)
import(stringdist)
import(stringi)
import(tidyr)
importFrom(stringr,str_pad)
importFrom(stringr,str_sub)
3 changes: 1 addition & 2 deletions R/blacklist_id.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
#' @rdname missing_genotypes
#' @import reshape2
#' @import dplyr
#' @import tidyr
#' @author Thierry Gosselin \email{thierrygosselin@@icloud.com}

missing_genotypes <- function(haplotypes.file,
Expand All @@ -54,7 +53,7 @@ missing_genotypes <- function(haplotypes.file,
haplotype <- read_tsv(file = haplotypes.file, col_names = T) %>%
select(-Cnt) %>%
rename(LOCUS = `Catalog ID`) %>%
gather(INDIVIDUALS, HAPLOTYPES, -LOCUS)
tidyr::gather(INDIVIDUALS, HAPLOTYPES, -LOCUS)


# Whitelist-------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions R/erase_genotypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ erase_genotypes <- function(data, is.tidy.vcf, blacklist.genotypes, filename) {

# haplotypes file preparation
haplo.prep <- data %>%
gather(INDIVIDUALS, HAPLOTYPES, -c(LOCUS, Cnt)) %>%
tidyr::gather(INDIVIDUALS, HAPLOTYPES, -c(LOCUS, Cnt)) %>%
mutate(INDIVIDUALS = as.character(INDIVIDUALS))


Expand Down Expand Up @@ -195,7 +195,7 @@ erase_genotypes <- function(data, is.tidy.vcf, blacklist.genotypes, filename) {
new.file <- bind_rows(erase, keep) %>%
arrange(LOCUS, INDIVIDUALS) %>%
rename(`Catalog ID` = LOCUS) %>%
spread(INDIVIDUALS, HAPLOTYPES)
tidyr::spread(INDIVIDUALS, HAPLOTYPES)



Expand Down
14 changes: 6 additions & 8 deletions R/filter_all.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ filter_all <- function (haplotypes, vcf,

haplo <- read_tsv(haplotypes, col_names = T) %>%
rename(LOCUS =`Catalog ID`) %>%
gather(SAMPLES, HAPLOTYPES, -c(LOCUS, Cnt)) %>%
tidyr::gather(SAMPLES, HAPLOTYPES, -c(LOCUS, Cnt)) %>%
mutate(
POP_ID = str_sub(SAMPLES, pop.id.start, pop.id.end),
POP_ID = factor(POP_ID, levels = pop.levels, ordered = T)
Expand Down Expand Up @@ -167,30 +167,28 @@ filter_all <- function (haplotypes, vcf,
message("Tidying the VCF...")

vcf <- vcf.paralogs %>%
separate(INFO, c("N", "AF"), sep = ";", extra = "error") %>%
tidyr::separate(INFO, c("N", "AF"), sep = ";", extra = "error") %>%
mutate(
N = as.numeric(stri_replace_all_fixed(N, "NS=", "", vectorize_all=F)),
AF = stri_replace_all_fixed(AF, "AF=", "", vectorize_all=F)
) %>%
separate(AF, c("REF_FREQ", "ALT_FREQ"), sep = ",", extra = "error") %>%
tidyr::separate(AF, c("REF_FREQ", "ALT_FREQ"), sep = ",", extra = "error") %>%
mutate(
REF_FREQ = as.numeric(REF_FREQ),
ALT_FREQ = as.numeric(ALT_FREQ)
)
# Gather individuals in 1 colummn --------------------------------------------
vcf <- gather(vcf, INDIVIDUALS, FORMAT, -c(CHROM:ALT_FREQ))
vcf <- tidyr::gather(vcf, INDIVIDUALS, FORMAT, -c(CHROM:ALT_FREQ))

message("Gathering individuals in 1 column")

# Separate FORMAT and COVERAGE columns ---------------------------------------
message("Tidying the VCF...")

vcf <- vcf %>%
separate(FORMAT, c("GT", "READ_DEPTH", "ALLELE_DEPTH", "GL"),
tidyr::separate(FORMAT, c("GT", "READ_DEPTH", "ALLELE_DEPTH", "GL"),
sep = ":", extra = "error") %>%
# separate(GT, c("ALLELE_P", "ALLELE_Q"),
# sep = "/", extra = "error", remove = F) %>%
separate(ALLELE_DEPTH, c("ALLELE_REF_DEPTH", "ALLELE_ALT_DEPTH"),
tidyr::separate(ALLELE_DEPTH, c("ALLELE_REF_DEPTH", "ALLELE_ALT_DEPTH"),
sep = ",", extra = "error")

# Work with Mutate on CHROM and GL -------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion R/genotypes_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ genotypes_summary <- function(genotypes, markers, filter.monomorphic = TRUE, fil
) %>%
dcast(LOCUS + ONEMAP ~ INDIVIDUALS, value.var = "GENOTYPES") %>%
mutate(LOCUS = paste("*", LOCUS, sep="")) %>%
unite(MARKERS, LOCUS, ONEMAP, sep=" ", remove = T) %>%
tidyr::unite(MARKERS, LOCUS, ONEMAP, sep=" ", remove = T) %>%
mutate(MARKERS = paste(.[,1], .[,2], sep = "\t"))

progeny.names <- genotypes.file %>%
Expand Down
13 changes: 6 additions & 7 deletions R/haplo2colony.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ if(getRversion() >= "2.15.1") utils::globalVariables(c("Catalog ID", "Catalog.I
#' @rdname haplo2colony
#' @import reshape2
#' @import dplyr
#' @import tidyr
#' @import lazyeval
#' @importFrom stringr str_pad
#' @references Catchen JM, Amores A, Hohenlohe PA et al. (2011)
Expand Down Expand Up @@ -317,7 +316,7 @@ haplo2colony <- function(haplotypes.file,
variable.name = "Catalog.ID",
value.name = "HAPLOTYPES"
) %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
Expand Down Expand Up @@ -350,7 +349,7 @@ haplo2colony <- function(haplotypes.file,

frequency.markers <- suppressWarnings(
haplo.prep %>%
gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
tidyr::gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
select(-INDIVIDUALS, -ALLELE, -POP_ID) %>%
group_by(Catalog.ID) %>%
filter(NUCLEOTIDES != "NA") %>%
Expand Down Expand Up @@ -379,7 +378,7 @@ haplo2colony <- function(haplotypes.file,
frequency.markers <- suppressWarnings(
haplo.prep %>%
filter(POP_ID %in% allele.freq) %>%
gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
tidyr::gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
select(-INDIVIDUALS, -ALLELE, -POP_ID) %>%
group_by(Catalog.ID) %>%
filter(NUCLEOTIDES != "NA") %>%
Expand Down Expand Up @@ -857,7 +856,7 @@ haplo2colony <- function(haplotypes.file,
variable.name = "Catalog.ID",
value.name = "HAPLOTYPES"
) %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
Expand Down Expand Up @@ -891,7 +890,7 @@ haplo2colony <- function(haplotypes.file,

frequency.markers <- suppressWarnings(
haplo.imp %>%
gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
tidyr::gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
select(-INDIVIDUALS, -ALLELE, -POP_ID) %>%
group_by(Catalog.ID) %>%
filter(NUCLEOTIDES != "NA") %>%
Expand Down Expand Up @@ -920,7 +919,7 @@ haplo2colony <- function(haplotypes.file,
frequency.markers <- suppressWarnings(
haplo.imp %>%
filter(POP_ID %in% allele.freq) %>%
gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
tidyr::gather(Catalog.ID, NUCLEOTIDES, -c(INDIVIDUALS, POP_ID, ALLELE)) %>%
select(-INDIVIDUALS, -ALLELE, -POP_ID) %>%
group_by(Catalog.ID) %>%
filter(NUCLEOTIDES != "NA") %>%
Expand Down
9 changes: 4 additions & 5 deletions R/haplo2genepop.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ haplo2genepop <- function(haplotypes.file,
haplo.prep <- suppressWarnings(
haplo.filtered %>%
melt(id.vars = c("INDIVIDUALS", "POP_ID"), variable.name = "Catalog.ID", value.name = "HAPLOTYPES") %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
Expand Down Expand Up @@ -266,7 +266,7 @@ haplo2genepop <- function(haplotypes.file,
mutate(HAPLOTYPES = stri_pad_left(str = HAPLOTYPES, width = 3, pad = "0")) %>%
mutate(HAPLOTYPES = stri_replace_na(str = HAPLOTYPES, replacement = "000")) %>%
dcast(Catalog.ID + INDIVIDUALS + POP_ID ~ ALLELE, value.var = "HAPLOTYPES") %>%
unite(GENOTYPE, ALLELE1:ALLELE2, sep = "") %>%
tidyr::unite(GENOTYPE, ALLELE1:ALLELE2, sep = "") %>%
arrange(Catalog.ID) %>%
dcast(INDIVIDUALS + POP_ID ~ Catalog.ID, value.var = "GENOTYPE") %>%
arrange(POP_ID, INDIVIDUALS) %>%
Expand Down Expand Up @@ -472,7 +472,7 @@ haplo2genepop <- function(haplotypes.file,
haplo.imp <- suppressWarnings(
haplo.imp %>%
melt(id.vars = c("INDIVIDUALS", "POP_ID"), variable.name = "Catalog.ID", value.name = "HAPLOTYPES") %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
Expand Down Expand Up @@ -510,8 +510,7 @@ haplo2genepop <- function(haplotypes.file,
mutate(HAPLOTYPES = stri_pad_left(str = HAPLOTYPES, width = 3, pad = "0")) %>%
mutate(HAPLOTYPES = stri_replace_na(str = HAPLOTYPES, replacement = "000")) %>%
dcast(Catalog.ID + INDIVIDUALS + POP_ID ~ ALLELE, value.var = "HAPLOTYPES") %>%
# unite(GENOTYPE, ALLELE1:ALLELE2, sep = "/") %>%
unite(GENOTYPE, ALLELE1:ALLELE2, sep = "") %>%
tidyr::unite(GENOTYPE, ALLELE1:ALLELE2, sep = "") %>%
arrange(Catalog.ID) %>%
dcast(INDIVIDUALS + POP_ID ~ Catalog.ID, value.var = "GENOTYPE") %>%
arrange(POP_ID, INDIVIDUALS) %>%
Expand Down
9 changes: 4 additions & 5 deletions R/haplo2genind.R
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ haplo2genind <- function(haplotypes.file,
haplo.prep <- suppressWarnings(
haplo.filtered %>%
melt(id.vars = c("INDIVIDUALS", "POP_ID"), variable.name = "Catalog.ID", value.name = "HAPLOTYPES") %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
Expand Down Expand Up @@ -269,7 +269,7 @@ haplo2genind <- function(haplotypes.file,
mutate(HAPLOTYPES = stri_pad_left(str = HAPLOTYPES, width = 3, pad = "0")) %>%
mutate(HAPLOTYPES = stri_replace_na(str = HAPLOTYPES, replacement = "000")) %>%
dcast(Catalog.ID + INDIVIDUALS + POP_ID ~ ALLELE, value.var = "HAPLOTYPES") %>%
unite(GENOTYPE, ALLELE1:ALLELE2, sep = "/") %>%
tidyr::unite(GENOTYPE, ALLELE1:ALLELE2, sep = "/") %>%
arrange(Catalog.ID) %>%
dcast(INDIVIDUALS + POP_ID ~ Catalog.ID, value.var = "GENOTYPE") %>%
arrange(POP_ID, INDIVIDUALS)
Expand Down Expand Up @@ -419,7 +419,7 @@ haplo2genind <- function(haplotypes.file,
haplo.imp <- suppressWarnings(
haplo.imp %>%
melt(id.vars = c("INDIVIDUALS", "POP_ID"), variable.name = "Catalog.ID", value.name = "HAPLOTYPES") %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
Expand Down Expand Up @@ -457,8 +457,7 @@ haplo2genind <- function(haplotypes.file,
mutate(HAPLOTYPES = stri_pad_left(str = HAPLOTYPES, width = 3, pad = "0")) %>%
mutate(HAPLOTYPES = stri_replace_na(str = HAPLOTYPES, replacement = "000")) %>%
dcast(Catalog.ID + INDIVIDUALS + POP_ID ~ ALLELE, value.var = "HAPLOTYPES") %>%
# unite(GENOTYPE, ALLELE1:ALLELE2, sep = "/") %>%
unite(GENOTYPE, ALLELE1:ALLELE2, sep = "") %>%
tidyr::unite(GENOTYPE, ALLELE1:ALLELE2, sep = "") %>%
arrange(Catalog.ID) %>%
dcast(INDIVIDUALS + POP_ID ~ Catalog.ID, value.var = "GENOTYPE") %>%
arrange(POP_ID, INDIVIDUALS)
Expand Down
11 changes: 5 additions & 6 deletions R/read_stacks_haplotypes_vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#' @rdname read_stacks_haplotypes_vcf
#' @export
#' @import dplyr
#' @import tidyr
#' @import stringi

read_stacks_haplotypes_vcf <- function(haplotypes.vcf.file, pop.id.start, pop.id.end, pop.levels, filter, filename) {
Expand Down Expand Up @@ -44,7 +43,7 @@ read_stacks_haplotypes_vcf <- function(haplotypes.vcf.file, pop.id.start, pop.id
)%>%
select(-c(QUAL, FILTER, FORMAT)) %>%
rename(LOCUS = ID, CHROM = `#CHROM`) %>%
separate(INFO, c("N", "AF"), sep = ";", extra = "error") %>%
tidyr::separate(INFO, c("N", "AF"), sep = ";", extra = "error") %>%
mutate(
N = as.numeric(stri_replace_all_fixed(N, "NS=", "", vectorize_all=F)),
AF = stri_replace_all_fixed(AF, "AF=", "", vectorize_all=F)
Expand All @@ -55,11 +54,11 @@ read_stacks_haplotypes_vcf <- function(haplotypes.vcf.file, pop.id.start, pop.id
message("Gathering individuals in 1 column...")

vcf <- vcf %>%
gather(INDIVIDUALS, FORMAT, -c(CHROM:AF)) %>%
separate(FORMAT, c("GT", "READ_DEPTH"), sep = ":",
tidyr::gather(INDIVIDUALS, FORMAT, -c(CHROM:AF)) %>%
tidyr::separate(FORMAT, c("GT", "READ_DEPTH"), sep = ":",
extra = "error") %>%
separate(AF, c("REF_FREQ", stri_join("ALT_FREQ", seq(1, max(stri_count_fixed(.$ALT, pattern = ","))), sep = "_")), sep = ",", extra = "drop") %>%
separate(ALT, stri_join("ALT", seq(1, max(stri_count_fixed(.$ALT, pattern = ","))), sep = "_"), extra = "drop")
tidyr::separate(AF, c("REF_FREQ", stri_join("ALT_FREQ", seq(1, max(stri_count_fixed(.$ALT, pattern = ","))), sep = "_")), sep = ",", extra = "drop") %>%
tidyr::separate(ALT, stri_join("ALT", seq(1, max(stri_count_fixed(.$ALT, pattern = ","))), sep = "_"), extra = "drop")


message("Fixing columns...")
Expand Down
12 changes: 5 additions & 7 deletions R/read_stacks_vcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,30 +125,28 @@ read_stacks_vcf <- function(vcf.file, pop.id.start, pop.id.end, pop.levels, whit

# Make VCF tidy-----------------------------------------------------------------
vcf <- vcf %>%
separate(INFO, c("N", "AF"), sep = ";", extra = "error") %>%
tidyr::separate(INFO, c("N", "AF"), sep = ";", extra = "error") %>%
mutate(
N = as.numeric(stri_replace_all_fixed(N, "NS=", "", vectorize_all=F)),
AF = stri_replace_all_fixed(AF, "AF=", "", vectorize_all=F)
) %>%
separate(AF, c("REF_FREQ", "ALT_FREQ"), sep = ",", extra = "error") %>%
tidyr::separate(AF, c("REF_FREQ", "ALT_FREQ"), sep = ",", extra = "error") %>%
mutate(
REF_FREQ = as.numeric(REF_FREQ),
ALT_FREQ = as.numeric(ALT_FREQ)
)
# Gather individuals in 1 colummn --------------------------------------------
vcf <- gather(vcf, INDIVIDUALS, FORMAT, -c(CHROM:ALT_FREQ))
vcf <- tidyr::gather(vcf, INDIVIDUALS, FORMAT, -c(CHROM:ALT_FREQ))

message("Gathering individuals in 1 column")

# Separate FORMAT and COVERAGE columns ---------------------------------------
message("Tidying the VCF...")

vcf <- vcf %>%
separate(FORMAT, c("GT", "READ_DEPTH", "ALLELE_DEPTH", "GL"),
tidyr::separate(FORMAT, c("GT", "READ_DEPTH", "ALLELE_DEPTH", "GL"),
sep = ":", extra = "error") %>%
# separate(GT, c("ALLELE_P", "ALLELE_Q"),
# sep = "/", extra = "error", remove = F) %>%
separate(ALLELE_DEPTH, c("ALLELE_REF_DEPTH", "ALLELE_ALT_DEPTH"),
tidyr::separate(ALLELE_DEPTH, c("ALLELE_REF_DEPTH", "ALLELE_ALT_DEPTH"),
sep = ",", extra = "error")

# Work with Mutate on CHROM and GL -------------------------------------------
Expand Down
15 changes: 7 additions & 8 deletions R/summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ summary_haplotypes <- function(haplotypes.file,
# Import haplotype file ------------------------------------------------------
haplotype <- read_tsv(haplotypes.file, col_names = T) %>%
rename(LOCUS =`Catalog ID`) %>%
gather(INDIVIDUALS, HAPLOTYPES, -c(LOCUS, Cnt)) %>%
tidyr::gather(INDIVIDUALS, HAPLOTYPES, -c(LOCUS, Cnt)) %>%
mutate(
POP_ID = str_sub(INDIVIDUALS, pop.id.start, pop.id.end),
POP_ID = factor(POP_ID, levels = pop.levels, ordered = T)
Expand Down Expand Up @@ -296,13 +296,13 @@ summary_haplotypes <- function(haplotypes.file,
filter(HAPLOTYPES != "-") %>%
group_by(LOCUS, POP_ID) %>%
mutate(DIPLO= length(INDIVIDUALS) *2) %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = F
) %>%
mutate(ALLELE2 = ifelse(is.na(ALLELE2), ALLELE1, ALLELE2)) %>%
select(-Cnt, -HAPLOTYPES, -INDIVIDUALS) %>%
gather(ALLELE_GROUP, ALLELES, -c(LOCUS, POP_ID, DIPLO)) %>%
tidyr::gather(ALLELE_GROUP, ALLELES, -c(LOCUS, POP_ID, DIPLO)) %>%
group_by(LOCUS, POP_ID, ALLELES) %>%
summarise(
FREQ_ALLELES = length(ALLELES)/mean(DIPLO),
Expand Down Expand Up @@ -363,7 +363,7 @@ summary_haplotypes <- function(haplotypes.file,
pi.data <- haplo.filtered.paralogs %>%
select(-Cnt) %>%
filter(HAPLOTYPES != "-") %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
Expand All @@ -384,7 +384,7 @@ summary_haplotypes <- function(haplotypes.file,
message("Pi calculations by populations, take a break...")

pi.data.pop <- pi.data %>%
gather(ALLELE_GROUP, ALLELES, -c(LOCUS, INDIVIDUALS, POP_ID))
tidyr::gather(ALLELE_GROUP, ALLELES, -c(LOCUS, INDIVIDUALS, POP_ID))

df.split.pop <- split(x = pi.data.pop, f = pi.data.pop$POP_ID) # slip data frame by population
pop.list <- names(df.split.pop) # list the pop
Expand Down Expand Up @@ -457,12 +457,12 @@ summary_haplotypes <- function(haplotypes.file,
summary.prep <- haplo.filtered.consensus %>%
filter(HAPLOTYPES != "-") %>%
select(-Cnt, -INDIVIDUALS) %>%
separate(
tidyr::separate(
col = HAPLOTYPES, into = c("ALLELE1", "ALLELE2"),
sep = "/", extra = "drop", remove = T
) %>%
mutate(ALLELE2 = ifelse(is.na(ALLELE2), ALLELE1, ALLELE2)) %>%
gather(ALLELE_GROUP, ALLELES, -c(LOCUS, POP_ID))
tidyr::gather(ALLELE_GROUP, ALLELES, -c(LOCUS, POP_ID))

summary.pop <- summary.prep %>%
group_by(LOCUS, POP_ID) %>%
Expand Down Expand Up @@ -668,7 +668,6 @@ summary_hapstats <- function(data, pop.num, pop.col.types, pop.integer.equi, pop
POP_ID = factor(POP_ID, levels = pop.levels, ordered = T)
) %>%
arrange(LOCUS, POP_ID)
# separate(HAPLOTYPES, c("ALLELE_P", "ALLELE_Q"), sep = "/", extra = "error", remove = F) %>%
}


Expand Down
Loading

0 comments on commit 7d86ec9

Please sign in to comment.