-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsample.R
executable file
·100 lines (70 loc) · 3.02 KB
/
sample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env Rscript

# Usage message consumed by docopt() to build the command-line parser.
# The layout is significant to docopt: usage patterns are indented beneath
# "Usage:", a blank line separates the sections, and at least two spaces
# separate each option from its description.
doc <- "sample

Usage:
  sample -b <id> -f <pattern> [-n <n>] -o <file>

Options:
  -h --help                         Show this screen.
  -b <id> --batch_id=<id>           Batch ID.
  -f <pattern> --pattern=<pattern>  Regular expression - only csv's.
  -n <n> --replicates=<n>           Number of replicates to select per plate map.
  -o <file> --output=<file>         Output file - either csv, rds, or feather."
suppressWarnings(suppressMessages(library(docopt)))
suppressWarnings(suppressMessages(library(dplyr)))
suppressWarnings(suppressMessages(library(magrittr)))
suppressWarnings(suppressMessages(library(readr)))
# Parse the command line against the usage message held in `doc`.
opts <- docopt(doc)

batch_id <- opts[["batch_id"]]
pattern <- opts[["pattern"]]
# May be NULL; the replicate filter further down is skipped in that case.
replicates <- opts[["replicates"]]
output <- opts[["output"]]

# Both directories are resolved relative to the current working directory.
backend_dir <- file.path("../..", "backend", batch_id)
metadata_dir <- file.path("../..", "metadata", batch_id)

# Every file under the batch's backend directory, at any depth, whose name
# matches the regular expression given with -f. Full paths are returned.
file_list <- list.files(
  backend_dir,
  pattern = pattern,
  recursive = TRUE,
  full.names = TRUE
)
if (!is.null(replicates)) {
  # Restrict `file_list` to the first `replicates` plates of each plate map.
  # Reject anything that is not a positive integer up front: seq()-style
  # ranges over 0 or a negative count would silently select the wrong plates.
  replicates <- suppressWarnings(as.integer(replicates))
  if (is.na(replicates) || replicates < 1L) {
    stop("--replicates must be a positive integer.", call. = FALSE)
  }

  # The plate barcode of a file is taken to be the name of the directory that
  # directly contains it (the second-to-last component of its path).
  file_plates <- basename(dirname(file_list))

  barcode_platemap <- suppressMessages(
    readr::read_csv(
      file.path(metadata_dir, "barcode_platemap.csv"),
      col_types = cols(
        Assay_Plate_Barcode = col_character(),
        Plate_Map_Name = col_character()
      )
    )
  )

  # Among the plates that were actually retrieved, number the plates of each
  # plate map by ascending barcode and keep the first `replicates` of them.
  selected_plates <- barcode_platemap %>%
    select(Assay_Plate_Barcode, Plate_Map_Name) %>%
    filter(Assay_Plate_Barcode %in% file_plates) %>%
    group_by(Plate_Map_Name) %>%
    mutate(replicate_id = dense_rank(Assay_Plate_Barcode)) %>%
    ungroup() %>%
    filter(replicate_id <= replicates)

  plate_list <- unique(selected_plates[["Assay_Plate_Barcode"]])

  # Compare plate directory names exactly. Pattern-matching the barcode against
  # the whole path would also keep files of any plate (or batch) whose name
  # merely contains a selected barcode, and would misread regex metacharacters.
  file_list <- file_list[file_plates %in% plate_list]
}
# Log which files are about to be read, one path per line.
futile.logger::flog.info(
  sprintf(
    "Reading %d files...:\n%s",
    length(file_list),
    paste(file_list, collapse = "\n")
  )
)

# Read each file as CSV (column-specification messages silenced) and stack
# the resulting tables row-wise into a single data frame.
df <- bind_rows(
  lapply(
    file_list,
    function(path) {
      suppressMessages(readr::read_csv(path))
    }
  )
)

# Log the size of the combined table.
futile.logger::flog.info(
  sprintf("Output contains %d rows.", nrow(df))
)
# Choose the writer from the extension of the output path; any extension
# other than rds, csv, or feather aborts the script with an error.
switch(
  tools::file_ext(output),
  rds = saveRDS(df, output),
  csv = readr::write_csv(df, output),
  feather = feather::write_feather(df, output),
  stop(paste0("Unsupported file extension: ", tools::file_ext(output)))
)