-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathbatch_correct.R
35 lines (25 loc) · 1 KB
/
batch_correct.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
library(argparser)
# Command-line interface: two input matrices and a prefix for the output files.
p = arg_parser("Normalize training and test set")
p = add_argument(p, "train", help = "training matrix")
p = add_argument(p, "test", help = "testing matrix")
p = add_argument(p, "prefix", help = "prefix to write files")

# The hard-coded debug arguments apply only in an interactive session; a
# non-interactive run (e.g. via Rscript) always honours the real command line
# instead of having it silently overwritten.
argv = if (interactive()) {
  parse_args(p, c('data/raw/cells.rds', 'data/raw/bortezomib.rds', 'debug'))
} else {
  parse_args(p)
}
# Load the training (cell line) and test (clinical) matrices. The code below
# treats rows as samples and columns as genes.
cell = readRDS(argv$train)
clin = readRDS(argv$test)

# Discard every column (gene) that has a missing value in any row.
# `drop = FALSE` keeps the two-dimensional shape, and with it the column
# names, even when only a single column survives the filter.
cell = cell[, colSums(is.na(cell)) == 0L, drop = FALSE]
clin = clin[, colSums(is.na(clin)) == 0L, drop = FALSE]

# Restrict both data sets to the genes they share, in identical column order.
genes = intersect(colnames(cell), colnames(clin))
stopifnot(length(genes) > 0L)
cell = cell[, genes, drop = FALSE]
clin = clin[, genes, drop = FALSE]
suppressPackageStartupMessages(library(sva))

# One batch label per sample, in the same order in which the rows are stacked
# below: all rows of `cell` first, then all rows of `clin`.
batches = factor(rep(c('cell', 'clin'), times = c(nrow(cell), nrow(clin))))

# ComBat is given the transposed stack (genes in rows, samples in columns);
# the corrected result is transposed back to samples-by-genes afterwards.
cell_clin = t(rbind(cell, clin))
cell_clin = ComBat(dat = cell_clin, batch = batches)
cell_clin = t(cell_clin)

# Split the corrected matrix back into the two data sets. `drop = FALSE`
# keeps a batch that contains a single sample as a one-row matrix instead of
# collapsing it to a plain vector.
cell = cell_clin[batches == 'cell', , drop = FALSE]
clin = cell_clin[batches == 'clin', , drop = FALSE]
# Write each result to "<prefix><suffix>"; the prefix is prepended verbatim,
# without any separator.
outputs = list(cells.rds = cell, clinical.rds = clin)
for (suffix in names(outputs)) {
  saveRDS(outputs[[suffix]], file = paste0(argv$prefix, suffix))
}