-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscpred_train_model.R
executable file
·187 lines (174 loc) · 5.66 KB
/
scpred_train_model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/usr/bin/env Rscript
suppressPackageStartupMessages(require(optparse))
suppressPackageStartupMessages(require(workflowscriptscommon))
suppressPackageStartupMessages(require(scPred))
suppressPackageStartupMessages(require(Seurat))
suppressPackageStartupMessages(require(doParallel))
# Use principal component-projected data and selected features to train a specified classification model
# Command-line interface of the script: one make_option() entry per flag.
# The order of the entries is kept stable because it drives the --help output.
option_list <- list(
  # ---- input and identification ----
  make_option(
    c("-i", "--input-object"),
    type = "character",
    action = "store",
    default = NA,
    help = "Path to the input object of Seurat class in .rds format"
  ),
  make_option(
    c("-f", "--train-id"),
    type = "character",
    action = "store",
    default = NA,
    help = "ID of the training dataset (optional)"
  ),
  # ---- model and resampling configuration ----
  make_option(
    c("-m", "--model"),
    type = "character",
    action = "store",
    default = "svmRadial",
    help = "Model type used for training. Must be one of the models supported by Caret package.
Default: svmRadial"
  ),
  make_option(
    c("-r", "--resample-method"),
    type = "character",
    action = "store",
    default = "cv",
    help = "Resampling method used for model fit evaluation"
  ),
  make_option(
    c("-n", "--iter-num"),
    type = "numeric",
    action = "store",
    default = 5,
    help = "Number of resampling iterations. Default: 5"
  ),
  make_option(
    c("-s", "--random-seed"),
    type = "numeric",
    action = "store",
    default = 123,
    help = "Random seed"
  ),
  # ---- parallel execution ----
  make_option(
    c("-p", "--allow-parallel"),
    type = "logical",
    action = "store",
    default = TRUE,
    help = "Should parallel processing be allowed? Default: TRUE"
  ),
  make_option(
    c("-c", "--num-cores"),
    type = "numeric",
    action = "store",
    default = 1,
    help = "For parallel processing, how many cores should be used?"
  ),
  # ---- tuning, metrics and pre-processing ----
  make_option(
    c("-t", "--tune-length"),
    type = "numeric",
    action = "store",
    default = 3,
    help = "An integer denoting the amount of granularity in the tuning parameter grid"
  ),
  make_option(
    c("-a", "--metric"),
    type = "character",
    action = "store",
    default = "ROC,PR,Accuracy,Kappa",
    help = "Performance metric to be used to select best model"
  ),
  make_option(
    c("-e", "--preprocess"),
    type = "character",
    action = "store",
    default = "center,scale",
    help = "A string vector that defines a pre-processing of the predictor data. Enter values as comma-separated string. Current possibilities are
'BoxCox', 'YeoJohnson', 'expoTrans', 'center', 'scale', 'range', 'knnImpute', 'bagImpute', 'medianImpute'
'pca', 'ica' and 'spatialSign'. The default is 'center' and 'scale'."
  ),
  # ---- contents of the trained object ----
  make_option(
    c("-u", "--return-data"),
    type = "logical",
    action = "store",
    default = FALSE,
    help = "If TRUE, training data is returned within scPred object. Default: FALSE"
  ),
  make_option(
    c("-v", "--save-predictions"),
    type = "character",
    action = "store",
    default = "final",
    help = "Specifies the set of hold-out predictions for each resample that should be
returned. Values can be either 'all', 'final' or 'none'."
  ),
  make_option(
    c("-y", "--reclassify"),
    type = "character",
    action = "store",
    default = NULL,
    help = "Cell types to reclassify using a different model"
  ),
  # ---- outputs ----
  make_option(
    c("-o", "--output-path"),
    type = "character",
    action = "store",
    default = NA,
    help = "Path for the output scPred object in .rds format"
  ),
  make_option(
    c("-g", "--get-scpred"),
    type = "logical",
    action = "store",
    default = FALSE,
    help = "Should scpred object be extracted from Seurat object after model training? Default: FALSE"
  ),
  make_option(
    c("-d", "--train-probs-plot"),
    type = "character",
    action = "store",
    default = NA,
    help = "Path for training probabilities plot in .png format"
  )
)
# Parse the command line; an input object and an output path are mandatory.
opt <- wsc_parse_args(option_list, mandatory = c("input_object", "output_path"))

# Comma-separated option values are expanded with wsc_split_string().
preprocess <- wsc_split_string(opt$preprocess, ",")
metric <- wsc_split_string(opt$metric, ",")

# Cell types to reclassify: NULL unless --reclassify was supplied.
# Fix: the value split here is --reclassify itself; the previous code split
# --preprocess, so pre-processing step names ended up as "cell types".
cells_to_reclassify <- NULL
if (!is.null(opt$reclassify)) {
  cells_to_reclassify <- wsc_split_string(opt$reclassify, ",")
}

data_seurat <- readRDS(opt$input_object)

# model training step
# A PSOCK cluster with the requested number of workers is registered as the
# foreach backend for the duration of the training call.
clust <- makePSOCKcluster(opt$num_cores)
classifier <- tryCatch(
  {
    registerDoParallel(clust)
    trainModel(
      data_seurat,
      seed = opt$random_seed,
      model = opt$model,
      resampleMethod = opt$resample_method,
      number = opt$iter_num,
      allowParallel = opt$allow_parallel,
      preProcess = preprocess,
      tuneLength = opt$tune_length,
      metric = metric,
      returnData = opt$return_data,
      savePredictions = opt$save_predictions,
      # Fix: forward the parsed --reclassify value; it used to be hard-coded
      # to NULL, which silently ignored the option.
      reclassify = cells_to_reclassify
    )
  },
  # Shut the workers down even when training fails, then let the error propagate.
  finally = stopCluster(clust)
)

# Optionally replace the trained object by the result of get_scpred().
if (opt$get_scpred) {
  classifier <- get_scpred(classifier)
}

# plot class probs
# Only done when a path for the .png file was given.
if (!is.na(opt$train_probs_plot)) {
  png(opt$train_probs_plot)
  print(plot_probabilities(classifier))
  dev.off()
}

# add dataset field to the object
# The "dataset" attribute holds the training dataset ID, or NA when none was given.
if (!is.na(opt$train_id)) {
  attributes(classifier)$dataset <- opt$train_id
} else {
  attributes(classifier)$dataset <- NA
}

saveRDS(classifier, opt$output_path)