-
Notifications
You must be signed in to change notification settings - Fork 12
/
plot-spc-extended.R
475 lines (429 loc) · 20.9 KB
/
plot-spc-extended.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
################################################################################
## Helper functions to gather spectra, corresponding x-value vectors,
## metadata and measure columns (e.g. chemical reference data) from tibble
## list-columns into a single data.table or a list of data.tables conntaining
## long form data directly to be used for customized ggplot2 plotting functions
################################################################################
# bind a list-column in a tibble to a list of data.tables ----------------------
#' @title Bind list-columns within a tibble into a list of data.tables
#' @description Bind one to many list-columns in spectral tibble into a list
#' of data.tables.
#' @param spc_tbl Spectral data in a tibble data frame (classes "tibble_df",
#' "tbl" and "data.frame").
#' @param lcols Character vector of column names of list-columns to be bound
#' into a list of data.tables
#' @param spc_id Character vector denoting column name for a unique spectrum ID.
#' Default is \code{"unique_id"}.
#' @param group_id Character vector denoting column name for the spectrum group
#' ID. Default is \code{"sample_id"}. The group ID can later be used for
#' plotting spectra by group (e.g. by using different colors or panels).
#' @return A list of data.tables. Elements contain data from list-columns
#' specified in \code{lcols} argument as data.tables. All data.tables contain in
#' addition \code{spc_id} and \code{group_id} columns.
#' @export
bind_lcols_dts <- function(spc_tbl, lcols,
spc_id = "unique_id",
group_id = "sample_id") {
# todo: add warning for lcols not present in spc_tbl
which_bind <- colnames(spc_tbl) %in% lcols
lcols_to_bind <- colnames(spc_tbl)[which_bind]
names(lcols_to_bind) <- lcols_to_bind
dts <- purrr::map(lcols_to_bind,
function(y) {
if (is.list(spc_tbl[, y][[y]])) {
# todo: Test if number of columns is equal in each data.frame or matrix
# of the list-(column); if not, return a comprehensible error
data.table::data.table(do.call(rbind, spc_tbl[, y][[y]]))
} else if (is.atomic((spc_tbl[, y][[y]]))) {
data.table::data.table(spc_tbl[, y])
}
}
)
# Append IDs to data.tables in list
spc_id <- spc_tbl[, spc_id][[spc_id]]
lcol_types <- purrr::imap(dts, ~ rep(.y, nrow(spc_tbl)))
group_id <- as.character(spc_tbl[, group_id][[group_id]])
# Return list of data.tables
purrr::imap(dts, function(dt, nm) {
dt[, `:=` (spc_id = spc_id, group_id = group_id)]
dt[, `:=` (lcol_type = lcol_types[[nm]])]}
)
}
# Convert list of wide form data.tables into long form -------------------------
dts_to_long <- function(spc_tbl, lcols,
spc_id = "unique_id",
group_id = "sample_id",
variable_name = "variable",
value_name = "value") {
dts <- bind_lcols_dts(spc_tbl = spc_tbl, lcols = lcols,
spc_id = spc_id, group_id = group_id)
# Convert list of data.tables into long form
dts_long <- purrr::map(dts, function(x) {
data.table::melt(
x,
id.vars = c("spc_id", "lcol_type", "group_id"),
variable.factor = FALSE,
variable.name = variable_name,
value.name = value_name
)}
)
# Append unique id (idx lg:= index 'long') for long form
purrr::imap(dts_long,
function(dt_long, nm) {
dt_long[, `:=` (id_long = 1:nrow(dt_long))]
}
)
}
# Match the spectra list columns and corresponding xunit list columns ----------
match_lcols <- function(spc_tbl, lcols) {
# Determine to which spectrum types list-columns belong
lcols_spc_all <- c("spc", "spc_rs", "spc_mean", "spc_nocomp", "sc_sm",
"sc_rf", "spc_pre")
xvalue_lookup <- list(
"spc" = c("wavenumbers", "wavelengths"),
"spc_rs" = c("wavenumbers_rs", "wavelengths_rs"),
"spc_mean" = c("wavenumbers_rs", "wavelengths_rs"),
"spc_nocomp" = c("wavenumbers", "wavelengths"),
"sc_sm" = c("wavenumbers_sc_sm"),
"sc_rf" = c("wavenumbers_sc_rf"),
"spc_pre" = c("xvalues_pre")
)
# Create character vector of spectra type names
spc_matched <- lcols[lcols %in% lcols_spc_all]
spc_matched <- spc_matched[order(match(spc_matched, lcols_spc_all))]
# Create vector of corresponding xunit types in predefined order
xvalues <- unlist(xvalue_lookup[spc_matched])
xvalues_matched <- colnames(spc_tbl)[colnames(spc_tbl) %in% xvalues]
xvalues_matched <- xvalues_matched[order(match(xvalues_matched, xvalues))]
# Return all matches as list of character vectors for spectra and x-values
list(
spc_matched = spc_matched,
xvalues_matched = xvalues_matched
)
}
# Create a list of long form data.tables containing spectra and x-values for
# a set of spectral types ------------------------------------------------------
tolist_spc_xvalues <- function(spc_tbl, lcols_spc,
spc_id = "unique_id",
group_id = "sample_id",
variable_name = "variable",
value_name = "value") {
lcols_matched <- match_lcols(spc_tbl = spc_tbl, lcols = lcols_spc)
# Check if length of matched spectra and xunits is equal and if not,
# return an error
spc_types <- lcols_matched[["spc_matched"]]
xunit_types <- lcols_matched[["xvalues_matched"]]
# Gather different spectra types into list of data.tables
spc_dts <- dts_to_long(spc_tbl = spc_tbl,
lcols = spc_types, spc_id = spc_id, group_id = group_id,
variable_name = "spc_variable", value_name = "spc_value")
# Gather corresponding xunit types into list of data.tables
xvalues_dts <- dts_to_long(spc_tbl = spc_tbl,
lcols = xunit_types, spc_id = spc_id, group_id = group_id,
variable_name = "xvalues_variable", value_name = "xvalues_value")
# Rename lcol_type to spc_type only for spectra data.tables
spc_dts <- purrr::map(spc_dts,
function(x) data.table::setnames(x, "lcol_type", "spc_type"))
# Return data.tables in nested list
list(
"spc_dts" = spc_dts,
"xvalues_dts" = xvalues_dts
)
}
# Merge data tables of spectra, xunits, metadata and measured variables
# into a single long form data.table -------------------------------------------
#' @title Merge list-columns of spectra, x-axis values, metadata and additional
#' measured variables into a single long form data.table
#' @description Helper function that merges all spectra and related data into
#' a single long form data.table than can subsequently be used for plotting.
#' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata
#' and eventual measured variables as list-columns.
#' @param lcols_spc Character vector of spectral list-columns to be extracted.
#' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).
#' @param lcol_measure Character vector of length 1 denoting the column name
#' of the measure columns. This argument is optional. Default is \code{NULL},
#' which does not extract an additional measure column.
#' @param spc_id Character vector of column that contains a unique spectral
#' identifier for all spectra. Default is \code{"unique_id"}.
#' @param group_id Character vector of columns that is used assigning spectra
#' into groups. Default is \code{"sample_id"}. The \code{group_id} can be
#' used for later plotting and thereby visually separating spectral groups into
#' using different colors or panels.
#' @return A single data.table containing long form aggregated data of
#' spectra, x-axis values, metadata and an additionally measured variable.
#' @export
merge_dts <- function(spc_tbl,
lcols_spc = c("spc", "spc_pre"), lcol_measure = NULL,
spc_id = "unique_id",
group_id = "sample_id") {
id_long <- NULL
spc_xvalues <- tolist_spc_xvalues(spc_tbl = spc_tbl,
lcols_spc = lcols_spc, spc_id = spc_id, group_id = group_id)
# Set keys for merging list of data.tables for spectra and xunits
purrr::imap(
spc_xvalues,
function(dts, nm) purrr::map(dts[[nm]],
function(x) data.table::setkey(x = x, spc_id, id_long, group_id))
)
spc_xvalues <- purrr::map2(spc_xvalues[["spc_dts"]],
spc_xvalues[["xvalues_dts"]], merge)
# Bind metadata if present, and set keys for merging metadata to spectra
metadata <- bind_lcols_dts(spc_tbl = spc_tbl,
lcols = "metadata", spc_id = spc_id, group_id = group_id)
dts <- list(
"data" = spc_xvalues,
"metadata" = rep(metadata, length(spc_xvalues))
)
if (length(metadata) == 0) dts$metadata <- NULL
# Convert a "measure" tibble column (numeric|character) to list of data.tables
if (!is.null(lcol_measure)) {
measure <- bind_lcols_dts(spc_tbl = spc_tbl,
lcols = lcol_measure, spc_id = spc_id, group_id = group_id)
dts$measure <- rep(measure, length(spc_xvalues))
}
# Set keys (common columns), merge metadata with spectral data (list of
# data tables) and combine into a single data.table that is returned
purrr::imap(dts,
function(dt, nm) lapply(dts[[nm]],
function(x) data.table::setkey(x = x, spc_id, group_id))
)
# Merge multiple data.table by common keys
# https://gist.github.com/reinholdsson/67008ee3e671ff23b568
data.table::rbindlist(
lapply(seq_along(dts[[1]]),
function(i) Reduce(merge, lapply(dts, `[[`, i)))
)
}
# Wrapper function around merge_dts for list of tibbles to aggregate data for
# plotting ---------------------------------------------------------------------
#' @title Wrapper function around \code{merge_dts()} for list of tibbles to
#' aggregate data for plotting.
#' @description Instead of a single spectral tibble (data frame) multiple
#' spectral tibbles can be merged into a long-form data.table for plotting
#' spectra and related data. For details, see
#' \code{\link{merge_dts}}.
#' @param spc_tbl_l List of spectral tibbles (data frames).
#' @param lcols_spc Character vector of spectral list-columns to be extracted.
#' Default is \code{c("spc", "spc_pre")} (raw and preprocessed spectra).
#' @param lcol_measure Character vector of length 1 denoting the column name
#' of the measure columns. This argument is optional. Default is \code{NULL},
#' which does not extract an additional measure column.
#' @param spc_id Character vector of column that contains a unique spectral
#' identifier for all spectra. Default is \code{"unique_id"}.
#' @param group_id Character vector of columns that is used assigning spectra
#' into groups. Default is \code{"sample_id"}. The \code{group_id} can be
#' used for later plotting and thereby visually separating spectral groups into
#' using different colors or panels.
#' @return A single data.table containing long form aggregated data of
#' spectra, x-axis values, metadata and an additionally measured variable.
#' An additional column called \code{group_id_tbl} is appended. It denotes
#' the name of the spectral tibble supplied with the list \code{spc_tbl_l}.
#' @export
merge_dts_l <- function(spc_tbl_l,
lcols_spc = c("spc", "spc_pre"),
lcol_measure = NULL,
spc_id = "unique_id",
group_id = "sample_id") {
group_id_tbl <- NULL
dts <- lapply(seq_along(spc_tbl_l),
function(i) merge_dts(spc_tbl = spc_tbl_l[[i]],
lcols_spc = lcols_spc, lcol_measure = lcol_measure,
spc_id = spc_id, group_id = group_id))
dts <- lapply(seq_along(dts),
function(i) dts[[i]][, group_id_tbl := names(spc_tbl_l[i])])
data.table::rbindlist(dts)
}
## Create plotting functions based on complete long data.table =================
# Function that reorders factor column in data.table based on ascending numeric
# order when converted to numeric type
# https://stackoverflow.com/questions/15665535/reorder-factors-numerically-in-a-data-frame
# ------------------------------------------------------------------------------
reorder_factor_num <- function(dt, column = "group_id") {
group_id <- NULL
dt[, group_id := as.factor(group_id)]
sorted_labels <- paste(sort(as.numeric(levels(dt$group_id))))
dt$group_id <- factor(dt$group_id, levels = sorted_labels)
dt
}
# Custom ggplot2 labeller for spectra types ------------------------------------
relabel_spc_types <- function(lb_sc_sm = "Reflectance sample (<ScSm>)",
lb_sc_rf = "Reflectance reference (<ScRf>)",
lb_ig_sm = "Interferogram sample (<IgSm>)",
lb_ig_rf = "Interferogram reference (<IgRf>)",
lb_spc_nocomp = "Abs. before atm. comp.",
lb_spc = "Absorbance",
lb_spc_rs = "Resampled Abs.",
lb_spc_mean = "Mean Abs.",
lb_spc_pre = "Preprocessed Abs.") {
ggplot2::as_labeller(
x = c(
"sc_sm" = lb_sc_sm,
"sc_rf" = lb_sc_rf,
"ig_sm" = lb_ig_sm,
"spc_nocomp" = lb_spc_nocomp,
"spc" = lb_spc,
"spc_rs" = lb_spc_rs,
"spc_mean" = lb_spc_mean,
"spc_pre" = lb_spc_pre
)
)
}
# Main spectra explorative analysis and diagnostics plotting function ----------
#' @title ggplot2 wrapper for extended spectra plotting
#' @description \code{plot_spc_ext} is a custom plotting function developed
#' within the simplerspec framework. Returns plots based on ggplot2
#' (class "ggplot"). Different spectra types such as raw or preprocessed spectra
#' and groups can be differentiated by different colors or by using panels
#' (so called facets). Additionally, spectra can be colored based on an
#' additional measure variable, e.g. determined by chemical reference analysis.
#' @param spc_tbl Tibble data frame containing spectra, x-axis values, metadata
#' and eventual measured variables as list-columns.
#' @param spc_tbl_l List of spectral tibbles (data frames). Default is
#' \code{NULL} (argument is not used).
#' @param lcols_spc Character vector of spectral list-columns to be extracted.
#' Default is \code{"spc"} (raw spectra).
#' @param lcol_measure Character vector of length 1 denoting the column name
#' of the measure columns. This argument is optional. Default is \code{NULL},
#' which does not extract an additional measure column.
#' @param lcol_measure_col_palette Palette value supplied to
#' `ggplot::scale_colour_brewer()`. Default is `"Spectral"`, but you can set
#' it to the default argument `1` (will use
#' `scale_colour_brewer(..., palette = 1)`).
#' @param lcol_measure_col_direction Sets the the order of colours in the scale
#' that is based on a measure column. Default is \code{-1} which reverses the
#' scale. Argument is passed on to the function `ggplot2::sclae_colour_brewer()`
#' as argument `direction`.
#' @param spc_id Character vector denoting column name for a unique spectrum ID.
#' Default is \code{"unique_id"}.
#' @param group_id Character vector denoting column name for the spectrum group
#' ID. Default is \code{"sample_id"}. The group ID is used for
#' plotting spectra by group (e.g. by using different colors or panels).
#' @param group_id_order Logical that specifies whether the panel names
#' derived from a numeric \code{group_id} column are reordered using ascending
#' numbers. Default is \code{TRUE}.
#' @param group_color Logical defining whether spectra are colored by the column
#' specified by \code{group_id}.
#' @param group_color_palette Character (1L) defining the diverging colour
#' scales from colorbrewer.org; see `?scale_colour_brewer` for supported
#' diverging colur types (`palette` argument).
#' @param group_panel Logical defining whether spectra are arranged into panels
#' by groups specified in \code{group_id}. Default is \code{TRUE}.
#' @param group_legend Logical defining whether a legend for the \code{group_id}
#' is plotted. Default is \code{FALSE}.
#' @param ncol Integer vector of length 1. Defines number of columns when
#' plotting panels (facets). Default is \code{NULL} (argument not used).
#' @param relabel_spc Logical defining whether panels are relabeled with custom
#' names for spectra types. Default is TRUE. When \code{TRUE}, arguments
#' from \code{relabel_spc_types} can be passed to \code{plot_spc_ext}
#' (supported via the \code{...} (ellipsis) argument)
#' @param ylab Character vector or vector of type \code{"expression"} created by
#' mathematical expression created by \code{expression}. Custom annotation for
#' y-axis of spectra
#' @param alpha Integer of length 1, from 0 to 1. Defines transparency of
#' spectral lines. Default is \code{0.5} (0 is completely transparent and
#' 1 is no transparency).
#' @param line_width Numeric vector of length 1 specifying the width of the
#' spectral lines. Default is \code{0.2}.
#' @param ... Further arguments to be passed to \code{plot_spc_ext}. Currently,
#' arguments of \code{relabel_spc_types} are supported.
#' @return Object of class \code{"ggplot"} (ggplot2 graph).
#' @export
plot_spc_ext <- function(spc_tbl, spc_tbl_l = NULL,
lcols_spc = "spc",
lcol_measure = NULL,
lcol_measure_col_palette = "Spectral",
lcol_measure_col_direction = -1,
spc_id = "unique_id",
group_id = "sample_id", group_id_order = TRUE,
group_color = TRUE,
group_color_palette = NULL,
group_panel = TRUE,
group_legend = FALSE,
ncol = NULL,
relabel_spc = TRUE,
ylab = "Spectrum value",
alpha = 0.5, line_width = 0.2,
# Further arguments to be passed to functions called
# within this function
...) {
# Merge spectral data, additional (measurement data) and metadata into a
# single long-form data.table
if (!is.null(spc_tbl_l)) {
dt <- merge_dts_l(spc_tbl_l = spc_tbl_l,
lcols_spc = lcols_spc, lcol_measure = lcol_measure,
spc_id = spc_id, group_id = group_id) # see merge_dts_l wrapper function
} else {
dt <- merge_dts(spc_tbl = spc_tbl,
lcols_spc = lcols_spc, lcol_measure = lcol_measure,
spc_id = spc_id, group_id = group_id)
}
# Option to order originally numeric group_id factors by group
if (is.null(spc_tbl_l)) {
if (group_id_order && is.numeric(dplyr::pull(spc_tbl, !!group_id))) {
dt <- reorder_factor_num(dt = dt, column = "group_id")
}
}
brk <- pretty(dt[["xvalues_value"]], n = 10) # Pretty x-axis breaks
p <- ggplot2::ggplot(data = dt,
ggplot2::aes_string(x = "xvalues_value", y = "spc_value"))
if (group_color == TRUE && is.null(lcol_measure)) {
p <- p +
ggplot2::geom_line(ggplot2::aes_string(colour = "group_id",
group = "spc_id"),
alpha = alpha, size = line_width)
if (!is.null(group_color_palette)) {
p <- p +
ggplot2::scale_colour_brewer(type = "div",
palette = group_color_palette, direction = -1)
}
if (group_legend == FALSE) {
p <- p + ggplot2::guides(colour = FALSE)
}
} else if (group_color == FALSE && is.null(lcol_measure)) {
p <- p + ggplot2::geom_line(
ggplot2::aes_string(group = "spc_id"),
alpha = alpha, size = line_width)
}
if (!is.null(lcol_measure)) {
p <- p + ggplot2::geom_line(
ggplot2::aes_string(colour = lcol_measure, group = "spc_id",
x = "xvalues_value", y = "spc_value"),
alpha = alpha, size = line_width, inherit.aes = FALSE) +
ggplot2::scale_colour_distiller(palette = lcol_measure_col_palette,
direction = lcol_measure_col_direction)
}
# Plot different spectral types and group_id in panels
if (group_panel && length(lcols_spc) > 1) {
if (relabel_spc) {
lbl <- relabel_spc_types(...) # see this function for arguments and values
p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free",
labeller = ggplot2::labeller(spc_type = lbl))
} else {
p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free")
}
}
if (group_panel && length(lcols_spc) == 1) {
p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free")
}
# Special case when list of tibbles are supplied
if (group_panel && !is.null(spc_tbl_l)) {
p <- ggplot2::ggplot(data = dt,
ggplot2::aes_string(x = "xvalues_value", y = "spc_value")) +
ggplot2::geom_line(ggplot2::aes_string(colour = "group_id_tbl",
group = "spc_id"), alpha = alpha, size = line_width)
if (relabel_spc == TRUE) {
lbl <- relabel_spc_types(...)
p <- p + ggplot2::facet_grid(spc_type ~ group_id, scales = "free",
labeller = ggplot2::labeller(spc_type = lbl))
} else if (relabel_spc == FALSE) {
p <- p + ggplot2::facet_wrap(~ group_id, ncol = ncol, scales = "free")
}
}
p <- p + ggplot2::scale_x_reverse(breaks = brk) +
ggplot2::xlab(expression(paste("Wavenumber [", cm^-1, "]"))) +
ggplot2::ylab(ylab) +
ggplot2::theme_bw() +
ggplot2::theme(legend.position = "right",
axis.text.x = ggplot2::element_text(angle = 90, hjust = 1, vjust = 0.5))
p
}