-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathresample-spc.R
185 lines (169 loc) · 8.99 KB
/
resample-spc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#' @title Resample spectra in list-column to new x-axis interval
#' @description Resamples (interpolates) different spectra types with
#' corresponding x-axis values that are both stored in list-columns of a spectra
#' tibble. A spectra tibble hosts spectra, x-axis vectors, metadata, and
#' further linked data with standardized naming conventions. Data input for
#' resampling can for example be generated with `simplerspec::gather_spc()`.
#' Resampling is a key harmonizing step to process and later model spectra
#' measured at different resolutions and spectral ranges (i.e., different
#' spectrometer devices and/or measurement settings).
#' @param spc_tbl Spectra data embedded in a tibble object (classes
#' `"tbl_df", "tbl", "data.frame"`). The spectra tibble needs to contain at
#' least of one of the the spectra columns `spc`, `spc_rs`, `spc_mean`,
#' `spc_nocomp`, `sc_sm`, `sc_rf`, or `spc_pre` (list-columns with spectral
#' `data.table`s), and `wavenumbers` or `wavelengths` (list-column with vectors
#' of x-axis values corresponding to each spectrum). The help section *"Matching
#' spectrum type and corresponding x-axis type"* describes the spectra types
#' and corresponding x-axis types.
#' @param column_in Character vector of length 1L or symbol/name
#' specifying the name of list-column that contains the spectra to be resampled.
#' @param x_unit Character vector of length 1L specifying the measurement unit
#' of the x-axis values (list-column) of the input spectra in `spc_tbl`.
#' Possible values are `"wavenumber"` (default) or `"wavelength"`. Wavenumber
#' is a convenient unit of frequency in the mid-infrared spectral range,
#' where wavelength is often used as spatial period for the visible and
#' near-infrared range.
#' @param wn_lower Numeric value of lowest wavenumber. This argument will only
#' be used if `x_unit = "wavenumber"`. The value serves as starting value for
#' the new wavenumber sequence that the spectra will be resampled upon. Default
#' value is 500 (i.e., in reciprocal centimeters).
#' @param wn_upper Numeric value of highest wavenumber. This argument will only
#' be used if `x_unit = "wavenumber`. The value will be used as last value of
#' the new wavenumber sequence that the spectra will be resampled upon. Default
#' value is 4000 (i.e., in reciprocal centimeters).
#' @param wn_interval Numeric value of the wavenumber increment for the new
#' wavenumber sequence that the spectra will be resampled upon. Default value
#' is 2 (i.e., in reciprocal centimeters).
#' @param wl_lower Numeric value of lowest wavelength. This argument will only
#' be used if `x_unit = "wavelength"`. The value serves as starting value of
#' the new wavenumber sequence that the spectra will be resampled upon.
#' Default value is 350 (i.e. in nanometers).
#' @param wl_upper Numeric value of highest wavelength. This argument will only
#' be used if `x_unit = "wavelength"`. The value will be used as last value of
#' the new wavenumber sequence that the spectra will be resampled upon. Default
#' value is 2500 (i.e., in nanometers).
#' @param wl_interval Numeric value of the wavelength increment for the new
#' wavenumber sequence that the spectra will be resampled upon. This argument
#' will only be used if `x_unit = "wavelength"`. Default value is 1 (i.e., in
#' nanometers).
#' @param interpol_method Character of `"linear"` (default) or `"spline"` with
#' the interpolation method. `"spline"` uses a cubic spline to interpolate the
#' input spectra at given x-axis values to new equispaced x-axis intervals.
#' @return A spectra tibble (`spc_tbl`) containing two added list-columns:
#' * `spc_rs:` Resampled spectra as list of `data.table`s
#' * `wavenumbers_rs` or `wavelengths_rs`: Resampled x-axis values as list of
#' numeric vectors
#' @section Matching spectrum type and corresponding x-axis type:
#' The combinations of input spectrum types (`column_in`) and
#' corresponding x-axis types are generated from a simple lookup list. The
#' following key-value(s) pairs can be matched at given key, which is the column
#' name from `column_in` containing the spectra.
#' * `"spc"` : `"wavenumbers"` or `"wavelengths"` (raw spectra)
#' * `"spc_rs"` : `"wavenumbers_rs"` or `"wavelengths_rs"`) (resampled spectra)
#' * `"spc_mean"` : `"wavenumbers_rs"` or `"wavelengths_rs"` (mean spectra)
#' * `"spc_nocomp"` `"wavenumbers"` or `"wavelengths"` (spectra prior
#' atmospheric compensation)
#' * `"sc_sm" : c("wavenumbers_sc_sm", "wavelengths_sc_sm")` (single channel
#' sample spectra)
#' * `"sc_rf" : c("wavenumbers_sc_rf", "wavelengths_sc_rf")` (single channel
#' reference spectra)
#' * `"spc_pre" : "xvalues_pre"` (preprocessed spectra)
#' @export
resample_spc <- function(spc_tbl,
column_in = "spc",
x_unit = c("wavenumber", "wavelength"),
wn_lower = 500, wn_upper = 4000, wn_interval = 2,
wl_lower = 350, wl_upper = 2500, wl_interval = 1,
interpol_method = c("linear", "spline")) {
# Capture user input as expressions (can be both of type character or symbol),
# also called quoting; convert quosures to characters for later arg matching
column_in <- rlang::enquo(column_in)
column_in_chr <- rlang::quo_name(column_in)
stopifnot(
is.character(x_unit) && length(x_unit) > 0,
is.numeric(wn_lower), is.numeric(wn_upper), is.numeric(wn_interval),
is.numeric(wl_lower), is.numeric(wl_upper), is.numeric(wl_interval)
)
# Lookup list to match spectrum types and corresponding x-axis types
spc_xaxis_types <- list(
"spc" = c("wavenumbers", "wavelengths"), # raw/unprocessed
"spc_rs" = c("wavenumbers_rs", "wavelengths_rs"), # resampled
"spc_mean" = c("wavenumbers_rs", "wavelengths_rs"), # mean
"spc_nocomp" = c("wavenumbers", "wavelengths"), # no atm. compensation
"sc_sm" = c("wavenumbers_sc_sm", "wavelengths_sc_sm"), # single channel sample
"sc_rf" = c("wavenumbers_sc_rf", "wavelengths_sc_rf"), # single channel reference
"spc_pre" = rep("xvalues_pre", 2) # preprocessed
)
spctypes <- names(spc_xaxis_types)
column_spc <- match.arg(column_in_chr, spctypes)
x_unit <- match.arg(x_unit)
switch(x_unit,
wavenumber = {x_unit_int <- 1L},
wavelength = {x_unit_int <- 2L})
interpol_method <- match.arg(interpol_method)
# Final selection of `x_unit` column name string from user input and lookup
x_unit_sel <- spc_xaxis_types[[column_spc]][x_unit_int]
# Both columns with X-values and input spectra need to be present in `spc_tbl`
colnm <- colnames(spc_tbl)
stopifnot(x_unit_sel %in% colnm, column_spc %in% colnm)
# Extract list-column containing spectra
spc_in_list <- dplyr::pull(spc_tbl, !!column_in)
# Extract list-column containing x-axis values
xvalues_in_list <- dplyr::pull(spc_tbl, !!x_unit_sel)
# Automatically check the arrangement of the input x-Unit values;
# often, it is convenient to have have a descending ordner of spectral columns
# if the physical quantity of the x-axis is wavenumbers
xvalue_order_chr <- purrr::map_chr(xvalues_in_list, seq_order)
if (length(unique(xvalue_order_chr)) > 1L) {
stop(
glue::glue(
"The column `{x_unit_sel}` which contains the list of X-values
has both elements of ascending and descending order.
* To resolve, you can split `spc_tbl` in a list of `spc_tbl`s
with identical X-value vectors based on `group_by_col_hash()`,
and apply `resample_spc()` separately to each list element.
* Alternatively, you could fix the order of x-axis values
for all input spectra and X-value vectors to all ascending or
descending"),
call. = FALSE)
}
xvalue_order <- xvalue_order_chr[1L]
# Generate sequence of new x-axis values
switch(x_unit_int,
`1L` = {
xvalues_out <- seq(from = wn_lower, to = wn_upper, by = wn_interval)
x_unit_type_rs <- "wavenumbers_rs"
},
`2L` = {
xvalues_out <- seq(from = wl_lower, to = wl_upper, by = wl_interval)
x_unit_type_rs <- "wavelengths_rs"
})
if (xvalue_order == "descending") xvalues_out <- rev(xvalues_out)
# Repeat sequence of new (resampled) x-axis values in list (for every obs.)
xvalues_out_list <- rep(list(xvalues_out), nrow(spc_tbl))
names(xvalues_out_list) <- names(spc_in_list)
# Resample all spectra extracted from list-column `column_in` using prospectr
spc_rs <- lapply(
seq_along(spc_in_list),
function(i) {
data.table::data.table(
prospectr::resample(
X = spc_in_list[[i]], # spectral data.table to resample
wav = xvalues_in_list[[i]], # old x-values vector
new.wav = xvalues_out_list[[i]], # new x-values vector
interpol = interpol_method
)
)
}
)
names(spc_rs) <- names(spc_in_list)
spc_tbl_out <-
spc_tbl %>%
tibble::add_column(
spc_rs = spc_rs,
!!x_unit_type_rs := xvalues_out_list
)
return(spc_tbl_out)
}
# Helper
seq_order <- function(x) ifelse(x[1L] < x[length(x)], "ascending", "descending")