-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathredundancy_analysis.R
229 lines (173 loc) · 9.31 KB
/
redundancy_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# Análise de Redundância RDA ---------------------------------------------------------------------------------------------------------------
# Autoria do script: Jeanne Franco ---------------------------------------------------------------------------------------------------------
# Data: 27/05/22 ---------------------------------------------------------------------------------------------------------------------------
# Referência: http://r.qcbs.ca/workshop10/book-en/redundancy-analysis.html -----------------------------------------------------------------
# Carregar dados ---------------------------------------------------------------------------------------------------------------------------
# Make sure the files are in your working directory! If R
# cannot find the dataset, set your working directory with
# setwd() to the folder in which your data is stored (e.g.
# setwd('~/Desktop/workshop10'))
# Species community data frame (fish abundance)
spe <- read.csv("doubsspe.csv", row.names = 1)
spe <- spe[-8, ] # Site number 8 contains no species, so we remove row 8 (site 8)
spe
# Be careful to only run this command line once as you are
# overwriting 'spe' each time!
# Environmental data frame: “DoubsEnv.csv”
env <- read.csv("doubsenv.csv", row.names = 1)
env <- env[-8, ] # Remove corresponding abiotic data for site 8 (because removed from fish data).
env
# Again, be careful to only run the last line once.
# Análise RDA ------------------------------------------------------------------------------------------------------------------------------
# Nós computamos a análise de RDA usando a função rda() do pacote vegan.
library(vegan)
# Step 1: Standardize and/or transform the data.
# Apply Hellinger transformation to correct for the double
# zero problem
spe.hel <- decostand(spe, method = "hellinger")
env.z <- decostand(env, method = "standardize")
# Iremos remover a variável das porque ela apresenta colinearidade dcom várias outras
# variáveis.
env.z <- subset(env.z, select = -das)
# Step 2: Run the RDA.
spe.rda <- rda(spe.hel ~ ., data = env.z)
# Step 3: Extract key results of the RDA.
summary(spe.rda)
# The first section of the summary contains
# the pieces we need to verify the performance of our RDA.
## Partitioning of variance:
## Inertia Proportion
## Total 0.5025 1.0000
## Constrained 0.3689 0.7341
## Unconstrained 0.1336 0.2659
# Constrained Proportion: variance of Y explained by X (73.41%)
# Unconstrained Proportion: unexplained variance in Y (26.59%)
# How would you report these results? You could say: “The included environmental
# variables explain 73.41% of the variation in fish community composition across
# sites.”
# Seleção de variáveis ---------------------------------------------------------------------------------------------------------------------
# Se quiser simplificar o modelo, nós podemos performar um stepwise. Essa seleção
# nos ajuda a selecionar variáveis estatisticamente significantes. Entretanto,
# selecionar variáveis ecológicas é mais importante.
# Nós iremos fazer a seleção das nossas 11 variáveis ambientais. Para isso, nós
# usamos a função ordiR2step().
# Forward selection of variables:
fwd.sel <- ordiR2step(rda(spe.hel ~ 1, data = env.z), # lower model limit (simple!)
scope = formula(spe.rda), # upper model limit (the "full" model)
direction = "forward",
R2scope = TRUE, # can't surpass the "full" model's R2
pstep = 1000,
trace = FALSE) # change to TRUE to see the selection process!
# Which variables are retained by the forward selection?
# Check the new model with forward-selected variables
fwd.sel$call
# Resultado:
## rda(formula = spe.hel ~ alt + oxy + dbo, data = env.z)
# What is the adjusted R2 of the RDA with the selected variables?
# Write our new model
spe.rda.signif <- rda(spe.hel ~ alt + oxy + dbo, data = env.z)
# check the adjusted R2 (corrected for the number of
# explanatory variables)
RsquareAdj(spe.rda.signif)
# Resultado:
## $r.squared
## [1] 0.5894243
##
## $adj.r.squared
## [1] 0.5401552
# The explanatory variables (altitude, oxygen and biological oxygen demand) now
# explain 59% of the variance in Y (species abundances across sites, or community
# composition). When we correct for the number of variables in X, the adjusted
# R2 tells us that three selected variables explain 54% of the variance in species
# abundances.
# Because the adjusted R2 is corrected for the number of explanatory variables,
# it is comparable across models and datasets. For this reason, you should report
# the adjusted R2 when writing up the result of an RDA for an article, or in a
# study which compares the explanatory power of different models.
# Testando a significância -----------------------------------------------------------------------------------------------------------------
# The significance of your RDA can be tested using the function anova.cca().
anova.cca(spe.rda.signif, step = 1000)
# You can also test the significance of each variable with by = "term".
anova.cca(spe.rda.signif, step = 1000, by = "term")
# You can also test the significance of each canonical axis with by = "axis".
# Recall that these axes represent the variation in explanatory variables in
# fewer dimensions.
anova.cca(spe.rda.signif, step = 1000, by = "axis")
# Our full model is statistically significant (p = 0.001), and every variable
# included in this model is significant as well (p = 0.001). Every canonical
# axis resulting from the RDA is also statistically significant (p = 0.001).
# Gráfico RDA ------------------------------------------------------------------------------------------------------------------------------
# One of the most powerful aspects of RDA is the simultaneous visualization of
# your response and explanatory variables (i.e. species and environmental variables).
# Assim como na PCA existem dois tipos de escalas:
# - Tipo 1: distância entre os objetos reflete as similaridades entre eles.
# - Tipo 2: ângulos entre as variáveis reflete as corelações entre elas.
# Type 1 scaling
ordiplot(spe.rda.signif, scaling = 1, type = "text")
# Type 2 scaling
ordiplot(spe.rda.signif, scaling = 2, type = "text")
# Type 1 scaling: shows similarities between objects in the response matrix.
# Sites (numbers) that are closer together have more similar communities.
# Species that are closer together occupy more sites in common.
# Type 2 scaling: shows the effects of explanatory variables.
# Longer arrows mean this variable strongly drives the variation
# in the community matrix.
# Arrows pointing in opposite directions have a negative relationship.
# Arrows pointing in the same direction have a positive relationship.
# Customizando o gráfico RDA ---------------------------------------------------------------------------------------------------------------
# Both plot() and ordiplot() make quick and simple ordination plots, but you can
# customize your plots by extracting scores with scores() and manually setting
# the aesthetics of points(), text(), and arrows().
# Here is an example of a custom triplot. Feel free to play around with the colours
# and other parameters to make it your own!
# Custom triplot code!
## extract % explained by the first 2 axes
perc <- round(100*(summary(spe.rda.signif)$cont$importance[2, 1:2]), 2)
## extract scores - these are coordinates in the RDA space
sc_si <- scores(spe.rda.signif, display = "sites", choices = c(1,2), scaling = 1)
sc_sp <- scores(spe.rda.signif, display = "species", choices = c(1,2), scaling = 1)
sc_bp <- scores(spe.rda.signif, display = "bp", choices = c(1, 2), scaling = 1)
## Custom triplot, step by step
# Set up a blank plot with scaling, axes, and labels
plot(spe.rda.signif,
scaling = 1, # set scaling type
type = "none", # this excludes the plotting of any points from the results
frame = FALSE,
# set axis limits
xlim = c(-1,1),
ylim = c(-1,1),
# label the plot (title, and axes)
main = "Triplot RDA - scaling 1",
xlab = paste0("RDA1 (", perc[1], "%)"),
ylab = paste0("RDA2 (", perc[2], "%)")
)
# add points for site scores
points(sc_si,
pch = 21, # set shape (here, circle with a fill colour)
col = "black", # outline colour
bg = "steelblue", # fill colour
cex = 1.2) # size
# add points for species scores
points(sc_sp,
pch = 22, # set shape (here, square with a fill colour)
col = "black",
bg = "#f2bd33",
cex = 1.2)
# add text labels for species abbreviations
text(sc_sp + c(0.03, 0.09), # adjust text coordinates to avoid overlap with points
labels = rownames(sc_sp),
col = "grey40",
font = 2, # bold
cex = 0.6)
# add arrows for effects of the expanatory variables
arrows(0,0, # start them from (0,0)
sc_bp[,1], sc_bp[,2], # end them at the score value
col = "red",
lwd = 3)
# add text labels for arrows
text(x = sc_bp[,1] - 0.1, # adjust text coordinate to avoid overlap with arrow tip
y = sc_bp[,2] - 0.03,
labels = rownames(sc_bp),
col = "red",
cex = 1,
font = 2)