-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfunguild.R
144 lines (141 loc) · 5.75 KB
/
funguild.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#' Retrieve the FUNGuild database
#' @description
#'
#' <a href="https://adrientaudiere.github.io/MiscMetabar/articles/Rules.html#lifecycle">
#' <img src="https://img.shields.io/badge/lifecycle-stable-green" alt="lifecycle-stable"></a>
#'
#' The original function and documentation was written by Brendan Furneaux
#' in the [FUNGuildR](https://github.com/brendanf/FUNGuildR/) package.
#'
#' Please cite this publication (\doi{10.1016/j.funeco.2015.06.006}).
#'
#' @param db_url a length 1 character string giving the URL to retrieve the database
#' from
#'
#' @return a [`tibble::tibble`] containing the database, which can be passed
#' to the `db` argument of [funguild_assign()]
#' @export
#'
#' @references Nguyen NH, Song Z, Bates ST, Branco S, Tedersoo L, Menke J,
#' Schilling JS, Kennedy PG. 2016. *FUNGuild: An open annotation tool for
#' parsing fungal community datasets by ecological guild*. Fungal Ecology
#' 20:241-248.
#' @author Brendan Furneaux (orcid: [0000-0003-3522-7363](https://orcid.org/0000-0003-3522-7363)),
#' modified by Adrien Taudière
#'
get_funguild_db <- function(db_url = "http://www.stbates.org/funguild_db_2.php") {
httr::GET(url = db_url) %>%
httr::content(as = "text") %>%
stringr::str_split("\n") %>%
unlist() %>%
magrittr::extract(7) %>%
stringr::str_replace("^\\[", "") %>%
stringr::str_replace("]</body>$", "") %>%
stringr::str_replace_all("\\} ?, ?\\{", "} \n {") %>%
stringr::str_split("\n") %>%
unlist() %>%
purrr::map_dfr(
function(record) {
current_record <- jsonlite::fromJSON(record)
if (!is.null(current_record[["TrophicMode"]])) {
current_record$trophicMode <- current_record$TrophicMode
}
if (!is.null(current_record[["growthMorphology"]])) {
current_record$growthForm <- current_record$growthMorphology
}
purrr::flatten(current_record)
}
) %>%
dplyr::select(
"taxon", "guid", "mbNumber", "taxonomicLevel", "trophicMode",
"guild", "confidenceRanking", "growthForm", "trait", "notes",
"citationSource"
)
}
#' Assign Guilds to Organisms Based on Taxonomic Classification
#'
#' @description
#'
#' <a href="https://adrientaudiere.github.io/MiscMetabar/articles/Rules.html#lifecycle">
#' <img src="https://img.shields.io/badge/lifecycle-stable-green" alt="lifecycle-stable"></a>
#'
#' The original function and documentation was written by Brendan Furneaux
#' in the [FUNGuildR](https://github.com/brendanf/FUNGuildR/) package.
#'
#' These functions have identical behavior if supplied with a database; however
#' they download the database corresponding to their name by default.
#'
#' Taxa present in the database are matched to the taxa present in the supplied
#' `otu_table` by exact name.
#' In the case of multiple matches, the lowest (most specific) rank is chosen.
#' No attempt is made to check or correct the classification in
#' `otu_table$Taxonomy`.
#'
#' @param otu_table A `data.frame` with a `character`
#' column named "`Taxonomy`" (or another name as specified in
#' `tax_col`), as well as any other columns.
#' Each entry in "`otu_table$Taxonomy`" should be a comma-, colon-,
#' underscore-, or semicolon-delimited classification of an organism.
#' Rank indicators as given by Sintax ("`k:`", "`p:`"...) or Unite ("`k__`,
#' "`p__`", ...) are also allowed.
#' A `character` vector, representing only the taxonomic classification,
#' is also accepted.
#' @param tax_col A `character` string, optionally giving an alternate
#' column name in `otu_table` to use instead of `otu_table$Taxonomy`.
#'
#' @param db_funguild A `data.frame` representing the FUNGuild as returned by
#' [get_funguild_db()]
#' If not supplied, the default database will be downloaded.
#'
#' @return A [`tibble::tibble`] containing all columns of
#' `otu_table`, plus relevant columns of information from the FUNGuild
#' @export
#'
#' @references Nguyen NH, Song Z, Bates ST, Branco S, Tedersoo L, Menke J,
#' Schilling JS, Kennedy PG. 2016. *FUNGuild: An open annotation tool for
#' parsing fungal community datasets by ecological guild*. Fungal Ecology
#' 20:241-248.
#' @author Brendan Furneaux (orcid: [0000-0003-3522-7363](https://orcid.org/0000-0003-3522-7363)),
#' modified by Adrien Taudière
funguild_assign <- function(otu_table, db_funguild = get_funguild_db(),
tax_col = "Taxonomy") {
if (is.character(otu_table)) {
otu_table <- tibble::tibble(otu_table)
names(otu_table) <- tax_col
}
if (!is.data.frame(otu_table)) {
stop(paste0("otu_table must be a dataframe not a ", class(otu_table)))
}
if (!tax_col %in% colnames(otu_table)) {
stop(paste0("The tax_col args ", tax_col, " is not present in the colnames
of the otu_table dataframe."))
}
make_taxkey <- function(x) {
out <- gsub("\\b[kpcofgs](:|__)", "", x)
out <- gsub("[_ ;,:]", "@", out)
paste0("@", out, "@")
}
otu_table$taxkey <- make_taxkey(otu_table[[tax_col]])
all_taxkey <- unique(otu_table$taxkey) %>% na.omit()
`.` <- taxon <- taxkey <- searchkey <- taxonomicLevel <- NULL # to pass R CMD check
db_funguild <- dplyr::mutate(
db_funguild,
searchkey = paste0("@", stringr::str_replace(taxon, "[ _]", "@"), "@")
)
dplyr::select(db_funguild, taxonomicLevel, searchkey) %>%
dplyr::mutate(
taxkey = purrr::map(searchkey, stringr::str_subset, string = all_taxkey)
) %>%
tidyr::unnest(cols = taxkey) %>%
dplyr::group_by(taxkey) %>%
dplyr::arrange(dplyr::desc(taxonomicLevel)) %>%
dplyr::summarize_at("searchkey", dplyr::first) %>%
dplyr::ungroup() %>%
dplyr::mutate_all(as.character) %>%
dplyr::left_join(otu_table, ., by = "taxkey") %>%
dplyr::left_join(db_funguild,
by = "searchkey",
suffix = c("", ".funguild")
) %>%
dplyr::select(-taxkey, -searchkey)
}