From 1a044584c6e71b74c8e33356b3262f065e1723ab Mon Sep 17 00:00:00 2001 From: Sameer Padhye Date: Thu, 12 Sep 2024 10:16:09 -0400 Subject: [PATCH] update Rd files --- R/bold.analyze.align.R | 12 ++++++------ R/bold.public.search.R | 5 +++++ man/bold.analyze.align.Rd | 11 +++++------ man/bold.public.search.Rd | 5 +++++ 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/R/bold.analyze.align.R b/R/bold.analyze.align.R index d0cc6f1..2a9c4a4 100644 --- a/R/bold.analyze.align.R +++ b/R/bold.analyze.align.R @@ -9,11 +9,11 @@ #' @param ... additional arguments that can be passed to msa::msa() function. #' #' @details -#' `bold.analyze.align` retrieves the sequence information obtained using `bold.fetch` function and performs a multiple sequence alignment. Type of clustering method can be specified using the `align.method` argument. It utilizes the msa::msa() function with default settings but additional arguments can be passed via the `...` argument. Marker name provided must match with the standard marker names available on the BOLD webpage. Name for individual sequences in the output can be customized by using the `seq.name.fields` argument. If more than one field is specified, the name will follow the sequence of the fields given in the vector. Performing a multiple sequence alignment on large sequence data might slow the system. Additionally, users are responsible for verifying the sequence quality and integrity, as the function does not provide any checks on issues like STOP codons and indels within the data. The output of this function is a modified Barcode Core Data Model (BCDM) dataframe, which includes two additional columns: one for the aligned sequences and another for the names given to the sequences. +#' `bold.analyze.align` retrieves the sequence information obtained using `bold.fetch` function and performs a multiple sequence alignment. Type of clustering method can be specified using the `align.method` argument. 
It utilizes the `msa::msa()` function with default settings but additional arguments can be passed via the `...` argument. Marker name provided must match with the standard marker names available on the BOLD webpage (e.g., COI-5P). Name for individual sequences in the output can be customized by using the `seq.name.fields` argument. If more than one field is specified, the name will follow the sequence of the fields given in the vector. Performing a multiple sequence alignment on large sequence data might slow the system. Additionally, users are responsible for verifying the sequence quality and integrity, as the function does not provide any checks on issues like STOP codons and indels within the data by default. The output of this function is a modified Barcode Core Data Model (BCDM) dataframe, which includes two additional columns: one for the aligned sequences and another for the names given to the sequences. #' #' \emph{Note: }. Users are required to install and load the `Biostrings` and `msa` packages using `BiocManager` before running this function. #' -#' @returns An 'output' list containing: +#' @returns #' * bold.df.mod = A modified BCDM data frame with two additional columns (’aligned_seq’ and ’msa.seq.name’). #' #' @export @@ -35,12 +35,12 @@ #' # Both the packages are installed using `BiocManager`. 
#' #' # Align the data (using bin_uri as the name for each sequence) -#' # seq.align <- bold.analyze.align(seq.data, -#' # seq.name.fields = c("bin_uri"), -#' # align.method="ClustalOmega") +#' seq.align <- bold.analyze.align(seq.data, +#' seq.name.fields = c("bin_uri"), +#' align.method="ClustalOmega") #' #' # Dataframe of the sequences (aligned) with their corresponding names -#' # head(seq.align) +#' head(seq.align) #' } #' bold.analyze.align<-function (bold.df, diff --git a/R/bold.public.search.R b/R/bold.public.search.R index bc8d77c..9d58b11 100644 --- a/R/bold.public.search.R +++ b/R/bold.public.search.R @@ -26,11 +26,15 @@ #' #' # Taxonomy #' bold.data <- bold.public.search(taxonomy = "Panthera leo") +#' +#' #Result #' head(bold.data,10) #' #' # Taxonomy and Geography #' bold.data.taxo.geo <- bold.public.search(taxonomy = "Panthera uncia", #' geography = "India") +#' +#' #Result #' head(bold.data.taxo.geo,10) #' #' # Taxonomy, Geography and BINs @@ -38,6 +42,7 @@ #' geography = "India", #' bins=c("BOLD:AAD6819")) #' +#' #Result #' bold.data.taxo.geo.bin #' #' @importFrom utils URLencode diff --git a/man/bold.analyze.align.Rd b/man/bold.analyze.align.Rd index d8b48d9..68662b9 100644 --- a/man/bold.analyze.align.Rd +++ b/man/bold.analyze.align.Rd @@ -24,7 +24,6 @@ bold.analyze.align( \item{...}{additional arguments that can be passed to msa::msa() function.} } \value{ -An 'output' list containing: \itemize{ \item bold.df.mod = A modified BCDM data frame with two additional columns (’aligned_seq’ and ’msa.seq.name’). } @@ -33,7 +32,7 @@ An 'output' list containing: Function designed to transform and align the sequence data retrieved from the function \code{bold.fetch}. } \details{ -\code{bold.analyze.align} retrieves the sequence information obtained using \code{bold.fetch} function and performs a multiple sequence alignment. Type of clustering method can be specified using the \code{align.method} argument. 
It utilizes the msa::msa() function with default settings but additional arguments can be passed via the \code{...} argument. Marker name provided must match with the standard marker names available on the BOLD webpage. Name for individual sequences in the output can be customized by using the \code{seq.name.fields} argument. If more than one field is specified, the name will follow the sequence of the fields given in the vector. Performing a multiple sequence alignment on large sequence data might slow the system. Additionally, users are responsible for verifying the sequence quality and integrity, as the function does not provide any checks on issues like STOP codons and indels within the data. The output of this function is a modified Barcode Core Data Model (BCDM) dataframe, which includes two additional columns: one for the aligned sequences and another for the names given to the sequences. +\code{bold.analyze.align} retrieves the sequence information obtained using \code{bold.fetch} function and performs a multiple sequence alignment. Type of clustering method can be specified using the \code{align.method} argument. It utilizes the \code{msa::msa()} function with default settings but additional arguments can be passed via the \code{...} argument. Marker name provided must match with the standard marker names available on the BOLD webpage (e.g., COI-5P). Name for individual sequences in the output can be customized by using the \code{seq.name.fields} argument. If more than one field is specified, the name will follow the sequence of the fields given in the vector. Performing a multiple sequence alignment on large sequence data might slow the system. Additionally, users are responsible for verifying the sequence quality and integrity, as the function does not provide any checks on issues like STOP codons and indels within the data by default. 
The output of this function is a modified Barcode Core Data Model (BCDM) dataframe, which includes two additional columns: one for the aligned sequences and another for the names given to the sequences. \emph{Note: }. Users are required to install and load the \code{Biostrings} and \code{msa} packages using \code{BiocManager} before running this function. } @@ -54,12 +53,12 @@ api_key=apikey) # Both the packages are installed using `BiocManager`. # Align the data (using bin_uri as the name for each sequence) -# seq.align <- bold.analyze.align(seq.data, -# seq.name.fields = c("bin_uri"), -# align.method="ClustalOmega") +seq.align <- bold.analyze.align(seq.data, +seq.name.fields = c("bin_uri"), +align.method="ClustalOmega") # Dataframe of the sequences (aligned) with their corresponding names -# head(seq.align) +head(seq.align) } } diff --git a/man/bold.public.search.Rd b/man/bold.public.search.Rd index 6d6b94e..040e8b6 100644 --- a/man/bold.public.search.Rd +++ b/man/bold.public.search.Rd @@ -66,11 +66,15 @@ Retrieves record ids for publicly available data based on taxonomy, geography or # Taxonomy bold.data <- bold.public.search(taxonomy = "Panthera leo") + +#Result head(bold.data,10) # Taxonomy and Geography bold.data.taxo.geo <- bold.public.search(taxonomy = "Panthera uncia", geography = "India") + +#Result head(bold.data.taxo.geo,10) # Taxonomy, Geography and BINs @@ -78,6 +82,7 @@ bold.data.taxo.geo.bin <- bold.public.search("Panthera leo", geography = "India", bins=c("BOLD:AAD6819")) +#Result bold.data.taxo.geo.bin }