Skip to content

Commit

Permalink
Merge pull request #24 from adrientaudiere/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
adrientaudiere authored Sep 17, 2023
2 parents d8b7c03 + a72d87b commit eeb3b7e
Show file tree
Hide file tree
Showing 53 changed files with 985 additions and 215 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/draft-pdf.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
on: [push]

on:
push:
branches: [main, master]
jobs:
paper:
runs-on: ubuntu-latest
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
release:
types: [published]
branches:
- master
workflow_dispatch:

name: pkgdown
Expand All @@ -33,6 +30,12 @@ jobs:
extra-packages: any::pkgdown, local::.
needs: website

- name: Install vsearch
run: sudo apt-get install vsearch

- name: Install blastn
run: sudo apt-get install ncbi-blast+

- name: Build site
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
shell: Rscript {0}
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
branches:
- master

name: test-coverage

Expand Down
4 changes: 4 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@ Depends:
Suggests:
Biostrings,
circlize,
ComplexUpset,
data.table,
DECIPHER,
DESeq2,
DT,
edgeR,
formattable,
gghalves,
ggVennDiagram,
grDevices,
grid,
gridExtra,
here,
iNEXT,
knitr,
lulu,
metacoder,
Expand All @@ -51,6 +54,7 @@ Suggests:
stringr,
testthat (>= 3.0.0),
tibble,
tidyr,
vegan,
venneuler,
viridis
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(clean_physeq)
export(clean_pq)
export(compare_pairs_pq)
export(count_seq)
export(diff_fct_diff_class)
export(dist_bycol)
export(dist_pos_control)
export(filter_asv_blast)
Expand All @@ -33,6 +34,7 @@ export(hill_phyloseq)
export(hill_pq)
export(hill_tuckey_phyloseq)
export(hill_tuckey_pq)
export(iNEXT_pq)
export(krona)
export(list_fastq_files)
export(lulu_phyloseq)
Expand Down Expand Up @@ -71,6 +73,7 @@ export(tax_datatable)
export(track_wkflow)
export(track_wkflow_samples)
export(tsne_pq)
export(upset_pq)
export(venn_phyloseq)
export(venn_pq)
export(verify_pq)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# MiscMetabar 0.41 (in development)

- Add function `iNEXT_pq()` to calculate hill diversity using the [iNEXT](https://github.com/AnneChao/iNEXT) package.
- Add argument `paires` to `multi_biplot_pq()` to specify all the pairs of samples to plot.
- Improve `compare_pairs_pq()` with information about the number of shared sequences among pairs.
- Add function `upset_pq()` to draw an UpSet plot of a phyloseq object using the [ComplexUpset](https://krassowski.github.io/complex-upset/) package.
- Add info (param `add_info`) in subtitle of the `hill_pq()` function


# MiscMetabar 0.40

Expand Down
2 changes: 1 addition & 1 deletion R/MiscMetabar-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ NULL

if (getRversion() >= "2.15.1") {
utils::globalVariables(c(
".id", "%>%", "Ab", "Abundance", "col_tax", "combn", "complement", "devtools", "e-value", "Family", "Genus", "grid.draw", "grid.layout", "group_by", "Hill_0", "Hill_1", "Hill_2", "install_github", "log2FoldChange", "logFC", "lwr", "max_Hill", "modality", "multcompLetters", "nb_values", "ott_id", "OTU", "Proportion", "pushViewport", "Query name", "rarefy", "reverse", "rgb", "reverseComplement", "rrarefy", "Species", "summarise", "tax", "tax_col", "teststat", "tnrs_match_names", "tol_induced_subtree", "upr", "upViewport", "vegdist", "viewport", "x", "x1", "X1", "x2", "y", "y1", "y2", "ymax", "ymin"
".id", "%>%", "Ab", "Abundance", "character_method", "col_tax", "combn", "complement", "devtools", "e-value", "Family", "Genus", "grid.draw", "grid.layout", "group_by", "Hill_0", "Hill_1", "Hill_2", "install_github", "log2FoldChange", "logFC", "lwr", "max_Hill", "modality", "multcompLetters", "nb_values", "ott_id", "OTU", "Proportion", "pushViewport", "Query name", "rarefy", "reverse", "rgb", "reverseComplement", "rrarefy", "Sample", "Species", "summarise", "tax", "tax_col", "teststat", "tnrs_match_names", "tol_induced_subtree", "upr", "upViewport", "val", "vegdist", "viewport", "x", "x1", "X1", "x2", "y", "y1", "y2", "ymax", "ymin"
))
}

Expand Down
52 changes: 27 additions & 25 deletions R/blast.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
#' @param e_value_cut (default: 1e-30) cut-off in e-value to keep results
#' The BLAST E-value is the number of expected hits of similar quality (score)
#' that could be found just by chance.
#' @param unique_per_seq (logical) if TRUE only return the first match for
#' each sequence in seq2search
#' @param score_filter (logical) does results are filter by score? If
#' FALSE, `id_cut`,`bit_score_cut` and `min_cover_cut` are ignored
#' @param unique_per_seq (logical, default FALSE) if TRUE only return the best match
#' (highest **bit score**) for each sequence
#' @param score_filter (logical, default TRUE) are results filtered by score? If
#' FALSE, `id_cut`, `bit_score_cut`, `e_value_cut` and `min_cover_cut` are ignored
#' @param list_no_output_query (logical) does the result table include
#' query sequences for which `blastn` does not find any correspondence?
#' @param args_makedb Additional parameters passed to the makeblastdb command
Expand All @@ -34,9 +34,9 @@
#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files
#' - db.fasta (refseq transformed into a database)
#' - dbase list of files (output of blastn)
#' - blast_result.txt the summary result of blastn using
#' - blast_result.txt the summary result of blastn using
#' `-outfmt "6 qseqid qlen sseqid slen length pident evalue bitscore qcovs"`
#'
#'
#' @seealso [MiscMetabar::blast_pq()] to use `refseq` slot as query sequences
#' against a custom database.
#'
Expand Down Expand Up @@ -70,12 +70,13 @@ blast_to_phyloseq <- function(physeq,
dna <- Biostrings::DNAStringSet(physeq@refseq)
Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "db.fasta"))

system(paste0(blastpath,
system(paste0(
blastpath,
"makeblastdb -dbtype nucl -in ",
paste0(tempdir(), "/", "db.fasta"),
paste0(tempdir(), "/", "db.fasta"),
" -out ",
paste0(tempdir(), "/", "dbase"),
" ",
" ",
args_makedb
))

Expand All @@ -97,7 +98,7 @@ blast_to_phyloseq <- function(physeq,
)
if (file.info(paste0(tempdir(), "/", "blast_result.txt"))$size > 0) {
blast_tab <- utils::read.table(
paste0(tempdir(), "/", "blast_result.txt"),,
paste0(tempdir(), "/", "blast_result.txt"), ,
sep = "\t",
header = FALSE,
stringsAsFactors = FALSE
Expand All @@ -115,7 +116,7 @@ blast_to_phyloseq <- function(physeq,
message(paste0("Temporary files are located at ", tempdir()))
}

if(!blast_tab_OK){
if (!blast_tab_OK) {
message("None query sequences matched your phyloseq references sequences.")
return(NULL)
}
Expand All @@ -132,7 +133,7 @@ blast_to_phyloseq <- function(physeq,
"Query cover"
)

blast_tab <- blast_tab[order(blast_tab[, "% id. match"], decreasing = TRUE), ]
blast_tab <- blast_tab[order(blast_tab[, "bit score"], decreasing = TRUE), ]

if (unique_per_seq) {
blast_tab <- blast_tab[which(!duplicated(blast_tab[, 1])), ]
Expand Down Expand Up @@ -184,9 +185,9 @@ blast_to_phyloseq <- function(physeq,
#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files
#' - db.fasta (refseq transformed into a database)
#' - dbase list of files (output of blastn)
#' - blast_result.txt the summary result of blastn using
#' - blast_result.txt the summary result of blastn using
#' `-outfmt "6 qseqid qlen sseqid slen length pident evalue bitscore qcovs"`
#'
#'
#' @seealso [MiscMetabar::blast_to_phyloseq()] to use `refseq`
#' slot as a database
#' @return a blast table
Expand Down Expand Up @@ -254,7 +255,7 @@ blast_pq <- function(physeq,
" -db ",
database,
" -out ",
paste0(tempdir(), "/", "blast_result.txt"),
paste0(tempdir(), "/", "blast_result.txt"),
" -outfmt \"6 qseqid qlen sseqid slen",
" length pident evalue bitscore qcovs\"",
" -num_threads ", nproc,
Expand All @@ -264,9 +265,9 @@ blast_pq <- function(physeq,
)
}

if (file.info(paste0(tempdir(), "/", "blast_result.txt"))$size > 0) {
if (file.info(paste0(tempdir(), "/", "blast_result.txt"))$size > 0) {
blast_tab <- utils::read.table(
paste0(tempdir(), "/", "blast_result.txt"),,
paste0(tempdir(), "/", "blast_result.txt"), ,
sep = "\t",
header = FALSE,
stringsAsFactors = FALSE
Expand All @@ -284,7 +285,7 @@ blast_pq <- function(physeq,
message(paste0("Temporary files are located at ", tempdir()))
}

if(!blast_tab_OK){
if (!blast_tab_OK) {
message("None query sequences matched your phyloseq references sequences.")
return(NULL)
}
Expand All @@ -301,7 +302,7 @@ blast_pq <- function(physeq,
"Query cover"
)

blast_tab <- blast_tab[order(blast_tab[, "% id. match"], decreasing = TRUE), ]
blast_tab <- blast_tab[order(blast_tab[, "bit score"], decreasing = TRUE), ]

if (unique_per_seq) {
blast_tab <- blast_tab[which(!duplicated(blast_tab[, 1])), ]
Expand Down Expand Up @@ -423,7 +424,7 @@ filter_asv_blast <- function(physeq,
#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files
#' - db.fasta (refseq transformed into a database)
#' - dbase list of files (output of blastn)
#' - blast_result.txt the summary result of blastn using
#' - blast_result.txt the summary result of blastn using
#' `-outfmt "6 qseqid qlen sseqid slen length pident evalue bitscore qcovs"`
#' @return A blast table
#'
Expand Down Expand Up @@ -468,7 +469,8 @@ blast_to_derep <- function(derep,
names(dna) <- paste0(names(dna), "(", unlist(derep_occurence), "seqs)")
Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "db.fasta"))

system(paste0(blastpath,
system(paste0(
blastpath,
"makeblastdb -dbtype nucl -in ",
paste0(tempdir(), "/", "db.fasta"),
" -out ",
Expand All @@ -494,9 +496,9 @@ blast_to_derep <- function(derep,
)


if (file.info(paste0(tempdir(), "/", "blast_result.txt"))$size > 0) {
if (file.info(paste0(tempdir(), "/", "blast_result.txt"))$size > 0) {
blast_tab <- utils::read.table(
paste0(tempdir(), "/", "blast_result.txt"),,
paste0(tempdir(), "/", "blast_result.txt"), ,
sep = "\t",
header = FALSE,
stringsAsFactors = FALSE
Expand All @@ -514,7 +516,7 @@ blast_to_derep <- function(derep,
message(paste0("Temporary files are located at ", tempdir()))
}

if(!blast_tab_OK){
if (!blast_tab_OK) {
message("None query sequences matched your phyloseq references sequences.")
return(NULL)
}
Expand All @@ -533,7 +535,7 @@ blast_to_derep <- function(derep,

blast_tab$occurence <- sub("seqs\\)", "", sub(".*\\(", "", blast_tab$`Sample name`, perl = TRUE), perl = TRUE)

blast_tab <- blast_tab[order(blast_tab[, "% id. match"], decreasing = TRUE), ]
blast_tab <- blast_tab[order(blast_tab[, "bit score"], decreasing = TRUE), ]

if (unique_per_seq) {
blast_tab <- blast_tab[which(!duplicated(blast_tab[, 1])), ]
Expand Down
13 changes: 8 additions & 5 deletions R/controls.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@ search_exact_seq_pq <- function(physeq, sequences) {
################################################################################
#' Calculate ecological distance among positive controls vs
#' distance for all samples
#'

#' @description
#' `r lifecycle::badge("experimental")`
#'
#' @aliases dist_pos_control
#' @details Compute distance among positive controls,
#'
#' Compute distance among positive controls,
#' i.e. samples which are duplicated
#' to test for variation, for example in
#' (i) a step in the sampling,
#' (ii) a step in the extraction,
#' (iii) a step in the sequencing.
#' @aliases dist_pos_control
#' @inheritParams clean_pq
#' @param samples_names (required) a vector of names for samples with
#' positives controls of the same samples having the same name
Expand Down Expand Up @@ -141,7 +142,9 @@ dist_pos_control <- function(physeq, samples_names, method = "bray") {
#' @examples
#' data(data_fungi)
#'
#' subset_taxa_tax_control(data_fungi, as.numeric(data_fungi@otu_table[, 300]))
#' subset_taxa_tax_control(data_fungi,
#' as.numeric(data_fungi@otu_table[, 300]),
#' min_diff_for_cutoff = 2)
#'
#' @author Adrien Taudière
subset_taxa_tax_control <-
Expand Down
Loading

0 comments on commit eeb3b7e

Please sign in to comment.