-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor for multiple dataset support (#29)
* [TMP] script working * [TMP] update DA scripts * [TMP] Viz + LR working * Updates to workflow for multiple datasets * Fix #27 * Update GH Action * Move workflow structure * updated gh actions
- Loading branch information
Showing
62 changed files
with
9,157 additions
and
430 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,36 +16,27 @@ on: | |
- "README.md" | ||
|
||
jobs: | ||
Linting: | ||
build: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v2 | ||
- name: Lint workflow | ||
uses: snakemake/[email protected] | ||
with: | ||
directory: . | ||
snakefile: workflow/Snakefile | ||
stagein: "mamba install -y -n snakemake --channel conda-forge --channel bioconda" | ||
args: "--lint" | ||
|
||
Testing: | ||
runs-on: ubuntu-latest | ||
needs: | ||
- Linting | ||
steps: | ||
- uses: actions/checkout@v2 | ||
- uses: actions/checkout@v2 | ||
with: | ||
persist-credentials: false | ||
fetch-depth: 0 | ||
|
||
- uses: conda-incubator/setup-miniconda@v2 | ||
with: | ||
activate-environment: qadabra | ||
mamba-version: "*" | ||
channels: conda-forge,defaults,bioconda | ||
channel-priority: true | ||
python-version: "3.8" | ||
|
||
- name: Test workflow | ||
uses: snakemake/snakemake-github-action@v1 | ||
with: | ||
directory: . | ||
snakefile: workflow/Snakefile | ||
args: "--use-conda --show-failed-logs -j 10 --conda-cleanup-pkgs cache --conda-frontend mamba" | ||
stagein: "conda config --get channel_priority --json" | ||
- name: Install conda packages | ||
shell: bash -l {0} | ||
run: mamba install snakemake click biom-format pandas | ||
|
||
- name: Test report | ||
uses: snakemake/snakemake-github-action@v1 | ||
with: | ||
directory: . | ||
snakefile: workflow/Snakefile | ||
args: "--report report.zip" | ||
- name: Run Snakemake | ||
shell: bash -l {0} | ||
run: make snaketest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,5 @@ | |
*.swp | ||
*.snakemake | ||
*__pycache__ | ||
*egg-info/ | ||
config/datasets.tsv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
graft workflow | ||
graft config |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,12 @@ | ||
TMPDIR := $(shell mktemp -d) | ||
TABLE_FILE := $(shell realpath qadabra/test_data/table.biom) | ||
MD_FILE := $(shell realpath qadabra/test_data/metadata.tsv) | ||
|
||
create_rulegraph: | ||
snakemake -f --rulegraph | dot -Tpng > imgs/rule_graph.png | ||
|
||
snaketest: | ||
@cd $(TMPDIR); \ | ||
qadabra create-workflow --workflow-dest . ;\ | ||
qadabra add-dataset --table $(TABLE_FILE) --metadata $(MD_FILE) --name "ampharos" --factor-name anemia --target-level anemic --reference-level normal --verbose ; \ | ||
snakemake --use-conda --cores 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__version__ = "0.3.0a1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
import logging | ||
import os | ||
import pathlib | ||
from pkg_resources import resource_filename | ||
import shutil | ||
from typing import List | ||
|
||
import biom | ||
import click | ||
import pandas as pd | ||
|
||
from qadabra import __version__ | ||
from qadabra.utils import _validate_input | ||
|
||
# Contents of the stub Snakefile written into a newly created workflow
# directory by `create_workflow`. The stub imports every rule from the
# Snakefile packaged with qadabra via Snakemake's `module` directive.
# The nested `snakefile:` / `config:` keywords and their values must be
# indented under `module qadabra:` for Snakemake to parse the file.
SNKFILE_TEXT = """from pkg_resources import resource_filename
from snakemake.utils import min_version
min_version("6.0")
qadabra_snakefile = resource_filename("qadabra", "workflow/Snakefile")
configfile: "config/config.yaml"
module qadabra:
    snakefile:
        qadabra_snakefile
    config:
        config
use rule * from qadabra
"""
|
||
|
||
# Root command group; subcommands attach themselves with @qadabra.command().
# The docstring doubles as the help text click prints for the group, and
# --version reports the package's __version__.
@click.group()
@click.version_option(__version__)
def qadabra():
    """Differential abundance workflow"""
|
||
|
||
@qadabra.command()
@click.option(
    "--table",
    type=click.Path(exists=True),
    required=True,
    help="Feature table in BIOM format"
)
@click.option(
    "--metadata",
    type=click.Path(exists=True),
    required=True,
    help="Metadata in TSV format"
)
@click.option(
    "--tree",
    type=click.Path(exists=True),
    required=False,
    help="Phylogenetic tree in Newick format"
)
@click.option(
    "--name",
    type=str,
    required=True,
    help="Name of dataset"
)
@click.option(
    "--factor-name",
    type=str,
    required=True,
    help="Name of factor grouping in metadata"
)
@click.option(
    "--target-level",
    type=str,
    required=True,
    help="Grouping level on which to perform differential abundance"
)
@click.option(
    "--reference-level",
    type=str,
    required=True,
    help="Grouping level to use as reference"
)
@click.option(
    "--confounder",
    type=str,
    required=False,
    multiple=True,
    help="Confounder variable to consider (can provide multiple)"
)
@click.option(
    # An on/off pair: a bare is_flag option defaulting to True could never
    # be switched off from the command line.
    "--validate-input/--no-validate-input",
    show_default=True,
    default=True,
    help="Whether to validate the input files before adding the dataset"
)
@click.option(
    "--verbose",
    is_flag=True,
    show_default=True,
    default=False,
    help="Whether to output progress to console"
)
def add_dataset(
    table,
    metadata,
    tree,
    name,
    factor_name,
    target_level,
    reference_level,
    confounder,
    validate_input,
    verbose
):
    """Add dataset on which to run Qadabra"""
    # Appends one row, indexed by `name`, to config/datasets.tsv (relative to
    # the current working directory), creating the sheet if it is missing.
    #
    # table, metadata, tree: input paths, stored resolved to absolute paths
    #     (tree is optional and stored as None when omitted).
    # factor_name, target_level, reference_level: stored verbatim.
    # confounder: tuple of names, stored joined with ";" (None when empty).
    # validate_input: run _validate_input on the inputs before recording.
    # verbose: log at INFO instead of WARNING.
    #
    # Raises ValueError when ./workflow does not exist or when `name` is
    # already present in the dataset sheet.

    # `exists` must be *called*: the bare bound method is always truthy, so
    # without the parentheses this guard could never trigger.
    if not pathlib.Path("./workflow").exists():
        raise ValueError("Workflow has not been created!")

    dataset_sheet = pathlib.Path("config/datasets.tsv")

    logger = logging.getLogger(__name__)
    log_level = logging.INFO if verbose else logging.WARNING
    logger.setLevel(log_level)
    sh = logging.StreamHandler()
    sh.setLevel(log_level)
    formatter = logging.Formatter(
        "[%(asctime)s - %(levelname)s] :: %(message)s",
        "%Y-%m-%d %H:%M:%S"
    )
    sh.setFormatter(formatter)
    logger.addHandler(sh)

    if validate_input:
        logger.info("Validating input...")
        _validate_input(logger, table, metadata, factor_name, target_level,
                        reference_level, tree, confounder)

    # Build a one-row frame whose index is the dataset name.
    new_ds = pd.Series({
        "table": pathlib.Path(table).resolve(),
        "metadata": pathlib.Path(metadata).resolve(),
        "factor_name": factor_name,
        "target_level": target_level,
        "reference_level": reference_level,
    }, name=name).to_frame().T

    # The optional columns are always written so every row has the same
    # columns.
    if tree is not None:
        new_ds["tree"] = pathlib.Path(tree).resolve()
    else:
        new_ds["tree"] = None

    if confounder:
        new_ds["confounders"] = ";".join(confounder)
    else:
        new_ds["confounders"] = None

    if dataset_sheet.exists():
        logger.info("Loading datasheet...")
        ds_sheet = pd.read_table(dataset_sheet, sep="\t", index_col=0)
        # Dataset names are the sheet's index and must stay unique.
        if name in ds_sheet.index:
            raise ValueError(f"{name} already exists in dataset sheet!")
        ds_sheet = pd.concat([ds_sheet, new_ds], axis=0)
    else:
        logger.info("Dataset does not exist. Creating...")
        ds_sheet = new_ds

    ds_sheet.to_csv(dataset_sheet, sep="\t", index=True)
    logger.info(f"Saved dataset sheet to {dataset_sheet}")
|
||
|
||
@qadabra.command()
@click.option(
    "--workflow-dest",
    type=click.Path(exists=False),
    default="."
)
def create_workflow(workflow_dest):
    """Create new Qadabra workflow structure"""
    # Lays out <workflow_dest>/workflow and <workflow_dest>/config, copies the
    # packaged default config files into config/, and writes the stub
    # Snakefile into workflow/.
    root = pathlib.Path(workflow_dest)
    wflow_dir = root / "workflow"
    cfg_dir = root / "config"

    # No exist_ok: an already-present directory raises FileExistsError, so an
    # existing workflow is never silently overwritten.
    for new_dir in (wflow_dir, cfg_dir):
        new_dir.mkdir(parents=True)

    # Default configuration and plotting style shipped inside the package.
    for fname in ("config.yaml", "qadabra.mplstyle"):
        packaged = resource_filename("qadabra", f"config/{fname}")
        shutil.copy(packaged, cfg_dir / fname)

    (wflow_dir / "Snakefile").write_text(SNKFILE_TEXT)
|
||
|
||
if __name__ == "__main__": | ||
qadabra() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Stub Snakefile: pulls in every rule from the Snakefile packaged with
# qadabra, configured by the local config/config.yaml.
from pkg_resources import resource_filename

from snakemake.utils import min_version
min_version("6.0")

qadabra_snakefile = resource_filename("qadabra", "workflow/Snakefile")
configfile: "config/config.yaml"

# The nested keywords and their values must be indented under the module
# directive for Snakemake to parse them.
module qadabra:
    snakefile:
        qadabra_snakefile
    config:
        config

use rule * from qadabra
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.