diff --git a/README.md b/README.md index 8f8d5d0..86681a7 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,11 @@ Importantly, Qadabra focuses on both FDR corrected p-values *and* [feature ranks ![Schematic](images/Qadabra_schematic.svg) +Please note this software is currently a work in progress. Your patience is appreciated as we continue to develop and enhance its features. Please leave an issue on GitHub should you run into any errors. + ## Installation + +### Option 1: Pip install from [PyPI](https://pypi.org/project/qadabra/0.3.0a1/) ``` pip install qadabra ``` @@ -24,12 +28,31 @@ Qadabra requires the following dependencies: * cython * iow +Check out the [tutorial](tutorial.md) for more in-depth instructions on installation. + + +### Option 2: Install from source (this GitHub repository) +Prerequisites + +Before you begin, ensure you have Git and the necessary build tools installed on your system. + +Clone the Repository +``` +git clone https://github.com/biocore/qadabra.git +``` + +Navigate to the repo root directory, where the `setup.py` file is located, and then install Qadabra in editable mode: +``` +cd qadabra +pip install -e . +``` + ## Usage ### 1. Creating the workflow directory Qadabra can be used on multiple datasets at once. -First, we want to create the workflow directory to perfrom differential abundance with all methods: +First, we want to create the workflow directory to perform differential abundance with all methods: ``` qadabra create-workflow --workflow-dest @@ -97,7 +120,7 @@ This will create a zipped directory containing the report. Unzip this file and open the `report.html` file to view the report containing results and visualizations in your browser. ## Tutorial -See the [tutorial](tutorial.md) page for a walkthroughon using Qadabra workflow with a microbiome dataset. +See the [tutorial](tutorial.md) page for a walkthrough on using the Qadabra workflow with a microbiome dataset. 
## FAQs Coming soon: An [FAQs](FAQs.md) page of commonly asked question on the statistics and code pertaining to Qadabra. diff --git a/qadabra/utils.py b/qadabra/utils.py index ae7104d..56dd0f5 100644 --- a/qadabra/utils.py +++ b/qadabra/utils.py @@ -4,7 +4,7 @@ import biom import pandas as pd - +import warnings def _validate_input( logger: logging.Logger, @@ -51,11 +51,18 @@ def _validate_input( joint_df = tbl_df.join(md) gb = joint_df.groupby(factor_name).sum(numeric_only=True) feat_presence = gb.apply(lambda x: x.all()) - if not feat_presence.all(): - raise ValueError( - "Some taxa in the table perfectly discriminate factor groups. " - "Please filter out these taxa before running Qadabra." - ) + + discriminating_feats = feat_presence[~feat_presence].index.tolist() + + if len(discriminating_feats) > 0: + logger.warn("Number of discriminating features: " + str(len(discriminating_feats))) + warning_msg = f"Some features in the table perfectly discriminate factor groups. Automatically filtering out {len(discriminating_feats)} features before running Qadabra..." + warnings.warn(warning_msg, category=Warning) + + # Filtering out the discriminating features from the BIOM table + tbl = tbl.filter(lambda value, id_, metadata: id_ not in discriminating_feats, axis='observation', inplace=False) + logger.info(f"Table shape after filtering: {tbl.shape}") + if tree: from bp import parse_newick, to_skbio_treenode @@ -69,4 +76,4 @@ def _validate_input( raise ValueError("Tree tips are not a subset of table features!") else: logger.info("Reading phylogenetic tree...") - logger.info("(Optional tree file not provided. Skipping tree validation.)") + logger.info("(Optional tree file not provided. Skipping tree validation.)") \ No newline at end of file