From 777e97526ffde25fee9ae6e598200af43acaa5df Mon Sep 17 00:00:00 2001
From: Yang Chen <60239063+yangchen2@users.noreply.github.com>
Date: Mon, 27 Nov 2023 12:07:15 -0700
Subject: [PATCH 1/5] filter out discriminatory taxa

---
 qadabra/utils.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/qadabra/utils.py b/qadabra/utils.py
index ae7104d..73ec524 100644
--- a/qadabra/utils.py
+++ b/qadabra/utils.py
@@ -4,7 +4,7 @@
 
 import biom
 import pandas as pd
-
+import warnings
 
 def _validate_input(
     logger: logging.Logger,
@@ -51,11 +51,22 @@ def _validate_input(
     joint_df = tbl_df.join(md)
     gb = joint_df.groupby(factor_name).sum(numeric_only=True)
     feat_presence = gb.apply(lambda x: x.all())
-    if not feat_presence.all():
-        raise ValueError(
-            "Some taxa in the table perfectly discriminate factor groups. "
-            "Please filter out these taxa before running Qadabra."
-        )
+    # if not feat_presence.all():
+    #     raise ValueError(
+    #         "Some taxa in the table perfectly discriminate factor groups. "
+    #         "Please filter out these taxa before running Qadabra."
+    #     )
+    discriminating_feats = feat_presence[~feat_presence].index.tolist()
+
+    if len(discriminating_feats) > 0:
+        warning_msg = "Some features in the table perfectly discriminate factor groups:\n" + '\n'.join(discriminating_feats) + ".\nAutomatically filtering out these features before running Qadabra..."
+        print("Number of discriminating features: " + str(len(discriminating_feats)))
+        warnings.warn(warning_msg, category=Warning)
+
+        # Filtering out the discriminating features from the BIOM table
+        tbl = tbl.filter(lambda value, id_, metadata: id_ not in discriminating_feats, axis='observation', inplace=False)
+        logger.info(f"Table shape after filtering: {tbl.shape}")
+
 
     if tree:
         from bp import parse_newick, to_skbio_treenode
@@ -69,4 +80,4 @@ def _validate_input(
             raise ValueError("Tree tips are not a subset of table features!")
     else:
         logger.info("Reading phylogenetic tree...")
-        logger.info("(Optional tree file not provided. Skipping tree validation.)")
+        logger.info("(Optional tree file not provided. Skipping tree validation.)")
\ No newline at end of file

From 889f414f7bda9b4707388a4fec1e088c35c85c14 Mon Sep 17 00:00:00 2001
From: Yang Chen <60239063+yangchen2@users.noreply.github.com>
Date: Mon, 27 Nov 2023 12:08:34 -0700
Subject: [PATCH 2/5] remove previous code block

---
 qadabra/utils.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/qadabra/utils.py b/qadabra/utils.py
index 73ec524..7c13096 100644
--- a/qadabra/utils.py
+++ b/qadabra/utils.py
@@ -51,11 +51,7 @@ def _validate_input(
     joint_df = tbl_df.join(md)
     gb = joint_df.groupby(factor_name).sum(numeric_only=True)
     feat_presence = gb.apply(lambda x: x.all())
-    # if not feat_presence.all():
-    #     raise ValueError(
-    #         "Some taxa in the table perfectly discriminate factor groups. "
-    #         "Please filter out these taxa before running Qadabra."
-    #     )
+
     discriminating_feats = feat_presence[~feat_presence].index.tolist()
 
     if len(discriminating_feats) > 0:

From 671901343e1bb0a8939323725534e252ba14ca5c Mon Sep 17 00:00:00 2001
From: Yang Chen <60239063+yangchen2@users.noreply.github.com>
Date: Fri, 19 Jan 2024 11:39:55 -0800
Subject: [PATCH 3/5] update README to say QADABRA is WIP software

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 456e061..c71c608 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,8 @@ Importantly, Qadabra focuses on both FDR corrected p-values *and* [feature ranks
 
 ![Schematic](images/Qadabra_schematic.svg)
 
+Please note this software is currently a work in progress. Your patience is appreciated as we continue to develop and enhance its features. Please leave an issue on GitHub should you run into any errors.
+
 ## Installation
 ```
 pip install qadabra
@@ -97,7 +99,7 @@ This will create a zipped directory containing the report.
 Unzip this file and open the `report.html` file to view the report containing results and visualizations in your browser.
 
 ## Tutorial
-See the [tutorial](tutorial.md) page for a walkthroughon using Qadabra workflow with a microbiome dataset.
+See the [tutorial](tutorial.md) page for a walkthrough on using Qadabra workflow with a microbiome dataset.
 
 ## FAQs
 Coming soon: An [FAQs](FAQs.md) page of commonly asked question on the statistics and code pertaining to Qadabra.

From 71de05c4c6153384cc455eed02bd0d54004915c3 Mon Sep 17 00:00:00 2001
From: Yang Chen <60239063+yangchen2@users.noreply.github.com>
Date: Fri, 19 Jan 2024 13:56:42 -0800
Subject: [PATCH 4/5] f-string format for discriminatory features warning, log
 number of discrim feats

---
 qadabra/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qadabra/utils.py b/qadabra/utils.py
index 7c13096..56dd0f5 100644
--- a/qadabra/utils.py
+++ b/qadabra/utils.py
@@ -55,8 +55,8 @@ def _validate_input(
     discriminating_feats = feat_presence[~feat_presence].index.tolist()
 
     if len(discriminating_feats) > 0:
-        warning_msg = "Some features in the table perfectly discriminate factor groups:\n" + '\n'.join(discriminating_feats) + ".\nAutomatically filtering out these features before running Qadabra..."
-        print("Number of discriminating features: " + str(len(discriminating_feats)))
+        logger.warn("Number of discriminating features: " + str(len(discriminating_feats)))
+        warning_msg = f"Some features in the table perfectly discriminate factor groups. Automatically filtering out {len(discriminating_feats)} features before running Qadabra..."        
         warnings.warn(warning_msg, category=Warning)
 
         # Filtering out the discriminating features from the BIOM table

From a78121a70f8ba3882109cd0f0660eac7c10e1706 Mon Sep 17 00:00:00 2001
From: Yang Chen <60239063+yangchen2@users.noreply.github.com>
Date: Fri, 19 Jan 2024 16:27:08 -0800
Subject: [PATCH 5/5] add install from source instructions

---
 README.md | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 57584ee..86681a7 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,8 @@ Importantly, Qadabra focuses on both FDR corrected p-values *and* [feature ranks
 Please note this software is currently a work in progress. Your patience is appreciated as we continue to develop and enhance its features. Please leave an issue on GitHub should you run into any errors.
 
 ## Installation
+
+### Option 1: Pip install from [PyPI](https://pypi.org/project/qadabra/0.3.0a1/)
 ```
 pip install qadabra
 ```
@@ -26,12 +28,31 @@ Qadabra requires the following dependencies:
 * cython
 * iow
 
+Check out the [tutorial](tutorial.md) for more in-depth instructions on installation.
+
+
+### Option 2: Install from source (this GitHub repository)
+Prerequisites
+
+Before you begin, ensure you have Git and the necessary build tools installed on your system.
+
+Clone the repository:
+```
+git clone https://github.com/biocore/qadabra.git
+```
+
+Navigate to the repository root directory (where the `setup.py` file is located) and install Qadabra in editable mode:
+```
+cd qadabra
+pip install -e .
+```
+
 ## Usage
 
 ### 1. Creating the workflow directory
 
 Qadabra can be used on multiple datasets at once.
-First, we want to create the workflow directory to perfrom differential abundance with all methods:
+First, we want to create the workflow directory to perform differential abundance with all methods:
 
 ```
 qadabra create-workflow --workflow-dest <directory_name>