diff --git a/docs/source/index.rst b/docs/source/index.rst
index f80b1562..c444d231 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,6 +13,7 @@ Then, DROP can be executed in multiple ways (:doc:`pipeline`).
    installation
    prepare
    pipeline
+   output
    license
    help
 
@@ -24,7 +25,7 @@ We recommend using a dedicated conda environment. (installation time: ~ 10min)
 
 .. code-block:: bash
 
-    mamba install -c conda-forge -c bioconda drop
+    mamba create -n drop -c conda-forge -c bioconda drop
 
 Test installation with demo project
 
diff --git a/docs/source/output.rst b/docs/source/output.rst
new file mode 100644
index 00000000..c4b0e2a6
--- /dev/null
+++ b/docs/source/output.rst
@@ -0,0 +1,110 @@
+Results and Output of DROP
+===========================
+
+DROP is intended to help researchers use RNA-Seq data in order to detect genes with aberrant expression,
+aberrant splicing and mono-allelic expression. By simplifying the workflow process we hope to provide
+easy-to-read HTML files and output files. This section explains the results files. The paths of the output
+files correspond to the ones from the demo (that can be run with the following code snippet)::
+
+    #install drop
+    mamba create -n drop_env -c conda-forge -c bioconda drop
+    conda activate drop_env
+    
+    mkdir drop_demo
+    cd drop_demo
+    drop demo
+    
+    snakemake -c1
+
+Aberrant Expression
++++++++++++++++++++
+
+HTML file
+#########
+Looking at the resulting ``Output/html/drop_demo_index.html`` we can see the ``AberrantExpression`` 
+tab at the top of the screen. The Overview tab contains links to the:  
+
+* Counts Summaries for each aberrant expression group
+    * number of local and external samples
+    * Mapped reads and size factors for each sample
+    * histograms showing the mean count distribution with different conditions
+    * expressed genes within each sample and as a dataset
+* Outrider Summaries for each aberrant expression group
+    * aberrantly expressed genes per sample
+    * correlation between samples before and after the autoencoder
+    * biological coefficient of variation
+    * aberrant samples
+    * results table
+* Files for each aberrant expression group
+    * OUTRIDER datasets 
+        * Follow the `OUTRIDER vignette <https://www.bioconductor.org/packages/devel/bioc/vignettes/OUTRIDER/inst/doc/OUTRIDER.pdf>`_ for individual OUTRIDER object file (ods) analysis.
+    * Results tables
+        * ``results.tsv`` this text file contains only the significant genes and samples that meet the cutoffs defined in the config file for ``padjCutoff`` and ``zScoreCutoff``
+
+Local result files
+##################
+Additionally the ``aberrantExpression`` module creates the file ``Output/processed_results/aberrant_expression/{annotation}/outrider/{drop_group}/OUTRIDER_results_all.Rds``. This file contains the entire OUTRIDER results table regardless of significance.
+
+Aberrant Splicing
++++++++++++++++++
+
+HTML file
+##########
+Looking at the resulting ``Output/html/drop_demo_index.html`` we can see the ``AberrantSplicing`` 
+tab at the top of the screen. The Overview tab contains links to the:  
+
+* Counting Summaries for each aberrant splicing group
+    * number of local and external samples
+    * number introns/splice sites before and after merging
+    * comparison of local and external mean counts
+    * histograms showing the junction expression before and after filtering and variability
+* FRASER Summaries for each aberrant splicing group
+    * the number of samples, introns, and splice sites 
+    * correlation between samples before and after the autoencoder
+    * results table
+* Files for each aberrant splicing group
+    * FRASER datasets (fds)
+        * Follow the `FRASER vignette <https://www.bioconductor.org/packages/devel/bioc/vignettes/FRASER/inst/doc/FRASER.pdf>`_ for individual FRASER object file (fds) analysis.
+    * Results tables
+        * ``results_per_junction.tsv`` this text file contains only significant junctions that meet the cutoffs defined in the config file. 
+
+Local result files
+##################
+Additionally the ``aberrantSplicing`` module creates the following file ``Output/processed_results/aberrant_splicing/results/{annotation}/fraser/{drop_group}/results.tsv``.
+This text file contains only significant junctions that meet the cutoffs defined in the config file, aggregated at the gene level. Any sample/gene pair is represented by only the most significant junction.
+
+Mono-allelic Expression
++++++++++++++++++++++++
+
+HTML file
+##########
+Looking at the resulting ``Output/html/drop_demo_index.html`` we can see the ``MonoallelicExpression`` 
+tab at the top of the screen. The Overview tab contains links to the:  
+
+* Results for each mae group
+    * number of samples, genes, and mono-allelically expressed heterozygous SNVs
+    * a cascade plot that shows additional filters
+    * histogram of inner cohort frequency
+    * summary of the cascade plot and results table
+* Files for each mae group
+    * Allelic counts
+        * a directory containing the allelic counts of heterozygous variants
+    * Results data tables of each sample (.Rds)
+        * Rds objects containing the full results table regardless of MAE status
+    * Significant MAE results tables
+        * a link to the results file
+        * Only contains significant MAE for the alternative allele results and results that pass the config file cutoffs
+* Quality Control
+    * QC Overview
+        * For each mae group QC checks for DNA/RNA matching
+    
+Local result files
+##################
+Additionally the ``mae`` module creates the following files:
+
+* ``Output/processed_results/mae/{drop_group}/MAE_results_all_{annotation}.tsv.gz``
+    * this file contains the MAE results of all heterozygous SNVs regardless of significance
+* ``Output/processed_results/mae/{drop_group}/MAE_results_{annotation}.tsv``
+    * this is the file linked in the HTML document and described above
+* ``Output/processed_results/mae/{drop_group}/MAE_results_{annotation}_rare.tsv``
+    * this file is a subset of ``MAE_results_{annotation}.tsv`` with only the variants that pass the allele frequency cutoffs. If ``add_AF`` is set to ``true`` in config file must meet minimum AF set by ``max_AF``. Additionally, the inner-cohort frequency must meet the ``maxVarFreqCohort`` cutoff
diff --git a/docs/source/prepare.rst b/docs/source/prepare.rst
index 249b438d..48754a3c 100644
--- a/docs/source/prepare.rst
+++ b/docs/source/prepare.rst
@@ -46,6 +46,7 @@ When providing a path to a file or directory, please provide the *full system pa
 
 Global parameters
 +++++++++++++++++
+These parameters are applied to multiple modules and as a result should be consistent throughout the data you are analyzing
 
 ===================  ==========  =======================================================================================================================================  ======
 Parameter            Type        Description                                                                                                                              Default/Examples
@@ -55,9 +56,9 @@ htmlOutputPath       character   Full path of the folder where the HTML files ar
 indexWithFolderName  boolean     If true, the basename of the project directory will be used as prefix for the index.html file                                            ``true``
 genomeAssembly       character   Either hg19/hs37d5 or hg38/GRCh38, depending on the genome assembly used for mapping                                                     ``/data/project1``
 sampleAnnotation     character   Full path of the sample annotation table                                                                                                 ``/data/project1/sample_annotation.tsv``
-root                 character   Full path of the folder where the subdirectories processed_data and processed_results will be created containing DROP's output files.    ``/data/project1``
+root                 character   Full path of the folder where the sub-directories processed_data and processed_results will be created containing DROP's output files.    ``/data/project1``
 genome               character   Full path of a human reference genome fasta file                                                                                         ``/path/to/hg19.fa``
-genome               dictionary  (Optional) Multiple fasta files can be specified when RNA-seq BAM files belong to different genome assemblies (eg, ncbi, ucsc).          ``ncbi: /path/to/hg19_ncbi.fa``
+genome               dictionary  (Optional) Multiple fasta files can be specified when RNA-seq BAM files belong to different genome. assemblies (eg, ncbi, ucsc).          ``ncbi: /path/to/hg19_ncbi.fa``
 
                                                                                                                                                                           ``ucsc: /path/to/hg19_ucsc.fa``
 geneAnnotation       dictionary  A key-value list of the annotation name (key) and the full path to the GTF file (value). More than one annotation file can be provided.  ``anno1: /path/to/gtf1.gtf``
@@ -73,6 +74,10 @@ tools                dictionary  A key-value list of different commands (key) an
 
 Export counts dictionary
 ++++++++++++++++++++++++
+These parameters are directly used by the ``exportCounts`` snakemake command. This section
+is used to designate which aberrant expression and aberrant splicing groups should be exported
+into datasets that can be shared. To avoid sharing sensitive data, only the canonical annotations
+as described by `geneAnnotations` are exported. Only the groups excluded by `excludeGroups` are not exported.
 
 ===============  ====  ==========================================================================================================================  ======
 Parameter        Type  Description                                                                                                                 Default/Examples
@@ -84,6 +89,8 @@ excludeGroups    list  aberrant expression and aberrant splicing groups whose co
 
 Aberrant expression dictionary
 ++++++++++++++++++++++++++++++
+These parameters are directly used by the ``aberrantExpression`` snakemake command. Aberrant expression groups must have at least ``10``
+samples per group. To use external counts please see the ``Using External Counts`` section.
 
 ============================  =========  =================================================================================================================================  ======
 Parameter                     Type       Description                                                                                                                        Default/Examples
@@ -102,6 +109,8 @@ maxTestedDimensionProportion  numeric    An integer that controls the maximum va
 
 Aberrant splicing dictionary
 ++++++++++++++++++++++++++++
+These parameters are directly used by the ``aberrantSplicing`` snakemake command. Aberrant splicing groups must have at least ``10``
+samples per group. To use external counts please see the ``Using External Counts`` section.
 
 ============================  =========  ============================================================================================  ======
 Parameter                     Type       Description                                                                                   Default/Examples
@@ -122,8 +131,10 @@ maxTestedDimensionProportion  numeric    Same as in aberrant expression.
 ============================  =========  ============================================================================================  ======
 
 
-Mono-allelic expression dictionary
+Mono-allelic expression (MAE) dictionary
 ++++++++++++++++++++++++++++++++++
+These parameters are directly used by the ``mae`` snakemake command. MAE groups are not bound by a minimum number of samples,
+but require additional information in the sample annotation table.
 
 =====================  =========  ========================================================================================================================  ======
 Parameter              Type       Description                                                                                                               Default/Examples
@@ -148,86 +159,117 @@ For example, if the AberrantExpression module is set to false, the  ``Scripts/Ab
 
 Creating the sample annotation table
 ------------------------------------
-
 For a detailed explanation of the columns of the sample annotation, please refer to
-Box 3 of the `DROP manuscript <https://rdcu.be/cdMmF>`_. 
+Box 3 of the `DROP manuscript <https://rdcu.be/cdMmF>`_. Although some information has been updated since puplication, please use this documentation as the preferred syntax/formatting.
 
 Each row of the sample annotation table corresponds to a unique pair of RNA and DNA
 samples derived from the same individual. An RNA assay can belong to one or more DNA
 assays, and vice-versa. If so, they must be specified in different rows. The required
 columns are ``RNA_ID``, ``RNA_BAM_FILE`` and ``DROP_GROUP``, plus other module-specific
-ones (see DROP manuscript). 
+ones (see DROP manuscript).
 
 The following columns describe the RNA-seq experimental setup:
 ``PAIRED_END``, ``STRAND``, ``COUNT_MODE`` and ``COUNT_OVERLAPS``. They affect the 
 counting procedures of the aberrant expression and splicing modules. For a detailed 
 explanation, refer to the documentation of `HTSeq <https://htseq.readthedocs.io/en/latest/>`_.
 
-To run the MAE module, the columns ``DNA_ID`` and ``DNA_VCF_FILE`` are needed.
-
-In case external counts are included, add a new row for each sample from those 
-files (or a subset if not all samples are needed). Add the columns: ``GENE_COUNTS_FILE``,
-``GENE_ANNOTATON``, ``SPLIT_COUNTS_FILE`` and ``NON_SPLIT_COUNTS_FILE``. See examples below.
+To run the MAE module, the columns ``DNA_ID`` and ``DNA_VCF_FILE`` are needed. MAE can not be run
+in samples using external counts as we need to use the ``RNA_BAM_FILE`` to count reads supporting
+each allele of the heterozygous variants found in the ``DNA_VCF_FILE``.
 
 In case RNA-seq BAM files belong to different genome assemblies (eg, ncbi, ucsc), multiple
 reference genome fasta files can be specified. Add a column called `GENOME` that  
 contains, for each sample, the key from the `genome` parameter in the config file that
 matches its genome assembly (eg, ncbi or ucsc).
 
-
 The sample annotation file must be saved in the tab-separated values (tsv) format. The 
 column order does not matter. Also, it does not matter where it is stored, as the path is 
 specified in the config file. Here we provide some examples on how to deal with certain
 situations. For simplicity, we do not include all possible columns in the examples.
 
-Example of RNA replicates 
-++++++++++++++++++++++++++++++++++
 
-======  ======  ==========  ===================  ==
-RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE         DNA_VCF_FILE
-======  ======  ==========  ===================  ==
-S10R_B  S10G    BLOOD       /path/to/S10R_B.BAM  /path/to/S10G.vcf.gz
-S10R_M  S10G    MUSCLE      /path/to/S10R_M.BAM  /path/to/S10G.vcf.gz
-======  ======  ==========  ===================  ==
-
-Example of DNA replicates 
+Using External Counts
 ++++++++++++++++++++++++++++++++++
+DROP can utilize external counts for the ``aberrantExpression`` and ``aberrantSplicing`` modules
+which can enhance the statistical power of these modules by providing more samples from which we 
+can build a distribution of counts and detect outliers. However this process introduces some
+particular issues that need to be addressed to make sure it is a valuable addition to the experiment.
 
-======  ======  ==========  =================  ==
-RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE       DNA_VCF_FILE
-======  ======  ==========  =================  ==
-S20R    S20E    WES         /path/to/S20R.BAM  /path/to/S20E.vcf.gz
-S20R    S20G    WGS         /path/to/S20R.BAM  /path/to/S20G.vcf.gz
-======  ======  ==========  =================  ==
-
-Example of a multi-sample vcf file
-++++++++++++++++++++++++++++++++++
-
-======  ======  ==========  =================  ==
-RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE       DNA_VCF_FILE
-======  ======  ==========  =================  ==
-S10R    S10G    WGS         /path/to/S10R.BAM  /path/to/multi_sample.vcf.gz
-S20R    S20G    WGS         /path/to/S20R.BAM  /path/to/multi_sample.vcf.gz
-======  ======  ==========  =================  ==
-
-External count matrices
+In case external counts are included, add a new row for each sample from those 
+files (or a subset if not all samples are needed). Add the columns: ``GENE_COUNTS_FILE``
+(for aberrant expression), ``GENE_ANNOTATON``, and ``SPLICE_COUNTS_DIR`` (for aberrant splicing).
+These columns should remain empty for samples processed locally (from ``RNA_BAM``).
+
+Aberrant Expression
+####################
+Using external counts for aberrant expression forces you to use the exact same gene annotation for each
+external sample as well as using the same gene annotation file specified in the config file
+``Global parameters`` section. This is to avoid potential mismatching on counting, 2 different gene
+annotations could drastically affect which reads are counted in which region drastically skewing the results.
+
+The user must also use special consideration when building the sample annotation table. Samples
+using external counts need only ``RNA_ID`` which must exactly match the column header in the external count file
+``DROP_GROUP``, ``GENE_COUNTS_FILE``, and ``GENE_ANNOTATION`` which must contain the exact key specified in the config.
+The other columns should remain empty. 
+
+Using ``exportCounts`` generates the sharable ``GENE_COUNTS_FILE`` file in the appropriate
+``ROOT_DIR/Output/processed_results/exported_counts/`` sub-directory.
+
+Aberrant Splicing
+##################
+Using external counts for aberrant splicing reduces the number of introns processed to only those
+that are exactly the same between the local and external junctions. Because rare junctions may be 
+personally identifiable the ``exportCounts`` command only exports regions canonically mentioned in the gtf file.
+As a result, when merging the external counts with the local counts we only match introns that are **exact** between
+the 2 sets, this is to ensure that if a region is missing we don't introduce 0 counts into the distribution calculations.
+
+The user must also use special consideration when building the sample annotation table. Samples
+using external counts need only ``RNA_ID`` which must exactly match the column header in the external count file
+``DROP_GROUP``, and ``SPLICE_COUNTS_DIR``. ``SPLICE_COUNTS_DIR`` is the directory containing the set of 5 needed count files.
+The other columns should remain empty. 
+
+Using ``exportCounts`` generates the necessary files in the appropriate
+``ROOT_DIR/Output/processed_results/exported_counts/`` sub-directory
+
+``SPLICE_COUNTS_DIR`` should contain the following:  
+
+* k_j_counts.tsv.gz  
+* k_theta_counts.tsv.gz  
+* n_psi3_counts.tsv.gz  
+* n_psi5_counts.tsv.gz  
+* n_theta_counts.tsv.gz  
+
+Publicly available DROP external counts
+#######################################
+You can find different sets of publicly available external counts to add to your
+analysis on our `github page <https://github.com/gagneurlab/drop/#datasets>`_
+
+If you want to contribute with your own count matrices, please contact us: yepez at in.tum.de (yepez@in.tum.de)
+
+External count examples
 +++++++++++++++++++++++
 
 In case counts from external matrices are to be integrated into the analysis,
-the file must be specified in the GENE_COUNTS_FILE column. A new row must be
-added for each sample from the count matrix that should be included in the 
-analysis. An RNA_BAM_FILE must not be specified. The DROP_GROUP of the local
+the sample annotation must be built in a particular way
+A new row must be added for each sample from the count matrix that should be included in the 
+analysis. The ``RNA_ID`` must match the column header of the external files,
+the ``RNA_BAM_FILE`` must not be specified. The ``DROP_GROUP`` of the local
 and external samples that are to be analyzed together must be the same.
-Similarly, the GENE_ANNOTATION of the external counts and the key of the `geneAnnotation`
+For aberrant expression, the GENE_ANNOTATION of the external counts and the key of the `geneAnnotation`
 parameter from the config file must match.
 
-======  ======  ==========  =================  ==============================  ==
-RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE       GENE_COUNTS_FILE                GENE_ANNOTATION
-======  ======  ==========  =================  ==============================  ==
-S10R    S10G    BLOOD       /path/to/S10R.BAM  
-EXT-1R          BLOOD                          /path/to/externalCounts.tsv.gz  gencode34
-EXT-2R          BLOOD                          /path/to/externalCounts.tsv.gz  gencode34
-======  ======  ==========  =================  ==============================  ==
+This example will use the ``DROP_GROUP`` BLOOD_AE for the aberrant expression module (containing S10R, EXT-1R, EXT-2R) and
+the ``DROP_GROUP`` BLOOD_AS for the aberrant expression module (containing S10R, EXT-3R, EXT-4R)
+
+======  ======  ==========  =================  ==============================  =============== ========================
+RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE       GENE_COUNTS_FILE                GENE_ANNOTATION SPLICE_COUNTS_DIR
+======  ======  ==========  =================  ==============================  =============== ========================
+S10R    S10G    BLOOD_AE    /path/to/S10R.BAM  
+EXT-1R          BLOOD_AE                       /path/to/externalCounts.tsv.gz  gencode34
+EXT-2R          BLOOD_AE                       /path/to/externalCounts.tsv.gz  gencode34
+EXT-3R          BLOOD_AS                                                                       /path/to/externalCountDir 
+EXT-4R          BLOOD_AS                                                                       /path/to/externalCountDir 
+======  ======  ==========  =================  ==============================  =============== ========================
 
 .. _filesdownload:
 
@@ -248,8 +290,39 @@ Download it and indicate the full path to it in the ``hpoFile`` key.
 The file is only needed in case HPO terms are specified in the sample annotation.
 Otherwise, write ``null`` in the ``hpoFile`` key.
 
+
 .. _advancedoptions:
 
+Example of RNA replicates 
+++++++++++++++++++++++++++++++++++
+
+======  ======  ==========  ===================  ==
+RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE         DNA_VCF_FILE
+======  ======  ==========  ===================  ==
+S10R_B  S10G    BLOOD       /path/to/S10R_B.BAM  /path/to/S10G.vcf.gz
+S10R_M  S10G    MUSCLE      /path/to/S10R_M.BAM  /path/to/S10G.vcf.gz
+======  ======  ==========  ===================  ==
+
+Example of DNA replicates 
+++++++++++++++++++++++++++++++++++
+
+======  ======  ==========  =================  ==
+RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE       DNA_VCF_FILE
+======  ======  ==========  =================  ==
+S20R    S20E    WES         /path/to/S20R.BAM  /path/to/S20E.vcf.gz
+S20R    S20G    WGS         /path/to/S20R.BAM  /path/to/S20G.vcf.gz
+======  ======  ==========  =================  ==
+
+Example of a multi-sample vcf file
+++++++++++++++++++++++++++++++++++
+
+======  ======  ==========  =================  ==
+RNA_ID  DNA_ID  DROP_GROUP  RNA_BAM_FILE       DNA_VCF_FILE
+======  ======  ==========  =================  ==
+S10R    S10G    WGS         /path/to/S10R.BAM  /path/to/multi_sample.vcf.gz
+S20R    S20G    WGS         /path/to/S20R.BAM  /path/to/multi_sample.vcf.gz
+======  ======  ==========  =================  ==
+
 Advanced options
 ----------------
 
@@ -284,4 +357,3 @@ In additon, DROP allows that BAM files from RNA-seq were aligned to one genome
 assembly (eg ucsc) and the corresponding VCF files from DNA sequencing to another
 genome assembly (eg ncbi). If so, the assembly of the reference genome fasta file
 must correspond to the one of the BAM file from RNA-seq.
-
diff --git a/drop/config/ExportCounts.py b/drop/config/ExportCounts.py
index 5044d753..f02e7023 100644
--- a/drop/config/ExportCounts.py
+++ b/drop/config/ExportCounts.py
@@ -27,7 +27,7 @@ def __init__(
         self.CONFIG_KEYS = ["geneAnnotations", "excludeGroups"]
         self.config_dict = self.setDefaults(dict_, genome.annotation)
         self.outputRoot = outputRoot / "exported_counts"
-        self.sa = sampleAnnotation
+        self.sampleAnnotation = sampleAnnotation
         self.genomeAssembly = genome.assembly
         self.geneAnnotations = self.get("geneAnnotations")
         self.modules = {
diff --git a/drop/config/SampleAnnotation.py b/drop/config/SampleAnnotation.py
index 2704cd89..4ea6dbe3 100644
--- a/drop/config/SampleAnnotation.py
+++ b/drop/config/SampleAnnotation.py
@@ -9,7 +9,7 @@
 
 
 class SampleAnnotation:
-    FILE_TYPES = ["RNA_BAM_FILE", "DNA_VCF_FILE", "GENE_COUNTS_FILE"]
+    FILE_TYPES = ["RNA_BAM_FILE", "DNA_VCF_FILE", "GENE_COUNTS_FILE", "SPLICE_COUNTS_DIR"]
     SAMPLE_ANNOTATION_COLUMNS = FILE_TYPES + [
         "RNA_ID", "DNA_ID", "DROP_GROUP", "GENE_ANNOTATION",
         "PAIRED_END", "COUNT_MODE", "COUNT_OVERLAPS", "STRAND", "GENOME"
@@ -32,6 +32,7 @@ def __init__(self, file, root, genome):
         self.dnaIDs = self.createGroupIds(file_type="DNA_VCF_FILE", sep=',')
         # external counts
         self.extGeneCountIDs = self.createGroupIds(file_type="GENE_COUNTS_FILE", sep=',')
+        self.extSpliceCountIDs = self.createGroupIds(file_type="SPLICE_COUNTS_DIR", sep=',')
 
     def parse(self, sep='\t'):
         """
@@ -39,24 +40,25 @@ def parse(self, sep='\t'):
         clean columns and set types
         """
         data_types = {
-            "RNA_ID": str, "DNA_ID": str, "DROP_GROUP": str, "GENE_ANNOTATION": str,
-            "PAIRED_END": bool, "COUNT_MODE": str, "COUNT_OVERLAPS": bool, "STRAND": str, "GENOME": str
+            "RNA_ID": str, "DNA_ID": str, "DROP_GROUP": str, 
+            "PAIRED_END": bool, "COUNT_MODE": str, "COUNT_OVERLAPS": bool, "STRAND": str, 
         }
+        optional_columns = {"GENE_COUNTS_FILE", "SPLICE_COUNTS_DIR", "GENE_ANNOTATION", "GENOME"}
+
         sa = pd.read_csv(self.file, sep=sep, index_col=False)
         missing_cols = [x for x in self.SAMPLE_ANNOTATION_COLUMNS if x not in sa.columns.values]
         if len(missing_cols) > 0:
-            if "GENOME" in missing_cols:
-                # deal with missing columns in data types, remove it to fix checks later
-                del data_types["GENOME"]
-                self.SAMPLE_ANNOTATION_COLUMNS.remove("GENOME")
-                missing_cols.remove("GENOME")
-
             if "GENE_ANNOTATION" in missing_cols and "ANNOTATION" in sa.columns.values:
                 logger.info(
                     "WARNING: GENE_ANNOTATION must be a column in the sample annotation table, ANNOTATION is the old column name and will be deprecated in the future\n")
                 sa["GENE_ANNOTATION"] = sa.pop("ANNOTATION")
                 missing_cols.remove("GENE_ANNOTATION")
 
+            for toDel_optional in (set(missing_cols) & optional_columns):
+                # deal with missing columns in data types, remove it to fix checks later
+                self.SAMPLE_ANNOTATION_COLUMNS.remove(toDel_optional)
+                missing_cols.remove(toDel_optional)
+
             if len(missing_cols) > 0:
                 raise ValueError(f"Incorrect columns in sample annotation file. Missing:\n{missing_cols}")
 
@@ -80,10 +82,12 @@ def createSampleFileMapping(self):
             columns: [ID | ASSAY | FILE_TYPE | FILE_PATH ]
         """
 
-        assay_mapping = {'RNA_ID': ['RNA_BAM_FILE', 'GENE_COUNTS_FILE'], 'DNA_ID': ['DNA_VCF_FILE']}
+        assay_mapping = {'RNA_ID': ['RNA_BAM_FILE', 'GENE_COUNTS_FILE', 'SPLICE_COUNTS_DIR'], 'DNA_ID': ['DNA_VCF_FILE']}
         assay_subsets = []
         for id_, file_types in assay_mapping.items():
             for file_type in file_types:
+                if file_type not in self.annotationTable.columns:
+                    continue
                 df = self.annotationTable[[id_, file_type]].dropna().drop_duplicates().copy()
                 df.rename(columns={id_: 'ID', file_type: 'FILE_PATH'}, inplace=True)
                 df['ASSAY'] = id_
@@ -150,13 +154,12 @@ def createGroupIds(self, group_key="DROP_GROUP", file_type=None, sep=','):
 
     ### Subsetting
 
-    def subsetSampleAnnotation(self, column, values, subset=None, exact_match=True):
+    def subsetSampleAnnotation(self, column, values, subset=None):
         """
         subset by one or more values of different columns from sample file mapping
             :param column: valid column in sample annotation
             :param values: values of column to subset
             :param subset: subset sample annotation
-            :param exact_match: whether to match substrings in the sample annotation, false allows substring matching
         """
         sa_cols = set(self.SAMPLE_ANNOTATION_COLUMNS)
         if subset is None:
@@ -168,10 +171,16 @@ def subsetSampleAnnotation(self, column, values, subset=None, exact_match=True):
             if not sa_cols <= set(subset.columns):  # check if mandatory cols not contained
                 raise ValueError(f"Subset columns not the same as {sa_cols}\ngot: {subset.columns}")
 
+
+        # if you don't want to subset
+        if values is None:
+            return subset
         # check if column is valid
-        if column not in sa_cols:
+        elif column not in sa_cols:
             raise KeyError(f"Column '{column}' not present in sample annotation.")
-        return utils.subsetBy(subset, column, values, exact_match=exact_match)
+        #subset column for matching values
+        else:
+            return utils.subsetBy(subset, column, values)
 
     def subsetFileMapping(self, file_type=None, sample_id=None):
         """
@@ -230,10 +239,9 @@ def getFilePaths(self, file_type, group=None):
         return self.getFilePath(sampleIDs, file_type, single_file=False)
 
     # build a dictionary from the drop group and column. like getImportCounts with skipping options and dict output
-    def getGenomes(self, value, group, file_type="RNA_ID",
-                            column="GENOME", group_key="DROP_GROUP",exact_match = True,skip = False):
+    def getGenomes(self, value, group, file_type="RNA_ID", column="GENOME", group_key="DROP_GROUP", skip = False):
         """
-        :param value: values to match in the column. Must be an exact match, passed to subsetting sample annotation 
+        :param value: values to match in the column.
         :param group: a group of the group_key (DROP_GROUP) column. 
         :return: dict file_type to column
         """
@@ -242,24 +250,30 @@ def getGenomes(self, value, group, file_type="RNA_ID",
         if skip:
             subset = None
         else:
-            subset = self.subsetSampleAnnotation(column, value,exact_match=True)
+            subset = self.subsetSampleAnnotation(column, value)
 
         # additionally subset for the group_key and the group
-        subset = self.subsetSampleAnnotation(group_key, group, subset,exact_match=exact_match)
+        subset = self.subsetSampleAnnotation(group_key, group, subset)
 
         return {sample_id: value for sample_id in subset[file_type].tolist()}
 
-    def getImportCountFiles(self, annotation, group, file_type="GENE_COUNTS_FILE",
-                            annotation_key="GENE_ANNOTATION", group_key="DROP_GROUP",exact_match = True):
+    def getImportCountFiles(self, annotation, group, file_type: str = "GENE_COUNTS_FILE",
+                            annotation_key: str = "GENE_ANNOTATION", group_key: str = "DROP_GROUP", 
+                            asSet: bool = True):
         """
-        :param annotation: annotation name as specified in config and GENE_ANNOTATION column
-        :param group: a group of the DROP_GROUP column. exact match is passed to subsetter, false allows for substring matching
+        :param annotation: annotation name as specified in config and GENE_ANNOTATION column. Can be None
+        :param group: a group of the DROP_GROUP column.
         :return: set of unique external count file names
         """
+        
         #subset for the annotation_key in the annotation group and the group_key in the group
-        subset = self.subsetSampleAnnotation(annotation_key, annotation,exact_match=exact_match)
-        subset = self.subsetSampleAnnotation(group_key, group, subset,exact_match=False)
-        return set(subset[file_type].tolist())
+        subset = self.subsetSampleAnnotation(annotation_key, annotation)
+        subset = self.subsetSampleAnnotation(group_key, group, subset)
+            
+        ans = subset[file_type].tolist()
+        if asSet:
+            ans = set(ans)
+        return ans
 
     def getRow(self, column, value):
         sa = self.annotationTable
@@ -277,15 +291,18 @@ def getGroupedIDs(self, assays):
         Get group to IDs mapping
         :param assays: list of or single assay the IDs should be from. Can be file_type or 'RNA'/'DNA'
         """
-        assays = [assays] if isinstance(assays, str) else assays
+        if isinstance(assays, str):
+            assays = [assays]
         groupedIDs = defaultdict(list)
         for assay in assays:
             if "RNA" in assay:
-                groupedIDs.update(self.rnaIDs)
+                utils.deep_merge_dict(groupedIDs, self.rnaIDs, inplace=True)
             elif "DNA" in assay:
-                groupedIDs.update(self.dnaIDs)
+                utils.deep_merge_dict(groupedIDs, self.dnaIDs, inplace=True)
             elif "GENE_COUNT" in assay:
-                groupedIDs.update(self.extGeneCountIDs)
+                utils.deep_merge_dict(groupedIDs, self.extGeneCountIDs, inplace=True)
+            elif "SPLICE_COUNT" in assay:
+                utils.deep_merge_dict(groupedIDs, self.extSpliceCountIDs, inplace=True)
             else:
                 raise ValueError(f"'{assay}' is not a valid assay name")
         return groupedIDs
diff --git a/drop/config/submodules/AberrantExpression.py b/drop/config/submodules/AberrantExpression.py
index 385c226d..cf7ad7ab 100644
--- a/drop/config/submodules/AberrantExpression.py
+++ b/drop/config/submodules/AberrantExpression.py
@@ -25,7 +25,7 @@ def __init__(self, config, sampleAnnotation, processedDataDir, processedResultsD
                 please fix to only have either external count or BAM processing\n")
 
         # check number of IDs per group
-        all_ids = {g: self.rnaIDs[g] + self.extRnaIDs[g] for g in self.groups}
+        all_ids = self.sampleAnnotation.subsetGroups(self.groups, assay=["RNA", "GENE_COUNTS"])
         self.checkSubset(all_ids)
 
     def setDefaultKeys(self, dict_):
@@ -47,6 +47,11 @@ def getCountFiles(self, annotation, group):
         :param group: DROP group name from wildcard
         :return: list of files
         """
+
+        # if sample annotation table does not contain GENE_COUNTS_FILE column. return no external counts
+        if("GENE_COUNTS_FILE" not in self.sampleAnnotation.SAMPLE_ANNOTATION_COLUMNS):
+            return []
+
         bam_IDs = self.sampleAnnotation.getIDsByGroup(group, assay="RNA")
         file_stump = self.processedDataDir / "aberrant_expression" / annotation / "counts" / "{sampleID}.Rds"
         count_files = expand(str(file_stump), sampleID=bam_IDs)
diff --git a/drop/config/submodules/AberrantSplicing.py b/drop/config/submodules/AberrantSplicing.py
index 7a59fb61..228ba775 100644
--- a/drop/config/submodules/AberrantSplicing.py
+++ b/drop/config/submodules/AberrantSplicing.py
@@ -1,3 +1,6 @@
+import numpy as np
+import pandas as pd
+
 from snakemake.io import expand
 
 from drop import utils
@@ -16,8 +19,16 @@ def __init__(self, config, sampleAnnotation, processedDataDir, processedResultsD
         # if self.run is false return without doing any config/sa checks for completeness
         if not self.run:
             return
-        self.rnaIDs = self.sampleAnnotation.subsetGroups(self.groups, assay="RNA")
-        self.checkSubset(self.rnaIDs)
+        
+        self.rnaIDs   = self.sampleAnnotation.subsetGroups(self.groups, assay="RNA")
+        self.rnaExIDs = self.sampleAnnotation.subsetGroups(self.groups, assay="SPLICE_COUNT")
+        for g in self.groups:
+            if len(set(self.rnaIDs[g]) & set(self.rnaExIDs[g])) > 0:
+                raise ValueError(f"{set(self.rnaIDs[g]) & set(self.extRnaIDs[g])} has both BAM and external count file \
+                please fix in sample annotation table to only have either external count or BAM processing\n")
+
+        all_ids = self.sampleAnnotation.subsetGroups(self.groups, assay=["RNA", "SPLICE_COUNT"])
+        self.checkSubset(all_ids)
 
     def setDefaultKeys(self, dict_):
         super().setDefaultKeys(dict_)
@@ -44,7 +55,7 @@ def getSplitCountFiles(self, dataset):
         :return: list of files
         """
         ids = self.sampleAnnotation.getIDsByGroup(dataset, assay="RNA")
-        file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / "cache" / f"raw-{dataset}" / \
+        file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / "cache" / f"raw-local-{dataset}" / \
                      "sample_tmp" / "splitCounts"
         done_files = str(file_stump / "sample_{sample_id}.done")
         return expand(done_files, sample_id=ids)
@@ -56,7 +67,30 @@ def getNonSplitCountFiles(self, dataset):
         :return: list of files
         """
         ids = self.sampleAnnotation.getIDsByGroup(dataset, assay="RNA")
-        file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / "cache" / f"raw-{dataset}" / \
+        file_stump = self.processedDataDir / "aberrant_splicing" / "datasets" / "cache" / f"raw-local-{dataset}" / \
                      "sample_tmp" / "nonSplitCounts"
         done_files = str(file_stump / "sample_{sample_id}.done")
         return expand(done_files, sample_id=ids)
+
+
+    def getExternalCounts(self, group: str, fileType: str = "k_j_counts"):
+        """
+        Get externally provided splice count data dir based on the given group.
+        If a file type is given the corresponding file within the folder is returned. 
+        :param group: DROP group name from wildcard
+        :param fileType: name of the file without extension which is to be returned
+        :return: list of directories or files
+        """
+
+        # if sample annotation table does not contain SPLICE_COUNTS_DIR column. return no external counts
+        if("SPLICE_COUNTS_DIR" not in self.sampleAnnotation.SAMPLE_ANNOTATION_COLUMNS):
+            return []
+
+        ids = self.sampleAnnotation.getIDsByGroup(group, assay="SPLICE_COUNT")
+        extCountFiles = self.sampleAnnotation.getImportCountFiles(annotation=None, group=group, 
+                file_type="SPLICE_COUNTS_DIR", asSet=False)
+        if fileType is not None:
+            extCountFiles = np.asarray(extCountFiles)[pd.isna(extCountFiles) == False].tolist()
+            extCountFiles = [x + "/" + fileType + ".tsv.gz" for x in extCountFiles]
+        return extCountFiles
+    
diff --git a/drop/config/submodules/MonoallelicExpression.py b/drop/config/submodules/MonoallelicExpression.py
index a8b45406..2f3173cd 100644
--- a/drop/config/submodules/MonoallelicExpression.py
+++ b/drop/config/submodules/MonoallelicExpression.py
@@ -51,7 +51,7 @@ def setDefaultKeys(self, dict_):
         return dict_
 
     def checkConfigSampleannotation(self):
-        subset = self.sampleAnnotation.subsetSampleAnnotation("DROP_GROUP", self.groups, exact_match=False)
+        subset = self.sampleAnnotation.subsetSampleAnnotation("DROP_GROUP", self.groups)
 
         if len(self.genomeFiles.keys()) > 1:  # more than 1 value in config defined genome dictionary
             if "GENOME" not in subset.columns.values:  # GENOME column not defined
@@ -149,16 +149,16 @@ def setGenomeDict(self, genomeFiles):
         if len(genomeFiles) == 1:  # globally defined in the config
             globalGenome = list(genomeFiles.values())[0]
 
-            # subset SA by the drop group (not exact match) and skip the filtering by SA-GENOME column
+            # subset SA by the drop group and skip the filtering by SA-GENOME column
             genomeDict = self.sampleAnnotation.getGenomes(
                 globalGenome,
                 self.groups,
                 file_type="RNA_ID",
                 column="DROP_GROUP", group_key="DROP_GROUP",
-                exact_match=False, skip=True
+                skip=True
             )
         else:
-            # subset SA by the drop group (not exact match) and filter by SA-GENOME column. Must exactly match config key
+            # subset SA by the drop group and filter by SA-GENOME column. Must exactly match config key
             for gf in genomeFiles.keys():
                 genomeDict.update(
                     self.sampleAnnotation.getGenomes(
@@ -166,7 +166,7 @@ def setGenomeDict(self, genomeFiles):
                         self.groups,
                         file_type="RNA_ID",
                         column="GENOME", group_key="DROP_GROUP",
-                        exact_match=False, skip=False
+                        skip=False
                     )
                 )
 
diff --git a/drop/demo/config_relative.yaml b/drop/demo/config_relative.yaml
index f79852d0..ea0b14b7 100755
--- a/drop/demo/config_relative.yaml
+++ b/drop/demo/config_relative.yaml
@@ -16,13 +16,14 @@ exportCounts:
     - v29
   excludeGroups:
     - mae
-    - import_exp
+    - outrider_external
+    - fraser_external
 
 aberrantExpression:
     run: true
     groups:
       - outrider
-      - import_exp
+      - outrider_external
     fpkmCutoff: 1
     implementation: autoencoder
     padjCutoff: 1
@@ -36,6 +37,7 @@ aberrantSplicing:
     run: true
     groups:
       - fraser
+      - fraser_external
     recount: true
     longRead: false
     keepNonStandardChrs: true
@@ -53,8 +55,8 @@ mae:
     groups:
       - mae
     gatkIgnoreHeaderCheck: true
-    padjCutoff: .05
-    allelicRatioCutoff: 0.8
+    padjCutoff: .5
+    allelicRatioCutoff: 0.7
     addAF: false
     maxAF: .001
     maxVarFreqCohort: 1
diff --git a/drop/demo/sample_annotation_relative.tsv b/drop/demo/sample_annotation_relative.tsv
index 0da07a27..9b720f9b 100755
--- a/drop/demo/sample_annotation_relative.tsv
+++ b/drop/demo/sample_annotation_relative.tsv
@@ -1,23 +1,25 @@
-RNA_ID	RNA_BAM_FILE	DNA_VCF_FILE	DNA_ID	DROP_GROUP	PAIRED_END	COUNT_MODE	COUNT_OVERLAPS	STRAND	HPO_TERMS	GENE_COUNTS_FILE	GENE_ANNOTATION	GENOME
+RNA_ID	RNA_BAM_FILE	DNA_VCF_FILE	DNA_ID	DROP_GROUP	PAIRED_END	COUNT_MODE	COUNT_OVERLAPS	STRAND	HPO_TERMS	GENE_COUNTS_FILE	GENE_ANNOTATION	GENOME	SPLICE_COUNTS_DIR
 HG00096.1.M_111124_6	Data/rna_bam/HG00096.1.M_111124_6_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00096	outrider,mae	TRUE	IntersectionStrict	TRUE	no	HP:0009802,HP:0010896
 HG00103.4.M_120208_3	Data/rna_bam/HG00103.4.M_120208_3_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00103	outrider,mae	TRUE	IntersectionStrict	TRUE	no	HP:0004582,HP:0031959
-HG00106.4.M_120208_5	Data/rna_bam/HG00106.4.M_120208_5_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00106	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0002895,HP:0006731
-HG00111.2.M_111215_4	Data/rna_bam/HG00111.2.M_111215_4_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00111	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0100491,HP:0100871
-HG00116.2.M_120131_1	Data/rna_bam/HG00116.2.M_120131_1_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00116	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0030613,HP:0012767
-HG00126.1.M_111124_8	Data/rna_bam/HG00126.1.M_111124_8_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00126	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0000290,HP:0000293
-HG00132.2.M_111215_4	Data/rna_bam/HG00132.2.M_111215_4_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00132	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0006489,HP:0006490
-HG00149.1.M_111124_6	Data/rna_bam/HG00149.1.M_111124_6_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00149	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0000014,HP:0000020,HP:0032663
-HG00150.4.M_120208_7	Data/rna_bam/HG00150.4.M_120208_7_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00150	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0030809,HP:0006144
-HG00176.4.M_120208_2	Data/rna_bam/HG00176.4.M_120208_2_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00176	outrider,import_exp	TRUE	IntersectionStrict	TRUE	no	HP:0005215,HP:0010234
-HG00096.1.M_111124_6_trunc	Data/rna_bam/HG00096.1.M_111124_6_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00096	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0009802,HP:0010896
-HG00103.4.M_120208_3_trunc	Data/rna_bam/HG00103.4.M_120208_3_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00103	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0004582,HP:0031959
-HG00106.4.M_120208_5_trunc	Data/rna_bam/HG00106.4.M_120208_5_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00106	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0002895,HP:0006731
-HG00111.2.M_111215_4_trunc	Data/rna_bam/HG00111.2.M_111215_4_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00111	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0100491,HP:0100871
-HG00116.2.M_120131_1_trunc	Data/rna_bam/HG00116.2.M_120131_1_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00116	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0030613,HP:0012767
+HG00106.4.M_120208_5	Data/rna_bam/HG00106.4.M_120208_5_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00106	outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0002895,HP:0006731
+HG00111.2.M_111215_4	Data/rna_bam/HG00111.2.M_111215_4_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00111	outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0100491,HP:0100871
+HG00116.2.M_120131_1	Data/rna_bam/HG00116.2.M_120131_1_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00116	outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0030613,HP:0012767
+HG00126.1.M_111124_8	Data/rna_bam/HG00126.1.M_111124_8_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00126	outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0000290,HP:0000293
+HG00132.2.M_111215_4	Data/rna_bam/HG00132.2.M_111215_4_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00132	outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0006489,HP:0006490
+HG00149.1.M_111124_6	Data/rna_bam/HG00149.1.M_111124_6_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00149	outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0000014,HP:0000020,HP:0032663
+HG00150.4.M_120208_7	Data/rna_bam/HG00150.4.M_120208_7_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00150	fraser_external,outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0030809,HP:0006144
+HG00176.4.M_120208_2	Data/rna_bam/HG00176.4.M_120208_2_chr21.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00176	fraser_external,outrider,outrider_external	TRUE	IntersectionStrict	TRUE	no	HP:0005215,HP:0010234
+HG00096.1.M_111124_6_trunc	Data/rna_bam/HG00096.1.M_111124_6_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00096	fraser,fraser_external	TRUE	IntersectionStrict	TRUE	no	HP:0009802,HP:0010896
+HG00103.4.M_120208_3_trunc	Data/rna_bam/HG00103.4.M_120208_3_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00103	fraser,fraser_external	TRUE	IntersectionStrict	TRUE	no	HP:0004582,HP:0031959
+HG00106.4.M_120208_5_trunc	Data/rna_bam/HG00106.4.M_120208_5_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00106	fraser,fraser_external	TRUE	IntersectionStrict	TRUE	no	HP:0002895,HP:0006731
+HG00111.2.M_111215_4_trunc	Data/rna_bam/HG00111.2.M_111215_4_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00111	fraser,fraser_external	TRUE	IntersectionStrict	TRUE	no	HP:0100491,HP:0100871
+HG00116.2.M_120131_1_trunc	Data/rna_bam/HG00116.2.M_120131_1_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00116	fraser,fraser_external	TRUE	IntersectionStrict	TRUE	no	HP:0030613,HP:0012767
 HG00126.1.M_111124_8_trunc	Data/rna_bam/HG00126.1.M_111124_8_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00126	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0000290,HP:0000293
 HG00132.2.M_111215_4_trunc	Data/rna_bam/HG00132.2.M_111215_4_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00132	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0006489,HP:0006490
 HG00149.1.M_111124_6_trunc	Data/rna_bam/HG00149.1.M_111124_6_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00149	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0000014,HP:0000020,HP:0032663
 HG00150.4.M_120208_7_trunc	Data/rna_bam/HG00150.4.M_120208_7_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00150	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0030809,HP:0006144
 HG00176.4.M_120208_2_trunc	Data/rna_bam/HG00176.4.M_120208_2_chr21.bam_trunc.bam	Data/dna_vcf/demo_chr21.vcf.gz	HG00176	fraser	TRUE	IntersectionStrict	TRUE	no	HP:0005215,HP:0010234
-HG00178.4.M_120208_8				import_exp						Data/external_geneCounts.tsv.gz	v29
-HG00181.4.M_120208_4				import_exp						Data/external_geneCounts.tsv.gz	v29
+HG00178.4.M_120208_8				outrider_external						Data/external_count_data/geneCounts.tsv.gz	v29
+HG00181.4.M_120208_4				fraser_external,outrider_external						Data/external_count_data/geneCounts.tsv.gz	v29		Data/external_count_data
+HG00191.3.M_120208_3				fraser_external									Data/external_count_data
+HG00201.1.M_120208_6				fraser_external									Data/external_count_data
diff --git a/drop/download_data.sh b/drop/download_data.sh
index ba2ac064..3e9ff804 100644
--- a/drop/download_data.sh
+++ b/drop/download_data.sh
@@ -2,12 +2,12 @@
 set -e
 
 # get data
-resource_url="https://www.cmm.in.tum.de/public/paper/drop_analysis/resource.tar.gz"
+resource_url="https://www.cmm.in.tum.de/public/paper/drop_analysis/resource_splice_merge.tar.gz"
 tmpdir="$(dirname "$(mktemp)")"
-wget -nc -P $tmpdir $resource_url
+wget -c -O $tmpdir/resource_exsplice.tar.gz $resource_url
 mkdir -p Data
 if [ -z "$(ls Data)" ]; then
-	tar -zxvf "$tmpdir/resource.tar.gz" -C .
+	tar -zxvf "$tmpdir/resource_exsplice.tar.gz" -C .
 	rm -rf Data
 	mv resource Data
 else
diff --git a/drop/installRPackages.R b/drop/installRPackages.R
index 33455546..cab1e63f 100644
--- a/drop/installRPackages.R
+++ b/drop/installRPackages.R
@@ -1,3 +1,5 @@
+options(timeout = 600) 
+
 options(repos=structure(c(CRAN="https://cloud.r-project.org")), warn = -1)
 
 if (!requireNamespace('BiocManager', quietly = TRUE)) {
diff --git a/drop/modules/aberrant-expression-pipeline/Counting/Summary.R b/drop/modules/aberrant-expression-pipeline/Counting/Summary.R
index c7a8666c..240e1682 100644
--- a/drop/modules/aberrant-expression-pipeline/Counting/Summary.R
+++ b/drop/modules/aberrant-expression-pipeline/Counting/Summary.R
@@ -32,19 +32,27 @@ suppressPackageStartupMessages({
 })
 
 ods <- readRDS(snakemake@input$ods)
+
+has_external <- any(as.logical(colData(ods)$isExternal))
+cnts_mtx_local <- counts(ods, normalized = F)[,!as.logical(ods@colData$isExternal)]
 cnts_mtx <- counts(ods, normalized = F)
 
-#' Number of samples: `r ncol(ods)`
+#' ## Number of samples:  
+#' Local: `r sum(!as.logical(ods@colData$isExternal))`  
+#' External: `r sum(as.logical(ods@colData$isExternal))`  
 #' 
 #' # Count Quality Control
 #' 
-#' Compare number of records vs. read counts
+#' Compare number of records vs. read counts  
+#' The `Obtained Read Count Ratio` plot does not include external counts
+#' because there are no raw reads to be counted.
 #' 
 bam_coverage <- fread(snakemake@input$bam_cov)
 bam_coverage[, sampleID := as.character(sampleID)]
 coverage_dt <- merge(bam_coverage,
                      data.table(sampleID = colnames(ods),
-                                read_count = colSums(cnts_mtx)),
+                                read_count = colSums(cnts_mtx),
+                                isExternal = ods@colData$isExternal),
                      by = "sampleID", sort = FALSE)
 # read count
 setorder(coverage_dt, read_count)
@@ -60,55 +68,81 @@ coverage_dt[, size_factors := sizeFactors(ods)]
 setorder(coverage_dt, size_factors)
 coverage_dt[, sf_rank := 1:.N]
 
-p_depth <- ggplot(coverage_dt, aes(count_rank, read_count)) +
-  geom_point() +
+p_depth <- ggplot(coverage_dt, aes(x = count_rank, y = read_count, col= isExternal )) +
+  geom_point(size = 3,show.legend = has_external) +
   theme_cowplot() +
   background_grid() +
   labs(title = "Obtained Read Counts", x="Sample Rank", y = "Reads Counted") +
-  ylim(c(0,NA))
+  ylim(c(0,NA)) +
+  scale_color_brewer(palette="Dark2")
 
-p_frac <- ggplot(coverage_dt, aes(frac_rank, counted_frac)) +
-  geom_point() +
+p_frac <- ggplot(coverage_dt, aes(x = frac_rank, y = counted_frac, col = isExternal)) +
+  geom_point(size = 3,show.legend = has_external) +
   theme_cowplot() +
   background_grid() +
   labs(title = "Obtained Read Count Ratio", x = "Sample Rank", 
        y = "Percent Reads Counted") +
-  ylim(c(0,NA))
+  ylim(c(0,NA)) +
+  scale_color_brewer(palette="Dark2")
 
 #+ QC, fig.height=6, fig.width=12
-plot_grid(p_depth, p_frac)
+plot_grid(p_depth, p_frac) 
 
-p_sf <- ggplot(coverage_dt, aes(sf_rank, size_factors)) +
-  geom_point() +
+p_sf <- ggplot(coverage_dt, aes(sf_rank, size_factors, col = isExternal)) +
+  geom_point(size = 3,show.legend = has_external) +
   ylim(c(0,NA)) +
   theme_cowplot() +
   background_grid() +
-  labs(title = 'Size Factors', x = 'Sample Rank', y = 'Size Factors')
+  labs(title = 'Size Factors', x = 'Sample Rank', y = 'Size Factors') +
+  scale_color_brewer(palette="Dark2")
 
-p_sf_cov <- ggplot(coverage_dt, aes(read_count, size_factors)) +
-  geom_point() +
+p_sf_cov <- ggplot(coverage_dt, aes(read_count, size_factors, col = isExternal)) +
+  geom_point(size = 3,show.legend = has_external) +
   ylim(c(0,NA)) +
   theme_cowplot() +
   background_grid() +
   labs(title = 'Size Factors vs. Read Counts',
-       x = 'Read Counts', y = 'Size Factors')
+       x = 'Read Counts', y = 'Size Factors') +
+  scale_color_brewer(palette="Dark2")
 
 #+ sizeFactors, fig.height=6, fig.width=12
 plot_grid(p_sf, p_sf_cov)
 
 #' # Filtering
+#' **local**: A pre-filtered summary of counts using only the local (from BAM) counts. Omitted if no external counts  
+#' **all**: A pre-filtered summary of counts using only the merged local (from BAM) and external counts  
+#' **passed FPKM**: Passes the user defined FPKM cutoff in at least 5% of genes  
+#' **min 1 read**: minimum of 1 read expressed in 5% of genes  
+#' **min 10 reads**: minimum of 10 reads expressed in 5% of genes  
+
 quant <- .95
-filter_mtx <- list(
-  all = cnts_mtx,
-  passed_FPKM = cnts_mtx[rowData(ods)$passedFilter,],
-  min_1 = cnts_mtx[rowQuantiles(cnts_mtx, probs = quant) > 1, ],
-  min_10 = cnts_mtx[rowQuantiles(cnts_mtx, probs = quant) > 10, ]
-)
-filter_dt <- lapply(names(filter_mtx), function(filter_name) {
-  mtx <- filter_mtx[[filter_name]]
-  data.table(gene_ID = rownames(mtx), median_counts = rowMeans(mtx), filter = filter_name)
-}) %>% rbindlist
-filter_dt[, filter := factor(filter, levels = c('all', 'passed_FPKM', 'min_1', 'min_10'))]
+
+if(has_external){
+    filter_mtx <- list(
+      local = cnts_mtx_local,
+      all = cnts_mtx,
+      `passed FPKM` = cnts_mtx[rowData(ods)$passedFilter,],
+      `min 1 read` = cnts_mtx[rowQuantiles(cnts_mtx, probs = quant) > 1, ],
+      `min 10 reads` = cnts_mtx[rowQuantiles(cnts_mtx, probs = quant) > 10, ]
+    )
+    filter_dt <- lapply(names(filter_mtx), function(filter_name) {
+      mtx <- filter_mtx[[filter_name]]
+      data.table(gene_ID = rownames(mtx), median_counts = rowMeans(mtx), filter = filter_name)
+    }) %>% rbindlist
+    filter_dt[, filter := factor(filter, levels = c('local', 'all', 'passed FPKM', 'min 1 read', 'min 10 reads'))]
+} else{
+    filter_mtx <- list(
+      all = cnts_mtx,
+      `passed FPKM` = cnts_mtx[rowData(ods)$passedFilter,],
+      `min 1 read` = cnts_mtx[rowQuantiles(cnts_mtx, probs = quant) > 1, ],
+      `min 10 reads` = cnts_mtx[rowQuantiles(cnts_mtx, probs = quant) > 10, ]
+    )
+    filter_dt <- lapply(names(filter_mtx), function(filter_name) {
+      mtx <- filter_mtx[[filter_name]]
+      data.table(gene_ID = rownames(mtx), median_counts = rowMeans(mtx), filter = filter_name)
+    }) %>% rbindlist
+    filter_dt[, filter := factor(filter, levels = c('all', 'passed FPKM', 'min 1 read', 'min 10 reads'))]
+}
 
 binwidth <- .2
 p_hist <- ggplot(filter_dt, aes(x = median_counts, fill = filter)) +
@@ -135,24 +169,24 @@ p_dens <- ggplot(filter_dt, aes(x = median_counts, col = filter)) +
 #+ meanCounts, fig.height=6, fig.width=12
 plot_grid(p_hist, p_dens)
 
+#' ### Expressed Genes
+exp_genes_cols <- c(Rank = "expressedGenesRank",`Expressed\ngenes` = "expressedGenes", 
+                    `Union of\nexpressed genes` = "unionExpressedGenes", 
+                    `Intersection of\nexpressed genes` = "intersectionExpressedGenes", 
+                    `Genes passed\nfiltering` = "passedFilterGenes", `Is External` = "isExternal")
+
+expressed_genes <- as.data.table(colData(ods)[,exp_genes_cols])
+colnames(expressed_genes) <- names(exp_genes_cols)
+
 #+ expressedGenes, fig.height=6, fig.width=8
-plotExpressedGenes(ods) +
+plotExpressedGenes(ods) + 
   theme_cowplot() +
-  background_grid(major = "y")
-
-expressed_genes <- as.data.table(colData(ods))
-expressed_genes <- expressed_genes[, .(expressedGenes, unionExpressedGenes,
-                                       intersectionExpressedGenes, passedFilterGenes,
-                                       expressedGenesRank)]
-
-#+echo=F
-rank_1 <- expressed_genes[expressedGenesRank == 1]
-#' **Rank 1:**
-#' `r as.character(rank_1$expressedGenes)` expressed genes
-#+echo=F
-rank_n <- expressed_genes[expressedGenesRank == .N]
-#' **Rank `r rank_n$expressedGenesRank`:**  
-#' `r as.character(rank_n$expressedGenes)` expressed genes  
-#' `r as.character(rank_n$unionExpressedGenes)` expressed genes (union)  
-#' `r as.character(rank_n$intersectionExpressedGenes)` expressed genes (intersection)  
-#' `r as.character(rank_n$passedFilterGenes)` genes passed the filter
+  background_grid(major = "y") +
+  geom_point(data =melt(expressed_genes,id.vars = c("Rank","Is External")),
+             aes(x = Rank, y = value, col = variable, shape = `Is External`),show.legend = has_external)
+
+if(has_external){
+    DT::datatable(expressed_genes[order(Rank)],rownames = F)
+} else{
+    DT::datatable(expressed_genes[order(Rank),-"Is External"],rownames = F)
+}
diff --git a/drop/modules/aberrant-expression-pipeline/Counting/filterCounts.R b/drop/modules/aberrant-expression-pipeline/Counting/filterCounts.R
index c1a58e70..82405251 100644
--- a/drop/modules/aberrant-expression-pipeline/Counting/filterCounts.R
+++ b/drop/modules/aberrant-expression-pipeline/Counting/filterCounts.R
@@ -36,5 +36,13 @@ ods <- filterExpression(ods, gtfFile=txdb, filter=FALSE,
 # add column for genes with at least 1 gene
 rowData(ods)$counted1sample = rowSums(assay(ods)) > 0
 
+has_external <- !(all(ods@colData$GENE_COUNTS_FILE == "") || is.null(ods@colData$GENE_COUNTS_FILE))
+if(has_external){
+    ods@colData$isExternal <- as.factor(ods@colData$GENE_COUNTS_FILE != "")
+}else{
+    ods@colData$isExternal <- as.factor(FALSE)
+}
+
+
 # Save the ods before filtering to preserve the original number of genes
 saveRDS(ods, snakemake@output$ods)
diff --git a/drop/modules/aberrant-expression-pipeline/OUTRIDER/Summary.R b/drop/modules/aberrant-expression-pipeline/OUTRIDER/Summary.R
index f18ad3dc..36fd13de 100644
--- a/drop/modules/aberrant-expression-pipeline/OUTRIDER/Summary.R
+++ b/drop/modules/aberrant-expression-pipeline/OUTRIDER/Summary.R
@@ -52,7 +52,7 @@ plotEncDimSearch(ods) +
   labs(title = dataset_title) +
   theme_cowplot() +
   background_grid() +
-  scale_color_brewer(palette = "Set1")
+  scale_color_brewer(palette="Dark2")
 
 
 #' ### Aberrantly expressed genes per sample
@@ -63,18 +63,18 @@ plotAberrantPerSample(ods, main = dataset_title,
 
 #' ### Batch correction
 #+ countCorHeatmap, fig.height=8, fig.width=8
-plotCountCorHeatmap(ods, normalized = FALSE, 
+plotCountCorHeatmap(ods, normalized = FALSE, colGroups = "isExternal", colColSet = "Dark2",
                     main = paste0('Raw Counts (', dataset_title, ')'))
-plotCountCorHeatmap(ods, normalized = TRUE, 
+plotCountCorHeatmap(ods, normalized = TRUE, ,colGroups = "isExternal", colColSet = "Dark2",
                     main = paste0('Normalized Counts (', dataset_title, ')'))
 
 
 #' ### Expression by gene per sample
-#+ geneSampleHeatmap, fig.height=12, fig.width=6
-plotCountGeneSampleHeatmap(ods, normalized = FALSE, nGenes = 50,
+#+ geneSampleHeatmap, fig.height=12, fig.width=8
+plotCountGeneSampleHeatmap(ods, normalized = FALSE, nGenes = 50, colGroups = "isExternal", colColSet = "Dark2",
                            main = paste0('Raw Counts (', dataset_title, ')'),
                            bcvQuantile = .95, show_names = 'row')
-plotCountGeneSampleHeatmap(ods, normalized = TRUE, nGenes = 50,
+plotCountGeneSampleHeatmap(ods, normalized = TRUE, nGenes = 50, colGroups = "isExternal", colColSet = "Dark2",
                            main = paste0('Normalized Counts (',dataset_title,')'),
                            bcvQuantile = .95, show_names = 'row')
 
@@ -110,13 +110,14 @@ ggplot(bcv_dt, aes(when, BCV)) +
 #' ## Results
 res <- fread(snakemake@input$results)
 
-#' Samples with at least one outlier gene: `r res[, uniqueN(sampleID)]`
-#'
+#' Samples with at least one outlier gene: `r res[, uniqueN(sampleID)]`  
+
 #' ### Aberrant samples
+#' An aberrant sample is one that has more than 0.1% of the total genes called as outliers.
 if (nrow(res) > 0) {
-  ab_table <- res[AberrantBySample > nrow(ods)/1000, .N, by = .(sampleID)] %>% unique
+  ab_table <- res[AberrantBySample > nrow(ods)/1000, .("Outlier genes" = .N), by = .(sampleID)] %>% unique
   if (nrow(ab_table) > 0) {
-    setorder(ab_table, N) 
+    setorder(ab_table, "Outlier genes") 
     DT::datatable(ab_table)
   } else {
     print("no aberrant samples")
@@ -134,8 +135,8 @@ fwrite(res, file, sep = '\t', quote = F)
 #+ echo=FALSE, results='asis'
 cat(paste0("<a href='./", basename(file), "'>Download OUTRIDER results table</a>"))
 
-res[, pValue := format(pValue, scientific = T, digits = 2)]
-res[, padjust := format(padjust, scientific = T, digits = 2)]
+res[, pValue := format(pValue, scientific = T, digits = 3)]
+res[, padjust := format(padjust, scientific = T, digits = 3)]
 
 DT::datatable(
   head(res, 1000),
diff --git a/drop/modules/aberrant-expression-pipeline/OUTRIDER/results.R b/drop/modules/aberrant-expression-pipeline/OUTRIDER/results.R
index b0da16ff..c76b252a 100644
--- a/drop/modules/aberrant-expression-pipeline/OUTRIDER/results.R
+++ b/drop/modules/aberrant-expression-pipeline/OUTRIDER/results.R
@@ -31,7 +31,8 @@ suppressPackageStartupMessages({
 })
 
 ods <- readRDS(snakemake@input$ods)
-res <- results(ods, all = TRUE)
+res <- results(ods, padjCutoff = snakemake@params$padjCutoff,
+			   zScoreCutoff = snakemake@params$zScoreCutoff, all = TRUE)
 
 # Add fold change
 res[, foldChange := round(2^l2fc, 2)]
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/00_define_datasets_from_anno.R b/drop/modules/aberrant-splicing-pipeline/Counting/00_define_datasets_from_anno.R
index 72e17db9..499e6a9a 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/00_define_datasets_from_anno.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/00_define_datasets_from_anno.R
@@ -38,9 +38,10 @@ mapping <- fread(fileMapFile)
 
 subset_ids <- snakemake@params$ids
 annoSub <- anno[RNA_ID %in% subset_ids]
-colData <- merge(
-    annoSub[,.(sampleID = RNA_ID, STRAND, PAIRED_END)],
+setnames(annoSub, "RNA_ID", "sampleID")
+colData <- merge(annoSub,
     mapping[FILE_TYPE == "RNA_BAM_FILE", .(sampleID=ID, bamFile=FILE_PATH)])
+setcolorder(colData, unique(c("sampleID", "STRAND", "PAIRED_END", "bamFile"), colnames(annoSub)))
 
 #'
 #' ## Dataset: `r name`
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/01_0_countRNA_init.R b/drop/modules/aberrant-splicing-pipeline/Counting/01_0_countRNA_init.R
index c54948ee..41e2ae5f 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/01_0_countRNA_init.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/01_0_countRNA_init.R
@@ -6,15 +6,15 @@
 #'    - snakemake: '`sm str(tmp_dir / "AS" / "{dataset}" / "01_0_init.Rds")`'
 #'  params:
 #'   - setup: '`sm cfg.AS.getWorkdir() + "/config.R"`'
-#'   - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets"`'
+#'   - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets/"`'
 #'  input:
 #'    - colData: '`sm cfg.getProcessedDataDir() + 
 #'                    "/aberrant_splicing/annotations/{dataset}.tsv"`'
 #'  output:
 #'   - fdsobj:  '`sm cfg.getProcessedDataDir() + 
-#'                   "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/fds-object.RDS"`'
+#'                   "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/fds-object.RDS"`'
 #'   - done_fds: '`sm cfg.getProcessedDataDir() + 
-#'                "/aberrant_splicing/datasets/cache/raw-{dataset}/fds.done" `'
+#'                "/aberrant_splicing/datasets/cache/raw-local-{dataset}/fds.done" `'
 #'  type: script
 #'---
 
@@ -31,7 +31,7 @@ col_data <- fread(colDataFile)
 
 fds <- FraserDataSet(colData = col_data,
                      workingDir = workingDir,
-                     name       = paste0("raw-", dataset))
+                     name       = paste0("raw-local-", dataset))
 
 # Add paired end and strand specificity to the fds
 pairedEnd(fds) <- colData(fds)$PAIRED_END
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/01_1_countRNA_splitReads_samplewise.R b/drop/modules/aberrant-splicing-pipeline/Counting/01_1_countRNA_splitReads_samplewise.R
index 5da348af..36eeb798 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/01_1_countRNA_splitReads_samplewise.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/01_1_countRNA_splitReads_samplewise.R
@@ -6,13 +6,13 @@
 #'    - snakemake: '`sm str(tmp_dir / "AS" / "{dataset}" / "splitReads" / "{sample_id}.Rds")`'
 #'  params:
 #'   - setup: '`sm cfg.AS.getWorkdir() + "/config.R"`'
-#'   - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets"`'
+#'   - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets/"`'
 #'  input:
 #'   - done_fds: '`sm cfg.getProcessedDataDir() + 
-#'                "/aberrant_splicing/datasets/cache/raw-{dataset}/fds.done"`'
+#'                "/aberrant_splicing/datasets/cache/raw-local-{dataset}/fds.done"`'
 #'  output:
 #'   - done_sample_splitCounts: '`sm cfg.getProcessedDataDir() + 
-#'                "/aberrant_splicing/datasets/cache/raw-{dataset}"
+#'                "/aberrant_splicing/datasets/cache/raw-local-{dataset}"
 #'                +"/sample_tmp/splitCounts/sample_{sample_id}.done"`'
 #'  threads: 3
 #'  type: script
@@ -28,7 +28,7 @@ params <- snakemake@config$aberrantSplicing
 genomeAssembly <- snakemake@config$genomeAssembly
 
 # Read FRASER object
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
 
 # Get sample id from wildcard
 sample_id <- snakemake@wildcards[["sample_id"]]
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R b/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R
index 1ea2a86e..4fb08ce7 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/01_2_countRNA_splitReads_merge.R
@@ -12,13 +12,13 @@
 #'   - sample_counts: '`sm lambda w: cfg.AS.getSplitCountFiles(w.dataset)`'
 #'  output:
 #'   - countsJ: '`sm cfg.getProcessedDataDir() +
-#'                   "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/rawCountsJ.h5"`'
+#'                          "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/rawCountsJ.h5"`'
 #'   - gRangesSplitCounts: '`sm cfg.getProcessedDataDir() + 
-#'                          "/aberrant_splicing/datasets/cache/raw-{dataset}/gRanges_splitCounts.rds"`'
+#'                          "/aberrant_splicing/datasets/cache/raw-local-{dataset}/gRanges_splitCounts.rds"`'
 #'   - gRangesNonSplitCounts: '`sm cfg.getProcessedDataDir() + 
-#'                          "/aberrant_splicing/datasets/cache/raw-{dataset}/gRanges_NonSplitCounts.rds"`'
+#'                          "/aberrant_splicing/datasets/cache/raw-local-{dataset}/gRanges_NonSplitCounts.rds"`'
 #'   - spliceSites: '`sm cfg.getProcessedDataDir() + 
-#'                   "/aberrant_splicing/datasets/cache/raw-{dataset}/spliceSites_splitCounts.rds"`'
+#'                   "/aberrant_splicing/datasets/cache/raw-local-{dataset}/spliceSites_splitCounts.rds"`'
 #'  type: script
 #'---
 
@@ -36,11 +36,11 @@ register(MulticoreParam(snakemake@threads))
 setAutoBPPARAM(MulticoreParam(snakemake@threads))
 
 # Read FRASER object
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
 
 # If samples are recounted, remove the merged ones
 splitCountsDir <- file.path(workingDir, "savedObjects", 
-                            paste0("raw-", dataset), 'splitCounts')
+                            paste0("raw-local-", dataset), 'splitCounts')
 if(params$recount == TRUE & dir.exists(splitCountsDir)){
   unlink(splitCountsDir, recursive = TRUE)
 }
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/01_3_countRNA_nonSplitReads_samplewise.R b/drop/modules/aberrant-splicing-pipeline/Counting/01_3_countRNA_nonSplitReads_samplewise.R
index c5d9e6a7..ce983be5 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/01_3_countRNA_nonSplitReads_samplewise.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/01_3_countRNA_nonSplitReads_samplewise.R
@@ -9,10 +9,10 @@
 #'   - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets"`'
 #'  input:
 #'   - spliceSites: '`sm cfg.getProcessedDataDir() + 
-#'                   "/aberrant_splicing/datasets/cache/raw-{dataset}/spliceSites_splitCounts.rds"`'
+#'                   "/aberrant_splicing/datasets/cache/raw-local-{dataset}/spliceSites_splitCounts.rds"`'
 #'  output:
 #'   - done_sample_nonSplitCounts : '`sm cfg.getProcessedDataDir() + 
-#'                   "/aberrant_splicing/datasets/cache/raw-{dataset}/sample_tmp/nonSplitCounts/sample_{sample_id}.done"`' 
+#'                   "/aberrant_splicing/datasets/cache/raw-local-{dataset}/sample_tmp/nonSplitCounts/sample_{sample_id}.done"`' 
 #'  threads: 3
 #'  type: script
 #'---
@@ -26,7 +26,7 @@ workingDir <- snakemake@params$workingDir
 params <- snakemake@config$aberrantSplicing
 
 # Read FRASER object
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
 
 # Get sample id from wildcard
 sample_id <- snakemake@wildcards[["sample_id"]]
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/01_4_countRNA_nonSplitReads_merge.R b/drop/modules/aberrant-splicing-pipeline/Counting/01_4_countRNA_nonSplitReads_merge.R
index 83c48939..85c330b6 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/01_4_countRNA_nonSplitReads_merge.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/01_4_countRNA_nonSplitReads_merge.R
@@ -11,10 +11,12 @@
 #'  input:
 #'   - sample_counts:  '`sm lambda w: cfg.AS.getNonSplitCountFiles(w.dataset)`'
 #'   - gRangesNonSplitCounts: '`sm cfg.getProcessedDataDir() + 
-#'                          "/aberrant_splicing/datasets/cache/raw-{dataset}/gRanges_NonSplitCounts.rds"`'
+#'                          "/aberrant_splicing/datasets/cache/raw-local-{dataset}/gRanges_NonSplitCounts.rds"`'
 #'  output:
-#'   - countsSS: '`sm cfg.getProcessedDataDir() +
-#'                   "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/rawCountsSS.h5"`'
+###   - countsSS: '`sm cfg.getProcessedDataDir() +
+###                   "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/rawCountsSS.h5"`'
+#'   - done:     '`sm cfg.getProcessedDataDir() + 
+#'                "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/merge_theta.done"`'
 #'  type: script
 #'---
 
@@ -30,14 +32,14 @@ register(MulticoreParam(snakemake@threads))
 setAutoBPPARAM(MulticoreParam(snakemake@threads))
 
 # Read FRASER object
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
 
 # Read splice site coordinates from RDS
 splitCounts_gRanges <- readRDS(snakemake@input$gRangesNonSplitCounts)
 
 # If samples are recounted, remove the merged ones
 nonSplitCountsDir <- file.path(workingDir, "savedObjects", 
-                            paste0("raw-", dataset), 'nonSplitCounts')
+                            paste0("raw-local-", dataset), 'nonSplitCounts')
 if(params$recount == TRUE & dir.exists(nonSplitCountsDir)){
   unlink(nonSplitCountsDir, recursive = TRUE)
 }
@@ -50,3 +52,5 @@ nonSplitCounts <- getNonSplitReadCountsForAllSamples(fds=fds,
                                                      longRead=params$longRead)
 
 message(date(), ":", dataset, " nonSplit counts done")
+
+file.create(snakemake@output$done)
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/01_5_countRNA_collect.R b/drop/modules/aberrant-splicing-pipeline/Counting/01_5_countRNA_collect.R
index da3d38b6..7724b478 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/01_5_countRNA_collect.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/01_5_countRNA_collect.R
@@ -8,17 +8,15 @@
 #'   - setup: '`sm cfg.AS.getWorkdir() + "/config.R"`'
 #'   - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets"`'
 #'  input:
-#'   - countsJ:  '`sm cfg.getProcessedDataDir() + 
-#'                    "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/rawCountsJ.h5"`'
-#'   - countsSS: '`sm cfg.getProcessedDataDir() + 
-#'                    "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/rawCountsSS.h5"`'
-#'   - gRangesSplitCounts: '`sm cfg.getProcessedDataDir() + 
-#'                          "/aberrant_splicing/datasets/cache/raw-{dataset}/gRanges_splitCounts.rds"`'
-#'   - spliceSites: '`sm cfg.getProcessedDataDir() + 
-#'                   "/aberrant_splicing/datasets/cache/raw-{dataset}/spliceSites_splitCounts.rds"`'
+#'    - countsSSdone: '`sm cfg.getProcessedDataDir() + 
+#'                          "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/merge_theta.done"`'
+#'    - gRangesSplitCounts: '`sm cfg.getProcessedDataDir() + 
+#'                          "/aberrant_splicing/datasets/cache/raw-local-{dataset}/gRanges_splitCounts.rds"`'
+#'    - spliceSites: '`sm cfg.getProcessedDataDir() + 
+#'                          "/aberrant_splicing/datasets/cache/raw-local-{dataset}/spliceSites_splitCounts.rds"`'
 #'  output:
 #'   - counting_done: '`sm cfg.getProcessedDataDir() + 
-#'                "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/counting.done" `'
+#'                          "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/counting.done" `'
 #'  type: script
 #'---
 
@@ -27,15 +25,15 @@ source(snakemake@params$setup, echo=FALSE)
 
 dataset    <- snakemake@wildcards$dataset
 workingDir <- snakemake@params$workingDir
-
+saveDir    <- dirname(snakemake@input$countsSSdone)
 
 # Read FRASER object
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
 splitCounts_gRanges <- readRDS(snakemake@input$gRangesSplitCounts)
 spliceSiteCoords <- readRDS(snakemake@input$spliceSites)
 
 # Get splitReads and nonSplitRead counts in order to store them in FRASER object
-splitCounts_h5 <- HDF5Array::HDF5Array(snakemake@input$countsJ, "rawCountsJ")
+splitCounts_h5 <- HDF5Array::HDF5Array(file.path(saveDir, "rawCountsJ.h5"), "rawCountsJ")
 splitCounts_se <- SummarizedExperiment(
   colData = colData(fds),
   rowRanges = splitCounts_gRanges,
@@ -43,7 +41,7 @@ splitCounts_se <- SummarizedExperiment(
 )
 
 
-nonSplitCounts_h5 <- HDF5Array::HDF5Array(snakemake@input$countsSS, "rawCountsSS")
+nonSplitCounts_h5 <- HDF5Array::HDF5Array(file.path(saveDir, "rawCountsSS.h5"), "rawCountsSS")
 nonSplitCounts_se <- SummarizedExperiment(
   colData = colData(fds),
   rowRanges = spliceSiteCoords,
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/02_psi_value_calculation_FraseR.R b/drop/modules/aberrant-splicing-pipeline/Counting/02_psi_value_calculation_FraseR.R
index 24e5cb9b..3f35f6aa 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/02_psi_value_calculation_FraseR.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/02_psi_value_calculation_FraseR.R
@@ -10,10 +10,10 @@
 #'  threads: 30
 #'  input:
 #'   - counting_done: '`sm cfg.getProcessedDataDir() + 
-#'                "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/counting.done" `'
+#'                "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/counting.done" `'
 #'  output:
 #'  - theta:     '`sm cfg.getProcessedDataDir() +
-#'                    "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/theta.h5"`'
+#'                    "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/theta.h5"`'
 #'  type: script
 #'--- 
 
@@ -28,7 +28,7 @@ register(MulticoreParam(snakemake@threads))
 # Limit number of threads for DelayedArray operations
 setAutoBPPARAM(MulticoreParam(snakemake@threads))
 
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
 
 # Calculating PSI values
 fds <- calculatePSIValues(fds)
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/03_filter_expression_FraseR.R b/drop/modules/aberrant-splicing-pipeline/Counting/03_filter_expression_FraseR.R
index e42522fc..a9355b6b 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/03_filter_expression_FraseR.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/03_filter_expression_FraseR.R
@@ -7,14 +7,16 @@
 #'  params:
 #'   - setup: '`sm cfg.AS.getWorkdir() + "/config.R"`'
 #'   - workingDir: '`sm cfg.getProcessedDataDir() + "/aberrant_splicing/datasets/"`'
+#'   - exCountIDs: '`sm lambda w: sa.getIDsByGroup(w.dataset, assay="SPLICE_COUNT")`'
 #'  input:
-#'   - theta:  '`sm cfg.getProcessedDataDir()+
-#'                  "/aberrant_splicing/datasets/savedObjects/raw-{dataset}/theta.h5"`'
+#'   - theta:  '`sm cfg.getProcessedDataDir() +
+#'                  "/aberrant_splicing/datasets/savedObjects/raw-local-{dataset}/theta.h5"`'
+#'   - exCounts: '`sm lambda w: cfg.AS.getExternalCounts(w.dataset, "k_j_counts")`'
 #'  output:
 #'   - fds: '`sm cfg.getProcessedDataDir() +
-#'                "/aberrant_splicing/datasets/savedObjects/{dataset}/fds-object.RDS"`'
-#'   - done: '`sm cfg.getProcessedDataDir() + 
-#'                "/aberrant_splicing/datasets/savedObjects/{dataset}/filter.done" `'
+#'                  "/aberrant_splicing/datasets/savedObjects/{dataset}/fds-object.RDS"`'
+#'   - done: '`sm cfg.getProcessedDataDir() +
+#'                  "/aberrant_splicing/datasets/savedObjects/{dataset}/filter.done" `'
 #'  threads: 3
 #'  type: script
 #'---
@@ -27,23 +29,56 @@ opts_chunk$set(fig.width=12, fig.height=8)
 # input
 dataset    <- snakemake@wildcards$dataset
 workingDir <- snakemake@params$workingDir
-params <- snakemake@config$aberrantSplicing
+params     <- snakemake@config$aberrantSplicing
+exCountIDs <- snakemake@params$exCountIDs
+exCountFiles <- snakemake@input$exCounts
+sample_anno_file <- snakemake@config$sampleAnnotation
+minExpressionInOneSample <- params$minExpressionInOneSample
+minDeltaPsi <- params$minDeltaPsi
 
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
 
 register(MulticoreParam(snakemake@threads))
 # Limit number of threads for DelayedArray operations
 setAutoBPPARAM(MulticoreParam(snakemake@threads))
 
-# Apply filter
-minExpressionInOneSample <- params$minExpressionInOneSample
-minDeltaPsi <- params$minDeltaPsi
+# Add external data if provided by dataset
+if(length(exCountIDs) > 0){
+    message("create new merged fraser object")
+    fds <- saveFraserDataSet(fds,dir = workingDir, name=paste0("raw-", dataset))
 
+    for(resource in unique(exCountFiles)){
+        exSampleIDs <- exCountIDs[exCountFiles == resource]
+        exAnno <- fread(sample_anno_file, key="RNA_ID")[J(exSampleIDs)]
+        setnames(exAnno, "RNA_ID", "sampleID")
+        
+        ctsNames <- c("k_j", "k_theta", "n_psi3", "n_psi5", "n_theta")
+        ctsFiles <- paste0(dirname(resource), "/", ctsNames, "_counts.tsv.gz")
+        
+        # Merging external counts restricts the junctions to those that 
+        # are only present in both the counted (fromBam) junctions AND the 
+        # junctions from the external counts.
+        fds <- mergeExternalData(fds=fds, countFiles=ctsFiles,
+                sampleIDs=exSampleIDs, annotation=exAnno)
+        fds@colData$isExternal <- as.factor(!is.na(fds@colData$SPLICE_COUNTS_DIR))
+    }
+} else {
+    message("symLink fraser dir")
+    file.symlink(paste0(workingDir, "savedObjects/","raw-local-", dataset),
+                 paste0(workingDir, "savedObjects/","raw-", dataset))
+    
+    fds@colData$isExternal <- as.factor(FALSE)
+    workingDir(fds) <- workingDir
+    name(fds) <- paste0("raw-", dataset)
+}
+
+# filter for expression and write it out to disc.
 fds <- filterExpressionAndVariability(fds, 
-                        minExpressionInOneSample = minExpressionInOneSample,
-                        minDeltaPsi = minDeltaPsi,
-                        filter=FALSE)
-devNull <- saveFraserDataSet(fds)
+        minExpressionInOneSample = minExpressionInOneSample,
+        minDeltaPsi = minDeltaPsi,
+        filter=FALSE)
+
+devNull <- saveFraserDataSet(fds,dir = workingDir)
 
 # Keep junctions that pass filter
 name(fds) <- dataset
@@ -55,5 +90,5 @@ if (params$filter == TRUE) {
 
 seqlevels(fds) <- seqlevelsInUse(fds)
 colData(fds)$sampleID <- as.character(colData(fds)$sampleID)
-fds <- saveFraserDataSet(fds)
+fds <- saveFraserDataSet(fds,dir = workingDir)
 file.create(snakemake@output$done)
diff --git a/drop/modules/aberrant-splicing-pipeline/Counting/Summary.R b/drop/modules/aberrant-splicing-pipeline/Counting/Summary.R
index e2c61d86..b5d19e34 100644
--- a/drop/modules/aberrant-splicing-pipeline/Counting/Summary.R
+++ b/drop/modules/aberrant-splicing-pipeline/Counting/Summary.R
@@ -28,22 +28,70 @@ suppressPackageStartupMessages({
 dataset    <- snakemake@wildcards$dataset
 workingDir <- snakemake@params$workingDir
 
-fds <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
+fdsLocal <- loadFraserDataSet(dir=workingDir, name=paste0("raw-local-", dataset))
+fdsMerge <- loadFraserDataSet(dir=workingDir, name=paste0("raw-", dataset))
 
-#' Number of samples: `r nrow(colData(fds))`
+has_external <- !(all(is.na(fdsMerge@colData$SPLICE_COUNTS_DIR)) || is.null(fdsMerge@colData$SPLICE_COUNTS_DIR))
+if(has_external){
+    fdsMerge@colData$isExternal <- as.factor(!is.na(fdsMerge@colData$SPLICE_COUNTS_DIR))
+}else{
+    fdsMerge@colData$isExternal <- as.factor(FALSE)
+}
+devNull <- saveFraserDataSet(fdsMerge,dir=workingDir, name=paste0("raw-", dataset))
+
+
+#' ## Number of samples:   
+#' Local: `r sum(!as.logical(fdsMerge@colData$isExternal))`  
+#' External: `r sum(as.logical(fdsMerge@colData$isExternal))`  
+#' 
+#' **Using external counts**  
+#' External counts introduce some complexity into the problem of counting junctions
+#' because it is unknown whether or not a junction is not counted (because there are no reads)
+#' compared to filtered and not present due to legal/personal sharing reasons. As a result,
+#' after merging the local (counted from BAM files) counts and the external counts, only the junctions that are 
+#' present in both remain. As a result it is likely that the number of junctions will decrease after merging.
+#' 
 #' 
-#' Number of introns (psi5 or psi3): `r length(rowRanges(fds, type = "psi5"))`
+#' ### Number of introns (psi5 or psi3) before and after merging:  
+#' Local: `r length(rowRanges(fdsLocal, type = "psi5"))`  
+#' Merged: `r length(rowRanges(fdsMerge, type = "psi5"))`  
 #' 
-#' Number of splice sites (theta): `r length(rowRanges(fds, type = "theta"))`
+#' ### Number of splice sites (theta) before and after merging: 
+#' Local: `r length(rowRanges(fdsLocal, type = "theta"))`  
+#' Merged: `r length(rowRanges(fdsMerge, type = "theta"))`  
 #' 
-#' Introns that passed filter
-table(mcols(fds, type="j")[,"passed"])
+
+#' ### Comparison of local and external counts  
+if(has_external){
+    externalCountIDs <- colData(fdsMerge)[as.logical(colData(fdsMerge)[,"isExternal"]),"sampleID"]
+    localCountIDs <- colData(fdsMerge)[!as.logical(colData(fdsMerge)[,"isExternal"]),"sampleID"]
+
+    cts <- K(fdsMerge,"psi5")
+    ctsLocal<- cts[,localCountIDs]
+    ctsExt<- cts[,externalCountIDs]
+
+    rowMeanLocal <- rowMeans(ctsLocal)
+    rowMeanExt <- rowMeans(ctsExt)
+
+    dt <- data.table("Mean counts of local samples" = rowMeanLocal,
+                     "Mean counts of external samples" = rowMeanExt)
+                 
+    ggplot(dt,aes(x = `Mean counts of local samples`, y= `Mean counts of external samples`)) +
+       geom_hex() + theme_cowplot(font_size = 16) +
+	   theme_bw() + scale_x_log10() + scale_y_log10() + 
+       geom_abline(slope = 1, intercept =0) +
+       scale_color_brewer(palette="Dark2") 
+}else{
+	print("No external counts, comparison is ommitted")
+}
 
 #' ## Expression filtering
 #' Min expression cutoff: `r snakemake@config$aberrantSplicing$minExpressionInOneSample`
-plotFilterExpression(fds) + theme_cowplot(font_size = 16)
+plotFilterExpression(fdsMerge) + theme_cowplot(font_size = 16)
 
 #' ## Variability filtering
 #' Variability cutoff: `r snakemake@config$aberrantSplicing$minDeltaPsi`
-plotFilterVariability(fds) + theme_cowplot(font_size = 16)
+plotFilterVariability(fdsMerge) + theme_cowplot(font_size = 16)
 
+#' Introns that passed filter (after merging)
+table(mcols(fdsMerge, type="j")[,"passed"])
diff --git a/drop/modules/aberrant-splicing-pipeline/FRASER/Summary.R b/drop/modules/aberrant-splicing-pipeline/FRASER/Summary.R
index a24168a5..91343626 100644
--- a/drop/modules/aberrant-splicing-pipeline/FRASER/Summary.R
+++ b/drop/modules/aberrant-splicing-pipeline/FRASER/Summary.R
@@ -24,13 +24,19 @@ source(snakemake@params$setup, echo=FALSE)
 
 suppressPackageStartupMessages({
   library(cowplot)
+  library("RColorBrewer")
 })
 
 #+ input
 dataset    <- snakemake@wildcards$dataset
 annotation <- snakemake@wildcards$annotation
+padj_cutoff <- snakemake@config$aberrantSplicing$padjCutoff
+zScore_cutoff <- snakemake@config$aberrantSplicing$zScoreCutoff
+deltaPsi_cutoff <- snakemake@config$aberrantSplicing$deltaPsiCutoff
+
 
 fds <- loadFraserDataSet(file=snakemake@input$fdsin)
+hasExternal <- length(levels(colData(fds)$isExternal) > 1)
 
 #' Number of samples: `r nrow(colData(fds))`
 #' 
@@ -53,44 +59,50 @@ for(type in psiTypes){
 }
 
 #' ## Aberrantly spliced genes per sample
-plotAberrantPerSample(fds, aggregate=TRUE, main=dataset_title) + 
+plotAberrantPerSample(fds, padjCutoff = padj_cutoff, zScoreCutoff = zScore_cutoff, deltaPsiCutoff = deltaPsi_cutoff,
+                      aggregate=TRUE, main=dataset_title) + 
   theme_cowplot(font_size = 16) +
   theme(legend.position = "top")
 
-#' ## Batch Correlation: Samples x samples
+#' ## Batch Correlation: samples x samples
 topN <- 30000
 topJ <- 10000
+anno_color_scheme <- brewer.pal(n = 3, name = 'Dark2')[1:2]
 for(type in psiTypes){
   before <- plotCountCorHeatmap(
-    fds,
+    object=fds,
     type = type,
     logit = TRUE,
     topN = topN,
     topJ = topJ,
     plotType = "sampleCorrelation",
     normalized = FALSE,
-    annotation_col = NA,
+    annotation_col = "isExternal",
     annotation_row = NA,
     sampleCluster = NA,
+    minDeltaPsi = snakemake@config$aberrantSplicing$minDeltaPsi,
     plotMeanPsi=FALSE,
     plotCov = FALSE,
-    annotation_legend = TRUE
+    annotation_legend = TRUE,
+	annotation_colors = list(isExternal = c("FALSE" = anno_color_scheme[1],"TRUE" =  anno_color_scheme[2]))
   )
   before
   after <- plotCountCorHeatmap(
-    fds,
+    object=fds,
     type = type,
     logit = TRUE,
     topN = topN,
     topJ = topJ,
     plotType = "sampleCorrelation",
     normalized = TRUE,
-    annotation_col = NA,
+    annotation_col = "isExternal",
     annotation_row = NA,
     sampleCluster = NA,
+    minDeltaPsi = snakemake@config$aberrantSplicing$minDeltaPsi,
     plotMeanPsi=FALSE,
     plotCov = FALSE,
-    annotation_legend = TRUE
+    annotation_legend = TRUE,
+	annotation_colors = list(isExternal = c("FALSE" = anno_color_scheme[1],"TRUE" =  anno_color_scheme[2]))
   )
   after
 }
diff --git a/drop/modules/mae-pipeline/MAE/Results.R b/drop/modules/mae-pipeline/MAE/Results.R
index d120388d..d11aa255 100644
--- a/drop/modules/mae-pipeline/MAE/Results.R
+++ b/drop/modules/mae-pipeline/MAE/Results.R
@@ -36,6 +36,7 @@ suppressPackageStartupMessages({
   library(GenomicRanges)
   library(SummarizedExperiment)
   library(R.utils)
+  library(dplyr)
 })
 
 # Read all MAE results files
@@ -119,14 +120,16 @@ fwrite(res[MAE_ALT == TRUE & rare == TRUE], snakemake@output$res_signif_rare,
 
 # Add columns for plot
 res[, N := .N, by = ID]
-res[MAE == TRUE, N_MAE := .N, by = ID]
-res[MAE == TRUE & MAE_ALT == FALSE, N_MAE_REF := .N, by = ID]
-res[MAE_ALT == TRUE, N_MAE_ALT := .N, by = ID]
-res[MAE == TRUE & MAE_ALT == FALSE & rare == TRUE, N_MAE_REF_RARE := .N, by = ID]
-res[MAE_ALT == TRUE & rare == TRUE, N_MAE_ALT_RARE := .N, by = ID]
-
-rd <- unique(res[,.(ID, N, N_MAE, N_MAE_REF, N_MAE_ALT, N_MAE_REF_RARE, N_MAE_ALT_RARE)])
-melt_dt <- melt(rd, id.vars = 'ID')
+plot_res <- res[,.(N = .N,
+             N_MAE = sum(MAE==T),
+             N_MAE_REF=sum(MAE==T & MAE_ALT == F),
+             N_MAE_ALT=sum(MAE_ALT == T),
+             N_MAE_REF_RARE = sum(MAE ==T & MAE_ALT==F & rare == T),
+             N_MAE_ALT_RARE = sum(MAE_ALT ==T & rare ==T)
+			 ),by = ID]
+
+
+melt_dt <- melt(plot_res, id.vars = 'ID')
 melt_dt[variable == 'N', variable := '>10 counts']
 melt_dt[variable == 'N_MAE', variable := '+MAE']
 melt_dt[variable == 'N_MAE_REF', variable := '+MAE for\nREF']
@@ -136,20 +139,36 @@ melt_dt[variable == 'N_MAE_ALT_RARE', variable := '+MAE for ALT\n& rare']
 
 #' 
 #' ## Cascade plot 
+#' a cascade plot that shows a progression of added filters  
+#'   - >10 counts: only variants supported by more than 10 counts
+#'   - +MAE: and shows mono allelic expression
+#'   - +MAE for REF : the monoallelic expression favors the reference allele
+#'   - +MAE for ALT : the monoallelic expression favors the alternative allele
+#'   - rare: 
+#'     - if `add_AF` is set to true in config file must meet minimum AF set by the config value `max_AF`
+#'     - must meet the inner-cohort frequency `maxVarFreqCohort` cutoff
+
 ggplot(melt_dt, aes(variable, value)) + geom_boxplot() +
-  scale_y_log10() + theme_bw(base_size = 14) +
-  labs(y = 'Heterozygous SNVs per patient', x = '') +
+  scale_y_log10(limits = c(1,NA)) + theme_bw(base_size = 14) +
+  labs(y = 'Heterozygous SNVs per patient', x = '') + 
     annotation_logticks(sides = "l")
 
 #'
-#' ## Variant Frequency within Cohort Histogram
+#' ## Variant Frequency within Cohort
 ggplot(unique(res[,cohort_freq,by =.(gene_name, contig, position)]),aes(x = cohort_freq)) + geom_histogram( binwidth = 0.02)  +
-  geom_vline(xintercept = maxCohortFreq, col = "red") +
-  xlab("Variant frequency in cohort") + ylab("Count")
+  geom_vline(xintercept = maxCohortFreq, col = "red",linetype="dashed") + theme_bw(base_size = 14) +
+  xlim(0,NA) + xlab("Variant frequency in cohort") + ylab("Variants")
 
 #' Median of each category
 DT::datatable(melt_dt[, .(median = median(value, na.rm = T)), by = variable])
 
+
+# round numbers
+if(nrow(res) > 0){
+  res[, pvalue := signif(pvalue, 3)]
+  res[, padj := signif(padj, 3)]
+  res[, log2FC := signif(log2FC, 3)]
+}
 #' 
 #' ## MAE Results table
 DT::datatable(
diff --git a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R
index 25543292..eba0bd9b 100644
--- a/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R
+++ b/drop/modules/mae-pipeline/QC/DNA_RNA_matrix_plot.R
@@ -32,8 +32,6 @@ identityCutoff <- .85
 
 ggplot(melt_mat, aes(value)) + geom_histogram(fill = 'cadetblue4', binwidth = 0.05, center = .025) + 
   theme_bw(base_size = 14) + 
-  labs(x = 'Proportion of matching DNA-RNA variants', y = 'DNA-RNA combinations') + 
-  scale_y_log10() + annotation_logticks(sides = "l") + 
   expand_limits(x=c(0,1)) +
   geom_vline(xintercept=identityCutoff, linetype='dashed', color = 'firebrick')
 
@@ -42,7 +40,7 @@ ggplot(melt_mat, aes(value)) + geom_histogram(fill = 'cadetblue4', binwidth = 0.
 
 #' Number of samples: `r nrow(qc_mat)`
 #' 
-#' Number of samples that match with another: `r length(qc_mat[qc_mat > identityCutoff])`
+#' Number of samples that match RNA and DNA: `r length(qc_mat[qc_mat > identityCutoff])`
 #'
 #' Median of proportion of matching variants in matching samples: `r round(median(qc_mat[qc_mat > identityCutoff]), 2)`
 #'
diff --git a/drop/requirementsR.txt b/drop/requirementsR.txt
index 3a896286..65e4152f 100644
--- a/drop/requirementsR.txt
+++ b/drop/requirementsR.txt
@@ -1,7 +1,7 @@
 package	version
 devtools
 gagneurlab/OUTRIDER	1.6.1
-c-mertes/FRASER	1.2.1
+c-mertes/FRASER	1.6.1
 gagneurlab/tMAE	1.0.4
 VariantAnnotation	
 rmarkdown	
diff --git a/drop/template/Scripts/AberrantExpression/Overview.R b/drop/template/Scripts/AberrantExpression/Overview.R
index a3c6f167..2b25b548 100644
--- a/drop/template/Scripts/AberrantExpression/Overview.R
+++ b/drop/template/Scripts/AberrantExpression/Overview.R
@@ -90,10 +90,14 @@ sample <- res[1, sampleID]
 #' ### Volcano plot
 #' setting basePlot = FALSE creates an interactive plot
 #' that allows finding the gene(s) of interest
-OUTRIDER::plotVolcano(ods, sample, basePlot = TRUE)
+OUTRIDER::plotVolcano(ods, sample, basePlot = TRUE,
+                      zScoreCutoff = snakemake@config$aberrantExpression$zScoreCutoff,
+                      padjCutoff = snakemake@config$aberrantExpression$padjCutoff)
 
 #' ### Gene expression plot (normalized counts)
-OUTRIDER::plotExpressionRank(ods, gene, basePlot = TRUE)
+OUTRIDER::plotExpressionRank(ods, gene, basePlot = TRUE,
+                      zScoreCutoff = snakemake@config$aberrantExpression$zScoreCutoff,
+                      padjCutoff = snakemake@config$aberrantExpression$padjCutoff)
 
 #' ### Expected vs observed counts
 OUTRIDER::plotExpectedVsObservedCounts(ods, gene, basePlot = TRUE)
diff --git a/drop/template/Scripts/AberrantSplicing/Overview.R b/drop/template/Scripts/AberrantSplicing/Overview.R
index 6bcff37b..f00a7913 100644
--- a/drop/template/Scripts/AberrantSplicing/Overview.R
+++ b/drop/template/Scripts/AberrantSplicing/Overview.R
@@ -87,7 +87,9 @@ siteIndex <- 4
 
 #' ### Volcano plot
 # set basePlot to FALSE to create an interactive plot
-FRASER::plotVolcano(fds, sample, type = 'psi3', basePlot = TRUE)
+FRASER::plotVolcano(fds, sample, type = 'psi3', basePlot = TRUE,
+                    deltaPsiCutoff = snakemake@config$aberrantSplicing$deltaPsiCutoff,
+                    padjCutoff = snakemake@config$aberrantSplicing$padjCutoff)
 
 #' ### Expression plot
 FRASER::plotExpression(fds, type = 'psi3', site = siteIndex, basePlot = TRUE)
diff --git a/drop/template/Scripts/MonoallelicExpression/Overview.R b/drop/template/Scripts/MonoallelicExpression/Overview.R
index 5720c6a8..29ee9212 100644
--- a/drop/template/Scripts/MonoallelicExpression/Overview.R
+++ b/drop/template/Scripts/MonoallelicExpression/Overview.R
@@ -9,6 +9,7 @@
 #'    - datasets: '`sm cfg.MAE.groups`'
 #'    - qc_groups: '`sm cfg.MAE.qcGroups`'
 #'    - htmlDir: '`sm config["htmlOutputPath"] + "/MonoallelicExpression"`'
+#'    - resultsDir: '`sm cfg.getProcessedResultsDir() + "/mae"`'
 #'  input:
 #'    - functions: '`sm cfg.workDir / "Scripts/html_functions.R"`'
 #'    - allelic_counts: '`sm expand(cfg.getProcessedDataDir() +
@@ -24,7 +25,7 @@
 #'                  "dna_rna_qc_matrix.Rds", qc_group=cfg.MAE.qcGroups)`'
 #' output:
 #'   html_document:
-#'    code_folding: hide
+#'    code_folding: show
 #'    code_download: TRUE
 #'---
 
@@ -37,6 +38,7 @@ source(snakemake@input$functions)
 datasets <- sort(snakemake@params$datasets)
 annotations <- snakemake@params$annotations
 htmlDir <- snakemake@params$htmlDir
+resultsDir <- snakemake@params$resultsDir
 
 results_links <- sapply(
   annotations, function(v) build_link_list(
@@ -45,6 +47,13 @@ results_links <- sapply(
   )
 )
 
+table_links <- sapply(
+  annotations, function(v) build_link_list(
+    file_paths = file.path(resultsDir, paste0(datasets, '/MAE_results_', v, '.tsv')),
+    captions = paste0(datasets)
+  )
+)
+
 #'
 #' **Datasets:** `r paste(datasets, collapse = ', ')`
 #'
@@ -55,35 +64,17 @@ results_links <- sapply(
 #'
 #' ## Files
 #' * [Allelic counts](`r file.path(snakemake@config$root, 'processed_data/mae/allelic_counts/')`)
-#' * [Results tables of each sample](`r file.path(snakemake@config$root, 'processed_results/mae/samples/')`)
-#' * [Aggregated results tables of each group](`r paste('* ', snakemake@input$results_tables, collapse = '\n')`)
-#'
-#' ## Analyze Individual Results
-# Read the first results table
-res_sample <- readRDS(snakemake@input$results_obj[[1]])
+#' * [Results data tables of each sample (.Rds)](`r file.path(snakemake@config$root, 'processed_results/mae/samples/')`)  
 
-#+echo=F
-library(tMAE)
-
-if(is.null(res_sample$rare)){
-  g1 <- plotMA4MAE(res_sample)
-  g2 <- plotAllelicCounts(res_sample)
-} else {
-  g1 <- plotMA4MAE(res_sample, rare_column = 'rare')
-  g2 <- plotAllelicCounts(res_sample, rare_column = 'rare')
-}
-
-#' ### MA plot: fold change vs RNA coverage
-g1
+#'
+#' `r display_text(caption = 'Significant MAE results tables ', links = table_links)`
 
-#' ### Alternative vs Reference plot
-g2
 
 #' ## Quality Control: VCF-BAM Matching
 #+ eval=TRUE, echo=FALSE
 qc_groups <- sort(snakemake@params$qc_groups)
 qc_links <- build_link_list(
-    file_paths = file.path(htmlDir, paste0('QC', qc_groups, '.html')),
+    file_paths = file.path(htmlDir, paste0('QC/', qc_groups, '.html')),
     captions = qc_groups
 )
 
@@ -95,3 +86,36 @@ qc_matrix_links <- build_link_list(
 #' `r display_text(caption = 'QC Overview ', links = qc_links)`
 #' `r display_text(caption = 'DNA-RNA matrix ', links = qc_matrix_links)`
 #'
+
+#+ eval=TRUE, echo=TRUE
+#' ## Analyze Individual Results
+# Read the first results table
+res_sample <- readRDS(snakemake@input$results_obj[[1]])
+print(unique(res_sample$ID))
+
+#+echo=F
+library(tMAE)
+
+if(is.na(res_sample$rare)){
+  g1 <- plotMA4MAE(res_sample,
+                   padjCutoff = snakemake@config$mae$padjCutoff,
+                   allelicRatioCutoff = snakemake@config$mae$allelicRatioCutoff )
+  g2 <- plotAllelicCounts(res_sample,
+                   padjCutoff = snakemake@config$mae$padjCutoff,
+                   allelicRatioCutoff = snakemake@config$mae$allelicRatioCutoff )
+} else {
+  g1 <- plotMA4MAE(res_sample, rare_column = 'rare',
+                   padjCutoff = snakemake@config$mae$padjCutoff,
+                   allelicRatioCutoff = snakemake@config$mae$allelicRatioCutoff )
+  g2 <- plotAllelicCounts(res_sample, rare_column = 'rare',
+                   padjCutoff = snakemake@config$mae$padjCutoff,
+                   allelicRatioCutoff = snakemake@config$mae$allelicRatioCutoff )
+}
+
+#' ### MA plot: fold change vs RNA coverage
+#+echo=F
+g1
+
+#' ### Alternative vs Reference plot
+#+echo=F
+g2
diff --git a/drop/utils.py b/drop/utils.py
index b325f38f..fae88237 100644
--- a/drop/utils.py
+++ b/drop/utils.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 from snakemake.logging import logger
 import wbuild
+import copy
 
 
 def returnPath(path, str_=True):
@@ -65,22 +66,45 @@ def getWBuildSnakefile(str_=True):
     return returnPath(wb_path / "wBuild.snakefile", str_=str_)
 
 
-def subsetBy(df, column, values, exact_match=True):
+def subsetBy(df, column, values):
     """
     Subset by one or more values of different columns from data frame
     :param df: data frame
     :param column: column to subset by
     :param values: values to subset by
-    :param exact_match: default True. when False match substrings. Important for subsetting drop groups
     :return: df subset by values and column
     """
     if values is None:
         return df
-    elif isinstance(values, str) and exact_match :
-        return df[df[column] == values]
-    elif not isinstance(values,str) and exact_match:
-        return df[df[column].isin(values)]
-    elif isinstance(values,str) and not exact_match:
-        return df[df[column].str.contains(values)]
-    else:
-        return df[df[column].str.contains("|".join(values))]
+    
+    inner_regex = values
+    if not isinstance(values, str) :
+        inner_regex = "(" + "|".join(values) + ")"
+    
+    return  df[df[column].str.contains("(?:^|,)" + inner_regex + "(?:,|$)", na = False)]
+    
+def deep_merge_dict(dict1: dict, dict2: dict, inplace: bool = False):
+    """
+    Merges two dictionaries and all is children recursively
+    
+    :param dict1: dictionary to be merged into
+    :param dict2: dictionary to be merged
+    :param inplace: if False, default, a new dictionary will be returned als in-place merging is performed.
+    """
+    if not inplace:
+        dict1 = copy.deepcopy(dict1)
+        dict2 = copy.deepcopy(dict2)
+    
+    for k, v in dict2.items():
+        if isinstance(dict1.get(k), dict) and isinstance(v, dict):
+            dict1[k] = deep_merge_dict(dict1[k], v, inplace=inplace)
+        elif k not in dict1:
+            dict1[k] = v
+        elif isinstance(dict1.get(k), list) and isinstance(v, list):
+            dict1[k] = list(dict.fromkeys(dict1[k] + v))
+        elif isinstance(dict1.get(k), str) and isinstance(v, str):
+            dict1[k] = [dict1.get(k), v]
+        else:
+            raise TypeError(f"{k} has different types that can not be merged.")
+        
+    return dict1
diff --git a/environment.yml b/environment.yml
index ec905be2..53be3e1c 100644
--- a/environment.yml
+++ b/environment.yml
@@ -8,3 +8,6 @@ dependencies:
   - drop
   - flake8
   - bioconductor-bsgenome.hsapiens.ucsc.hg19
+
+  # required for downloading R packages through github
+  - unzip
diff --git a/tests/config/test_AE.py b/tests/config/test_AE.py
index fa1e1023..ed211c24 100644
--- a/tests/config/test_AE.py
+++ b/tests/config/test_AE.py
@@ -3,7 +3,7 @@ class Test_AE_Config:
     def test_config(self, dropConfig,demo_dir):
         assert dropConfig.AE.getWorkdir() == f"{demo_dir}/Scripts/AberrantExpression/pipeline"
         dict_ = {
-            'groups': ['outrider', 'import_exp'],
+            'groups': ['outrider', 'outrider_external'],
             'fpkmCutoff': 1,
             'implementation': 'autoencoder',
             'padjCutoff': 1,
@@ -35,9 +35,9 @@ def test_getCountsFiles(self, demo_dir, dropConfig):
 
         # import count
         counts_files_true = counts_files_true[2:]
-        counts_files_true.append(f"{demo_dir}/Data/external_geneCounts.tsv.gz")
+        counts_files_true.append(f"{demo_dir}/Data/external_count_data/geneCounts.tsv.gz")
         counts_files_true.sort()
-        counts_files_test = dropConfig.AE.getCountFiles(annotation="v29", group="import_exp")
+        counts_files_test = dropConfig.AE.getCountFiles(annotation="v29", group="outrider_external")
         counts_files_test.sort()
         assert counts_files_true == counts_files_test
 
diff --git a/tests/config/test_AS.py b/tests/config/test_AS.py
index df6b527d..5ab6d1e2 100644
--- a/tests/config/test_AS.py
+++ b/tests/config/test_AS.py
@@ -3,7 +3,7 @@ class Test_AS_Config:
     def test_config(self, dropConfig,demo_dir):
         assert dropConfig.AS.getWorkdir() == f"{demo_dir}/Scripts/AberrantSplicing/pipeline"
         dict_ = {
-            'groups': ['fraser'],
+            'groups': ['fraser', 'fraser_external'],
             'recount': True,
             'longRead': False,
             'keepNonStandardChrs': True,
@@ -19,7 +19,7 @@ def test_config(self, dropConfig,demo_dir):
         assert dict_.items() <= dropConfig.AS.dict_.items()
 
     def test_getSplitCountFiles(self, demo_dir, dropConfig):
-        counts_dir = f"{demo_dir}/Output/processed_data/aberrant_splicing/datasets/cache/raw-fraser/sample_tmp/" \
+        counts_dir = f"{demo_dir}/Output/processed_data/aberrant_splicing/datasets/cache/raw-local-fraser/sample_tmp/" \
                      "splitCounts"
         ids = [
             'HG00096.1.M_111124_6_trunc', 'HG00103.4.M_120208_3_trunc', 'HG00111.2.M_111215_4_trunc',
@@ -35,7 +35,7 @@ def test_getSplitCountFiles(self, demo_dir, dropConfig):
         assert counts_files_true == counts_files_test
 
     def test_getNonSplitCountFiles(self, demo_dir, dropConfig):
-        counts_dir = f"{demo_dir}/Output/processed_data/aberrant_splicing/datasets/cache/raw-fraser/sample_tmp/" \
+        counts_dir = f"{demo_dir}/Output/processed_data/aberrant_splicing/datasets/cache/raw-local-fraser/sample_tmp/" \
                      "nonSplitCounts"
         ids = [
             'HG00096.1.M_111124_6_trunc', 'HG00103.4.M_120208_3_trunc', 'HG00111.2.M_111215_4_trunc',
diff --git a/tests/config/test_MAE.py b/tests/config/test_MAE.py
index 54a2d8e6..c1f480e9 100644
--- a/tests/config/test_MAE.py
+++ b/tests/config/test_MAE.py
@@ -7,8 +7,8 @@ def test_config(self,dropConfig,demo_dir):
             'groups': ['mae'],
             'qcGroups': ['mae'],
             'gatkIgnoreHeaderCheck': True,
-            'padjCutoff': 0.05,
-            'allelicRatioCutoff': 0.8,
+            'padjCutoff': 0.5,
+            'allelicRatioCutoff': 0.7,
             'maxAF': 0.001,
             'addAF': False,
             'maxVarFreqCohort': 1,
diff --git a/tests/config/test_SampleAnnotation.py b/tests/config/test_SampleAnnotation.py
index ba93966f..311789f3 100644
--- a/tests/config/test_SampleAnnotation.py
+++ b/tests/config/test_SampleAnnotation.py
@@ -13,9 +13,9 @@ def test_columns(self, sampleAnnotation):
 
     def test_mapping(self, sampleAnnotation):
         # ID mappings/groups
-        assert sampleAnnotation.idMapping.shape == (22, 2)
-        assert sampleAnnotation.sampleFileMapping.shape == (32, 4)
-        true_mapping = {'mae': 2, 'import_exp': 8, 'outrider': 10, 'fraser': 10}
+        assert sampleAnnotation.idMapping.shape == (24, 2)
+        assert sampleAnnotation.sampleFileMapping.shape == (35, 4)
+        true_mapping = {'mae': 2, 'outrider_external': 8, 'outrider': 10, 'fraser': 10, 'fraser_external': 7}
         assert true_mapping == {k: len(v) for k, v in sampleAnnotation.rnaIDs.items()}
         assert true_mapping == {k: len(v) for k, v in sampleAnnotation.dnaIDs.items()}
 
@@ -23,8 +23,9 @@ def test_mapping(self, sampleAnnotation):
         "sample_id,file_type,file_name",
         [
             ("HG00096.1.M_111124_6", "RNA_BAM_FILE", "Data/rna_bam/HG00096.1.M_111124_6_chr21.bam"),
-            ("HG00178.4.M_120208_8", "GENE_COUNTS_FILE", "Data/external_geneCounts.tsv.gz"),
-            ("HG00096", "DNA_VCF_FILE", "Data/dna_vcf/demo_chr21.vcf.gz")
+            ("HG00178.4.M_120208_8", "GENE_COUNTS_FILE", "Data/external_count_data/geneCounts.tsv.gz"),
+            ("HG00096", "DNA_VCF_FILE", "Data/dna_vcf/demo_chr21.vcf.gz"),
+            ("HG00201.1.M_120208_6", "SPLICE_COUNTS_DIR", "Data/external_count_data")
         ]
     )
     def test_filePaths(self, demo_dir, sampleAnnotation, sample_id, file_type, file_name):
@@ -35,7 +36,7 @@ def test_filePaths(self, demo_dir, sampleAnnotation, sample_id, file_type, file_
     @pytest.mark.parametrize(
         "annotation,group,files",
         [
-            ("v29", "import_exp", {'Data/external_geneCounts.tsv.gz'})
+            ("v29", "outrider_external", {'Data/external_count_data/geneCounts.tsv.gz'})
         ]
     )
     def test_import(self, demo_dir, sampleAnnotation, annotation, group, files):
diff --git a/tests/pipeline/test_AE.py b/tests/pipeline/test_AE.py
index 1251c5ec..2befe1e2 100644
--- a/tests/pipeline/test_AE.py
+++ b/tests/pipeline/test_AE.py
@@ -50,7 +50,7 @@ def test_results(self, demo_dir):
         assert "res: 4310 15" in r.stdout
 
     def test_import_results(self, demo_dir):
-        output_dir = "Output/processed_results/aberrant_expression/v29/outrider/import_exp"
+        output_dir = "Output/processed_results/aberrant_expression/v29/outrider/outrider_external"
         r_cmd = """
                 # ods object
                 ods <- readRDS(file.path("{}", "ods.Rds"))
@@ -69,17 +69,12 @@ def test_import_results(self, demo_dir):
     def no_import(self, demo_dir):
         LOGGER.info("dryrun without import counts...")
 
-        # remove last 2 lines of sample annotation
-        run("head -n -2 Data/sample_annotation.tsv > Data/sample_annotation_noimp.tsv", demo_dir)
-
         # adapt config
-        run("sed 's/sample_annotation.tsv/sample_annotation_noimp.tsv/' config.yaml | "
-            "sed '/import_exp/d' > config_noimp.yaml", demo_dir)
+        run("sed '/outrider_external/d' config.yaml > config_noimp.yaml", demo_dir)
 
         yield demo_dir
 
         # reset changed files back to original
-        run("rm Data/sample_annotation_noimp.tsv", demo_dir)
         run("rm config_noimp.yaml", demo_dir)
 
     def test_no_import(self, no_import):
diff --git a/tests/pipeline/test_MAE.py b/tests/pipeline/test_MAE.py
index d014790f..f5ea3e76 100644
--- a/tests/pipeline/test_MAE.py
+++ b/tests/pipeline/test_MAE.py
@@ -32,7 +32,7 @@ def test_counts(self, demo_dir):
         assert "[1] 235" in r.stdout
 
     @pytest.mark.usefixtures("pipeline_run")
-    def test_results(self, demo_dir):
+    def test_all_results(self, demo_dir):
         results_file = "Output/processed_results/mae/mae/MAE_results_all_v29.tsv.gz"
         r_cmd = """
                 library(data.table)
@@ -41,3 +41,14 @@ def test_results(self, demo_dir):
                 """.format(results_file)
         r = runR(r_cmd, demo_dir)
         assert "[1] 253" in r.stdout
+
+    @pytest.mark.usefixtures("pipeline_run")
+    def test_sig_results(self, demo_dir):
+        results_file = "Output/processed_results/mae/mae/MAE_results_v29.tsv"
+        r_cmd = """
+                library(data.table)
+                res <- fread("{}")
+                print(nrow(res))
+                """.format(results_file)
+        r = runR(r_cmd, demo_dir)
+        assert "[1] 3" in r.stdout