diff --git a/README.md b/README.md
index 8f7fb640..8558a004 100644
--- a/README.md
+++ b/README.md
@@ -37,18 +37,19 @@ Options:
   --help               Show this message and exit.
 
 Commands:
-  read      Read 10x data and save in specified format.
-  filter    Filter data based on specified conditions.
-  norm      Normalise data per cell.
-  hvg       Find highly variable genes.
-  scale     Scale data per gene.
-  regress   Regress-out observation variables.
-  pca       Dimensionality reduction by PCA.
-  neighbor  Compute a neighbourhood graph of observations.
-  embed     Embed cells into two-dimensional space.
-  cluster   Cluster cells into sub-populations.
-  diffexp   Find markers for each clusters.
-  paga      Trajectory inference by abstract graph analysis.
-  dpt       Calculate diffusion pseudotime relative to the root cells.
-  plot      Visualise data.
+  read       Read 10x data and save in specified format.
+  filter     Filter data based on specified conditions.
+  norm       Normalise data per cell.
+  hvg        Find highly variable genes.
+  scale      Scale data per gene.
+  regress    Regress-out observation variables.
+  pca        Dimensionality reduction by PCA.
+  neighbor   Compute a neighbourhood graph of observations.
+  embed      Embed cells into two-dimensional space.
+  cluster    Cluster cells into sub-populations.
+  diffexp    Find markers for each clusters.
+  paga       Trajectory inference by abstract graph analysis.
+  dpt        Calculate diffusion pseudotime relative to the root cells.
+  integrate  Integrate cells from different experimental batches.
+  plot       Visualise data.
   ```
diff --git a/scanpy-scripts-tests.bats b/scanpy-scripts-tests.bats
index 4022bedd..4c846ce0 100755
--- a/scanpy-scripts-tests.bats
+++ b/scanpy-scripts-tests.bats
@@ -69,6 +69,19 @@ setup() {
     plt_rank_genes_groups_matrix_pdf="${output_dir}/rggmatrix_${test_clustering}.pdf"
     plt_rank_genes_groups_dot_pdf="${output_dir}/rggdot_${test_clustering}.pdf"
     plt_rank_genes_groups_heatmap_pdf="${output_dir}/rggheatmap_${test_clustering}.pdf"
+    harmony_integrate_obj="${output_dir}/harmony_integrate.h5ad"
+    harmony_integrate_opt="--batch-key ${test_clustering}"
+    harmony_plt_embed_opt="--projection 2d --color ${test_clustering} --title 'PCA embeddings after harmony' --basis 'X_pca_harmony'"
+    noharmony_plt_embed_opt="--projection 2d --color ${test_clustering} --title 'PCA embeddings before harmony' --basis 'X_pca'"
+    harmony_integrated_pca_pdf="${output_dir}/harmony_pca_${test_clustering}.pdf"
+    noharmony_integrated_pca_pdf="${output_dir}/pca_${test_clustering}.pdf"
+    bbknn_obj="${output_dir}/bbknn.h5ad"
+    bbknn_opt="--batch-key ${test_clustering} --key-added bbknn"
+    mnn_obj="${output_dir}/mnn.h5ad"
+    mnn_opt="--batch-key ${test_clustering}"
+    combat_obj="${output_dir}/combat.h5ad"
+    combat_opt="--batch-key ${test_clustering}"
+    
 
     if [ ! -d "$data_dir" ]; then
         mkdir -p $data_dir
@@ -442,6 +455,85 @@ setup() {
     [ -f  "$plt_rank_genes_groups_matrix_pdf" ]
 }
 
+# Do harmony batch correction, using clustering as batch (just for test purposes)
+
+@test "Run Harmony batch integration using clustering as batch" {
+    if [ "$resume" = 'true' ] && [ -f "$harmony_integrate_obj" ]; then
+        skip "$harmony_integrate_obj exists and resume is set to 'true'"
+    fi
+
+    run rm -f $harmony_integrate_obj && eval "$scanpy integrate harmony $harmony_integrate_opt $louvain_obj $harmony_integrate_obj"
+
+    [ "$status" -eq 0 ]
+    [ -f  "$plt_rank_genes_groups_matrix_pdf" ]
+
+}
+
+# Run Plot PCA embedding before harmony
+
+@test "Run Plot PCA embedding before Harmony" {
+    if [ "$resume" = 'true' ] && [ -f "$noharmony_integrated_pca_pdf" ]; then
+        skip "$noharmony_integrated_pca_pdf exists and resume is set to 'true'"
+    fi
+
+    run rm -f $noharmony_integrated_pca_pdf && eval "$scanpy plot embed $noharmony_plt_embed_opt $louvain_obj $noharmony_integrated_pca_pdf"
+
+    [ "$status" -eq 0 ]
+    [ -f  "$noharmony_integrated_pca_pdf" ]
+}
+
+# Run Plot PCA embedding after harmony
+
+@test "Run Plot PCA embedding after Harmony" {
+    if [ "$resume" = 'true' ] && [ -f "$harmony_integrated_pca_pdf" ]; then
+        skip "$harmony_integrated_pca_pdf exists and resume is set to 'true'"
+    fi
+
+    run rm -f $harmony_integrated_pca_pdf && eval "$scanpy plot embed $harmony_plt_embed_opt $harmony_integrate_obj $harmony_integrated_pca_pdf"
+
+    [ "$status" -eq 0 ]
+    [ -f  "$harmony_integrated_pca_pdf" ]
+}
+
+# Do bbknn batch correction, using clustering as batch (just for test purposes)
+
+@test "Run BBKNN batch integration using clustering as batch" {
+    if [ "$resume" = 'true' ] && [ -f "$bbknn_obj" ]; then
+        skip "$bbknn_obj exists and resume is set to 'true'"
+    fi
+
+    run rm -f $bbknn_obj && eval "$scanpy integrate bbknn $bbknn_opt $louvain_obj $bbknn_obj"
+
+    [ "$status" -eq 0 ]
+    [ -f  "$plt_rank_genes_groups_matrix_pdf" ]
+}
+
+# Do MNN batch correction, using clustering as batch (just for test purposes)
+
+@test "Run MNN batch integration using clustering as batch" {
+    if [ "$resume" = 'true' ] && [ -f "$mnn_obj" ]; then
+        skip "$mnn_obj exists and resume is set to 'true'"
+    fi
+
+    run rm -f $mnn_obj && eval "$scanpy integrate mnn $mnn_opt $louvain_obj $mnn_obj"
+
+    [ "$status" -eq 0 ]
+    [ -f  "$mnn_obj" ]
+}
+
+# Do ComBat batch correction, using clustering as batch (just for test purposes)
+
+@test "Run Combat batch integration using clustering as batch" {
+    if [ "$resume" = 'true' ] && [ -f "$combat_obj" ]; then
+        skip "$combat_obj exists and resume is set to 'true'"
+    fi
+
+    run rm -f $combat_obj && eval "$scanpy integrate combat $combat_opt $louvain_obj $combat_obj"
+
+    [ "$status" -eq 0 ]
+    [ -f  "$combat_obj" ]
+}
+
 # Local Variables:
 # mode: sh
 # End:
diff --git a/scanpy_scripts/cli.py b/scanpy_scripts/cli.py
index dc7a07d7..b5ed0bfc 100755
--- a/scanpy_scripts/cli.py
+++ b/scanpy_scripts/cli.py
@@ -30,6 +30,10 @@
     PLOT_DOT_CMD,
     PLOT_MATRIX_CMD,
     PLOT_HEATMAP_CMD,
+    HARMONY_INTEGRATE_CMD,
+    BBKNN_CMD,
+    MNN_CORRECT_CMD,
+    COMBAT_CMD,
 )
 
 
@@ -101,6 +105,16 @@ def cluster():
 cli.add_command(DPT_CMD)
 
 
+@cli.group(cls=NaturalOrderGroup)
+def integrate():
+    """Integrate cells from different experimental batches."""
+
+integrate.add_command(HARMONY_INTEGRATE_CMD)
+integrate.add_command(BBKNN_CMD)
+integrate.add_command(MNN_CORRECT_CMD)
+integrate.add_command(COMBAT_CMD)
+
+
 @cli.group(cls=NaturalOrderGroup)
 def plot():
     """Visualise data."""
diff --git a/scanpy_scripts/cmd_options.py b/scanpy_scripts/cmd_options.py
index 36e8276e..03e16d6b 100644
--- a/scanpy_scripts/cmd_options.py
+++ b/scanpy_scripts/cmd_options.py
@@ -503,7 +503,23 @@
             help='Key for categorical in `.obs`. You can pass your predefined '
             'groups by choosing any categorical annotation of observations.',
         ),
-    ]
+    ],
+
+    'batch_key': click.option(
+        '--batch-key', 'key',
+        type=click.STRING,
+        required=True,
+        help='The name of the column in adata.obs that differentiates among '
+        'experiments/batches.'
+    ),
+    
+    'batch_layer': click.option(
+        '--layer', '-l',
+        type=click.STRING,
+        default=None,
+        show_default=True,
+        help="Layer to batch correct. By default corrects the contents of .X."
+    ),
 }
 
 CMD_OPTIONS = {
@@ -697,6 +713,14 @@
             help='Cutoffs for the dispersion of expression'
             'in the format of "-d min max".',
         ),
+        click.option(
+            '--span',
+            type=click.FLOAT,
+            default=0.3,
+            show_default=True,
+            help="The fraction of the data (cells) used when estimating the "
+            "variance in the loess model fit if flavor='seurat_v3'."    
+        ),
         click.option(
             '--n-bins', '-b',
             type=click.INT,
@@ -713,7 +737,7 @@
         ),
         click.option(
             '--flavor', '-v',
-            type=click.Choice(['seurat', 'cellranger']),
+            type=click.Choice(['seurat', 'cellranger', 'seurat_v3']),
             default='seurat',
             show_default=True,
             help='Choose the flavor for computing normalized dispersion.',
@@ -726,12 +750,16 @@
             'only flag highly-variable genes.',
         ),
         click.option(
-            '--by-batch', '-B',
-            type=(click.STRING, click.INT),
-            default=(None, None),
-            show_default=True,
-            help='Find highly variable genes within each batch defined by <TEXT> '
-            'then pool and keep those found in at least <INTEGER> batches.',
+            '--batch-key', 'batch_key',
+            type=click.STRING,
+            default=None,
+            help="If specified, highly-variable genes are selected within each "
+            "batch separately and merged. This simple process avoids the selection of "
+            "batch-specific genes and acts as a lightweight batch correction method. For all "
+            "flavors, genes are first sorted by how many batches they are a HVG. For "
+            "dispersion-based flavors ties are broken by normalized dispersion. If flavor = "
+            "'seurat_v3', ties are broken by the median (across batches) rank based on "
+            "within-batch normalized variance."
         ),
     ],
 
@@ -1152,6 +1180,21 @@
             help='Rank genes by the absolute value of the score, not by the score. '
             'The returned scores are never the absolute values.',
         ),
+        click.option(
+            '--pts',
+            is_flag=True,
+            default=False,
+            show_default=True,
+            help='Compute the fraction of cells expressing the genes.'
+        ),
+        click.option(
+            '--tie-correct',
+            is_flag=True,
+            default=False,
+            show_default=True,
+            help="Use tie correction for 'wilcoxon' scores. Used only for "
+            "'wilcoxon'."
+        ),
         click.option(
             '--filter-params',
             type=Dictionary(keys=[
@@ -1269,6 +1312,321 @@
         ),
     ],
 
+    'combat': [
+        *COMMON_OPTIONS['input'],
+        *COMMON_OPTIONS['output'],
+        COMMON_OPTIONS['batch_key'],
+        COMMON_OPTIONS['batch_layer'],
+        click.option(
+            '--key-added',
+            type=click.STRING,
+            default=None,
+            show_default=True,
+            help="Key under which to add the computed results. By default a new "
+            "layer will be created called 'combat', 'combat_{layer}' or "
+            "'combat_layer_{key_added}' where those parameters were specified. A value of 'X' "
+            "causes batch-corrected values to overwrite the original content of .X."
+        ),
+        click.option(
+            '--covariates',
+            type=(CommaSeparatedText()),
+            default=None,
+            show_default=True,
+            help="Comma-separated list of additional covariates besides the "
+            "batch variable such as adjustment variables or biological condition. This "
+            "parameter refers to the design matrix X in Equation 2.1 in [Johnson07] and to "
+            "the mod argument in the original combat function in the sva R package.  Note "
+            "that not including covariates may introduce bias or lead to the removal of "
+            "biological signal in unbalanced designs."
+        ),
+        
+    ],
+
+    'harmony': [
+        *COMMON_OPTIONS['input'],
+        *COMMON_OPTIONS['output'],
+        COMMON_OPTIONS['batch_key'],
+        click.option(
+            '--basis',
+            type=click.STRING,
+            default='X_pca',
+            show_default=True,
+            help="The name of the field in adata.obsm where the PCA table is "
+            "stored. Defaults to 'X_pca', which is the default for sc.tl.pca()."
+        ),
+        click.option(
+            '--adjusted-basis',
+            type=click.STRING,
+            default='X_pca_harmony',
+            show_default=True,
+            help='The name of the field in adata.obsm where the adjusted PCA '
+            'table will be stored after running this function.'
+        ),
+        click.option(
+            '--theta',
+            type=click.FLOAT,
+            default=2,
+            show_default=True,
+            help='Diversity clustering penalty parameter. theta=0 does not encourage any '
+            'diversity. Larger values of theta result in more diverse clusters.'
+        ),
+        click.option(
+            '--lambda', 'lamb',
+            type=click.FLOAT,
+            default=1,
+            show_default=True,
+            help='Ridge regression penalty parameter. Lambda must be strictly '
+            'positive.  Smaller values result in more aggressive correction.'
+        ),
+        click.option(
+            '--sigma',
+            type=click.FLOAT,
+            default=0.1,
+            show_default=True,
+            help='Width of soft kmeans clusters. Sigma scales the distance from '
+            'a cell to cluster centroids. Larger values of sigma result in cells assigned to '
+            'more clusters. Smaller values of sigma make soft kmeans cluster approach hard '
+            'clustering.'
+        ),
+        click.option(
+            '--n-clust', 'nclust',
+            type=click.INT,
+            default=None,
+            show_default=False,
+            help='Number of clusters in model. nclust=1 equivalent to simple '
+            'linear regression.'
+        ),
+        click.option(
+            '--tau',
+            type=click.INT,
+            default=0,
+            show_default=True,
+            help='Protection against overclustering small datasets with large ones. '
+            'tau is the expected number of cells per cluster.'
+        ),
+        click.option(
+            '--block-size',
+            type=click.FLOAT,
+            default=0.05,
+            show_default=True,
+            help='What proportion of cells to update during clustering. Between '
+            '0 to 1, default 0.05. Larger values may be faster but less accurate.'
+        ),
+        click.option(
+            '--max-iter-cluster', 'max_iter_kmeans',
+            type=click.INT,
+            default=20,
+            show_default=True,
+            help='Maximum number of rounds to run clustering at each round of '
+            'Harmony.'
+        ),
+        click.option(
+            '--max-iter-harmony',
+            type=click.INT,
+            default=10,
+            show_default=True,
+            help='Maximum number of rounds to run Harmony. One round of Harmony '
+            'involves one clustering and one correction step.'
+        ),
+        click.option(
+            '--epsilon-cluster',
+            type=click.FLOAT,
+            default=1e-5,
+            show_default=True,
+            help='Convergence tolerance for clustering round of Harmony Set to '
+            '-Inf to never stop early.'
+        ),
+        click.option(
+            '--epsilon-harmony',
+            type=click.FLOAT,
+            default=1e-5,
+            show_default=True,
+            help='Convergence tolerance for clustering round of Harmony Set to '
+            '-Inf to never stop early.'
+        ),
+        COMMON_OPTIONS['random_state'],
+    ],
+
+    'mnn': [
+        *COMMON_OPTIONS['input'],
+        *COMMON_OPTIONS['output'],
+        COMMON_OPTIONS['batch_key'],
+        COMMON_OPTIONS['batch_layer'],
+        click.option(
+            '--key-added',
+            type=click.STRING,
+            default=None,
+            show_default=True,
+            help="Key under which to add the computed results. By default a new "
+            "layer will be created called 'mnn', 'mnn_{layer}' or "
+            "'mnn_layer_{key_added}' where those parameters were specified. A value of 'X' "
+            "causes batch-corrected values to overwrite the original content of .X."
+        ),
+        click.option(
+            '--var-subset',
+            type=(click.STRING, CommaSeparatedText()),
+            multiple=True,
+            help="The subset of vars (list of str) to be used when performing "
+            "MNN correction in the format of '--var-subset <name> <values>'. Typically, use "
+            "the highly variable genes (HVGs) like '--var-subset highly_variable True'. When "
+            "unset, uses all vars."
+        ),
+        click.option(
+            '--n-neighbors', '-k',
+            type=CommaSeparatedText(click.INT, simplify=True),
+            default=20,
+            show_default=True,
+            help='Number of mutual nearest neighbors.'
+        ),
+        click.option(
+            '--sigma',
+            type=click.FLOAT,
+            default=1.0,
+            show_default=True,
+            help='The bandwidth of the Gaussian smoothing kernel used to '
+            'compute the correction vectors.'
+        ),
+        click.option(
+            '--no-cos_norm_in', 'cos_norm_in',
+            is_flag=True,
+            default=True,
+            help='Default behaviour is to perform cosine normalization on the '
+            'input data prior to calculating distances between cells. Use this '
+            'flag to disable that behaviour.'
+        ),
+        click.option(
+            '--no-cos_norm_out', 'cos_norm_out',
+            is_flag=True,
+            default=True,
+            help='Default behaviour is to perform cosine normalization prior to '
+            'computing corrected expression values. Use this flag to disable that '
+            'behaviour.'
+        ),
+        click.option(
+            '--svd-dim',
+            type=click.INT,
+            default=None,
+            show_default=True,
+            help='The number of dimensions to use for summarizing biological '
+            'substructure within each batch. If not set, biological components '
+            'will not be removed from the correction vectors.'
+        ),
+        click.option(
+            '--no-var-adj',
+            is_flag=True,
+            default=True,
+            help='Default behaviour is to adjust variance of the correction '
+            'vectors. Use this flag to disable that behaviour. Note this step takes most '
+            'computing time.'
+        ),
+        click.option(
+            '--compute-angle',
+            is_flag=True,
+            default=False,
+            help='When set, compute the angle between each cell’s correction '
+            'vector and the biological subspace of the reference batch.'
+        ),
+        click.option(
+            '--svd-mode',
+            type=click.Choice(['svd', 'rsvd', 'irlb']),
+            default='rsvd',
+            show_default=True,
+            help="'svd' computes SVD using a non-randomized SVD-via-ID "
+            "algorithm, while 'rsvd' uses a randomized version. 'irlb' performs truncated "
+            "SVD by implicitly restarted Lanczos bidiagonalization (forked from "
+            "https://github.com/airysen/irlbpy)."
+        ),
+    ],
+
+    'bbknn': [
+        *COMMON_OPTIONS['input'],
+        *COMMON_OPTIONS['output'],
+        COMMON_OPTIONS['key_added'],
+        COMMON_OPTIONS['batch_key'],
+        click.option(
+            '--use-rep', '-u',
+            type=click.STRING,
+            default='X_pca',
+            show_default=True,
+            help='The dimensionality reduction in .obsm to use for neighbour '
+            'detection.'
+        ),
+        COMMON_OPTIONS['use_pc'][0], # --n-pcs
+        click.option(
+            '--no-approx', 'approx',
+            is_flag=True,
+            default=True,
+            help='Default behaviour is to use annoy’s approximate neighbour '
+            'finding. This results in a quicker run time for large datasets while also '
+            'potentially increasing the degree of batch correction. Use this flag to disable '
+            'that behaviour.',
+        ),
+        click.option(
+            '--metric', '-t',
+            type=click.Choice(['angular', 'cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']),
+            default='angular',
+            show_default=True,
+            help='A known metric’s name.'
+        ),
+        click.option(
+            '--neighbors-within-batch',
+            type=click.INT,
+            default=3,
+            show_default=True,
+            help='How many top neighbours to report for each batch; total '
+            'number of neighbours will be this number times the number of batches.'
+        ),
+        click.option(
+            '--trim',
+            type=click.INT,
+            default=None,
+            show_default=True,
+            help='Trim the neighbours of each cell to these many top '
+            'connectivities. May help with population independence and improve the tidiness '
+            'of clustering. The lower the value the more independent the individual '
+            'populations, at the cost of more conserved batch effect. If None, sets the '
+            'parameter value automatically to 10 times the total number of neighbours for '
+            'each cell. Set to 0 to skip.'
+        ),
+        click.option(
+            '--n-trees',
+            type=click.INT,
+            default=10,
+            show_default=True,
+            help='Only used when approx=True. The number of trees to construct '
+            'in the annoy forest. More trees give higher precision when querying, at the '
+            'cost of increased run time and resource intensity.'
+        ),
+        click.option(
+            '--no-use-faiss', 'use_faiss',
+            is_flag=True,
+            default=True,
+            help='Default behaviour If approx=False and the metric is '
+            '“euclidean”, is to use the faiss package to compute nearest neighbours if '
+            'installed. This improves performance at a minor cost to numerical precision as '
+            'faiss operates on float32. Use this flag to disable that behaviour.'
+        ),
+        click.option(
+            '--set-op-mix-ratio',
+            type=click.FLOAT,
+            default=1,
+            show_default=True,
+            help='UMAP connectivity computation parameter, float between 0 and '
+            '1, controlling the blend between a connectivity matrix formed exclusively from '
+            'mutual nearest neighbour pairs (0) and a union of all observed neighbour '
+            'relationships with the mutual pairs emphasised (1).'
+        ),
+        click.option(
+            '--local-connectivity',
+            type=click.INT,
+            default=1,
+            show_default=True,
+            help='UMAP connectivity computation parameter, how many nearest '
+            'neighbors of each cell are assumed to be fully connected (and given a '
+            'connectivity value of 1)'
+        ),
+    ],
+
     'embed': [
         *COMMON_OPTIONS['input'],
         *COMMON_OPTIONS['plot'],
diff --git a/scanpy_scripts/cmd_utils.py b/scanpy_scripts/cmd_utils.py
index 8f7a5665..a2449e07 100644
--- a/scanpy_scripts/cmd_utils.py
+++ b/scanpy_scripts/cmd_utils.py
@@ -235,9 +235,11 @@ def plot_function(
             if func_name == 'scatter' or func_name == 'embedding':
                 prefix =  kwargs.get('basis', func.__name__)
             elif kind:
-                prefix = kind
+                prefix = kind 
             elif func_name in plot_funcs:
                 prefix = plot_funcs[ func_name ].__name__.split('.')[-1]
+                if func_name in [ 'sviol', 'rgg_sviol', 'dot', 'rgg_dot', 'matrix', 'rgg_matrix' ]:
+                    prefix = prefix + '_'
 
             os.rename(
                 os.path.join(sc.settings.figdir, prefix + figname), output_fig)
diff --git a/scanpy_scripts/cmds.py b/scanpy_scripts/cmds.py
index e697e69c..b955f68b 100644
--- a/scanpy_scripts/cmds.py
+++ b/scanpy_scripts/cmds.py
@@ -5,6 +5,7 @@
 import os
 import sys
 import scanpy as sc
+import scanpy.external as sce
 
 from .cmd_utils import (
     make_subcmd,
@@ -25,6 +26,9 @@
 from .lib._paga import paga
 from .lib._diffmap import diffmap
 from .lib._dpt import dpt
+from .lib._bbknn import bbknn
+from .lib._mnn import mnn_correct
+from .lib._combat import combat
 
 LANG = os.environ.get('LANG', None)
 
@@ -210,3 +214,31 @@
     arg_desc=_IP_DESC,
     opt_set='plot_paga'
 )
+
+COMBAT_CMD = make_subcmd(
+    'combat',
+    combat,
+    cmd_desc='ComBat function for batch effect correction',
+    arg_desc=_IO_DESC
+)
+
+HARMONY_INTEGRATE_CMD = make_subcmd(
+    'harmony',
+    sce.pp.harmony_integrate,
+    cmd_desc='Use harmonypy [Korunsky19] to integrate different experiments.',    
+    arg_desc=_IO_DESC,
+)
+
+BBKNN_CMD = make_subcmd(
+    'bbknn',
+    bbknn,
+    cmd_desc='Batch balanced kNN [Polanski19].',
+    arg_desc=_IO_DESC,
+)
+
+MNN_CORRECT_CMD = make_subcmd(
+    'mnn',
+    mnn_correct,
+    cmd_desc='Correct batch effects by matching mutual nearest neighbors [Haghverdi18] [Kang18].',
+    arg_desc=_IO_DESC,
+)
diff --git a/scanpy_scripts/lib/_bbknn.py b/scanpy_scripts/lib/_bbknn.py
new file mode 100644
index 00000000..a10cb3a0
--- /dev/null
+++ b/scanpy_scripts/lib/_bbknn.py
@@ -0,0 +1,28 @@
+"""
+scanpy external bbknn
+"""
+
+import scanpy.external as sce
+
+from ..obj_utils import (
+    _backup_default_key,
+    _delete_backup_key,
+    _rename_default_key,
+)
+
+# Wrapper for bbknn allowing use of non-standard slot
+
+def bbknn(adata, key=None, key_added=None, **kwargs):
+    """
+    Wrapper function for sce.pp.bbknn(), for supporting non-standard neighbors slot
+    """
+
+    _backup_default_key(adata.uns, 'neighbors')
+    sce.pp.bbknn(adata, batch_key = key, **kwargs)    
+
+    if key_added:
+        _rename_default_key(adata.uns, 'neighbors',  f'neighbors_{key_added}')
+    else:
+        _delete_backup_key(adata.uns, 'neighbors')
+    
+    return adata
diff --git a/scanpy_scripts/lib/_combat.py b/scanpy_scripts/lib/_combat.py
new file mode 100644
index 00000000..7ef65bea
--- /dev/null
+++ b/scanpy_scripts/lib/_combat.py
@@ -0,0 +1,51 @@
+"""
+scanpy combat
+"""
+
+import scanpy as sc
+
+# Wrapper for mnn allowing use of non-standard slot
+
+def combat(adata, key=None, key_added=None, layer=None, **kwargs):
+    """
+    Wrapper function for scanpy.pp.combat(), for supporting non-standard slots
+    """
+
+    # If layer is set then we have to move the contents of that layer into
+    # .X for analysis. We back up the original .X, but only if the user hasn't
+    # specified to overwrite it anyway.
+
+    if layer:
+        if key_added and key_added != 'X':
+            adata.layers['X_backup'] = adata.X
+
+        adata.X = adata.layers[layer]
+    
+    # If we're storing results in .X (whether from .X or from a layer), run in
+    # place to save copying objects.
+
+    if key_added and key_added == 'X':
+        sc.pp.combat(adata, key = key, **kwargs)    
+    
+    # If we're storing in 'layers' (key_added is not set, or is not X, then
+    # don't run in place, and put the matrix in the specified layer. 
+
+    else:
+
+        cdata = sc.pp.combat(adata, key=key, inplace = False, **kwargs)    
+            
+        combat_key = 'combat'
+        if layer:
+            combat_key = f"{combat_key}_{layer}"
+        
+            # If we ran from a layer, restore the .X we had to overwrite
+            
+            adata.X = adata.layers['X_backup']    
+            del adata.layers['X_backup'] 
+            
+        if key_added:
+            combat_key = f"{combat_key}_{key_added}"
+
+        adata.layers[combat_key] = cdata
+
+    return adata
diff --git a/scanpy_scripts/lib/_hvg.py b/scanpy_scripts/lib/_hvg.py
index a4101729..3cd53b73 100644
--- a/scanpy_scripts/lib/_hvg.py
+++ b/scanpy_scripts/lib/_hvg.py
@@ -9,50 +9,24 @@ def hvg(
         adata,
         mean_limits=(0.0125, 3),
         disp_limits=(0.5, float('inf')),
-        subset=False,
-        by_batch=None,
         **kwargs,
 ):
     """
-    Wrapper function for sc.highly_variable_genes(), mainly to support searching
-    by batch and pooling.
+    Wrapper function for sc.highly_variable_genes()
     """
 
     # Check for n_top_genes beeing greater than the total genes
 
     if 'n_top_genes' in kwargs and kwargs['n_top_genes'] is not None:
         kwargs['n_top_genes'] = min(adata.n_vars, kwargs['n_top_genes'])
-
-    if by_batch and isinstance(by_batch, (list, tuple)) and by_batch[0]:
-        batch_name = by_batch[0]
-        min_n = by_batch[1]
-        k_hvg = np.zeros(adata.shape[1], dtype=int)
-        for batch_value in adata.obs[batch_name].unique():
-            k_v = adata.obs[batch_name] == batch_value
-            ad = adata[k_v, :]
-            k_f, _ = sc.pp.filter_genes(ad, min_cells=3, inplace=False)
-            hvg = sc.pp.highly_variable_genes(
-                ad[:, k_f],
-                min_mean=mean_limits[0],
-                max_mean=mean_limits[1],
-                min_disp=disp_limits[0],
-                max_disp=disp_limits[1],
-                inplace=False,
-                **kwargs,
-            )['highly_variable']
-            k_hvg[k_f] += hvg
-        if subset:
-            adata = adata[:, k_hvg >= min_n]
-        else:
-            adata.var['highly_variable'] = k_hvg >= min_n
-    else:
-        sc.pp.highly_variable_genes(
-            adata,
-            min_mean=mean_limits[0],
-            max_mean=mean_limits[1],
-            min_disp=disp_limits[0],
-            max_disp=disp_limits[1],
-            subset=subset,
-            **kwargs,
-        )
+    
+    sc.pp.highly_variable_genes(
+        adata,
+        min_mean=mean_limits[0],
+        max_mean=mean_limits[1],
+        min_disp=disp_limits[0],
+        max_disp=disp_limits[1],
+        **kwargs,
+    )
+    
     return adata
diff --git a/scanpy_scripts/lib/_mnn.py b/scanpy_scripts/lib/_mnn.py
new file mode 100644
index 00000000..0a559a64
--- /dev/null
+++ b/scanpy_scripts/lib/_mnn.py
@@ -0,0 +1,85 @@
+"""
+scanpy external mnn
+"""
+
+import scanpy.external as sce
+import numpy as np 
+import click
+
+# Wrapper for mnn allowing use of non-standard slot
+
+def mnn_correct(adata, key=None, key_added=None, var_subset=None, layer=None, **kwargs):
+    """
+    Wrapper function for sce.pp.mnn_correct(), for supporting non-standard neighbors slot
+    """
+
+    # mnn will use .X, so we need to put other layers there for processing
+
+    if layer:
+        adata.layers['X_backup'] = adata.X
+        adata.X = adata.layers[layer]
+
+    # mnn_correct() wants batches in separate adatas
+
+    batches = np.unique(adata.obs[key])
+    alldata = []
+    for batch in batches:
+        alldata.append( adata[adata.obs[key] == batch,] )
+
+    # Process var_subset into a list of strings that can be provided to
+    # mnn_correct()
+
+    if var_subset is not None and len(var_subset) > 0 and var_subset[0] is not None:
+   
+        subset = []
+ 
+        for name, values in var_subset :
+            if name in adata.var:
+                if adata.var[name].dtype == 'bool':
+                    values = [ True if x.lower() == "true" else x for x in values  ]
+            else:
+                raise click.ClickException(f'Var "{name}" unavailable')
+
+            ind = [ x in values for x in adata.var[name]  ]
+            subset = subset + adata.var.index[ ind  ].to_list()
+
+        var_subset = set(subset)
+        print('Will use %d selected genes for MNN' % len(var_subset))
+
+    else:
+        var_subset = None
+
+    # Here's the main bit
+
+    cdata = sce.pp.mnn_correct(*alldata, var_subset = var_subset, do_concatenate = True, index_unique = None, **kwargs)    
+    
+    # If user has specified key_added = X then they want us to overwrite .X,
+    # othwerwise copy the .X to a named layer of the original object. In either
+    # case make sure obs and var are the same as the original.
+
+    if key_added is None or key_added != 'X':
+
+        mnn_key = 'mnn'
+        if layer:
+            mnn_key = f"{mnn_key}_{layer}"
+        
+            # Layers is set (so we're not storing computed results in the .X,
+            # and we had to overwrite .X to run mnn), and key_added shows we're
+            # not storing in the .X, so we need to restore from the backup.
+
+            adata.X = adata.layers['X_backup']
+
+        if key_added:
+            mnn_key = f"{mnn_key}_{key_added}"
+   
+        adata.layers[mnn_key] = cdata[0][adata.obs.index, adata.var.index].X
+
+    else:
+        adata.X = cdata[0][adata.obs.index, adata.var.index].X
+
+    # Delete the backup of .X if we needed one 
+
+    if layer:
+        del adata.layers['X_backup'] 
+
+    return adata
diff --git a/setup.py b/setup.py
index 55d620f2..cb4df062 100644
--- a/setup.py
+++ b/setup.py
@@ -41,12 +41,15 @@
         'matplotlib',
         'pandas',
         'h5py',
-        'scanpy>=1.5.1,<1.6.0',
+        'scanpy>=1.6.0',
         'louvain',
         'leidenalg',
         'loompy>=2.0.0,<3.0.0',
         'MulticoreTSNE',
         'Click',
-        'umap-learn<0.4.0'
+        'umap-learn<0.4.0',
+        'harmonypy>=0.0.5',
+        'bbknn>=1.3.12',
+        'mnnpy>=0.1.9.5'
     ],
 )