diff --git a/dynamo/external/pearson_residual_recipe.py b/dynamo/external/pearson_residual_recipe.py index e92b215b3..35d5f7122 100644 --- a/dynamo/external/pearson_residual_recipe.py +++ b/dynamo/external/pearson_residual_recipe.py @@ -28,7 +28,7 @@ is_nonnegative_integer_arr, seurat_get_mean_var, ) -from ..preprocessing.utils import pca_monocle +from ..preprocessing.utils import pca main_logger = LoggerManager.main_logger diff --git a/dynamo/preprocessing/Preprocessor.py b/dynamo/preprocessing/Preprocessor.py index 1cf017e70..a17ea7fa8 100644 --- a/dynamo/preprocessing/Preprocessor.py +++ b/dynamo/preprocessing/Preprocessor.py @@ -22,7 +22,7 @@ select_genes_by_pearson_residuals, ) from ..tools.connectivity import neighbors as default_neighbors -from .preprocess import normalize_cell_expr_by_size_factors_legacy, pca_monocle +from .preprocess import normalize_cell_expr_by_size_factors_legacy, pca from .preprocessor_utils import _infer_labeling_experiment_type from .preprocessor_utils import ( filter_cells_by_outliers as monocle_filter_cells_by_outliers, @@ -62,7 +62,7 @@ def __init__( normalize_selected_genes_kwargs: dict = {}, use_log1p: bool = True, log1p_kwargs: dict = {}, - pca_function: bool = pca_monocle, + pca_function: Callable = pca, pca_kwargs: dict = {}, gene_append_list: List[str] = [], gene_exclude_list: List[str] = [], @@ -72,36 +72,50 @@ def __init__( """Preprocessor constructor. The default preprocess functions are those of monocle recipe by default. - You can pass your own Callable objects (functions) to this constructor directly, which wil be used in the - preprocess steps later. These functions parameters are saved into Preprocessor instances. You can set these - attributes directly to your own implementation. + You can pass your own Callable objects (functions) to this constructor + directly, which will be used in the preprocess steps later. These + functions' parameters are saved into Preprocessor instances. You can set + these attributes directly to your own implementation. Args: - collapse_speicies_adata_function: function for collapsing the species data. Defaults to - collapse_species_adata. - convert_gene_name_function: transform gene names, by default convert2symbol, which transforms unofficial - gene names to official gene names. Defaults to convert2symbol. - filter_cells_by_outliers_function: filter cells by thresholds. Defaults to monocle_filter_cells_by_outliers. - filter_cells_by_outliers_kwargs: arguments that will be passed to filter_cells_by_outliers. Defaults to {}. - filter_genes_by_outliers_function: filter genes by thresholds. Defaults to monocle_filter_genes_by_outliers. - filter_genes_by_outliers_kwargs: arguments that will be passed to filter_genes_by_outliers. Defaults to {}. - normalize_by_cells_function: function for performing cell-wise normalization. Defaults to - normalize_cell_expr_by_size_factors. - normalize_by_cells_function_kwargs: arguments that will be passed to normalize_by_cells_function. Defaults - to {}. - select_genes_function: function for selecting gene features. Defaults to select_genes_by_dispersion_general. - select_genes_kwargs: arguments that will be passed to select_genes. Defaults to {}. - normalize_selected_genes_function: function for normalize selected genes. Defaults to None. - normalize_selected_genes_kwargs: arguments that will be passed to normalize_selected_genes. Defaults to {}. - use_log1p: whether to use log1p to normalize layers in adata. Defaults to True. 
+ collapse_species_adata_function: function for collapsing the + species data. Defaults to collapse_species_adata. + convert_gene_name_function: transform gene names, by default + convert2symbol, which transforms unofficial gene names to + official gene names. Defaults to convert2symbol. + filter_cells_by_outliers_function: filter cells by thresholds. + Defaults to monocle_filter_cells_by_outliers. + filter_cells_by_outliers_kwargs: arguments that will be passed to + filter_cells_by_outliers. Defaults to {}. + filter_genes_by_outliers_function: filter genes by thresholds. + Defaults to monocle_filter_genes_by_outliers. + filter_genes_by_outliers_kwargs: arguments that will be passed to + filter_genes_by_outliers. Defaults to {}. + normalize_by_cells_function: function for performing cell-wise + normalization. Defaults to normalize_cell_expr_by_size_factors. + normalize_by_cells_function_kwargs: arguments that will be passed to + normalize_by_cells_function. Defaults to {}. + select_genes_function: function for selecting gene features. + Defaults to select_genes_by_dispersion_general. + select_genes_kwargs: arguments that will be passed to select_genes. + Defaults to {}. + normalize_selected_genes_function: function for normalizing selected + genes. Defaults to None. + normalize_selected_genes_kwargs: arguments that will be passed to + normalize_selected_genes. Defaults to {}. + use_log1p: whether to use log1p to normalize layers in adata. + Defaults to True. log1p_kwargs: arguments passed to use_log1p. Defaults to {}. - pca_function: function to perform pca. Defaults to pca_monocle. + pca_function: function to perform pca. Defaults to pca in utils.py. pca_kwargs: arguments that will be passed pca. Defaults to {}. - gene_append_list: ensure that a list of genes show up in selected genes in monocle recipe pipeline. Defaults - to []. - gene_exclude_list: exclude a list of genes in monocle recipe pipeline. Defaults to []. - force_gene_list: use this gene list as selected genes in monocle recipe pipeline. Defaults to None. - sctransform_kwargs: arguments passed into sctransform function. Defaults to {}. + gene_append_list: ensure that a list of genes show up in selected + genes in the monocle recipe pipeline. Defaults to []. + gene_exclude_list: exclude a list of genes in the monocle recipe + pipeline. Defaults to []. + force_gene_list: use this gene list as selected genes in the monocle + recipe pipeline. Defaults to None. + sctransform_kwargs: arguments passed into sctransform function. + Defaults to {}. """ self.convert_layers2csr = convert_layers2csr @@ -138,13 +152,15 @@ def __init__( def add_experiment_info( self, adata: AnnData, tkey: Optional[str] = None, experiment_type: Optional[str] = None ) -> None: - """Infer the experiment type and experiment layers stored in the AnnData object and record the info in unstructured metadata (.uns). + """Infer the experiment type and experiment layers stored in the AnnData + object and record the info in unstructured metadata (.uns). Args: adata: an AnnData object. - tkey: the key for time information (labeling time period for the cells) in .obs. Defaults to None. - experiment_type: the experiment type. If set to None, the experiment type would be inferred from the data. - Defaults to None. + tkey: the key for time information (labeling time period for the + cells) in .obs. Defaults to None. + experiment_type: the experiment type. If set to None, the experiment + type would be inferred from the data. Defaults to None. Raises: ValueError: the tkey is invalid. 
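Note: a minimal usage sketch of the pluggable-Callable design documented in the constructor above (an editorial illustration, not part of the patch; it assumes Preprocessor is importable from dynamo.preprocessing, as the updated tests below do, and that the constructor defaults already match the monocle recipe):

import dynamo as dyn
from dynamo.preprocessing import Preprocessor
from dynamo.preprocessing.utils import pca

adata = dyn.sample_data.zebrafish()

# Callables and kwargs passed at construction are stored on the instance
# and used by the corresponding preprocess steps later; any compatible
# Callable could be swapped in for pca here.
preprocessor = Preprocessor(pca_function=pca, pca_kwargs={"pca_key": "X_pca"})
preprocessor.preprocess_adata_monocle(adata)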
@@ -213,13 +229,15 @@ def standardize_adata(self, adata: AnnData, tkey: str, experiment_type: str) -> """Process the AnnData object to make it meet the standards of dynamo. The index of the observations would be ensured to be unique. - The layers with sparse matrix would be converted to compressed csr_matrix. - DKM.allowed_layer_raw_names() will be used to define only_splicing, only_labeling and splicing_labeling keys. - The genes would be renamed to their official name. + The layers with sparse matrix would be converted to compressed + csr_matrix. DKM.allowed_layer_raw_names() will be used to define + only_splicing, only_labeling and splicing_labeling keys. The genes + would be renamed to their official name. Args: adata: an AnnData object. - tkey: the key for time information (labeling time period for the cells) in .obs. + tkey: the key for time information (labeling time period for the + cells) in .obs. experiment_type: the experiment type. """ @@ -229,7 +247,6 @@ def standardize_adata(self, adata: AnnData, tkey: str, experiment_type: str) -> main_info_insert_adata("tkey=%s" % tkey, "uns['pp']", indent_level=2) main_info_insert_adata("experiment_type=%s" % experiment_type, "uns['pp']", indent_level=2) main_info("making adata observation index unique...") - self.unique_var_obs_adata(adata) self.convert_layers2csr(adata) if self.collapse_species_adata: @@ -239,11 +256,13 @@ def standardize_adata(self, adata: AnnData, tkey: str, experiment_type: str) -> if self.convert_gene_name: main_info("applying convert_gene_name function...") self.convert_gene_name(adata) - main_info("making adata observation index unique after gene name conversion...") - self.unique_var_obs_adata(adata) + + main_info("making adata observation index unique after gene name conversion...") + self.unique_var_obs_adata(adata) def _filter_cells_by_outliers(self, adata: AnnData) -> None: - """Select valid cells based on the method specified as the preprocessor's `filter_cells_by_outliers`. + """Select valid cells based on the method specified as the + preprocessor's `filter_cells_by_outliers`. Args: adata: an AnnData object. @@ -255,7 +274,8 @@ def _filter_cells_by_outliers(self, adata: AnnData) -> None: self.filter_cells_by_outliers(adata, **self.filter_cells_by_outliers_kwargs) def _filter_genes_by_outliers(self, adata: AnnData) -> None: - """Select valid genes based on the method specified as the preprocessor's `filter_genes_by_outliers`. + """Select valid genes based on the method specified as the + preprocessor's `filter_genes_by_outliers`. Args: adata: an AnnData object. @@ -267,7 +287,8 @@ def _filter_genes_by_outliers(self, adata: AnnData) -> None: self.filter_genes_by_outliers(adata, **self.filter_genes_by_outliers_kwargs) def _select_genes(self, adata: AnnData) -> None: - """selecting gene by features, based on method specified as the preprocessor's `select_genes`. + """Select genes by features, based on the method specified as the + preprocessor's `select_genes`. Args: adata: an AnnData object. @@ -279,7 +300,8 @@ def _select_genes(self, adata: AnnData) -> None: self.select_genes(adata, **self.select_genes_kwargs) def _append_gene_list(self, adata: AnnData) -> None: - """Add genes to the feature gene list detected by the preprocessing steps. + """Add genes to the feature gene list detected by the preprocessing + steps. Args: adata: an AnnData object. @@ -291,7 +313,8 @@ def _append_gene_list(self, adata: AnnData) -> None: main_info("appended %d extra genes as required..." 
% len(append_genes)) def _exclude_gene_list(self, adata: AnnData) -> None: - """Remove genes from the feature gene list detected by the preprocessing steps. + """Remove genes from the feature gene list detected by the preprocessing + steps. Args: adata: an AnnData object. @@ -303,7 +326,8 @@ def _exclude_gene_list(self, adata: AnnData) -> None: main_info("excluded %d genes as required..." % len(exclude_genes)) def _force_gene_list(self, adata: AnnData) -> None: - """Use the provided gene list as the feature gene list, overwrite the gene list detected by the preprocessing steps. + """Use the provided gene list as the feature gene list, overwriting the + gene list detected by the preprocessing steps. Args: adata: an AnnData object. @@ -321,7 +345,8 @@ def _force_gene_list(self, adata: AnnData) -> None: main_info("self.force_gene_list is None, skipping filtering by gene list...") def _normalize_selected_genes(self, adata: AnnData) -> None: - """Normalize selected genes with method specified in the preprocessor's `normalize_selected_genes` + """Normalize selected genes with the method specified in the preprocessor's + `normalize_selected_genes`. Args: adata: an AnnData object. @@ -337,7 +362,8 @@ def _normalize_selected_genes(self, adata: AnnData) -> None: self.normalize_selected_genes(adata, **self.normalize_selected_genes_kwargs) def _normalize_by_cells(self, adata: AnnData) -> None: - """Performing cell-wise normalization based on method specified as the preprocessor's `normalize_by_cells`. + """Perform cell-wise normalization based on the method specified as the + preprocessor's `normalize_by_cells`. Args: adata: an AnnData object. @@ -351,7 +377,8 @@ def _normalize_by_cells(self, adata: AnnData) -> None: self.normalize_by_cells(adata, **self.normalize_by_cells_function_kwargs) def _log1p(self, adata: AnnData) -> None: - """Perform log1p on the data with args specified in the preprocessor's `log1p_kwargs`. + """Perform log1p on the data with args specified in the preprocessor's + `log1p_kwargs`. Args: adata: an AnnData object. @@ -369,7 +396,8 @@ def _log1p(self, adata: AnnData) -> None: self.log1p(adata, **self.log1p_kwargs) def _pca(self, adata: AnnData) -> None: - """Perform pca reduction with args specified in the preprocessor's `pca_kwargs`. + """Perform pca reduction with args specified in the preprocessor's + `pca_kwargs`. Args: adata: an AnnData object. @@ -379,6 +407,31 @@ def _pca(self, adata: AnnData) -> None: main_info("reducing dimension by PCA...") self.pca(adata, **self.pca_kwargs) + def preprocess_adata_seurat_wo_pca( + self, + adata: AnnData, + tkey: Optional[str] = None, + experiment_type: Optional[str] = None + ) -> None: + """Preprocess the anndata object according to the standard preprocessing in + the Seurat recipe without PCA. This can be used to test different + dimension reduction methods. + """ + main_info("Running preprocessing pipeline...") + temp_logger = LoggerManager.gen_logger("preprocessor-seurat_wo_pca") + temp_logger.log_time() + + self.standardize_adata(adata, tkey, experiment_type) + self._filter_cells_by_outliers(adata) + self._filter_genes_by_outliers(adata) + self._normalize_by_cells(adata) + self._select_genes(adata) + self._log1p(adata) + + temp_logger.finish_progress( + progress_name="preprocess by seurat wo pca recipe" + ) + def config_monocle_recipe( self, adata: AnnData, n_top_genes: int = 2000, gene_selection_method: str = "SVR" ) -> None: @@ -386,8 +439,10 @@ def config_monocle_recipe( Args: adata: an AnnData object. 
- n_top_genes: Number of top feature genes to select in the preprocessing step. Defaults to 2000. - gene_selection_method: Which sorting method to be used to select genes. Defaults to "SVR". + n_top_genes: Number of top feature genes to select in the + preprocessing step. Defaults to 2000. + gene_selection_method: Which sorting method to use to select + genes. Defaults to "SVR". """ n_obs, n_genes = adata.n_obs, adata.n_vars @@ -445,7 +500,7 @@ def config_monocle_recipe( # recipe monocle log1p all raw data in normalize_by_cells (dynamo version), so we do not need extra log1p transform. self.use_log1p = False - self.pca = pca_monocle + self.pca = pca self.pca_kwargs = {"pca_key": "X_pca"} def preprocess_adata_monocle( @@ -455,9 +510,10 @@ Args: adata: an AnnData object. - tkey: the key for time information (labeling time period for the cells) in .obs. Defaults to None. - experiment_type: the experiment type of the data. If not provided, would be inferred from the data. Defaults - to None. + tkey: the key for time information (labeling time period for the + cells) in .obs. Defaults to None. + experiment_type: the experiment type of the data. If not provided, + would be inferred from the data. Defaults to None. """ main_info("Running preprocessing pipeline...") @@ -485,7 +541,8 @@ def preprocess_adata_monocle( temp_logger.finish_progress(progress_name="preprocess") def config_seurat_recipe(self, adata: AnnData) -> None: - """Automatically configure the preprocessor for using the seurat style recipe. + """Automatically configure the preprocessor for using the seurat style + recipe. Args: adata: an AnnData object. @@ -503,16 +560,19 @@ def config_seurat_recipe(self, adata: AnnData) -> None: def preprocess_adata_seurat( self, adata: AnnData, tkey: Optional[str] = None, experiment_type: Optional[str] = None ) -> None: - """The preprocess pipeline in Seurat based on dispersion, implemented by dynamo authors. + """The preprocess pipeline in Seurat based on dispersion, implemented by + the dynamo authors. - Stuart and Butler et al. Comprehensive Integration of Single-Cell Data. Cell (2019) - Butler et al. Integrating single-cell transcriptomic data across different conditions, technologies, and species. Nat Biotechnol + Stuart and Butler et al. Comprehensive Integration of Single-Cell Data. + Cell (2019). Butler et al. Integrating single-cell transcriptomic data + across different conditions, technologies, and species. Nat Biotechnol. Args: adata: an AnnData object - tkey: the key for time information (labeling time period for the cells) in .obs. Defaults to None. - experiment_type: the experiment type of the data. If not provided, would be inferred from the data. Defaults - to None. + tkey: the key for time information (labeling time period for the + cells) in .obs. Defaults to None. + experiment_type: the experiment type of the data. If not provided, + would be inferred from the data. Defaults to None. """ temp_logger = LoggerManager.gen_logger("preprocessor-seurat") @@ -528,7 +588,8 @@ def preprocess_adata_seurat( temp_logger.finish_progress(progress_name="preprocess by seurat recipe") def config_sctransform_recipe(self, adata: AnnData) -> None: - """Automatically configure the preprocessor for using the sctransform style recipe. + """Automatically configure the preprocessor for using the sctransform + style recipe. Args: adata: an AnnData object. 
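Note: the new preprocess_adata_seurat_wo_pca entry point added above stops just before dimension reduction, so an arbitrary reducer can be applied afterwards. A sketch under that assumption (editorial, not part of the patch; sklearn's TruncatedSVD stands in for whatever method is being tested, and "X_svd" is an arbitrary key):

import dynamo as dyn
from dynamo.preprocessing import Preprocessor
from sklearn.decomposition import TruncatedSVD

adata = dyn.sample_data.zebrafish()
preprocessor = Preprocessor()
preprocessor.preprocess_adata_seurat_wo_pca(adata)

# Reduce only the selected feature genes, mirroring what pca() does internally.
X = adata.X[:, adata.var.use_for_pca.values]
adata.obsm["X_svd"] = TruncatedSVD(n_components=30, random_state=0).fit_transform(X)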
@@ -553,13 +614,15 @@ def preprocess_adata_sctransform( ) -> None: """Python implementation of https://github.com/satijalab/sctransform. - Hao and Hao et al. Integrated analysis of multimodal single-cell data. Cell (2021) + Hao and Hao et al. Integrated analysis of multimodal single-cell data. + Cell (2021) Args: adata: an AnnData object - tkey: the key for time information (labeling time period for the cells) in .obs. Defaults to None. - experiment_type: the experiment type of the data. If not provided, would be inferred from the data. Defaults - to None. + tkey: the key for time information (labeling time period for the + cells) in .obs. Defaults to None. + experiment_type: the experiment type of the data. If not provided, + would be inferred from the data. Defaults to None. """ temp_logger = LoggerManager.gen_logger("preprocessor-sctransform") @@ -579,7 +642,8 @@ def preprocess_adata_sctransform( temp_logger.finish_progress(progress_name="preprocess by sctransform recipe") def config_pearson_residuals_recipe(self, adata: AnnData) -> None: - """Automatically configure the preprocessor for using the Pearson residuals style recipe. + """Automatically configure the preprocessor for using the Pearson + residuals style recipe. Args: adata: an AnnData object. @@ -602,13 +666,16 @@ def preprocess_adata_pearson_residuals( ) -> None: """A pipeline proposed in Pearson residuals (Lause, Berens & Kobak, 2021). - Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 (2021). https://doi.org/10.1186/s13059-021-02451-7 + Lause, J., Berens, P. & Kobak, D. Analytic Pearson residuals for + normalization of single-cell RNA-seq UMI data. Genome Biol 22, 258 + (2021). https://doi.org/10.1186/s13059-021-02451-7 Args: adata: an AnnData object - tkey: the key for time information (labeling time period for the cells) in .obs. Defaults to None. - experiment_type: the experiment type of the data. If not provided, would be inferred from the data. Defaults - to None. + tkey: the key for time information (labeling time period for the + cells) in .obs. Defaults to None. + experiment_type: the experiment type of the data. If not provided, + would be inferred from the data. Defaults to None. """ temp_logger = LoggerManager.gen_logger("preprocessor-sctransform") @@ -622,10 +689,13 @@ def preprocess_adata_pearson_residuals( temp_logger.finish_progress(progress_name="preprocess by pearson residual recipe") def config_monocle_pearson_residuals_recipe(self, adata: AnnData) -> None: - """Automatically configure the preprocessor for using the Monocle-Pearson-residuals style recipe. + """Automatically configure the preprocessor for using the + Monocle-Pearson-residuals style recipe. - Useful when you want to use Pearson residual to obtain feature genes and perform PCA but also using the standard - size-factor normalization and log1p analyses to normalize data for RNA velocity and vector field analyses. + Useful when you want to use Pearson residual to obtain feature genes and + perform PCA but also use the standard size-factor normalization and + log1p analyses to normalize data for RNA velocity and vector field + analyses. Args: adata: an AnnData object. @@ -649,16 +719,19 @@ def preprocess_adata_monocle_pearson_residuals( ) -> None: """A combined pipeline of monocle and pearson_residuals. - Results after running pearson_residuals can contain negative values, an undesired feature for later RNA velocity - analysis. 
This function combine pearson_residual and monocle recipes so that it uses Pearson residual to obtain - feature genes and perform PCA but also uses monocle recipe to generate X_spliced, X_unspliced, X_new, X_total or - other data values for RNA velocity and downstream vector field analyses. + Results after running pearson_residuals can contain negative values, an + undesired feature for later RNA velocity analysis. This function combines + the pearson_residual and monocle recipes so that it uses Pearson residual to + obtain feature genes and perform PCA but also uses the monocle recipe to + generate X_spliced, X_unspliced, X_new, X_total or other data values for + RNA velocity and downstream vector field analyses. Args: adata: an AnnData object - tkey: the key for time information (labeling time period for the cells) in .obs. Defaults to None. - experiment_type: the experiment type of the data. If not provided, would be inferred from the data. Defaults - to None. + tkey: the key for time information (labeling time period for the + cells) in .obs. Defaults to None. + experiment_type: the experiment type of the data. If not provided, + would be inferred from the data. Defaults to None. """ temp_logger = LoggerManager.gen_logger("preprocessor-monocle-pearson-residual") @@ -692,8 +765,10 @@ def preprocess_adata( Args: adata: An AnnData object. - recipe: The recipe used to preprocess the data. Defaults to "monocle". - tkey: the key for time information (labeling time period for the cells) in .obs. Defaults to None. + recipe: The recipe used to preprocess the data. Defaults to + "monocle". + tkey: the key for time information (labeling time period for the + cells) in .obs. Defaults to None. Raises: NotImplementedError: the recipe is invalid. diff --git a/dynamo/preprocessing/__init__.py b/dynamo/preprocessing/__init__.py index 4ce7baefc..69cc46c09 100755 --- a/dynamo/preprocessing/__init__.py +++ b/dynamo/preprocessing/__init__.py @@ -26,7 +26,7 @@ cook_dist, decode, filter_genes_by_pattern, - pca_monocle, + pca, relative2abs, scale, top_pca_genes, @@ -65,7 +65,7 @@ "cell_cycle_scores", "basic_stats", "cook_dist", - "pca_monocle", + "pca", "top_pca_genes", "relative2abs", "scale", diff --git a/dynamo/preprocessing/preprocess.py b/dynamo/preprocessing/preprocess.py index a6299d7a6..8a02f49df 100755 --- a/dynamo/preprocessing/preprocess.py +++ b/dynamo/preprocessing/preprocess.py @@ -53,7 +53,7 @@ get_sz_exprs, merge_adata_attrs, normalize_mat_monocle, - pca_monocle, + pca, sz_util, unique_var_obs_adata, ) @@ -1285,7 +1285,7 @@ def recipe_monocle( logger.info("applying %s ..." 
% (method.upper())) if method == "pca": - adata = pca_monocle(adata, pca_input, num_dim, "X_" + method.lower()) + adata = pca(adata, pca_input, num_dim, "X_" + method.lower()) # TODO remove adata.obsm["X"] in future, use adata.obsm.X_pca instead adata.obsm["X"] = adata.obsm["X_" + method.lower()] @@ -1438,7 +1438,7 @@ def recipe_velocyto( CM = CM[:, valid_ind] if method == "pca": - adata, fit, _ = pca_monocle(adata, CM, num_dim, "X_" + method.lower(), return_all=True) + adata, fit, _ = pca(adata, CM, num_dim, "X_" + method.lower(), return_all=True) # adata.obsm['X_' + method.lower()] = reduce_dim elif method == "ica": diff --git a/dynamo/preprocessing/preprocessor_utils.py b/dynamo/preprocessing/preprocessor_utils.py index 5a15bc9a4..46a2602a3 100644 --- a/dynamo/preprocessing/preprocessor_utils.py +++ b/dynamo/preprocessing/preprocessor_utils.py @@ -49,7 +49,7 @@ get_sz_exprs, merge_adata_attrs, normalize_mat_monocle, - pca_monocle, + pca, sz_util, unique_var_obs_adata, ) @@ -65,7 +65,7 @@ def is_log1p_transformed_adata(adata: anndata.AnnData) -> bool: A flag shows whether the adata object is log transformed. """ - chosen_gene_indices = np.random.choice(adata.n_obs, 10) + chosen_gene_indices = np.random.choice(adata.n_vars, 10) _has_log1p_transformed = not np.allclose( np.array(adata.X[:, chosen_gene_indices].sum(1)), np.array(adata.layers["spliced"][:, chosen_gene_indices].sum(1)), @@ -1528,7 +1528,7 @@ def is_nonnegative_integer_arr(mat: Union[np.ndarray, spmatrix, list]) -> bool: def pca_selected_genes_wrapper( adata: AnnData, pca_input: Union[np.ndarray, None] = None, n_pca_components: int = 30, key: str = "X_pca" ): - """A wrapper for pca_monocle function to reduce dimensions of the Adata with PCA. + """A wrapper for the pca function to reduce dimensions of the AnnData with PCA. Args: adata: an AnnData object. @@ -1537,4 +1537,4 @@ key: the key to store the calculation result. Defaults to "X_pca". """ - adata = pca_monocle(adata, pca_input, n_pca_components=n_pca_components, pca_key=key) + adata = pca(adata, pca_input, n_pca_components=n_pca_components, pca_key=key) diff --git a/dynamo/preprocessing/utils.py b/dynamo/preprocessing/utils.py index bf8286e53..5e04a5b1b 100755 --- a/dynamo/preprocessing/utils.py +++ b/dynamo/preprocessing/utils.py @@ -1,5 +1,5 @@ import warnings -from typing import Callable, Iterable, List, Tuple, Union +from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union try: from typing import Literal @@ -14,8 +14,12 @@ import scipy.sparse import statsmodels.api as sm from anndata import AnnData -from scipy.sparse import csr_matrix, issparse +from scipy.sparse.linalg import LinearOperator, svds +from scipy.sparse import csc_matrix, csr_matrix, issparse from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.utils import check_random_state +from sklearn.utils.extmath import svd_flip +from sklearn.utils.sparsefuncs import mean_variance_axis from ..configuration import DKM, DynamoAdataKeyManager from ..dynamo_logger import ( @@ -774,8 +778,131 @@ def decode(adata: anndata.AnnData) -> None: # --------------------------------------------------------------------------------------------------- # pca +def _truncatedSVD_with_center( + X: Union[csc_matrix, csr_matrix], + n_components: int = 30, + random_state: int = 0, +) -> Tuple: + """Center a sparse matrix and perform truncated SVD on it. 
-def pca_monocle( + It uses `scipy.sparse.linalg.LinearOperator` to express the centered sparse + input by given matrix-vector and matrix-matrix products. Then truncated + singular value decomposition (SVD) can be solved without calculating the + individual entries of the centered matrix. The right singular vectors after + decomposition represent the principal components. This function is inspired + by the implementation of scanpy (https://github.com/scverse/scanpy). + + Args: + X: The input sparse matrix to perform truncated SVD on. + n_components: The number of components to keep. Default is 30. + random_state: Seed for the random number generator. Default is 0. + + Returns: + A sklearn PCA object containing the right singular vectors and the + amount of variance explained by each principal component, together + with the transformed input matrix. + """ + random_state = check_random_state(random_state) + np.random.set_state(random_state.get_state()) + v0 = random_state.uniform(-1, 1, np.min(X.shape)) + n_components = min(n_components, X.shape[1] - 1) + + mean = X.mean(0) + X_H = X.T.conj() + mean_H = mean.T.conj() + ones = np.ones(X.shape[0])[None, :].dot + + # The following callables implement the different types of matrix calculation. + def matvec(x): + """Matrix-vector multiplication. Performs the operation X_centered*x + where x is a column vector or a 1-D array.""" + return X.dot(x) - mean.dot(x) + + def matmat(x): + """Matrix-matrix multiplication. Performs the operation X_centered*x + where x is a matrix or ndarray.""" + return X.dot(x) - mean.dot(x) + + def rmatvec(x): + """Adjoint matrix-vector multiplication. Performs the operation + X_centered^H * x where x is a column vector or a 1-d array.""" + return X_H.dot(x) - mean_H.dot(ones(x)) + + def rmatmat(x): + """Adjoint matrix-matrix multiplication. Performs the operation + X_centered^H * x where x is a matrix or ndarray.""" + return X_H.dot(x) - mean_H.dot(ones(x)) + + # Construct the LinearOperator with the callables above. + X_centered = LinearOperator( + shape=X.shape, + matvec=matvec, + matmat=matmat, + rmatvec=rmatvec, + rmatmat=rmatmat, + dtype=X.dtype, + ) + + # Solve the SVD without calculating the individual entries of the centered matrix. + U, Sigma, VT = svds(X_centered, solver='arpack', k=n_components, v0=v0) + Sigma = Sigma[::-1] + U, VT = svd_flip(U[:, ::-1], VT[::-1]) + X_transformed = U * Sigma + components_ = VT + exp_var = np.var(X_transformed, axis=0) + _, full_var = mean_variance_axis(X, axis=0) + full_var = full_var.sum() + + result_dict = { + "X_pca": X_transformed, + "components_": components_, + "explained_variance_ratio_": exp_var / full_var, + } + + fit = PCA( + n_components=n_components, + random_state=random_state, + ) + X_pca = result_dict["X_pca"] + fit.components_ = result_dict["components_"] + fit.explained_variance_ratio_ = result_dict[ + "explained_variance_ratio_"] + + return fit, X_pca + +def _pca_fit( + X: np.ndarray, + pca_func: Callable, + n_components: int = 30, + **kwargs, +) -> Tuple: + """Apply PCA to the input data array X using the specified PCA function. + + Args: + X: the input data array of shape (n_samples, n_features). + pca_func: the PCA function to use, which should have a 'fit' and + 'transform' method, such as the PCA class or the IncrementalPCA + class from sklearn.decomposition. + n_components: the number of principal components to compute. If + n_components is greater than or equal to the number of features in + X, it will be set to n_features - 1, since the solvers require + n_components to be strictly smaller than the number of features. 
+ **kwargs: any additional keyword arguments that will be passed to the + PCA function. + + Returns: + A tuple containing two elements: + - The fitted PCA object, which has a 'fit' and 'transform' method. + - The transformed array X_pca of shape (n_samples, n_components). + """ + fit = pca_func( + n_components=min(n_components, X.shape[1] - 1), + **kwargs, + ).fit(X) + X_pca = fit.transform(X) + return fit, X_pca + + +def pca( adata: AnnData, X_data: np.ndarray = None, n_pca_components: int = 30, @@ -783,29 +910,54 @@ def pca_monocle( pcs_key: str = "PCs", genes_to_append: Union[List[str], None] = None, layer: Union[List[str], str, None] = None, + svd_solver: Literal["randomized", "arpack"] = "randomized", + random_state: int = 0, + use_truncated_SVD_threshold: int = 500000, + use_incremental_PCA: bool = False, + incremental_batch_size: Optional[int] = None, return_all: bool = False, ) -> Union[AnnData, Tuple[AnnData, Union[PCA, TruncatedSVD], np.ndarray]]: """Perform PCA reduction for monocle recipe. + When a large dataset is used (e.g. 1 million cells), Incremental PCA is + recommended to avoid memory issues. When the cell number is less than half + a million, PCA or _truncatedSVD_with_center (which works on the sparse + matrix without explicitly centering it) will be used by default. TruncatedSVD + is the fastest method. Unlike the other methods, which center the data first, + it performs SVD decomposition on the raw input. Only use it when the dataset + is too large for the other methods. + Args: adata: an AnnData object. X_data: the data to perform dimension reduction on. Defaults to None. n_pca_components: number of PCA components reduced to. Defaults to 30. pca_key: the key to store the reduced data. Defaults to "X". - pcs_key: the key to store the principle axes in feature space. Defaults to "PCs". + pcs_key: the key to store the principal axes in feature space. Defaults + to "PCs". genes_to_append: a list of genes should be inspected. Defaults to None. - layer: the layer(s) to perform dimension reduction on. Would be overrided by X_data. Defaults to None. - return_all: whether to return the PCA fit model and the reduced array together with the updated AnnData object. - Defaults to False. + layer: the layer(s) to perform dimension reduction on. Would be + overridden by X_data. Defaults to None. + svd_solver: the svd_solver to solve svd decomposition in PCA. + random_state: the seed used to initialize the random state for PCA. + use_truncated_SVD_threshold: the threshold of observations to use + truncated SVD instead of standard PCA for efficiency. + use_incremental_PCA: whether to use Incremental PCA. Recommended when + the dataset is too large to fit in memory. + incremental_batch_size: The number of samples to use for each batch when + performing incremental PCA. If batch_size is None, then batch_size + is inferred from the data and set to 5 * n_features. + return_all: whether to return the PCA fit model and the reduced array + together with the updated AnnData object. Defaults to False. Raises: ValueError: layer provided is not invalid. ValueError: list of genes to append is invalid. Returns: - The the updated AnnData object with reduced data if `return_all` is False. Otherwise, a tuple (adata, fit, - X_pca), where adata is the updated AnnData object, fit is the fit model for dimension reduction, and X_pca is - the reduced array, will be returned. + The updated AnnData object with reduced data if `return_all` is False. 
+ Otherwise, a tuple (adata, fit, X_pca), where adata is the updated + AnnData object, fit is the fit model for dimension reduction, and X_pca + is the reduced array, will be returned. """ # only use genes pass filter (based on use_for_pca) to perform dimension reduction. @@ -847,33 +999,53 @@ def pca_monocle( adata.var.iloc[bad_genes, adata.var.columns.tolist().index("use_for_pca")] = False X_data = X_data[:, valid_ind] - USE_TRUNCATED_SVD_THRESHOLD = 100000 - if adata.n_obs < USE_TRUNCATED_SVD_THRESHOLD: - pca = PCA( - n_components=min(n_pca_components, X_data.shape[1] - 1), - svd_solver="arpack", - random_state=0, + if use_incremental_PCA: + from sklearn.decomposition import IncrementalPCA + fit, X_pca = _pca_fit( + X_data, + pca_func=IncrementalPCA, + n_components=n_pca_components, + batch_size=incremental_batch_size, ) - fit = pca.fit(X_data.toarray()) if issparse(X_data) else pca.fit(X_data) - X_pca = fit.transform(X_data.toarray()) if issparse(X_data) else fit.transform(X_data) - adata.obsm[pca_key] = X_pca - adata.uns[pcs_key] = fit.components_.T + else: + if adata.n_obs < use_truncated_SVD_threshold: + if not issparse(X_data): + fit, X_pca = _pca_fit( + X_data, + pca_func=PCA, + n_components=n_pca_components, + svd_solver=svd_solver, + random_state=random_state, + ) + else: + fit, X_pca = _truncatedSVD_with_center( + X_data, + n_components=n_pca_components, + random_state=random_state, + ) + else: + # TruncatedSVD is the fastest method we have. It doesn't center the + # data. It only performs SVD decomposition, which is the second part + # in our _truncatedSVD_with_center function. + fit, X_pca = _pca_fit( + X_data, + pca_func=TruncatedSVD, + n_components=n_pca_components + 1, + random_state=random_state + ) + # first column is related to the total UMI (or library size) + X_pca = X_pca[:, 1:] - adata.uns["explained_variance_ratio_"] = fit.explained_variance_ratio_ + adata.obsm[pca_key] = X_pca + if use_incremental_PCA or adata.n_obs < use_truncated_SVD_threshold: + adata.uns[pcs_key] = fit.components_.T + adata.uns[ + "explained_variance_ratio_"] = fit.explained_variance_ratio_ else: - # unscaled PCA - fit = TruncatedSVD( - n_components=min(n_pca_components + 1, X_data.shape[1] - 1), - random_state=0, - ) # first columns is related to the total UMI (or library size) - X_pca = fit.fit_transform(X_data)[:, 1:] - adata.obsm[pca_key] = X_pca adata.uns[pcs_key] = fit.components_.T[:, 1:] - - adata.uns["explained_variance_ratio_"] = fit.explained_variance_ratio_[1:] - - adata.uns["explained_variance_ratio_"] = fit.explained_variance_ratio_[1:] + adata.uns[ + "explained_variance_ratio_"] = fit.explained_variance_ratio_[1:] adata.uns["pca_mean"] = fit.mean_ if hasattr(fit, "mean_") else None if return_all: diff --git a/dynamo/tools/cell_velocities.py b/dynamo/tools/cell_velocities.py index d5a114710..32713aa42 100755 --- a/dynamo/tools/cell_velocities.py +++ b/dynamo/tools/cell_velocities.py @@ -541,9 +541,9 @@ def cell_velocities( if "pca_fit" not in adata.uns_keys() or type(adata.uns["pca_fit"]) == str: CM = adata.X[:, adata.var.use_for_dynamics.values] - from ..preprocessing.utils import pca_monocle + from ..preprocessing.utils import pca - adata, pca_fit, X_pca = pca_monocle(adata, CM, n_pca_components, "X", return_all=True) + adata, pca_fit, X_pca = pca(adata, CM, n_pca_components, "X", return_all=True) adata.uns["pca_fit"] = pca_fit X_pca, pca_fit = adata.obsm["X"], adata.uns["pca_fit"] diff --git a/dynamo/tools/clustering.py b/dynamo/tools/clustering.py index 590de4be1..2725b0fb8 
100644 --- a/dynamo/tools/clustering.py +++ b/dynamo/tools/clustering.py @@ -11,7 +11,7 @@ from ..preprocessing.preprocessor_utils import filter_genes_by_outliers as filter_genes from ..preprocessing.preprocessor_utils import log1p_adata as log1p from ..preprocessing.preprocessor_utils import normalize_cell_expr_by_size_factors -from ..preprocessing.utils import pca_monocle +from ..preprocessing.utils import pca from ..utils import LoggerManager, copy_adata from .connectivity import _gen_neighbor_keys, neighbors from .utils import update_dict @@ -642,7 +642,7 @@ def scc( adata.uns["pp"] = {} normalize_cell_expr_by_size_factors(adata, layers="X") log1p(adata) - pca_monocle(adata, n_pca_components=30, pca_key="X_pca") + pca(adata, n_pca_components=30, pca_key="X_pca") neighbors(adata, n_neighbors=e_neigh) if "X_" + spatial_key not in adata.obsm.keys(): diff --git a/dynamo/tools/connectivity.py b/dynamo/tools/connectivity.py index fe03afdc9..2bd91d15a 100755 --- a/dynamo/tools/connectivity.py +++ b/dynamo/tools/connectivity.py @@ -564,14 +564,14 @@ def neighbors( logger.info("X_data is None, fetching or recomputing...", indent_level=2) if basis == "pca" and "X_pca" not in adata.obsm_keys(): logger.info("PCA as basis not X_pca not found, doing PCAs", indent_level=2) - from ..preprocessing.utils import pca_monocle + from ..preprocessing.utils import pca CM = adata.X if genes is None else adata[:, genes].X cm_genesums = CM.sum(axis=0) valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() CM = CM[:, valid_ind] - adata, _, _ = pca_monocle(adata, CM, pca_key="X_pca", n_pca_components=n_pca_components, return_all=True) + adata, _, _ = pca(adata, CM, pca_key="X_pca", n_pca_components=n_pca_components, return_all=True) X_data = adata.obsm["X_pca"] else: diff --git a/dynamo/tools/moments.py b/dynamo/tools/moments.py index 35400c613..7c2dea5c9 100755 --- a/dynamo/tools/moments.py +++ b/dynamo/tools/moments.py @@ -8,7 +8,7 @@ from ..configuration import DKM, DynamoAdataKeyManager from ..dynamo_logger import LoggerManager -from ..preprocessing.utils import pca_monocle +from ..preprocessing.utils import pca from ..utils import copy_adata from .connectivity import mnn, normalize_knn_graph, umap_conn_indices_dist_embedding from .utils import elem_prod, get_mapper, inverse_norm @@ -121,7 +121,7 @@ def moments( valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() CM = CM[:, valid_ind] - adata, fit, _ = pca_monocle( + adata, fit, _ = pca( adata, CM, n_pca_components=n_pca_components, diff --git a/dynamo/tools/recipes.py b/dynamo/tools/recipes.py index 8332752f6..4bbea67b2 100644 --- a/dynamo/tools/recipes.py +++ b/dynamo/tools/recipes.py @@ -1,7 +1,7 @@ import numpy as np from ..configuration import DynamoAdataConfig -from ..preprocessing.utils import pca_monocle +from ..preprocessing.utils import pca from .cell_velocities import cell_velocities from .connectivity import neighbors, normalize_knn_graph from .dimension_reduction import reduceDimension @@ -86,7 +86,7 @@ def recipe_kin_data( An updated adata object that went through a proper and typical time-resolved RNA velocity analysis. 
""" from ..preprocessing import recipe_monocle - from ..preprocessing.utils import detect_experiment_datatype, pca_monocle + from ..preprocessing.utils import detect_experiment_datatype, pca keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none( keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY @@ -144,7 +144,7 @@ def recipe_kin_data( valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() - pca_monocle(adata, CM[:, valid_ind], pca_key="X_spliced_pca") + pca(adata, CM[:, valid_ind], pca_key="X_spliced_pca") # then get neighbors graph based on X_spliced_pca neighbors(adata, X_data=adata.obsm["X_spliced_pca"], layer="X_spliced") # then normalize neighbors graph so that each row sums up to be 1 @@ -268,7 +268,7 @@ def recipe_deg_data( """ from ..preprocessing import recipe_monocle - from ..preprocessing.utils import detect_experiment_datatype, pca_monocle + from ..preprocessing.utils import detect_experiment_datatype, pca keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none( keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY @@ -322,7 +322,7 @@ def recipe_deg_data( cm_genesums = CM.sum(axis=0) valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() - pca_monocle(adata, CM[:, valid_ind], pca_key="X_total_pca") + pca(adata, CM[:, valid_ind], pca_key="X_total_pca") # then get neighbors graph based on X_spliced_pca neighbors(adata, X_data=adata.obsm["X_total_pca"], layer="X_total") # then normalize neighbors graph so that each row sums up to be 1 @@ -454,7 +454,7 @@ def recipe_mix_kin_deg_data( An updated adata object that went through a proper and typical time-resolved RNA velocity analysis. """ from ..preprocessing import recipe_monocle - from ..preprocessing.utils import detect_experiment_datatype, pca_monocle + from ..preprocessing.utils import detect_experiment_datatype, pca keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none( keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY @@ -512,7 +512,7 @@ def recipe_mix_kin_deg_data( valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() - pca_monocle(adata, CM[:, valid_ind], pca_key="X_spliced_pca") + pca(adata, CM[:, valid_ind], pca_key="X_spliced_pca") # then get neighbors graph based on X_spliced_pca neighbors(adata, X_data=adata.obsm["X_spliced_pca"], layer="X_spliced") # then normalize neighbors graph so that each row sums up to be 1 @@ -636,7 +636,7 @@ def recipe_one_shot_data( An updated adata object that went through a proper and typical time-resolved RNA velocity analysis. 
""" from ..preprocessing import recipe_monocle - from ..preprocessing.utils import detect_experiment_datatype, pca_monocle + from ..preprocessing.utils import detect_experiment_datatype, pca keep_filtered_cells = DynamoAdataConfig.use_default_var_if_none( keep_filtered_cells, DynamoAdataConfig.RECIPE_KEEP_FILTERED_CELLS_KEY @@ -694,7 +694,7 @@ def recipe_one_shot_data( valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() - pca_monocle(adata, CM[:, valid_ind], pca_key="X_spliced_pca") + pca(adata, CM[:, valid_ind], pca_key="X_spliced_pca") # then get neighbors graph based on X_spliced_pca neighbors(adata, X_data=adata.obsm["X_spliced_pca"], layer="X_spliced") # then normalize neighbors graph so that each row sums up to be 1 @@ -850,7 +850,7 @@ def velocity_N( # now let us first run pca with new RNA if recalculate_pca: - pca_monocle(adata, np.log1p(adata[:, adata.var.use_for_pca].layers["X_new"]), pca_key="X_pca") + pca(adata, np.log1p(adata[:, adata.var.use_for_pca].layers["X_new"]), pca_key="X_pca") # if there are unspliced / spliced data, delete them for now: for i in ["spliced", "unspliced", "X_spliced", "X_unspliced"]: diff --git a/dynamo/tools/utils_reduceDimension.py b/dynamo/tools/utils_reduceDimension.py index 144870dfb..75d68ab2e 100644 --- a/dynamo/tools/utils_reduceDimension.py +++ b/dynamo/tools/utils_reduceDimension.py @@ -3,7 +3,7 @@ import numpy as np from ..configuration import DKM -from ..preprocessing.utils import pca_monocle +from ..preprocessing.utils import pca from .connectivity import ( _gen_neighbor_keys, knn_to_adj, @@ -93,7 +93,7 @@ def prepare_dim_reduction( valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() CM = CM[:, valid_ind] - adata, fit, _ = pca_monocle( + adata, fit, _ = pca( adata, CM, n_pca_components=n_pca_components, @@ -123,7 +123,7 @@ def prepare_dim_reduction( valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() CM = CM[:, valid_ind] - adata, fit, _ = pca_monocle(adata, CM, n_pca_components=n_pca_components, pca_key=pca_key, return_all=True) + adata, fit, _ = pca(adata, CM, n_pca_components=n_pca_components, pca_key=pca_key, return_all=True) adata.uns["explained_variance_ratio_"] = fit.explained_variance_ratio_[1:] # valid genes used for dimension reduction calculation diff --git a/dynamo/vectorfield/clustering.py b/dynamo/vectorfield/clustering.py index 4c93387ba..d4ef2c038 100644 --- a/dynamo/vectorfield/clustering.py +++ b/dynamo/vectorfield/clustering.py @@ -8,7 +8,7 @@ from sklearn.neighbors import NearestNeighbors from ..dynamo_logger import main_info -from ..preprocessing.utils import pca_monocle +from ..preprocessing.utils import pca from ..tools.clustering import hdbscan, infomap, leiden, louvain from ..tools.Markov import ( grid_velocity_filter, @@ -390,7 +390,7 @@ def streamline_clusters( # clustering feature_adata = AnnData(feature_df) - pca_monocle(feature_adata, X_data=feature_df, pca_key="X_pca") + pca(feature_adata, X_data=feature_df, pca_key="X_pca") if clustering_method == "louvain": louvain(feature_adata, obsm_key="X_pca") elif clustering_method == "leiden": diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index 0f6842aef..d7a0d5895 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -5,6 +5,7 @@ import scipy.sparse from scipy import sparse from scipy.sparse.csr import csr_matrix +from sklearn.decomposition import PCA 
# from utils import * import dynamo as dyn @@ -164,6 +165,21 @@ def test_compute_gene_exp_fraction(): print("frac:", list(frac)) assert np.all(np.isclose(frac.flatten(), [2 / 5, 3 / 5])) +def test_pca(): + adata = dyn.sample_data.zebrafish() + preprocessor = Preprocessor() + preprocessor.preprocess_adata_seurat_wo_pca(adata) + adata = dyn.pp.pca(adata, n_pca_components=30) + assert adata.obsm["X"].shape[1] == 30 + assert adata.uns['PCs'].shape[1] == 30 + assert adata.uns['explained_variance_ratio_'].shape[0] == 30 + + X_filtered = adata.X[:, adata.var.use_for_pca.values].copy() + pca = PCA(n_components=30, random_state=0) + X_pca_sklearn = pca.fit_transform(X_filtered.toarray()) + assert np.linalg.norm(X_pca_sklearn[:, :10] - adata.obsm['X'][:, :10]) < 1e-1 + assert np.linalg.norm(pca.components_.T[:, :10] - adata.uns['PCs'][:, :10]) < 1e-1 + assert np.linalg.norm(pca.explained_variance_ratio_[:10] - adata.uns['explained_variance_ratio_'][:10]) < 1e-1 def test_preprocessor_seurat(adata): adata = dyn.sample_data.zebrafish() @@ -221,6 +237,7 @@ def test_is_nonnegative(): # # generate data if needed # adata = utils.gen_or_read_zebrafish_data() # test_is_log_transformed() + # test_pca() # test_Preprocessor_simple_run(dyn.sample_data.zebrafish()) # test_calc_dispersion_sparse()
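Note: the implicit-centering trick used by _truncatedSVD_with_center can be sanity-checked independently of dynamo. The sketch below (editorial; plain numpy/scipy only) builds the same kind of LinearOperator and compares its leading singular values against a dense SVD of the explicitly centered matrix:

import numpy as np
from scipy.sparse import random as sparse_random
from scipy.sparse.linalg import LinearOperator, svds

X = sparse_random(300, 50, density=0.1, format="csr", random_state=0)
mean = np.asarray(X.mean(axis=0))        # 1 x n_features row of column means
ones = np.ones(X.shape[0])[None, :].dot  # v -> sum of v's entries, kept 2-D

# Behaves like (X - 1 * mean) without ever densifying X.
X_centered = LinearOperator(
    shape=X.shape,
    dtype=X.dtype,
    matvec=lambda v: X.dot(v) - mean.dot(v),
    rmatvec=lambda v: X.T.dot(v) - mean.T.dot(ones(v)),
)

U, S, VT = svds(X_centered, k=10, solver="arpack")

# Reference: dense SVD of the explicitly centered matrix.
Xd = X.toarray()
S_ref = np.linalg.svd(Xd - Xd.mean(axis=0), compute_uv=False)[:10]
assert np.allclose(np.sort(S)[::-1], S_ref)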