From 1c9df72894b903701b01c0b25cd74f275c856153 Mon Sep 17 00:00:00 2001 From: Sichao25 Date: Mon, 8 Jul 2024 21:57:48 -0400 Subject: [PATCH 1/3] deprecate sparse matrix .A attributes --- dynamo/estimation/csc/utils_velocity.py | 24 +++++------ dynamo/estimation/csc/velocity.py | 16 ++++---- dynamo/estimation/tsc/twostep.py | 12 +++--- dynamo/external/scifate.py | 6 +-- dynamo/external/scribe.py | 4 +- dynamo/plot/dynamics.py | 42 ++++++++++---------- dynamo/plot/markers.py | 2 +- dynamo/plot/networks.py | 4 +- dynamo/plot/preprocess.py | 6 +-- dynamo/plot/scatters.py | 4 +- dynamo/plot/time_series.py | 4 +- dynamo/plot/utils_dynamics.py | 34 ++++++++-------- dynamo/prediction/state_graph.py | 6 +-- dynamo/prediction/trajectory_analysis.py | 2 +- dynamo/preprocessing/QC.py | 2 +- dynamo/preprocessing/cell_cycle.py | 4 +- dynamo/preprocessing/deprecated.py | 4 +- dynamo/preprocessing/external/sctransform.py | 14 +++---- dynamo/preprocessing/gene_selection.py | 6 +-- dynamo/preprocessing/normalization.py | 2 +- dynamo/preprocessing/utils.py | 16 ++++---- dynamo/tools/Markov.py | 4 +- dynamo/tools/cell_velocities.py | 8 ++-- dynamo/tools/connectivity.py | 6 +-- dynamo/tools/deprecated.py | 2 +- dynamo/tools/dynamics.py | 8 ++-- dynamo/tools/markers.py | 6 +-- dynamo/tools/metric_velocity.py | 14 +++---- dynamo/tools/moments.py | 16 ++++---- dynamo/tools/pseudotime_velocity.py | 2 +- dynamo/tools/recipes.py | 8 ++-- dynamo/tools/utils.py | 6 +-- dynamo/vectorfield/rank_vf.py | 4 +- dynamo/vectorfield/scVectorField.py | 4 +- dynamo/vectorfield/topography.py | 2 +- tests/test_data_io.py | 4 +- tests/test_preprocess.py | 12 +++--- tests/test_tl.py | 26 ++++++------ 38 files changed, 173 insertions(+), 173 deletions(-) diff --git a/dynamo/estimation/csc/utils_velocity.py b/dynamo/estimation/csc/utils_velocity.py index 1df548b5d..e0f1c6d04 100755 --- a/dynamo/estimation/csc/utils_velocity.py +++ b/dynamo/estimation/csc/utils_velocity.py @@ -215,8 +215,8 @@ def fit_linreg( the extreme data points (r2), the r2 calculated using all data points (all_r2). If argument r2 is False, r2 and all_r2 will not be returned. """ - x = x.A if issparse(x) else x - y = y.A if issparse(y) else y + x = x.toarray() if issparse(x) else x + y = y.toarray() if issparse(y) else y _mask = np.logical_and(~np.isnan(x), ~np.isnan(y)) if mask is not None: @@ -279,8 +279,8 @@ def fit_linreg_robust( all_r2 will not be returned. """ - x = x.A if issparse(x) else x - y = y.A if issparse(y) else y + x = x.toarray() if issparse(x) else x + y = y.toarray() if issparse(y) else y _mask = np.logical_and(~np.isnan(x), ~np.isnan(y)) if mask is not None: @@ -405,7 +405,7 @@ def fit_first_order_deg_lsq( Returns: The estimated value for beta (beta) and the estimated value for the initial spliced, labeled mRNA count (l0). """ - l = l.A.flatten() if issparse(l) else l + l = l.toarray().flatten() if issparse(l) else l tau = t - np.min(t) l0 = np.nanmean(l[tau == 0]) @@ -434,7 +434,7 @@ def solve_first_order_deg(t: np.ndarray, l: Union[csr_matrix, np.ndarray]) -> Tu The initial counts of the species (for example, labeled mRNA), degradation rate constant and half-life the species. """ - x = l.A.flatten() if issparse(l) else l + x = l.toarray().flatten() if issparse(l) else l t_uniq = np.unique(t) x_stra = strat_mom(x, t, np.nanmean) @@ -468,7 +468,7 @@ def fit_gamma_lsq( Returns: The estimated value for gamma and the estimated value for the initial spliced mRNA count. """ - s = s.A.flatten() if issparse(s) else s + s = s.toarray().flatten() if issparse(s) else s tau = t - np.min(t) s0 = np.mean(s[tau == 0]) @@ -501,7 +501,7 @@ def fit_alpha_synthesis(t: np.ndarray, u: Union[csr_matrix, np.ndarray], beta: f Returns: The estimated value for alpha. """ - u = u.A if issparse(u) else u + u = u.toarray() if issparse(u) else u # fit alpha assuming u=0 at t=0 expt = np.exp(-beta * t) @@ -533,7 +533,7 @@ def fit_alpha_degradation( The estimated value for alpha (alpha), the initial unspliced mRNA count (u0), and coefficient of determination or r square (r2). """ - x = u.A if issparse(u) else u + x = u.toarray() if issparse(u) else u tau = t - np.min(t) @@ -569,7 +569,7 @@ def solve_alpha_degradation( The estimated value for alpha (alpha), the initial unspliced mRNA count (b), coefficient of determination or r square. """ - u = u.A if issparse(u) else u + u = u.toarray() if issparse(u) else u n = u.size tau = t - np.min(t) @@ -617,7 +617,7 @@ def fit_alpha_beta_synthesis( Returns: The estimated value for alpha and the estimated value for beta. """ - l = l.A if issparse(l) else l + l = l.toarray() if issparse(l) else l tau = np.hstack((0, t)) x = np.hstack((0, l)) @@ -649,7 +649,7 @@ def fit_all_synthesis( Returns: The estimated value for alpha, beta and gamma. """ - l = l.A if issparse(l) else l + l = l.toarray() if issparse(l) else l tau = np.hstack((0, t)) x = np.hstack((0, l)) diff --git a/dynamo/estimation/csc/velocity.py b/dynamo/estimation/csc/velocity.py index 74a14c6a6..d77f21ac9 100755 --- a/dynamo/estimation/csc/velocity.py +++ b/dynamo/estimation/csc/velocity.py @@ -1578,8 +1578,8 @@ def fit_gamma_steady_state(self, u, s, intercept=True, perc_left=None, perc_righ """ if intercept and perc_left is None: perc_left = perc_right - u = u.A.flatten() if issparse(u) else u.flatten() - s = s.A.flatten() if issparse(s) else s.flatten() + u = u.toarray().flatten() if issparse(u) else u.flatten() + s = s.toarray().flatten() if issparse(s) else s.flatten() mask = find_extreme( s, @@ -1660,10 +1660,10 @@ def fit_gamma_stochastic( all_r2: float Coefficient of determination or r square for all data points. """ - u = u.A.flatten() if issparse(u) else u.flatten() - s = s.A.flatten() if issparse(s) else s.flatten() - us = us.A.flatten() if issparse(us) else us.flatten() - ss = ss.A.flatten() if issparse(ss) else ss.flatten() + u = u.toarray().flatten() if issparse(u) else u.flatten() + s = s.toarray().flatten() if issparse(s) else s.flatten() + us = us.toarray().flatten() if issparse(us) else us.flatten() + ss = ss.toarray().flatten() if issparse(ss) else ss.flatten() mask = find_extreme( s, @@ -1795,7 +1795,7 @@ def solve_alpha_mix_std_stm(self, t, ul, beta, clusters=None, alpha_time_depende range(ul.shape[0]), desc="solving steady state alpha and induction alpha", ): - l = ul[i].A.flatten() if issparse(ul) else ul[i] + l = ul[i].toarray().flatten() if issparse(ul) else ul[i] for t_ind in np.arange(1, len(t_uniq)): alpha_stm[i, t_ind] = solve_alpha_2p( t_max - t_uniq[t_ind], @@ -1870,7 +1870,7 @@ def get_n_genes(self, key=None, data=None): else: data = self.data[key] if type(data) is list: - ret = len(data[0].A) if issparse(data[0]) else len(data[0]) + ret = len(data[0].toarray()) if issparse(data[0]) else len(data[0]) else: ret = data.shape[0] return ret diff --git a/dynamo/estimation/tsc/twostep.py b/dynamo/estimation/tsc/twostep.py index 137c9764c..bdd44dc32 100644 --- a/dynamo/estimation/tsc/twostep.py +++ b/dynamo/estimation/tsc/twostep.py @@ -37,10 +37,10 @@ def fit_slope_stochastic( zip(np.arange(n_var), S, U, US, S2), "Estimate slope k via linear regression.", ): - u = u.A.flatten() if issparse(u) else u.flatten() - s = s.A.flatten() if issparse(s) else s.flatten() - us = us.A.flatten() if issparse(us) else us.flatten() - s2 = s2.A.flatten() if issparse(s2) else s2.flatten() + u = u.toarray().flatten() if issparse(u) else u.flatten() + s = s.toarray().flatten() if issparse(s) else s.flatten() + us = us.toarray().flatten() if issparse(us) else us.flatten() + s2 = s2.toarray().flatten() if issparse(s2) else s2.flatten() mask = find_extreme(u, s, perc_left=perc_left, perc_right=perc_right) k[i] = fit_stochastic_linreg(u[mask], s[mask], us[mask], s2[mask]) @@ -77,8 +77,8 @@ def lin_reg_gamma_synthesis( zip(np.arange(n_var), R, N), "Estimate gamma via linear regression of t vs. -ln(1-K)", ): - r = r.A.flatten() if issparse(r) else r.flatten() - n = n.A.flatten() if issparse(n) else n.flatten() + r = r.toarray().flatten() if issparse(r) else r.flatten() + n = n.toarray().flatten() if issparse(n) else n.flatten() K_list[i], R2 = fit_labeling_synthesis(n, r, time, perc_right=perc_right) gamma[i], r2[i] = compute_gamma_synthesis(K_list[i], np.unique(time)) diff --git a/dynamo/external/scifate.py b/dynamo/external/scifate.py index db67a1061..12be14d4a 100644 --- a/dynamo/external/scifate.py +++ b/dynamo/external/scifate.py @@ -193,9 +193,9 @@ def adata_processing_TF_link( # normalize data (size factor correction, log transform and the scaling) if issparse(new): - new = new.A + new = new.toarray() if issparse(total): - total = total.A + total = total.toarray() new_mat = normalize_data(new, szfactors, pseudo_expr=0.1) tot_mat = normalize_data(total, szfactors, pseudo_expr=0.1) new_mat = pd.DataFrame(new_mat, index=adata.obs_names, columns=adata.var_names) @@ -207,7 +207,7 @@ def adata_processing_TF_link( var.loc[:, "gene_short_name"] = make_index_unique(var.loc[:, "gene_short_name"].astype("str")) ntr = adata.layers["new"].sum(1).A1 / adata.layers["total"].sum(1).A1 if issparse(ntr): - obs.loc[:, "labeling_rate"] = ntr.A1 + obs.loc[:, "labeling_rate"] = ntr.toarray().ravel() else: obs.loc[:, "labeling_rate"] = ntr diff --git a/dynamo/external/scribe.py b/dynamo/external/scribe.py index 518d22c6a..bae070160 100644 --- a/dynamo/external/scribe.py +++ b/dynamo/external/scribe.py @@ -252,7 +252,7 @@ def coexp_measure( pearson = np.zeros(len(genes)) X, Y = adata[:, genes].layers[layer_x].T, adata[:, genes].layers[layer_y].T - X, Y = X.A if issparse(X) else X, Y.A if issparse(Y) else Y + X, Y = X.toarray() if issparse(X) else X, Y.toarray() if issparse(Y) else Y k = min(5, int(adata.n_obs / 5 + 1)) for i in tqdm( @@ -424,7 +424,7 @@ def pool_mi(x, y, k): return mi(x, y, k) X = np.repeat(x[:, None], len(Targets), axis=1) - Y = t1_df[:, Targets] if issparse(t1_df) else t1_df[:, Targets].A + Y = t1_df[:, Targets].toarray() if issparse(t1_df) else t1_df[:, Targets] pool = ThreadPool(cores) res = pool.starmap(pool_mi, zip(X, Y, itertools.repeat(k))) pool.close() diff --git a/dynamo/plot/dynamics.py b/dynamo/plot/dynamics.py index e77296739..6871f13f2 100755 --- a/dynamo/plot/dynamics.py +++ b/dynamo/plot/dynamics.py @@ -341,8 +341,8 @@ def phase_portraits( raise ValueError("adata has no vkey {} in either the layers or the obsm slot".format(vkey)) E_vec, V_vec = ( - E_vec.A if issparse(E_vec) else E_vec, - V_vec.A if issparse(V_vec) else V_vec, + E_vec.toarray() if issparse(E_vec) else E_vec, + V_vec.toarray() if issparse(V_vec) else V_vec, ) if k_name in vel_params_df.columns: @@ -368,11 +368,11 @@ def phase_portraits( index_gene(adata, adata.layers[mapper["X_total"]], genes), ) - new_mat, tot_mat = (new_mat.A, tot_mat.A) if issparse(new_mat) else (new_mat, tot_mat) + new_mat, tot_mat = (new_mat.toarray(), tot_mat.toarray()) if issparse(new_mat) else (new_mat, tot_mat) vel_u, vel_s = ( - index_gene(adata, adata.layers["velocity_N"].A, genes), - index_gene(adata, adata.layers["velocity_T"].A, genes), + index_gene(adata, adata.layers["velocity_N"].toarray(), genes), + index_gene(adata, adata.layers["velocity_T"].toarray(), genes), ) df = pd.DataFrame( @@ -398,12 +398,12 @@ def phase_portraits( ) unspliced_mat, spliced_mat = ( - (unspliced_mat.A, spliced_mat.A) if issparse(unspliced_mat) else (unspliced_mat, spliced_mat) + (unspliced_mat.toarray(), spliced_mat.toarray()) if issparse(unspliced_mat) else (unspliced_mat, spliced_mat) ) vel_u, vel_s = ( - np.zeros_like(index_gene(adata, adata.layers["velocity_S"].A, genes)), - index_gene(adata, adata.layers["velocity_S"].A, genes), + np.zeros_like(index_gene(adata, adata.layers["velocity_S"].toarray(), genes)), + index_gene(adata, adata.layers["velocity_S"].toarray(), genes), ) df = pd.DataFrame( @@ -429,17 +429,17 @@ def phase_portraits( index_gene(adata, adata.layers[mapper["X_new"]], genes), index_gene(adata, adata.layers[mapper["X_total"]], genes), ) - U, S, N, T = (U.A, S.A, N.A, T.A) if issparse(U) else (U, S, N, T) + U, S, N, T = (U.toarray(), S.toarray(), N.toarray(), T.toarray()) if issparse(U) else (U, S, N, T) vel_u, vel_s = ( ( - index_gene(adata, adata.layers["velocity_U"].A, genes) if "velocity_U" in adata.layers.keys() else None, - index_gene(adata, adata.layers["velocity_S"].A, genes), + index_gene(adata, adata.layers["velocity_U"].toarray(), genes) if "velocity_U" in adata.layers.keys() else None, + index_gene(adata, adata.layers["velocity_S"].toarray(), genes), ) if vkey == "velocity_S" else ( - index_gene(adata, adata.layers["velocity_N"].A, genes) if "velocity_U" in adata.layers.keys() else None, - index_gene(adata, adata.layers["velocity_T"].A, genes), + index_gene(adata, adata.layers["velocity_N"].toarray(), genes) if "velocity_U" in adata.layers.keys() else None, + index_gene(adata, adata.layers["velocity_T"].toarray(), genes), ) ) if "protein" in adata.obsm.keys(): @@ -461,9 +461,9 @@ def phase_portraits( if (["X_protein"] in adata.obsm.keys() or [mapper["X_protein"]] in adata.obsm.keys()) else index_gene(adata, adata.obsm["protein"], genes) ) - P = P.A if issparse(P) else P + P = P.toarray() if issparse(P) else P if issparse(P_vec): - P_vec = P_vec.A + P_vec = P_vec.toarray() vel_p = np.zeros_like(adata.obsm["velocity_P"][:, :]) @@ -1683,16 +1683,16 @@ def dynamics( if has_splicing: tmp = ( [ - valid_adata[:, gene_name].layers["M_ul"].A.T, - valid_adata.layers["M_sl"].A.T, + valid_adata[:, gene_name].layers["M_ul"].toarray().T, + valid_adata.layers["M_sl"].toarray().T, ] if "M_ul" in valid_adata.layers.keys() else [ - valid_adata[:, gene_name].layers["ul"].A.T, - valid_adata.layers["sl"].A.T, + valid_adata[:, gene_name].layers["ul"].toarray().T, + valid_adata.layers["sl"].toarray().T, ] ) - x_data = [tmp[0].A, tmp[1].A] if issparse(tmp[0]) else tmp + x_data = [tmp[0].toarray(), tmp[1].toarray()] if issparse(tmp[0]) else tmp if log_unnormalized and "X_ul" not in valid_adata.layers.keys(): x_data = [np.log1p(tmp[0]), np.log1p(tmp[1])] @@ -1717,7 +1717,7 @@ def dynamics( if "X_new" in valid_adata.layers.keys() else valid_adata[:, gene_name].layers["new"].T ) - x_data = [tmp.A] if issparse(tmp) else [tmp] + x_data = [tmp.toarray()] if issparse(tmp) else [tmp] if log_unnormalized and "X_new" not in valid_adata.layers.keys(): x_data = [np.log1p(x_data[0])] diff --git a/dynamo/plot/markers.py b/dynamo/plot/markers.py index 96eee6491..7b57cea08 100644 --- a/dynamo/plot/markers.py +++ b/dynamo/plot/markers.py @@ -199,7 +199,7 @@ def bubble( cells_df = adata.obs.get(group) gene_df = adata[:, genes].layers[layer] - gene_df = gene_df.A if issparse(gene_df) else gene_df + gene_df = gene_df.toarray() if issparse(gene_df) else gene_df gene_df = pd.DataFrame(gene_df.T, index=genes, columns=adata.obs_names) xmin, xmax = gene_df.quantile(vmin / 100, axis=1), gene_df.quantile(vmax / 100, axis=1) diff --git a/dynamo/plot/networks.py b/dynamo/plot/networks.py index 2c6087766..5e53ff68c 100644 --- a/dynamo/plot/networks.py +++ b/dynamo/plot/networks.py @@ -103,10 +103,10 @@ def nxvizPlot( # data has to be float if cluster is not None: network.nodes[n]["size"] = ( - adata[adata.obs[cluster].isin(cluster_names), n].layers[layer].A.mean().astype(float) + adata[adata.obs[cluster].isin(cluster_names), n].layers[layer].toarray().mean().astype(float) ) else: - network.nodes[n]["size"] = adata[:, n].layers[layer].A.mean().astype(float) + network.nodes[n]["size"] = adata[:, n].layers[layer].toarray().mean().astype(float) network.nodes[n]["label"] = n for e in network.edges(): diff --git a/dynamo/plot/preprocess.py b/dynamo/plot/preprocess.py index fa3ec7c3e..8f07a2e16 100755 --- a/dynamo/plot/preprocess.py +++ b/dynamo/plot/preprocess.py @@ -734,7 +734,7 @@ def exp_by_groups( raise ValueError(f"The layer {layer} is not existed in your adata object!") exprs = adata[:, valid_genes].X if layer == "X" else adata[:, valid_genes].layers[layer] - exprs = exprs.A if issparse(exprs) else exprs + exprs = exprs.toarray() if issparse(exprs) else exprs if use_ratio: ( has_splicing, @@ -749,7 +749,7 @@ def exp_by_groups( if use_smoothed else adata[:, valid_genes].layers["X_total"] ) - tot = tot.A if issparse(tot) else tot + tot = tot.toarray() if issparse(tot) else tot exprs = exprs / tot else: exprs = exprs @@ -761,7 +761,7 @@ def exp_by_groups( if use_smoothed else adata[:, valid_genes].layers["X_unspliced"] + adata[:, valid_genes].layers["X_spliced"] ) - tot = tot.A if issparse(tot) else tot + tot = tot.toarray() if issparse(tot) else tot exprs = exprs / tot else: exprs = exprs diff --git a/dynamo/plot/scatters.py b/dynamo/plot/scatters.py index 8be93e828..893265a2f 100755 --- a/dynamo/plot/scatters.py +++ b/dynamo/plot/scatters.py @@ -2847,8 +2847,8 @@ def _map_cur_axis_to_title( else: x_points_df_data, x_points_column = _map_cur_axis_to_title(axis_x, _adata, cur_b, cur_l_smoothed) y_points_df_data, y_points_column = _map_cur_axis_to_title(axis_y, _adata, cur_b, cur_l_smoothed) - x_points_df_data = x_points_df_data.A.flatten() if issparse(x_points_df_data) else x_points_df_data - y_points_df_data = y_points_df_data.A.flatten() if issparse(y_points_df_data) else y_points_df_data + x_points_df_data = x_points_df_data.toarray().flatten() if issparse(x_points_df_data) else x_points_df_data + y_points_df_data = y_points_df_data.toarray().flatten() if issparse(y_points_df_data) else y_points_df_data points = pd.DataFrame( { axis_x: x_points_df_data, diff --git a/dynamo/plot/time_series.py b/dynamo/plot/time_series.py index 1ca8892e0..5cb2f50d4 100755 --- a/dynamo/plot/time_series.py +++ b/dynamo/plot/time_series.py @@ -117,7 +117,7 @@ def kinetic_curves( color = list(set(color).intersection(adata.obs.keys())) Color = adata.obs[color].values.T.flatten() if len(color) > 0 else np.empty((0, 1)) - exprs = exprs.A if issparse(exprs) else exprs + exprs = exprs.toarray() if issparse(exprs) else exprs if len(set(genes).intersection(valid_genes)) > 0: # by default, expression values are log1p tranformed if using the expression from adata. exprs = np.expm1(exprs) if not log else exprs @@ -310,7 +310,7 @@ def kinetic_heatmap( valid_genes = [x for x in genes if x in valid_genes] - exprs = exprs.A if issparse(exprs) else exprs + exprs = exprs.toarray() if issparse(exprs) else exprs if mode != "pseudotime": exprs = np.log1p(exprs) if log else exprs diff --git a/dynamo/plot/utils_dynamics.py b/dynamo/plot/utils_dynamics.py index 5040d6fdf..c54c0d11d 100644 --- a/dynamo/plot/utils_dynamics.py +++ b/dynamo/plot/utils_dynamics.py @@ -179,9 +179,9 @@ def plot_kin_det( ) if show_variance: if has_splicing: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() else: - Obs = X_raw[i].A.flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() + Obs = X_raw[i].toarray().flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() ax.boxplot( x=[Obs[T == std] for std in T_uniq], positions=T_uniq, @@ -391,9 +391,9 @@ def plot_kin_sto( if show_variance: if j < max_box_plots: if has_splicing: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() else: - Obs = X_raw[i].A.flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() + Obs = X_raw[i].toarray().flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() ax.boxplot( x=[Obs[T == std] for std in T_uniq], @@ -624,9 +624,9 @@ def plot_kin_mix( if show_variance: if j < max_box_plots: if has_splicing: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() else: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() ax.boxplot( x=[Obs[T == std] for std in T_uniq], @@ -874,9 +874,9 @@ def plot_kin_mix_det_sto( if show_variance: if j < max_box_plots: if has_splicing: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() else: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() ax.boxplot( x=[Obs[T == std] for std in T_uniq], @@ -1149,9 +1149,9 @@ def plot_kin_mix_sto_sto( if show_variance: if j < max_box_plots: if has_splicing: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() else: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() ax.boxplot( x=[Obs[T == std] for std in T_uniq], @@ -1375,9 +1375,9 @@ def plot_deg_det( ) if show_variance: if has_splicing: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() else: - Obs = X_raw[i].A.flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() + Obs = X_raw[i].toarray().flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() ax.boxplot( x=[Obs[T == std] for std in T_uniq], positions=T_uniq, @@ -1585,9 +1585,9 @@ def plot_deg_sto( if show_variance: if j < max_box_plots: if has_splicing: - Obs = X_raw[i][j][0].A.flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() + Obs = X_raw[i][j][0].toarray().flatten() if issparse(X_raw[i][j][0]) else X_raw[i][j][0].flatten() else: - Obs = X_raw[i].A.flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() + Obs = X_raw[i].toarray().flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() ax.boxplot( x=[Obs[T == std] for std in T_uniq], @@ -1697,8 +1697,8 @@ def plot_kin_twostep( ) r = adata[:, gene_name].layers[mapper["X_total"]] if use_smoothed else adata[:, gene_name].layers["X_total"] n = adata[:, gene_name].layers[mapper["X_new"]] if use_smoothed else adata[:, gene_name].layers["X_new"] - r = r.A.flatten() if issparse(r) else r.flatten() - n = n.A.flatten() if issparse(n) else n.flatten() + r = r.toarray().flatten() if issparse(r) else r.flatten() + n = n.toarray().flatten() if issparse(n) else n.flatten() for j in range(sub_plot_n): row_ind = int(np.floor(i / ncols)) # make sure unlabled and labeled are in the same column. @@ -1909,7 +1909,7 @@ def plot_kin_deg_twostep( X_fit_data[i][1][1], ) - Obs = X_raw[i].A.flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() + Obs = X_raw[i].toarray().flatten() if issparse(X_raw[i][0]) else X_raw[i].flatten() for j in range(sub_plot_n): row_ind = int(np.floor(i / ncols)) # make sure unlabled and labeled are in the same column. diff --git a/dynamo/prediction/state_graph.py b/dynamo/prediction/state_graph.py index 592e4499d..989e355f1 100755 --- a/dynamo/prediction/state_graph.py +++ b/dynamo/prediction/state_graph.py @@ -119,7 +119,7 @@ def prune_transition( main_info("prune vf based cell graph transition graph via g' = `M' g") # note that dmatrix will first sort the unique group names and then construct the design matrix, so this is needed. - membership_df = pd.DataFrame(membership_matrix.A > 0, index=sorted_grps, columns=sorted_grps) + membership_df = pd.DataFrame(membership_matrix.toarray() > 0, index=sorted_grps, columns=sorted_grps) M = (group_graph * (membership_df.loc[uniq_grps, uniq_grps].values > 0) > 0).astype(float) @@ -183,7 +183,7 @@ def state_graph( logger.info("KernelMarkovChain assuming column sum to be 1. Transposing transition matrix") T = T.T if sp.issparse(T): - T = T.A + T = T.toarray() dtmc = DiscreteTimeMarkovChain(P=T, eignum=eignum, check_norm=False) # ord_enc = OrdinalEncoder() @@ -193,7 +193,7 @@ def state_graph( for i, grp in enumerate(uniq_grp): labels[groups == grp] = i - grp_graph = dtmc.lump(labels).T if method == "markov" else dtmc.naive_lump(T.A, labels).T + grp_graph = dtmc.lump(labels).T if method == "markov" else dtmc.naive_lump(T.toarray(), labels).T label_len, grp_avg_time = len(np.unique(labels)), None grp_graph = grp_graph[:label_len, :label_len] diff --git a/dynamo/prediction/trajectory_analysis.py b/dynamo/prediction/trajectory_analysis.py index 8013a55e6..39364c2d4 100644 --- a/dynamo/prediction/trajectory_analysis.py +++ b/dynamo/prediction/trajectory_analysis.py @@ -155,7 +155,7 @@ def mean_first_passage_time( traj_id = adata.obs[traj_key] if sp.issparse(X): - X = X.A + X = X.toarray() trajs = [] for traj_i in np.unique(traj_id): diff --git a/dynamo/preprocessing/QC.py b/dynamo/preprocessing/QC.py index 7f2a113c5..4d370c169 100644 --- a/dynamo/preprocessing/QC.py +++ b/dynamo/preprocessing/QC.py @@ -299,7 +299,7 @@ def filter_cells_by_highly_variable_genes( filter_bool = adata.var[high_var_genes_key].values X = DKM.select_layer_data(adata, layer=select_genes_layer) - X = X[:, filter_bool].A if issparse(X) else X[:, filter_bool] + X = X[:, filter_bool].toarray() if issparse(X) else X[:, filter_bool] nan_columns_index = np.where(np.sum(X, axis=1) == 0)[0] adata.obs[obs_store_key].iloc[nan_columns_index] = False diff --git a/dynamo/preprocessing/cell_cycle.py b/dynamo/preprocessing/cell_cycle.py index 540f9dd52..48764920e 100644 --- a/dynamo/preprocessing/cell_cycle.py +++ b/dynamo/preprocessing/cell_cycle.py @@ -48,8 +48,8 @@ def group_corr(adata: anndata.AnnData, layer: Union[str, None], gene_list: list) avg_exp = expression_matrix.mean(axis=1) cor = ( einsum_correlation( - np.array(expression_matrix.A.T, dtype="float"), - np.array(avg_exp.A1, dtype="float"), + np.array(expression_matrix.toarray().T, dtype="float"), + np.array(avg_exp.toarray().ravel(), dtype="float"), ) if issparse(expression_matrix) else einsum_correlation( diff --git a/dynamo/preprocessing/deprecated.py b/dynamo/preprocessing/deprecated.py index 00cfbbd7f..79c655cc2 100644 --- a/dynamo/preprocessing/deprecated.py +++ b/dynamo/preprocessing/deprecated.py @@ -209,7 +209,7 @@ def _disp_calc_helper_NB_legacy( nzGenes = (rounded > lowerDetectedLimit).sum(axis=0) nzGenes = nzGenes > min_cells_detected - nzGenes = nzGenes.A1 if issparse(rounded) else nzGenes + nzGenes = nzGenes.toarray().ravel() if issparse(rounded) else nzGenes if layer.startswith("X_"): x = rounded[:, nzGenes] else: @@ -756,7 +756,7 @@ def _normalize_cell_expr_by_size_factors_legacy( n_feature = CM.shape[1] for i in range(CM.shape[0]): - x = CM[i].A if issparse(CM) else CM[i] + x = CM[i].toarray() if issparse(CM) else CM[i] res = np.log1p(x / (np.exp(np.nansum(np.log1p(x[x > 0])) / n_feature))) res[np.isnan(res)] = 0 # res[res > 100] = 100 diff --git a/dynamo/preprocessing/external/sctransform.py b/dynamo/preprocessing/external/sctransform.py index 3bbc12fae..745c1ce0f 100644 --- a/dynamo/preprocessing/external/sctransform.py +++ b/dynamo/preprocessing/external/sctransform.py @@ -109,7 +109,7 @@ def _parallel_init( def _parallel_wrapper(j: int) -> None: """Helper function to fit Poisson regression to UMI counts.""" name = gn[genes_bin_regress[j]] - y = umi_bin[:, j].A.flatten() + y = umi_bin[:, j].toarray().flatten() pr = statsmodels.discrete.discrete_model.Poisson(y, mm) res = pr.fit(disp=False) mu = res.predict() @@ -140,7 +140,7 @@ def gmean( assert np.all(X.sum(0) > 0) assert np.all(X.data > 0) X.data[:] = np.log(X.data + eps) - res = np.exp(X.mean(axis).A.flatten()) - eps + res = np.exp(X.mean(axis).toarray().flatten()) - eps assert np.all(res > 0) return res @@ -230,7 +230,7 @@ def sctransform_core( X.eliminate_zeros() gene_names = np.array(list(adata.var_names)) cell_names = np.array(list(adata.obs_names)) - genes_cell_count = X.sum(0).A.flatten() + genes_cell_count = X.sum(0).toarray().flatten() genes = np.where(genes_cell_count >= min_cells)[0] genes_ix = genes.copy() @@ -243,7 +243,7 @@ def sctransform_core( # sample by n_cells, or use all cells if n_cells is not None and n_cells < X.shape[0]: cells_step1 = np.sort(np.random.choice(X.shape[0], replace=False, size=n_cells)) - genes_cell_count_step1 = X[cells_step1].sum(0).A.flatten() + genes_cell_count_step1 = X[cells_step1].sum(0).toarray().flatten() genes_step1 = np.where(genes_cell_count_step1 >= min_cells)[0] genes_log_gmean_step1 = np.log10(gmean(X[cells_step1][:, genes_step1], axis=0, eps=gmean_eps)) else: @@ -251,11 +251,11 @@ def sctransform_core( genes_step1 = genes genes_log_gmean_step1 = genes_log_gmean - umi = X.sum(1).A.flatten() + umi = X.sum(1).toarray().flatten() log_umi = np.log10(umi) X2 = X.copy() X2.data[:] = 1 - gene = X2.sum(1).A.flatten() + gene = X2.sum(1).toarray().flatten() log_gene = np.log10(gene) umi_per_gene = umi / gene log_umi_per_gene = np.log10(umi_per_gene) @@ -443,5 +443,5 @@ def sctransform( X_squared = adata.X.copy() X_squared.data **= 2 variance = X_squared.mean(0) - np.square(adata.X.mean(0)) - adata.var["sct_score"] = variance.A1 + adata.var["sct_score"] = variance.toarray().ravel() if sp.issparse(variance) else variance.ravel() adata.var["use_for_pca"] = get_gene_selection_filter(adata.var["sct_score"], n_top_genes=n_top_genes) diff --git a/dynamo/preprocessing/gene_selection.py b/dynamo/preprocessing/gene_selection.py index 3016e725c..305b3208d 100644 --- a/dynamo/preprocessing/gene_selection.py +++ b/dynamo/preprocessing/gene_selection.py @@ -58,7 +58,7 @@ def calc_Gini(adata: AnnData, layers: Union[Literal["all"], List[str]] = "all") def _compute_gini(CM): # convert to dense array if sparse if issparse(CM): - CM = CM.A + CM = CM.toarray() # shift all values to be non-negative CM -= np.min(CM) @@ -373,12 +373,12 @@ def get_mean_cv( elif algorithm == "cv_dispersion": if winsorize: down, up = ( - np.percentile(valid_CM.A, winsor_perc, 0) + np.percentile(valid_CM.toarray(), winsor_perc, 0) if issparse(valid_CM) else np.percentile(valid_CM, winsor_perc, 0) ) Sfw = ( - np.clip(valid_CM.A, down[None, :], up[None, :]) + np.clip(valid_CM.toarray(), down[None, :], up[None, :]) if issparse(valid_CM) else np.percentile(valid_CM, winsor_perc, 0) ) diff --git a/dynamo/preprocessing/normalization.py b/dynamo/preprocessing/normalization.py index ad2dc9b8a..245cda265 100644 --- a/dynamo/preprocessing/normalization.py +++ b/dynamo/preprocessing/normalization.py @@ -336,7 +336,7 @@ def normalize( n_feature = CM.shape[1] for i in range(CM.shape[0]): - x = CM[i].A if issparse(CM) else CM[i] + x = CM[i].toarray() if issparse(CM) else CM[i] res = np.log1p(x / (np.exp(np.nansum(np.log1p(x[x > 0])) / n_feature))) res[np.isnan(res)] = 0 # res[res > 100] = 100 diff --git a/dynamo/preprocessing/utils.py b/dynamo/preprocessing/utils.py index 766501383..e951980a0 100755 --- a/dynamo/preprocessing/utils.py +++ b/dynamo/preprocessing/utils.py @@ -885,13 +885,13 @@ def relative2abs( if not logged: X, X_ercc = ( - np.log1p(X.A) if issparse(X_ercc) else np.log1p(X), - np.log1p(X_ercc.A) if issparse(X_ercc) else np.log1p(X_ercc), + np.log1p(X.toarray()) if issparse(X_ercc) else np.log1p(X), + np.log1p(X_ercc.toarray()) if issparse(X_ercc) else np.log1p(X_ercc), ) else: X, X_ercc = ( - X.A if issparse(X_ercc) else X, - X_ercc.A if issparse(X_ercc) else X_ercc, + X.toarray() if issparse(X_ercc) else X, + X_ercc.toarray() if issparse(X_ercc) else X_ercc, ) y = np.log1p(ERCC_annotation["numMolecules"]) @@ -908,9 +908,9 @@ def relative2abs( logged = False if X.max() > 100 else True if not logged: - X_i = np.log1p(X[i, :].A) if issparse(X) else np.log1p(X[i, :]) + X_i = np.log1p(X[i, :].toarray()) if issparse(X) else np.log1p(X[i, :]) else: - X_i = X[i, :].A if issparse(X) else X[i, :] + X_i = X[i, :].toarray() if issparse(X) else X[i, :] res = k * X_i + b res = res if logged else np.expm1(res) @@ -921,9 +921,9 @@ def relative2abs( logged = False if X.max() > 100 else True if not logged: - X_i = np.log1p(X[i, :].A) if issparse(X) else np.log1p(X[i, :]) + X_i = np.log1p(X[i, :].toarray()) if issparse(X) else np.log1p(X[i, :]) else: - X_i = X[i, :].A if issparse(X) else X[i, :] + X_i = X[i, :].toarray() if issparse(X) else X[i, :] res = k * X_i + b if logged else np.expm1(k * X_i + b) adata.layers[cur_layer][i, :] = csr_matrix(res) if issparse(X) else res diff --git a/dynamo/tools/Markov.py b/dynamo/tools/Markov.py index c54b34674..99415c4c6 100755 --- a/dynamo/tools/Markov.py +++ b/dynamo/tools/Markov.py @@ -516,7 +516,7 @@ def compute_drift(self, X: np.ndarray, num_prop: int = 1, scale: bool = True) -> V = np.zeros_like(X) P = self.propagate_P(int(num_prop)) for i in tqdm(range(n), desc="compute drift"): - V[i] = (X - X[i]).T.dot(P[:, i].A.flatten()) + V[i] = (X - X[i]).T.dot(P[:, i].toarray().flatten()) return V * 1 / V.max() if scale else V def compute_density_corrected_drift( @@ -553,7 +553,7 @@ def compute_density_corrected_drift( D = X[Idx] - X[i] if normalize_vector: D = D / np.linalg.norm(D, 1) - p = P[Idx, i].A.flatten() + p = P[Idx, i].toarray().flatten() if k is None: if not correct_by_mean: k_inv = 1 / len(Idx) diff --git a/dynamo/tools/cell_velocities.py b/dynamo/tools/cell_velocities.py index baff9cb89..222aaa96f 100755 --- a/dynamo/tools/cell_velocities.py +++ b/dynamo/tools/cell_velocities.py @@ -323,8 +323,8 @@ def cell_velocities( "min_gamma thresholds." ) - V = V.A if sp.issparse(V) else V - X = X.A if sp.issparse(X) else X + V = V.toarray() if sp.issparse(V) else V + X = X.toarray() if sp.issparse(X) else X finite_inds = get_finite_inds(V) if sum(finite_inds) != X.shape[1]: @@ -549,7 +549,7 @@ def cell_velocities( X_pca, pca_PCs = adata.obsm[DKM.X_PCA], adata.uns["PCs"] V = adata[:, adata.var.use_for_dynamics.values].layers[vkey] if vkey in adata.layers.keys() else None - CM, V = CM.A if sp.issparse(CM) else CM, V.A if sp.issparse(V) else V + CM, V = CM.toarray() if sp.issparse(CM) else CM, V.toarray() if sp.issparse(V) else V V[np.isnan(V)] = 0 Y_pca = expr_to_pca(CM + V, PCs=pca_PCs, mean=(CM + V).mean(0)) @@ -964,7 +964,7 @@ def kernels_from_velocyto_scvelo( if neg_cells_trick: G, G_ = G - confidence, ub_confidence = G.max(1).A.flatten(), np.percentile(G.max(1).A.flatten(), 98) + confidence, ub_confidence = G.max(1).toarray().flatten(), np.percentile(G.max(1).toarray().flatten(), 98) dig_p = np.clip(ub_confidence - confidence, 0, 1) G.setdiag(dig_p) diff --git a/dynamo/tools/connectivity.py b/dynamo/tools/connectivity.py index 5a8fd0e9c..a3fe7cf25 100755 --- a/dynamo/tools/connectivity.py +++ b/dynamo/tools/connectivity.py @@ -57,13 +57,13 @@ def adj_to_knn(adj: np.ndarray, n_neighbors: int = 15) -> Tuple[np.ndarray, np.n cur_n_neighbors = len(cur_neighbors[1]) if cur_n_neighbors > n_neighbors - 1: - sorted_indices = np.argsort(adj[cur_cell][:, cur_neighbors[1]].A)[0][: (n_neighbors - 1)] + sorted_indices = np.argsort(adj[cur_cell][:, cur_neighbors[1]].toarray())[0][: (n_neighbors - 1)] idx[cur_cell, 1:] = cur_neighbors[1][sorted_indices] - wgt[cur_cell, 1:] = adj[cur_cell][0, cur_neighbors[1][sorted_indices]].A + wgt[cur_cell, 1:] = adj[cur_cell][0, cur_neighbors[1][sorted_indices]].toarray() else: idx_ = np.arange(1, (cur_n_neighbors + 1)) idx[cur_cell, idx_] = cur_neighbors[1] - wgt[cur_cell, idx_] = adj[cur_cell][:, cur_neighbors[1]].A + wgt[cur_cell, idx_] = adj[cur_cell][:, cur_neighbors[1]].toarray() return idx, wgt diff --git a/dynamo/tools/deprecated.py b/dynamo/tools/deprecated.py index 3fef4f922..4075303c7 100644 --- a/dynamo/tools/deprecated.py +++ b/dynamo/tools/deprecated.py @@ -1653,7 +1653,7 @@ def __init__(self, adata, time_key="Time", has_nan=False): for g in tqdm(range(ng), desc="calculating 1/2 moments"): tmp = self[:, g].layers["new"] L = ( - np.array(tmp.A, dtype=float) if issparse(tmp) else np.array(tmp, dtype=float) + np.array(tmp.toarray(), dtype=float) if issparse(tmp) else np.array(tmp, dtype=float) ) # consider using the `adata.obs_vector`, `adata.var_vector` methods or accessing the array directly. if has_nan: self.M[g] = strat_mom(L, self.times, np.nanmean) diff --git a/dynamo/tools/dynamics.py b/dynamo/tools/dynamics.py index 490b574b0..6b0a46913 100755 --- a/dynamo/tools/dynamics.py +++ b/dynamo/tools/dynamics.py @@ -420,7 +420,7 @@ def dynamics( enumerate(L), desc=f"sanity check of {experiment_type} experiment data:", ): - cur_L = cur_L.A.flatten() if issparse(cur_L) else cur_L.flatten() + cur_L = cur_L.toarray().flatten() if issparse(cur_L) else cur_L.flatten() y = strat_mom(cur_L, t, np.nanmean) slope, _ = fit_linreg(t_uniq, y, intercept=True, r2=False) valid_gene_checker[L_iter] = ( @@ -1591,7 +1591,7 @@ def kinetic_model( if model.lower() == "mixture": cur_X_data = np.vstack([X[i_layer][i_gene] for i_layer in range(len(X))]) if issparse(X_raw[0]): - cur_X_raw = np.hstack([X_raw[i_layer][:, i_gene].A for i_layer in range(len(X))]) + cur_X_raw = np.hstack([X_raw[i_layer][:, i_gene].toarray() for i_layer in range(len(X))]) else: cur_X_raw = np.hstack([X_raw[i_layer][:, i_gene] for i_layer in range(len(X))]) else: @@ -1599,7 +1599,7 @@ def kinetic_model( cur_X_raw = X_raw[i_gene] if issparse(cur_X_raw[0, 0]): - cur_X_raw = np.hstack((cur_X_raw[0, 0].A, cur_X_raw[1, 0].A)) + cur_X_raw = np.hstack((cur_X_raw[0, 0].toarray(), cur_X_raw[1, 0].toarray())) _, cost[i_gene] = estm.auto_fit(np.unique(time), cur_X_data) ( @@ -1654,7 +1654,7 @@ def kinetic_model( Estm[i_gene] = estm.export_parameters() if issparse(cur_X_raw[0, 0]): - cur_X_raw = np.hstack((cur_X_raw[0, 0].A, cur_X_raw[1, 0].A)) + cur_X_raw = np.hstack((cur_X_raw[0, 0].toarray(), cur_X_raw[1, 0].toarray())) # model_1, kinetic_parameters, mix_x0 = estm.export_dictionary().values() # tmp = list(kinetic_parameters.values()) # tmp.extend(mix_x0) diff --git a/dynamo/tools/markers.py b/dynamo/tools/markers.py index 82c783fb9..ec1e1ff64 100755 --- a/dynamo/tools/markers.py +++ b/dynamo/tools/markers.py @@ -131,7 +131,7 @@ def moran_i( if local_moran: l_moran = np.zeros(X_data.shape) for cur_g in tqdm(range(gene_num), desc="Moran’s I Global Autocorrelation Statistic"): - cur_X = X_data[:, cur_g].A if sparse else X_data[:, cur_g] + cur_X = X_data[:, cur_g].toarray() if sparse else X_data[:, cur_g] mbi = explore.esda.moran.Moran(cur_X, W, two_tailed=False) Moran_I[cur_g] = mbi.I @@ -395,7 +395,7 @@ def two_groups_degs( for i_gene, gene in tqdm(enumerate(genes), desc="identifying top markers for each group"): rbc, specificity_, mw_p, log_fc, ncells = 0, 0, 1, 0, 0 - all_vals = X_data[:, i_gene].A if sparse else X_data[:, i_gene] + all_vals = X_data[:, i_gene].toarray() if sparse else X_data[:, i_gene] test_vals = all_vals[test_cells] perc, ef = [len(test_vals.nonzero()[0]) / n_cells], len(test_vals.nonzero()[0]) / num_test_cells if ef < exp_frac_thresh: @@ -716,7 +716,7 @@ def glm_degs( enumerate(genes), "Detecting time dependent genes via Generalized Additive Models (GAMs)", ): - expression = X_data[:, i].A if sparse else X_data[:, i] + expression = X_data[:, i].toarray() if sparse else X_data[:, i] df_factors["expression"] = expression deg_df.iloc[i, :] = diff_test_helper(df_factors, fullModelFormulaStr, reducedModelFormulaStr) diff --git a/dynamo/tools/metric_velocity.py b/dynamo/tools/metric_velocity.py index cbd8a54c2..6b335eb33 100755 --- a/dynamo/tools/metric_velocity.py +++ b/dynamo/tools/metric_velocity.py @@ -123,9 +123,9 @@ def cell_wise_confidence( range(adata.n_obs), desc="calculating hybrid method (jaccard + consensus) based cell wise confidence", ): - neigh_ids = np.where(intersect_[i].A)[0] if issparse(intersect_) else np.where(intersect_[i])[0] + neigh_ids = np.where(intersect_[i].toarray())[0] if issparse(intersect_) else np.where(intersect_[i])[0] confidence[i] = ( - jac[i] * np.mean([consensus(V[i].A.flatten(), V[j].A.flatten()) for j in neigh_ids]) + jac[i] * np.mean([consensus(V[i].toarray().flatten(), V[j].toarray().flatten()) for j in neigh_ids]) if issparse(V) else jac[i] * np.mean([consensus(V[i].flatten(), V[j].flatten()) for j in neigh_ids]) ) @@ -140,7 +140,7 @@ def cell_wise_confidence( ): neigh_ids = indices[i] confidence[i] = ( - np.mean([einsum_correlation(V[i].A, V[j].A.flatten(), type="cosine")[0, 0] for j in neigh_ids]) + np.mean([einsum_correlation(V[i].toarray(), V[j].toarray().flatten(), type="cosine")[0, 0] for j in neigh_ids]) if issparse(V) else np.mean( [einsum_correlation(V[i][None, :], V[j].flatten(), type="cosine")[0, 0] for j in neigh_ids] @@ -157,7 +157,7 @@ def cell_wise_confidence( ): neigh_ids = indices[i] confidence[i] = ( - np.mean([consensus(V[i].A.flatten(), V[j].A.flatten()) for j in neigh_ids]) + np.mean([consensus(V[i].toarray().flatten(), V[j].toarray().flatten()) for j in neigh_ids]) if issparse(V) else np.mean([consensus(V[i], V[j].flatten()) for j in neigh_ids]) ) @@ -173,7 +173,7 @@ def cell_wise_confidence( ): neigh_ids = indices[i] confidence[i] = ( - np.mean([einsum_correlation(V[i].A, V[j].A.flatten(), type="pearson")[0, 0] for j in neigh_ids]) + np.mean([einsum_correlation(V[i].toarray(), V[j].toarray().flatten(), type="pearson")[0, 0] for j in neigh_ids]) if issparse(V) else np.mean( [einsum_correlation(V[i][None, :], V[j].flatten(), type="pearson")[0, 0] for j in neigh_ids] @@ -295,8 +295,8 @@ def gene_wise_confidence( desc="calculating gene velocity vectors confidence based on phase " "portrait location with priors of progenitor/mature cell types", ): - all_vals = X_data[:, i_gene].A if sparse else X_data[:, i_gene] - all_vals_v = V_data[:, i_gene].A if sparse_v else V_data[:, i_gene] + all_vals = X_data[:, i_gene].toarray() if sparse else X_data[:, i_gene] + all_vals_v = V_data[:, i_gene].toarray() if sparse_v else V_data[:, i_gene] for progenitors_groups, mature_cells_groups in lineage_dict.items(): progenitors_groups = [progenitors_groups] diff --git a/dynamo/tools/moments.py b/dynamo/tools/moments.py index 83079fd6c..cfbec5cdf 100755 --- a/dynamo/tools/moments.py +++ b/dynamo/tools/moments.py @@ -377,7 +377,7 @@ def prepare_data_deterministic( x_layer = adata[:, genes].layers[layer] if return_ntr: T_genes = adata[:, genes].layers[get_layer_pair(layer)] - T_genes = T_genes.A if issparse(T_genes) else T_genes + T_genes = T_genes.toarray() if issparse(T_genes) else T_genes x_layer = x_layer / (T_genes + 1e-5) else: x_layer = x_layer - adata[:, genes].layers[get_layer_pair(layer)] @@ -443,7 +443,7 @@ def prepare_data_deterministic( if return_ntr: T_genes = adata[:, genes].layers[t_layer_key] - T_genes = T_genes.A if issparse(T_genes) else T_genes + T_genes = T_genes.toarray() if issparse(T_genes) else T_genes x_layer = (x_layer - y_layer) / (T_genes + 1e-5) else: x_layer = x_layer - y_layer @@ -454,7 +454,7 @@ def prepare_data_deterministic( if return_ntr: T_genes = adata[:, genes].layers[total_layer] - T_genes = T_genes.A if issparse(T_genes) else T_genes + T_genes = T_genes.toarray() if issparse(T_genes) else T_genes x_layer = adata[:, genes].layers[layer] / (T_genes + 1e-5) else: x_layer = adata[:, genes].layers[layer] @@ -491,7 +491,7 @@ def prepare_data_deterministic( pseudo_expr=0, norm_method=None, ) - total_layer = total_layer.A if issparse(total_layer) else total_layer + total_layer = total_layer.toarray() if issparse(total_layer) else total_layer x_layer /= total_layer + 1e-5 if log: if issparse(x_layer): @@ -640,7 +640,7 @@ def prepare_data_has_splicing( for i, g in enumerate(genes): if return_ntr: - T_i = T[:, i].A if issparse(T[:, i]) else T[:, i] + T_i = T[:, i].toarray() if issparse(T[:, i]) else T[:, i] u = U[:, i] / (T_i + 1e-5) s = S[:, i] / (T_i + 1e-5) else: @@ -759,7 +759,7 @@ def prepare_data_no_splicing( for i, g in enumerate(genes): if return_ntr: - T_i = T[:, i].A if issparse(T[:, i]) else T[:, i] + T_i = T[:, i].toarray() if issparse(T[:, i]) else T[:, i] u, t = U[:, i] / (T_i + 1e-5), T[:, i] else: u, t = U[:, i], T[:, i] @@ -1137,7 +1137,7 @@ def calc_12_mom_labeling( for i in range(data.shape[0]): data_ = ( - np.array(data[i].A.flatten(), dtype=float) if issparse(data) else np.array(data[i], dtype=float) + np.array(data[i].toarray().flatten(), dtype=float) if issparse(data) else np.array(data[i], dtype=float) ) # consider using the `adata.obs_vector`, `adata.var_vector` methods or accessing the array directly. m[i] = strat_mom(data_, t, np.nanmean) if calculate_2_mom: @@ -1189,7 +1189,7 @@ def strat_mom(arr: Union[np.ndarray, csr_matrix], strata: np.ndarray, fcn_mom: C The momentum for each stratum. """ - arr = arr.A if issparse(arr) else arr + arr = arr.toarray() if issparse(arr) else arr x = stratify(arr, strata) return np.array([fcn_mom(y) for y in x]) diff --git a/dynamo/tools/pseudotime_velocity.py b/dynamo/tools/pseudotime_velocity.py index 7b18b6311..aae045ae6 100644 --- a/dynamo/tools/pseudotime_velocity.py +++ b/dynamo/tools/pseudotime_velocity.py @@ -119,7 +119,7 @@ def pseudotime_velocity( adata.obsm[velocity_key] = delta_x logger.info("Use pseudotime transition matrix to learn gene-wise velocity vectors.") - delta_X = projection_with_transition_matrix(T, adata.layers[ekey].A, True) + delta_X = projection_with_transition_matrix(T, adata.layers[ekey].toarray(), True) logger.info_insert_adata(vkey, "layers") adata.layers[vkey] = csr_matrix(delta_X) diff --git a/dynamo/tools/recipes.py b/dynamo/tools/recipes.py index 5ebe839b3..7fa448742 100644 --- a/dynamo/tools/recipes.py +++ b/dynamo/tools/recipes.py @@ -150,7 +150,7 @@ def recipe_one_shot_data( # then we want to calculate moments for spliced and unspliced layers based on connectivity graph from spliced # data. # first get X_spliced based pca embedding - CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].A) + CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].toarray()) cm_genesums = CM.sum(axis=0) valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() @@ -314,7 +314,7 @@ def recipe_kin_data( # then we want to calculate moments for spliced and unspliced layers based on connectivity graph from spliced # data. # first get X_spliced based pca embedding - CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].A) + CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].toarray()) cm_genesums = CM.sum(axis=0) valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() @@ -465,7 +465,7 @@ def recipe_deg_data( # then calculate moments for labeling data relevant layers using total based connectivity graph # first get X_total based pca embedding - CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_total"].A) + CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_total"].toarray()) cm_genesums = CM.sum(axis=0) valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() @@ -635,7 +635,7 @@ def recipe_mix_kin_deg_data( # then we want to calculate moments for spliced and unspliced layers based on connectivity graph from spliced # data. # first get X_spliced based pca embedding - CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].A) + CM = np.log1p(adata[:, adata.var.use_for_pca].layers["X_spliced"].toarray()) cm_genesums = CM.sum(axis=0) valid_ind = np.logical_and(np.isfinite(cm_genesums), cm_genesums != 0) valid_ind = np.array(valid_ind).flatten() diff --git a/dynamo/tools/utils.py b/dynamo/tools/utils.py index 9abae007a..fb4d402db 100755 --- a/dynamo/tools/utils.py +++ b/dynamo/tools/utils.py @@ -495,7 +495,7 @@ def flatten(arr: Union[pd.Series, sp.csr_matrix, np.ndarray]) -> np.ndarray: if type(arr) == pd.core.series.Series: ret = arr.values.flatten() elif sp.issparse(arr): - ret = arr.A.flatten() + ret = arr.toarray().flatten() else: ret = arr.flatten() return ret @@ -1184,7 +1184,7 @@ def _one_shot_gamma_alpha_matrix( Returns: A tuple containing the gamma and alpha parameters. """ - N, R = N.A.T, R.A.T + N, R = N.toarray().T, R.toarray().T K = np.array(K) tau = tau[0] Kc = np.clip(K, 0, 1 - 1e-3) @@ -2482,7 +2482,7 @@ def find_extreme( Returns: A boolean mask identifying the extreme regions based on the provided percentiles. """ - s, u = (s.A if sp.issparse(s) else s, u.A if sp.issparse(u) else u) + s, u = (s.toarray() if sp.issparse(s) else s, u.toarray() if sp.issparse(u) else u) if normalize: su = s / np.clip(np.max(s), 1e-3, None) diff --git a/dynamo/vectorfield/rank_vf.py b/dynamo/vectorfield/rank_vf.py index b8f731a0b..0332d23fb 100644 --- a/dynamo/vectorfield/rank_vf.py +++ b/dynamo/vectorfield/rank_vf.py @@ -97,7 +97,7 @@ def rank_genes( var_names = np.array(index_gene(adata, adata.var_names, genes)) for g, arr in arr_dict.items(): if ismatrix(arr): - arr = arr.A.flatten() + arr = arr.toarray().flatten() glst, sarr = list_top_genes(arr, var_names, None, return_sorted_array=True) # ret_dict[g] = {glst[i]: sarr[i] for i in range(len(glst))} ret_dict[g] = glst @@ -164,7 +164,7 @@ def rank_cell_groups( cell_names = np.array(adata.obs_names) for g, arr in arr_dict.items(): if ismatrix(arr): - arr = arr.A.flatten() + arr = arr.toarray().flatten() glst, sarr = list_top_genes(arr, cell_names, None, return_sorted_array=True) # ret_dict[g] = {glst[i]: sarr[i] for i in range(len(glst))} ret_dict[g] = glst diff --git a/dynamo/vectorfield/scVectorField.py b/dynamo/vectorfield/scVectorField.py index 8d185e796..6766a5809 100755 --- a/dynamo/vectorfield/scVectorField.py +++ b/dynamo/vectorfield/scVectorField.py @@ -286,8 +286,8 @@ def construct_v(X, i, idx, n_int_steps, func, distance_free, dist, D, n): """helper function for parallism""" V = sp.csr_matrix((n, n)) - x = X[i].A if sp.issparse(X) else X[i] - Y = X[idx[1:]].A if sp.issparse(X) else X[idx[1:]] + x = X[i].toarray() if sp.issparse(X) else X[i] + Y = X[idx[1:]].toarray() if sp.issparse(X) else X[idx[1:]] for j, y in enumerate(Y): pts = np.linspace(x, y, n_int_steps) v = func(pts) diff --git a/dynamo/vectorfield/topography.py b/dynamo/vectorfield/topography.py index c9c6caafc..1667d5f7b 100755 --- a/dynamo/vectorfield/topography.py +++ b/dynamo/vectorfield/topography.py @@ -485,7 +485,7 @@ def get_Xss_confidence(self, k: Optional[int] = 50) -> np.ndarray: an (n,) array of confidences for the fixed points """ X = self.X_data - X = X.A if sp.issparse(X) else X + X = X.toarray() if sp.issparse(X) else X Xss = self.Xss.get_X() Xref = np.median(X, 0) Xss = np.vstack((Xss, Xref)) diff --git a/tests/test_data_io.py b/tests/test_data_io.py index d7c553ab0..18282d5d0 100644 --- a/tests/test_data_io.py +++ b/tests/test_data_io.py @@ -75,9 +75,9 @@ def test_save_adata(): def alpha_minus_gamma_s(new, gamma, t, M_s): # equation: alpha = new / (1 - e^{-rt}) * r - alpha = new.A.T / (1 - np.exp(-gamma.values[:, None] * t.values[None, :])) * gamma.values[:, None] + alpha = new.toarray().T / (1 - np.exp(-gamma.values[:, None] * t.values[None, :])) * gamma.values[:, None] - gamma_s = gamma.values[:, None] * M_s.A.T + gamma_s = gamma.values[:, None] * M_s.toarray().T alpha_minus_gamma_s = alpha - gamma_s return alpha_minus_gamma_s diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index 54a5610f1..d5852f306 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -131,8 +131,8 @@ def test_recipe_monocle_feature_selection_layer_simple0(): # rpe1_kinetics = dyn.pp.recipe_monocle(rpe1_kinetics, n_top_genes=1000, total_layers=False, copy=True) dyn.pp.recipe_monocle(rpe1_kinetics, n_top_genes=20, total_layers=False, feature_selection_layer="new") - assert np.all(rpe1_kinetics.X.A == rpe1_kinetics.X.A) - assert not np.all(rpe1_kinetics.layers["new"].A != rpe1_kinetics.layers["new"].A) + assert np.all(rpe1_kinetics.X.toarray() == rpe1_kinetics.X.toarray()) + assert not np.all(rpe1_kinetics.layers["new"].toarray() != rpe1_kinetics.layers["new"].toarray()) def test_calc_dispersion_sparse(): @@ -530,19 +530,19 @@ def test_transform(): adata1 = log1p(adata.copy()) assert not np.all(adata1.X == adata.X) - assert np.all(adata1.layers["spliced"].A == adata.layers["spliced"].A) + assert np.all(adata1.layers["spliced"].toarray() == adata.layers["spliced"].toarray()) adata2 = log(adata.copy()) assert not np.all(adata2.X == adata.X) - assert np.all(adata2.layers["spliced"].A == adata.layers["spliced"].A) + assert np.all(adata2.layers["spliced"].toarray() == adata.layers["spliced"].toarray()) adata3 = log2(adata.copy()) assert not np.all(adata3.X == adata.X) - assert np.all(adata3.layers["spliced"].A == adata.layers["spliced"].A) + assert np.all(adata3.layers["spliced"].toarray() == adata.layers["spliced"].toarray()) adata4 = Freeman_Tukey(adata.copy()) assert not np.all(adata3.X == adata.X) - assert np.all(adata4.layers["spliced"].A == adata.layers["spliced"].A) + assert np.all(adata4.layers["spliced"].toarray() == adata.layers["spliced"].toarray()) def test_normalize(): diff --git a/tests/test_tl.py b/tests/test_tl.py index ea3df3091..01ff5ceab 100644 --- a/tests/test_tl.py +++ b/tests/test_tl.py @@ -194,23 +194,23 @@ def test_graph_calculus_and_operators(): res_op = dyn.tools.graph_operators.gradop(graph) res_calc = dyn.tools.graph_calculus.gradop(adj) - assert np.allclose(res_op.A, res_calc.A) + assert np.allclose(res_op.toarray(), res_calc.toarray()) res_op = dyn.tools.graph_operators.divop(graph) res_calc = dyn.tools.graph_calculus.divop(adj) - assert np.allclose(res_op.A, 2 * res_calc.A) + assert np.allclose(res_op.toarray(), 2 * res_calc.toarray()) res_op = dyn.tools.graph_operators.potential(graph) - res_calc = dyn.tools.graph_calculus.potential(adj.A, method="lsq") + res_calc = dyn.tools.graph_calculus.potential(adj.toarray(), method="lsq") assert np.allclose(res_op, res_calc * 2) - potential_lsq = dyn.tools.graph_calculus.potential(adj.A, method="lsq") + potential_lsq = dyn.tools.graph_calculus.potential(adj.toarray(), method="lsq") res_op = dyn.tools.graph_operators.grad(graph) - res_calc = dyn.tools.graph_calculus.gradient(adj.A, p=potential_lsq) + res_calc = dyn.tools.graph_calculus.gradient(adj.toarray(), p=potential_lsq) assert np.allclose(res_op, 2 * res_calc) res_op = dyn.tools.graph_operators.div(graph) - res_calc = dyn.tools.graph_calculus.divergence(adj.A) + res_calc = dyn.tools.graph_calculus.divergence(adj.toarray()) assert np.allclose(res_op, 2 * res_calc) @@ -258,28 +258,28 @@ def test_gradop(): grad_dense = dyn.tl.graph_calculus.gradop(adj.toarray()) # Check that the matrix has the expected shape values - assert np.all(grad.A == grad_dense.A) + assert np.all(grad.toarray() == grad_dense.toarray()) assert grad.shape == (4, 3) def test_graphize_velocity_coopt(adata): E, _, _ = dyn.tools.graph_calculus.graphize_velocity( - X=adata.X.A, + X=adata.X.toarray(), V=adata.layers["velocity_S"], ) assert E.shape == (adata.n_obs, adata.n_obs) E = dyn.tools.graph_calculus.graphize_velocity_coopt( - X=adata.X.A, - V=adata.layers["velocity_S"].A, + X=adata.X.toarray(), + V=adata.layers["velocity_S"].toarray(), nbrs=adata.uns["neighbors"]["indices"], - C=adata.obsp["pearson_transition_matrix"].A, + C=adata.obsp["pearson_transition_matrix"].toarray(), ) assert E.shape == (adata.n_obs, adata.n_obs) E = dyn.tools.graph_calculus.graphize_velocity_coopt( - X=adata.X.A, - V=adata.layers["velocity_S"].A, + X=adata.X.toarray(), + V=adata.layers["velocity_S"].toarray(), nbrs=adata.uns["neighbors"]["indices"], U=np.random.random((adata.n_obs, adata.n_vars)), ) From d196c16ba01b2ba33c3a73059c4d4041fa3037f5 Mon Sep 17 00:00:00 2001 From: sichao Date: Tue, 9 Jul 2024 11:51:56 -0400 Subject: [PATCH 2/3] revert unnecessary changes on marix.A --- dynamo/preprocessing/cell_cycle.py | 2 +- dynamo/preprocessing/deprecated.py | 2 +- dynamo/preprocessing/external/sctransform.py | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dynamo/preprocessing/cell_cycle.py b/dynamo/preprocessing/cell_cycle.py index 48764920e..0542fb6d0 100644 --- a/dynamo/preprocessing/cell_cycle.py +++ b/dynamo/preprocessing/cell_cycle.py @@ -49,7 +49,7 @@ def group_corr(adata: anndata.AnnData, layer: Union[str, None], gene_list: list) cor = ( einsum_correlation( np.array(expression_matrix.toarray().T, dtype="float"), - np.array(avg_exp.toarray().ravel(), dtype="float"), + np.array(avg_exp.A1, dtype="float"), ) if issparse(expression_matrix) else einsum_correlation( diff --git a/dynamo/preprocessing/deprecated.py b/dynamo/preprocessing/deprecated.py index 79c655cc2..522a5777a 100644 --- a/dynamo/preprocessing/deprecated.py +++ b/dynamo/preprocessing/deprecated.py @@ -209,7 +209,7 @@ def _disp_calc_helper_NB_legacy( nzGenes = (rounded > lowerDetectedLimit).sum(axis=0) nzGenes = nzGenes > min_cells_detected - nzGenes = nzGenes.toarray().ravel() if issparse(rounded) else nzGenes + nzGenes = nzGenes.A1 if issparse(rounded) else nzGenes if layer.startswith("X_"): x = rounded[:, nzGenes] else: diff --git a/dynamo/preprocessing/external/sctransform.py b/dynamo/preprocessing/external/sctransform.py index 745c1ce0f..4d20cb59e 100644 --- a/dynamo/preprocessing/external/sctransform.py +++ b/dynamo/preprocessing/external/sctransform.py @@ -140,7 +140,7 @@ def gmean( assert np.all(X.sum(0) > 0) assert np.all(X.data > 0) X.data[:] = np.log(X.data + eps) - res = np.exp(X.mean(axis).toarray().flatten()) - eps + res = np.exp(X.mean(axis).A.flatten()) - eps assert np.all(res > 0) return res @@ -230,7 +230,7 @@ def sctransform_core( X.eliminate_zeros() gene_names = np.array(list(adata.var_names)) cell_names = np.array(list(adata.obs_names)) - genes_cell_count = X.sum(0).toarray().flatten() + genes_cell_count = X.sum(0).A.flatten() genes = np.where(genes_cell_count >= min_cells)[0] genes_ix = genes.copy() @@ -251,11 +251,11 @@ def sctransform_core( genes_step1 = genes genes_log_gmean_step1 = genes_log_gmean - umi = X.sum(1).toarray().flatten() + umi = X.sum(1).A.flatten() log_umi = np.log10(umi) X2 = X.copy() X2.data[:] = 1 - gene = X2.sum(1).toarray().flatten() + gene = X2.sum(1).A.flatten() log_gene = np.log10(gene) umi_per_gene = umi / gene log_umi_per_gene = np.log10(umi_per_gene) @@ -443,5 +443,5 @@ def sctransform( X_squared = adata.X.copy() X_squared.data **= 2 variance = X_squared.mean(0) - np.square(adata.X.mean(0)) - adata.var["sct_score"] = variance.toarray().ravel() if sp.issparse(variance) else variance.ravel() + adata.var["sct_score"] = variance.A1 adata.var["use_for_pca"] = get_gene_selection_filter(adata.var["sct_score"], n_top_genes=n_top_genes) From 87422f00bd91a3520903b7beee8f4582cc4464e5 Mon Sep 17 00:00:00 2001 From: sichao Date: Tue, 9 Jul 2024 12:10:54 -0400 Subject: [PATCH 3/3] revert more unnecessary changes on matrix.A --- dynamo/preprocessing/external/sctransform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dynamo/preprocessing/external/sctransform.py b/dynamo/preprocessing/external/sctransform.py index 4d20cb59e..4d85ba03c 100644 --- a/dynamo/preprocessing/external/sctransform.py +++ b/dynamo/preprocessing/external/sctransform.py @@ -243,7 +243,7 @@ def sctransform_core( # sample by n_cells, or use all cells if n_cells is not None and n_cells < X.shape[0]: cells_step1 = np.sort(np.random.choice(X.shape[0], replace=False, size=n_cells)) - genes_cell_count_step1 = X[cells_step1].sum(0).toarray().flatten() + genes_cell_count_step1 = X[cells_step1].sum(0).A.flatten() genes_step1 = np.where(genes_cell_count_step1 >= min_cells)[0] genes_log_gmean_step1 = np.log10(gmean(X[cells_step1][:, genes_step1], axis=0, eps=gmean_eps)) else: