Skip to content

Commit

Permalink
adjust embedding point size & minimal cell number in a batch
Browse files Browse the repository at this point in the history
  • Loading branch information
jsxlei committed Sep 9, 2024
1 parent acabc08 commit 15aa689
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
12 changes: 9 additions & 3 deletions scalex/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,12 @@ def load_file(path, backed=False):
------
AnnData
"""
print(path)
if os.path.exists(DATA_PATH+path+'.h5ad'):
adata = sc.read_h5ad(DATA_PATH+path+'.h5ad', backed=backed)
elif path.endswith(tuple(['.h5mu/rna', '.h5mu/atac'])):
import muon as mu
adata = mu.read(path, backed=backed)
elif os.path.isdir(path): # mtx format
adata = read_mtx(path)
elif os.path.isfile(path):
Expand All @@ -99,9 +103,6 @@ def load_file(path, backed=False):

# return AnnData.concatenate(rna, gene_activity, join='inner', batch_key='batch',
# batch_categories=['RNA', 'ATAC'], index_unique=None)
elif path.endswith(tuple(['.h5mu/rna', '.h5mu/atac'])):
import muon as mu
adata = mu.read(path, backed=backed)
else:
raise ValueError("File {} not exists".format(path))

Expand Down Expand Up @@ -230,6 +231,11 @@ def preprocessing_rna(
if target_sum is None: target_sum = 10000

# adata.layers['count'] = adata.X.copy()
batch_counts = adata.obs['batch'].value_counts()

# Filter out batches with 10 or fewer cells (keep only batches with more than 10)
valid_batches = batch_counts[batch_counts > 10].index
adata = adata[adata.obs['batch'].isin(valid_batches)].copy()

if log: log.info('Preprocessing')
# if not issparse(adata.X):
Expand Down
4 changes: 2 additions & 2 deletions scalex/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ def embedding(
adata.obs.loc[adata.obs[cond2]!=v2, 'tmp'] = ''
groups = list(adata[(adata.obs[groupby]==b) &
(adata.obs[cond2]==v2)].obs[color].astype('category').cat.categories.values)
size = min(size, 120000/len(adata[(adata.obs[groupby]==b) & (adata.obs[cond2]==v2)]))
size = max(size, 120000/len(adata[(adata.obs[groupby]==b) & (adata.obs[cond2]==v2)]))
else:
groups = list(adata[adata.obs[groupby]==b].obs[color].astype('category').cat.categories.values)
size = min(size, 120000/len(adata[adata.obs[groupby]==b]))
size = max(size, 120000/len(adata[adata.obs[groupby]==b]))
adata.obs['tmp'] = adata.obs['tmp'].astype('category')
if color_map is not None:
palette = [color_map[i] if i in color_map else 'gray' for i in adata.obs['tmp'].cat.categories]
Expand Down

0 comments on commit 15aa689

Please sign in to comment.