Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
dannda committed Sep 12, 2024
1 parent cb26dc2 commit e2b126d
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 59 deletions.
2 changes: 1 addition & 1 deletion bin/cmdbase/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from . import alignment, irods, rna, basic
from . import alignment, basic, irods, rna
from .helpers import *
13 changes: 10 additions & 3 deletions bin/cmdbase/alignment/cellranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@

@click.command("cellranger")
@click.option("--samplefile", required=True, help="Sample file text file")
@click.option("--includebam", is_flag=True, default=False, help="Include BAM files (removes --no-bam from cellranger)")
#@click.option('--includebam', required=False, show_default=True, default=False, help="Pull Bam files")
@click.option(
"--includebam",
is_flag=True,
default=False,
help="Include BAM files (removes --no-bam from cellranger)",
)
# @click.option('--includebam', required=False, show_default=True, default=False, help="Pull Bam files")
def cellranger(samplefile, includebam):
"""
Cellranger aligns sc-rna seq reads... \n
Expand All @@ -31,6 +36,8 @@ def cellranger(samplefile, includebam):
includebam = str(includebam * 1)
includebam_str = "1" if includebam else "0"
result_CR = subprocess.run(
[shell_cellranger_script, samplefile, includebam], capture_output=True, text=True
[shell_cellranger_script, samplefile, includebam],
capture_output=True,
text=True,
)
click.echo(result_CR.stdout)
2 changes: 1 addition & 1 deletion bin/cmdbase/alignment/starsolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


@click.command()
@click.option('--samplefile', required=True, help="Sample file text file")
@click.option("--samplefile", required=True, help="Sample file text file")
def starsolo(samplefile):
"""
STARsolo aligns sc-rna seq reads...
Expand Down
22 changes: 11 additions & 11 deletions bin/cmdbase/basic/history.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
#!/usr/bin/env python3

import click
import os
from tabulate import tabulate
import pandas as pd

@click.command("history")
import click
import pandas as pd
from tabulate import tabulate

@click.option("--last", required=False, help="Retrieve last n commands",
default=10, type = int)

@click.command("history")
@click.option(
"--last", required=False, help="Retrieve last n commands", default=10, type=int
)
@click.option(
"--all",
default=False,
is_flag=True,
required=False,
help="Retrieve all history",
)

def history(last, all):
CWD = os.environ['CWD']
CWD = os.environ["CWD"]
hist_file = os.path.join(CWD, ".pap/") + "hist"
if not os.path.exists(hist_file):
click.echo("No history file present")
return 0
hist = pd.read_csv(hist_file, index_col = 0)
hist = pd.read_csv(hist_file, index_col=0)
if all:
print(tabulate(hist, headers='keys', tablefmt='plain'))
print(tabulate(hist, headers="keys", tablefmt="plain"))
else:
print(tabulate(hist.tail(last), headers='keys', tablefmt='plain'))
print(tabulate(hist.tail(last), headers="keys", tablefmt="plain"))
return 0
6 changes: 4 additions & 2 deletions bin/cmdbase/irods/pull_fastqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


@click.command("pull-fastqs")
@click.option('--samplefile', required=True, help="Sample file text file")
@click.option("--samplefile", required=True, help="Sample file text file")
def pull_fastqs(samplefile):
"""
Downloads processed irods data or any folder from irods
Expand All @@ -19,6 +19,8 @@ def pull_fastqs(samplefile):
print("Using irods to download data")
print("If you have a large set of files, this command will take a while to run")
shell_script_fq = os.path.join(SHELL_SCRIPT_BASE, "irods..fastqs")
result_fq = subprocess.run([shell_script_fq, samplefile], capture_output=True, text=True)
result_fq = subprocess.run(
[shell_script_fq, samplefile], capture_output=True, text=True
)
click.echo(result_fq.stdout)
click.echo(result_fq.stderr)
4 changes: 3 additions & 1 deletion bin/cmdbase/rna/cellbender.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

SHELL_SCRIPT_BASE = os.environ["SHELL_SCRIPT_BASE"]


@click.command("cellbender")
@click.option("--samplefile", required=True, help="Sample file text file")
@click.option("--total_droplets_included", required=True, help="total_droplets_included"
@click.option(
"--total_droplets_included", required=True, help="total_droplets_included"
)
def cellbender(samplefile, total_droplets_included, **kwargs):
"""
Expand Down
11 changes: 8 additions & 3 deletions bin/cmdbase/rna/scanpy_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@
import click

SHELL_SCRIPT_BASE = os.environ["SHELL_SCRIPT_BASE"]
HL_IRODS_DOWNLOAD=os.environ["HL_IRODS_DOWNLOAD"]
HL_IRODS_DOWNLOAD = os.environ["HL_IRODS_DOWNLOAD"]


@click.command("scanpy")
@click.option("--samplefile", required=True, help="Sample file text file")
@click.option("--sample_basedir", required=False, default = HL_IRODS_DOWNLOAD,
help="sample database folder")
@click.option(
"--sample_basedir",
required=False,
default=HL_IRODS_DOWNLOAD,
help="sample database folder",
)
def scanpyrun(samplefile, sample_basedir):
"""
Basic scanpy run
Expand Down
78 changes: 42 additions & 36 deletions bin/nb/sc_base1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@
" )\n",
" return outlier\n",
"\n",
"\n",
"def print_file(filename):\n",
" with open(filename, 'r') as f:\n",
" with open(filename, \"r\") as f:\n",
" print(f.readlines())"
]
},
Expand All @@ -52,7 +53,9 @@
"metadata": {},
"outputs": [],
"source": [
"os.environ['HLBI_PIPE_BASEDIR'] = '/lustre/scratch126/cellgen/team298/vm11/PROJECTS/PIPELINE/Haniffa-utils/'"
"os.environ[\n",
" \"HLBI_PIPE_BASEDIR\"\n",
"] = \"/lustre/scratch126/cellgen/team298/vm11/PROJECTS/PIPELINE/Haniffa-utils/\""
]
},
{
Expand All @@ -73,7 +76,7 @@
}
],
"source": [
" !cat scanpy_rna_config.py"
"!cat scanpy_rna_config.py"
]
},
{
Expand All @@ -92,11 +95,11 @@
],
"source": [
"# Importing config file\n",
"#%reload_ext scanpy_rna_config\n",
"#if os.path.exists(\"scanpy_rna_config.py\"):\n",
"# %reload_ext scanpy_rna_config\n",
"# if os.path.exists(\"scanpy_rna_config.py\"):\n",
"# print(\"Reading user config file\")\n",
"# from scanpy_rna_config import *\n",
"#else:\n",
"# else:\n",
"# print(\"Reading global config file\")\n",
"# sys.path.append(os.path.join(os.environ['HLBI_PIPE_BASEDIR'], 'bin', 'nb'))\n",
"# from scanpy_rna_config import *"
Expand All @@ -123,12 +126,12 @@
"min_genes_cutoff = 100\n",
"sim_doublet_ratio = 5\n",
"target_sum = 10000\n",
"n_top_genes=2000\n",
"n_comps = 50 pca\n",
"n_pcs = 50 # kNN\n",
"resolution = 1 # leiden\n",
"min_dist = 0.5 # UMAP\n",
"spread = 1 # UMAP"
"n_top_genes = 2000\n",
"n_comps = 50 # pca\n",
"n_pcs = 50 # kNN\n",
"resolution = 1 # leiden\n",
"min_dist = 0.5 # UMAP\n",
"spread = 1 # UMAP"
]
},
{
Expand All @@ -143,8 +146,8 @@
"outputs": [],
"source": [
"samples_database = \"/lustre/scratch126/cellgen/team298/sample_data/\"\n",
"#sample_n = \"Apr24_chimeroid_d97_03A-BFP_HCA_SkO15052460\"\n",
"#sample_folder=\"/lustre/scratch126/cellgen/team298/sample_data/BK23-SKI-27-FT-1b_mG_rBCN14655446/processed_sanger\"\n",
"# sample_n = \"Apr24_chimeroid_d97_03A-BFP_HCA_SkO15052460\"\n",
"# sample_folder=\"/lustre/scratch126/cellgen/team298/sample_data/BK23-SKI-27-FT-1b_mG_rBCN14655446/processed_sanger\"\n",
"sample_name = \"BK23-SKI-27-FT-1b_mG_rBCN14655446\""
]
},
Expand Down Expand Up @@ -204,17 +207,18 @@
"source": [
"# Read file\n",
"outpt_folder = os.path.join(samples_database, sample_name, \"rna_scanpy\")\n",
"#outpt_folder = os.path.join(\"test\", sample_name)\n",
"# outpt_folder = os.path.join(\"test\", sample_name)\n",
"os.makedirs(outpt_folder, exist_ok=True)\n",
"\n",
"x = list(Path(os.path.join(samples_database, sample_name, 'processed_sanger')).rglob('filtered_feature_bc_matrix.h5'))\n",
"x = list(\n",
" Path(os.path.join(samples_database, sample_name, \"processed_sanger\")).rglob(\n",
" \"filtered_feature_bc_matrix.h5\"\n",
" )\n",
")\n",
"print(x)\n",
"readfilename = x[0]\n",
"\n",
"adata = sc.read_10x_h5(\n",
" readfilename,\n",
" gex_only=gex_only\n",
")\n",
"adata = sc.read_10x_h5(readfilename, gex_only=gex_only)\n",
"\n",
"adata.var_names_make_unique()\n",
"adata"
Expand Down Expand Up @@ -267,7 +271,8 @@
"outputs": [],
"source": [
"min_cells = np.round(adata.shape[0] * 0.005)\n",
"if min_cells > min_cells_cutoff: min_cells = min_cells_cutoff\n",
"if min_cells > min_cells_cutoff:\n",
" min_cells = min_cells_cutoff\n",
"min_counts = 1"
]
},
Expand All @@ -279,7 +284,8 @@
"outputs": [],
"source": [
"min_genes = np.round(adata.shape[0] * 0.005)\n",
"if min_cells > min_cells_cutoff: min_cells = min_cells_cutoff\n",
"if min_cells > min_cells_cutoff:\n",
" min_cells = min_cells_cutoff\n",
"min_counts = 1"
]
},
Expand Down Expand Up @@ -526,10 +532,10 @@
}
],
"source": [
"print(\"Sim doublet ratio:\", sim_doublet_ratio )\n",
"threshold=0.7\n",
"sc.external.pp.scrublet(adata, sim_doublet_ratio=sim_doublet_ratio, threshold = threshold)\n",
"#sc.pp.scrublet(adata)"
"print(\"Sim doublet ratio:\", sim_doublet_ratio)\n",
"threshold = 0.7\n",
"sc.external.pp.scrublet(adata, sim_doublet_ratio=sim_doublet_ratio, threshold=threshold)\n",
"# sc.pp.scrublet(adata)"
]
},
{
Expand Down Expand Up @@ -587,7 +593,7 @@
}
],
"source": [
"#adata.obs.predicted_doublet.value_counts()"
"# adata.obs.predicted_doublet.value_counts()"
]
},
{
Expand All @@ -597,7 +603,7 @@
"metadata": {},
"outputs": [],
"source": [
"#adata = adata[~adata.obs.predicted_doublet]"
"# adata = adata[~adata.obs.predicted_doublet]"
]
},
{
Expand Down Expand Up @@ -647,7 +653,7 @@
}
],
"source": [
"adata.layers['counts'] = adata.X.copy()\n",
"adata.layers[\"counts\"] = adata.X.copy()\n",
"sc.pp.normalize_total(adata, target_sum=target_sum)"
]
},
Expand All @@ -669,7 +675,7 @@
"outputs": [],
"source": [
"adata.raw = adata\n",
"adata.layers['logcounts'] = adata.X.copy()"
"adata.layers[\"logcounts\"] = adata.X.copy()"
]
},
{
Expand Down Expand Up @@ -701,8 +707,8 @@
"# scaling\n",
"# classically you scale the data\n",
"# this is a time-consuming step, so it is not run now.\n",
"# \n",
"#sc.pp.scale(adata)"
"#\n",
"# sc.pp.scale(adata)"
]
},
{
Expand Down Expand Up @@ -810,7 +816,7 @@
"metadata": {},
"outputs": [],
"source": [
"sc.tl.umap(adata, min_dist = min_dist, spread = spread)"
"sc.tl.umap(adata, min_dist=min_dist, spread=spread)"
]
},
{
Expand All @@ -820,7 +826,7 @@
"metadata": {},
"outputs": [],
"source": [
"reqCols.extend(['leiden'])"
"reqCols.extend([\"leiden\"])"
]
},
{
Expand Down Expand Up @@ -877,7 +883,7 @@
}
],
"source": [
"sc.pl.umap(adata, color = reqCols, ncols=3)"
"sc.pl.umap(adata, color=reqCols, ncols=3)"
]
},
{
Expand All @@ -887,7 +893,7 @@
"metadata": {},
"outputs": [],
"source": [
"adata.write_h5ad(os.path.join(outpt_folder, sample_name+\".h5ad\"))"
"adata.write_h5ad(os.path.join(outpt_folder, sample_name + \".h5ad\"))"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion bin/nb/scanpy_rna_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
gex_only = True
sim_doublet_ratio = 5
min_cells_cutoff = 50 # min number of cells a gene must be present in for it to be realised
min_cells_cutoff = 50 # min number of cells a gene must be present in for it to be realised

0 comments on commit e2b126d

Please sign in to comment.