diff --git a/bin/cmdbase/__init__.py b/bin/cmdbase/__init__.py index c78b0b6..48d5980 100644 --- a/bin/cmdbase/__init__.py +++ b/bin/cmdbase/__init__.py @@ -1,2 +1,2 @@ -from . import alignment, irods, rna, basic +from . import alignment, basic, irods, rna from .helpers import * diff --git a/bin/cmdbase/alignment/cellranger.py b/bin/cmdbase/alignment/cellranger.py index 44c5f2a..7b322db 100644 --- a/bin/cmdbase/alignment/cellranger.py +++ b/bin/cmdbase/alignment/cellranger.py @@ -10,8 +10,13 @@ @click.command("cellranger") @click.option("--samplefile", required=True, help="Sample file text file") -@click.option("--includebam", is_flag=True, default=False, help="Include BAM files (removes --no-bam from cellranger)") -#@click.option('--includebam', required=False, show_default=True, default=False, help="Pull Bam files") +@click.option( + "--includebam", + is_flag=True, + default=False, + help="Include BAM files (removes --no-bam from cellranger)", +) +# @click.option('--includebam', required=False, show_default=True, default=False, help="Pull Bam files") def cellranger(samplefile, includebam): """ Cellranger aligns sc-rna seq reads... \n @@ -31,6 +36,8 @@ def cellranger(samplefile, includebam): includebam = str(includebam * 1) includebam_str = "1" if includebam else "0" result_CR = subprocess.run( - [shell_cellranger_script, samplefile, includebam], capture_output=True, text=True + [shell_cellranger_script, samplefile, includebam], + capture_output=True, + text=True, ) click.echo(result_CR.stdout) diff --git a/bin/cmdbase/alignment/starsolo.py b/bin/cmdbase/alignment/starsolo.py index fba821b..c5189e1 100644 --- a/bin/cmdbase/alignment/starsolo.py +++ b/bin/cmdbase/alignment/starsolo.py @@ -9,7 +9,7 @@ @click.command() -@click.option('--samplefile', required=True, help="Sample file text file") +@click.option("--samplefile", required=True, help="Sample file text file") def starsolo(samplefile): """ STARsolo aligns sc-rna seq reads... diff --git a/bin/cmdbase/basic/history.py b/bin/cmdbase/basic/history.py index 51f7eb0..5d5d786 100644 --- a/bin/cmdbase/basic/history.py +++ b/bin/cmdbase/basic/history.py @@ -1,15 +1,16 @@ #!/usr/bin/env python3 -import click import os -from tabulate import tabulate -import pandas as pd -@click.command("history") +import click +import pandas as pd +from tabulate import tabulate -@click.option("--last", required=False, help="Retrieve last n commands", - default=10, type = int) +@click.command("history") +@click.option( + "--last", required=False, help="Retrieve last n commands", default=10, type=int +) @click.option( "--all", default=False, @@ -17,16 +18,15 @@ required=False, help="Retrieve all history", ) - def history(last, all): - CWD = os.environ['CWD'] + CWD = os.environ["CWD"] hist_file = os.path.join(CWD, ".pap/") + "hist" if not os.path.exists(hist_file): click.echo("No history file present") return 0 - hist = pd.read_csv(hist_file, index_col = 0) + hist = pd.read_csv(hist_file, index_col=0) if all: - print(tabulate(hist, headers='keys', tablefmt='plain')) + print(tabulate(hist, headers="keys", tablefmt="plain")) else: - print(tabulate(hist.tail(last), headers='keys', tablefmt='plain')) + print(tabulate(hist.tail(last), headers="keys", tablefmt="plain")) return 0 diff --git a/bin/cmdbase/irods/pull_fastqs.py b/bin/cmdbase/irods/pull_fastqs.py index 1329fb5..2c8a32e 100644 --- a/bin/cmdbase/irods/pull_fastqs.py +++ b/bin/cmdbase/irods/pull_fastqs.py @@ -9,7 +9,7 @@ @click.command("pull-fastqs") -@click.option('--samplefile', required=True, help="Sample file text file") +@click.option("--samplefile", required=True, help="Sample file text file") def pull_fastqs(samplefile): """ Downloads processed irods data or any folder from irods @@ -19,6 +19,8 @@ def pull_fastqs(samplefile): print("Using irods to download data") print("If you have a large set of files, this command will take a while to run") shell_script_fq = os.path.join(SHELL_SCRIPT_BASE, "irods..fastqs") - result_fq = subprocess.run([shell_script_fq, samplefile], capture_output=True, text=True) + result_fq = subprocess.run( + [shell_script_fq, samplefile], capture_output=True, text=True + ) click.echo(result_fq.stdout) click.echo(result_fq.stderr) diff --git a/bin/cmdbase/rna/cellbender.py b/bin/cmdbase/rna/cellbender.py index 01dbfd1..3615b23 100644 --- a/bin/cmdbase/rna/cellbender.py +++ b/bin/cmdbase/rna/cellbender.py @@ -7,9 +7,11 @@ SHELL_SCRIPT_BASE = os.environ["SHELL_SCRIPT_BASE"] + @click.command("cellbender") @click.option("--samplefile", required=True, help="Sample file text file") -@click.option("--total_droplets_included", required=True, help="total_droplets_included" +@click.option( + "--total_droplets_included", required=True, help="total_droplets_included" ) def cellbender(samplefile, total_droplets_included, **kwargs): """ diff --git a/bin/cmdbase/rna/scanpy_basic.py b/bin/cmdbase/rna/scanpy_basic.py index 94533f5..3849b9e 100644 --- a/bin/cmdbase/rna/scanpy_basic.py +++ b/bin/cmdbase/rna/scanpy_basic.py @@ -5,12 +5,17 @@ import click SHELL_SCRIPT_BASE = os.environ["SHELL_SCRIPT_BASE"] -HL_IRODS_DOWNLOAD=os.environ["HL_IRODS_DOWNLOAD"] +HL_IRODS_DOWNLOAD = os.environ["HL_IRODS_DOWNLOAD"] + @click.command("scanpy") @click.option("--samplefile", required=True, help="Sample file text file") -@click.option("--sample_basedir", required=False, default = HL_IRODS_DOWNLOAD, - help="sample database folder") +@click.option( + "--sample_basedir", + required=False, + default=HL_IRODS_DOWNLOAD, + help="sample database folder", +) def scanpyrun(samplefile, sample_basedir): """ Basic scanpy run diff --git a/bin/nb/sc_base1.ipynb b/bin/nb/sc_base1.ipynb index 0fe96b4..2068fca 100644 --- a/bin/nb/sc_base1.ipynb +++ b/bin/nb/sc_base1.ipynb @@ -40,8 +40,9 @@ " )\n", " return outlier\n", "\n", + "\n", "def print_file(filename):\n", - " with open(filename, 'r') as f:\n", + " with open(filename, \"r\") as f:\n", " print(f.readlines())" ] }, @@ -52,7 +53,9 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['HLBI_PIPE_BASEDIR'] = '/lustre/scratch126/cellgen/team298/vm11/PROJECTS/PIPELINE/Haniffa-utils/'" + "os.environ[\n", + " \"HLBI_PIPE_BASEDIR\"\n", + "] = \"/lustre/scratch126/cellgen/team298/vm11/PROJECTS/PIPELINE/Haniffa-utils/\"" ] }, { @@ -73,7 +76,7 @@ } ], "source": [ - " !cat scanpy_rna_config.py" + "!cat scanpy_rna_config.py" ] }, { @@ -92,11 +95,11 @@ ], "source": [ "# Importing config file\n", - "#%reload_ext scanpy_rna_config\n", - "#if os.path.exists(\"scanpy_rna_config.py\"):\n", + "# %reload_ext scanpy_rna_config\n", + "# if os.path.exists(\"scanpy_rna_config.py\"):\n", "# print(\"Reading user config file\")\n", "# from scanpy_rna_config import *\n", - "#else:\n", + "# else:\n", "# print(\"Reading global config file\")\n", "# sys.path.append(os.path.join(os.environ['HLBI_PIPE_BASEDIR'], 'bin', 'nb'))\n", "# from scanpy_rna_config import *" @@ -123,12 +126,12 @@ "min_genes_cutoff = 100\n", "sim_doublet_ratio = 5\n", "target_sum = 10000\n", - "n_top_genes=2000\n", - "n_comps = 50 # pca\n", - "n_pcs = 50 # kNN\n", - "resolution = 1 # leiden\n", - "min_dist = 0.5 # UMAP\n", - "spread = 1 # UMAP" + "n_top_genes = 2000\n", + "n_comps = 50 # pca\n", + "n_pcs = 50 # kNN\n", + "resolution = 1 # leiden\n", + "min_dist = 0.5 # UMAP\n", + "spread = 1 # UMAP" ] }, { @@ -143,8 +146,8 @@ "outputs": [], "source": [ "samples_database = \"/lustre/scratch126/cellgen/team298/sample_data/\"\n", - "#sample_n = \"Apr24_chimeroid_d97_03A-BFP_HCA_SkO15052460\"\n", - "#sample_folder=\"/lustre/scratch126/cellgen/team298/sample_data/BK23-SKI-27-FT-1b_mG_rBCN14655446/processed_sanger\"\n", + "# sample_n = \"Apr24_chimeroid_d97_03A-BFP_HCA_SkO15052460\"\n", + "# sample_folder=\"/lustre/scratch126/cellgen/team298/sample_data/BK23-SKI-27-FT-1b_mG_rBCN14655446/processed_sanger\"\n", "sample_name = \"BK23-SKI-27-FT-1b_mG_rBCN14655446\"" ] }, @@ -204,17 +207,18 @@ "source": [ "# Read file\n", "outpt_folder = os.path.join(samples_database, sample_name, \"rna_scanpy\")\n", - "#outpt_folder = os.path.join(\"test\", sample_name)\n", + "# outpt_folder = os.path.join(\"test\", sample_name)\n", "os.makedirs(outpt_folder, exist_ok=True)\n", "\n", - "x = list(Path(os.path.join(samples_database, sample_name, 'processed_sanger')).rglob('filtered_feature_bc_matrix.h5'))\n", + "x = list(\n", + " Path(os.path.join(samples_database, sample_name, \"processed_sanger\")).rglob(\n", + " \"filtered_feature_bc_matrix.h5\"\n", + " )\n", + ")\n", "print(x)\n", "readfilename = x[0]\n", "\n", - "adata = sc.read_10x_h5(\n", - " readfilename,\n", - " gex_only=gex_only\n", - ")\n", + "adata = sc.read_10x_h5(readfilename, gex_only=gex_only)\n", "\n", "adata.var_names_make_unique()\n", "adata" @@ -267,7 +271,8 @@ "outputs": [], "source": [ "min_cells = np.round(adata.shape[0] * 0.005)\n", - "if min_cells > min_cells_cutoff: min_cells = min_cells_cutoff\n", + "if min_cells > min_cells_cutoff:\n", + " min_cells = min_cells_cutoff\n", "min_counts = 1" ] }, @@ -279,7 +284,8 @@ "outputs": [], "source": [ "min_genes = np.round(adata.shape[0] * 0.005)\n", - "if min_cells > min_cells_cutoff: min_cells = min_cells_cutoff\n", + "if min_cells > min_cells_cutoff:\n", + " min_cells = min_cells_cutoff\n", "min_counts = 1" ] }, @@ -526,10 +532,10 @@ } ], "source": [ - "print(\"Sim doublet ratio:\", sim_doublet_ratio )\n", - "threshold=0.7\n", - "sc.external.pp.scrublet(adata, sim_doublet_ratio=sim_doublet_ratio, threshold = threshold)\n", - "#sc.pp.scrublet(adata)" + "print(\"Sim doublet ratio:\", sim_doublet_ratio)\n", + "threshold = 0.7\n", + "sc.external.pp.scrublet(adata, sim_doublet_ratio=sim_doublet_ratio, threshold=threshold)\n", + "# sc.pp.scrublet(adata)" ] }, { @@ -587,7 +593,7 @@ } ], "source": [ - "#adata.obs.predicted_doublet.value_counts()" + "# adata.obs.predicted_doublet.value_counts()" ] }, { @@ -597,7 +603,7 @@ "metadata": {}, "outputs": [], "source": [ - "#adata = adata[~adata.obs.predicted_doublet]" + "# adata = adata[~adata.obs.predicted_doublet]" ] }, { @@ -647,7 +653,7 @@ } ], "source": [ - "adata.layers['counts'] = adata.X.copy()\n", + "adata.layers[\"counts\"] = adata.X.copy()\n", "sc.pp.normalize_total(adata, target_sum=target_sum)" ] }, @@ -669,7 +675,7 @@ "outputs": [], "source": [ "adata.raw = adata\n", - "adata.layers['logcounts'] = adata.X.copy()" + "adata.layers[\"logcounts\"] = adata.X.copy()" ] }, { @@ -701,8 +707,8 @@ "# scaling\n", "# classicially you scale the data\n", "# this is a time consuming step, so is not run now.\n", - "# \n", - "#sc.pp.scale(adata)" + "#\n", + "# sc.pp.scale(adata)" ] }, { @@ -810,7 +816,7 @@ "metadata": {}, "outputs": [], "source": [ - "sc.tl.umap(adata, min_dist = min_dist, spread = spread)" + "sc.tl.umap(adata, min_dist=min_dist, spread=spread)" ] }, { @@ -820,7 +826,7 @@ "metadata": {}, "outputs": [], "source": [ - "reqCols.extend(['leiden'])" + "reqCols.extend([\"leiden\"])" ] }, { @@ -877,7 +883,7 @@ } ], "source": [ - "sc.pl.umap(adata, color = reqCols, ncols=3)" + "sc.pl.umap(adata, color=reqCols, ncols=3)" ] }, { @@ -887,7 +893,7 @@ "metadata": {}, "outputs": [], "source": [ - "adata.write_h5ad(os.path.join(outpt_folder, sample_name+\".h5ad\"))" + "adata.write_h5ad(os.path.join(outpt_folder, sample_name + \".h5ad\"))" ] } ], diff --git a/bin/nb/scanpy_rna_config.py b/bin/nb/scanpy_rna_config.py index 2841702..d12edc0 100644 --- a/bin/nb/scanpy_rna_config.py +++ b/bin/nb/scanpy_rna_config.py @@ -1,3 +1,3 @@ gex_only = True sim_doublet_ratio = 5 -min_cells_cutoff = 50 # min number of cells a gene is present for it to be realised \ No newline at end of file +min_cells_cutoff = 50 # min number of cells a gene is present for it to be realised