Skip to content

Commit

Permalink
Merge pull request #8 from haniffalab/scanpy
Browse files Browse the repository at this point in the history
Added full scanpy support
  • Loading branch information
vjbaskar authored Sep 11, 2024
2 parents 168f6c1 + 2efc998 commit 18e2ddf
Show file tree
Hide file tree
Showing 7 changed files with 1,087 additions and 8 deletions.
2 changes: 2 additions & 0 deletions bin/cmdbase/rna/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from .cellbender import *
from .scanpy_basic import *

38 changes: 38 additions & 0 deletions bin/cmdbase/rna/scanpy_basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python3
import os
import subprocess

import click

SHELL_SCRIPT_BASE = os.environ["SHELL_SCRIPT_BASE"]
HL_IRODS_DOWNLOAD=os.environ["HL_IRODS_DOWNLOAD"]

# CLI entry point for the basic scanpy workflow.
#
# Runs the "rna..scanpy" script found under SHELL_SCRIPT_BASE with two
# positional arguments (sample base directory, sample file), relays the
# script's output, and exits with the script's return code when it is
# non-zero.
#
# Options:
#   --samplefile      path to the sample sheet (required).
#   --sample_basedir  folder holding the per-sample data; defaults to the
#                     HL_IRODS_DOWNLOAD environment value read at import time.
#
# The docstring below is what click prints for --help, so it is kept verbatim.
@click.command("scanpy")
@click.option("--samplefile", required=True, help="Sample file text file")
@click.option(
    "--sample_basedir",
    required=False,
    default=HL_IRODS_DOWNLOAD,
    help="sample database folder",
)
def scanpyrun(samplefile, sample_basedir):
    """
    Basic scanpy run
    Example: /lustre/scratch126/cellgen/team298/soft/bin/examples/irods_download.txt
    Input file should have 3 mandatory columns
    1st column: sanger_id
    2nd column: sample_name
    LAST column: irods path
    You can have any column in between
    pBCN14844712 BK31_1 /seq/illumina/runs/49/..../cellranger710multi....
    pBCN14844713 BK31_2 /seq/illumina/runs/49/..../cellranger710multi....
    pBCN14844714 BK31_3 /seq/illumina/runs/49/..../cellranger710multi....
    pBCN14844715 BK31_4 /seq/illumina/runs/49/..../cellranger710multi....
    ----------------------
    Use the same sample file you used for irods/pull-processed
    """
    shell_script = os.path.join(SHELL_SCRIPT_BASE, "rna..scanpy")
    # Arguments are passed as a list (no shell), so paths containing spaces
    # or shell metacharacters reach the script unchanged.
    result = subprocess.run(
        [shell_script, sample_basedir, samplefile],
        capture_output=True,
        text=True,
        check=False,
    )
    click.echo(result.stdout)
    if result.stderr:
        # Keep diagnostics on stderr so redirecting stdout does not mix the
        # script's error messages into its regular output.
        click.echo(result.stderr, err=True)
    if result.returncode != 0:
        # Surface the script's failure to the calling shell instead of
        # always exiting 0.
        raise click.exceptions.Exit(result.returncode)
1 change: 1 addition & 0 deletions bin/hl..piperv100
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ irods.add_command(cmdbase.irods.pull_fastqs)

################# scrna seq analysis commands ##
rna.add_command(cmdbase.rna.cellbender)
rna.add_command(cmdbase.rna.scanpyrun)

################# alignment commands ###########
alignment.add_command(cmdbase.alignment.cellranger)
Expand Down
913 changes: 913 additions & 0 deletions bin/nb/sc_base1.ipynb

Large diffs are not rendered by default.

63 changes: 63 additions & 0 deletions bin/workflows/_rna..scanpy
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash
#
# _rna..scanpy: build one papermill command per sample-sheet row and write an
# LSF job-array script that would run them.
#
# Usage: _rna..scanpy samples_database sample_sheet.tsv
#   samples_database  folder holding the per-sample data
#   sample_sheet.tsv  whitespace-separated sheet; column 1 = sanger_id,
#                     column 2 = sample id. Any line containing "sample"
#                     (case-insensitive) is treated as a header and skipped.
#
# Environment: RUN_TOKEN is used to name the generated files.
#
# Outputs (in the current directory):
#   rna_scanpy_<RUN_TOKEN>.cmds   one command per sample
#   rna_scanpy_<RUN_TOKEN>.bsub   job-array script; NOT submitted by this copy

conda_env="/software/cellgen/team298/shared/envs/hl-conda/hl_scanpy_v0.2.0"
if [ $# -ne 2 ]; then
    echo "$0 samples_database sample_sheet.tsv"
    echo "This is a follow up of irods/pull-processed. If you have not run it, do so"
    echo "samples_database: Folder where you have all sample cellranger data. Ideally - /lustre/scratch126/cellgen/team298/sample_data/"
    echo "sample_sheet.tsv: Sample file with sanger_id in column 1 and sample name in column 2"
    # A usage error is a failure; report it through the exit status.
    exit 1
fi

samples_database=$1; shift
sample_tsv=$1; shift

# Without this check a missing sheet would be reported as "nothing to do".
if [ ! -r "$sample_tsv" ]; then
    echo "[Error] Cannot read sample sheet: $sample_tsv" >&2
    exit 1
fi

HL_HIST_FOLDER=".pap"
mem=10000
run_token=$RUN_TOKEN
ofile="rna_scanpy_$run_token.cmds"
rm -f "$ofile"

# NOTE(review): the output folder is hard-coded to "test" in this copy of the
# script (the original assigned "$samples_database//rna_scanpy/" and then
# immediately overwrote it) -- confirm whether that is still wanted.
outpt_folder="test"

# "|| [ -n "$line" ]" keeps the last row when the file has no trailing newline.
while IFS= read -r line || [ -n "$line" ]
do
    line=${line%$'\r'}                       # tolerate CRLF line endings
    [ -z "${line//[[:space:]]/}" ] && continue   # skip blank lines

    # Header rows are recognised by the word "sample" anywhere in the line.
    if grep -q -i Sample <<< "$line"; then
        continue
    fi

    read -r sanger_id sample_id _ <<< "$line"
    if [ -z "$sample_id" ]; then
        echo "[Warn] Skipping line with fewer than 2 columns: $line" >&2
        continue
    fi

    sample_name="${sample_id}_${sanger_id}"
    cmd="papermill sc_base1.ipynb $outpt_folder/$sample_name.ipynb -p samples_database '${samples_database}' -p sample_name $sample_name -k python3"
    printf '%s\n' "$cmd" >> "$ofile"
done < "$sample_tsv"

if [ ! -s "$ofile" ]; then
    echo "Looks like nothing needs to be done"
    echo "Exiting cleanly..."
    exit 0
fi

total_jobs=$(( $(wc -l < "$ofile") ))
bsub_id="rna_scanpy_${run_token}"

# Unescaped variables below are expanded now, while the file is written;
# the \$-escaped ones are left for the job to expand at run time.
# NOTE(review): "eval $conda_env" is written out as an eval of the bare
# environment path, not of an activation command -- confirm how the
# environment is meant to be activated inside the job.
cat > "$bsub_id.bsub" <<EOF
#!/bin/bash
#BSUB -J ${bsub_id}_[1-$total_jobs]%20
#BSUB -o $HL_HIST_FOLDER/lsf/${bsub_id}_%I.out
#BSUB -e $HL_HIST_FOLDER/lsf/${bsub_id}_%I.err
#BSUB -M $mem
#BSUB -R "select[mem>$mem] rusage[mem=$mem]"
eval $conda_env
COMMAND=\$(sed -n "\${LSB_JOBINDEX}p" $ofile)
eval \$COMMAND
EOF

# Submission is disabled in this copy of the script; the generated .bsub file
# is only written to disk.
#bsub < ${bsub_id}.bsub

17 changes: 9 additions & 8 deletions bin/workflows/irods..download_processed
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,21 @@ do
sanger_id=`echo $line | awk ' { print $1 } '`
sample_id=`echo $line | awk ' { print $2 } '`
irods_path=`echo $line | awk ' { print $NF } '`
mkdir -p $target_dir/${sample_id}_${sanger_id}
folder_name="$target_dir/${sample_id}_${sanger_id}/processed_sanger/"
folder_name="$target_dir/${sample_id}_${sanger_id}"
target_name="$folder_name/processed_sanger/"
mkdir -p $target_name
if [ $overwrite -eq 1 ]; then
rm -rf $folder_name
rm -rf $target_name
fi
if [ -d $folder_name ]; then
echo "[Warn] Target folder already exists. Not downloading. Try overwrite option if you want to download. Irods::$irods_path --> Folder::$folder_name "
if [ -d $target_name ]; then
echo "[Warn] Target folder already exists. Not downloading. Try overwrite option if you want to download. Irods::$irods_path --> Folder::$target_name "
else
echo "($i)[Info] Irods::$irods_path --> Folder::$folder_name"
echo "($i)[Info] Irods::$irods_path --> Folder::$target_name"
if [ $retain_bam == 1 ]; then
#echo "sleep 5; $bsub_command $irods_command $irods_path $folder_name ; chmod -R 774 $folder_name ; ln -s $folder_name $cwd/" >> $ofile
echo "$irods_command $irods_path $folder_name ; chmod -R 774 $folder_name ; ln -s $folder_name $cwd/" >> $ofile
echo "$irods_command $irods_path $target_name ; chmod -R 774 $folder_name ; ln -s $folder_name $cwd/" >> $ofile
else
echo "$irods_command $irods_path $folder_name ; chmod -R 774 $folder_name ; rm -rf $folder_name/gex_possorted_bam.bam ; find $folder_name -name 'possorted_genome_bam.bam' -exec rm -rf {} \; ; ln -s $folder_name $cwd/" >> $ofile
echo "$irods_command $irods_path $target_name ; chmod -R 774 $folder_name ; rm -rf $target_name/gex_possorted_bam.bam ; find $target_name -name 'possorted_genome_bam.bam' -exec rm -rf {} \; ; ln -s $folder_name $cwd/" >> $ofile
fi

fi
Expand Down
61 changes: 61 additions & 0 deletions bin/workflows/rna..scanpy
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
#
# rna..scanpy: build one papermill command per sample-sheet row and submit
# them to LSF as a single job array.
#
# Usage: rna..scanpy samples_database sample_sheet.tsv
#   samples_database  folder holding the per-sample data
#   sample_sheet.tsv  whitespace-separated sheet; column 1 = sanger_id,
#                     column 2 = sample id. Any line containing "sample"
#                     (case-insensitive) is treated as a header and skipped.
#
# Environment:
#   RUN_TOKEN         used to name the generated files and the LSF job
#   HL_PIPE_BASEDIR   base directory containing bin/nb/sc_base1.ipynb
#
# Outputs (in the current directory):
#   rna_scanpy_<RUN_TOKEN>.cmds   one command per sample
#   rna_scanpy_<RUN_TOKEN>.bsub   job-array script, submitted with bsub
# For each sample, <samples_database>/<sample>_<sanger_id>/rna_scanpy/ is
# created to receive the executed notebook.

conda_env="/software/cellgen/team298/shared/envs/hl-conda/hl_scanpy_v0.2.0"
if [ $# -ne 2 ]; then
    echo "$0 samples_database sample_sheet.tsv"
    echo "This is a follow up of irods/pull-processed. If you have not run it, do so"
    echo "samples_database: Folder where you have all sample cellranger data. Ideally - /lustre/scratch126/cellgen/team298/sample_data/"
    echo "sample_sheet.tsv: Sample file with sanger_id in column 1 and sample name in column 2"
    # A usage error is a failure; report it through the exit status.
    exit 1
fi

samples_database=$1; shift
sample_tsv=$1; shift

# Without this check a missing sheet would be reported as "nothing to do".
if [ ! -r "$sample_tsv" ]; then
    echo "[Error] Cannot read sample sheet: $sample_tsv" >&2
    exit 1
fi

HL_HIST_FOLDER=".pap"
mem=10000
run_token=$RUN_TOKEN
ofile="rna_scanpy_$run_token.cmds"
rm -f "$ofile"

# The #BSUB -o/-e lines below write into $HL_HIST_FOLDER/lsf, so that folder
# must exist before submission (the original created an unrelated "pap"
# directory instead).
mkdir -p "$HL_HIST_FOLDER/lsf"

# "|| [ -n "$line" ]" keeps the last row when the file has no trailing newline.
while IFS= read -r line || [ -n "$line" ]
do
    line=${line%$'\r'}                       # tolerate CRLF line endings
    [ -z "${line//[[:space:]]/}" ] && continue   # skip blank lines

    # Header rows are recognised by the word "sample" anywhere in the line.
    if grep -q -i Sample <<< "$line"; then
        continue
    fi

    read -r sanger_id sample_id _ <<< "$line"
    if [ -z "$sample_id" ]; then
        echo "[Warn] Skipping line with fewer than 2 columns: $line" >&2
        continue
    fi

    sample_name="${sample_id}_${sanger_id}"
    outpt_folder="$samples_database/${sample_name}/rna_scanpy/"
    mkdir -p "$outpt_folder"

    # Each job runs the notebook, then links the sample folder into the
    # directory the job runs in.
    cmd="papermill $HL_PIPE_BASEDIR/bin/nb/sc_base1.ipynb $outpt_folder/$sample_name.ipynb -p samples_database '${samples_database}' -p sample_name $sample_name -k python3;ln -s $samples_database/${sample_name} ."
    printf '%s\n' "$cmd" >> "$ofile"
done < "$sample_tsv"

if [ ! -s "$ofile" ]; then
    echo "Looks like nothing needs to be done"
    echo "Exiting cleanly..."
    exit 0
fi

total_jobs=$(( $(wc -l < "$ofile") ))
bsub_id="rna_scanpy_${run_token}"

# Unescaped variables below are expanded now, while the file is written;
# the \$-escaped ones are left for the job to expand at run time, where
# LSB_JOBINDEX selects this array element's line from the command file.
# NOTE(review): "eval $conda_env" is written out as an eval of the bare
# environment path, not of an activation command -- confirm how the
# environment is meant to be activated inside the job.
cat > "$bsub_id.bsub" <<EOF
#!/bin/bash
#BSUB -J ${bsub_id}_[1-$total_jobs]%20
#BSUB -o $HL_HIST_FOLDER/lsf/${bsub_id}_%I.out
#BSUB -e $HL_HIST_FOLDER/lsf/${bsub_id}_%I.err
#BSUB -M $mem
#BSUB -R "select[mem>$mem] rusage[mem=$mem]"
eval $conda_env
COMMAND=\$(sed -n "\${LSB_JOBINDEX}p" $ofile)
eval \$COMMAND
EOF

bsub < "${bsub_id}.bsub"

0 comments on commit 18e2ddf

Please sign in to comment.