-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from haniffalab/scanpy
Added full scanpy support
- Loading branch information
Showing
7 changed files
with
1,087 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from .cellbender import * | ||
from .scanpy_basic import * | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/usr/bin/env python3 | ||
import os | ||
import subprocess | ||
|
||
import click | ||
|
||
# Both locations are read from the process environment when the module is
# imported; a missing variable raises KeyError at import time.
# NOTE(review): presumably both are exported by `module load hl` - confirm.
SHELL_SCRIPT_BASE = os.environ["SHELL_SCRIPT_BASE"]
HL_IRODS_DOWNLOAD = os.environ["HL_IRODS_DOWNLOAD"]
|
||
@click.command("scanpy")
@click.option("--samplefile", required=True, help="Sample file text file")
@click.option(
    "--sample_basedir",
    required=False,
    default=HL_IRODS_DOWNLOAD,
    help="sample database folder",
)
def scanpyrun(samplefile, sample_basedir):
    """
    Basic scanpy run
    Example: /lustre/scratch126/cellgen/team298/soft/bin/examples/irods_download.txt
    Input file should have 3 mandatory columns
    1st column: sanger_id
    2nd column: sample_name
    LAST column: irods path
    You can have any column in between
    pBCN14844712 BK31_1 /seq/illumina/runs/49/..../cellranger710multi....
    pBCN14844713 BK31_2 /seq/illumina/runs/49/..../cellranger710multi....
    pBCN14844714 BK31_3 /seq/illumina/runs/49/..../cellranger710multi....
    pBCN14844715 BK31_4 /seq/illumina/runs/49/..../cellranger710multi....
    ----------------------
    Use the same sample file you used for irods/pull-processed
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # implementation notes live in comments rather than in the docstring.
    #
    # The helper script is called as: <script> <sample_basedir> <samplefile>.
    shell_script = os.path.join(SHELL_SCRIPT_BASE, "rna..scanpy")
    try:
        result = subprocess.run(
            [shell_script, sample_basedir, samplefile],
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as err:
        # Missing or non-executable script: report a clean CLI error
        # instead of a raw traceback.
        raise click.ClickException(
            f"Could not run {shell_script}: {err}"
        ) from err

    click.echo(result.stdout)
    if result.stderr:
        # Keep diagnostics on stderr so stdout stays clean for piping.
        click.echo(result.stderr, err=True)
    if result.returncode != 0:
        # Propagate the script's failure instead of always exiting 0.
        raise SystemExit(result.returncode)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
#!/bin/bash
#
# Prepare (but do not submit) an LSF job array that runs the sc_base1.ipynb
# notebook through papermill, one array element per sample-sheet row.
#
# Usage: <this script> samples_database sample_sheet.tsv
#   samples_database  folder passed to the notebook as its samples_database
#                     parameter
#   sample_sheet.tsv  whitespace-separated sheet; column 1 is the sanger id,
#                     column 2 the sample id
#
# Reads RUN_TOKEN from the environment to name the generated files:
#   rna_scanpy_<RUN_TOKEN>.cmds  one papermill command per sample
#   rna_scanpy_<RUN_TOKEN>.bsub  job-array script that runs line N of the
#                                .cmds file for array index N
# Executed notebooks are hard-wired to the relative folder "test" here.

conda_env="/software/cellgen/team298/shared/envs/hl-conda/hl_scanpy_v0.2.0"
if [ $# -ne 2 ]; then
    {
        echo "$0 samples_database sample_sheet.tsv"
        echo "This is a follow up of irods/pull-processed. If you have not run it, do so"
        echo "samples_database: Folder where you have all sample cellranger data. Ideally - /lustre/scratch126/cellgen/team298/sample_data/"
        echo "sample_sheet.tsv: Sample sheet used for irods/pull-processed (1st column sanger_id, 2nd column sample_name)"
    } >&2
    # A usage error is a failure, not a clean exit.
    exit 1
fi

samples_database=$1
sample_tsv=$2

if [ ! -r "$sample_tsv" ]; then
    # Without this check a missing sheet is reported as "nothing to do".
    echo "Cannot read sample sheet: $sample_tsv" >&2
    exit 1
fi

HL_HIST_FOLDER=".pap"
mem=10000
run_token=$RUN_TOKEN
ofile="rna_scanpy_$run_token.cmds"
outpt_folder="test"

rm -f "$ofile"
# "|| [ -n "$line" ]" keeps a final row that has no trailing newline.
while IFS= read -r line || [ -n "$line" ]
do
    # Skip blank rows; they would otherwise yield a job for sample "_".
    if [ -z "${line//[[:space:]]/}" ]; then
        continue
    fi
    # Any row containing "sample" (case-insensitive) is treated as a header.
    if echo "$line" | grep -q -i Sample; then
        continue
    fi
    read -r sanger_id sample_id _ <<< "$line"
    if [ -z "$sample_id" ]; then
        echo "Skipping row with fewer than two columns: $line" >&2
        continue
    fi
    sample_name="${sample_id}_${sanger_id}"
    cmd="papermill sc_base1.ipynb $outpt_folder/$sample_name.ipynb -p samples_database '${samples_database}' -p sample_name $sample_name -k python3"
    echo "$cmd" >> "$ofile"
done < "$sample_tsv"

if [ ! -f "$ofile" ]; then
    echo "Looks like nothing needs to be done"
    echo "Exiting cleanly..."
    exit 0
fi

total_jobs=$(wc -l < "$ofile")
bsub_id="rna_scanpy_${run_token}"
# Unescaped variables are expanded now; \$-escaped ones are left for the job.
cat > "$bsub_id.bsub" <<EOF
#!/bin/bash
#BSUB -J ${bsub_id}_[1-$total_jobs]%20
#BSUB -o $HL_HIST_FOLDER/lsf/${bsub_id}_%I.out
#BSUB -e $HL_HIST_FOLDER/lsf/${bsub_id}_%I.err
#BSUB -M $mem
#BSUB -R "select[mem>$mem] rusage[mem=$mem]"
# Put the conda environment's executables first on PATH; eval-ing the
# environment directory itself cannot activate anything.
export PATH="$conda_env/bin:\$PATH"
COMMAND=\$(sed -n "\${LSB_JOBINDEX}p" "$ofile")
eval "\$COMMAND"
EOF

# This variant stops here: the array is not submitted. To run it manually:
#   mkdir -p .pap/lsf test && bsub < rna_scanpy_<RUN_TOKEN>.bsub
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/bin/bash
#
# Build and submit an LSF job array that runs the base scanpy notebook
# ($HL_PIPE_BASEDIR/bin/nb/sc_base1.ipynb) through papermill, one array
# element per sample-sheet row.
#
# Usage: <this script> samples_database sample_sheet.tsv
#   samples_database  folder holding one <sample_id>_<sanger_id> folder per
#                     sample; notebooks are written to
#                     <samples_database>/<sample_id>_<sanger_id>/rna_scanpy/
#   sample_sheet.tsv  whitespace-separated sheet; column 1 is the sanger id,
#                     column 2 the sample id
#
# Environment read: RUN_TOKEN (suffix of the generated .cmds/.bsub files)
# and HL_PIPE_BASEDIR (root of the notebook location).

conda_env="/software/cellgen/team298/shared/envs/hl-conda/hl_scanpy_v0.2.0"
if [ $# -ne 2 ]; then
    {
        echo "$0 samples_database sample_sheet.tsv"
        echo "This is a follow up of irods/pull-processed. If you have not run it, do so"
        echo "samples_database: Folder where you have all sample cellranger data. Ideally - /lustre/scratch126/cellgen/team298/sample_data/"
        echo "sample_sheet.tsv: Sample sheet used for irods/pull-processed (1st column sanger_id, 2nd column sample_name)"
    } >&2
    # A usage error is a failure, not a clean exit.
    exit 1
fi

samples_database=$1
sample_tsv=$2

if [ ! -r "$sample_tsv" ]; then
    # Without this check a missing sheet is reported as "nothing to do".
    echo "Cannot read sample sheet: $sample_tsv" >&2
    exit 1
fi

HL_HIST_FOLDER=".pap"
mem=10000
run_token=$RUN_TOKEN
ofile="rna_scanpy_$run_token.cmds"

# The job array writes its logs under $HL_HIST_FOLDER/lsf (see the #BSUB
# -o/-e lines below), so make sure that folder exists before submitting.
mkdir -p "$HL_HIST_FOLDER/lsf"

rm -f "$ofile"
# "|| [ -n "$line" ]" keeps a final row that has no trailing newline.
while IFS= read -r line || [ -n "$line" ]
do
    # Skip blank rows; they would otherwise yield a job for sample "_".
    if [ -z "${line//[[:space:]]/}" ]; then
        continue
    fi
    # Any row containing "sample" (case-insensitive) is treated as a header.
    if echo "$line" | grep -q -i Sample; then
        continue
    fi
    read -r sanger_id sample_id _ <<< "$line"
    if [ -z "$sample_id" ]; then
        echo "Skipping row with fewer than two columns: $line" >&2
        continue
    fi
    sample_name="${sample_id}_${sanger_id}"
    outpt_folder="$samples_database/${sample_name}/rna_scanpy"
    mkdir -p "$outpt_folder"
    # The convenience symlink is created first (and replaced if it already
    # exists) so that the job's exit status is papermill's, not ln's.
    cmd="ln -sfn $samples_database/${sample_name} . ; papermill $HL_PIPE_BASEDIR/bin/nb/sc_base1.ipynb $outpt_folder/$sample_name.ipynb -p samples_database '${samples_database}' -p sample_name $sample_name -k python3"
    echo "$cmd" >> "$ofile"
done < "$sample_tsv"

if [ ! -f "$ofile" ]; then
    echo "Looks like nothing needs to be done"
    echo "Exiting cleanly..."
    exit 0
fi

total_jobs=$(wc -l < "$ofile")
bsub_id="rna_scanpy_${run_token}"
# Unescaped variables are expanded now; \$-escaped ones are left for the job.
cat > "$bsub_id.bsub" <<EOF
#!/bin/bash
#BSUB -J ${bsub_id}_[1-$total_jobs]%20
#BSUB -o $HL_HIST_FOLDER/lsf/${bsub_id}_%I.out
#BSUB -e $HL_HIST_FOLDER/lsf/${bsub_id}_%I.err
#BSUB -M $mem
#BSUB -R "select[mem>$mem] rusage[mem=$mem]"
# Put the conda environment's executables first on PATH; eval-ing the
# environment directory itself cannot activate anything.
export PATH="$conda_env/bin:\$PATH"
COMMAND=\$(sed -n "\${LSB_JOBINDEX}p" "$ofile")
eval "\$COMMAND"
EOF

bsub < "${bsub_id}.bsub"
|