-
Notifications
You must be signed in to change notification settings - Fork 68
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fc0a1a3
commit 9067aef
Showing
6 changed files
with
129 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,11 +4,11 @@ | |
# Author: Swati Parekh | ||
# Contact: [email protected] or [email protected] or [email protected] | ||
|
||
if(@ARGV != 12) | ||
if(@ARGV != 14) | ||
{ | ||
"\n##################################################################################### | ||
Usage: perl $0 <barcode-Read.fq.gz> <cDNA-Read.fq.gz> <cellbc_threshold> <Cellbc_Qual_threshold> <umi_threshold> <UMIbc_Qual_threshold> <Cellbc_range> <UMI_range> <Threads> <StudyName> <Outdir> <pigz-executable> \n | ||
Usage: perl $0 <barcode-Read.fq.gz> <cDNA-Read.fq.gz> <plateBC-read.fq.gz> <cellbc_threshold> <Cellbc_Qual_threshold> <umi_threshold> <UMIbc_Qual_threshold> <Cellbc_range> <PlateBC_range> <UMI_range> <Threads> <StudyName> <Outdir> <pigz-executable> \n | ||
Explanation of parameter: | ||
barcode-Read.fq.gz - Input barcode reads fastq file name. | ||
|
@@ -30,40 +30,56 @@ | |
|
||
$bcread=$ARGV[0]; | ||
$cdnaread=$ARGV[1]; | ||
$bnbases=$ARGV[2]; | ||
$bqualthreshold=$ARGV[3]; | ||
$mnbases=$ARGV[4]; | ||
$mqualthreshold=$ARGV[5]; | ||
$bcrange=$ARGV[6]; | ||
$mcrange=$ARGV[7]; | ||
$threads=$ARGV[8]; | ||
$study=$ARGV[9]; | ||
$outdir=$ARGV[10]; | ||
$pigz=$ARGV[11]; | ||
$pbcread=$ARGV[2]; | ||
$bnbases=$ARGV[3]; | ||
$bqualthreshold=$ARGV[4]; | ||
$mnbases=$ARGV[5]; | ||
$mqualthreshold=$ARGV[6]; | ||
$bcrange=$ARGV[7]; | ||
$pbcrange=$ARGV[8]; | ||
$mcrange=$ARGV[9]; | ||
$threads=$ARGV[10]; | ||
$study=$ARGV[11]; | ||
$outdir=$ARGV[12]; | ||
$pigz=$ARGV[13]; | ||
|
||
@b = split("-",$bcrange); | ||
@m = split("-",$mcrange); | ||
$bs = $b[0] - 1; | ||
$ms = $m[0] - 1; | ||
$bl = $b[1]-$b[0]+1; | ||
$ml = $m[1]-$m[0]+1; | ||
if($pbcread ne "NA") { | ||
@p = split("-",$pbcrange); | ||
$ps = $p[0] - 1; | ||
$pl = $p[1]-$p[0]+1; | ||
} | ||
|
||
|
||
|
||
$bcreadout = $outdir."/".$study.".barcodelist.filtered.sam"; | ||
$bcreadoutfull = $outdir."/".$study.".barcoderead.filtered.fastq"; | ||
if($pbcread ne "NA") {$pbcreadoutfull = $outdir."/".$study.".platebarcoderead.filtered.fastq";} | ||
$cdnareadout = $outdir."/".$study.".cdnaread.filtered.fastq"; | ||
|
||
if ($bcread =~ /\.gz$/) { | ||
open BCF, '-|', $pigz, '-dc', $bcread || die "Couldn't open file $bcread. Check permissions!\n Check if it is differently zipped then .gz\n\n"; | ||
if($pbcread ne "NA") {open PBCF, '-|', $pigz, '-dc', $pbcread || die "Couldn't open file $pbcread. Check permissions!\n Check if it is differently zipped then .gz\n\n";} | ||
open CDF, '-|', $pigz, '-dc', $cdnaread || die "Couldn't open file $cdnaread. Check permissions!\n Check if it is differently zipped then .gz\n\n"; | ||
} | ||
else { | ||
open BCF, "<", $bcread || die "Couldn't open file $bcread. Check permissions!\n Check if it is differently zipped then .gz\n\n"; | ||
if($pbcread ne "NA") {open PBCF, "<", $pbcread || die "Couldn't open file $pbcread. Check permissions!\n Check if it is differently zipped then .gz\n\n";} | ||
open CDF, "<", $cdnaread || die "Couldn't open file $cdnaread. Check permissions!\n Check if it is differently zipped then .gz\n\n"; | ||
} | ||
|
||
open BCOUT, ">", $bcreadout || die "Couldn't open file $bcreadout to write\n\n";; | ||
open CDOUT, ">", $cdnareadout || die "Couldn't open file $cdnareadout to write\n\n";; | ||
open BCOUTFULL, ">", $bcreadoutfull || die "Couldn't open file $bcreadoutfull to write\n\n";; | ||
if($pbcread ne "NA") {open PBCOUTFULL, ">", $pbcreadoutfull || die "Couldn't open file $pbcreadoutfull to write\n\n";;} | ||
|
||
|
||
|
||
|
||
$count=0; | ||
$total=0; | ||
|
@@ -75,7 +91,13 @@ | |
$brseq=<BCF>; | ||
$bqid=<BCF>; | ||
$bqseq=<BCF>; | ||
|
||
if($pbcread ne "NA"){ | ||
$pbrid=<PBCF>; | ||
$pbrseq=<PBCF>; | ||
$pbqid=<PBCF>; | ||
$pbqseq=<PBCF>; | ||
$pbcqual = substr($pbqseq,$ps,$pl); | ||
} | ||
if($count==0){ | ||
$count=1; | ||
@quals = map {$_} unpack "C*", $bqseq; | ||
|
@@ -95,14 +117,26 @@ | |
|
||
if($c[0] eq $b[0]){ | ||
@bquals = map {$_ - $offset} unpack "C*", $bcqual; | ||
#@pbquals = map {$_ - $offset} unpack "C*", $pbcqual; | ||
@mquals = map {$_ - $offset} unpack "C*", $mcqual; | ||
$btmp = grep {$_ < $bqualthreshold} @bquals; | ||
#$pbtmp = grep {$_ < $bqualthreshold} @pbquals; | ||
$mtmp = grep {$_ < $mqualthreshold} @mquals; | ||
|
||
if(($btmp < $bnbases) && ($mtmp < $mnbases)){ | ||
$filtered++; | ||
$bcseq = substr($brseq,$bs,$bl); | ||
if($pbcread ne "NA") {$pbcseq = substr($pbrseq,$ps,$pl);} | ||
$mcseq = substr($brseq,$ms,$ml); | ||
|
||
$brid =~ m/^@(.*)\s/; chomp($brseq); | ||
print BCOUT $1,"\t4\t*\t0\t0\t*\t*\t0\t0\t$brseq\t$bqseq"; | ||
if($pbcread ne "NA") { | ||
print BCOUT $1,"\t4\t*\t0\t0\t*\t*\t0\t0\t",$pbcseq,$bcseq,$mcseq,"\t",$pbcqual,$bcqual,$mcqual,"\n"; | ||
print PBCOUTFULL $pbrid,$pbrseq,"\n",$pbqid,$pbqseq; | ||
} | ||
else { | ||
print BCOUT $1,"\t4\t*\t0\t0\t*\t*\t0\t0\t$brseq\t$bqseq"; | ||
} | ||
print BCOUTFULL $brid,$brseq,"\n",$bqid,$bqseq; | ||
print CDOUT $crid,$crseq,$cqid,$cqseq; | ||
} | ||
|
@@ -118,6 +152,11 @@ | |
close BCOUT; | ||
close CDOUT; | ||
close BCOUTFULL; | ||
if($pbcread ne "NA") { | ||
close PBCF; | ||
close PBCOUTFULL; | ||
`$pigz -f -p $threads $pbcreadoutfull`; | ||
} | ||
|
||
print "Raw reads: $total \nFiltered reads: $filtered \n\n"; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
# Pipeline to run UMI-seq analysis from fastq to read count tables. | ||
# Authors: Swati Parekh & Christoph Ziegenhain | ||
# Contact: [email protected] or [email protected] or [email protected] | ||
|
||
vers=0.0.4 | ||
function check_opts() { | ||
value=$1 | ||
name=$2 | ||
|
@@ -64,6 +64,8 @@ Make sure you have 3-4 times more disk space to your input fastq files. | |
This pipeline works based on one hit per read. Therefore, please do not report more multimapping hits. Default: "". | ||
-H <HammingDistance> : Hamming distance collapsing of UMI sequences. Default: 0. | ||
-B <BarcodeBinning> : Hamming distance binning of close cell barcode sequences. Default: 0. | ||
-T <PlateBC fastq> : Fastq file for plate barcode read. Default: NA | ||
-U <PlateBC range> : Barcode range for plate barcode read (e.g. 1-8). Default: NA | ||
## Program paths ## | ||
-o <outputdir> : Where to write output bam. Default: working directory. | ||
|
@@ -97,6 +99,8 @@ Make sure you have 3-4 times more disk space to your input fastq files. | |
-F <gel-barcode2 fastq> : Provide the second half of gel barcode + UMI read <-F> here. Default: NA. | ||
-L <library barcode fastq> : Provide the library barcode read here. Default: NA | ||
zUMIs version $vers | ||
EOF | ||
} | ||
|
||
|
@@ -130,8 +134,10 @@ xcrange2=0-0 | |
nreads=100 | ||
ham=0 | ||
XCbin=0 | ||
pbcfastq=NA | ||
pbcrange=NA | ||
|
||
while getopts ":R:S:f:r:g:o:a:t:s:c:m:l:b:n:N:q:Q:z:u:x:e:p:i:d:X:A:w:j:F:C:y:Y:L:P:V:H:B:h" options; do #Putting <:> between keys implies that they can not be called without an argument. | ||
while getopts ":R:S:f:r:g:o:a:t:s:c:m:l:b:n:N:q:Q:z:u:x:e:p:i:d:X:A:w:j:F:C:y:Y:L:P:V:H:B:U:T:h" options; do #Putting <:> between keys implies that they can not be called without an argument. | ||
case $options in | ||
R ) isslurm=$OPTARG;; | ||
S ) isStats=$OPTARG;; | ||
|
@@ -170,6 +176,8 @@ while getopts ":R:S:f:r:g:o:a:t:s:c:m:l:b:n:N:q:Q:z:u:x:e:p:i:d:X:A:w:j:F:C:y:Y: | |
V ) Rexc=$OPTARG;; | ||
H ) ham=$OPTARG;; | ||
B ) XCbin=$OPTARG;; | ||
T ) pbcfastq=$OPTARG;; | ||
U ) pbcrange=$OPTARG;; | ||
h ) usage | ||
exit 1;; | ||
\? ) echo -e "\n This key is not available! Please check the usage again: -$OPTARG" | ||
|
@@ -250,6 +258,8 @@ echo -e "\n\n You provided these parameters: | |
# bases below phred in UMI: $molbcbase | ||
Hamming Distance (UMI): $ham | ||
Hamming Distance (CellBC): $XCbin | ||
Plate Barcode Read: $pbcfastq | ||
Plate Barcode range: $pbcrange | ||
Barcodes: $barcodes | ||
zUMIs directory: $zumisdir | ||
STAR executable $starexc | ||
|
@@ -263,7 +273,8 @@ echo -e "\n\n You provided these parameters: | |
Barcode read2(STRT-seq): $bcread2 | ||
Barcode read2 range(STRT-seq): $xcrange2 | ||
Bases(G) to trim(STRT-seq): $BaseTrim | ||
Subsampling reads: $subsampling \n\n" | ||
Subsampling reads: $subsampling \n\n | ||
zUMIs version $vers \n\n" | tee "$sname.zUMIs_run.txt" | ||
|
||
#create output folders | ||
[ -d $outdir/zUMIs_output/ ] || mkdir $outdir/zUMIs_output/ | ||
|
@@ -279,18 +290,41 @@ if [[ "$isCustomFASTQ" != "no" ]] ; then | |
fi | ||
#Submit all the jobs | ||
if [[ "$isslurm" == "yes" ]] ; then | ||
if [[ "$pbcfastq" != "NA" ]] ; then | ||
tmpa=`echo $pbcrange | cut -f1 -d '-'` | ||
tmpb=`echo $pbcrange | cut -f2 -d '-'` | ||
pbcl=`expr $tmpa + $tmpb - 1` | ||
tmpa=`echo $xcrange | cut -f1 -d '-'` | ||
tmpb=`echo $xcrange | cut -f2 -d '-'` | ||
bcl=`expr $tmpa + $tmpb - 1` | ||
l=`expr $bcl + $pbcl` | ||
xc=1-"$l" | ||
xcst=1 | ||
xcend=$l | ||
xmst=`expr $l + 1` | ||
tmpa=`echo $xmrange | cut -f1 -d '-'` | ||
tmpb=`echo $xmrange | cut -f2 -d '-'` | ||
ml=`expr $tmpb - $tmpa` | ||
xmend=`expr $xmst + $ml` | ||
xmr="$xmst"-"$xmend" | ||
xcr="$xcst"-"$xcend" | ||
else | ||
xmr=$xmrange | ||
xcr=$xcrange | ||
fi | ||
|
||
case "$whichStage" in | ||
"filtering") | ||
if [[ "$isstrt" == "yes" ]] ; then | ||
bash $zumisdir/zUMIs-filtering-strt.sh $cdnaread $bcread $sname $outdir $xmrange $cbasequal $mbasequal $molbcbase $cellbcbase $threads $zumisdir $bcread2 $BaseTrim $pigzexc | ||
elif [[ "$isindrops" == "yes" ]] ; then | ||
bash $zumisdir/zUMIs-filtering-inDrops.sh $cdnaread $bcread $libread $bcread2 $sname $outdir $xmrange $cbasequal $mbasequal $molbcbase $cellbcbase $threads $zumisdir $pigzexc | ||
else | ||
bash $zumisdir/zUMIs-filtering.sh $bcread $cdnaread $sname $outdir $xcrange $xmrange $cbasequal $mbasequal $molbcbase $cellbcbase $threads $zumisdir $pigzexc | ||
bash $zumisdir/zUMIs-filtering.sh $bcread $cdnaread $sname $outdir $xcrange $xmrange $cbasequal $mbasequal $molbcbase $cellbcbase $threads $zumisdir $pigzexc $pbcfastq $pbcrange | ||
fi | ||
bash $zumisdir/zUMIs-mapping.sh $sname $outdir $genomedir $gtf $threads $readlen "$starparams" $starexc $samtoolsexc $xmrange $BaseTrim $isstrt | ||
bash $zumisdir/zUMIs-prepCounting.sh $sname $outdir $threads $samtoolsexc | ||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcrange $xmrange $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcr $xmr $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-cleaning.sh $sname $outdir | ||
;; | ||
"mapping") | ||
|
@@ -309,7 +343,7 @@ if [[ "$isslurm" == "yes" ]] ; then | |
fi | ||
bash $zumisdir/zUMIs-mapping.sh $sname $outdir $genomedir $gtf $threads $readlen "$starparams" $starexc $samtoolsexc $xmrange $BaseTrim $isstrt | ||
bash $zumisdir/zUMIs-prepCounting.sh $sname $outdir $threads $samtoolsexc | ||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcrange $xmrange $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcr $xmr $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-cleaning.sh $sname $outdir | ||
;; | ||
"counting") | ||
|
@@ -331,14 +365,14 @@ if [[ "$isslurm" == "yes" ]] ; then | |
bash $zumisdir/zUMIs-prepCounting.sh $sname $outdir $threads $samtoolsexc | ||
fi | ||
|
||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcrange $xmrange $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcr $xmr $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-cleaning.sh $sname $outdir | ||
;; | ||
"summarising") | ||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcrange $xmrange $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-counting.sh $sname $outdir $barcodes $threads $gtf $strandedness $xcr $xmr $subsampling $zumisdir $isStats $whichStage $isstrt $bcread2 $xcrange2 $nreads $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-cleaning.sh $sname $outdir | ||
;; | ||
esac | ||
else | ||
bash $zumisdir/zUMIs-noslurm.sh $cdnaread $bcread $sname $outdir $xcrange $xmrange $cbasequal $mbasequal $molbcbase $cellbcbase $threads $genomedir $gtf $readlen "$starparams" $starexc $barcodes $strandedness $subsampling $zumisdir $samtoolsexc $isStats $whichStage $bcread2 $BaseTrim $isstrt $xcrange2 $CustomMappedBAM $isCustomFASTQ $nreads $isindrops $libread $pigzexc $Rexc $ham $XCbin | ||
bash $zumisdir/zUMIs-noslurm.sh $cdnaread $bcread $sname $outdir $xcrange $xmrange $cbasequal $mbasequal $molbcbase $cellbcbase $threads $genomedir $gtf $readlen "$starparams" $starexc $barcodes $strandedness $subsampling $zumisdir $samtoolsexc $isStats $whichStage $bcread2 $BaseTrim $isstrt $xcrange2 $CustomMappedBAM $isCustomFASTQ $nreads $isindrops $libread $pigzexc $Rexc $ham $XCbin $pbcfastq $pbcrange | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters