-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathTag_CB_UMI.sh
33 lines (24 loc) · 892 Bytes
/
Tag_CB_UMI.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/bash
# Peter van Galen, 191004
# Move cell barcode (CB) and unique molecular identifier (UMI) from read identifier to sam tags.
# Resulting bam file will have tags for cell barcode (CB) and UMI (UB) as per 10X convention, https://www.10xgenomics.com/support/software/cell-ranger/latest/analysis/outputs/cr-outputs-bam
# These tags are required to run maegatk software
# Example execution:
# Tag_CB_UMI.sh <bam>
# Load samtools through the `use` environment helper when the host provides
# one; on other systems samtools is expected to be on PATH already. This runs
# before strict mode is enabled because the helper is external code that may
# not be written for `set -u`.
if type use >/dev/null 2>&1; then
  use -q Samtools
fi

# Abort on errors, unset variables, and a failure in any pipeline stage.
set -euo pipefail

if ! command -v samtools >/dev/null 2>&1; then
  echo "Error: samtools not found on PATH" >&2
  exit 1
fi

# First variable is bam file to convert
if [[ $# -lt 1 || -z "$1" ]]; then
  echo "Usage: ${0##*/} <bam>" >&2
  exit 2
fi
INPUT=$1
if [[ ! -r "$INPUT" ]]; then
  echo "Error: cannot read input file: $INPUT" >&2
  exit 1
fi

# Second variable is bam file to write (automatically named).
# Only a trailing ".bam" is replaced by ".10x.bam". Substituting the first
# "bam" anywhere in the path would rewrite directory names, and would leave
# OUTPUT identical to INPUT when the path contains no "bam", so the output
# redirection would truncate the input before it is read.
OUTPUT="${INPUT%.bam}.10x.bam"

echo "Converting $INPUT into $OUTPUT..."

# Stream the alignments as SAM text, rewrite each record in awk, and compress
# the result back to BAM. The explicit "-" makes the last samtools read stdin.
if ! samtools view -h "$INPUT" | awk 'BEGIN{FS="\t"; OFS="\t"} {
  if (substr($1,1,1) == "@") {
    # Header lines pass through unchanged.
    print $0
  } else {
    # The read name is split on "_": parts 1 and 2 are kept as the new read
    # name, part 3 becomes the CB tag (with a "-1" suffix) and part 4 the
    # UB tag.
    split($1, a, "_")
    # Emptying field 1 makes $0 start with a tab, so prefixing the new name
    # yields a correctly delimited record.
    $1=""
    print a[1]"_"a[2]$0"\tCB:Z:"a[3]"-1\tUB:Z:"a[4]
  }
}' | samtools view -bh - > "$OUTPUT"; then
  echo "Error: conversion of $INPUT failed" >&2
  # Do not leave a truncated BAM behind that could be mistaken for a result.
  rm -f -- "$OUTPUT"
  exit 1
fi

echo "Done!"
date
exit 0