-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #337 from vgteam/graph-to-chunk
Fix track JSON load, make sure chunk ref path is first, and add script to help make a chunk from a graph
- Loading branch information
Showing
8 changed files
with
249 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,15 @@ | ||
{ | ||
"trackFile": "cactus.vg", | ||
"trackType": "graph", | ||
"trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"} | ||
"trackColorSettings": {"mainPalette": "plainColors", "auxPalette": "greys"} | ||
}, | ||
{ | ||
"trackFile": "cactus0_10.sorted.gam", | ||
"trackType": "read", | ||
"trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"} | ||
"trackColorSettings": {"mainPalette": "blues", "auxPalette": "reds"} | ||
}, | ||
{ | ||
"trackFile": "cactus10_20.sorted.gam", | ||
"trackType": "read", | ||
"trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"} | ||
} | ||
"trackColorSettings": {"mainPalette": "blues", "auxPalette": "reds"} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,53 +1,91 @@ | ||
#!/usr/bin/env bash | ||
set -e | ||
|
||
while getopts x:h:g:r:o: flag | ||
# Print a one-line summary and the invocation syntax to standard error, then exit with status 1. | ||
# Never returns to the caller. | ||
function usage() { | ||
echo >&2 "${0}: Extract graph and read chunks for a region, producing a referencing line for a BED file on standard output" | ||
echo >&2 | ||
echo >&2 "Usage: ${0} -x mygraph.xg [-h mygraph.gbwt] -r chr1:1-100 [-d 'Description of region'] -o chunk-chr1-1-100 [-g mygam1.gam [-g mygam2.gam ...]] >> regions.bed" | ||
exit 1 | ||
} | ||
|
||
while getopts x:h:g:r:o:d: flag | ||
do | ||
case "${flag}" in | ||
x) XG_FILE=${OPTARG};; | ||
h) GBWT=${OPTARG};; | ||
x) GRAPH_FILE=${OPTARG};; | ||
h) HAPLOTYPE_FILE=${OPTARG};; | ||
g) GAM_FILES+=("$OPTARG");; | ||
r) REGION=${OPTARG};; | ||
o) OUTDIR=${OPTARG};; | ||
d) DESC="${OPTARG}";; | ||
*) | ||
usage | ||
;; | ||
|
||
esac | ||
done | ||
|
||
if ! command -v jq &> /dev/null | ||
then | ||
echo "This script requires jq, exiting..." | ||
exit | ||
echo >&2 "This script requires jq, exiting..." | ||
exit 1 | ||
fi | ||
|
||
if [[ -z "${REGION}" ]] ; then | ||
echo >&2 "You must specify a region with -r" | ||
echo >&2 | ||
usage | ||
fi | ||
|
||
if [[ -z "${GRAPH_FILE}" ]] ; then | ||
echo >&2 "You must specify a graph with -x" | ||
echo >&2 | ||
usage | ||
fi | ||
|
||
echo "XG File: " $XG_FILE | ||
echo "Haplotype File: " $GBWT | ||
echo "Region: " $REGION | ||
echo "Output Directory: " $OUTDIR | ||
if [[ -z "${OUTDIR}" ]] ; then | ||
echo >&2 "You must specify an output directory with -o" | ||
echo >&2 | ||
usage | ||
fi | ||
|
||
if [[ -z "${DESC}" ]] ; then | ||
DESC="Region ${REGION}" | ||
fi | ||
|
||
echo >&2 "Graph File: " $GRAPH_FILE | ||
echo >&2 "Haplotype File: " $HAPLOTYPE_FILE | ||
echo >&2 "Region: " $REGION | ||
echo >&2 "Output Directory: " $OUTDIR | ||
|
||
rm -fr $OUTDIR | ||
mkdir -p $OUTDIR | ||
|
||
vg_chunk_params="-x $XG_FILE -g -c 20 -p $REGION -T -b $OUTDIR/chunk -E $OUTDIR/regions.tsv" | ||
vg_chunk_params=(-x $GRAPH_FILE -g -c 20 -p $REGION -T -b $OUTDIR/chunk -E $OUTDIR/regions.tsv) | ||
|
||
# construct track JSON for xg file | ||
jq -n --arg trackFile "${XG_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "greys", "auxPalette": "ygreys"}' '$ARGS.named' >> $OUTDIR/tracks.json | ||
# construct track JSON for graph file | ||
jq -n --arg trackFile "${GRAPH_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "plainColors", "auxPalette": "greys"}' '$ARGS.named' >> $OUTDIR/tracks.json | ||
|
||
# construct track JSON for gbwt file; if not any specific gbwt file, then default would be haplotype | ||
if [[ ! -z "${GBWT}" ]] ; then | ||
jq -n --arg trackFile "${GBWT}" --arg trackType "haplotype" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json | ||
# construct track JSON for haplotype file, if provided | ||
if [[ ! -z "${HAPLOTYPE_FILE}" ]] ; then | ||
jq -n --arg trackFile "${HAPLOTYPE_FILE}" --arg trackType "haplotype" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json | ||
fi | ||
|
||
# construct track JSON for each gam file | ||
echo "Gam Files:" | ||
echo >&2 "Gam Files:" | ||
for GAM_FILE in "${GAM_FILES[@]}"; do | ||
echo " - $GAM_FILE" | ||
echo >&2 " - $GAM_FILE" | ||
jq -n --arg trackFile "${GAM_FILE}" --arg trackType "read" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json | ||
vg_chunk_params=" $vg_chunk_params -a $GAM_FILE" | ||
vg_chunk_params+=(-a $GAM_FILE) | ||
done | ||
|
||
# Call vg chunk | ||
vg chunk $vg_chunk_params > $OUTDIR/chunk.vg | ||
vg chunk "${vg_chunk_params[@]}" > $OUTDIR/chunk.vg | ||
|
||
for file in `ls $OUTDIR/` | ||
do | ||
printf "$file\n" >> $OUTDIR/chunk_contents.txt | ||
done | ||
done | ||
|
||
# Print BED line | ||
cat $OUTDIR/regions.tsv | cut -f1-3 | tr -d "\n" | ||
printf "\t${DESC}\t${OUTDIR}\n" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#!/usr/bin/env bash | ||
set -e | ||
|
||
# Print a one-line summary and the invocation syntax to standard error, then exit with status 1. | ||
# Never returns to the caller. | ||
function usage() { | ||
echo >&2 "${0}: Prepare a tube map chunk and BED line on standard output from a pre-made subgraph. Only supports paths, not haplotypes." | ||
echo >&2 | ||
echo >&2 "Usage: ${0} -x subgraph.xg -r chr1:1-100 [-d 'Description of region'] -o chunk-chr1-1-100 [-g mygam1.gam [-g mygam2.gam ...]] >> regions.bed" | ||
exit 1 | ||
} | ||
|
||
# Parse command-line flags; every flag takes an argument: | ||
#   -x graph file, -r region, -o output directory, -d description, | ||
#   -g GAM file (may be repeated; each value is appended to the GAM_FILES array). | ||
# Any other flag falls through to usage, which exits with status 1. | ||
while getopts x:g:r:o:d: flag | ||
do | ||
case "${flag}" in | ||
x) GRAPH_FILE=${OPTARG};; | ||
g) GAM_FILES+=("$OPTARG");; | ||
r) REGION=${OPTARG};; | ||
o) OUTDIR=${OPTARG};; | ||
d) DESC="${OPTARG}";; | ||
*) | ||
usage | ||
;; | ||
|
||
esac | ||
done | ||
|
||
# jq is used further down to build tracks.json, so stop early if it cannot be found as a command. | ||
if ! command -v jq &> /dev/null | ||
then | ||
echo >&2 "This script requires jq, exiting..." | ||
exit 1 | ||
fi | ||
|
||
# -r, -x and -o are mandatory: for each one that is empty or unset, explain which flag is missing | ||
# on standard error and then call usage, which exits with status 1. | ||
if [[ -z "${REGION}" ]] ; then | ||
echo >&2 "You must specify a region with -r" | ||
echo >&2 | ||
usage | ||
fi | ||
|
||
if [[ -z "${GRAPH_FILE}" ]] ; then | ||
echo >&2 "You must specify a graph with -x" | ||
echo >&2 | ||
usage | ||
fi | ||
|
||
if [[ -z "${OUTDIR}" ]] ; then | ||
echo >&2 "You must specify an output directory with -o" | ||
echo >&2 | ||
usage | ||
fi | ||
|
||
# -d is optional; without it the description defaults to "Region <region>". | ||
if [[ -z "${DESC}" ]] ; then | ||
DESC="Region ${REGION}" | ||
fi | ||
|
||
# Report the settings on standard error, keeping standard output for the BED line printed at the end. | ||
echo >&2 "Graph File: " $GRAPH_FILE | ||
echo >&2 "Region: " $REGION | ||
echo >&2 "Output Directory: " $OUTDIR | ||
|
||
# Start from an empty output directory: anything already at that path is deleted, then it is recreated. | ||
# NOTE(review): $OUTDIR is unquoted, so a value containing whitespace or glob characters is | ||
# word-split/expanded before rm -fr and mkdir see it; quoting it would make this destructive step safer. | ||
rm -fr $OUTDIR | ||
mkdir -p $OUTDIR | ||
|
||
# Parse the region | ||
# The region string has the form contig:start-end (e.g. chr1:1-100 gives contig chr1, start 1, end 100). | ||
# Each field is cut from the reversed string so the split happens at the LAST '-' and the LAST ':'; | ||
# a ':' inside the contig name therefore stays part of the contig. | ||
# NOTE(review): ${REGION} is unquoted inside the command substitutions, so whitespace in it is collapsed. | ||
REGION_END="$(echo ${REGION} | rev | cut -f1 -d'-' | rev)" | ||
REGION_START="$(echo ${REGION} | rev | cut -f2 -d'-' | cut -f1 -d':' | rev)" | ||
REGION_CONTIG="$(echo ${REGION} | rev| cut -f2- -d':' | rev)" | ||
|
||
# construct track JSON for graph file | ||
# Appends one JSON object with the keys trackFile, trackType and trackColorSettings to tracks.json. | ||
jq -n --arg trackFile "${GRAPH_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "plainColors", "auxPalette": "greys"}' '$ARGS.named' >> $OUTDIR/tracks.json | ||
|
||
# Put the graph file in place | ||
# The output of vg convert -p on the input graph becomes chunk.vg in the output directory. | ||
vg convert -p "${GRAPH_FILE}" > $OUTDIR/chunk.vg | ||
# Start the region BED inside the chunk | ||
# No trailing newline is written here: the GAM and annotation file names are appended to this same row later. | ||
# NOTE(review): the variables are expanded inside the printf format string, so a '%' or backslash | ||
# in the contig name would be interpreted by printf instead of being written literally. | ||
printf "${REGION_CONTIG}\t${REGION_START}\t${REGION_END}" > $OUTDIR/regions.tsv | ||
|
||
|
||
# For every GAM given with -g: add a read track to tracks.json, copy the file into the output | ||
# directory under a chunk-style name, and append that name as a further column of regions.tsv. | ||
# GAM_NUM counts the GAMs processed so far and selects the file name prefix. | ||
echo >&2 "Gam Files:" | ||
GAM_NUM=0 | ||
for GAM_FILE in "${GAM_FILES[@]}"; do | ||
echo >&2 " - $GAM_FILE" | ||
# construct track JSON for each gam file | ||
jq -n --arg trackFile "${GAM_FILE}" --arg trackType "read" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json | ||
# Work out a chunk-internal GAM name with the same leading numbering vg chunk uses | ||
# The first GAM gets the prefix "chunk"; the N-th later one (counting from 1) gets "chunk-N". | ||
if [[ "${GAM_NUM}" == "0" ]] ; then | ||
GAM_LEADER="chunk" | ||
else | ||
GAM_LEADER="chunk-${GAM_NUM}" | ||
fi | ||
GAM_CHUNK_NAME="${OUTDIR}/${GAM_LEADER}_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.gam" | ||
# Put the chunk in place | ||
cp "${GAM_FILE}" "${GAM_CHUNK_NAME}" | ||
# List it in the regions TSV like vg would | ||
# Only the base name is recorded, preceded by a tab so it becomes the next column of the row. | ||
# NOTE(review): the file name is expanded inside the printf format string; a '%' in it would be misread. | ||
printf "\t$(basename "${GAM_CHUNK_NAME}")" >> $OUTDIR/regions.tsv | ||
GAM_NUM=$((GAM_NUM + 1)) | ||
done | ||
|
||
# Make the empty but required annotation file. We have no haplotypes to put in it. | ||
touch "${OUTDIR}/chunk_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.annotate.txt" | ||
# The annotation file name is the last column; the trailing newline ends the regions.tsv row. | ||
printf "\tchunk_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.annotate.txt\n" >> $OUTDIR/regions.tsv | ||
|
||
# Write the name of each entry in the output directory, one per line, to chunk_contents.txt. | ||
# The listing is taken once before the loop starts, so chunk_contents.txt does not list itself. | ||
# NOTE(review): looping over `ls` output word-splits names that contain whitespace, and each name | ||
# is used as the printf format string, so '%' or backslashes in a name would be interpreted. | ||
for file in `ls $OUTDIR/` | ||
do | ||
printf "$file\n" >> $OUTDIR/chunk_contents.txt | ||
done | ||
|
||
# Print BED line | ||
# The first three columns of regions.tsv (contig, start, end) are emitted without their newline, | ||
# then the description and the output directory are added as two more tab-separated columns. | ||
# NOTE(review): DESC and OUTDIR are expanded inside the printf format string; a '%' or backslash | ||
# in the user-supplied description would be interpreted rather than printed. | ||
cat $OUTDIR/regions.tsv | cut -f1-3 | tr -d "\n" | ||
printf "\t${DESC}\t${OUTDIR}\n" |
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.