From 250aa660d8b515806d035f638ee6cbab111e75e7 Mon Sep 17 00:00:00 2001
From: Adam Novak <anovak@soe.ucsc.edu>
Date: Fri, 15 Sep 2023 13:23:30 -0400
Subject: [PATCH 1/2] Fix track JSON load, make sure chunk ref path is first,
 and add script to help make a chunk from a graph

---
 docker/config.json                     |   8 +-
 exampleData/chunk-ref-1-20/tracks.json |   8 +-
 scripts/prepare_chunks.sh              |  78 +++++++++++++-----
 scripts/prepare_local_chunk.sh         | 107 +++++++++++++++++++++++++
 src/components/TubeMapContainer.js     |   4 +-
 src/server.mjs                         |  39 ++++++++-
 src/util/tubemap.js                    |   7 ++
 7 files changed, 217 insertions(+), 34 deletions(-)
 create mode 100755 scripts/prepare_local_chunk.sh

diff --git a/docker/config.json b/docker/config.json
index 36a020aa..7fb8a7ff 100644
--- a/docker/config.json
+++ b/docker/config.json
@@ -51,14 +51,14 @@
   "internalDataPath": "./exampleData/internal",
 
   "defaultHaplotypeColorPalette" : {
-    "mainPalette": "ygreys", 
-    "auxPalette": "ygreys", 
+    "mainPalette": "greys", 
+    "auxPalette": "greys", 
     "colorReadsByMappingQuality": false
   },
   
   "defaultReadColorPalette" : {
-    "mainPalette": "reds", 
-    "auxPalette": "blues", 
+    "mainPalette": "blues", 
+    "auxPalette": "reds", 
     "colorReadsByMappingQuality": false
   },
 
diff --git a/exampleData/chunk-ref-1-20/tracks.json b/exampleData/chunk-ref-1-20/tracks.json
index 9d806afa..44c6fd5f 100644
--- a/exampleData/chunk-ref-1-20/tracks.json
+++ b/exampleData/chunk-ref-1-20/tracks.json
@@ -1,15 +1,15 @@
 {
     "trackFile": "cactus.vg", 
     "trackType": "graph", 
-    "trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}
+    "trackColorSettings": {"mainPalette": "plainColors", "auxPalette": "greys"}
 },
 {
     "trackFile": "cactus0_10.sorted.gam", 
     "trackType": "read", 
-    "trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}
+    "trackColorSettings": {"mainPalette": "blues", "auxPalette": "reds"}
 },
 {
     "trackFile": "cactus10_20.sorted.gam", 
     "trackType": "read", 
-    "trackColorSettings": {"mainPalette": "greys", "auxPalette": "ygreys"}
-}
\ No newline at end of file
+    "trackColorSettings": {"mainPalette": "blues", "auxPalette": "reds"}
+}
diff --git a/scripts/prepare_chunks.sh b/scripts/prepare_chunks.sh
index 01af814c..af4c00dd 100755
--- a/scripts/prepare_chunks.sh
+++ b/scripts/prepare_chunks.sh
@@ -1,53 +1,91 @@
 #!/usr/bin/env bash
 set -e
 
-while getopts x:h:g:r:o: flag
+function usage() {
+    echo >&2 "${0}: Extract graph and read chunks for a region, producing a referencing line for a BED file on standard output"
+    echo >&2
+    echo >&2 "Usage: ${0} -x mygraph.xg [-h mygraph.gbwt] -r chr1:1-100 [-d 'Description of region'] -o chunk-chr1-1-100 [-g mygam1.gam [-g mygam2.gam ...]] >> regions.bed"
+    exit 1
+}
+
+while getopts x:h:g:r:o:d: flag
 do
     case "${flag}" in
-        x) XG_FILE=${OPTARG};;
-        h) GBWT=${OPTARG};;
+        x) GRAPH_FILE=${OPTARG};;
+        h) HAPLOTYPE_FILE=${OPTARG};;
         g) GAM_FILES+=("$OPTARG");;
         r) REGION=${OPTARG};;
         o) OUTDIR=${OPTARG};;
+        d) DESC="${OPTARG}";;
+        *)
+            usage
+            ;;
+
     esac
 done
 
 if ! command -v jq &> /dev/null
 then
-    echo "This script requires jq, exiting..."
-    exit
+    echo >&2 "This script requires jq, exiting..."
+    exit 1
+fi
+
+if [[ -z "${REGION}" ]] ; then
+    echo >&2 "You must specify a region with -r"
+    echo >&2
+    usage
+fi
+
+if [[ -z "${GRAPH_FILE}" ]] ; then
+    echo >&2 "You must specify a graph with -x"
+    echo >&2
+    usage
 fi
 
-echo "XG File: " $XG_FILE
-echo "Haplotype File: " $GBWT
-echo "Region: " $REGION
-echo "Output Directory: " $OUTDIR
+if [[ -z "${OUTDIR}" ]] ; then
+    echo >&2 "You must specify an output directory with -o"
+    echo >&2
+    usage
+fi
+
+if [[ -z "${DESC}" ]] ; then
+    DESC="Region ${REGION}"
+fi
+
+echo >&2 "Graph File: " $GRAPH_FILE
+echo >&2 "Haplotype File: " $HAPLOTYPE_FILE
+echo >&2 "Region: " $REGION
+echo >&2 "Output Directory: " $OUTDIR
 
 rm -fr $OUTDIR
 mkdir -p $OUTDIR
 
-vg_chunk_params="-x $XG_FILE -g -c 20 -p $REGION -T -b $OUTDIR/chunk -E $OUTDIR/regions.tsv"
+vg_chunk_params=(-x $GRAPH_FILE -g -c 20 -p $REGION -T -b $OUTDIR/chunk -E $OUTDIR/regions.tsv)
 
-# construct track JSON for xg file
-jq -n --arg trackFile "${XG_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "greys", "auxPalette": "ygreys"}' '$ARGS.named' >> $OUTDIR/tracks.json
+# construct track JSON for graph file
+jq -n --arg trackFile "${GRAPH_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "plainColors", "auxPalette": "greys"}' '$ARGS.named' >> $OUTDIR/tracks.json
 
-# construct track JSON for gbwt file; if not any specific gbwt file, then default would be haplotype
-if [[ ! -z "${GBWT}" ]] ; then
-    jq -n --arg trackFile "${GBWT}" --arg trackType "haplotype" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json
+# construct track JSON for haplotype file, if provided
+if [[ ! -z "${HAPLOTYPE_FILE}" ]] ; then
+    jq -n --arg trackFile "${HAPLOTYPE_FILE}" --arg trackType "haplotype" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json
 fi
 
 # construct track JSON for each gam file
-echo "Gam Files:"
+echo >&2 "Gam Files:"
 for GAM_FILE in "${GAM_FILES[@]}"; do
-    echo " - $GAM_FILE"
+    echo >&2 " - $GAM_FILE"
     jq -n --arg trackFile "${GAM_FILE}" --arg trackType "read" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json
-    vg_chunk_params=" $vg_chunk_params -a $GAM_FILE"
+    vg_chunk_params+=(-a $GAM_FILE)
 done
 
 # Call vg chunk
-vg chunk $vg_chunk_params > $OUTDIR/chunk.vg
+vg chunk "${vg_chunk_params[@]}" > $OUTDIR/chunk.vg
 
 for file in `ls $OUTDIR/`
 do
     printf "$file\n" >> $OUTDIR/chunk_contents.txt
-done
\ No newline at end of file
+done
+
+# Print BED line
+cat $OUTDIR/regions.tsv | cut -f1-3 | tr -d "\n"
+printf "\t${DESC}\t${OUTDIR}\n"
diff --git a/scripts/prepare_local_chunk.sh b/scripts/prepare_local_chunk.sh
new file mode 100755
index 00000000..63cad98a
--- /dev/null
+++ b/scripts/prepare_local_chunk.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+set -e
+
+function usage() {
+    echo >&2 "${0}: Prepare a tube map chunk and BED line on standard output from a pre-made subgraph. Only supports paths, not haplotypes."
+    echo >&2
+    echo >&2 "Usage: ${0} -x subgraph.xg -r chr1:1-100 [-d 'Description of region'] -o chunk-chr1-1-100 [-g mygam1.gam [-g mygam2.gam ...]] >> regions.bed"
+    exit 1
+}
+
+while getopts x:g:r:o:d: flag
+do
+    case "${flag}" in
+        x) GRAPH_FILE=${OPTARG};;
+        g) GAM_FILES+=("$OPTARG");;
+        r) REGION=${OPTARG};;
+        o) OUTDIR=${OPTARG};;
+        d) DESC="${OPTARG}";;
+        *)
+            usage
+            ;;
+
+    esac
+done
+
+if ! command -v jq &> /dev/null
+then
+    echo >&2 "This script requires jq, exiting..."
+    exit 1
+fi
+
+if [[ -z "${REGION}" ]] ; then
+    echo >&2 "You must specify a region with -r"
+    echo >&2
+    usage
+fi
+
+if [[ -z "${GRAPH_FILE}" ]] ; then
+    echo >&2 "You must specify a graph with -x"
+    echo >&2
+    usage
+fi
+
+if [[ -z "${OUTDIR}" ]] ; then
+    echo >&2 "You must specify an output directory with -o"
+    echo >&2
+    usage
+fi
+
+if [[ -z "${DESC}" ]] ; then
+    DESC="Region ${REGION}"
+fi
+
+echo >&2 "Graph File: " $GRAPH_FILE
+echo >&2 "Region: " $REGION
+echo >&2 "Output Directory: " $OUTDIR
+
+rm -fr $OUTDIR
+mkdir -p $OUTDIR
+
+# Parse the region
+REGION_END="$(echo ${REGION} | rev | cut -f1 -d'-' | rev)"
+REGION_START="$(echo ${REGION} | rev | cut -f2 -d'-' | cut -f1 -d':' | rev)"
+REGION_CONTIG="$(echo ${REGION} | rev| cut -f2- -d':' | rev)"
+
+# construct track JSON for graph file
+jq -n --arg trackFile "${GRAPH_FILE}" --arg trackType "graph" --argjson trackColorSettings '{"mainPalette": "plainColors", "auxPalette": "greys"}' '$ARGS.named' >> $OUTDIR/tracks.json
+
+# Put the graphy file in place
+vg convert -p "${GRAPH_FILE}" > $OUTDIR/chunk.vg
+# Start the region BED inside the chunk
+printf "${REGION_CONTIG}\t${REGION_START}\t${REGION_END}" > $OUTDIR/regions.tsv
+
+
+echo >&2 "Gam Files:"
+GAM_NUM=0
+for GAM_FILE in "${GAM_FILES[@]}"; do
+    echo >&2 " - $GAM_FILE"
+    # construct track JSON for each gam file
+    jq -n --arg trackFile "${GAM_FILE}" --arg trackType "read" --argjson trackColorSettings '{"mainPalette": "blues", "auxPalette": "reds"}' '$ARGS.named' >> $OUTDIR/tracks.json
+    # Work out a chunk-internal GAM name with the same leading numbering vg chunk uses
+    if [[ "${GAM_NUM}" == "0" ]] ; then
+        GAM_LEADER="chunk"
+    else
+        GAM_LEADER="chunk-${GAM_NUM}"
+    fi
+    GAM_CHUNK_NAME="${OUTDIR}/${GAM_LEADER}_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.gam"
+    # Put the chunk in place
+    cp "${GAM_FILE}" "${GAM_CHUNK_NAME}"
+    # List it in the regions TSV like vg would
+    printf "\t$(basename "${GAM_CHUNK_NAME}")" >> $OUTDIR/regions.tsv
+    GAM_NUM=$((GAM_NUM + 1))
+done
+
+# Make the empty but required annotation file. We have no haplotypes to put in it.
+touch "${OUTDIR}/chunk_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.annotate.txt"
+printf "\tchunk_0_${REGION_CONTIG}_${REGION_START}_${REGION_END}.annotate.txt\n" >> $OUTDIR/regions.tsv
+
+for file in `ls $OUTDIR/`
+do
+    printf "$file\n" >> $OUTDIR/chunk_contents.txt
+done
+
+# Print BED line
+cat $OUTDIR/regions.tsv | cut -f1-3 | tr -d "\n"
+printf "\t${DESC}\t${OUTDIR}\n"
+
diff --git a/src/components/TubeMapContainer.js b/src/components/TubeMapContainer.js
index a11a4b0c..8c063a6a 100644
--- a/src/components/TubeMapContainer.js
+++ b/src/components/TubeMapContainer.js
@@ -30,7 +30,7 @@ class TubeMapContainer extends Component {
 
   handleFetchError(error, message) {
     if (!this.cancelSignal.aborted) {
-      console.log(message, error.name, error.message);
+      console.error(message, error);
       this.setState({ error: error, isLoading: false });
     } else {
       console.log("fetch canceled by componentWillUnmount", error.message);
@@ -199,7 +199,7 @@ class TubeMapContainer extends Component {
     } catch (error) {
       this.handleFetchError(
         error,
-        `POST to ${this.props.apiUrl}/getChunkedData failed:`
+        `Fetching and parsing POST to ${this.props.apiUrl}/getChunkedData failed:`
       );
     }
   };
diff --git a/src/server.mjs b/src/server.mjs
index be3e4826..674785bb 100644
--- a/src/server.mjs
+++ b/src/server.mjs
@@ -537,6 +537,7 @@ async function getChunkedData(req, res, next) {
       }
       req.graph = JSON.parse(graphAsString);
       req.region = [rangeRegion.start, rangeRegion.end];
+      // vg chunk always puts the path we reference on first automatically
       if (!sentResponse) {
         sentResponse = true;
         processAnnotationFile(req, res, next);
@@ -593,6 +594,24 @@ async function getChunkedData(req, res, next) {
       }
       req.graph = JSON.parse(graphAsString);
       req.region = [rangeRegion.start, rangeRegion.end];
+
+      // We might not have the path we are referencing on appearing first.
+      if (parsedRegion.contig !== "node") {
+        // Make sure that path 0 is the path we actually asked about
+        let refPaths = [];
+        let otherPaths = [];
+        for (let path of req.graph.path) {
+          if (path.name === parsedRegion.contig) {
+            // This is the path we asked about, so it goes first
+            refPaths.push(path);
+          } else { 
+            // Then we put each other path
+            otherPaths.push(path);
+          }
+        }
+        req.graph.path = refPaths.concat(otherPaths);
+      }
+
       if (!sentResponse) {
         sentResponse = true;
         processAnnotationFile(req, res, next);
@@ -669,6 +688,7 @@ function returnErrorMiddleware(err, req, res, next) {
     result.error += req.error.toString("utf-8");
   }
   console.log("returning error: " + result.error);
+  console.error(err);
   if (err.status) {
     // Error comes with a status
     res.status(err.status);
@@ -922,6 +942,10 @@ function processGamFiles(req, res, next) {
   }
 }
 
+// Function to do the step of reading the "region" file, a BED inside the chunk
+// that records the path and start offset that were used to define the chunk.
+//
+// Calls out to the next step, cleanUpAndSendResult
 function processRegionFile(req, res, next) {
   try {
     console.time("processing region file");
@@ -1432,14 +1456,21 @@ async function getBedRegions(bed) {
       if (fs.existsSync(track_json)) {
         // Create string of tracks data
         const string_data = fs.readFileSync(track_json);
-        const parser = new JSONParser();
-        parser.onValue = (value, key, parent, stack) => {
-          if (stack > 0) return; // ignore inner values
+        const parser = new JSONParser({separator: ''});
+        parser.onValue = ({value, key, parent, stack}) => {
+          if (stack.length > 0) {
+            // ignore inner values
+            return;
+          }
+          if (!Object.hasOwn(value, 'trackFile')) {
+            throw new BadRequestError('Non-track object in tracks.json: ' + JSON.stringify(value))
+          }
           // put tracks in array
           tracks_array.push(value);
         };
         parser.write(string_data);
-        tracks = tracks_array; 
+        // Convert to object container like the client component prop types expect
+        tracks = {...tracks_array}; 
       }
     }
 
diff --git a/src/util/tubemap.js b/src/util/tubemap.js
index 7e1322e7..a7b4e368 100644
--- a/src/util/tubemap.js
+++ b/src/util/tubemap.js
@@ -26,6 +26,7 @@ const greys = [
   "#000000",
 ];
 
+// Greys but with a special color for the first thing.
 const ygreys = [
   "#9467bd",
   "#d9d9d9",
@@ -4229,6 +4230,12 @@ export function vgExtractReads(
 
   for (let i = 0; i < myReads.length; i += 1) {
     const read = myReads[i];
+
+    if (!read.path) {
+      // Read does not have a path assigned, this is an unmapped read.
+      continue;
+    }
+
     const sequence = [];
     const sequenceNew = [];
     let firstIndex = -1; // index within mapping of the first node id contained in nodeNames

From 0cfc9b3825d8f5eb49e111f56bebdbb0836cfaf7 Mon Sep 17 00:00:00 2001
From: Adam Novak <anovak@soe.ucsc.edu>
Date: Fri, 15 Sep 2023 15:34:57 -0400
Subject: [PATCH 2/2] Document the new subgraph to chunk script to fix #326

---
 README.md | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index bde4859e..03b2fbda 100644
--- a/README.md
+++ b/README.md
@@ -137,28 +137,51 @@ That can sometimes up to 10-20 seconds.
 If you already know of regions/subgraphs that you will be looking at, you can pre-fetch the data in advance. 
 This will save some time during the interactive visualization, especially if there are a lot of regions to visualize.
 
-This is a 2 step process that involves creating the chunk and linking it to a bed file
+The net result needs to be one or more chunk directories on disk, referenced from a BED file.
 
-1. 
-The subgraphs need to be pre-fetched using `vg chunk` like shown in [`prepare_chunks.sh`](scripts/prepare_chunks.sh). For example:
+To generate each chunk, you can use the `prepare_chunks.sh` script. You ought to run it from the directory containing your input files and where your output chunks will be stored (i.e. the `dataPath` in `sequenceTubeMpas/src/config.json`), which defaults to the `exampleData` directory in the repo.
+
+For example:
 
 ```
-./prepare_chunk.sh -x mygraph.xg -h mygraph.gbwt -r chr1:1-100 -o chunk-chr1-1-100 -g mygam1.gam -g mygam2.gam
+cd exampleData/
+../scripts/prepare_chunk.sh -x mygraph.xg -h mygraph.gbwt -r chr1:1-100 -d 'Region A' -o chunk-chr1-1-100 -g mygam1.gam -g mygam2.gam >> mychunks.bed
+../scripts/prepare_chunk.sh -x mygraph.xg -h mygraph.gbwt -r chr1:101-200 -d 'Region B' -o chunk-chr1-100-200 -g mygam1.gam -g mygam2.gam >> mychunks.bed
 ```
 
-2.
-Then compile those regions in a BED file with two additional columns: 
+The BED file linking to the chunks has two additional nonstandard columns: 
 
 - a description of the region (column 4)
 - the path to the output directory of the chunk, `chunk-chr1-1-100` in the example above, (column 5). 
 
 ```
-ref	1	10	region one to ten	chunk-ref-1-20
-ref	10	20	region ten to twenty	chunk-ref-1-20
+chr1	1	100	Region A	chunk-chr1-1-100
+chr1	101	200	Region B	chunk-chr2-101-200
 ```
 Note each column is seperated by tabs
 
-This BED file will be read if placed in the `dataPath` directory, like for other files to mount (see above).
+This BED file needs to be in the `dataPath` directory, or it can be hosted on the web along with its chunk directories and accessed via URL.
+
+##### Pre-made subgraphs
+
+You may want to look at a graph that has already been extracted from a larger graph.
+To support this, there is a `prepare_local_chunk.sh` script, which takes a subgraph rather than a full graph.
+It supports most of the options that `prepare_chunks.sh` does, with the notable exception of haplotype files.
+It assumes that the graph represents some region along some reference path that is present in the graph, and expects that region to be provided with the `-r` option.
+It assumes that path names in the subgraph *don't* use subregion suffixes (bracket-enclosed numbers).
+The path name used in the region should *exactly* match the name of one of the paths in the graph.
+
+For example, you can run it like:
+
+```
+cd exampleData/
+../scripts/prepare_local_chunk.sh -x subgraph.gfa -r chr5:1023911-1025911 -g subgraph_reads.gam -g other_sample_reads.gam -o subgraph1 >> subgraphs.bed
+```
+
+If the original subgraph file does not remain in place under the configured `dataPath` and accessible by the tube map, errors may occur complaining that it couldn't be accessed when the tube map attempts to list ist contained paths.
+
+The net result will be that you can select the BED file, select the region it specifies, and view a precomputed view of the subgraph, with coordinates computed assuming it covers the region provided to `prepare_local_chunk.sh`.
+
 
 #### Development Mode