Allow opening bedpe files from user tracks in SV inspector (#4694)
cmdcolin authored Nov 27, 2024
1 parent 03d52c6 · commit b320601
Showing 6 changed files with 89 additions and 58 deletions.
plugins/bed/src/BedpeAdapter/configSchema.ts (3 changes: 2 additions & 1 deletion)
@@ -11,7 +11,8 @@ const BedpeAdapter = ConfigurationSchema(
{
/**
* #slot
* can be plaintext or gzipped, not indexed so loaded into memory on startup
* can be plaintext or gzipped, not indexed so loaded into memory on
* startup
*/
bedpeLocation: {
type: 'fileLocation',
plugins/spreadsheet-view/src/SpreadsheetView/ImportWizard.ts (56 changes: 30 additions & 26 deletions)
@@ -27,11 +27,24 @@ function getType(adapter: Record<string, unknown>) {
return 'VCF'
} else if (adapter.bedLocation || adapter.bedGzLocation) {
return 'BED'
} else if (adapter.bedpeLocation) {
return 'BEDPE'
} else {
return undefined
}
}

// hardcodes a couple different adapter types
function getAdapterLoc(adapter: Record<string, FileLocation>) {
return (
adapter.vcfLocation ||
adapter.vcfGzLocation ||
adapter.bedLocation ||
adapter.bedGzLocation ||
adapter.bedpeLocation
)
}

// regexp used to guess the type of a file or URL from its file extension
const fileTypesRegexp = new RegExp(`\\.(${fileTypes.join('|')})(\\.gz)?$`, 'i')

@@ -137,36 +150,27 @@ export default function stateModelFactory() {
...sessionTracks,
] as AnyConfigurationModel[]
return allTracks
.filter(track => {
const assemblyNames = readConfObject(track, 'assemblyNames')
const adapter = readConfObject(track, 'adapter')
return (
assemblyNames.includes(selectedAssembly) &&
(adapter.vcfLocation ||
adapter.vcfGzLocation ||
adapter.bedLocation ||
adapter.bedGzLocation)
)
})
.map(track => {
const assemblyNames = readConfObject(track, 'assemblyNames')
const adapter = readConfObject(track, 'adapter')
const category = readConfObject(track, 'category').join(',')
return {
track,
label: [
category ? `[${category}]` : '',
getTrackName(track, session),
]
.filter(f => !!f)
.join(' '),
assemblyNames: readConfObject(track, 'assemblyNames'),
type: getType(adapter) || 'UNKNOWN',
loc: (adapter.vcfLocation ||
adapter.vcfGzLocation ||
adapter.bedLocation ||
adapter.bedGzLocation) as FileLocation,
}
const loc = getAdapterLoc(adapter)
return assemblyNames.includes(selectedAssembly) && loc
? {
track,
label: [
category ? `[${category}]` : '',
getTrackName(track, session),
]
.filter(f => !!f)
.join(' '),
assemblyNames,
type: getType(adapter) || 'UNKNOWN',
loc,
}
: undefined
})
.filter(f => !!f)
.sort((a, b) => a.label.localeCompare(b.label))
},
}))
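
For context, the two helpers above (getType and getAdapterLoc) centralize the adapter detection that the track filter previously inlined, with BEDPE added as a new case. A minimal, self-contained sketch of how they classify a track's adapter config follows; the UriLocation interface and sample adapter object are simplified stand-ins for illustration, not code from the diff:

// Simplified stand-in for JBrowse's FileLocation type (the real one lives in
// @jbrowse/core/util/types)
interface UriLocation {
  locationType: 'UriLocation'
  uri: string
}

// Same logic as getType() in the diff: classify an adapter config by which
// *Location slot it carries
function getType(adapter: Record<string, unknown>) {
  if (adapter.vcfLocation || adapter.vcfGzLocation) {
    return 'VCF'
  } else if (adapter.bedLocation || adapter.bedGzLocation) {
    return 'BED'
  } else if (adapter.bedpeLocation) {
    return 'BEDPE'
  } else {
    return undefined
  }
}

// Same logic as getAdapterLoc() in the diff: pick whichever location slot is set
function getAdapterLoc(adapter: Record<string, UriLocation | undefined>) {
  return (
    adapter.vcfLocation ||
    adapter.vcfGzLocation ||
    adapter.bedLocation ||
    adapter.bedGzLocation ||
    adapter.bedpeLocation
  )
}

// Hypothetical adapter config, shaped like what readConfObject(track, 'adapter')
// might return for a BEDPE user track
const adapter: Record<string, UriLocation | undefined> = {
  bedpeLocation: { locationType: 'UriLocation', uri: 'sv_calls.bedpe.gz' },
}

console.log(getType(adapter)) // 'BEDPE'
console.log(getAdapterLoc(adapter)?.uri) // 'sv_calls.bedpe.gz'
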
@@ -1,6 +1,4 @@
import type { Buffer } from 'buffer'

export function parseBedBuffer(buffer: Buffer) {
export function parseBedBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -15,10 +13,20 @@ export function parseBedBuffer(buffer: Buffer) {
),
)

const lastHeaderLine = lines.findLast(line => line.startsWith('#'))
const coreColumns = ['refName', 'start', 'end', 'name', 'score', 'strand']
const numExtraColumns = Math.max(
0,
(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
)
const extraNames = lastHeaderLine?.includes('\t')
? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)

const colNames = [...coreColumns, ...extraNames]

return {
columns: ['refName', 'start', 'end', 'name', 'score', 'strand'].map(c => ({
name: c,
})),
columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')
@@ -30,6 +38,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
feature: {
uniqueId: `bed-${idx}`,
@@ -39,6 +50,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
}
}),
@@ -1,6 +1,4 @@
import type { Buffer } from 'buffer'

export function parseBedPEBuffer(buffer: Buffer) {
export function parseBedPEBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -14,24 +12,36 @@ export function parseBedPEBuffer(buffer: Buffer) {
line.startsWith('track')
),
)
const lastHeaderLine = lines.findLast(line => line.startsWith('#'))

const coreColumns = [
'refName',
'start',
'end',
'mateRef',
'mateStart',
'mateEnd',
'name',
'score',
'strand',
'mateStrand',
]
const numExtraColumns = Math.max(
0,
(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
)

const extraNames = lastHeaderLine?.includes('\t')
? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)

const colNames = [...coreColumns, ...extraNames]
return {
columns: [
'refName',
'start',
'end',
'mateStart',
'mateEnd',
'name',
'score',
'strand',
'mateStrand',
].map(c => ({
name: c,
})),
columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')

return {
cellData: {
refName: cols[0],
@@ -41,24 +51,30 @@
mateStart: cols[4],
mateEnd: cols[5],
name: cols[6],
score: cols[7],
score: +cols[7]! || cols[7],
strand: cols[8],
mateStrand: cols[9],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
feature: {
uniqueId: `bedpe-${idx}`,
refName: cols[0],
start: +cols[1]!,
end: +cols[2]!,
strand: cols[8],
strand: cols[8] === '-' ? -1 : 1,
mate: {
refName: cols[3],
start: +cols[4]!,
end: +cols[5]!,
strand: cols[9],
strand: cols[9] === '-' ? -1 : 1,
},
name: cols[6],
score: cols[7],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
}
}),
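
The new extra-column handling in the BEDPE (and BED) parsers derives names for columns beyond the core fields: when the last '#' header line is tab-delimited, its trailing entries become the column names; otherwise the extras fall back to field_0, field_1, and so on. A small sketch of that naming step in isolation, using a made-up 12-column BEDPE:

// The ten core BEDPE columns, matching coreColumns in the diff
const coreColumns = [
  'refName', 'start', 'end',
  'mateRef', 'mateStart', 'mateEnd',
  'name', 'score', 'strand', 'mateStrand',
]

// Mirror of the naming step: take trailing entries from the last '#' header
// line when it is tab-delimited, otherwise synthesize field_N names from the
// width of the first data row
function getExtraColumnNames(
  lastHeaderLine: string | undefined,
  firstDataLine: string,
) {
  const numExtraColumns = Math.max(
    0,
    firstDataLine.split('\t').length - coreColumns.length,
  )
  return lastHeaderLine?.includes('\t')
    ? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
    : Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
}

// Hypothetical 12-column BEDPE with a named header line
const header =
  '#chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tname\tscore\tstrand1\tstrand2\tsvtype\tcaller'
const row = 'chr1\t1000\t2000\tchr5\t5000\t6000\tsv1\t60\t+\t-\tDEL\tmanta'

console.log(getExtraColumnNames(header, row)) // ['svtype', 'caller']
console.log(getExtraColumnNames(undefined, row)) // ['field_0', 'field_1']
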
@@ -1,7 +1,5 @@
import { parseStrand } from './util'

import type { Buffer } from 'buffer'

function parseSTARFusionBreakpointString(str: string) {
const fields = str.split(':')
return {
@@ -12,7 +10,7 @@
}
}

export function parseSTARFusionBuffer(buffer: Buffer) {
export function parseSTARFusionBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
@@ -1,8 +1,6 @@
import VCF from '@gmod/vcf'
import { VcfFeature } from '@jbrowse/plugin-variants'

import type { Buffer } from 'buffer'

function getRows(lines: string[], vcfParser: VCF) {
const keys = new Set<string>()
const rows = lines.map((l, id) => {
@@ -43,7 +41,7 @@ function getRows(lines: string[], vcfParser: VCF) {
return { keys, rows }
}

export function parseVcfBuffer(buffer: Buffer) {
export function parseVcfBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
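
The remaining importer hunks switch every parser's signature from Node's Buffer type to a plain Uint8Array and drop the `import type { Buffer } from 'buffer'` lines; because Buffer is a Uint8Array subclass, existing callers keep working. A rough sketch of the decode step these parsers share (the trim/filter details beyond what the hunks show are assumed):

// Roughly the shared decode step: any Uint8Array (including a Node Buffer,
// which is a Uint8Array subclass) is decoded as UTF-8 and split into
// non-empty lines
function decodeLines(buffer: Uint8Array) {
  return new TextDecoder('utf8')
    .decode(buffer)
    .split(/\n|\r\n|\r/)
    .map(line => line.trim())
    .filter(line => !!line)
}

// Hypothetical usage with an in-memory snippet
const bytes = new TextEncoder().encode('#chrom\tstart\tend\nchr1\t100\t200\n')
console.log(decodeLines(bytes)) // ['#chrom\tstart\tend', 'chr1\t100\t200']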