Better support for extra columns bed/bedpe
cmdcolin committed Nov 27, 2024
1 parent 55cc899 commit a1dc312
Showing 4 changed files with 57 additions and 31 deletions.
plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts
@@ -1,6 +1,4 @@
-import type { Buffer } from 'buffer'
-
-export function parseBedBuffer(buffer: Buffer) {
+export function parseBedBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -15,10 +13,20 @@ export function parseBedBuffer(buffer: Buffer) {
),
)

+const lastHeaderLine = lines.filter(line => line.startsWith('#')).at(-1)

Check failure on line 16 in plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts (GitHub Actions / Lint, typecheck, test): Prefer `.findLast(…)` over `.filter(…).at(-1)`
+const coreColumns = ['refName', 'start', 'end', 'name', 'score', 'strand']
+const numExtraColumns = Math.max(
+0,
+(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
+)
+const extraNames = lastHeaderLine?.includes('\t')
+? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
+: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
+
+const colNames = [...coreColumns, ...extraNames]
+
return {
-columns: ['refName', 'start', 'end', 'name', 'score', 'strand'].map(c => ({
-name: c,
-})),
+columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')
@@ -30,6 +38,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
feature: {
uniqueId: `bed-${idx}`,
@@ -39,6 +50,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
}
}),
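For context, a standalone sketch (not part of the commit) of how the column naming added above plays out. `bedColumnNames` is a hypothetical helper wrapping the same expressions, the sample header line and the `cellType` column are made-up inputs, the line filtering is simplified, and `.findLast(…)` appears here only because it is the form the lint annotation above asks for:

const coreColumns = ['refName', 'start', 'end', 'name', 'score', 'strand']

function bedColumnNames(lines: string[]) {
  // Simplified stand-in for the adapter's own skipping of comment/track/browser lines
  const rest = lines.filter(l => !/^(#|track|browser)/.test(l))
  // Same result as .filter(line => line.startsWith('#')).at(-1), in the linter-preferred form
  const lastHeaderLine = lines.findLast(line => line.startsWith('#'))
  const numExtraColumns = Math.max(
    0,
    (rest[0]?.split('\t')?.length || 0) - coreColumns.length,
  )
  // Extra columns take their names from a tab-delimited header line, else field_0, field_1, ...
  const extraNames = lastHeaderLine?.includes('\t')
    ? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
    : Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
  return [...coreColumns, ...extraNames]
}

bedColumnNames([
  '#chrom\tstart\tend\tname\tscore\tstrand\tcellType',
  'chr1\t100\t200\tfeat1\t0\t+\tliver',
]) // [...coreColumns, 'cellType']

bedColumnNames(['chr1\t100\t200\tfeat1\t0\t+\tliver']) // [...coreColumns, 'field_0']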
plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts
@@ -1,6 +1,4 @@
-import type { Buffer } from 'buffer'
-
-export function parseBedPEBuffer(buffer: Buffer) {
+export function parseBedPEBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -14,24 +12,36 @@ export function parseBedPEBuffer(buffer: Buffer) {
line.startsWith('track')
),
)
+const lastHeaderLine = lines.filter(line => line.startsWith('#')).at(-1)

Check failure on line 15 in plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts (GitHub Actions / Lint, typecheck, test): Prefer `.findLast(…)` over `.filter(…).at(-1)`

+const coreColumns = [
+'refName',
+'start',
+'end',
+'mateRef',
+'mateStart',
+'mateEnd',
+'name',
+'score',
+'strand',
+'mateStrand',
+]
+const numExtraColumns = Math.max(
+0,
+(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
+)
+
+const extraNames = lastHeaderLine?.includes('\t')
+? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
+: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
+
+const colNames = [...coreColumns, ...extraNames]
return {
-columns: [
-'refName',
-'start',
-'end',
-'mateStart',
-'mateEnd',
-'name',
-'score',
-'strand',
-'mateStrand',
-].map(c => ({
-name: c,
-})),
+columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')

return {
cellData: {
refName: cols[0],
@@ -41,24 +51,30 @@ export function parseBedPEBuffer(buffer: Buffer) {
mateStart: cols[4],
mateEnd: cols[5],
name: cols[6],
-score: cols[7],
+score: +cols[7]! || cols[7],
strand: cols[8],
mateStrand: cols[9],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
feature: {
uniqueId: `bedpe-${idx}`,
refName: cols[0],
start: +cols[1]!,
end: +cols[2]!,
-strand: cols[8],
+strand: cols[8] === '-' ? -1 : 1,
mate: {
refName: cols[3],
start: +cols[4]!,
end: +cols[5]!,
-strand: cols[9],
+strand: cols[9] === '-' ? -1 : 1,
},
name: cols[6],
score: cols[7],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
}
}),
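A small illustration (not from the commit) of the two value coercions introduced above, run on a made-up BEDPE row; `toStrand` and `toScore` are hypothetical helpers that mirror the inline expressions:

// Hypothetical helpers mirroring the inline expressions in the diff above
const toStrand = (s?: string) => (s === '-' ? -1 : 1) // strand as -1/1 instead of the raw string
const toScore = (s?: string) => +s! || s // numeric when parseable, otherwise the raw string

const cols = 'chr1\t100\t200\tchr5\t500\t600\tsv1\t.\t+\t-'.split('\t')
toStrand(cols[8]) // 1
toStrand(cols[9]) // -1
toScore(cols[7]) // '.'  (+'.' is NaN, so the string falls through)
toScore('37') // 37
toScore('0') // '0'  (0 is falsy, so the || fallback keeps the string form)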
@@ -1,7 +1,5 @@
import { parseStrand } from './util'

-import type { Buffer } from 'buffer'
-
function parseSTARFusionBreakpointString(str: string) {
const fields = str.split(':')
return {
@@ -12,7 +10,7 @@ function parseSTARFusionBreakpointString(str: string) {
}
}

-export function parseSTARFusionBuffer(buffer: Buffer) {
+export function parseSTARFusionBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
@@ -1,8 +1,6 @@
import VCF from '@gmod/vcf'
import { VcfFeature } from '@jbrowse/plugin-variants'

-import type { Buffer } from 'buffer'
-
function getRows(lines: string[], vcfParser: VCF) {
const keys = new Set<string>()
const rows = lines.map((l, id) => {
@@ -43,7 +41,7 @@ function getRows(lines: string[], vcfParser: VCF) {
return { keys, rows }
}

-export function parseVcfBuffer(buffer: Buffer) {
+export function parseVcfBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
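All four adapters now take a plain Uint8Array instead of the Node Buffer type, which is why the `import type { Buffer } from 'buffer'` lines disappear. A call sketch for illustration only; the relative import path is assumed, not taken from the commit:

import { parseBedBuffer } from './BedImport' // assumed path to the adapter shown above

// Any Uint8Array works, e.g. text encoded in the browser
const bytes = new TextEncoder().encode('chr1\t100\t200\tfeat1\t0\t+\n')
const sheet = parseBedBuffer(bytes)

// Node Buffers still work too, since Buffer is a Uint8Array subclass:
// parseBedBuffer(await fs.promises.readFile('regions.bed'))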
