From a1dc312b43c63fb772dd10f99ab29f66d02870c8 Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 26 Nov 2024 22:08:51 -0500 Subject: [PATCH] Better support for extra columns bed/bedpe --- .../importAdapters/BedImport.ts | 26 ++++++--- .../importAdapters/BedpeImport.ts | 54 ++++++++++++------- .../importAdapters/STARFusionImport.ts | 4 +- .../importAdapters/VcfImport.ts | 4 +- 4 files changed, 57 insertions(+), 31 deletions(-) diff --git a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts index 15a613d9d6..bb8545c252 100644 --- a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts +++ b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts @@ -1,6 +1,4 @@ -import type { Buffer } from 'buffer' - -export function parseBedBuffer(buffer: Buffer) { +export function parseBedBuffer(buffer: Uint8Array) { const data = new TextDecoder('utf8').decode(buffer) const lines = data .split(/\n|\r\n|\r/) @@ -15,10 +13,20 @@ export function parseBedBuffer(buffer: Buffer) { ), ) + const lastHeaderLine = lines.filter(line => line.startsWith('#')).at(-1) + const coreColumns = ['refName', 'start', 'end', 'name', 'score', 'strand'] + const numExtraColumns = Math.max( + 0, + (rest[0]?.split('\t')?.length || 0) - coreColumns.length, + ) + const extraNames = lastHeaderLine?.includes('\t') + ? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length) + : Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`) + + const colNames = [...coreColumns, ...extraNames] + return { - columns: ['refName', 'start', 'end', 'name', 'score', 'strand'].map(c => ({ - name: c, - })), + columns: colNames.map(c => ({ name: c })), rowSet: { rows: rest.map((line, idx) => { const cols = line.split('\t') @@ -30,6 +38,9 @@ export function parseBedBuffer(buffer: Buffer) { name: cols[3], score: cols[4], strand: cols[5], + ...Object.fromEntries( + extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]), + ), }, feature: { uniqueId: `bed-${idx}`, @@ -39,6 +50,9 @@ export function parseBedBuffer(buffer: Buffer) { name: cols[3], score: cols[4], strand: cols[5], + ...Object.fromEntries( + extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]), + ), }, } }), diff --git a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts index 600168eb59..0ba1e2159b 100644 --- a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts +++ b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts @@ -1,6 +1,4 @@ -import type { Buffer } from 'buffer' - -export function parseBedPEBuffer(buffer: Buffer) { +export function parseBedPEBuffer(buffer: Uint8Array) { const data = new TextDecoder('utf8').decode(buffer) const lines = data .split(/\n|\r\n|\r/) @@ -14,24 +12,36 @@ export function parseBedPEBuffer(buffer: Buffer) { line.startsWith('track') ), ) + const lastHeaderLine = lines.filter(line => line.startsWith('#')).at(-1) + + const coreColumns = [ + 'refName', + 'start', + 'end', + 'mateRef', + 'mateStart', + 'mateEnd', + 'name', + 'score', + 'strand', + 'mateStrand', + ] + const numExtraColumns = Math.max( + 0, + (rest[0]?.split('\t')?.length || 0) - coreColumns.length, + ) + + const extraNames = lastHeaderLine?.includes('\t') + ? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length) + : Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`) + const colNames = [...coreColumns, ...extraNames] return { - columns: [ - 'refName', - 'start', - 'end', - 'mateStart', - 'mateEnd', - 'name', - 'score', - 'strand', - 'mateStrand', - ].map(c => ({ - name: c, - })), + columns: colNames.map(c => ({ name: c })), rowSet: { rows: rest.map((line, idx) => { const cols = line.split('\t') + return { cellData: { refName: cols[0], @@ -41,24 +51,30 @@ export function parseBedPEBuffer(buffer: Buffer) { mateStart: cols[4], mateEnd: cols[5], name: cols[6], - score: cols[7], + score: +cols[7]! || cols[7], strand: cols[8], mateStrand: cols[9], + ...Object.fromEntries( + extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]), + ), }, feature: { uniqueId: `bedpe-${idx}`, refName: cols[0], start: +cols[1]!, end: +cols[2]!, - strand: cols[8], + strand: cols[8] === '-' ? -1 : 1, mate: { refName: cols[3], start: +cols[4]!, end: +cols[5]!, - strand: cols[9], + strand: cols[9] === '-' ? -1 : 1, }, name: cols[6], score: cols[7], + ...Object.fromEntries( + extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]), + ), }, } }), diff --git a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/STARFusionImport.ts b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/STARFusionImport.ts index e119ab06d9..54515dd940 100644 --- a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/STARFusionImport.ts +++ b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/STARFusionImport.ts @@ -1,7 +1,5 @@ import { parseStrand } from './util' -import type { Buffer } from 'buffer' - function parseSTARFusionBreakpointString(str: string) { const fields = str.split(':') return { @@ -12,7 +10,7 @@ function parseSTARFusionBreakpointString(str: string) { } } -export function parseSTARFusionBuffer(buffer: Buffer) { +export function parseSTARFusionBuffer(buffer: Uint8Array) { const text = new TextDecoder('utf8').decode(buffer) const lines = text .split(/\n|\r\n|\r/) diff --git a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/VcfImport.ts b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/VcfImport.ts index a8de6a1c3b..0c60c3566d 100644 --- a/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/VcfImport.ts +++ b/plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/VcfImport.ts @@ -1,8 +1,6 @@ import VCF from '@gmod/vcf' import { VcfFeature } from '@jbrowse/plugin-variants' -import type { Buffer } from 'buffer' - function getRows(lines: string[], vcfParser: VCF) { const keys = new Set() const rows = lines.map((l, id) => { @@ -43,7 +41,7 @@ function getRows(lines: string[], vcfParser: VCF) { return { keys, rows } } -export function parseVcfBuffer(buffer: Buffer) { +export function parseVcfBuffer(buffer: Uint8Array) { const text = new TextDecoder('utf8').decode(buffer) const lines = text .split(/\n|\r\n|\r/)