Better support for extra columns bed/bedpe
cmdcolin committed Nov 27, 2024
1 parent 55cc899 commit a1dc312
Showing 4 changed files with 57 additions and 31 deletions.
plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts
@@ -1,6 +1,4 @@
-import type { Buffer } from 'buffer'
-
-export function parseBedBuffer(buffer: Buffer) {
+export function parseBedBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -15,10 +13,20 @@ export function parseBedBuffer(buffer: Buffer) {
),
)

+const lastHeaderLine = lines.filter(line => line.startsWith('#')).at(-1)

Check failure on line 16 in plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedImport.ts (GitHub Actions / Lint, typecheck, test): Prefer `.findLast(…)` over `.filter(…).at(-1)`
+const coreColumns = ['refName', 'start', 'end', 'name', 'score', 'strand']
+const numExtraColumns = Math.max(
+0,
+(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
+)
+const extraNames = lastHeaderLine?.includes('\t')
+? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
+: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
+
+const colNames = [...coreColumns, ...extraNames]
+
return {
-columns: ['refName', 'start', 'end', 'name', 'score', 'strand'].map(c => ({
-name: c,
-})),
+columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')
@@ -30,6 +38,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
feature: {
uniqueId: `bed-${idx}`,
@@ -39,6 +50,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
}
}),
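For context, a standalone sketch (not part of the commit) of how the column naming added above plays out. `bedColumnNames` is a hypothetical helper wrapping the same expressions, the sample header line and the `cellType` column are made-up inputs, the line filtering is simplified, and `.findLast(…)` appears here only because it is the form the lint annotation above asks for:

const coreColumns = ['refName', 'start', 'end', 'name', 'score', 'strand']

function bedColumnNames(lines: string[]) {
  // Simplified stand-in for the adapter's own skipping of comment/track/browser lines
  const rest = lines.filter(l => !/^(#|track|browser)/.test(l))
  // Same result as .filter(line => line.startsWith('#')).at(-1), in the linter-preferred form
  const lastHeaderLine = lines.findLast(line => line.startsWith('#'))
  const numExtraColumns = Math.max(
    0,
    (rest[0]?.split('\t')?.length || 0) - coreColumns.length,
  )
  // Extra columns take their names from a tab-delimited header line, else field_0, field_1, ...
  const extraNames = lastHeaderLine?.includes('\t')
    ? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
    : Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
  return [...coreColumns, ...extraNames]
}

bedColumnNames([
  '#chrom\tstart\tend\tname\tscore\tstrand\tcellType',
  'chr1\t100\t200\tfeat1\t0\t+\tliver',
]) // [...coreColumns, 'cellType']

bedColumnNames(['chr1\t100\t200\tfeat1\t0\t+\tliver']) // [...coreColumns, 'field_0']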
plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts
@@ -1,6 +1,4 @@
-import type { Buffer } from 'buffer'
-
-export function parseBedPEBuffer(buffer: Buffer) {
+export function parseBedPEBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -14,24 +12,36 @@ export function parseBedPEBuffer(buffer: Buffer) {
line.startsWith('track')
),
)
+const lastHeaderLine = lines.filter(line => line.startsWith('#')).at(-1)

Check failure on line 15 in plugins/spreadsheet-view/src/SpreadsheetView/importAdapters/BedpeImport.ts (GitHub Actions / Lint, typecheck, test): Prefer `.findLast(…)` over `.filter(…).at(-1)`

+const coreColumns = [
+'refName',
+'start',
+'end',
+'mateRef',
+'mateStart',
+'mateEnd',
+'name',
+'score',
+'strand',
+'mateStrand',
+]
+const numExtraColumns = Math.max(
+0,
+(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
+)
+
+const extraNames = lastHeaderLine?.includes('\t')
+? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
+: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
+
+const colNames = [...coreColumns, ...extraNames]
return {
-columns: [
-'refName',
-'start',
-'end',
-'mateStart',
-'mateEnd',
-'name',
-'score',
-'strand',
-'mateStrand',
-].map(c => ({
-name: c,
-})),
+columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')

return {
cellData: {
refName: cols[0],
@@ -41,24 +51,30 @@ export function parseBedPEBuffer(buffer: Buffer) {
mateStart: cols[4],
mateEnd: cols[5],
name: cols[6],
-score: cols[7],
+score: +cols[7]! || cols[7],
strand: cols[8],
mateStrand: cols[9],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
feature: {
uniqueId: `bedpe-${idx}`,
refName: cols[0],
start: +cols[1]!,
end: +cols[2]!,
-strand: cols[8],
+strand: cols[8] === '-' ? -1 : 1,
mate: {
refName: cols[3],
start: +cols[4]!,
end: +cols[5]!,
-strand: cols[9],
+strand: cols[9] === '-' ? -1 : 1,
},
name: cols[6],
score: cols[7],
+...Object.fromEntries(
+extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
+),
},
}
}),
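A small illustration (not from the commit) of the two value coercions introduced above, run on a made-up BEDPE row; `toStrand` and `toScore` are hypothetical helpers that mirror the inline expressions:

// Hypothetical helpers mirroring the inline expressions in the diff above
const toStrand = (s?: string) => (s === '-' ? -1 : 1) // strand as -1/1 instead of the raw string
const toScore = (s?: string) => +s! || s // numeric when parseable, otherwise the raw string

const cols = 'chr1\t100\t200\tchr5\t500\t600\tsv1\t.\t+\t-'.split('\t')
toStrand(cols[8]) // 1
toStrand(cols[9]) // -1
toScore(cols[7]) // '.'  (+'.' is NaN, so the string falls through)
toScore('37') // 37
toScore('0') // '0'  (0 is falsy, so the || fallback keeps the string form)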
@@ -1,7 +1,5 @@
import { parseStrand } from './util'

-import type { Buffer } from 'buffer'
-
function parseSTARFusionBreakpointString(str: string) {
const fields = str.split(':')
return {
@@ -12,7 +10,7 @@ function parseSTARFusionBreakpointString(str: string) {
}
}

-export function parseSTARFusionBuffer(buffer: Buffer) {
+export function parseSTARFusionBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
@@ -1,8 +1,6 @@
import VCF from '@gmod/vcf'
import { VcfFeature } from '@jbrowse/plugin-variants'

-import type { Buffer } from 'buffer'
-
function getRows(lines: string[], vcfParser: VCF) {
const keys = new Set<string>()
const rows = lines.map((l, id) => {
@@ -43,7 +41,7 @@ function getRows(lines: string[], vcfParser: VCF) {
return { keys, rows }
}

-export function parseVcfBuffer(buffer: Buffer) {
+export function parseVcfBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
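All four adapters now take a plain Uint8Array instead of the Node Buffer type, which is why the `import type { Buffer } from 'buffer'` lines disappear. A call sketch for illustration only; the relative import path is assumed, not taken from the commit:

import { parseBedBuffer } from './BedImport' // assumed path to the adapter shown above

// Any Uint8Array works, e.g. text encoded in the browser
const bytes = new TextEncoder().encode('chr1\t100\t200\tfeat1\t0\t+\n')
const sheet = parseBedBuffer(bytes)

// Node Buffers still work too, since Buffer is a Uint8Array subclass:
// parseBedBuffer(await fs.promises.readFile('regions.bed'))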
