Allow opening bedpe files from user tracks in SV inspector (#4694)
cmdcolin authored Nov 27, 2024
1 parent 03d52c6 · commit b320601
Showing 6 changed files with 89 additions and 58 deletions.
plugins/bed/src/BedpeAdapter/configSchema.ts (3 changes: 2 additions & 1 deletion)
@@ -11,7 +11,8 @@ const BedpeAdapter = ConfigurationSchema(
{
/**
* #slot
* can be plaintext or gzipped, not indexed so loaded into memory on startup
* can be plaintext or gzipped, not indexed so loaded into memory on
* startup
*/
bedpeLocation: {
type: 'fileLocation',
plugins/spreadsheet-view/src/SpreadsheetView/ImportWizard.ts (56 changes: 30 additions & 26 deletions)
@@ -27,11 +27,24 @@ function getType(adapter: Record<string, unknown>) {
return 'VCF'
} else if (adapter.bedLocation || adapter.bedGzLocation) {
return 'BED'
} else if (adapter.bedpeLocation) {
return 'BEDPE'
} else {
return undefined
}
}

// hardcodes a couple different adapter types
function getAdapterLoc(adapter: Record<string, FileLocation>) {
return (
adapter.vcfLocation ||
adapter.vcfGzLocation ||
adapter.bedLocation ||
adapter.bedGzLocation ||
adapter.bedpeLocation
)
}

// regexp used to guess the type of a file or URL from its file extension
const fileTypesRegexp = new RegExp(`\\.(${fileTypes.join('|')})(\\.gz)?$`, 'i')

@@ -137,36 +150,27 @@ export default function stateModelFactory() {
...sessionTracks,
] as AnyConfigurationModel[]
return allTracks
.filter(track => {
const assemblyNames = readConfObject(track, 'assemblyNames')
const adapter = readConfObject(track, 'adapter')
return (
assemblyNames.includes(selectedAssembly) &&
(adapter.vcfLocation ||
adapter.vcfGzLocation ||
adapter.bedLocation ||
adapter.bedGzLocation)
)
})
.map(track => {
const assemblyNames = readConfObject(track, 'assemblyNames')
const adapter = readConfObject(track, 'adapter')
const category = readConfObject(track, 'category').join(',')
return {
track,
label: [
category ? `[${category}]` : '',
getTrackName(track, session),
]
.filter(f => !!f)
.join(' '),
assemblyNames: readConfObject(track, 'assemblyNames'),
type: getType(adapter) || 'UNKNOWN',
loc: (adapter.vcfLocation ||
adapter.vcfGzLocation ||
adapter.bedLocation ||
adapter.bedGzLocation) as FileLocation,
}
const loc = getAdapterLoc(adapter)
return assemblyNames.includes(selectedAssembly) && loc
? {
track,
label: [
category ? `[${category}]` : '',
getTrackName(track, session),
]
.filter(f => !!f)
.join(' '),
assemblyNames,
type: getType(adapter) || 'UNKNOWN',
loc,
}
: undefined
})
.filter(f => !!f)
.sort((a, b) => a.label.localeCompare(b.label))
},
}))
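
For context, the two helpers above (getType and getAdapterLoc) centralize the adapter detection that the track filter previously inlined, with BEDPE added as a new case. A minimal, self-contained sketch of how they classify a track's adapter config follows; the UriLocation interface and sample adapter object are simplified stand-ins for illustration, not code from the diff:

// Simplified stand-in for JBrowse's FileLocation type (the real one lives in
// @jbrowse/core/util/types)
interface UriLocation {
  locationType: 'UriLocation'
  uri: string
}

// Same logic as getType() in the diff: classify an adapter config by which
// *Location slot it carries
function getType(adapter: Record<string, unknown>) {
  if (adapter.vcfLocation || adapter.vcfGzLocation) {
    return 'VCF'
  } else if (adapter.bedLocation || adapter.bedGzLocation) {
    return 'BED'
  } else if (adapter.bedpeLocation) {
    return 'BEDPE'
  } else {
    return undefined
  }
}

// Same logic as getAdapterLoc() in the diff: pick whichever location slot is set
function getAdapterLoc(adapter: Record<string, UriLocation | undefined>) {
  return (
    adapter.vcfLocation ||
    adapter.vcfGzLocation ||
    adapter.bedLocation ||
    adapter.bedGzLocation ||
    adapter.bedpeLocation
  )
}

// Hypothetical adapter config, shaped like what readConfObject(track, 'adapter')
// might return for a BEDPE user track
const adapter: Record<string, UriLocation | undefined> = {
  bedpeLocation: { locationType: 'UriLocation', uri: 'sv_calls.bedpe.gz' },
}

console.log(getType(adapter)) // 'BEDPE'
console.log(getAdapterLoc(adapter)?.uri) // 'sv_calls.bedpe.gz'
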
@@ -1,6 +1,4 @@
import type { Buffer } from 'buffer'

export function parseBedBuffer(buffer: Buffer) {
export function parseBedBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -15,10 +13,20 @@ export function parseBedBuffer(buffer: Buffer) {
),
)

const lastHeaderLine = lines.findLast(line => line.startsWith('#'))
const coreColumns = ['refName', 'start', 'end', 'name', 'score', 'strand']
const numExtraColumns = Math.max(
0,
(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
)
const extraNames = lastHeaderLine?.includes('\t')
? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)

const colNames = [...coreColumns, ...extraNames]

return {
columns: ['refName', 'start', 'end', 'name', 'score', 'strand'].map(c => ({
name: c,
})),
columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')
@@ -30,6 +38,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
feature: {
uniqueId: `bed-${idx}`,
@@ -39,6 +50,9 @@ export function parseBedBuffer(buffer: Buffer) {
name: cols[3],
score: cols[4],
strand: cols[5],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
}
}),
@@ -1,6 +1,4 @@
import type { Buffer } from 'buffer'

export function parseBedPEBuffer(buffer: Buffer) {
export function parseBedPEBuffer(buffer: Uint8Array) {
const data = new TextDecoder('utf8').decode(buffer)
const lines = data
.split(/\n|\r\n|\r/)
@@ -14,24 +12,36 @@ export function parseBedPEBuffer(buffer: Buffer) {
line.startsWith('track')
),
)
const lastHeaderLine = lines.findLast(line => line.startsWith('#'))

const coreColumns = [
'refName',
'start',
'end',
'mateRef',
'mateStart',
'mateEnd',
'name',
'score',
'strand',
'mateStrand',
]
const numExtraColumns = Math.max(
0,
(rest[0]?.split('\t')?.length || 0) - coreColumns.length,
)

const extraNames = lastHeaderLine?.includes('\t')
? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
: Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)

const colNames = [...coreColumns, ...extraNames]
return {
columns: [
'refName',
'start',
'end',
'mateStart',
'mateEnd',
'name',
'score',
'strand',
'mateStrand',
].map(c => ({
name: c,
})),
columns: colNames.map(c => ({ name: c })),
rowSet: {
rows: rest.map((line, idx) => {
const cols = line.split('\t')

return {
cellData: {
refName: cols[0],
@@ -41,24 +51,30 @@
mateStart: cols[4],
mateEnd: cols[5],
name: cols[6],
score: cols[7],
score: +cols[7]! || cols[7],
strand: cols[8],
mateStrand: cols[9],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
feature: {
uniqueId: `bedpe-${idx}`,
refName: cols[0],
start: +cols[1]!,
end: +cols[2]!,
strand: cols[8],
strand: cols[8] === '-' ? -1 : 1,
mate: {
refName: cols[3],
start: +cols[4]!,
end: +cols[5]!,
strand: cols[9],
strand: cols[9] === '-' ? -1 : 1,
},
name: cols[6],
score: cols[7],
...Object.fromEntries(
extraNames.map((n, idx) => [n, cols[idx + coreColumns.length]]),
),
},
}
}),
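
The new extra-column handling in the BEDPE (and BED) parsers derives names for columns beyond the core fields: when the last '#' header line is tab-delimited, its trailing entries become the column names; otherwise the extras fall back to field_0, field_1, and so on. A small sketch of that naming step in isolation, using a made-up 12-column BEDPE:

// The ten core BEDPE columns, matching coreColumns in the diff
const coreColumns = [
  'refName', 'start', 'end',
  'mateRef', 'mateStart', 'mateEnd',
  'name', 'score', 'strand', 'mateStrand',
]

// Mirror of the naming step: take trailing entries from the last '#' header
// line when it is tab-delimited, otherwise synthesize field_N names from the
// width of the first data row
function getExtraColumnNames(
  lastHeaderLine: string | undefined,
  firstDataLine: string,
) {
  const numExtraColumns = Math.max(
    0,
    firstDataLine.split('\t').length - coreColumns.length,
  )
  return lastHeaderLine?.includes('\t')
    ? lastHeaderLine.slice(1).split('\t').slice(coreColumns.length)
    : Array.from({ length: numExtraColumns }, (_v, i) => `field_${i}`)
}

// Hypothetical 12-column BEDPE with a named header line
const header =
  '#chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tname\tscore\tstrand1\tstrand2\tsvtype\tcaller'
const row = 'chr1\t1000\t2000\tchr5\t5000\t6000\tsv1\t60\t+\t-\tDEL\tmanta'

console.log(getExtraColumnNames(header, row)) // ['svtype', 'caller']
console.log(getExtraColumnNames(undefined, row)) // ['field_0', 'field_1']
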
@@ -1,7 +1,5 @@
import { parseStrand } from './util'

import type { Buffer } from 'buffer'

function parseSTARFusionBreakpointString(str: string) {
const fields = str.split(':')
return {
@@ -12,7 +10,7 @@
}
}

export function parseSTARFusionBuffer(buffer: Buffer) {
export function parseSTARFusionBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
@@ -1,8 +1,6 @@
import VCF from '@gmod/vcf'
import { VcfFeature } from '@jbrowse/plugin-variants'

import type { Buffer } from 'buffer'

function getRows(lines: string[], vcfParser: VCF) {
const keys = new Set<string>()
const rows = lines.map((l, id) => {
@@ -43,7 +41,7 @@ function getRows(lines: string[], vcfParser: VCF) {
return { keys, rows }
}

export function parseVcfBuffer(buffer: Buffer) {
export function parseVcfBuffer(buffer: Uint8Array) {
const text = new TextDecoder('utf8').decode(buffer)
const lines = text
.split(/\n|\r\n|\r/)
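
The remaining importer hunks switch every parser's signature from Node's Buffer type to a plain Uint8Array and drop the `import type { Buffer } from 'buffer'` lines; because Buffer is a Uint8Array subclass, existing callers keep working. A rough sketch of the decode step these parsers share (the trim/filter details beyond what the hunks show are assumed):

// Roughly the shared decode step: any Uint8Array (including a Node Buffer,
// which is a Uint8Array subclass) is decoded as UTF-8 and split into
// non-empty lines
function decodeLines(buffer: Uint8Array) {
  return new TextDecoder('utf8')
    .decode(buffer)
    .split(/\n|\r\n|\r/)
    .map(line => line.trim())
    .filter(line => !!line)
}

// Hypothetical usage with an in-memory snippet
const bytes = new TextEncoder().encode('#chrom\tstart\tend\nchr1\t100\t200\n')
console.log(decodeLines(bytes)) // ['#chrom\tstart\tend', 'chr1\t100\t200']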