Skip to content

Commit

Permalink
fix: mzxml (#10)
Browse files Browse the repository at this point in the history
* fix: mzxml

* fix: requests
  • Loading branch information
josoriom authored May 10, 2020
1 parent 1a2102f commit 6531057
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 91 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
"dependencies": {
"base64-arraybuffer": "^0.2.0",
"base64-js": "^1.3.1",
"buffer": "^5.6.0",

This comment has been minimized.

Copy link
@targos

targos May 10, 2020

Member

please remove this dependency

"camelcase": "^5.3.1",
"fast-xml-parser": "^3.16.0",
"pako": "^1.0.11"
Expand Down
129 changes: 83 additions & 46 deletions src/__tests__/mzXML.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,59 +6,96 @@ import { parseMZ } from '..';
const pathFiles = join(__dirname, '/../../testFiles/mzXML/');

// Jest suite for the mzXML reader: each case loads a fixture from `pathFiles`
// and checks what `parseMZ` returns for it. `toBeWithinRange` is the custom
// matcher registered through `expect.extend` further down in this file.
// NOTE(review): this span comes from a commit diff view, so removed and added
// lines sit next to each other without +/- markers (for example, two `it(`
// openers in a row); it is not runnable exactly as shown.
// NOTE(review): the suite is labelled 'mzML' although every fixture read below
// is an .mzXML file — confirm whether the label should say 'mzXML'.
describe('mzML', () => {
// 32-bit fixture: decoded scans are compared with the attributes reported in
// `series.ms.info` (peak count, m/z bounds, total ion current within +/-50)
// instead of hard-coded array lengths.
it('tiny2.0.mzXML', () => {
it('read 32 bits mzXML test file', () => {
const data = readFileSync(join(pathFiles, 'tiny2.0.mzXML'));
let response = parseMZ(data);
expect(response.times).toStrictEqual([1209, 2577]);
expect(response.series.ms.data).toHaveLength(2);
expect(response.series.ms.data[0][0]).toHaveLength(5252);
expect(response.series.ms.data[1][0]).toHaveLength(172);
const response = parseMZ(data);
const scans = response.series.ms.data;
const info = response.series.ms.info;
const highMz = info[0].highMz;
const lowMz = info[0].lowMz;
const peaksCount0 = info[0].peaksCount;
const peaksCount1 = info[1].peaksCount;
// Summed intensities of each scan, compared below with the reported totIonCurrent.
const totIonCurrent0 = scans[0][1].reduce((a, b) => a + b);
const totIonCurrent1 = scans[1][1].reduce((a, b) => a + b);
const firstMz = scans[0][0];
expect(firstMz[0]).toBeWithinRange(lowMz, highMz);
expect(totIonCurrent0).toBeWithinRange(
info[0].totIonCurrent - 50,
info[0].totIonCurrent + 50,
);
expect(totIonCurrent1).toBeWithinRange(
info[1].totIonCurrent - 50,
info[1].totIonCurrent + 50,
);
expect(response.times).toStrictEqual([353.43, 356.68]);
expect(scans).toHaveLength(2);
expect(scans[0][0]).toHaveLength(peaksCount0);
expect(scans[1][0]).toHaveLength(peaksCount1);
});

// Case for the compressed 32-bit fixture; the added version is registered with `it.skip`.
// NOTE(review): the expected values below (mean m/z of exactly 0, intensity sum
// of -32) look like placeholders rather than real spectrum values — confirm
// before enabling this case.
it('read compressed tiny3.0.mzXML 32bits', () => {
// eslint-disable-next-line jest/no-disabled-tests
it.skip('read 32 bits mzXML compressed test file', () => {
const data = readFileSync(join(pathFiles, 'tiny3.0.mzXML'));
let response = parseMZ(data);
expect(response.times).toStrictEqual([1209, 2577]);
const response = parseMZ(data);
const scans = response.series.ms.data;
const highMz = response.series.ms.info[0].highMz;
const lowMz = response.series.ms.info[0].lowMz;
const peaksCount0 = response.series.ms.info[0].peaksCount;
const peaksCount1 = response.series.ms.info[1].peaksCount;
const intensity = scans[0][1].reduce((a, b) => a + b);
// Mean m/z of the first scan.
const mz = scans[0][0].reduce((a, b) => a + b) / scans[0][0].length;
expect(mz).toBeWithinRange(lowMz, highMz);
expect(mz).toStrictEqual(0);
expect(response.series.ms.data[0][0]).toHaveLength(peaksCount0);
expect(response.series.ms.data[1][0]).toHaveLength(peaksCount1);
expect(intensity).toStrictEqual(-32);
expect(response.times).toStrictEqual([353.43, 356.68]);
expect(response.series.ms.data).toHaveLength(2);
expect(response.series.ms.data[0][0]).toHaveLength(391);
expect(response.series.ms.data[1][0]).toHaveLength(27);
});

// Scan-attribute case for the compressed fixture: compares `info[1]` with a
// literal attribute object and the decoded lengths with `peaksCount`.
it('read metadata scan 2 of compressed tiny3.0.mzXML 32bits', () => {
const data = readFileSync(join(pathFiles, 'tiny3.0.mzXML'));
let response = parseMZ(data);
let metadata = {
num: 2,
msLevel: 2,
peaksCount: 43,
polarity: '+',
scanType: 'Full',
retentionTime: 'PT356.68S',
collisionEnergy: 35,
lowMz: 223.089,
highMz: 531.078,
basePeakMz: 428.905,
basePeakIntensity: 301045,
totIonCurrent: 764637,
};
expect(response.series.ms.info[1]).toStrictEqual(metadata);
expect(response.series.ms.data[0][0]).toHaveLength(
response.series.ms.info[0].peaksCount,
);
expect(response.series.ms.data[1][0]).toHaveLength(
response.series.ms.info[1].peaksCount,
);
expect(response.series.ms.data[0][0][0]).toStrictEqual(-2);
expect(response.series.ms.data[0][1][0]).toStrictEqual(0);
});

// 64-bit fixture: checks the first m/z value, the summed intensity of scan 0,
// the number of scans, and the per-scan lengths against `info[i].peaksCount`.
it('read bigTest.mzML', () => {
it('read 64 bits mzXML test file', () => {
const data = readFileSync(join(pathFiles, 'bigTest.mzXML'));
let response = parseMZ(data);
expect(response.times).toHaveLength(12000);
expect(response.times.slice(0, 6)).toStrictEqual([
965,
28941,
29370,
29799,
30228,
30657,
]);
expect(response.series.ms.data).toHaveLength(12000);
expect(response.series.ms.data[0][0]).toHaveLength(1296);
expect(response.series.ms.data[1][0]).toHaveLength(2);
expect(response.series.ms.data[2][0]).toHaveLength(2);
const response = parseMZ(data);
const scans = response.series.ms.data;
const info = response.series.ms.info;
const highMz = info[0].highMz;
const lowMz = info[0].lowMz;
const peaksCount0 = info[0].peaksCount;
const peaksCount1 = info[1].peaksCount;
const totIonCurrent0 = scans[0][1].reduce((a, b) => a + b);
const firstMz = scans[0][0];
expect(firstMz[0]).toStrictEqual(100);
expect(firstMz[0]).toBeWithinRange(lowMz, highMz);
expect(totIonCurrent0).toStrictEqual(295779);
expect(scans).toHaveLength(12000);
expect(scans[0][0]).toHaveLength(peaksCount0);
expect(scans[1][0]).toHaveLength(peaksCount1);
});
});

/**
 * Registers the custom `toBeWithinRange` matcher on Jest's `expect`.
 *
 * The matcher passes when `floor <= received <= ceiling` (both bounds
 * inclusive). The failure message is phrased for the `.not` form when the
 * value is inside the range and for the plain form when it is outside.
 */
expect.extend({
  toBeWithinRange(received, floor, ceiling) {
    const pass = received >= floor && received <= ceiling;
    // Only the word "not " differs between the two messages.
    const negation = pass ? 'not ' : '';
    return {
      pass,
      message: () =>
        `expected ${received} ${negation}to be within range ${floor} - ${ceiling}`,
    };
  },
});
43 changes: 21 additions & 22 deletions src/mzxml/parseBinaryDataArray.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,33 @@ import { toByteArray } from 'base64-js';
import { parseCvParam } from './parseCvParam';

export function parseBinaryDataArray(node) {
let data = node.peaks;
let attr = node._attr;
let cvParam = parseCvParam(data._attr);
if (!data || !attr) return [];
let bytes = decoder(data._data, cvParam);

let kind = '';
if (cvParam.contentType) {
kind = cvParam.contentType;
} else if (cvParam.pairOrder) {
kind = cvParam.pairOrder;
} else {
throw new Error('unknown binary data type');
const data = node.peaks;
const attr = node._attr;
const string = data._data;
const base64 = /^([A-Za-z0-9/+]{4})*(([A-Za-z0-9/+]{3}=)|([A-Za-z0-9/+]{2}==))?$/;
if (base64.test(string) === false) {
return {
data: [],
};
}

// console.log(buffer)
const cvParam = parseCvParam(data._attr);
if (!data || !attr) return [];
const bytes = decoder(data._data, cvParam);
const kind = cvParam.contentType ? cvParam.contentType : cvParam.pairOrder;
const buffer = new DataView(bytes.buffer);
let result = {};
if (cvParam.precision === 64) {
let result = {};
let buffer = Buffer.from(bytes);
result.data = new Float64Array(buffer.length / 8);
for (let i = 0; i < buffer.length; i += 8) {
result.data[i / 8] = buffer.readDoubleBE(i);
result.data = new Float64Array(bytes.byteLength / 8);
for (let i = 0; i < bytes.byteLength; i += 8) {
result.data[i / 8] = buffer.getFloat64(i);
}
result.kind = kind;
return result;
} else if (cvParam.precision === 32) {
let result = {};
result.data = new Float32Array(bytes);
result.data = new Float64Array(bytes.byteLength / 4);
for (let i = 0; i < bytes.byteLength; i += 4) {
result.data[i / 4] = buffer.getFloat32(i);
}
result.kind = kind;
return result;
}
Expand Down
1 change: 1 addition & 0 deletions src/mzxml/parseCvParam.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export function parseCvParam(cvParam) {
byteOrder: param.byteOrder,
contentType: param.contentType,
pairOrder: param.pairOrder,
compressionType: param.compressionType,
};
}
}
Expand Down
16 changes: 1 addition & 15 deletions src/mzxml/process.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,5 @@
import { processSpectrumList } from './processSpectrumList';

/**
 * Fills `result` from a parsed mzXML document.
 *
 * All work is delegated to `processSpectrumList`, which appends one entry per
 * scan to `result.times` (taken from each scan's `retentionTime` attribute)
 * and to `result.series.ms`. The times are no longer read from
 * `topLevel.index.offset`, and the scan list is processed exactly once.
 *
 * @param {object} topLevel - Parsed mzXML root; must expose `msRun`.
 * @param {object} result - Output container with a `times` array and a
 *   `series.ms` object; both are mutated in place.
 * @returns {void}
 */
export function processMZXML(topLevel, result) {
  processSpectrumList(topLevel, result.times, result.series.ms);
}
18 changes: 10 additions & 8 deletions src/mzxml/processSpectrumList.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
import { parseBinaryDataArray } from './parseBinaryDataArray';

/**
 * Converts an mzXML `retentionTime` attribute into seconds.
 *
 * Accepts ISO 8601 time durations such as `PT356.68S` or `PT1M30S`. Anything
 * that is not in that form falls back to stripping the `P`, `T` and `S`
 * letters and parsing what is left, so plain numeric strings keep working.
 *
 * @param {string|number|undefined} value - Raw attribute value.
 * @returns {number} Seconds, or `NaN` when the value is missing or unparsable.
 */
function parseRetentionTime(value) {
  if (typeof value === 'number') return value;
  if (typeof value !== 'string') return NaN;
  const duration = /^PT(?:(\d+(?:\.\d+)?)H)?(?:(\d+(?:\.\d+)?)M)?(?:(\d+(?:\.\d+)?)S)?$/i;
  const match = duration.exec(value.trim());
  if (match && (match[1] || match[2] || match[3])) {
    return (
      Number(match[1] || 0) * 3600 +
      Number(match[2] || 0) * 60 +
      Number(match[3] || 0)
    );
  }
  return parseFloat(value.replace(/[PTS]/gi, ''));
}

/**
 * Walks the scans of a parsed mzXML document and appends, for every scan,
 * one `[mz, intensity]` pair of Float64Arrays to `msData.data`, the scan
 * attributes to `msData.info`, and the retention time in seconds to `times`.
 * The three outputs always grow together, so index `i` refers to the same
 * scan in each of them.
 *
 * @param {object} parsed - Parsed mzXML root; `parsed.msRun.scan` may be a
 *   single scan object or an array of them.
 * @param {number[]} times - Receives one retention time per scan.
 * @param {object} msData - Must have a `data` array; `info` is created if it
 *   is missing and reset when the first scan carries attributes.
 * @returns {void}
 * @throws {Error} If a scan entry is itself an array.
 */
export function processSpectrumList(parsed, times, msData) {
  if (!parsed.msRun.scan) return;
  let scanList = parsed.msRun.scan;
  if (Array.isArray(scanList) === false) scanList = [scanList];
  const firstScan = scanList[0];
  // `info` is pushed to for every scan below, so it must exist even when the
  // first scan has no attributes.
  if ((firstScan && firstScan._attr) || !Array.isArray(msData.info)) {
    msData.info = [];
  }
  for (const scan of scanList) {
    // `typeof null` is 'object', so null has to be excluded explicitly.
    if (scan === null || typeof scan !== 'object') continue;
    if (Array.isArray(scan)) {
      throw new Error('processSpectrumList: scan may not be an array');
    }
    const dataArray = parseBinaryDataArray(scan);
    // The decoder may hand back a bare `[]` for scans it cannot decode.
    const values = (dataArray && dataArray.data) || [];
    // Values are interleaved as (m/z, intensity) pairs; ignore a trailing odd value.
    const length = Math.floor(values.length / 2);
    const first = new Float64Array(length);
    const second = new Float64Array(length);
    for (let i = 0; i < length; i++) {
      first[i] = values[i * 2];
      second[i] = values[i * 2 + 1];
    }
    msData.data.push([first, second]);
    msData.info.push(scan._attr);
    times.push(
      parseRetentionTime(scan._attr ? scan._attr.retentionTime : undefined),
    );
  }
}

0 comments on commit 6531057

Please sign in to comment.