Skip to content

Commit

Permalink
feat: add mzML parser
Browse files Browse the repository at this point in the history
  • Loading branch information
lpatiny committed Nov 6, 2019
1 parent a0e57b2 commit 079d3a5
Show file tree
Hide file tree
Showing 21 changed files with 58,588 additions and 16 deletions.
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# mzData

[![NPM version][npm-image]][npm-url]
[![build status][travis-image]][travis-url]
[![Test coverage][codecov-image]][codecov-url]
[![npm download][download-image]][download-url]
[![NPM version][npm-image]][npm-url]
[![build status][travis-image]][travis-url]
[![Test coverage][codecov-image]][codecov-url]
[![npm download][download-image]][download-url]

Read and explore mzData v1.05 and mzML files.

Expand All @@ -21,11 +21,15 @@ const mzDataFile = readFileSync(__dirname + '/tiny.mzData.xml');
var response = mzData(mzDataFile);
```

## More examples

http://www.psidev.info/mzML

## [API Documentation](https://cheminfo-js.github.io/mzData/)

## License

[MIT](./LICENSE)
[MIT](./LICENSE)

[npm-image]: https://img.shields.io/npm/v/mzdata.svg?style=flat-square
[npm-url]: https://npmjs.org/package/mzdata
Expand Down
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@
"rollup": "^1.26.0"
},
"dependencies": {
"base64-arraybuffer": "^0.2.0",
"base64-js": "^1.3.1",
"fast-xml-parser": "^3.14.0"
"camelcase": "^5.3.1",
"fast-xml-parser": "^3.14.0",
"pako": "^1.0.10"
}
}
54 changes: 54 additions & 0 deletions src/__tests__/mzML.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
const { readFileSync } = require('fs');
const join = require('path').join;

const mzML = require('..');

const pathFiles = join(__dirname, '/../../testFiles/mzML/');

// Integration tests: each case parses a fixture from testFiles/mzML and checks
// the retention times and the number of points in the first three spectra.
describe('mzML', () => {
// Small fixture with three scans.
it('read tiny.mzML', () => {
const data = readFileSync(join(pathFiles, 'tiny.mzML'));
let response = mzML(data);
expect(response.times).toStrictEqual([5.8905, 5.9905, 42.05]);
expect(response.series.ms.data).toHaveLength(3);
// data[i][0] is the first array of the pair stored for spectrum i
expect(response.series.ms.data[0][0]).toHaveLength(15);
expect(response.series.ms.data[1][0]).toHaveLength(10);
expect(response.series.ms.data[2][0]).toHaveLength(15);
});

// Larger fixture: 1500 spectra are expected, the first six times are checked.
it('read test.mzML', () => {
const data = readFileSync(`${pathFiles}test.mzML`);
let response = mzML(data);
expect(response.times).toHaveLength(1500);
expect(response.times.slice(0, 6)).toStrictEqual([
0,
0.2,
0.4,
0.6,
0.8,
1
]);
expect(response.series.ms.data).toHaveLength(1500);
expect(response.series.ms.data[0][0]).toHaveLength(336);
expect(response.series.ms.data[1][0]).toHaveLength(465);
expect(response.series.ms.data[2][0]).toHaveLength(465);
});

// Fixture named small_zlib.pwiz.1.1.mzML; per the test title it holds
// compressed 32-bit data.
it('read compressed 32bits', () => {
const data = readFileSync(`${pathFiles}small_zlib.pwiz.1.1.mzML`);
let response = mzML(data);
expect(response.times).toHaveLength(48);
expect(response.times.slice(0, 6)).toStrictEqual([
0.004935,
0.007896666666666666,
0.011218333333333334,
0.022838333333333332,
0.034925,
0.04862
]);
expect(response.series.ms.data).toHaveLength(48);
expect(response.series.ms.data[0][0]).toHaveLength(19914);
expect(response.series.ms.data[1][0]).toHaveLength(19800);
expect(response.series.ms.data[2][0]).toHaveLength(485);
});
});
34 changes: 24 additions & 10 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

const FastXmlParser = require('fast-xml-parser');

const processMetadata = require('./processMetaData');
const processSpectrumList = require('./processSpectrumList');
const processMZData = require('./mzdata/process');
const processMZML = require('./mzml/process');

const ensureText = require('./util/ensureText');
const searchObjectKey = require('./util/searchObjectKey');

/**
* Reads a mzData v1.05 file
Expand All @@ -21,24 +23,36 @@ function mzData(xml) {
attributeNamePrefix: '',
parseAttributeValue: true,
attrNodeName: '_attr',
ignoreAttributes: false,
ignoreAttributes: false
});

if (!parsed.mzData) throw new Error('The parent node is not mzData');
let topLevel = searchObjectKey(parsed, /^(mzdata|mzml|mzxml)$/i);
if (!topLevel) {
throw new Error('MZ parser: can not find tag mzdata, mzml or mzxml');
}

let result = {
metadata: {},
times: [],
series: {
ms: {
data: [],
},
},
data: []
}
}
};

processMetadata(parsed.mzData, result.metadata);

processSpectrumList(parsed.mzData, result.times, result.series.ms.data);
switch (Object.keys(topLevel)[0]) {
case 'mzdata':
processMZData(topLevel.mzdata, result);
break;
case 'mzml':
processMZML(topLevel.mzml, result);
break;
case 'mzxml':
break;
default:
throw new Error('MZ parser: unknown format: ' + Object.keys(topLevel)[0]);
}

return result;
}
Expand Down
File renamed without changes.
File renamed without changes.
9 changes: 9 additions & 0 deletions src/mzdata/process.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const processMetadata = require('./processMetaData');
const processSpectrumList = require('./processSpectrumList');

/**
 * Fills `result` from the parsed content of an mzData document by delegating
 * to the mzData-specific helpers.
 * @param {object} topLevel - parsed content of the mzData root element
 * @param {object} result - target object; `result.metadata`, `result.times`
 *   and `result.series.ms.data` are handed to the helpers
 */
function processMZData(topLevel, result) {
processMetadata(topLevel, result.metadata);
processSpectrumList(topLevel, result.times, result.series.ms.data);
}

module.exports = processMZData;
File renamed without changes.
File renamed without changes.
43 changes: 43 additions & 0 deletions src/mzml/parseBinaryDataArray.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
'use strict';

const pako = require('pako');
const toByteArray = require('base64-js').toByteArray;
const parseCvParam = require('./parseCvParam');

/**
 * Decodes one parsed `binaryDataArray` node into a typed array.
 *
 * The node's cvParams decide how the payload is handled: whether it holds m/z
 * or intensity values and whether they are stored as 64-bit or 32-bit floats.
 *
 * @param {object} node - parsed binaryDataArray node (`binary`, `_attr`, `cvParam`)
 * @returns {object|Array} `{ mz: Float64Array|Float32Array }` or
 *   `{ intensity: Float64Array|Float32Array }`; an empty array when the node
 *   has no binary content or no attributes
 * @throws {Error} when the array kind or the precision cannot be determined
 */
function decodeData(node) {
  const { binary, _attr: attributes } = node;
  const cvParam = parseCvParam(node.cvParam);
  if (!binary || !attributes) return [];

  const bytes = decoder(binary, cvParam);

  let kind;
  if (cvParam.mzArray) {
    kind = 'mz';
  } else if (cvParam.intensityArray) {
    kind = 'intensity';
  } else {
    throw new Error('unknown binary data type');
  }

  let TypedArray;
  if (cvParam['64BitFloat']) {
    TypedArray = Float64Array;
  } else if (cvParam['32BitFloat']) {
    TypedArray = Float32Array;
  } else {
    throw new Error(`unknown precision in decoder: ${attributes.precision}`);
  }

  // Reinterpret the decoded bytes as floats (no per-element conversion).
  const decoded = {};
  decoded[kind] = new TypedArray(bytes.buffer);
  return decoded;
}

/**
 * Turns a base64 string into raw bytes, inflating them first when the
 * cvParams carry a `zlibCompression` entry.
 *
 * @param {string} base64Encoded - base64 payload of a binaryDataArray
 * @param {object} [cvParams={}] - parsed cvParams of the same node
 * @returns {Uint8Array} the decoded (and, if needed, decompressed) bytes
 */
function decoder(base64Encoded, cvParams = {}) {
  const isCompressed = Boolean(cvParams.zlibCompression);
  const bytes = toByteArray(base64Encoded);
  return isCompressed ? pako.inflate(bytes) : bytes;
}

module.exports = decodeData;
28 changes: 28 additions & 0 deletions src/mzml/parseCvParam.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
'use strict';

const camelCase = require('camelcase');

/**
 * Converts one or several parsed `cvParam` nodes into a lookup object.
 *
 * Each entry is keyed by the parameter name, lower-cased, stripped of every
 * character other than letters, digits and spaces, and then camel-cased
 * (e.g. "scan start time" is looked up as `scanStartTime` by the callers).
 * Nodes without attributes or without a name are skipped.
 *
 * @param {object|object[]} [cvParam] - a single cvParam node or an array of them
 * @returns {object} map of camel-cased name to
 *   `{ accession, cvLabel, value, name }`; empty when `cvParam` is falsy
 */
function parseCvParam(cvParam) {
  let result = {};
  if (!cvParam) return result;

  let cvParams = Array.isArray(cvParam) ? cvParam : [cvParam];
  for (let param of cvParams) {
    // An element without any attribute has no `_attr`; treat it like a
    // nameless parameter instead of crashing on `undefined.name`.
    let attr = param && param._attr;
    if (!attr || !attr.name) continue;

    // Attribute values are parsed (`parseAttributeValue`), so a purely numeric
    // name could arrive as a number; coerce before string manipulation.
    let key = camelCase(
      String(attr.name)
        .toLowerCase()
        .replace(/[^ a-z0-9]/g, '')
    );
    result[key] = {
      accession: attr.accession,
      // NOTE(review): mzML cvParam elements appear to use `cvRef` rather than
      // `cvLabel`, so this is presumably undefined for mzML — confirm.
      cvLabel: attr.cvLabel,
      value: attr.value,
      name: attr.name
    };
  }
  return result;
}

module.exports = parseCvParam;
8 changes: 8 additions & 0 deletions src/mzml/process.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
const processSpectrumList = require('./processSpectrumList');

/**
 * Fills `result` from the parsed content of an mzML document.
 * Only the spectrum list is processed; `result.metadata` is left untouched.
 * @param {object} topLevel - parsed content of the mzML root element
 * @param {object} result - target object; `result.times` and
 *   `result.series.ms.data` are handed to `processSpectrumList`
 */
function processMZML(topLevel, result) {
// TODO: extract mzML metadata into result.metadata (no processMetadata helper
// is required by this module yet).
processSpectrumList(topLevel, result.times, result.series.ms.data);
}

module.exports = processMZML;
40 changes: 40 additions & 0 deletions src/mzml/processSpectrumList.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
'use strict';

const parseCvParam = require('./parseCvParam');
const parseBinaryDataArray = require('./parseBinaryDataArray');

/**
 * Walks the spectra of a parsed mzML document and appends, for each usable
 * spectrum, its scan start time to `times` and its `[mz, intensity]` pair to
 * `msData`.
 *
 * Nothing happens when the document has no `run.spectrumList.spectrum`.
 * Spectra without a usable single `scan` description are skipped.
 *
 * @param {object} parsed - parsed content of the mzML root element
 * @param {Array} times - receives one scan start time per spectrum (mutated)
 * @param {Array} msData - receives one `[mz, intensity]` pair per spectrum (mutated)
 * @throws {Error} when a spectrum has an array of scanLists, several scans,
 *   or a number of binary data arrays other than two
 */
function processSpectrumList(parsed, times, msData) {
  if (
    !parsed ||
    !parsed.run ||
    !parsed.run.spectrumList ||
    !parsed.run.spectrumList.spectrum
  ) {
    return;
  }

  // A list holding a single <spectrum> may be parsed as a bare object rather
  // than an array (parseCvParam copes with the same situation for cvParam),
  // and a bare object is not iterable.
  let spectrumList = parsed.run.spectrumList.spectrum;
  if (!Array.isArray(spectrumList)) spectrumList = [spectrumList];

  for (let spectrum of spectrumList) {
    let scanList = spectrum.scanList;
    if (Array.isArray(scanList)) throw new Error('Unsupported scanList');
    // No scan description at all: skip, as for a non-object scan below.
    if (!scanList) continue;

    let scan = scanList.scan;

    // `typeof null` is 'object', so null needs its own check.
    if (!scan || typeof scan !== 'object') continue;
    if (Array.isArray(scan)) {
      throw new Error('processSpectrumList: scan may not be an array');
    }
    let cvParam = parseCvParam(scan.cvParam);
    times.push(cvParam.scanStartTime.value);

    let dataArrayList = spectrum.binaryDataArrayList.binaryDataArray;
    if (dataArrayList.length !== 2) {
      throw new Error('Can not decodeData because length !== 2');
    }

    let first = parseBinaryDataArray(dataArrayList[0]);
    let second = parseBinaryDataArray(dataArrayList[1]);

    // The two arrays may come in either order; pick each one by its kind.
    msData.push([first.mz || second.mz, second.intensity || first.intensity]);
  }
}

module.exports = processSpectrumList;
44 changes: 44 additions & 0 deletions src/mzml/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pako from 'pako';
import { decode } from 'base64-arraybuffer';

/**
 * Decodes a base64 payload into 64-bit floats, optionally inflating it first.
 *
 * @param {string} base64Encoded - base64 text to decode
 * @param {object} [options={}]
 * @param {string} [options.compressionAlgorithm] - `'zlib'` to inflate the
 *   decoded bytes; `undefined` or `''` for uncompressed data
 * @returns {Float64Array} the decoded values (empty for empty input)
 * @throws {Error} on an unknown compression algorithm, or when the decoded
 *   byte count is not a multiple of 8
 */
export function decoder(base64Encoded, options = {}) {
  const { compressionAlgorithm } = options;
  let bytes;
  switch (compressionAlgorithm) {
    case 'zlib':
      // Decoding compressed data means inflating it, not deflating it again.
      bytes = pako.inflate(new Uint8Array(decode(base64Encoded)));
      break;
    case undefined:
    case '':
      bytes = new Uint8Array(decode(base64Encoded));
      break;
    default:
      throw new Error(
        `utils.decoder: unknown compression: ${compressionAlgorithm}`,
      );
  }
  // Parenthesised on purpose: `!x % 8` negates before taking the remainder.
  if (bytes.byteLength % 8 !== 0) {
    throw new Error('decode to Float64Array not the right length');
  }
  // Reinterpret the bytes as doubles. Passing the Uint8Array itself would
  // convert every byte into one float. The slice yields an exact, aligned
  // ArrayBuffer even if `bytes` is a view into a larger buffer.
  const { buffer, byteOffset, byteLength } = bytes;
  return new Float64Array(buffer.slice(byteOffset, byteOffset + byteLength));
}

/**
 * Builds the `{ times, series }` result structure from a collection of spectra.
 *
 * A spectrum is kept only when it has a time, a `mass` and an `intensity`.
 * A time of 0 is a valid time and is kept.
 *
 * @param {object|Array} spectra - spectra as an array or as an object whose
 *   values are spectra; each spectrum exposes `time`, `mass`, `intensity`
 * @returns {{times: number[], series: {ms: {data: Array, dimensions: number}}}}
 *   times converted with `Number`, and one `[mass, intensity]` pair per kept
 *   spectrum
 */
export function formatResult(spectra) {
  let result = {
    times: [],
    series: {
      ms: {
        data: [],
        dimensions: 2,
      },
    },
  };
  for (let element of Object.values(spectra || {})) {
    if (!element) continue;
    // Explicit checks instead of truthiness, so a spectrum at time 0 is kept.
    let hasTime =
      element.time !== undefined && element.time !== null && element.time !== '';
    if (hasTime && element.mass && element.intensity) {
      result.times.push(Number(element.time));
      result.series.ms.data.push([element.mass, element.intensity]);
    }
  }
  return result;
}
27 changes: 27 additions & 0 deletions src/util/__tests__/searchObjectKey.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const searchObjectKey = require('../searchObjectKey');

// Checks that searchObjectKey finds keys at several nesting depths and
// returns undefined when no key matches.
test('searchObjectKey', () => {
const object = {
key1: {
key2: true,
key3: false,
key4: {
key5: true,
key6: true
}
}
};

// key nested one level deep, with a primitive value
expect(searchObjectKey(object, /key2/)).toEqual({ key2: true });
// key whose value is itself an object: the whole sub-object is returned
expect(searchObjectKey(object, /key4/)).toEqual({
key4: {
key5: true,
key6: true
}
});

// key nested two levels deep
expect(searchObjectKey(object, /key5/)).toEqual({
key5: true
});
// no match anywhere
expect(searchObjectKey(object, /key7/)).toBe(undefined);
});
15 changes: 15 additions & 0 deletions src/util/searchObjectKey.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Depth-first search for the first key matching `searchKey`.
 *
 * Keys are visited in enumeration order. A non-matching key whose value is a
 * non-array object is searched recursively before the next sibling key.
 *
 * @param {object} object - object to search
 * @param {RegExp|string} searchKey - pattern handed to `String.prototype.match`
 * @returns {object|undefined} an object holding the single matching entry,
 *   with its key lower-cased, or `undefined` when nothing matches
 */
function searchObjectKey(object, searchKey) {
  for (const key in object) {
    if (key.match(searchKey)) {
      const hit = {};
      hit[key.toLowerCase()] = object[key];
      return hit;
    }

    const child = object[key];
    const isNestedObject = typeof child === 'object' && !Array.isArray(child);
    if (!isNestedObject) continue;

    const nestedHit = searchObjectKey(child, searchKey);
    if (nestedHit) return nestedHit;
  }
  return undefined;
}

module.exports = searchObjectKey;
Loading

0 comments on commit 079d3a5

Please sign in to comment.