diff --git a/lib/util/mp4_box_parsers.js b/lib/util/mp4_box_parsers.js
index 6abdcbc1a4..23a4e7e656 100644
--- a/lib/util/mp4_box_parsers.js
+++ b/lib/util/mp4_box_parsers.js
@@ -301,11 +301,45 @@ shaka.util.Mp4BoxParsers = class {
    */
   static parseESDS(reader) {
     let codec = 'mp4a';
-    reader.skip(11);
-    codec += '.' + this.toHex_(reader.readUint8());
-    // this value is only a single digit
-    codec += '.' +
-        this.toHex_((reader.readUint8() >>> 2) & 0x3f).replace(/^0/, '');
+    let tag;
+    let oti;
+    while (reader.hasMoreData()) {
+      tag = reader.readUint8();
+      let byteRead = reader.readUint8();
+      while (byteRead & 0x80) {
+        byteRead = reader.readUint8();
+      }
+      if (tag == 0x03) {
+        reader.readUint16();
+        const flags = reader.readUint8();
+        if (flags & 0x80) {
+          reader.readUint16();
+        }
+        if (flags & 0x40) {
+          reader.skip(reader.readUint8());
+        }
+        if (flags & 0x20) {
+          reader.readUint16();
+        }
+      } else if (tag == 0x04) {
+        oti = reader.readUint8();
+        reader.skip(12);
+      } else if (tag == 0x05) {
+        break;
+      }
+    }
+    if (oti) {
+      codec += '.' + this.toHex_(oti);
+      if (tag == 0x05 && reader.hasMoreData()) {
+        const firstData = reader.readUint8();
+        let audioObjectType = (firstData & 0xF8) >> 3;
+        if (audioObjectType === 31 && reader.hasMoreData()) {
+          audioObjectType = 32 + ((firstData & 0x7) << 3) +
+              ((reader.readUint8() & 0xE0) >> 5);
+        }
+        codec += '.' + audioObjectType;
+      }
+    }
     return {codec};
   }
 
diff --git a/test/test/assets/audio-xhe-aac.mp4 b/test/test/assets/audio-xhe-aac.mp4
new file mode 100644
index 0000000000..47d432661e
Binary files /dev/null and b/test/test/assets/audio-xhe-aac.mp4 differ
diff --git a/test/util/mp4_box_parsers_unit.js b/test/util/mp4_box_parsers_unit.js
index fab41ee073..41a8827af8 100644
--- a/test/util/mp4_box_parsers_unit.js
+++ b/test/util/mp4_box_parsers_unit.js
@@ -8,18 +8,24 @@ describe('Mp4BoxParsers', () => {
   const videoInitSegmentUri = '/base/test/test/assets/sintel-video-init.mp4';
   const videoSegmentUri = '/base/test/test/assets/sintel-video-segment.mp4';
 
+  const audioInitSegmentXheAacUri = '/base/test/test/assets/audio-xhe-aac.mp4';
+
   /** @type {!ArrayBuffer} */
   let videoInitSegment;
   /** @type {!ArrayBuffer} */
   let videoSegment;
+  /** @type {!ArrayBuffer} */
+  let audioInitSegmentXheAac;
 
   beforeAll(async () => {
     const responses = await Promise.all([
       shaka.test.Util.fetch(videoInitSegmentUri),
       shaka.test.Util.fetch(videoSegmentUri),
+      shaka.test.Util.fetch(audioInitSegmentXheAacUri),
     ]);
     videoInitSegment = responses[0];
     videoSegment = responses[1];
+    audioInitSegmentXheAac = responses[2];
   });
 
   it('parses init segment', () => {
@@ -157,6 +163,36 @@ describe('Mp4BoxParsers', () => {
     expect(baseMediaDecodeTime).toBe(expectedBaseMediaDecodeTime);
   });
 
+  it('parses ESDS box for xHE-AAC segment', () => {
+    let channelCount;
+    let sampleRate;
+    let codec;
+
+    const Mp4Parser = shaka.util.Mp4Parser;
+    new Mp4Parser()
+        .box('moov', Mp4Parser.children)
+        .box('trak', Mp4Parser.children)
+        .box('mdia', Mp4Parser.children)
+        .box('minf', Mp4Parser.children)
+        .box('stbl', Mp4Parser.children)
+        .fullBox('stsd', Mp4Parser.sampleDescription)
+        .box('mp4a', (box) => {
+          const parsedMP4ABox = shaka.util.Mp4BoxParsers.parseMP4A(box.reader);
+          channelCount = parsedMP4ABox.channelCount;
+          sampleRate = parsedMP4ABox.sampleRate;
+          if (box.reader.hasMoreData()) {
+            Mp4Parser.children(box);
+          }
+        })
+        .box('esds', (box) => {
+          const parsedESDSBox = shaka.util.Mp4BoxParsers.parseESDS(box.reader);
+          codec = parsedESDSBox.codec;
+        }).parse(audioInitSegmentXheAac, /* partialOkay= */ false);
+    expect(channelCount).toBe(2);
+    expect(sampleRate).toBe(48000);
+    expect(codec).toBe('mp4a.40.42');
+  });
+
   /**
    *
    * Explanation on the Uint8Array: