diff --git a/package-lock.json b/package-lock.json index f9f5c97a7..13cb00629 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1825,6 +1825,15 @@ } } } + }, + "mux.js": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/mux.js/-/mux.js-7.0.3.tgz", + "integrity": "sha512-gzlzJVEGFYPtl2vvEiJneSWAWD4nfYRHD5XgxmB2gWvXraMPOYk+sxfvexmNfjQUFpmk6hwLR5C6iSFmuwCHdQ==", + "requires": { + "@babel/runtime": "^7.11.2", + "global": "^4.4.0" + } } } }, @@ -6919,9 +6928,9 @@ "dev": true }, "mux.js": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/mux.js/-/mux.js-7.0.3.tgz", - "integrity": "sha512-gzlzJVEGFYPtl2vvEiJneSWAWD4nfYRHD5XgxmB2gWvXraMPOYk+sxfvexmNfjQUFpmk6hwLR5C6iSFmuwCHdQ==", + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/mux.js/-/mux.js-7.1.0.tgz", + "integrity": "sha512-NTxawK/BBELJrYsZThEulyUMDVlLizKdxyAsMuzoCD1eFj97BVaA8D/CvKsKu6FOLYkFojN5CbM9h++ZTZtknA==", "requires": { "@babel/runtime": "^7.11.2", "global": "^4.4.0" diff --git a/package.json b/package.json index db74d5eb0..110269009 100644 --- a/package.json +++ b/package.json @@ -63,7 +63,7 @@ "global": "^4.4.0", "m3u8-parser": "^7.2.0", "mpd-parser": "^1.3.1", - "mux.js": "7.0.3", + "mux.js": "7.1.0", "video.js": "^7 || ^8" }, "peerDependencies": { diff --git a/src/media-segment-request.js b/src/media-segment-request.js index 1f34e8205..d5f405eb8 100644 --- a/src/media-segment-request.js +++ b/src/media-segment-request.js @@ -18,6 +18,8 @@ export const REQUEST_ERRORS = { ABORTED: -102 }; +const WEB_VTT_CODEC = 'wvtt'; + /** * Abort all requests * @@ -164,6 +166,43 @@ const handleKeyResponse = (segment, objects, finishProcessingFn, triggerSegmentE return finishProcessingFn(null, segment); }; +/** + * Processes an mp4 init segment depending on the codec through the transmuxer. + * + * @param {Object} segment init segment to process + * @param {string} codec the codec of the text segments + */ +const initMp4Text = (segment, codec) => { + if (codec === WEB_VTT_CODEC) { + segment.transmuxer.postMessage({ + action: 'initMp4WebVttParser', + data: segment.map.bytes + }); + } +}; + +/** + * Parses an mp4 text segment with the transmuxer and calls the doneFn from + * the segment loader. + * + * @param {Object} segment the text segment to parse + * @param {string} codec the codec of the text segment + * @param {Function} doneFn the doneFn passed from the segment loader + */ +const parseMp4TextSegment = (segment, codec, doneFn) => { + if (codec === WEB_VTT_CODEC) { + workerCallback({ + action: 'getMp4WebVttText', + data: segment.bytes, + transmuxer: segment.transmuxer, + callback: ({data, mp4VttCues}) => { + segment.bytes = data; + doneFn(null, segment, { mp4VttCues }); + } + }); + } +}; + const parseInitSegment = (segment, callback) => { const type = detectContainerForBytes(segment.map.bytes); @@ -206,6 +245,10 @@ const parseInitSegment = (segment, callback) => { segment.map.timescales[track.id] = track.timescale; } + if (track.type === 'text') { + initMp4Text(segment, track.codec); + } + }); return callback(null); @@ -468,6 +511,16 @@ const handleSegmentBytes = ({ if (isLikelyFmp4MediaSegment(bytesAsUint8Array)) { segment.isFmp4 = true; const {tracks} = segment.map; + const isMp4TextSegment = tracks.text && (!tracks.audio || !tracks.video); + + if (isMp4TextSegment) { + dataFn(segment, { + data: bytesAsUint8Array, + type: 'text' + }); + parseMp4TextSegment(segment, tracks.text.codec, doneFn); + return; + } const trackInfo = { isFmp4: true, diff --git a/src/transmuxer-worker.js b/src/transmuxer-worker.js index 049e221ba..815f1d118 100644 --- a/src/transmuxer-worker.js +++ b/src/transmuxer-worker.js @@ -16,6 +16,7 @@ import {Transmuxer} from 'mux.js/lib/mp4/transmuxer'; import CaptionParser from 'mux.js/lib/mp4/caption-parser'; +import WebVttParser from 'mux.js/lib/mp4/webvtt-parser'; import mp4probe from 'mux.js/lib/mp4/probe'; import tsInspector from 'mux.js/lib/tools/ts-inspector.js'; import { @@ -207,6 +208,44 @@ class MessageHandlers { }, [segment.buffer]); } + /** + * Initializes the WebVttParser and passes the init segment. + * + * @param {Uint8Array} data mp4 boxed WebVTT init segment data + */ + initMp4WebVttParser(data) { + if (!this.webVttParser) { + this.webVttParser = new WebVttParser(); + } + const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength); + + // Set the timescale for the parser. + // This can be called repeatedly in order to set and re-set the timescale. + this.webVttParser.init(segment); + } + + /** + * Parse an mp4 encapsulated WebVTT segment and return an array of cues. + * + * @param {Uint8Array} data a text/webvtt segment + * @return {Object[]} an array of parsed cue objects + */ + getMp4WebVttText(data) { + if (!this.webVttParser) { + // timescale might not be set yet if the parser is created before an init segment is passed. + // default timescale is 90k. + this.webVttParser = new WebVttParser(); + } + const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength); + const parsed = this.webVttParser.parseSegment(segment); + + this.self.postMessage({ + action: 'getMp4WebVttText', + mp4VttCues: parsed || [], + data: segment.buffer + }, [segment.buffer]); + } + probeMp4StartTime({timescales, data}) { const startTime = mp4probe.startTime(timescales, data); diff --git a/src/vtt-segment-loader.js b/src/vtt-segment-loader.js index f36ca98de..4d1cb8b58 100644 --- a/src/vtt-segment-loader.js +++ b/src/vtt-segment-loader.js @@ -46,11 +46,6 @@ export default class VTTSegmentLoader extends SegmentLoader { this.shouldSaveSegmentTimingInfo_ = false; } - createTransmuxer_() { - // don't need to transmux any subtitles - return null; - } - /** * Indicates which time ranges are buffered * @@ -282,6 +277,11 @@ export default class VTTSegmentLoader extends SegmentLoader { } const segmentInfo = this.pendingSegment_; + const isMp4WebVttSegmentWithCues = result.mp4VttCues && result.mp4VttCues.length; + + if (isMp4WebVttSegmentWithCues) { + segmentInfo.mp4VttCues = result.mp4VttCues; + } // although the VTT segment loader bandwidth isn't really used, it's good to // maintain functionality between segment loaders @@ -334,11 +334,13 @@ export default class VTTSegmentLoader extends SegmentLoader { return; } - this.updateTimeMapping_( - segmentInfo, - this.syncController_.timelines[segmentInfo.timeline], - this.playlist_ - ); + if (!isMp4WebVttSegmentWithCues) { + this.updateTimeMapping_( + segmentInfo, + this.syncController_.timelines[segmentInfo.timeline], + this.playlist_ + ); + } if (segmentInfo.cues.length) { segmentInfo.timingInfo = { @@ -380,14 +382,49 @@ export default class VTTSegmentLoader extends SegmentLoader { this.handleAppendsDone_(); } - handleData_() { - // noop as we shouldn't be getting video/audio data captions - // that we do not support here. + handleData_(simpleSegment, result) { + const isVttType = simpleSegment && simpleSegment.type === 'vtt'; + const isTextResult = result && result.type === 'text'; + const isFmp4VttSegment = isVttType && isTextResult; + // handle segment data for fmp4 encapsulated webvtt + + if (isFmp4VttSegment) { + super.handleData_(simpleSegment, result); + } } + updateTimingInfoEnd_() { // noop } + /** + * Utility function for converting mp4 webvtt cue objects into VTTCues. + * + * @param {Object} segmentInfo with mp4 webvtt cues for parsing into VTTCue objecs + */ + parseMp4VttCues_(segmentInfo) { + const timestampOffset = this.sourceUpdater_.videoTimestampOffset() === null ? + this.sourceUpdater_.audioTimestampOffset() : + this.sourceUpdater_.videoTimestampOffset(); + + segmentInfo.mp4VttCues.forEach((cue) => { + const start = cue.start + timestampOffset; + const end = cue.end + timestampOffset; + const vttCue = new window.VTTCue(start, end, cue.cueText); + + if (cue.settings) { + cue.settings.split(' ').forEach((cueSetting) => { + const keyValString = cueSetting.split(':'); + const key = keyValString[0]; + const value = keyValString[1]; + + vttCue[key] = isNaN(value) ? value : Number(value); + }); + } + segmentInfo.cues.push(vttCue); + }); + } + /** * Uses the WebVTT parser to parse the segment response * @@ -406,6 +443,14 @@ export default class VTTSegmentLoader extends SegmentLoader { throw new NoVttJsError(); } + segmentInfo.cues = []; + segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 }; + + if (segmentInfo.mp4VttCues) { + this.parseMp4VttCues_(segmentInfo); + return; + } + if (typeof window.TextDecoder === 'function') { decoder = new window.TextDecoder('utf8'); } else { @@ -419,9 +464,6 @@ export default class VTTSegmentLoader extends SegmentLoader { decoder ); - segmentInfo.cues = []; - segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 }; - parser.oncue = segmentInfo.cues.push.bind(segmentInfo.cues); parser.ontimestampmap = (map) => { segmentInfo.timestampmap = map; diff --git a/test/media-segment-request.test.js b/test/media-segment-request.test.js index a3b670288..252572eb3 100644 --- a/test/media-segment-request.test.js +++ b/test/media-segment-request.test.js @@ -21,7 +21,9 @@ import { mp4VideoInit, muxed as muxedSegment, webmVideo, - webmVideoInit + webmVideoInit, + mp4WebVttInit, + mp4WebVtt } from 'create-test-data!segments'; // needed for plugin registration import '../src/videojs-http-streaming'; @@ -1863,3 +1865,84 @@ QUnit.test('can get emsg ID3 frames from fmp4 audio segment', function(assert) { // Simulate receiving the init segment after the media this.standardXHRResponse(initReq, mp4AudioInit()); }); + +QUnit.test('can get webvtt text from an fmp4 segment', function(assert) { + const done = assert.async(); + // expected frame data + const expectedCues = [ + { + cueText: '2024-10-16T05:13:50Z\nen # 864527815', + end: 1729055630.9, + settings: undefined, + start: 1729055630 + }, + { + cueText: '2024-10-16T05:13:51Z\nen # 864527815', + end: 1729055631.9, + settings: undefined, + start: 1729055631 + } + ]; + const transmuxer = new videojs.EventTarget(); + + transmuxer.postMessage = (event) => { + if (event.action === 'getMp4WebVttText') { + transmuxer.trigger({ + type: 'message', + data: { + action: 'getMp4WebVttText', + data: event.data, + mp4VttCues: expectedCues + } + }); + } + + if (event.action === 'probeMp4Tracks') { + transmuxer.trigger({ + type: 'message', + data: { + action: 'probeMp4Tracks', + data: event.data, + tracks: [{type: 'text', codec: 'wvtt'}] + } + }); + } + }; + + mediaSegmentRequest({ + xhr: this.xhr, + xhrOptions: this.xhrOptions, + decryptionWorker: this.mockDecrypter, + segment: { + transmuxer, + resolvedUri: 'mp4WebVtt.mp4', + map: { + resolvedUri: 'mp4WebVttInit.mp4' + }, + isFmp4: true + }, + progressFn: this.noop, + trackInfoFn: this.noop, + timingInfoFn: this.noop, + id3Fn: this.noop, + captionsFn: this.noop, + dataFn: this.noop, + doneFn: (_e, _s, result) => { + assert.equal(result.mp4VttCues.length, 2, 'there are 2 mp4VttCues'); + assert.deepEqual(result.mp4VttCues, expectedCues, 'mp4VttCues are expected values'); + transmuxer.off(); + done(); + }, + triggerSegmentEventFn: this.noop + }); + assert.equal(this.requests.length, 2, 'there are two requests'); + + const initReq = this.requests.shift(); + const segmentReq = this.requests.shift(); + + assert.equal(initReq.uri, 'mp4WebVttInit.mp4', 'the first request is for the init segment'); + assert.equal(segmentReq.uri, 'mp4WebVtt.mp4', 'the second request is for a segment'); + + this.standardXHRResponse(initReq, mp4WebVttInit()); + this.standardXHRResponse(segmentReq, mp4WebVtt()); +}); diff --git a/test/segments/mp4WebVtt.mp4 b/test/segments/mp4WebVtt.mp4 new file mode 100644 index 000000000..0b3d1a558 Binary files /dev/null and b/test/segments/mp4WebVtt.mp4 differ diff --git a/test/segments/mp4WebVttInit.mp4 b/test/segments/mp4WebVttInit.mp4 new file mode 100644 index 000000000..b40bc2be2 Binary files /dev/null and b/test/segments/mp4WebVttInit.mp4 differ diff --git a/test/transmuxer-worker.test.js b/test/transmuxer-worker.test.js index 4c1cb5ee0..1f29881f3 100644 --- a/test/transmuxer-worker.test.js +++ b/test/transmuxer-worker.test.js @@ -3,7 +3,9 @@ import {createTransmuxer as createTransmuxer_} from '../src/segment-transmuxer.j import { mp4Captions as mp4CaptionsSegment, muxed as muxedSegment, - caption as captionSegment + caption as captionSegment, + mp4WebVttInit as webVttInit, + mp4WebVtt as webVttSegment } from 'create-test-data!segments'; // needed for plugin registration import '../src/videojs-http-streaming'; @@ -381,6 +383,100 @@ QUnit.test('returns empty array without mp4 captions', function(assert) { }); }); +QUnit.test('can parse mp4 webvtt segments', function(assert) { + const done = assert.async(); + const initSegment = webVttInit(); + const segment = webVttSegment(); + + this.transmuxer = createTransmuxer(); + this.transmuxer.onmessage = (e) => { + const message = e.data; + const expectedCues = [ + { + cueText: '2024-10-16T05:13:50Z\nen # 864527815', + end: 1729055630.9, + settings: undefined, + start: 1729055630 + }, + { + cueText: '2024-10-16T05:13:51Z\nen # 864527815', + end: 1729055631.9, + settings: undefined, + start: 1729055631 + } + ]; + + assert.equal(message.action, 'getMp4WebVttText', 'returned getMp4WebVttText event'); + assert.deepEqual(message.mp4VttCues, expectedCues, 'mp4 vtt cues are expected values'); + + done(); + }; + + this.transmuxer.postMessage({ + action: 'initMp4WebVttParser', + data: initSegment + }); + + this.transmuxer.postMessage({ + action: 'getMp4WebVttText', + data: segment + }); +}); + +QUnit.test('returns empty webVttCues array if segment is empty', function(assert) { + const done = assert.async(); + const initSegment = webVttInit(); + const segment = new Uint8Array(); + const secondSegment = webVttSegment(); + let callCount = 0; + + this.transmuxer = createTransmuxer(); + this.transmuxer.onmessage = (e) => { + const message = e.data; + + callCount++; + if (callCount === 2) { + const secondExpectedCues = [ + { + cueText: '2024-10-16T05:13:50Z\nen # 864527815', + end: 1729055630.9, + settings: undefined, + start: 1729055630 + }, + { + cueText: '2024-10-16T05:13:51Z\nen # 864527815', + end: 1729055631.9, + settings: undefined, + start: 1729055631 + } + ]; + + assert.deepEqual(message.mp4VttCues, secondExpectedCues, 'mp4 vtt cues are expected values'); + done(); + } else { + const expectedCues = []; + + assert.equal(message.action, 'getMp4WebVttText', 'returned getMp4WebVttText event'); + assert.deepEqual(message.mp4VttCues, expectedCues, 'mp4 vtt cues are expected values'); + + this.transmuxer.postMessage({ + action: 'getMp4WebVttText', + data: secondSegment + }); + } + }; + + this.transmuxer.postMessage({ + action: 'initMp4WebVttParser', + data: initSegment + }); + + this.transmuxer.postMessage({ + action: 'getMp4WebVttText', + data: segment + }); +}); + QUnit.module('Transmuxer Worker: Partial Transmuxer', { beforeEach(assert) { assert.timeout(5000); diff --git a/test/vtt-segment-loader.test.js b/test/vtt-segment-loader.test.js index 46f895794..3acefffb7 100644 --- a/test/vtt-segment-loader.test.js +++ b/test/vtt-segment-loader.test.js @@ -943,5 +943,110 @@ QUnit.module('VTTSegmentLoader', function(hooks) { assert.equal(saveSegmentTimingInfoCalls, 0, 'no calls to save timing info'); }); + + QUnit.test('segmentRequestFinished_ adds mp4VttCues', function(assert) { + const done = assert.async(); + const mockSimpleSegment = {}; + const firstMockCue = { + start: 0, + end: 1, + cueText: 'foo', + settings: 'align:start line:8' + }; + const secondMockCue = { + start: 1, + end: 2, + cueText: 'bar', + settings: 'align:end line:10' + }; + const mockResult = { + mp4VttCues: [firstMockCue, secondMockCue] + }; + + loader.track(this.track); + loader.pendingSegment_ = { + segment: { }, + mp4VttCues: undefined, + // for early return. + isSyncRequest: true, + cues: [] + }; + loader.parseMp4VttCues_ = (segmentInfo) => { + assert.ok(segmentInfo.mp4VttCues.length, 'there are mp4VttCues'); + assert.equal(segmentInfo.mp4VttCues[0], firstMockCue, 'first cue is expected value'); + assert.equal(segmentInfo.mp4VttCues[1], secondMockCue, 'second cue is expected value'); + done(); + }; + loader.saveBandwidthRelatedStats_ = () => { }; + loader.segmentRequestFinished_(null, mockSimpleSegment, mockResult); + }); + + QUnit.test('parseMp4VttCues_ adds parsed mp4VttCues to the segmentInfo object', function(assert) { + const mockSegmentInfo = { + mp4VttCues: [{ + start: 0, + end: 2, + cueText: 'foo.bar', + settings: 'align:start line:9' + }], + cues: [] + }; + + // add an offset + loader.sourceUpdater_.videoTimestampOffset = () => { + return 1; + }; + loader.parseMp4VttCues_(mockSegmentInfo); + assert.ok(mockSegmentInfo.cues.length, 'there are parsed cues'); + assert.equal(mockSegmentInfo.cues[0].text, 'foo.bar', 'text is expected string'); + assert.equal(mockSegmentInfo.cues[0].line, 9, 'line is expected number'); + assert.equal(mockSegmentInfo.cues[0].align, 'start', 'align is correct value'); + assert.equal(mockSegmentInfo.cues[0].startTime, 1, 'startTime is correct value'); + assert.equal(mockSegmentInfo.cues[0].endTime, 3, 'startTime is correct value'); + }); + + QUnit.test('parseMp4VttCues_ adds parsed mp4VttCues to the segmentInfo object using audioTimestamp', function(assert) { + const mockSegmentInfo = { + mp4VttCues: [{ + start: 1, + end: 3, + cueText: 'bar.foo', + settings: 'align:end line:10' + }], + cues: [] + }; + + loader.sourceUpdater_.videoTimestampOffset = () => { + return null; + }; + loader.sourceUpdater_.audioTimestampOffset = () => { + return 0; + }; + loader.parseMp4VttCues_(mockSegmentInfo); + assert.ok(mockSegmentInfo.cues.length, 'there are parsed cues'); + assert.equal(mockSegmentInfo.cues[0].text, 'bar.foo', 'text is expected string'); + assert.equal(mockSegmentInfo.cues[0].line, 10, 'line is expected number'); + assert.equal(mockSegmentInfo.cues[0].align, 'end', 'align is correct value'); + assert.equal(mockSegmentInfo.cues[0].startTime, 1, 'startTime is correct value'); + assert.equal(mockSegmentInfo.cues[0].endTime, 3, 'startTime is correct value'); + }); + + QUnit.test('handleData_ passes fmp4 vtt segment data to parent loader', function(assert) { + const done = assert.async(); + const mockSimpleSegment = { + type: 'vtt', + stats: 'fake.stats' + }; + const mockResult = { + type: 'text' + }; + + // mock this function to spy the super handleData call. + loader.earlyAbortWhenNeeded_ = (stats) => { + assert.equal(stats, 'fake.stats', 'expected stats value is passed.'); + done(); + }; + loader.handleData_(mockSimpleSegment, mockResult); + }); }); });