/**
 * mux.js
 *
 * Copyright (c) Brightcove
 * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
 *
 * Reads in-band CEA-708 captions out of FMP4 segments.
 * @see https://en.wikipedia.org/wiki/CEA-708
 */
'use strict';
var discardEmulationPreventionBytes = require('../tools/caption-packet-parser').discardEmulationPreventionBytes;
var CaptionStream = require('../m2ts/caption-stream').CaptionStream;
var probe = require('./probe');
var inspect = require('../tools/mp4-inspector');
/**
 * Maps an offset in the mdat to a sample based on the size of the samples.
 * Assumes that `parseSamples` has been called first.
 *
 * @param {Number} offset - The offset into the mdat
 * @param {Object[]} samples - An array of samples, parsed using `parseSamples`
 * @return {?Object} The matching sample, or null if no match was found.
 *
 * @see ISO-BMFF-12/2015, Section 8.8.8
 **/
var mapToSample = function(offset, samples) {
  var approximateOffset = offset;

  for (var i = 0; i < samples.length; i++) {
    var sample = samples[i];

    if (approximateOffset < sample.size) {
      return sample;
    }

    approximateOffset -= sample.size;
  }

  return null;
};
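
// Worked example (sizes are illustrative, not from any real segment): given
// samples with sizes [10, 20, 30], an offset of 25 skips the first sample
// (25 - 10 = 15), and 15 < 20, so the second sample is returned; an offset at
// or beyond the total size (here >= 60) returns null.
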
/**
 * Finds SEI nal units contained in a Media Data Box.
 * Assumes that `parseSamples` has been called first.
 *
 * @param {Uint8Array} avcStream - The bytes of the mdat
 * @param {Object[]} samples - The samples parsed out by `parseSamples`
 * @param {Number} trackId - The trackId of this video track
 * @return {Object[]} seiNals - the parsed SEI NALUs found.
 *   The contents of the seiNal should match what is expected by
 *   CaptionStream.push (nalUnitType, size, data, escapedRBSP, pts, dts)
 *
 * @see ISO-BMFF-12/2015, Section 8.1.1
 * @see Rec. ITU-T H.264, 7.3.2.3.1
 **/
var findSeiNals = function(avcStream, samples, trackId) {
  var
    avcView = new DataView(avcStream.buffer, avcStream.byteOffset, avcStream.byteLength),
    result = [],
    seiNal,
    i,
    length,
    lastMatchedSample;

  for (i = 0; i + 4 < avcStream.length; i += length) {
    length = avcView.getUint32(i);
    i += 4;

    // Bail if this doesn't appear to be an H264 stream
    if (length <= 0) {
      continue;
    }

    switch (avcStream[i] & 0x1F) {
    case 0x06:
      var data = avcStream.subarray(i + 1, i + 1 + length);
      var matchingSample = mapToSample(i, samples);

      seiNal = {
        nalUnitType: 'sei_rbsp',
        size: length,
        data: data,
        escapedRBSP: discardEmulationPreventionBytes(data),
        trackId: trackId
      };

      if (matchingSample) {
        seiNal.pts = matchingSample.pts;
        seiNal.dts = matchingSample.dts;
        lastMatchedSample = matchingSample;
      } else if (lastMatchedSample) {
        // If a matching sample cannot be found, use the last
        // sample's values as they should be as close as possible
        seiNal.pts = lastMatchedSample.pts;
        seiNal.dts = lastMatchedSample.dts;
      } else {
        // eslint-disable-next-line no-console
        console.log("We've encountered a nal unit without data. See mux.js#233.");
        break;
      }

      result.push(seiNal);
      break;
    default:
      break;
    }
  }

  return result;
};
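
// For reference (values are hypothetical), each object pushed into `result`
// above looks roughly like:
//
//   {
//     nalUnitType: 'sei_rbsp',
//     size: 738,
//     data: <Uint8Array>,
//     escapedRBSP: <Uint8Array>,
//     trackId: 1,
//     pts: 90000,
//     dts: 90000
//   }
//
// i.e. the shape that CaptionStream's `push` expects, per the JSDoc above.
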
/**
 * Parses sample information out of Track Run Boxes and calculates
 * the absolute presentation and decode timestamps of each sample.
 *
 * @param {Array<Uint8Array>} truns - The Track Run ('trun') boxes to be parsed
 * @param {Number} baseMediaDecodeTime - base media decode time from tfdt
 *   @see ISO-BMFF-12/2015, Section 8.8.12
 * @param {Object} tfhd - The parsed Track Fragment Header
 *   @see inspect.parseTfhd
 * @return {Object[]} the parsed samples
 *
 * @see ISO-BMFF-12/2015, Section 8.8.8
 **/
var parseSamples = function(truns, baseMediaDecodeTime, tfhd) {
  var currentDts = baseMediaDecodeTime;
  var defaultSampleDuration = tfhd.defaultSampleDuration || 0;
  var defaultSampleSize = tfhd.defaultSampleSize || 0;
  var trackId = tfhd.trackId;
  var allSamples = [];

  truns.forEach(function(trun) {
    // Note: We currently do not parse the sample table as well
    // as the trun. It's possible some sources will require this.
    // moov > trak > mdia > minf > stbl
    var trackRun = inspect.parseTrun(trun);
    var samples = trackRun.samples;

    samples.forEach(function(sample) {
      if (sample.duration === undefined) {
        sample.duration = defaultSampleDuration;
      }
      if (sample.size === undefined) {
        sample.size = defaultSampleSize;
      }
      sample.trackId = trackId;
      sample.dts = currentDts;
      if (sample.compositionTimeOffset === undefined) {
        sample.compositionTimeOffset = 0;
      }
      sample.pts = currentDts + sample.compositionTimeOffset;

      currentDts += sample.duration;
    });

    allSamples = allSamples.concat(samples);
  });

  return allSamples;
};
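
// Worked example (numbers are illustrative): with baseMediaDecodeTime = 90000,
// defaultSampleDuration = 3000 and two trun samples that omit their own
// duration, the first sample gets dts = 90000 and the second dts = 93000; a
// sample with compositionTimeOffset = 1500 gets pts = dts + 1500.
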
/**
 * Parses out caption nals from an FMP4 segment's video tracks.
 *
 * @param {Uint8Array} segment - The bytes of a single segment
 * @param {Number} videoTrackId - The trackId of a video track in the segment
 * @return {Object.<Number, Object[]>} A mapping of video trackId to
 *   a list of seiNals found in that track
 **/
var parseCaptionNals = function(segment, videoTrackId) {
  // To get the samples
  var trafs = probe.findBox(segment, ['moof', 'traf']);
  // To get SEI NAL units
  var mdats = probe.findBox(segment, ['mdat']);
  var captionNals = {};
  var mdatTrafPairs = [];

  // Pair up each traf with an mdat, as moofs and mdats come in pairs
  mdats.forEach(function(mdat, index) {
    var matchingTraf = trafs[index];
    mdatTrafPairs.push({
      mdat: mdat,
      traf: matchingTraf
    });
  });

  mdatTrafPairs.forEach(function(pair) {
    var mdat = pair.mdat;
    var traf = pair.traf;
    var tfhd = probe.findBox(traf, ['tfhd']);
    // Exactly 1 tfhd per traf
    var headerInfo = inspect.parseTfhd(tfhd[0]);
    var trackId = headerInfo.trackId;
    var tfdt = probe.findBox(traf, ['tfdt']);
    // Either 0 or 1 tfdt per traf
    var baseMediaDecodeTime = (tfdt.length > 0) ? inspect.parseTfdt(tfdt[0]).baseMediaDecodeTime : 0;
    var truns = probe.findBox(traf, ['trun']);
    var samples;
    var seiNals;

    // Only parse video data for the chosen video track
    if (videoTrackId === trackId && truns.length > 0) {
      samples = parseSamples(truns, baseMediaDecodeTime, headerInfo);

      seiNals = findSeiNals(mdat, samples, trackId);

      if (!captionNals[trackId]) {
        captionNals[trackId] = [];
      }

      captionNals[trackId] = captionNals[trackId].concat(seiNals);
    }
  });

  return captionNals;
};
/**
 * Parses out inband captions from an MP4 container and returns
 * caption objects that can be used by WebVTT and the TextTrack API.
 * @see https://developer.mozilla.org/en-US/docs/Web/API/VTTCue
 * @see https://developer.mozilla.org/en-US/docs/Web/API/TextTrack
 * Assumes that `probe.getVideoTrackIds` and `probe.timescale` have been called first
 *
 * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
 * @param {Number} trackId - The id of the video track to parse
 * @param {Number} timescale - The timescale for the video track from the init segment
 *
 * @return {?Object[]} parsedCaptions - A list of captions or null if no video tracks
 * @return {Number} parsedCaptions[].startTime - The time to show the caption in seconds
 * @return {Number} parsedCaptions[].endTime - The time to stop showing the caption in seconds
 * @return {String} parsedCaptions[].text - The visible content of the caption
 **/
var parseEmbeddedCaptions = function(segment, trackId, timescale) {
  var seiNals;

  // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
  if (trackId === null) {
    return null;
  }

  seiNals = parseCaptionNals(segment, trackId);

  return {
    seiNals: seiNals[trackId],
    timescale: timescale
  };
};
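
// Sketch of a call (values are illustrative): parseEmbeddedCaptions(segment, 1, 90000)
// returns { seiNals: [/* seiNal objects for trackId 1 */], timescale: 90000 }.
// If the segment holds no data for that track, `seiNals` is undefined, which
// the CaptionParser `parse` method below treats as "no captions".
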
/**
 * Converts SEI NALUs into captions that can be used by video.js
 **/
var CaptionParser = function() {
  var isInitialized = false;
  var captionStream;

  // Stores segments seen before trackId and timescale are set
  var segmentCache;
  // Stores video track ID of the track being parsed
  var trackId;
  // Stores the timescale of the track being parsed
  var timescale;
  // Stores captions parsed so far
  var parsedCaptions;
  // Stores whether we are receiving partial data or not
  var parsingPartial;

  /**
   * A method to indicate whether a CaptionParser has been initialized
   * @returns {Boolean}
   **/
  this.isInitialized = function() {
    return isInitialized;
  };
  /**
   * Initializes the underlying CaptionStream, SEI NAL parsing
   * and management, and caption collection
   **/
  this.init = function(options) {
    captionStream = new CaptionStream();
    isInitialized = true;
    parsingPartial = options ? options.isPartial : false;

    // Collect dispatched captions
    captionStream.on('data', function(event) {
      // Convert to seconds in the source's timescale
      event.startTime = event.startPts / timescale;
      event.endTime = event.endPts / timescale;

      parsedCaptions.captions.push(event);
      parsedCaptions.captionStreams[event.stream] = true;
    });
  };

  /**
   * Determines if a new video track will be selected
   * or if the timescale changed
   * @return {Boolean}
   **/
  this.isNewInit = function(videoTrackIds, timescales) {
    if ((videoTrackIds && videoTrackIds.length === 0) ||
        (timescales && typeof timescales === 'object' &&
          Object.keys(timescales).length === 0)) {
      return false;
    }

    return trackId !== videoTrackIds[0] ||
      timescale !== timescales[trackId];
  };
  /**
   * Parses out SEI captions and interacts with underlying
   * CaptionStream to return dispatched captions
   *
   * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
   * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
   * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
   * @see parseEmbeddedCaptions
   * @see m2ts/caption-stream.js
   **/
  this.parse = function(segment, videoTrackIds, timescales) {
    var parsedData;

    if (!this.isInitialized()) {
      return null;

    // This is not likely to be a video segment
    } else if (!videoTrackIds || !timescales) {
      return null;

    } else if (this.isNewInit(videoTrackIds, timescales)) {
      // Use the first video track only as there is no
      // mechanism to switch to other video tracks
      trackId = videoTrackIds[0];
      timescale = timescales[trackId];

    // If an init segment has not been seen yet, hold onto segment
    // data until we have one.
    // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
    } else if (trackId === null || !timescale) {
      segmentCache.push(segment);
      return null;
    }

    // Now that a timescale and trackId is set, parse cached segments
    while (segmentCache.length > 0) {
      var cachedSegment = segmentCache.shift();

      this.parse(cachedSegment, videoTrackIds, timescales);
    }

    parsedData = parseEmbeddedCaptions(segment, trackId, timescale);

    if (parsedData === null || !parsedData.seiNals) {
      return null;
    }

    this.pushNals(parsedData.seiNals);
    // Force the parsed captions to be dispatched
    this.flushStream();

    return parsedCaptions;
  };
  /**
   * Pushes SEI NALUs onto CaptionStream
   * @param {Object[]} nals - A list of SEI nals parsed using `parseCaptionNals`
   * Assumes that `parseCaptionNals` has been called first
   * @see m2ts/caption-stream.js
   **/
  this.pushNals = function(nals) {
    if (!this.isInitialized() || !nals || nals.length === 0) {
      return null;
    }

    nals.forEach(function(nal) {
      captionStream.push(nal);
    });
  };

  /**
   * Flushes underlying CaptionStream to dispatch processed, displayable captions
   * @see m2ts/caption-stream.js
   **/
  this.flushStream = function() {
    if (!this.isInitialized()) {
      return null;
    }

    if (!parsingPartial) {
      captionStream.flush();
    } else {
      captionStream.partialFlush();
    }
  };
  /**
   * Reset caption buckets for new data
   **/
  this.clearParsedCaptions = function() {
    parsedCaptions.captions = [];
    parsedCaptions.captionStreams = {};
  };

  /**
   * Resets underlying CaptionStream
   * @see m2ts/caption-stream.js
   **/
  this.resetCaptionStream = function() {
    if (!this.isInitialized()) {
      return null;
    }

    captionStream.reset();
  };

  /**
   * Convenience method to clear all captions flushed from the
   * CaptionStream and still being parsed
   * @see m2ts/caption-stream.js
   **/
  this.clearAllCaptions = function() {
    this.clearParsedCaptions();
    this.resetCaptionStream();
  };

  /**
   * Reset caption parser
   **/
  this.reset = function() {
    segmentCache = [];
    trackId = null;
    timescale = null;

    if (!parsedCaptions) {
      parsedCaptions = {
        captions: [],
        // CC1, CC2, CC3, CC4
        captionStreams: {}
      };
    } else {
      this.clearParsedCaptions();
    }

    this.resetCaptionStream();
  };

  this.reset();
};
module.exports = CaptionParser;
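
// Minimal usage sketch (the require path, `mediaSegment`, `videoTrackIds` and
// `timescales` are placeholders the caller supplies -- for example the track
// IDs and timescales read from the init segment -- not part of this module):
//
//   var CaptionParser = require('./caption-parser');
//   var captionParser = new CaptionParser();
//
//   captionParser.init();
//
//   var parsed = captionParser.parse(
//     mediaSegment,   // Uint8Array holding an fmp4 media segment
//     videoTrackIds,  // e.g. [1]
//     timescales      // e.g. { 1: 90000 }
//   );
//
//   if (parsed) {
//     parsed.captions.forEach(function(caption) {
//       // caption.startTime / caption.endTime are in seconds;
//       // caption.text is the display text
//     });
//     captionParser.clearAllCaptions();
//   }
//
//   // On a seek or rendition switch, clear internal state:
//   captionParser.reset();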