-
Notifications
You must be signed in to change notification settings - Fork 213
/
webvtt-parser.js
126 lines (107 loc) · 3.74 KB
/
webvtt-parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
const { parseTfdt } = require("../tools/mp4-inspector");
const findBox = require("./find-box");
const { getTimescaleFromMediaHeader } = require("./probe");
const { parseSamples, getMdatTrafPairs } = require("./samples");
/**
 * Module for parsing WebVTT text and styles from FMP4 segments.
 * Based on the ISO/IEC 14496-30.
 *
 * Usage: construct with `new WebVttParser()`, optionally call `init` with the
 * initialization segment so the track timescale is picked up, then call
 * `parseSegment` for each content segment.
 */
const WebVttParser = function() {
  // default timescale to 90k, used until `init` finds an mdhd box.
  let timescale = 90e3;

  // Created on first use and then shared by every decode call of this parser,
  // instead of allocating a new decoder for every sample.
  let textDecoder;

  /**
   * Decodes the bytes of a box as UTF-8 text.
   * @param {Uint8Array|undefined} bytes The box contents, or undefined when the box is absent.
   * @returns {string|undefined} The decoded text; undefined when there is no box
   * or when decoding throws (the error is logged, not rethrown).
   */
  const decodeUtf8 = function(bytes) {
    if (!bytes) {
      return undefined;
    }

    if (!textDecoder) {
      textDecoder = new TextDecoder('utf-8');
    }

    try {
      return textDecoder.decode(bytes);
    } catch (e) {
      console.error(e);
      return undefined;
    }
  };

  /**
   * Converts BigInt values to Number and passes everything else through untouched.
   * NOTE(review): parseTfdt/parseSamples presumably hand back BigInt timestamps
   * for 64-bit decode times - confirm against ./samples. Mixing a BigInt with the
   * Number timescale in arithmetic would throw a TypeError, hence the coercion.
   * @param {*} value The value to normalize.
   * @returns {*} `Number(value)` for BigInt input, otherwise `value` itself.
   */
  const toNumber = function(value) {
    return typeof value === 'bigint' ? Number(value) : value;
  };

  /**
   * Parses the timescale from the init segment.
   * Leaves the current timescale unchanged when no mdhd box is found.
   * @param {Array<Uint8Array>} segment The initialization segment to parse the timescale from.
   */
  this.init = function(segment) {
    // We just need the timescale from the init segment.
    const mdhd = findBox(segment, ['moov', 'trak', 'mdia', 'mdhd'])[0];

    if (mdhd) {
      timescale = getTimescaleFromMediaHeader(mdhd);
    }
  };

  /**
   * Parses a WebVTT FMP4 segment.
   * @param {Array<Uint8Array>} segment The content segment to parse the WebVTT cues from.
   * @returns {Array<{cueText: string, start: number, end: number, settings: (string|undefined)}>}
   * One entry per 'vttc' box that has a non-empty payload and belongs to a sample
   * with a non-zero duration. `start` and `end` are the sample pts and
   * pts + duration, each divided by the timescale; `settings` is the decoded
   * 'sttg' box, or undefined when the cue has none.
   */
  this.parseSegment = function(segment) {
    const vttCues = [];
    // Deliberately declared outside the loop: a traf without a tfdt box keeps
    // the value set by the previous pair.
    let baseMediaDecodeTime = 0;

    getMdatTrafPairs(segment).forEach((pair) => {
      const mdatBox = pair.mdat;
      const trafBox = pair.traf;
      // zero or one.
      const tfdtBox = findBox(trafBox, ['tfdt'])[0];
      // zero or one.
      const tfhdBox = findBox(trafBox, ['tfhd'])[0];
      // zero or more.
      const trunBoxes = findBox(trafBox, ['trun']);

      if (tfdtBox) {
        baseMediaDecodeTime = parseTfdt(tfdtBox).baseMediaDecodeTime;
      }

      // Without sample runs and a track fragment header there is nothing to read.
      if (!trunBoxes.length || !tfhdBox) {
        return;
      }

      let mdatOffset = 0;

      parseSamples(trunBoxes, baseMediaDecodeTime, tfhdBox).forEach((sample) => {
        // extract sample data from the mdat box.
        // WebVTT Sample format:
        // Exactly one VTTEmptyCueBox box
        // OR one or more VTTCueBox boxes.
        const sampleData = mdatBox.slice(mdatOffset, mdatOffset + sample.size);

        // Advance right away so every exit path below leaves the offset
        // pointing at the next sample.
        mdatOffset += sample.size;

        // A 'vtte' box marks an empty sample: no cues to emit.
        if (findBox(sampleData, ['vtte'])[0]) {
          return;
        }

        // Timing is a property of the sample, so compute it once for all of
        // the cue boxes it contains.
        const pts = toNumber(sample.pts);
        const duration = toNumber(sample.duration);
        const start = pts / timescale;
        const end = (pts + duration) / timescale;

        // TODO: Support 'vtta' boxes.
        // VTTAdditionalTextBoxes can be interleaved between VTTCueBoxes.
        findBox(sampleData, ['vttc']).forEach((vttcBox) => {
          // mandatory payload box, contains cue text.
          const cueText = decodeUtf8(findBox(vttcBox, ['payl'])[0]);
          // optional settings box, contains styling.
          const settings = decodeUtf8(findBox(vttcBox, ['sttg'])[0]);

          if (sample.duration && cueText) {
            vttCues.push({
              cueText,
              start,
              end,
              settings
            });
          }
        });
      });
    });

    return vttCues;
  };
};
module.exports = WebVttParser;