diff --git a/lib/text/web_vtt_generator.js b/lib/text/web_vtt_generator.js index aeb222483c..cfa9fbc554 100644 --- a/lib/text/web_vtt_generator.js +++ b/lib/text/web_vtt_generator.js @@ -81,16 +81,28 @@ shaka.text.WebVttGenerator = class { // We don't want to modify the array or objects passed in, since we don't // technically own them. So we build a new array and replace certain items // in it if they need to be flattened. - const flattenedCues = cues.map((cue) => { - if (cue.nestedCues.length) { - const flatCue = cue.clone(); - flatCue.nestedCues = []; - flatCue.payload = flattenPayload(cue); - return flatCue; - } else { - return cue; + // We also don't want to flatten the text payloads starting at a container + // element; otherwise, for containers encapsulating multiple caption lines, + // the lines would merge into a single cue. This is undesirable when a + // subset of the captions are outside of the append time window. To fix + // this, we only call flattenPayload() starting at elements marked as + // isContainer = false. + const getCuesToFlatten = (cues, result) => { + for (const cue of cues) { + if (cue.isContainer) { + // Recurse to find the actual text payload cues. + getCuesToFlatten(cue.nestedCues, result); + } else { + // Flatten the payload. + const flatCue = cue.clone(); + flatCue.nestedCues = []; + flatCue.payload = flattenPayload(cue); + result.push(flatCue); + } } - }); + return result; + }; + const flattenedCues = getCuesToFlatten(cues, []); let webvttString = 'WEBVTT\n\n'; for (const cue of flattenedCues) {