From b932e13bbbde8dc3327855dd868b34a731a32c41 Mon Sep 17 00:00:00 2001
From: absidue <48293849+absidue@users.noreply.github.com>
Date: Fri, 4 Aug 2023 19:45:39 +0200
Subject: [PATCH] fix(toDash): Format grouping into AdaptationSets

---
 src/utils/FormatUtils.ts | 193 +++++++++++++++++++--------------------
 1 file changed, 95 insertions(+), 98 deletions(-)

diff --git a/src/utils/FormatUtils.ts b/src/utils/FormatUtils.ts
index d4fedd469..2ebba9093 100644
--- a/src/utils/FormatUtils.ts
+++ b/src/utils/FormatUtils.ts
@@ -308,50 +308,91 @@ class FormatUtils {
     actions?: Actions,
     storyboards?: PlayerStoryboardSpec
   ) {
-    const mime_types: string[] = [];
-    const mime_objects: Format[][] = [ [] ];
+    const group_ids: string[] = [];
+    const group_objects: Format[][] = [ [] ];
 
-    formats.forEach((video_format) => {
-      if ((!video_format.index_range || !video_format.init_range) && !video_format.is_type_otf) {
+    let has_multiple_audio_tracks = false;
+
+    formats.forEach((format) => {
+      if ((!format.index_range || !format.init_range) && !format.is_type_otf) {
         return;
       }
-      const mime_type = video_format.mime_type;
-      const mime_type_index = mime_types.indexOf(mime_type);
-      if (mime_type_index > -1) {
-        mime_objects[mime_type_index].push(video_format);
+
+      const mime_type = format.mime_type.split(';')[0];
+
+      // Codec without any profile or level information
+      const just_codec = getStringBetweenStrings(format.mime_type, 'codecs="', '"')?.split('.')[0];
+
+      let color_info = '';
+      // HDR videos have both SDR and HDR vp9 formats, so we want to stick them in different groups
+      if (format.color_info) {
+        const { primaries, transfer_characteristics, matrix_coefficients } = format.color_info;
+        color_info = `${primaries}-${transfer_characteristics}-${matrix_coefficients}`;
+      }
+
+      let audio_track_id = '';
+      if (format.audio_track) {
+        audio_track_id = format.audio_track.id;
+
+        if (!has_multiple_audio_tracks) {
+          has_multiple_audio_tracks = true;
+        }
+      }
+
+      const group_id = `${mime_type}-${just_codec}-${color_info}-${audio_track_id}`;
+
+      const group_index = group_ids.indexOf(group_id);
+      if (group_index > -1) {
+        group_objects[group_index].push(format);
       } else {
-        mime_types.push(mime_type);
-        mime_objects.push([]);
-        mime_objects[mime_types.length - 1].push(video_format);
+        group_ids.push(group_id);
+        group_objects.push([]);
+        group_objects[group_ids.length - 1].push(format);
       }
     });
+
     let set_id = 0;
-    for (let i = 0; i < mime_types.length; i++) {
-      // When the video has multiple different audio tracks we want to include the extra information in the manifest
-      if (mime_objects[i][0].has_audio && mime_objects[i][0].audio_track) {
-        const track_ids: string[] = [];
-        const track_objects: Format[][] = [ [] ];
-
-        mime_objects[i].forEach((format) => {
-          const id_index = track_ids.indexOf(format.audio_track?.id as string);
-          if (id_index > -1) {
-            track_objects[id_index].push(format);
-          } else {
-            track_ids.push(format.audio_track?.id as string);
-            track_objects.push([]);
-            track_objects[track_ids.length - 1].push(format);
-          }
-        });
-        // The lang attribute has to go on the AdaptationSet element and the Role element goes inside the AdaptationSet too, so we need a separate adaptation set for each language and role
-        for (let j = 0; j < track_ids.length; j++) {
-          const first_format = track_objects[j][0];
+    for (let i = 0; i < group_ids.length; i++) {
+      const group_formats = group_objects[i];
+      const first_format = group_formats[0];
+
+      if (has_multiple_audio_tracks && first_format.has_audio && !first_format.audio_track) {
+        // Some videos with multiple audio tracks have a broken one that doesn't have any audio track information
+        // It seems to be the same as the default audio track, but broken
+        // We want to ignore it, as it messes up audio track selection in players and YouTube ignores it too
+        // At the time of writing, this video has a broken audio track: https://youtu.be/UJeSWbR6W04
+
+        continue;
+      }
+
+      const set = this.#el(document, 'AdaptationSet', {
+        id: `${set_id}`,
+        mimeType: first_format.mime_type.split(';')[0],
+        startWithSAP: '1',
+        subsegmentAlignment: 'true'
+      });
 
-          const children = [];
+      const hoisted: string[] = [];
+
+      this.#hoistCodecsIfPossible(set, group_formats, hoisted);
+
+      if (first_format.has_audio) {
+        this.#hoistNumberAttributeIfPossible(set, group_formats, 'audioSamplingRate', 'audio_sample_rate', hoisted);
+
+        this.#hoistAudioChannelsIfPossible(document, set, group_formats, hoisted);
+
+        const language = first_format.language;
+        if (language) {
+          set.setAttribute('lang', language);
+        }
+
+        const audio_track = first_format.audio_track;
+        if (audio_track) {
           let role;
-          if (first_format.audio_track?.audio_is_default) {
+          if (audio_track.audio_is_default) {
            role = 'main';
          } else if (first_format.is_dubbed) {
            role = 'dub';
@@ -361,51 +402,30 @@ class FormatUtils {
            role = 'alternate';
          }
 
-          children.push(
-            this.#el(document, 'Role', {
-              schemeIdUri: 'urn:mpeg:dash:role:2011',
-              value: role
-            }),
+          set.appendChild(this.#el(document, 'Role', {
+            schemeIdUri: 'urn:mpeg:dash:role:2011',
+            value: role
+          }));
+
+          set.appendChild(
            this.#el(document, 'Label', { id: set_id.toString() }, [
-              document.createTextNode(first_format.audio_track?.display_name as string)
+              document.createTextNode(audio_track.display_name as string)
            ])
          );
+        }
 
-          const set = this.#el(document, 'AdaptationSet', {
-            id: `${set_id++}`,
-            mimeType: mime_types[i].split(';')[0],
-            startWithSAP: '1',
-            subsegmentAlignment: 'true',
-            lang: first_format.language as string,
-            // Non-standard attribute used by shaka instead of the standard Label element
-            label: first_format.audio_track?.display_name as string
-          }, children);
-
-          const hoisted: string[] = [];
-
-          this.#hoistCodecsIfPossible(set, track_objects[j], hoisted);
-          this.#hoistNumberAttributeIfPossible(set, track_objects[j], 'audioSamplingRate', 'audio_sample_rate', hoisted);
-
-          this.#hoistAudioChannelsIfPossible(document, set, track_objects[j], hoisted);
-
-          period.appendChild(set);
-
-          for (const format of track_objects[j]) {
-            await this.#generateRepresentationAudio(document, set, format, url_transformer, hoisted, cpn, player, actions);
-          }
+        for (const format of group_formats) {
+          await this.#generateRepresentationAudio(document, set, format, url_transformer, hoisted, cpn, player, actions);
        }
      } else {
-        const set = this.#el(document, 'AdaptationSet', {
-          id: `${set_id++}`,
-          mimeType: mime_types[i].split(';')[0],
-          startWithSAP: '1',
-          subsegmentAlignment: 'true'
-        });
+        set.setAttribute('maxPlayoutRate', '1');
+
+        this.#hoistNumberAttributeIfPossible(set, group_formats, 'frameRate', 'fps', hoisted);
 
-        const color_info = mime_objects[i][0].color_info;
-        if (typeof color_info !== 'undefined') {
+        const color_info = first_format.color_info;
+        if (color_info) {
          // Section 5.5 Video source metadata signalling https://dashif.org/docs/IOP-Guidelines/DASH-IF-IOP-Part7-v5.0.0.pdf
          // Section 8 Video code points https://www.itu.int/rec/T-REC-H.273-202107-I/en
          // The player.js file was also helpful
@@ -470,10 +490,9 @@ class FormatUtils {
            matrix_coefficients = '14';
            break;
          default: {
-            const format = mime_objects[i][0];
-            const url = new URL(format.url as string);
+            const url = new URL(first_format.url as string);
 
-            const anonymisedFormat = JSON.parse(JSON.stringify(format));
+            const anonymisedFormat = JSON.parse(JSON.stringify(first_format));
            anonymisedFormat.url = 'REDACTED';
            anonymisedFormat.signature_cipher = 'REDACTED';
            anonymisedFormat.cipher = 'REDACTED';
@@ -493,35 +512,13 @@ class FormatUtils {
          }
        }
 
-        const hoisted: string[] = [];
-
-        this.#hoistCodecsIfPossible(set, mime_objects[i], hoisted);
-
-        if (mime_objects[i][0].has_audio) {
-          this.#hoistNumberAttributeIfPossible(set, mime_objects[i], 'audioSamplingRate', 'audio_sample_rate', hoisted);
-
-          this.#hoistAudioChannelsIfPossible(document, set, mime_objects[i], hoisted);
-
-          const language = mime_objects[i][0].language;
-          if (language) {
-            set.setAttribute('lang', language);
-          }
-        } else {
-          set.setAttribute('maxPlayoutRate', '1');
-
-          this.#hoistNumberAttributeIfPossible(set, mime_objects[i], 'frameRate', 'fps', hoisted);
-        }
-
-        period.appendChild(set);
-
-        for (const format of mime_objects[i]) {
-          if (format.has_video) {
-            await this.#generateRepresentationVideo(document, set, format, url_transformer, hoisted, cpn, player, actions);
-          } else {
-            await this.#generateRepresentationAudio(document, set, format, url_transformer, hoisted, cpn, player, actions);
-          }
+        for (const format of group_formats) {
+          await this.#generateRepresentationVideo(document, set, format, url_transformer, hoisted, cpn, player, actions);
        }
      }
+
+      set_id++;
+      period.appendChild(set);
    }
 
    // We need to make requests to get the image sizes, so we'll skip the storyboards if we don't have an Actions instance