feat(Format): Populate audio language from captions when available (#445)
LuanRT · Jul 16, 2023 · bdd98a3 · bdd98a3
1 parent 06750aa
commit bdd98a3
Show file tree

Hide file tree

Showing 4 changed files with 54 additions and 3 deletions.
diff --git a/src/parser/classes/PlayerCaptionsTracklist.ts b/src/parser/classes/PlayerCaptionsTracklist.ts
@@ -17,10 +17,10 @@ export default class PlayerCaptionsTracklist extends YTNode {
   audio_tracks?: {
     audio_track_id: string;
     captions_initial_state: string;
-    default_caption_track_index: number;
+    default_caption_track_index?: number;
     has_default_track: boolean;
     visibility: string;
-    caption_track_indices: number;
+    caption_track_indices: number[];
   }[];
 
   default_audio_track_index?: number;

diff --git a/src/parser/youtube/VideoInfo.ts b/src/parser/youtube/VideoInfo.ts
@@ -99,6 +99,28 @@ class VideoInfo extends MediaInfo {
     this.captions = info.captions;
     this.cards = info.cards;
 
+    if (this.streaming_data) {
+      const default_audio_track = this.streaming_data.adaptive_formats.find((format) => format.audio_track?.audio_is_default);
+      if (default_audio_track) {
+        // The combined formats only exist for the default language, even for videos with multiple audio tracks
+        // So we can copy the language from the default audio track to the combined formats
+        this.streaming_data.formats.forEach((format) => format.language = default_audio_track.language);
+      } else if (typeof this.captions?.default_audio_track_index !== 'undefined' && this.captions?.audio_tracks && this.captions.caption_tracks) {
+        // Single audio track with captions: derive the audio language from its default caption track (first one if none is marked)
+        // Every lookup is guarded because the indices come from the response and may not point at an existing entry
+        const audioTrack = this.captions.audio_tracks[this.captions.default_audio_track_index];
+        const index = audioTrack?.caption_track_indices?.[audioTrack.default_caption_track_index ?? 0];
+        const language_code = typeof index === 'number' ? this.captions.caption_tracks[index]?.language_code : undefined;
+
+        if (language_code) {
+          this.streaming_data.adaptive_formats.forEach((format) => {
+            if (format.has_audio) format.language = language_code;
+          });
+          this.streaming_data.formats.forEach((format) => format.language = language_code);
+        }
+      }
+    }
+
     const two_col = next?.contents?.item().as(TwoColumnWatchNextResults);
 
     const results = two_col?.results;

diff --git a/src/utils/FormatUtils.ts b/src/utils/FormatUtils.ts
@@ -491,6 +491,11 @@ class FormatUtils {
           this.#hoistNumberAttributeIfPossible(set, mime_objects[i], 'audioSamplingRate', 'audio_sample_rate', hoisted);
 
           this.#hoistAudioChannelsIfPossible(document, set, mime_objects[i], hoisted);
+
+          const language = mime_objects[i][0].language;
+          if (language) {
+            set.setAttribute('lang', language);
+          }
         } else {
           set.setAttribute('maxPlayoutRate', '1');
 

diff --git a/test/main.test.ts b/test/main.test.ts
@@ -1,5 +1,5 @@
 import { createWriteStream, existsSync } from 'node:fs';
-import { Innertube, IBrowseResponse, Utils, YT, YTMusic, YTNodes } from '../bundle/node.cjs';
+import { Innertube, Utils, YT, YTMusic, YTNodes } from '../bundle/node.cjs';
 
 describe('YouTube.js Tests', () => {
   let innertube: Innertube; 
@@ -19,6 +19,30 @@ describe('YouTube.js Tests', () => {
       expect(info.basic_info.id).toBe('bUHZ2k9DYHY');
     });
 
+    describe('Innertube#getBasicInfo', () => {
+      test('Format#language multiple audio tracks', async () => {
+        const info = await innertube.getBasicInfo('Kn56bMZ9OE8');
+        expect(info.streaming_data).toBeDefined();
+
+        const default_track_format = info.streaming_data?.adaptive_formats.find((format) => format.audio_track?.audio_is_default);
+        expect(default_track_format).toBeDefined();
+        expect(default_track_format?.language).toBe('en');
+
+        // check that the language is properly propagated to the non-dash formats (fails the assertion, not a TypeError, if there are none)
+        expect(info.streaming_data?.formats[0]?.language).toBe('en');
+      });
+
+      test('Format#language single audio track with captions', async () => {
+        const info = await innertube.getBasicInfo('gisdyTBMNyQ');
+        expect(info.streaming_data).toBeDefined();
+
+        const audio_format = info.streaming_data?.adaptive_formats.find((format) => format.has_audio);
+        expect(audio_format).toBeDefined();
+        expect(audio_format?.language).toBe('en');
+        expect(info.streaming_data?.formats[0]?.language).toBe('en');
+      });
+    });
+
     test('Innertube#search', async () => {
       const search = await innertube.search('Anton Petrov');
       expect(search.page).toBeDefined();