Skip to content

Commit

Permalink
feat(Format): Populate audio language from captions when available (#445)
Browse files Browse the repository at this point in the history
  • Loading branch information
absidue authored Jul 16, 2023
1 parent 06750aa commit bdd98a3
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/parser/classes/PlayerCaptionsTracklist.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ export default class PlayerCaptionsTracklist extends YTNode {
audio_tracks?: {
audio_track_id: string;
captions_initial_state: string;
default_caption_track_index: number;
default_caption_track_index?: number;
has_default_track: boolean;
visibility: string;
caption_track_indices: number;
caption_track_indices: number[];
}[];

default_audio_track_index?: number;
Expand Down
22 changes: 22 additions & 0 deletions src/parser/youtube/VideoInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,28 @@ class VideoInfo extends MediaInfo {
this.captions = info.captions;
this.cards = info.cards;

// Populate `language` on the formats. Prefer the adaptive format that is flagged as the
// default audio track; otherwise fall back to the caption metadata.
if (this.streaming_data) {
  const default_audio_track = this.streaming_data.adaptive_formats.find((format) => format.audio_track?.audio_is_default);
  if (default_audio_track) {
    // The combined formats only exist for the default language, even for videos with multiple audio tracks
    // So we can copy the language from the default audio track to the combined formats
    this.streaming_data.formats.forEach((format) => {
      format.language = default_audio_track.language;
    });
  } else if (typeof this.captions?.default_audio_track_index !== 'undefined' && this.captions?.audio_tracks && this.captions.caption_tracks) {
    // For videos with a single audio track and captions, we can use the captions to figure out the language of the audio and combined formats
    const audio_track = this.captions.audio_tracks[this.captions.default_audio_track_index];

    // `default_caption_track_index` is optional, so fall back to the first caption track listed for this audio track.
    const caption_track_index = audio_track?.caption_track_indices?.[audio_track.default_caption_track_index ?? 0];
    const language_code = typeof caption_track_index === 'number' ?
      this.captions.caption_tracks[caption_track_index]?.language_code :
      undefined;

    // Any of the index lookups above can miss (out-of-range index, empty index list).
    // The language is optional metadata, so leave it unset instead of throwing from the constructor.
    if (language_code) {
      this.streaming_data.adaptive_formats.forEach((format) => {
        if (format.has_audio) {
          format.language = language_code;
        }
      });
      this.streaming_data.formats.forEach((format) => {
        format.language = language_code;
      });
    }
  }
}

const two_col = next?.contents?.item().as(TwoColumnWatchNextResults);

const results = two_col?.results;
Expand Down
5 changes: 5 additions & 0 deletions src/utils/FormatUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,11 @@ class FormatUtils {
this.#hoistNumberAttributeIfPossible(set, mime_objects[i], 'audioSamplingRate', 'audio_sample_rate', hoisted);

this.#hoistAudioChannelsIfPossible(document, set, mime_objects[i], hoisted);

// Take the language from the first entry of this group and, when it is set,
// expose it through the `lang` attribute of `set`.
// NOTE(review): presumably every entry in `mime_objects[i]` shares the same language — confirm against the grouping logic.
const language = mime_objects[i][0].language;
if (language) {
set.setAttribute('lang', language);
}
} else {
set.setAttribute('maxPlayoutRate', '1');

Expand Down
26 changes: 25 additions & 1 deletion test/main.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { createWriteStream, existsSync } from 'node:fs';
import { Innertube, IBrowseResponse, Utils, YT, YTMusic, YTNodes } from '../bundle/node.cjs';
import { Innertube, Utils, YT, YTMusic, YTNodes } from '../bundle/node.cjs';

describe('YouTube.js Tests', () => {
let innertube: Innertube;
Expand All @@ -19,6 +19,30 @@ describe('YouTube.js Tests', () => {
expect(info.basic_info.id).toBe('bUHZ2k9DYHY');
});

// Checks that `Format#language` is populated on the formats returned by `Innertube#getBasicInfo`.
describe('Innertube#getBasicInfo', () => {
  test('Format#language multiple audio tracks', async () => {
    const video_info = await innertube.getBasicInfo('Kn56bMZ9OE8');
    const streaming_data = video_info.streaming_data;
    expect(streaming_data).toBeDefined();

    // The adaptive format flagged as the default audio track must carry the language.
    const default_format = streaming_data?.adaptive_formats.find((candidate) => candidate.audio_track?.audio_is_default);
    expect(default_format).toBeDefined();
    expect(default_format?.language).toBe('en');

    // check that the language is properly propagated to the non-dash formats
    expect(streaming_data?.formats[0].language).toBe('en');
  });

  test('Format#language single audio track with captions', async () => {
    const video_info = await innertube.getBasicInfo('gisdyTBMNyQ');
    const streaming_data = video_info.streaming_data;
    expect(streaming_data).toBeDefined();

    // Any adaptive format with audio should have received the language.
    const first_audio_format = streaming_data?.adaptive_formats.find((candidate) => candidate.has_audio);
    expect(first_audio_format).toBeDefined();
    expect(first_audio_format?.language).toBe('en');

    // The combined (non-dash) formats should carry the same language.
    expect(streaming_data?.formats[0].language).toBe('en');
  });
});

test('Innertube#search', async () => {
const search = await innertube.search('Anton Petrov');
expect(search.page).toBeDefined();
Expand Down

0 comments on commit bdd98a3

Please sign in to comment.