Skip to content

Commit

Permalink
[ie/bluey] Add extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
kclauhk committed Jan 29, 2025
1 parent fd0e386 commit 8444896
Show file tree
Hide file tree
Showing 2 changed files with 311 additions and 0 deletions.
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@
from .blogger import BloggerIE
from .bloomberg import BloombergIE
from .bluesky import BlueskyIE
from .bluey import BlueyIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
from .boosty import BoostyIE
Expand Down
310 changes: 310 additions & 0 deletions yt_dlp/extractor/bluey.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
import re

from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
merge_dicts,
str_or_none,
traverse_obj,
url_or_none,
)


class BlueyIE(InfoExtractor):
    """Extractor for videos embedded in pages of www.bluey.tv.

    A page may carry several ``fe-*-player`` elements whose ``data-props``
    attribute holds a JSON description of a Brightcove or YouTube video.
    Each usable player becomes one entry; page-level metadata (JSON-LD or
    OpenGraph) is merged on top. One entry yields a single video result,
    several entries yield a playlist.
    """
    _VALID_URL = r'https?://www\.bluey\.tv/(?:.+/)?(?P<id>[^/]+)/?$'
    # Brightcove account and Playback API policy key used by the site's player
    _BRIGHTCOVE_ACCOUNT_ID = '6041795457001'
    _BRIGHTCOVE_POLICY_KEY = 'BCpkADawqM0-e9kbtiYMtk9IxVZUWQ1X3DfbKGkMTtgzX-8zRbWKYj31aVgMTPXxCK3Uy_J4wYE8mXuYHlLUhu47Tsco9l6H_-3_BJKL10ip7fnY8tUiCotYIoaMcOTeqCwM9Vn2trMyy3HM'
    _TESTS = [{
        # Episode (YouTube embedded: https://youtu.be/u6D2ucvSas0)
        'url': 'https://www.bluey.tv/watch/season-1/mums-and-dads/',
        'info_dict': {
            'id': 'mums-and-dads',
            'ext': 'mp4',
            'title': 'Mums and Dads',
            'description': 'md5:f542c691cbfbfa4cb2366f1c53dc0448',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2023/07/ABTI325R50_MUMS_AND_DADS_Image_15-scaled.jpg',
            'timestamp': 1591362032,
            'upload_date': '20200605',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 118,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'tags': 'count:18',
            'heatmap': 'count:100',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Episode 33',
            'episode_number': 33,
        },
    }, {
        # Episode with trailer video
        'url': 'https://www.bluey.tv/watch/season-3/the-sign/',
        'info_dict': {
            'id': 'the-sign',
            'title': 'The Sign',
            'description': 'md5:6e9b01b32f35bdcf33160c86a15080f7',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2024/02/Sign-Sq.png',
            'uploader': 'Official Bluey TV',
            'season': 'Season 3',
            'season_number': 3,
            'episode': 'Episode 49',
            'episode_number': 49,
        },
        'playlist_count': 2,
    }, {
        # Minisode (Brightcove)
        'url': 'https://www.bluey.tv/watch/minisodes/robo-bingo/',
        'info_dict': {
            'id': 'robo-bingo',
            'ext': 'mp4',
            'title': 'Robo Bingo',
            'description': 'md5:b61c6d053eaebef5000acc9db2dd4afe',
            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/jit/6041795457001/0412a8e4-be18-45ae-b721-0fe483d07143/main/1280x720/9s994ms/match/image.jpg',
            'upload_date': '20241206',
            'uploader': 'Official Bluey TV',
            'tags': [],
            'episode': 'Episode 16',
            'episode_number': 16,
            # Brightcove reports 19989 ms; durations are exposed in seconds
            'duration': 19,
        },
    }, {
        # Book-read (YouTube embedded: https://youtu.be/NbLxoLyPGyc)
        'url': 'https://www.bluey.tv/watch/bluey-book-reads/charades-2/',
        'info_dict': {
            'id': 'charades-2',
            'ext': 'mp4',
            'title': 'Charades',
            'description': 'md5:93bbd61380543064b77e7c0a4e463875',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2024/02/AVSA067W_BlueyBookReads_S01_E06_Charades_TitlePromo_16x9.png',
            'timestamp': 1713538806,
            'release_date': '20240419',
            'release_timestamp': 1713538806,
            'upload_date': '20240419',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 280,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'heatmap': 'count:100',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'tags': 'count:28',
        },
    }, {
        # Bonus-bit (YouTube embedded: https://youtu.be/UUkb_b5UEE0)
        'url': 'https://www.bluey.tv/watch/bonus-bits/tea-party/',
        'info_dict': {
            'id': 'tea-party',
            'ext': 'mp4',
            'title': 'Tea Party',
            'description': 'md5:ed429cd457a0c657befd4b6d33a0d1b6',
            'thumbnail': 'https://www.bluey.tv/wp-content/uploads/2021/03/Bluey_Tea_Party_001.jpg',
            'timestamp': 1614960018,
            'upload_date': '20210305',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 95,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'heatmap': 'count:100',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'tags': 'count:24',
        },
    }, {
        # Characters (YouTube embedded: https://youtu.be/HlOIzz-GIxk)
        'url': 'https://www.bluey.tv/characters/bluey/',
        'info_dict': {
            'id': 'bluey',
            'ext': 'mp4',
            'title': 'BLUEY\'S HIGHLIGHTS',
            'description': 'md5:f5b2db5958c56e0929b914242a8a47b3',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1665759612,
            'upload_date': '20221014',
            'uploader': 'Official Bluey TV',
            'uploader_id': '@BlueyOfficialChannel',
            'uploader_url': 'https://www.youtube.com/@BlueyOfficialChannel',
            'channel': 'Bluey - Official Channel',
            'channel_id': 'UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_url': 'https://www.youtube.com/channel/UCVzLLZkDuFGAE2BGdBuBNBg',
            'channel_follower_count': int,
            'channel_is_verified': True,
            'duration': 604,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Film & Animation'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'tags': 'count:24',
        },
    }]

    def _extract_brightcove(self, brightcove_id, video_id):
        """Fetch a Brightcove video via the Playback API.

        Returns an info dict with metadata, formats and subtitles, or None
        when the API request fails (the request is non-fatal).
        """
        data = self._download_json(
            f'https://edge.api.brightcove.com/playback/v1/accounts/{self._BRIGHTCOVE_ACCOUNT_ID}/videos/{brightcove_id}',
            video_id, headers={'Accept': f'application/json;pk={self._BRIGHTCOVE_POLICY_KEY}'}, fatal=False)
        if not data:
            return None

        formats, subtitles = [], {}
        # 'sources' may be absent; iterating None would raise TypeError
        for source in data.get('sources') or []:
            src = source.get('src')
            if not src:
                continue
            # The same manifest may be offered over http and https; suffix the
            # URL scheme so the resulting format ids stay distinct
            scheme = src.split(':')[0]
            if source.get('type') == 'application/x-mpegURL':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    src, video_id, 'mp4', m3u8_id='hls', fatal=False)
                for f in fmts:
                    f['format_id'] = f['format_id'].replace(' ', '').replace(')', '') + '-' + scheme
            elif source.get('type') == 'application/dash+xml':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    src, video_id, mpd_id='dash', fatal=False)
                for f in fmts:
                    f['format_id'] = f['format_id'] + '-' + scheme
            else:
                continue
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            **traverse_obj(data, {
                'id': ('id', {str}),
                'title': (('name', 'description'), {str_or_none}),
                'description': (('long_description', 'description'), {str_or_none}),
                'thumbnails': (('poster', 'thumbnail'), {lambda x: [{
                    'url': x,
                    'preference': 0,
                }] if url_or_none(x) else []}),
                'tags': ('tags', {list}),
                'upload_date': (('published_at', 'created_at'),
                                {lambda x: x[:10].replace('-', '') if x else None}),
                # The Playback API reports milliseconds; expose seconds
                'duration': ('duration', {lambda x: int_or_none(x, scale=1000)}),
            }, get_all=False),
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_youtube(self, url):
        """Extract a YouTube video (URL or bare video id) with YoutubeIE.

        Returns the YouTube info dict, or None if extraction raised an
        ExtractorError (the error is only printed to screen).
        """
        # NOTE(review): this calls YoutubeIE._real_extract directly on a fresh
        # instance so the full info dict can be post-processed here; confirm
        # that skipping the regular extract()/initialize() path is intended
        youtube = YoutubeIE()
        youtube._downloader = self._downloader
        try:
            return youtube._real_extract(url)
        except ExtractorError as e:
            youtube.to_screen(e)
            return None

    @staticmethod
    def _extract_posters(video_data):
        """Collect poster images from one player's props as thumbnail dicts.

        Only entries with a valid URL are returned, ordered as: featured or
        poster image, each sized rendition of ``poster``, then ``poster`` itself.
        """
        posters = []
        if image_url := traverse_obj(
                video_data, (('featuredImage', 'posterImage'), {url_or_none}), get_all=False):
            posters.append({'url': image_url})

        poster = traverse_obj(video_data, ('poster', {dict})) or {}
        sizes = traverse_obj(poster, ('sizes', {dict})) or {}
        # 'sizes' maps '<name>' to a URL and '<name>-width'/'<name>-height' to
        # dimensions; only the URL-valued keys denote renditions
        for key, value in sizes.items():
            size_url = url_or_none(value) if str(value)[:4] == 'http' else None
            if not size_url:
                continue
            posters.append({
                'url': size_url,
                'width': int_or_none(sizes.get(f'{key}-width')),
                'height': int_or_none(sizes.get(f'{key}-height')),
            })

        main_poster = traverse_obj(poster, {
            'url': ('url', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        })
        if main_poster and main_poster.get('url'):
            posters.append(main_poster)
        return posters

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        entries, player_poster, featured_image, player_title = [], [], None, None
        player_data = re.findall(r'fe-(\w+)-player" data-props="({[^"]+?})"', webpage)
        for idx, (_, props) in enumerate(player_data):
            # data-props is HTML-escaped JSON; a single malformed player
            # should not abort extraction of the others
            video_data = self._parse_json(clean_html(props), video_id, fatal=False)
            if not video_data:
                continue

            # The generic trailer caption is not a usable title
            player_title = traverse_obj(video_data, ('title', {lambda x: x if x != 'Watch the trailer' else None}))
            # Posters accumulate across players and are attached to every
            # entry extracted from this point on
            player_poster.extend(self._extract_posters(video_data))
            if idx == 0:
                featured_image = traverse_obj(
                    video_data, (('featuredImage', 'posterImage'), {url_or_none}), get_all=False)

            if ((video_data.get('type') == 'brightcove' and video_data.get('brightcoveId'))
                    or (video_data.get('videoPlayer') == 'brightcove' and int_or_none(video_data.get('url')))):
                entry = self._extract_brightcove(
                    video_data.get('brightcoveId') or video_data.get('url'), video_id)
                if entry:
                    entry.setdefault('thumbnails', []).extend(player_poster)
                    entries.append(entry)
            elif ((video_data.get('type') == 'youtube' and video_data.get('youtubeId'))
                    or (video_data.get('videoPlayer') == 'youtube' and video_data.get('url'))):
                entry = self._extract_youtube(video_data.get('youtubeId') or video_data.get('url'))
                if entry:
                    thumbnails = sorted(
                        entry.get('thumbnails') or [], key=lambda t: t.get('preference') or 0)
                    # Rank the site's own poster (0) above the best YouTube
                    # thumbnail (-1)
                    if thumbnails:
                        thumbnails[-1]['preference'] = -1
                    if player_poster:
                        player_poster[-1]['preference'] = 0
                    entry['thumbnails'] = thumbnails + player_poster
                    entries.append(entry)

        if json_ld := list(self._yield_json_ld(webpage, video_id)):
            info = {
                'id': video_id,
                **traverse_obj(json_ld[-1], {
                    'title': (('containsSeason', '@graph'), 0, (('episode', 'name'), 'name'),
                              {lambda x: re.sub(r'\W+Bluey Official Website$', '', x).split(' | ')[-1] if x else None}),
                    'description': (('containsSeason', '@graph'), 0,
                                    (('episode', 'description'), 'description'), {str_or_none}),
                    'thumbnail': ('containsSeason', 0, 'episode', 'image',
                                  {lambda x: x if url_or_none(x) else featured_image}),
                    'season': ('containsSeason', 0, 'name',
                               {lambda x: x if re.match(r'Season \d+$', x) else None}),
                    'season_number': ('containsSeason', 0, 'name',
                                      {lambda x: int(x.replace('Season ', '')) if re.match(r'Season \d+$', x) else None}),
                    'episode': ('containsSeason', 0, 'episode', 'episodeNumber',
                                {lambda x: f'Episode {x}' if x else None}),
                    'episode_number': ('containsSeason', 0, 'episode', 'episodeNumber', {int_or_none}),
                }, get_all=False),
            }
        else:
            # No JSON-LD: fall back to OpenGraph tags and parse season/episode
            # numbers out of the page title
            title = re.sub(r'\W+Bluey Official Website$', '', self._og_search_title(webpage))
            info = {
                'id': video_id,
                'title': title.split(' | ')[-1],
                'description': self._og_search_description(webpage),
                'thumbnail': featured_image or self._og_search_thumbnail(webpage),
            }
            if season_number := self._search_regex(r' Season (\d+)', title, 'season_number', default=None):
                info['season'] = f'Season {season_number}'
                info['season_number'] = int(season_number)
            if episode_number := self._search_regex(r' Episode (\d+)', title, 'episode_number', default=None):
                info['episode'] = f'Episode {episode_number}'
                info['episode_number'] = int(episode_number)
        info['uploader'] = self._html_search_meta('article:author', webpage)

        if len(entries) > 1:
            return self.playlist_result(entries, video_id, **{
                k: v for k, v in info.items() if v})
        if len(entries) == 1:
            # JSON-LD may not provide a title; a None here lets merge_dicts
            # fall back to the entry's own title
            info['title'] = player_title or info.get('title')
            info['description'] = join_nonempty(info.get('description'), entries[0].get('description'), delim='\n\n')
            # Clear the single 'thumbnail' fields so the merged 'thumbnails'
            # list decides which image is used
            info['thumbnail'] = entries[0]['thumbnail'] = None
            return merge_dicts(info, entries[0])
        return info

0 comments on commit 8444896

Please sign in to comment.