diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index b4daee54ea2..891ccbdf42f 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -17,6 +17,7 @@
     parse_iso8601,
     try_get,
     unescapeHTML,
+    unified_timestamp,
     url_or_none,
     urlencode_postdata,
     urljoin,
@@ -793,6 +794,19 @@ class BBCIE(BBCCoUkIE):
                 'description': 'Learn English words and phrases from this story',
             },
             'add_ie': [BBCCoUkIE.ie_key()],
+    }, {
+        # BBC Reel
+        'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
+        'info_dict': {
+            'id': 'p07c6sb9',
+            'ext': 'mp4',
+            'title': 'How positive thinking is harming your happiness',
+            'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+            'timestamp': 1559606400,
+            'duration': 235,
+            'thumbnail': 'https://ychef.files.bbci.co.uk/64x64/p07c9dsr.jpg',
+            'upload_date': '20190604',
+        },
     }]
 
     @classmethod
@@ -959,6 +973,63 @@ def _real_extract(self, url):
                  r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
                 webpage, 'vpid', default=None)
 
+        # BBC Reel, e.g.
+        # https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness
+        # Metadata is read from the JSON held in the data-json attribute of
+        # the page's <script id="initial-data"> element.
+        if not programme_id:
+            initial_data = self._html_search_regex(
+                # (?!\2) ends the value at whichever quote opened it
+                r'(?s)<[sS][cC][rR][iI][pP][tT]\b[^>]+\bid\s*=\s*(["\'])initial-data\1[^>]+\bdata-json\s*=\s*(["\'])((?:(?!\2).)+)\2[^>]*>',
+                webpage, 'initial data', default=None, group=3)
+            if initial_data:
+                initial_data = self._parse_json(
+                    initial_data, playlist_id, fatal=False)
+            # only the first item of initData is used; try_get() yields None
+            # when the JSON is missing or has a different shape
+            reel_data = try_get(
+                initial_data, lambda x: x['initData']['items'][0], dict) or {}
+            smp_data = reel_data.get('smpData') or {}
+            clip_data = try_get(smp_data, lambda x: x['items'][0], dict)
+            if clip_data:
+                # flatten the nested levels; the clip's own values win
+                reel_data.update(smp_data)
+                reel_data.update(clip_data)
+            version_id = reel_data.get('versionID')
+            if version_id:
+                alt_title = reel_data.get('shortTitle') or playlist_id
+                title = reel_data.get('title') or alt_title
+                # alt_title is only worth reporting when it adds information
+                if title == alt_title or alt_title == playlist_id:
+                    alt_title = None
+                formats, subtitles = self._download_media_selector(version_id)
+                self._sort_formats(formats)
+                thumbnail = reel_data.get('holdingImageURL')
+                thumbnails = None
+                if thumbnail and '$recipe' in thumbnail:
+                    # '$recipe' is a size placeholder: offer several square
+                    # sizes instead of a single thumbnail
+                    thumbnails = [{
+                        'url': thumbnail.replace('$recipe', '%dx%d' % (size, size)),
+                        'width': size,
+                        'height': size,
+                    } for size in (16, 32, 64, 128, 512)]
+                    thumbnail = None
+                info = {
+                    'id': version_id,
+                    'title': title,
+                    'alt_title': alt_title,
+                    'formats': formats,
+                    'subtitles': subtitles,
+                    'description': dict_get(reel_data, ('summary', 'shortSummary')),
+                    'timestamp': unified_timestamp(reel_data.get('displayDate')),
+                    'duration': reel_data.get('duration'),
+                    'thumbnail': thumbnail,
+                    'thumbnails': thumbnails,
+                }
+                # drop unset fields so the result only carries known values
+                return dict((k, v) for k, v in info.items() if v is not None)
+
         if programme_id:
             formats, subtitles = self._download_media_selector(programme_id)
             self._sort_formats(formats)