From 86c25bb67455509c72c8ba9517b1c27e7ebfb5db Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 21 Jul 2021 20:45:45 +0530 Subject: [PATCH] [youtube] Improve extraction of livestream metadata Modified from and closes #441 Authored by: pukkandan, krichbanana --- extractor/youtube.py | 45 ++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/extractor/youtube.py b/extractor/youtube.py index afe31a12dc8c..5ef59f680db7 100644 --- a/extractor/youtube.py +++ b/extractor/youtube.py @@ -43,6 +43,7 @@ parse_codecs, parse_count, parse_duration, + parse_iso8601, qualities, remove_start, smuggle_url, @@ -2678,17 +2679,10 @@ def feed_entry(name): else: self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - category = get_first(microformats, 'category') or search_meta('genre') - channel_id = get_first(video_details, 'channelId') \ - or get_first(microformats, 'externalChannelId') \ - or search_meta('channelId') - duration = int_or_none( - get_first(video_details, 'lengthSeconds') - or get_first(microformats, 'lengthSeconds')) \ - or parse_duration(search_meta('duration')) + live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails')) is_live = get_first(video_details, 'isLive') - is_upcoming = get_first(video_details, 'isUpcoming') - owner_profile_url = get_first(microformats, 'ownerProfileUrl') + if is_live is None: + is_live = get_first(live_broadcast_details, 'isLiveNow') streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[]) formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live)) @@ -2786,6 +2780,29 @@ def feed_entry(name): thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i) self._remove_duplicate_formats(thumbnails) + category = get_first(microformats, 'category') or search_meta('genre') + channel_id = str_or_none( + get_first(video_details, 'channelId') + or get_first(microformats, 'externalChannelId') + or search_meta('channelId')) + duration = int_or_none( + get_first(video_details, 'lengthSeconds') + or get_first(microformats, 'lengthSeconds') + or parse_duration(search_meta('duration'))) or None + owner_profile_url = get_first(microformats, 'ownerProfileUrl') + + live_content = get_first(video_details, 'isLiveContent') + is_upcoming = get_first(video_details, 'isUpcoming') + if is_live is None: + if is_upcoming or live_content is False: + is_live = False + if is_upcoming is None and (live_content or is_live): + is_upcoming = False + live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp')) + live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp')) + if not duration and live_endtime and live_starttime: + duration = live_endtime - live_starttime + info = { 'id': video_id, 'title': self._live_title(video_title) if is_live else video_title, @@ -2812,9 +2829,13 @@ def feed_entry(name): 'webpage_url': webpage_url, 'categories': [category] if category else None, 'tags': keywords, - 'is_live': is_live, 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'), - 'was_live': get_first(video_details, 'isLiveContent'), + 'is_live': is_live, + 'was_live': (False if is_live or is_upcoming or live_content is False + else None if is_live is None or is_upcoming is None + else live_content), + 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL + 'release_timestamp': live_starttime, } pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)