Skip to content

Commit

Permalink
[youtube:tab] Pass innertube context and x-goog-visitor-id header alo…
Browse files Browse the repository at this point in the history
…ng with continuation requests (closes #28702)
  • Loading branch information
dstftw committed Apr 8, 2021
1 parent 27e5a44 commit 1b0a13f
Showing 1 changed file with 27 additions and 15 deletions.
42 changes: 27 additions & 15 deletions youtube_dl/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def _extract_ytcfg(self, video_id, webpage):
return self._parse_json(
self._search_regex(
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False)
default='{}'), video_id, fatal=False) or {}

def _extract_video(self, renderer):
video_id = renderer['videoId']
Expand Down Expand Up @@ -2475,7 +2475,7 @@ def _extract_continuation(cls, renderer):
ctp = continuation_ep.get('clickTrackingParams')
return YoutubeTabIE._build_continuation_query(continuation, ctp)

def _entries(self, tab, identity_token):
def _entries(self, tab, item_id, webpage):
tab_content = try_get(tab, lambda x: x['content'], dict)
if not tab_content:
return
Expand Down Expand Up @@ -2535,26 +2535,37 @@ def _entries(self, tab, identity_token):
yield entry
continuation = self._extract_continuation(rich_grid_renderer)

ytcfg = self._extract_ytcfg(item_id, webpage)
client_version = try_get(
ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or '2.20210407.08.00'

headers = {
'x-youtube-client-name': '1',
'x-youtube-client-version': '2.20201112.04.01',
'x-youtube-client-version': client_version,
'content-type': 'application/json',
}

context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict) or {
'client': {
'clientName': 'WEB',
'clientVersion': client_version,
}
}
visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

identity_token = self._extract_identity_token(ytcfg, webpage)
if identity_token:
headers['x-youtube-identity-token'] = identity_token

data = {
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20201021.03.00',
}
},
'context': context,
}

for page_num in itertools.count(1):
if not continuation:
break
if visitor_data:
headers['x-goog-visitor-id'] = visitor_data
data['continuation'] = continuation['continuation']
data['clickTracking'] = {
'clickTrackingParams': continuation['itct']
Expand All @@ -2579,6 +2590,9 @@ def _entries(self, tab, identity_token):
if not response:
break

visitor_data = try_get(
response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

continuation_contents = try_get(
response, lambda x: x['continuationContents'], dict)
if continuation_contents:
Expand Down Expand Up @@ -2687,7 +2701,7 @@ def _extract_alert(data):
alerts.append(text)
return '\n'.join(alerts)

def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
def _extract_from_tabs(self, item_id, webpage, data, tabs):
selected_tab = self._extract_selected_tab(tabs)
renderer = try_get(
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
Expand All @@ -2712,7 +2726,7 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
if renderer:
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
playlist = self.playlist_result(
self._entries(selected_tab, identity_token),
self._entries(selected_tab, item_id, webpage),
playlist_id=playlist_id, playlist_title=title,
playlist_description=description)
playlist.update(self._extract_uploader(data))
Expand All @@ -2736,8 +2750,7 @@ def _extract_from_playlist(self, item_id, url, data, playlist):
self._playlist_entries(playlist), playlist_id=playlist_id,
playlist_title=title)

def _extract_identity_token(self, webpage, item_id):
ytcfg = self._extract_ytcfg(item_id, webpage)
def _extract_identity_token(self, ytcfg, webpage):
if ytcfg:
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
if token:
Expand All @@ -2760,12 +2773,11 @@ def _real_extract(self, url):
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, item_id)
identity_token = self._extract_identity_token(webpage, item_id)
data = self._extract_yt_initial_data(item_id, webpage)
tabs = try_get(
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
if tabs:
return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
return self._extract_from_tabs(item_id, webpage, data, tabs)
playlist = try_get(
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
if playlist:
Expand Down

0 comments on commit 1b0a13f

Please sign in to comment.