Skip to content

Commit

Permalink
Merge pull request ytdl-org#140 from WolfganP/patch-1
Browse files (browse the repository at this point in the history)
ITV BTCC new pages' URL update (articles instead of races)
  • Loading branch information
blackjack4494 authored Nov 30, 2020
2 parents 94c2909 + 85da405 commit e29288d
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions youtube_dlc/extractor/itv.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,6 +20,7 @@
merge_dicts,
parse_duration,
smuggle_url,
try_get,
url_or_none,
xpath_with_ns,
xpath_element,
Expand Down Expand Up @@ -280,12 +281,12 @@ def extract_subtitle(sub_url):
class ITVBTCCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
'info_dict': {
'id': 'btcc-2018-all-the-action-from-brands-hatch',
'title': 'BTCC 2018: All the action from Brands Hatch',
'id': 'btcc-2019-brands-hatch-gp-race-action',
'title': 'BTCC 2019: Brands Hatch GP race action',
},
'playlist_mincount': 9,
'playlist_count': 12,
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'

Expand All @@ -294,6 +295,16 @@ def _real_extract(self, url):

webpage = self._download_webpage(url, playlist_id)

json_map = try_get(self._parse_json(self._html_search_regex(
'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
lambda x: x['props']['pageProps']['article']['body']['content']) or []

# Discard empty objects
video_ids = []
for video in json_map:
if video['data'].get('id'):
video_ids.append(video['data']['id'])

entries = [
self.url_result(
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
Expand All @@ -305,7 +316,7 @@ def _real_extract(self, url):
'referrer': url,
}),
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
for video_id in video_ids]

title = self._og_search_title(webpage, fatal=False)

Expand Down

0 comments on commit e29288d

Please sign in to comment.