Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Esquire] Add new extractor #12978

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions youtube_dl/extractor/esquire.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .adobepass import AdobePassIE
from ..utils import (
extract_attributes,
smuggle_url,
update_url_query,
)


class EsquireIE(AdobePassIE):
_VALID_URL = r'https?://tv\.esquire\.com/now/(?:[^/]+)/?(?:full-episode)?/(?P<title>[^/?#]+)/(?P<id>[^/?#]+)'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do not capture groups that you are not using.
Something like:

_VALID_URL = r'https?://tv\.esquire\.com/now/(?:[^/]+/)?full-episode/[^/]+/(?P<id>\d+)'

_TESTS = [{
'url': 'http://tv.esquire.com/now/team-ninja-warrior/full-episode/finals-week-1/631550531649',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the test requires authentication to pass, it should be skipped (otherwise it will fail in Travis CI).

'md5': '436ee8095d7179704cf2738994d36a20',
'info_dict': {
'id': '631550531649',
'ext': 'mp4',
'title': 'Finals Week 1',
'description': 'The first finals episode features the winners of 3 qualifying episodes.',
'upload_date': '20160301',
'timestamp': 1456808400,
'uploader': 'NBCU-MPAT',
}
}, {
'url': 'http://tv.esquire.com/now/friday-night-tykes/full-episode/if-you-wanna-show…/903098435679',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the test case has the same flow as the previous one, there is no need for another test.

'md5': 'f1bf3f934ad55424d8c9333a4ab5d3aa',
'info_dict': {
'id': '903098435679',
'ext': 'mp4',
'title': 'If You Wanna Show\u2026',
'description': 'On the season Finale of Friday Night Tykes, two champions will be crowned.',
'upload_date': '20170321',
'timestamp': 1490068800,
'uploader': 'NBCU-MPAT',
}
}, {
'url': 'http://tv.esquire.com/now/full-episode/ninja-warrior-402/759415363504',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a test case related only to changes in _VALID_URL, just use only_matching.

'md5': '98a7e5cf805a8a9ebe436345a1bbeb58',
'info_dict': {
'id': '759415363504',
'ext': 'mp4',
'title': 'Ninja Warrior 402',
'description': 'Sasuke 4 (Pt. 2) Athletes compete the ultimate obstacle course.',
'uploader': 'NBCU-MPAT',
'timestamp': 1472875200,
'upload_date': '20160903',
}
}]

def _real_extract(self, url):
# Downloads the episode page, reads the player attributes from its <section>
# tag, locates the embedded ThePlatform player and prepares the query (plus an
# auth token when required) that the remainder of this method appends to the
# link.theplatform.com URL.

# Used only as the label for the page download below; the id that ends up in
# the result is data-mpx-id. Presumably the `id` group of _VALID_URL - confirm.
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)

# Capture the whole opening <section ...> tag that carries the
# data-tve-page-authz-player-container attribute and parse its attributes.
player_params = extract_attributes(self._search_regex(
r'(<section[^>]+data-tve-page-authz-player-container[^>]*>)', webpage, 'player params'))
# Direct indexing: a tag lacking either attribute raises KeyError here.
video_id = player_params['data-mpx-id']
title = player_params['data-episode-title']

# Extract the ThePlatform account pid and the path following /embed/select/
# from the player's data-src URL.
# NOTE(review): re.search() returns None when nothing matches, so .groups()
# raises AttributeError on a page without this embed - consider guarding it.
# NOTE(review): the scheme part `(?:https?)?` is not followed by `:`, so this
# looks like it only matches protocol-relative `//player...` URLs - confirm.
account_pid, path = re.search(
r'data-src=\"(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/embed/select/([\S]+)\"',
webpage).groups()

# Query parameters merged into the ThePlatform URL further down.
query = {
'mbr': 'true',
'manifest': 'm3u'
}

# Only pages whose player tag has data-entitlement == 'auth' request an
# MVPD auth value; it is stored under query['auth'].
if player_params.get('data-entitlement') == 'auth':
adobe_pass = {}
# Both the regex search and the JSON parse are non-fatal: if either yields
# nothing, adobe_pass stays empty and the hard-coded fallbacks below apply.
drupal_settings = self._search_regex(
r'Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings', fatal=False)
if drupal_settings:
drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False)
if drupal_settings:
adobe_pass = drupal_settings.get('adobePass', {})
# NOTE(review): review feedback on this change says 'esquire' is not a valid
# Adobe Pass requestor/resource id and that 'style' should be the fallback -
# confirm before relying on these defaults.
resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'esquire'),
title, video_id, player_params.get('data-episode-rating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, adobe_pass.get('adobePassRequestorId', 'esquire'), resource)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`esquire` is not a valid Adobe Pass requestor ID; the fallback value should be `style` (the value that is actually used now). The same applies to the Adobe Pass resource ID.


info = self._search_json_ld(webpage, video_id, default={})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the site does not offer JSON-LD data, there is no need to extract it.

info.update({
'_type': 'url_transparent',
'url': smuggle_url(update_url_query(
'http://link.theplatform.com/s/%s/%s' % (account_pid, path),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be extracted directly from data-release-url in the webpage.

query), {'force_smil_url': True}),
'id': video_id,
'title': title,
'episode': title,
'ie_key': 'ThePlatform',
})
return info
1 change: 1 addition & 0 deletions youtube_dl/extractor/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@
ESPNIE,
ESPNArticleIE,
)
from .esquire import EsquireIE
from .esri import EsriVideoIE
from .etonline import ETOnlineIE
from .europa import EuropaIE
Expand Down