From 91e64c6941d9057b8b20405323988480746ead3e Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 08:14:15 -0400 Subject: [PATCH 1/6] [spreaker] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/spreaker.py | 161 +++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 youtube_dl/extractor/spreaker.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 897557f93b9..09ee08aadc4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,6 +958,7 @@ from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE from .sportschau import SportschauIE +from .spreaker import SpreakerIE from .sprout import SproutIE from .srgssr import ( SRGSSRIE, diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py new file mode 100644 index 00000000000..3982267a8e0 --- /dev/null +++ b/youtube_dl/extractor/spreaker.py @@ -0,0 +1,161 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none, ExtractorError + + +class SpreakerIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r"""(?x)^ + https?:// + (?:www.|api.)? + spreaker.com/ + (?: + show/[a-z0-9_-]+| + user/[a-z0-9_-]+/[a-z0-9_-]| + episode/(?P[0-9]+) + ) + """ + _TESTS = [ + { + 'url': 'https://www.spreaker.com/show/success-with-music', + 'info_dict': { + 'title': 'Success With Music', + 'id': 2317431, + }, + 'playlist_mincount': 14, + }, + { + 'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' + 'market-your-music-part-2'), + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + }, + { + 'url': 'https://api.spreaker.com/episode/12534508', + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + } + ] + + def _spreaker_episode_data_to_info(self, data): + upload_date = data['published_at'][0:10].replace('-', '') + author = data.get('author') + if not author: + author = {} + stats = data.get('stats') + view_count = like_count = comment_count = 0 + show = data.get('show') + if not show: + show = {} + else: + show_image = show.get('image') + if not show_image: + show_image = {} + + if stats: + view_count = (stats.get('plays', 0) + + stats.get('plays_streaming', 0) + + stats.get('plays_download', 0)) + like_count = stats.get('likes', 0) + comment_count = stats.get('messages', 0) + + return { + 'id': compat_str(data['episode_id']), + 'title': data['title'], + 'url': data['download_url'], + 'display_id': data.get('permalink'), + 'webpage_url': data.get('site_url'), + 'uploader': author.get('fullname'), + 'creator': author.get('fullname'), + 'release_date': upload_date, + 'upload_date': upload_date, + 'uploader_id': author.get('user_id'), + 'duration': int_or_none(data.get('length')), + 'view_count': int_or_none(view_count), + 'like_count': int_or_none(like_count), + 'comment_count': int_or_none(comment_count), + 'format': 'MPEG Layer 3', + 'format_id': 'mp3', + 'container': 'mp3', + 'ext': 'mp3', + 'thumbnail': show_image.get('big_url'), + 'language': show.get('language'), + 'thumbnails': [ + { + 'id': show_image.get('image_id'), + 'url': show_image.get('big_url'), + 'width': int_or_none(show_image.get('width')), + 'height': int_or_none(show_image.get('height')), + }, + { + 'url': show_image.get('large_url'), + }, + { + 'url': show_image.get('medium_url') + }, + { + 'url': show_image.get('small_url') + }, + ], + } + + def _real_extract(self, url): + episode_id = self._match_id(url) + + if re.match(r'^[0-9]+$', episode_id): + data_url = url + elif '/show/' in url: + html = self._download_webpage(url, None) + playlist_url = self._html_search_regex( + r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') + items = self._download_json(playlist_url, None) + items = items['response']['playlist']['items'] + + if not items: + raise ExtractorError('Empty playlist') + + urls = [x['api_url'] for x in items] + ret = [] + for index, url in enumerate(urls): + data = self._download_json(url, None)['response']['episode'] + dict_ = self._spreaker_episode_data_to_info(data) + dict_.update({ + 'playlist_id': compat_str(data['show_id']), + 'playlist_title': data['show']['title'], + 'playlist_index': index, + }) + ret.append(dict_) + + return self.playlist_result(ret, + data['show_id'], + data['show']['title']) + else: + html = self._download_webpage(url, None) + episode_id = self._html_search_regex( + r'data-episode_id="(?P[0-9]+)"', html, 'id') + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Could not find episode ID') + data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) + + data = self._download_json(data_url, episode_id)['response']['episode'] + if not data['download_enabled']: + raise ExtractorError('Not supported yet') + + return self._spreaker_episode_data_to_info(data) From 2eb228df1cb2ad0c91ef37fe25be47add412d313 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 16:38:47 -0400 Subject: [PATCH 2/6] [spreaker] Fixes requested Escape . in regexes Make separate extractors for episode page, playlist (show), API Support API's direct links to MP3 files Make counts set to None in case they are not found Handle when published_at is not present Other fixes --- youtube_dl/extractor/extractors.py | 6 +- youtube_dl/extractor/spreaker.py | 158 ++++++++++++++++------------- 2 files changed, 90 insertions(+), 74 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09ee08aadc4..7ca2cfd1903 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,7 +958,11 @@ from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE from .sportschau import SportschauIE -from .spreaker import SpreakerIE +from .spreaker import ( + SpreakerIE, + SpreakerAPIEpisodeIE, + SpreakerPlaylistIE +) from .sprout import SproutIE from .srgssr import ( SRGSSRIE, diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index 3982267a8e0..d2fb6c30412 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -8,30 +8,51 @@ from ..utils import int_or_none, ExtractorError -class SpreakerIE(InfoExtractor): +class SpreakerPlaylistIE(InfoExtractor): IE_NAME = 'spreaker' - _VALID_URL = r"""(?x)^ - https?:// - (?:www.|api.)? - spreaker.com/ - (?: - show/[a-z0-9_-]+| - user/[a-z0-9_-]+/[a-z0-9_-]| - episode/(?P[0-9]+) - ) - """ - _TESTS = [ - { + _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' + _TEST = { 'url': 'https://www.spreaker.com/show/success-with-music', 'info_dict': { 'title': 'Success With Music', 'id': 2317431, }, 'playlist_mincount': 14, - }, + } + + def _real_extract(self, url): + html = self._download_webpage(url, None) + playlist_url = self._html_search_regex( + r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') + items = self._download_json(playlist_url, None) + items = items['response']['playlist']['items'] + + if not items: + raise ExtractorError('Empty playlist') + + urls = [x['api_url'] for x in items] + ret = [] + for index, url in enumerate(urls): + data = self._download_json(url, None)['response']['episode'] + dict_ = SpreakerIE._spreaker_episode_data_to_info(data) + dict_.update({ + 'playlist_id': compat_str(data['show_id']), + 'playlist_title': data['show']['title'], + 'playlist_index': index, + }) + ret.append(dict_) + + return self.playlist_result(ret, + data['show_id'], + data['show']['title']) + + +class SpreakerAPIEpisodeIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P[0-9]+)(?:/[^\.]+\.mp3$)?' + _TESTS = [ { - 'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' - 'market-your-music-part-2'), + 'url': 'https://api.spreaker.com/episode/12534508', 'info_dict': { 'id': '12534508', 'ext': 'mp3', @@ -42,7 +63,7 @@ class SpreakerIE(InfoExtractor): }, }, { - 'url': 'https://api.spreaker.com/episode/12534508', + 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', 'info_dict': { 'id': '12534508', 'ext': 'mp3', @@ -51,23 +72,48 @@ class SpreakerIE(InfoExtractor): 'uploader': 'SWM', 'uploader_id': 9780658, }, - } + }, ] - def _spreaker_episode_data_to_info(self, data): - upload_date = data['published_at'][0:10].replace('-', '') - author = data.get('author') - if not author: - author = {} - stats = data.get('stats') - view_count = like_count = comment_count = 0 - show = data.get('show') - if not show: - show = {} - else: - show_image = show.get('image') - if not show_image: - show_image = {} + def _real_extract(self, url): + episode_id = self._match_id(url) + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Invalid ID') + + url = 'https://api.spreaker.com/episode/%s' % (episode_id,) + data = self._download_json(url, episode_id)['response']['episode'] + if not data['download_enabled']: + raise ExtractorError('Not supported yet') + + return SpreakerIE._spreaker_episode_data_to_info(data) + + +class SpreakerIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]' + _TEST = { + 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'Marketing Your Music - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + } + + @staticmethod + def _spreaker_episode_data_to_info(data): + published_at = data.get('published_at') + upload_date = None + if published_at: + upload_date = published_at[0:10].replace('-', '') + author = data.get('author', {}) + stats = data.get('stats', {}) + view_count = like_count = comment_count = None + show = data.get('show', {}) + show_image = show.get('image', {}) if stats: view_count = (stats.get('plays', 0) + @@ -117,45 +163,11 @@ def _spreaker_episode_data_to_info(self, data): } def _real_extract(self, url): - episode_id = self._match_id(url) - - if re.match(r'^[0-9]+$', episode_id): - data_url = url - elif '/show/' in url: - html = self._download_webpage(url, None) - playlist_url = self._html_search_regex( - r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') - items = self._download_json(playlist_url, None) - items = items['response']['playlist']['items'] - - if not items: - raise ExtractorError('Empty playlist') - - urls = [x['api_url'] for x in items] - ret = [] - for index, url in enumerate(urls): - data = self._download_json(url, None)['response']['episode'] - dict_ = self._spreaker_episode_data_to_info(data) - dict_.update({ - 'playlist_id': compat_str(data['show_id']), - 'playlist_title': data['show']['title'], - 'playlist_index': index, - }) - ret.append(dict_) - - return self.playlist_result(ret, - data['show_id'], - data['show']['title']) - else: - html = self._download_webpage(url, None) - episode_id = self._html_search_regex( - r'data-episode_id="(?P[0-9]+)"', html, 'id') - if not re.match(r'^[0-9]+$', episode_id): - raise ExtractorError('Could not find episode ID') - data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - - data = self._download_json(data_url, episode_id)['response']['episode'] - if not data['download_enabled']: - raise ExtractorError('Not supported yet') + html = self._download_webpage(url, None) + episode_id = self._html_search_regex( + r'data-episode_id="(?P[0-9]+)"', html, 'id') + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Could not find episode ID') + data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - return self._spreaker_episode_data_to_info(data) + return self.url_result(data_url) From a894bc1c3e5580f90faa965d83073ec5be2a7159 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 16:42:45 -0400 Subject: [PATCH 3/6] [spreaker] Make IE_NAME values unique --- youtube_dl/extractor/spreaker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index d2fb6c30412..60f6af6243e 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -9,7 +9,7 @@ class SpreakerPlaylistIE(InfoExtractor): - IE_NAME = 'spreaker' + IE_NAME = 'spreaker:playlist' _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' _TEST = { 'url': 'https://www.spreaker.com/show/success-with-music', @@ -48,7 +48,7 @@ def _real_extract(self, url): class SpreakerAPIEpisodeIE(InfoExtractor): - IE_NAME = 'spreaker' + IE_NAME = 'spreaker:api' _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P[0-9]+)(?:/[^\.]+\.mp3$)?' _TESTS = [ { From 1c9e16b8b9f9fb73c5a717423891ccba96a3f41d Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Thu, 10 Aug 2017 23:28:19 -0400 Subject: [PATCH 4/6] [spreaker] Set extractor name to spreaker to override IE_NAME --- youtube_dl/extractor/spreaker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index 60f6af6243e..40ccdd97300 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -160,6 +160,7 @@ def _spreaker_episode_data_to_info(data): 'url': show_image.get('small_url') }, ], + 'extractor': 'spreaker', } def _real_extract(self, url): From b653c19f8fa1d05b2b7479c0bf60e4c83e431717 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Fri, 11 Aug 2017 00:31:57 -0400 Subject: [PATCH 5/6] [spreaker] Handle when playlist JSON has multiple pages --- youtube_dl/extractor/spreaker.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index 40ccdd97300..e27078b7687 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -24,12 +24,30 @@ def _real_extract(self, url): html = self._download_webpage(url, None) playlist_url = self._html_search_regex( r'data-playlist_url="(?Phttps\://[^"]+")', html, 'url') - items = self._download_json(playlist_url, None) - items = items['response']['playlist']['items'] + items = self._download_json(playlist_url, + None, + 'Downloading playlist JSON') + playlist = items['response']['playlist'] + next_url = playlist.get('next_url') + items = playlist.get('items', []) if not items: raise ExtractorError('Empty playlist') + page_no = 2 + download_str = 'Downloading playlist JSON page #%d' + while next_url: + items_ = self._download_json(next_url, + None, + download_str % (page_no,)) + playlist_ = items_['response']['playlist'] + new_items = playlist_.get('items', []) + if not new_items: + break + items += new_items + next_url = playlist_.get('next_url') + page_no += 1 + urls = [x['api_url'] for x in items] ret = [] for index, url in enumerate(urls): From 5ec9047d2cba4e29aab1e5d1c5ee7d5b8a4b1ae7 Mon Sep 17 00:00:00 2001 From: Andrew Udvare Date: Sun, 1 Jul 2018 17:25:38 -0400 Subject: [PATCH 6/6] [spreaker] Fixes requested --- youtube_dl/extractor/extractors.py | 2 - youtube_dl/extractor/spreaker.py | 226 ++++++++++++++--------------- 2 files changed, 113 insertions(+), 115 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 95927dd7b63..03c857aace0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1022,10 +1022,8 @@ from .sport5 import Sport5IE from .sportbox import SportBoxEmbedIE from .sportdeutschland import SportDeutschlandIE -from .sportschau import SportschauIE from .spreaker import ( SpreakerIE, - SpreakerAPIEpisodeIE, SpreakerPlaylistIE ) from .springboardplatform import SpringboardPlatformIE diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py index e27078b7687..d89fcdc9276 100644 --- a/youtube_dl/extractor/spreaker.py +++ b/youtube_dl/extractor/spreaker.py @@ -8,16 +8,84 @@ from ..utils import int_or_none, ExtractorError +def _spreaker_episode_data_to_info(data): + published_at = data.get('published_at') + upload_date = None + if published_at: + upload_date = published_at[0:10].replace('-', '') + author = data.get('author', {}) + stats = data.get('stats', {}) + view_count = like_count = comment_count = None + show = data.get('show', {}) + show_image = show.get('image', {}) + + if stats: + plays = stats.get('plays') + plays_streaming = stats.get('plays_streaming') + plays_download = stats.get('plays_download') + view_count = None + for x in [plays, plays_streaming, plays_download]: + if x is None: + continue + if view_count is None: + view_count = x + else: + view_count += x + like_count = stats.get('likes') + comment_count = stats.get('messages') + + return { + 'id': compat_str(data['episode_id']), + 'title': data['title'], + 'url': data['download_url'], + 'display_id': data.get('permalink'), + 'webpage_url': data.get('site_url'), + 'uploader': author.get('fullname'), + 'creator': author.get('fullname'), + 'release_date': upload_date, + 'upload_date': upload_date, + 'uploader_id': author.get('user_id'), + 'duration': int_or_none(data.get('length')), + 'view_count': int_or_none(view_count), + 'like_count': int_or_none(like_count), + 'comment_count': int_or_none(comment_count), + 'format': 'MPEG Layer 3', + 'format_id': 'mp3', + 'container': 'mp3', + 'ext': 'mp3', + 'thumbnail': show_image.get('big_url'), + 'language': show.get('language'), + 'thumbnails': [ + { + 'id': show_image.get('image_id'), + 'url': show_image.get('big_url'), + 'width': int_or_none(show_image.get('width')), + 'height': int_or_none(show_image.get('height')), + }, + { + 'url': show_image.get('large_url'), + }, + { + 'url': show_image.get('medium_url') + }, + { + 'url': show_image.get('small_url') + }, + ], + 'extractor': 'spreaker', + } + + class SpreakerPlaylistIE(InfoExtractor): IE_NAME = 'spreaker:playlist' - _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' + _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/[a-z0-9_-]+' _TEST = { - 'url': 'https://www.spreaker.com/show/success-with-music', - 'info_dict': { - 'title': 'Success With Music', - 'id': 2317431, - }, - 'playlist_mincount': 14, + 'url': 'https://www.spreaker.com/show/success-with-music', + 'info_dict': { + 'title': 'Success With Music', + 'id': 2317431, + }, + 'playlist_mincount': 14, } def _real_extract(self, url): @@ -49,144 +117,76 @@ def _real_extract(self, url): page_no += 1 urls = [x['api_url'] for x in items] - ret = [] + entries = [] for index, url in enumerate(urls): data = self._download_json(url, None)['response']['episode'] - dict_ = SpreakerIE._spreaker_episode_data_to_info(data) - dict_.update({ - 'playlist_id': compat_str(data['show_id']), - 'playlist_title': data['show']['title'], - 'playlist_index': index, - }) - ret.append(dict_) - - return self.playlist_result(ret, + dict_ = _spreaker_episode_data_to_info(data) + entries.append(dict_) + + return self.playlist_result(entries, data['show_id'], data['show']['title']) -class SpreakerAPIEpisodeIE(InfoExtractor): - IE_NAME = 'spreaker:api' - _VALID_URL = r'^https?://(?:api\.)?spreaker\.com/(?:download/)?episode/(?P[0-9]+)(?:/[^\.]+\.mp3$)?' +class SpreakerIE(InfoExtractor): + IE_NAME = 'spreaker' + _VALID_URL = (r'https?://(?:(?:api|www)\.)?spreaker\.com/' + r'(?:(?:(?:download/)?episode/(?P[0-9]+)' + r'(?:/[^\.]+\.mp3$)?)|user/[a-z0-9_-]+/[a-z0-9_-]+)') _TESTS = [ { 'url': 'https://api.spreaker.com/episode/12534508', 'info_dict': { 'id': '12534508', 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', + 'title': 'EP:15 | Music Marketing (Likes) - Part 2', 'upload_date': '20170809', 'uploader': 'SWM', 'uploader_id': 9780658, }, }, { - 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', + 'url': ('https://api.spreaker.com/download/episode/12534508/' + 'swm_ep15_how_to_market_your_music_part_2.mp3'), 'info_dict': { 'id': '12534508', 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', + 'title': 'EP:15 | Music Marketing (Likes) - Part 2', 'upload_date': '20170809', 'uploader': 'SWM', 'uploader_id': 9780658, }, }, + { + 'url': ('https://www.spreaker.com/user/9780658/swm-ep15-how-to-' + 'market-your-music-part-2'), + 'info_dict': { + 'id': '12534508', + 'ext': 'mp3', + 'title': 'EP:15 | Music Marketing (Likes) - Part 2', + 'upload_date': '20170809', + 'uploader': 'SWM', + 'uploader_id': 9780658, + }, + } ] def _real_extract(self, url): episode_id = self._match_id(url) - if not re.match(r'^[0-9]+$', episode_id): - raise ExtractorError('Invalid ID') + if re.match(r'^[0-9]+$', episode_id): + url = 'https://api.spreaker.com/episode/%s' % (episode_id,) + else: + html = self._download_webpage(url, + None, + note='Downloading episode page') + episode_id = self._html_search_regex( + r'data-episode_id="(?P[0-9]+)"', html, 'id') + if not re.match(r'^[0-9]+$', episode_id): + raise ExtractorError('Could not find episode ID') + url = 'https://api.spreaker.com/episode/%s' % (episode_id) - url = 'https://api.spreaker.com/episode/%s' % (episode_id,) data = self._download_json(url, episode_id)['response']['episode'] if not data['download_enabled']: raise ExtractorError('Not supported yet') - return SpreakerIE._spreaker_episode_data_to_info(data) - - -class SpreakerIE(InfoExtractor): - IE_NAME = 'spreaker' - _VALID_URL = r'^https?://(?:www\.)?spreaker\.com/user/[a-z0-9_-]+/[a-z0-9_-]' - _TEST = { - 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', - 'info_dict': { - 'id': '12534508', - 'ext': 'mp3', - 'title': 'Marketing Your Music - Part 2', - 'upload_date': '20170809', - 'uploader': 'SWM', - 'uploader_id': 9780658, - }, - } - - @staticmethod - def _spreaker_episode_data_to_info(data): - published_at = data.get('published_at') - upload_date = None - if published_at: - upload_date = published_at[0:10].replace('-', '') - author = data.get('author', {}) - stats = data.get('stats', {}) - view_count = like_count = comment_count = None - show = data.get('show', {}) - show_image = show.get('image', {}) - - if stats: - view_count = (stats.get('plays', 0) + - stats.get('plays_streaming', 0) + - stats.get('plays_download', 0)) - like_count = stats.get('likes', 0) - comment_count = stats.get('messages', 0) - - return { - 'id': compat_str(data['episode_id']), - 'title': data['title'], - 'url': data['download_url'], - 'display_id': data.get('permalink'), - 'webpage_url': data.get('site_url'), - 'uploader': author.get('fullname'), - 'creator': author.get('fullname'), - 'release_date': upload_date, - 'upload_date': upload_date, - 'uploader_id': author.get('user_id'), - 'duration': int_or_none(data.get('length')), - 'view_count': int_or_none(view_count), - 'like_count': int_or_none(like_count), - 'comment_count': int_or_none(comment_count), - 'format': 'MPEG Layer 3', - 'format_id': 'mp3', - 'container': 'mp3', - 'ext': 'mp3', - 'thumbnail': show_image.get('big_url'), - 'language': show.get('language'), - 'thumbnails': [ - { - 'id': show_image.get('image_id'), - 'url': show_image.get('big_url'), - 'width': int_or_none(show_image.get('width')), - 'height': int_or_none(show_image.get('height')), - }, - { - 'url': show_image.get('large_url'), - }, - { - 'url': show_image.get('medium_url') - }, - { - 'url': show_image.get('small_url') - }, - ], - 'extractor': 'spreaker', - } - - def _real_extract(self, url): - html = self._download_webpage(url, None) - episode_id = self._html_search_regex( - r'data-episode_id="(?P[0-9]+)"', html, 'id') - if not re.match(r'^[0-9]+$', episode_id): - raise ExtractorError('Could not find episode ID') - data_url = 'https://api.spreaker.com/episode/%s' % (episode_id) - - return self.url_result(data_url) + return _spreaker_episode_data_to_info(data)