From 2190b6aaa1a65ad172f2e34382045b5753402cdc Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 17:16:19 -0500 Subject: [PATCH 01/14] [narando] Add new extractor --- youtube_dl/extractor/extractors.py | 3 +++ youtube_dl/extractor/narando.py | 42 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/narando.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 87c7d8b0ce6..ee54ea1b3e9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1480,3 +1480,6 @@ from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE from .zype import ZypeIE + + +from .narando import NarandoIE diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py new file mode 100644 index 00000000000..a492c7b3f8c --- /dev/null +++ b/youtube_dl/extractor/narando.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +class NarandoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' + _TEST = { + 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { +# 'id': 'b2t4t789kxgy9g7ms4rwjvvw', was being used as id previously, is internal video id + 'id': 'an-ihrem-selbstlob-erkennt-man-sie', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + 'url': 'https://static.narando.com/sounds/10492/original.mp3', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) +# webpage = self._download_webpage(url,"?") +# print(url) +# print('https://narando.com/articles/'+video_id) + webpage = self._download_webpage('https://narando.com/articles/'+video_id+"?", video_id)#for some reason, this absolutely refused to work, so I'm negating the video_id and just adding it directly + # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') +# print(title) + player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') + player_page = self._download_webpage('https://narando.com/widget?r='+player_id+'&',player_id)#same as above + download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'mp3_ddl') + return { + 'id': video_id, + 'title': title, + 'url': download_url, + # TODO more properties (see youtube_dl/extractor/common.py) + } From a729d43d9e5c98035f7200182a4805e8aa4087fd Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 18:28:29 -0500 Subject: [PATCH 02/14] [narando] Add description support and improve code to meet youtube-dl's standards --- youtube_dl/extractor/narando.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index a492c7b3f8c..66a73359784 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -14,6 +14,7 @@ class NarandoIE(InfoExtractor): 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', + 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... „Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: @@ -27,16 +28,18 @@ def _real_extract(self, url): # webpage = self._download_webpage(url,"?") # print(url) # print('https://narando.com/articles/'+video_id) - webpage = self._download_webpage('https://narando.com/articles/'+video_id+"?", video_id)#for some reason, this absolutely refused to work, so I'm negating the video_id and just adding it directly + webpage = self._download_webpage('https://narando.com/articles/'+video_id, video_id) # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') # print(title) player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r='+player_id+'&',player_id)#same as above - download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'mp3_ddl') + player_page = self._download_webpage('https://narando.com/widget?r='+player_id, player_id) + download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') + description = self._html_search_regex(ur'', webpage, 'description') return { 'id': video_id, 'title': title, 'url': download_url, + 'description': description, # TODO more properties (see youtube_dl/extractor/common.py) } From fed1f5ee0f9dccc2bc71a251f0c5e7cd33c80c92 Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 18:38:08 -0500 Subject: [PATCH 03/14] [narando] fix flake8 issues --- youtube_dl/extractor/narando.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 66a73359784..bdb36f3e94a 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -3,13 +3,13 @@ from .common import InfoExtractor + class NarandoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' _TEST = { 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { -# 'id': 'b2t4t789kxgy9g7ms4rwjvvw', was being used as id previously, is internal video id 'id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', @@ -28,14 +28,14 @@ def _real_extract(self, url): # webpage = self._download_webpage(url,"?") # print(url) # print('https://narando.com/articles/'+video_id) - webpage = self._download_webpage('https://narando.com/articles/'+video_id, video_id) + webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') # print(title) player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r='+player_id, player_id) + player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') - description = self._html_search_regex(ur'', webpage, 'description') + description = self._html_search_regex(r'', webpage, 'description') return { 'id': video_id, 'title': title, From d33506b6d754807abaa11566287114cebe9109d9 Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 20:39:07 -0500 Subject: [PATCH 04/14] [narando] Fix bad method of extracting player_id --- youtube_dl/extractor/narando.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index bdb36f3e94a..78282e1b847 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -32,7 +32,7 @@ def _real_extract(self, url): # TODO more code goes here, for example ... title = self._html_search_regex(r'

(.+?)

', webpage, 'title') # print(title) - player_id = self._html_search_regex(" ".join(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)'.split()), webpage, 'player_id') + player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') description = self._html_search_regex(r'', webpage, 'description') From 92ae267c88520d95cc56198eb8f6f389778c32a0 Mon Sep 17 00:00:00 2001 From: ealgase Date: Tue, 20 Nov 2018 22:16:41 -0500 Subject: [PATCH 05/14] [narando] seperate [narando:player] extractor, improve code readability --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/narando.py | 48 ++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ee54ea1b3e9..b7cca0c254d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1482,4 +1482,4 @@ from .zype import ZypeIE -from .narando import NarandoIE +from .narando import NarandoIE, NarandoPlayerIE diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 78282e1b847..6673b80074e 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -5,6 +5,7 @@ class NarandoIE(InfoExtractor): + IE_NAME = "narando" _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' _TEST = { 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', @@ -15,31 +16,52 @@ class NarandoIE(InfoExtractor): 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... „Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) } } def _real_extract(self, url): video_id = self._match_id(url) -# webpage = self._download_webpage(url,"?") -# print(url) -# print('https://narando.com/articles/'+video_id) + webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) - # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') -# print(title) + player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) - download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'download_url') + mobj = NarandoPlayerIE() + download_url = mobj._real_extract("https://narando.com/widget?r=" + player_id)['url'] description = self._html_search_regex(r'', webpage, 'description') return { 'id': video_id, 'title': title, 'url': download_url, 'description': description, - # TODO more properties (see youtube_dl/extractor/common.py) + } + + +class NarandoPlayerIE(InfoExtractor): + IE_NAME = "narando:player" + _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' + _TEST = { + 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + 'url': 'https://static.narando.com/sounds/10492/original.mp3', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + print(video_id) + webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) + print(webpage) + title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') + + download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') + return { + 'id': video_id, + 'title': title, + 'url': download_url, } From 200ad7687a30ba0113660a581b40c1637cbe4863 Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 09:36:01 -0500 Subject: [PATCH 06/14] [narando] remove separate player extractor (was causing issues), add display_id attribute --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/narando.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b7cca0c254d..2c1df5efe19 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -672,6 +672,7 @@ MyviEmbedIE, ) from .myvidster import MyVidsterIE +from .narando import NarandoIE from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicIE, @@ -1482,4 +1483,3 @@ from .zype import ZypeIE -from .narando import NarandoIE, NarandoPlayerIE diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 6673b80074e..eea44542713 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -11,7 +11,8 @@ class NarandoIE(InfoExtractor): 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { - 'id': 'an-ihrem-selbstlob-erkennt-man-sie', + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'display_id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', @@ -27,17 +28,19 @@ def _real_extract(self, url): title = self._html_search_regex(r'

(.+?)

', webpage, 'title') player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') - mobj = NarandoPlayerIE() - download_url = mobj._real_extract("https://narando.com/widget?r=" + player_id)['url'] + player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) + download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'url') +# download_url = NarandoPlayerIE()._real_extract('https://narando.com/widget?r=' + player_id)['url'] description = self._html_search_regex(r'', webpage, 'description') return { - 'id': video_id, + 'display_id': video_id, + 'id': player_id, 'title': title, 'url': download_url, 'description': description, } - +"""to be implemented later class NarandoPlayerIE(InfoExtractor): IE_NAME = "narando:player" _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' @@ -54,9 +57,7 @@ class NarandoPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - print(video_id) webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) - print(webpage) title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') @@ -65,3 +66,4 @@ def _real_extract(self, url): 'title': title, 'url': download_url, } +""" From f8a4a388804c87d6a8042896a18d5e4986b7f782 Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 15:20:41 -0500 Subject: [PATCH 07/14] [narando] add separate [narando:player] extractor --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/narando.py | 54 ++++++++++++++---------------- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2c1df5efe19..c3d1b81a350 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -672,7 +672,10 @@ MyviEmbedIE, ) from .myvidster import MyVidsterIE -from .narando import NarandoIE +from .narando import ( + NarandoIE, + NarandoPlayerIE, +) from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicIE, diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index eea44542713..c8a8ef30c9d 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -4,66 +4,62 @@ from .common import InfoExtractor -class NarandoIE(InfoExtractor): - IE_NAME = "narando" - _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' +class NarandoPlayerIE(InfoExtractor): + IE_NAME = "narando:player" + _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' _TEST = { - 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', + 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { 'id': 'b2t4t789kxgy9g7ms4rwjvvw', - 'display_id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', - 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... „Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) + title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') + download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') - webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) - - title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - - player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) - download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'url') -# download_url = NarandoPlayerIE()._real_extract('https://narando.com/widget?r=' + player_id)['url'] - description = self._html_search_regex(r'', webpage, 'description') return { - 'display_id': video_id, - 'id': player_id, + 'id': video_id, 'title': title, 'url': download_url, - 'description': description, } -"""to be implemented later -class NarandoPlayerIE(InfoExtractor): - IE_NAME = "narando:player" - _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' + +class NarandoIE(InfoExtractor): + IE_NAME = "narando" + _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' _TEST = { - 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', + 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'display_id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', + 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... „Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) - title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') + webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') + player_url = 'https://narando.com/widget?r=' + player_id + description = self._html_search_regex(r'', webpage, 'description') - download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') return { - 'id': video_id, + 'display_id': video_id, + 'id': player_id, 'title': title, - 'url': download_url, + 'url': player_url, + 'description': description, + '_type': 'url', } -""" From b709ed1fc9c551423ed8f9cba8ed6fb27fb6bbae Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 19:11:22 -0500 Subject: [PATCH 08/14] [narando] remove display_id attribute --- youtube_dl/extractor/narando.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index c8a8ef30c9d..ffe682f9ec1 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -39,7 +39,6 @@ class NarandoIE(InfoExtractor): 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { 'id': 'b2t4t789kxgy9g7ms4rwjvvw', - 'display_id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', @@ -56,7 +55,6 @@ def _real_extract(self, url): description = self._html_search_regex(r'', webpage, 'description') return { - 'display_id': video_id, 'id': player_id, 'title': title, 'url': player_url, From 6892fcbcc43e1ff03e6676b223897e06b35717d0 Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 20:12:49 -0500 Subject: [PATCH 09/14] [narando] remove description attribute (was broken in Python 3.2) --- youtube_dl/extractor/narando.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index ffe682f9ec1..2b05b705bcc 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -42,7 +42,6 @@ class NarandoIE(InfoExtractor): 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', - 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... „Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', } } @@ -52,12 +51,10 @@ def _real_extract(self, url): title = self._html_search_regex(r'

(.+?)

', webpage, 'title') player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') player_url = 'https://narando.com/widget?r=' + player_id - description = self._html_search_regex(r'', webpage, 'description') return { 'id': player_id, 'title': title, 'url': player_url, - 'description': description, '_type': 'url', } From 0c2fc982d37efb0e17ac69fee4b82355fa45c101 Mon Sep 17 00:00:00 2001 From: ealgase Date: Fri, 30 Nov 2018 19:07:26 -0500 Subject: [PATCH 10/14] [narando] Improve coding methods as requested by dstftw in #18268 --- youtube_dl/extractor/narando.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 2b05b705bcc..a6bb5f4735e 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -6,7 +6,7 @@ class NarandoPlayerIE(InfoExtractor): IE_NAME = "narando:player" - _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' + _VALID_URL = r'https://narando\.com/widget\?r=(?P\w+)' _TEST = { 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', @@ -14,26 +14,25 @@ class NarandoPlayerIE(InfoExtractor): 'id': 'b2t4t789kxgy9g7ms4rwjvvw', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', - 'url': 'https://static.narando.com/sounds/10492/original.mp3', } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) - title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') + title = self._html_search_regex(r'(.+?)', webpage, 'title') download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') - return { 'id': video_id, 'title': title, 'url': download_url, + 'vcodec': 'none', } class NarandoIE(InfoExtractor): IE_NAME = "narando" - _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' + _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P.+)' _TEST = { 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', @@ -41,15 +40,14 @@ class NarandoIE(InfoExtractor): 'id': 'b2t4t789kxgy9g7ms4rwjvvw', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', - 'url': 'https://static.narando.com/sounds/10492/original.mp3', } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) + webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') + player_id = self._html_search_regex(r'\s*https://narando.com/r/([^"]*)', webpage, 'player_id') player_url = 'https://narando.com/widget?r=' + player_id return { From 00bcff6387742a69f4ebf97acc5d7da56fab8a87 Mon Sep 17 00:00:00 2001 From: ealgase Date: Fri, 30 Nov 2018 21:43:17 -0500 Subject: [PATCH 11/14] [narando] use already given URL for downloading webpage --- youtube_dl/extractor/narando.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index a6bb5f4735e..0a81fa85132 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -19,7 +19,7 @@ class NarandoPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) + webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.+?)', webpage, 'title') download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') return { From fc9ea7bd133034fdd4db4074cc60443e0cdc8f23 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 1 Dec 2018 14:47:41 -0500 Subject: [PATCH 12/14] [narando] remove superfluous whitespace --- youtube_dl/extractor/extractors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c3d1b81a350..33913715a50 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1484,5 +1484,3 @@ from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE from .zype import ZypeIE - - From 83809b6ee1e5ead1a1199b4df553cf657ba8f37b Mon Sep 17 00:00:00 2001 From: ealgase Date: Sun, 20 Jan 2019 19:00:43 -0500 Subject: [PATCH 13/14] [narando] improve coding methods to match standards, add support for more URL formats --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/narando.py | 47 ++++++++++++++++++------------ 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 33913715a50..c1e4aa308a6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -674,7 +674,7 @@ from .myvidster import MyVidsterIE from .narando import ( NarandoIE, - NarandoPlayerIE, + NarandoArticleIE, ) from .nationalgeographic import ( NationalGeographicVideoIE, diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 0a81fa85132..8f8fa0cdd72 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -4,9 +4,9 @@ from .common import InfoExtractor -class NarandoPlayerIE(InfoExtractor): - IE_NAME = "narando:player" - _VALID_URL = r'https://narando\.com/widget\?r=(?P\w+)' +class NarandoIE(InfoExtractor): + IE_NAME = 'narando' + _VALID_URL = r'https?://narando\.com/widget\?.*?r=(?P\w+)&?' _TEST = { 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', @@ -21,7 +21,7 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.+?)', webpage, 'title') - download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') + download_url = self._html_search_regex(r'
(.+)
', webpage, 'download_url') return { 'id': video_id, 'title': title, @@ -30,29 +30,40 @@ def _real_extract(self, url): } -class NarandoIE(InfoExtractor): - IE_NAME = "narando" - _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P.+)' - _TEST = { - 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', - 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', - 'info_dict': { - 'id': 'b2t4t789kxgy9g7ms4rwjvvw', - 'ext': 'mp3', - 'title': 'An ihrem Selbstlob erkennt man sie', +class NarandoArticleIE(InfoExtractor): + IE_NAME = "narando:article" + _VALID_URL = r'https?://(?:www\.)?narando\.com/(articles|r)/(?P.+)' + _TESTS = [ + { + 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + } + }, + { + 'url': 'https://narando.com/r/b2t4t789kxgy9g7ms4rwjvvw', #alternate URL format + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + } } - } + ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - player_id = self._html_search_regex(r'\s*https://narando.com/r/([^"]*)', webpage, 'player_id') - player_url = 'https://narando.com/widget?r=' + player_id + player_id = self._html_search_regex(r'https://narando.com/r/(.+?)\"', webpage, 'player_id') + url_result = 'https://narando.com/widget?r=' + player_id return { 'id': player_id, 'title': title, - 'url': player_url, + 'url': url_result, '_type': 'url', } From 9d5e117c16b6616df795995158de165358b28c68 Mon Sep 17 00:00:00 2001 From: Elliot Algase Date: Wed, 15 May 2019 22:04:02 -0400 Subject: [PATCH 14/14] [narando] extract thumbnails --- youtube_dl/extractor/narando.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 8f8fa0cdd72..c250cc997f7 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -6,6 +6,7 @@ class NarandoIE(InfoExtractor): IE_NAME = 'narando' + _THUMB_SIZES = ('small', 'square', 'medium', 'big', 'original') _VALID_URL = r'https?://narando\.com/widget\?.*?r=(?P\w+)&?' _TEST = { 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', @@ -22,11 +23,22 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.+?)', webpage, 'title') download_url = self._html_search_regex(r'
(.+)
', webpage, 'download_url') + thumbnail_id = self._html_search_regex(r'article_picture\/(.+?)\/small\.jpg', webpage, 'thumbnail_id', fatal=False) + thumbnail_dict = [] + thumb_id = 0 + for size in self._THUMB_SIZES: + thumbnail_dict.append({ + 'url': 'https://static.narando.com/article_picture/' + thumbnail_id + '/' + size + '.jpg', + 'id': size, + 'preference': thumb_id, + }) + thumb_id += 1 return { 'id': video_id, 'title': title, 'url': download_url, 'vcodec': 'none', + 'thumbnails': thumbnail_dict, } @@ -44,7 +56,7 @@ class NarandoArticleIE(InfoExtractor): } }, { - 'url': 'https://narando.com/r/b2t4t789kxgy9g7ms4rwjvvw', #alternate URL format + 'url': 'https://narando.com/r/b2t4t789kxgy9g7ms4rwjvvw', # alternate URL format 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { 'id': 'b2t4t789kxgy9g7ms4rwjvvw',