From 2c4b855795ab528a117b88b1e338418feaedc00b Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Mon, 18 Dec 2017 14:47:45 +0100 Subject: [PATCH 01/10] Fix for Heise extractor --- youtube_dl/extractor/heise.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 82e11a7d887..0d49702fb50 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -8,11 +8,12 @@ int_or_none, parse_iso8601, xpath_text, + re, ) class HeiseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.|m\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' _TESTS = [{ 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', 'md5': 'ffed432483e922e88545ad9f2f15d30e', @@ -54,6 +55,9 @@ class HeiseIE(InfoExtractor): }] def _real_extract(self, url): + url = re.sub(r'https?://(?:www\.|m\.)?heise\.de/', + 'https://m.heise.de/', + url) video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -97,7 +101,7 @@ def _real_extract(self, url): description = self._og_search_description( webpage, default=None) or self._html_search_meta( - 'description', webpage) + 'description', webpage, default=None) return { 'id': video_id, @@ -106,6 +110,6 @@ def _real_extract(self, url): 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or self._og_search_thumbnail(webpage)), 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage)), + self._html_search_meta('date', webpage, default=None)), 'formats': formats, } From 117ed449a49093cd5300e8c212477704423c8e80 Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Tue, 19 Dec 2017 15:47:55 +0100 Subject: [PATCH 02/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 2 +- 1 file
changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 0d49702fb50..410af24cf38 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -55,7 +55,7 @@ class HeiseIE(InfoExtractor): }] def _real_extract(self, url): - url = re.sub(r'https?://(?:www\.|m\.)?heise\.de/', + url = re.sub(r'https?://(?:www\.)?heise\.de/', 'https://m.heise.de/', url) video_id = self._match_id(url) From 7d4dd5dfad3efdbd4f3c37024dc42be6e994a946 Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Tue, 19 Dec 2017 16:37:36 +0100 Subject: [PATCH 03/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 410af24cf38..d39c9056dff 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -55,7 +55,7 @@ class HeiseIE(InfoExtractor): }] def _real_extract(self, url): - url = re.sub(r'https?://(?:www\.)?heise\.de/', + url = re.sub(r'^https?://(?:www\.)?heise\.de/', 'https://m.heise.de/', url) video_id = self._match_id(url) From 6f0773184136097fa515880931a78df6731633f7 Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Tue, 19 Dec 2017 19:33:35 +0100 Subject: [PATCH 04/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index d39c9056dff..a4a5e0a26c3 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( @@ -8,7 +10,6 @@ int_or_none, parse_iso8601, xpath_text, - re, ) From 1b07a645e8128c3d57b4945f298cff7528776869 Mon Sep 17 00:00:00 
2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Tue, 19 Dec 2017 19:45:30 +0100 Subject: [PATCH 05/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index a4a5e0a26c3..435567f7def 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -56,11 +56,9 @@ class HeiseIE(InfoExtractor): }] def _real_extract(self, url): - url = re.sub(r'^https?://(?:www\.)?heise\.de/', - 'https://m.heise.de/', - url) - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobile_url = re.sub(r'^https?://(?:www\.)?heise\.de/', 'https://m.heise.de/', url) + video_id = self._match_id(mobile_url) + webpage = self._download_webpage(mobile_url, video_id) title = self._html_search_meta('fulltitle', webpage, default=None) if not title or title == "c't": From 182e2c602d9bda82c96f28af00fe02c848054663 Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Tue, 19 Dec 2017 20:14:14 +0100 Subject: [PATCH 06/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 435567f7def..1225b3e64f4 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -56,7 +56,7 @@ class HeiseIE(InfoExtractor): }] def _real_extract(self, url): - mobile_url = re.sub(r'^https?://(?:www\.)?heise\.de/', 'https://m.heise.de/', url) + mobile_url = re.sub(r'^(https?://)(?:www\.)?(heise\.de/)', r'\1m.\2', url) video_id = self._match_id(mobile_url) webpage = self._download_webpage(mobile_url, video_id) From 7420fc5230d4d24ee620a67c8d061fc5cc591676 Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Tue, 19 Dec 2017 21:07:20 +0100 Subject: [PATCH 07/10] Updated fix for 
Heise extractor --- youtube_dl/extractor/heise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 1225b3e64f4..eca5b52676a 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -56,7 +56,7 @@ class HeiseIE(InfoExtractor): }] def _real_extract(self, url): - mobile_url = re.sub(r'^(https?://)(?:www\.)?(heise\.de/)', r'\1m.\2', url) + mobile_url = re.sub(r'^(https?://)(?:www\.)?', r'\1m.', url) video_id = self._match_id(mobile_url) webpage = self._download_webpage(mobile_url, video_id) From 4b1aead092c6edb40e96ec3273c58b424ee95ee3 Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Wed, 20 Dec 2017 21:44:53 +0100 Subject: [PATCH 08/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index eca5b52676a..3039ac015db 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -58,7 +58,8 @@ class HeiseIE(InfoExtractor): def _real_extract(self, url): mobile_url = re.sub(r'^(https?://)(?:www\.)?', r'\1m.', url) video_id = self._match_id(mobile_url) - webpage = self._download_webpage(mobile_url, video_id) + webpage = self._download_webpage(url, video_id) + mobile_webpage = self._download_webpage(mobile_url, video_id) title = self._html_search_meta('fulltitle', webpage, default=None) if not title or title == "c't": @@ -72,10 +73,10 @@ def _real_extract(self, url): container_id = self._search_regex( r'
<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', - webpage, 'container ID') + mobile_webpage, 'container ID') sequenz_id = self._search_regex( r'
<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', - webpage, 'sequenz ID') + mobile_webpage, 'sequenz ID') doc = self._download_xml( 'http://www.heise.de/videout/feed', video_id, query={ @@ -100,7 +101,7 @@ def _real_extract(self, url): description = self._og_search_description( webpage, default=None) or self._html_search_meta( - 'description', webpage, default=None) + 'description', webpage) return { 'id': video_id, @@ -109,6 +110,6 @@ def _real_extract(self, url): 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or self._og_search_thumbnail(webpage)), 'timestamp': parse_iso8601( - self._html_search_meta('date', webpage, default=None)), + self._html_search_meta('date', webpage)), 'formats': formats, } From f64318ab1951ef4cf7eb3cfc8e7e960cb4cf2660 Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Wed, 20 Dec 2017 21:49:12 +0100 Subject: [PATCH 09/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 3039ac015db..120fc2da2c9 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -57,7 +57,7 @@ class HeiseIE(InfoExtractor): def _real_extract(self, url): mobile_url = re.sub(r'^(https?://)(?:www\.)?', r'\1m.', url) - video_id = self._match_id(mobile_url) + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) mobile_webpage = self._download_webpage(mobile_url, video_id) From 01ed6b89e5eec5e99038a654c33d2a564f0a2e9a Mon Sep 17 00:00:00 2001 From: Hier631 <34473369+Hier631@users.noreply.github.com> Date: Wed, 20 Dec 2017 21:51:41 +0100 Subject: [PATCH 10/10] Updated fix for Heise extractor --- youtube_dl/extractor/heise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 120fc2da2c9..c9de583dddf 100644 --- a/youtube_dl/extractor/heise.py +++
b/youtube_dl/extractor/heise.py @@ -14,7 +14,7 @@ class HeiseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|m\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' _TESTS = [{ 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', 'md5': 'ffed432483e922e88545ad9f2f15d30e',