From 18f011fd033fcb79e246388846b68f352be1757e Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Tue, 6 Mar 2018 22:40:15 +0100 Subject: [PATCH 1/3] [heise] fix title extraction, modify test accordingly --- youtube_dl/extractor/heise.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 8f49f52efd5..9053ce12c12 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -17,16 +17,14 @@ class HeiseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P[0-9]+)\.html' _TESTS = [{ 'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', - 'md5': 'ffed432483e922e88545ad9f2f15d30e', + 'md5': '0b0c0bc1e960d188982b2491ec2d7e76', 'info_dict': { - 'id': '2404147', + 'id': '1_kkrq94sm', 'ext': 'mp4', - 'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone", - 'format_id': 'mp4_720p', - 'timestamp': 1411812600, - 'upload_date': '20140927', - 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', - 'thumbnail': r're:^https?://.*/gallery/$', + 'title': "ct uplink 33", + 'format_id': 'mp4-2957', + 'timestamp': 1512734959, + 'upload_date': '20171208', } }, { # YouTube embed @@ -76,7 +74,9 @@ def _real_extract(self, url): if not title or title == "c't": title = self._search_regex( r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title') + webpage, 'title', default=None, fatal=False) + if not title: + self._og_search_title(webpage) yt_urls = YoutubeIE._extract_urls(webpage) if yt_urls: From 60b76bdb013d2430f759fbedcad1ba20f3a29794 Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Tue, 6 Mar 2018 23:19:54 +0100 Subject: [PATCH 2/3] [heise] fix my non-sense in title extraction --- youtube_dl/extractor/heise.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 9053ce12c12..768f995b3b8 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -74,9 +74,9 @@ def _real_extract(self, url): if not title or title == "c't": title = self._search_regex( r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title', default=None, fatal=False) + webpage, 'title', default=None) if not title: - self._og_search_title(webpage) + title = self._og_search_title(webpage) yt_urls = YoutubeIE._extract_urls(webpage) if yt_urls: From 47e2c48373ce1027d8f82e606578f25c701f0eca Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Wed, 7 Mar 2018 19:38:13 +0100 Subject: [PATCH 3/3] [heise] Further simplify title extraction --- youtube_dl/extractor/heise.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 768f995b3b8..f988ec16234 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -70,13 +70,11 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('fulltitle', webpage, default=None) - if not title or title == "c't": + title = self._og_search_title(webpage) + if not title: title = self._search_regex( r']+class="videoplayerjw"[^>]+data-title="([^"]+)"', webpage, 'title', default=None) - if not title: - title = self._og_search_title(webpage) yt_urls = YoutubeIE._extract_urls(webpage) if yt_urls: