From de31e7f6383e8765f6c341a752e75dc0b6f63b69 Mon Sep 17 00:00:00 2001 From: renalid Date: Tue, 24 Nov 2020 14:15:23 +0100 Subject: [PATCH 1/6] [Generic] RSS adding item description Add description of RSS feed's items --- youtube_dl/extractor/generic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f10f11244b8..f25d6cd1fa0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2194,11 +2194,15 @@ def _extract_rss(self, url, video_id, doc): if not next_url: continue - + + item_desc_el = it.find('description') + item_desc = None if item_desc_el is None else item_desc_el.text + entries.append({ '_type': 'url_transparent', 'url': next_url, 'title': it.find('title').text, + 'description': item_desc }) return { From b3db1ecae4d269a03e3b4b6d48145348c3570f1b Mon Sep 17 00:00:00 2001 From: renalid Date: Sun, 29 Nov 2020 20:37:35 +0100 Subject: [PATCH 2/6] Adding test for RSS item description --- youtube_dl/extractor/generic.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f25d6cd1fa0..93b41f06abe 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -214,6 +214,24 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 100, }, + # RSS feed with items description + { + 'url': 'http://radiofrance-podcast.net/podcast09/rss_14631.xml', + 'info_dict': { + 'id': 'http://radiofrance-podcast.net/podcast09/rss_14631.xml', + 'title': 'Certains l\'aiment Fip', + 'description': 're:.*FIP met le cinéma.*' + }, + 'playlist': [{ + 'info_dict': { + 'id': '14631-22.11.2020-ITEMA_22491517-2020Y24252S0327', + 'ext': 'mp3', + 'title': 'Les musiques de Valérie Lemercier', + 'description': 're:.*occasion de la sortie.*', + 'upload_date': '20201122', + }, + }], + }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng { 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', From 11f6c0772bf166a46deff678b57f2aa81be14619 Mon Sep 17 00:00:00 2001 From: renalid Date: Sat, 5 Dec 2020 00:18:18 +0100 Subject: [PATCH 3/6] [Generic] change test case expected + xpath method for item desc. --- youtube_dl/extractor/generic.py | 41 +++++++++++---------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 93b41f06abe..51ba2807531 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -198,11 +198,19 @@ class GenericIE(InfoExtractor): { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', 'info_dict': { - 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', - 'ext': 'm4v', - 'upload_date': '20150228', - 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', - } + 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'title': 'MSNBC Rachel Maddow (video)', + 'description': 're:.*her unique approach to storytelling.*', + }, + 'playlist': [{ + 'info_dict': { + 'ext': 'mov', + 'id': 'pdv_maddow_netcast_mov-12-03-2020-223726', + 'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726', + 'description': 're:.*her unique approach to storytelling.*', + 'upload_date': '20201204', + }, + }], }, # RSS feed with enclosures and unsupported link URLs { @@ -214,24 +222,6 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 100, }, - # RSS feed with items description - { - 'url': 'http://radiofrance-podcast.net/podcast09/rss_14631.xml', - 'info_dict': { - 'id': 'http://radiofrance-podcast.net/podcast09/rss_14631.xml', - 'title': 'Certains l\'aiment Fip', - 'description': 're:.*FIP met le cinéma.*' - }, - 'playlist': [{ - 'info_dict': { - 'id': '14631-22.11.2020-ITEMA_22491517-2020Y24252S0327', - 'ext': 'mp3', - 'title': 'Les musiques de Valérie Lemercier', - 'description': 're:.*occasion de la sortie.*', - 'upload_date': '20201122', - }, - }], - }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng { 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', @@ -2213,14 +2203,11 @@ def _extract_rss(self, url, video_id, doc): if not next_url: continue - item_desc_el = it.find('description') - item_desc = None if item_desc_el is None else item_desc_el.text - entries.append({ '_type': 'url_transparent', 'url': next_url, 'title': it.find('title').text, - 'description': item_desc + 'description': xpath_text(it, 'description', default=None) }) return { From b6e37da79d24f90991e448d5dfd3db95f00f222d Mon Sep 17 00:00:00 2001 From: renalid Date: Sat, 5 Dec 2020 00:29:17 +0100 Subject: [PATCH 4/6] [Generic] flake8 review syntax fix --- youtube_dl/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 51ba2807531..edd308b031b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -194,12 +194,12 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 11, }, - # RSS feed with enclosure + # RSS feed with enclosure and item description { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', 'info_dict': { 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', - 'title': 'MSNBC Rachel Maddow (video)', + 'title': 'MSNBC Rachel Maddow (video)', 'description': 're:.*her unique approach to storytelling.*', }, 'playlist': [{ @@ -2202,7 +2202,7 @@ def _extract_rss(self, url, video_id, doc): if not next_url: continue - + entries.append({ '_type': 'url_transparent', 'url': next_url, From cdc3b5ca71fc6d231d4ea93e520351207aeec1be Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 6 Dec 2020 22:11:00 +0700 Subject: [PATCH 5/6] Update generic.py --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index edd308b031b..8e87f6919f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -194,7 +194,7 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 11, }, - # RSS feed with enclosure and item description + # RSS feed with enclosure { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', 'info_dict': { From bd989ca400f32631fe02ae94eb10e434016a0e58 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 6 Dec 2020 22:11:27 +0700 Subject: [PATCH 6/6] Update generic.py --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8e87f6919f0..8ed2789d0d7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2207,7 +2207,7 @@ def _extract_rss(self, url, video_id, doc): '_type': 'url_transparent', 'url': next_url, 'title': it.find('title').text, - 'description': xpath_text(it, 'description', default=None) + 'description': xpath_text(it, 'description', default=None), }) return {