From 93350ada58dc3a05de9f96229aa0826946637a92 Mon Sep 17 00:00:00 2001
From: Surkal <user@server.fake>
Date: Thu, 30 Jan 2020 14:41:21 +0100
Subject: [PATCH 1/2] [gulli] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/gulli.py      | 80 ++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 youtube_dl/extractor/gulli.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 1cab440f46e..a23cece4c73 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -419,6 +419,7 @@
 from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
 from .groupon import GrouponIE
+from .gulli import GulliIE, GulliPlaylistIE
 from .hbo import HBOIE
 from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
diff --git a/youtube_dl/extractor/gulli.py b/youtube_dl/extractor/gulli.py
new file mode 100644
index 00000000000..94e154fa872
--- /dev/null
+++ b/youtube_dl/extractor/gulli.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration, get_element_by_class, get_element_by_id
+
+
+class GulliIE(InfoExtractor):
+    """Extractor for a single replay.gulli.fr video page (URL containing ``VOD<digits>``)."""
+    _VALID_URL = r'https?://(?:www\.)?replay\.gulli\.fr/.+VOD(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://replay.gulli.fr/emissions/Wazup39/VOD69258840197000',
+        'info_dict': {
+            'id': '69258840197000',
+            'ext': 'mp4',
+            'title': 'Wazup - Mercredi 29/01/2020',
+            'duration': 269,
+            'description': "Le Magazine quotidien des 6-12 ans à la pointe de l'info culturelle ! 4min30 pour s'informer sur les tendances, les expositions, les sorties ciné, livres, spectacles, musique, mode... Le Wazup, c'est le meilleur de l'actu pour les enfants avec en prime des bonus inédits sur Gulli.fr.",
+            'thumbnail': "https://resize-gulli.jnsmedia.fr/rcrop/748,420/img/var/storage/imports/replay/images/473769_0.jpg",
+            'series': 'Wazup',
+            'season_number': 6,
+            'episode_number': 93,
+            'episode': "Wazup - Mercredi 29/01/2020",
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape the replay page at ``url`` and return the info dict for its video."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        # Mandatory fields: no fatal=False, so a failed match aborts the extraction.
+        title = self._html_search_regex(r'"name"\: "([^"]+)', webpage, 'title')
+        download_url = self._html_search_regex(r'"contentUrl"\: "([^"]+)', webpage, 'download url')
+        # Optional fields: fatal=False lets a miss come back as None, hence the guarded int() below.
+        thumbnail = self._html_search_regex(r'"thumbnailUrl"\: "([^"]+)', webpage, 'thumbnail', fatal=False)
+        description = self._html_search_regex(r'<span id="episode_description">(.+?)</span>', webpage, 'description', fatal=False)
+        duration = parse_duration(self._html_search_regex(r'"duration"\: "PT([^"]+)', webpage, 'duration', fatal=False))
+        season_number = self._html_search_regex(r"'content_level_3': 's(\d+)e", webpage, 'season number', fatal=False)
+        episode_number = self._html_search_regex(r"'content_level_3': 's\d+e(\d+)", webpage, 'episode number', fatal=False)
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'url': download_url,
+            'duration': duration,
+            'description': description,
+            'series': self._og_search_property('title', webpage, fatal=False),
+            'season_number': int(season_number) if season_number else None,
+            'episode_number': int(episode_number) if episode_number else None,
+            'episode': get_element_by_id('h1_episode_name', webpage),
+        }
+
+
+class GulliPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?replay\.gulli\.fr/.+/(?P<id>.+)'
+    _TESTS = [{
+        'url': 'https://replay.gulli.fr/dessins-animes/Beyblade-Burst-Turbo',
+        'info_dict': {
+            'id': 'Beyblade-Burst-Turbo',
+            'title': 'Beyblade Burst',
+        },
+        'playlist_mincount': 3,
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist of the GulliIE video pages linked from a programme page."""
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+        # The listing helper may return None when the block is absent; scan '' instead of crashing.
+        bloc_html = get_element_by_class('bloc_listing', webpage) or ''
+        entries = [
+            self.url_result(mobj.group('url'), ie=GulliIE.ie_key())
+            for mobj in re.finditer(
+                r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % GulliIE._VALID_URL, bloc_html)]
+        title = self._html_search_regex(
+            r'<h1>Regardez *(.+?)\n? *</h1>', webpage, 'playlist title',
+            fatal=False)
+        return self.playlist_result(entries, playlist_id, title)

From 10aec8380602c3fb95ed019cd7d5f50c549869a9 Mon Sep 17 00:00:00 2001
From: Surkal <user@server.fake>
Date: Thu, 30 Jan 2020 15:27:15 +0100
Subject: [PATCH 2/2] fix playlist regex

---
 youtube_dl/extractor/gulli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/gulli.py b/youtube_dl/extractor/gulli.py
index 94e154fa872..dc4e640cf7f 100644
--- a/youtube_dl/extractor/gulli.py
+++ b/youtube_dl/extractor/gulli.py
@@ -54,7 +54,7 @@ def _real_extract(self, url):
 
 
 class GulliPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?replay\.gulli\.fr/.+/(?P<id>.+)'
+    _VALID_URL = r'https?://(?:www\.)?replay\.gulli\.fr/.+/(?!VOD)(?P<id>[^/]+)(?!.+)'
     _TESTS = [{
         'url': 'https://replay.gulli.fr/dessins-animes/Beyblade-Burst-Turbo',
         'info_dict': {