From 9cbf7f15a9bddda8237886d90a45bcf33bc4301c Mon Sep 17 00:00:00 2001 From: rli99 Date: Sat, 11 Jul 2020 01:48:34 +1000 Subject: [PATCH 1/8] Add new extractor for Mildom. --- docs/supportedsites.md | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mildom.py | 53 ++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/mildom.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 35c1050e549..49b133b9378 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -483,6 +483,7 @@ - **Mgoon** - **MGTV**: 芒果TV - **MiaoPai** + - **Mildom**: mildom.com - **MinistryGrid** - **Minoto** - **miomio.tv** diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4b3092028f4..d3a8b007101 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -623,6 +623,7 @@ MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, ) +from .mildom import MildomIE from .ministrygrid import MinistryGridIE from .minoto import MinotoIE from .miomio import MioMioIE diff --git a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py new file mode 100644 index 00000000000..a431ce2d36b --- /dev/null +++ b/youtube_dl/extractor/mildom.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + try_get +) + + +class MildomIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?mildom\.com/playback/(?P<channel_id>[0-9]+)\?v_id=(?P<id>[-0-9]+)' + _VIDEO_INFO_BASE_URL = 'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail' + _TEST = { + 'url': 'https://www.mildom.com/playback/10819667?v_id=10819667-1594032863', + 'md5': 'bed067a7dff3492184bd06d6131dd8be', + 'info_dict': { + 'id': '10819667-1594032863', + 'ext': 'mp4', + 'title': '月曜!雀荘ほめちぎり #1', + 'thumbnail': r're:^https?://.*\.png$', + 'description': '#1 
記念すべき初回の出演者は声優の高木美佑さんとVtuber界の麻雀つよつよ先生こと千羽黒乃さん!\nMildom公式番組『麻雀番組』毎週月曜に生放送!\n麻雀アプリも使った視聴者対戦型麻雀バラエティ!', + 'uploader': '月曜!雀荘ほめちぎり' + } + } + + def _real_extract(self, url): + channel_id, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + thumbnail = self._html_search_meta( + 'og:image', + webpage, 'thumbnail', default=None) + + video_data = self._download_json( + self._VIDEO_INFO_BASE_URL + f'?v_id={video_id}', video_id) + playback_data = video_data['body']['playback'] + + video_url = playback_data['source_url'] + description = playback_data.get('video_intro') + uploader = try_get(playback_data, lambda x: x['author_info']['login_name'], compat_str) + title = playback_data.get('title') + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'uploader': uploader, + 'channel_id': channel_id, + 'thumbnail': thumbnail, + 'description': description + } From 8be011b2fca5c165875966c4778a2cddb8c777bf Mon Sep 17 00:00:00 2001 From: rli99 Date: Sat, 11 Jul 2020 02:03:47 +1000 Subject: [PATCH 2/8] Remove use of f string --- youtube_dl/extractor/mildom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py index a431ce2d36b..02d4b932392 100644 --- a/youtube_dl/extractor/mildom.py +++ b/youtube_dl/extractor/mildom.py @@ -34,7 +34,7 @@ def _real_extract(self, url): webpage, 'thumbnail', default=None) video_data = self._download_json( - self._VIDEO_INFO_BASE_URL + f'?v_id={video_id}', video_id) + self._VIDEO_INFO_BASE_URL + '?v_id=%s' % video_id, video_id) playback_data = video_data['body']['playback'] video_url = playback_data['source_url'] From 2f5eb728f7694f9048c9bd5534d42ea12f5f09ef Mon Sep 17 00:00:00 2001 From: rli99 Date: Sat, 11 Jul 2020 02:14:05 +1000 Subject: [PATCH 3/8] Fix unnecessary multi-line import --- youtube_dl/extractor/mildom.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py index 02d4b932392..3c5d4203549 100644 --- a/youtube_dl/extractor/mildom.py +++ b/youtube_dl/extractor/mildom.py @@ -5,9 +5,7 @@ from .common import InfoExtractor from ..compat import compat_str -from ..utils import ( - try_get -) +from ..utils import try_get class MildomIE(InfoExtractor): From f477b12f4760dea53aa2d13f07c1bbcc6fd9cf35 Mon Sep 17 00:00:00 2001 From: rli99 Date: Sat, 11 Jul 2020 14:36:46 +1000 Subject: [PATCH 4/8] Add fallback for title and thumbnail --- youtube_dl/extractor/mildom.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py index 3c5d4203549..f93e24edeab 100644 --- a/youtube_dl/extractor/mildom.py +++ b/youtube_dl/extractor/mildom.py @@ -26,11 +26,8 @@ class MildomIE(InfoExtractor): def _real_extract(self, url): channel_id, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id) - thumbnail = self._html_search_meta( - 'og:image', - webpage, 'thumbnail', default=None) + webpage = self._download_webpage(url, video_id) video_data = self._download_json( self._VIDEO_INFO_BASE_URL + '?v_id=%s' % video_id, video_id) playback_data = video_data['body']['playback'] @@ -39,6 +36,15 @@ def _real_extract(self, url): description = playback_data.get('video_intro') uploader = try_get(playback_data, lambda x: x['author_info']['login_name'], compat_str) title = playback_data.get('title') + if not title: + title = self._html_search_meta( + ['og:description', 'description'], + webpage, 'thumbnail', default=None) + thumbnail = playback_data.get('video_pic') + if not thumbnail: + thumbnail = self._html_search_meta( + 'og:image', + webpage, 'thumbnail', default=None) return { 'id': video_id, From 40d84550aa8556338f73da83a51e46f40f373c31 Mon Sep 17 00:00:00 2001 From: rli99 Date: Sat, 11 Jul 2020 14:37:09 +1000 Subject: [PATCH 5/8] Fix description search --- 
youtube_dl/extractor/mildom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py index f93e24edeab..04fc721bb22 100644 --- a/youtube_dl/extractor/mildom.py +++ b/youtube_dl/extractor/mildom.py @@ -39,7 +39,7 @@ def _real_extract(self, url): if not title: title = self._html_search_meta( ['og:description', 'description'], - webpage, 'thumbnail', default=None) + webpage, 'description', default=None) thumbnail = playback_data.get('video_pic') if not thumbnail: thumbnail = self._html_search_meta( From a75f38347cb0f49f75ed4ad7ad86eea99cdedd8f Mon Sep 17 00:00:00 2001 From: rli99 Date: Sat, 11 Jul 2020 14:38:34 +1000 Subject: [PATCH 6/8] Fix title display name --- youtube_dl/extractor/mildom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py index 04fc721bb22..20457eec2d6 100644 --- a/youtube_dl/extractor/mildom.py +++ b/youtube_dl/extractor/mildom.py @@ -39,7 +39,7 @@ def _real_extract(self, url): if not title: title = self._html_search_meta( ['og:description', 'description'], - webpage, 'description', default=None) + webpage, 'title', default=None) thumbnail = playback_data.get('video_pic') if not thumbnail: thumbnail = self._html_search_meta( From 0ca9dbd49b142840e67fb2ddf04ef6ce97b8383a Mon Sep 17 00:00:00 2001 From: rli99 Date: Sat, 11 Jul 2020 14:40:28 +1000 Subject: [PATCH 7/8] Fix indentation --- youtube_dl/extractor/mildom.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py index 20457eec2d6..45c67edb2ad 100644 --- a/youtube_dl/extractor/mildom.py +++ b/youtube_dl/extractor/mildom.py @@ -43,8 +43,8 @@ def _real_extract(self, url): thumbnail = playback_data.get('video_pic') if not thumbnail: thumbnail = self._html_search_meta( - 'og:image', - webpage, 'thumbnail', default=None) + 'og:image', + webpage, 'thumbnail', 
default=None) return { 'id': video_id, From 114f514efbe2382b5908078aa01974223fdb3951 Mon Sep 17 00:00:00 2001 From: rli99 Date: Thu, 8 Oct 2020 17:42:51 +1100 Subject: [PATCH 8/8] Fix test, updates based on review. --- youtube_dl/extractor/mildom.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/mildom.py b/youtube_dl/extractor/mildom.py index 45c67edb2ad..55e01140dde 100644 --- a/youtube_dl/extractor/mildom.py +++ b/youtube_dl/extractor/mildom.py @@ -10,41 +10,41 @@ class MildomIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?mildom\.com/playback/(?P<channel_id>[0-9]+)\?v_id=(?P<id>[-0-9]+)' - _VIDEO_INFO_BASE_URL = 'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail' + _VIDEO_INFO_BASE_URL = 'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail?v_id=%s' _TEST = { 'url': 'https://www.mildom.com/playback/10819667?v_id=10819667-1594032863', 'md5': 'bed067a7dff3492184bd06d6131dd8be', 'info_dict': { 'id': '10819667-1594032863', 'ext': 'mp4', - 'title': '月曜!雀荘ほめちぎり #1', + 'title': '月曜!雀荘ほめちぎり #1 【麻雀】', 'thumbnail': r're:^https?://.*\.png$', 'description': '#1 記念すべき初回の出演者は声優の高木美佑さんとVtuber界の麻雀つよつよ先生こと千羽黒乃さん!\nMildom公式番組『麻雀番組』毎週月曜に生放送!\n麻雀アプリも使った視聴者対戦型麻雀バラエティ!', - 'uploader': '月曜!雀荘ほめちぎり' + 'uploader': '月曜!雀荘ほめちぎり【麻雀】' } } def _real_extract(self, url): channel_id, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, video_id) video_data = self._download_json( - self._VIDEO_INFO_BASE_URL + '?v_id=%s' % video_id, video_id) + self._VIDEO_INFO_BASE_URL % video_id, video_id) playback_data = video_data['body']['playback'] video_url = playback_data['source_url'] description = playback_data.get('video_intro') uploader = try_get(playback_data, lambda x: x['author_info']['login_name'], compat_str) title = playback_data.get('title') - if not title: - title = self._html_search_meta( - ['og:description', 'description'], - webpage, 'title', 
default=None) thumbnail = playback_data.get('video_pic') - if not thumbnail: - thumbnail = self._html_search_meta( - 'og:image', - webpage, 'thumbnail', default=None) + + if not title or not thumbnail: + webpage = self._download_webpage(url, video_id) + if not title: + title = self._html_search_meta( + ['og:description', 'description'], + webpage, 'title', default=None) + if not thumbnail: + thumbnail = self._og_search_thumbnail(webpage, default=None) return { 'id': video_id,