From d54c21303c3ec59a24cc5d12b418da130f4b3dd0 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sat, 28 Mar 2020 17:17:47 +0700 Subject: [PATCH 01/12] [zingmp3_vn] Add new extractor --- youtube_dl/extractor/extractors.py | 7 + youtube_dl/extractor/zingmp3_vn.py | 752 +++++++++++++++++++++++++++++ 2 files changed, 759 insertions(+) create mode 100644 youtube_dl/extractor/zingmp3_vn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ef803b8a78d..f3cfcb6c18a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1503,3 +1503,10 @@ from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE from .zype import ZypeIE + +from .zingmp3_vn import ( + Zingmp3_vnIE, + Zingmp3_vnPlaylistIE, + Zingmp3_vnChartIE, + Zingmp3_vnUserIE, +) \ No newline at end of file diff --git a/youtube_dl/extractor/zingmp3_vn.py b/youtube_dl/extractor/zingmp3_vn.py new file mode 100644 index 00000000000..73d8672af07 --- /dev/null +++ b/youtube_dl/extractor/zingmp3_vn.py @@ -0,0 +1,752 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import datetime +import hashlib +import hmac +import re +import time +from urllib.parse import quote, urljoin + +from .common import InfoExtractor +from ..utils import ( + url_or_none, + int_or_none, + try_get, + ExtractorError, + js_to_json +) + + +class Zingmp3_vnIE(InfoExtractor): + _VALID_URL = r'''(?x)^ + ((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P + (?:(zingmp3\.vn)| + (mp3\.zing\.vn)) + )\/(?Pbai-hat|video-clip|embed)\/(?P.*?)\/(?P.*?)\W + ''' + IE_NAME = 'zingmp3_vn' + IE_DESC = 'zingmp3.vn' + _TESTS = [{ + 'url': 'https://zingmp3.vn/bai-hat/Khoc-Cung-Em-Mr-Siro-Gray-Wind/ZWBI0DFI.html', + 'info_dict': { + 'id': 'ZWBI0DFI', + 'ext': 'mp3', + 'title': 'Khóc Cùng Em', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': str, + 'like_count': int, + 'comment_count': int, + 'view_count': int, + 'duration': 236, + }, + }, { + 'url': "https://zingmp3.vn/video-clip/Em-Gi-Oi-Jack-K-ICM/ZWAEFWIF.html", + 'info_dict': { + 'id': 'ZWAEFWIF', + 'ext': 'mp4', + 'title': "Em Gì Ơi", + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': str, + 'like_count': int, + 'comment_count': int, + 'view_count': int, + } + }, { + "url": 'https://zingmp3.vn/video-clip/Simple-Love-Obito-Seachains-Davis/ZWAFIZAZ.html', + 'info_dict': { + 'id': 'ZWAFIZAZ', + 'ext': 'mp4', + 'title': 'Simple Love', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': str, + 'like_count': int, + 'comment_count': int, + 'view_count': int + } + }, { + "url": 'https://zingmp3.vn/bai-hat/Marry-You-Bruno-Mars/ZWZE8I76.html', + 'info_dict': { + 'id': 'ZWZE8I76', + 'ext': 'mp3', + 'title': "Marry You", + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': str, + 'like_count': int, + 'comment_count': int, + 'view_count': int, + } + }, { + 'url': 'https://zingmp3.vn/bai-hat/Dap-An-Cua-Ban-A-Nhung/ZWB0OWIZ.html', + 'info_dict': { + 'id': 'ZWB0OWIZ', + 'ext': 'mp3', + 'title': "Đáp Án Của Bạn / 你的答案", + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': str, + 'like_count': int, + 'comment_count': int, + 'view_count': int, + } + }, { + 'url': 'https://zingmp3.vn/embed/song/ZWB06FEA?start=false', + 'info_dict': { + 'id': 'ZWB06FEA', + 'ext': 'mp3', + 'title': "Life Is Good", + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': str, + 'like_count': int, + 'comment_count': int, + 'view_count': int, + } + }, { + # This url can not listen or download if don't have account vip, use --cookies FILE to login + 'url': 'https://zingmp3.vn/bai-hat/The-Box-Roddy-Ricch/ZWB0ZF9Z.html', + "only_matching": True + }] + _default_host = "https://zingmp3.vn/" + + def _real_extract(self, url): + + mobj = re.search(self._VALID_URL, url) + if not mobj: + raise ExtractorError( + f'Zingmp3 no support for {url}.', expected=True + ) + video_id = mobj.group('id') + type = mobj.group('type') + slug = mobj.group('slug') + return self.extract_info_media(type, slug, video_id) + + def extract_info_media(self, type, slug, video_id): + formats = [] + name_api = '' + if type == 'bai-hat': + name_api = '/song/get-song-info' + elif type == 'embed': + if slug and slug == 'song': + name_api = '/song/get-song-info' + elif type == 'video-clip': + name_api = "/video/get-video-detail" + + api = self.get_api_with_signature(name_api=name_api, video_id=video_id) + info = self._download_json(url_or_request=api, video_id=video_id) + if type == 'video-clip' and not self._downloader.params.get("cookiefile"): + # TODO: Have api can get best quality like 1080p, 720p, default if dont have VIP just 480p is best quality. + # If requests are continuous without downtime, + # you may be blocked IP for a short period of time, + # So => time.sleep(2) + _api_video = """http://api.mp3.zing.vn/api/mobile/video/getvideoinfo?requestdata={"id":"%s"}""" + _json_video = self._download_json(url_or_request=_api_video % video_id, video_id=video_id) + info['data']['streaming']['data']['default'] = _json_video.get("source") + time.sleep(2) + + if self._downloader.params.get("cookiefile"): + # TODO: if have vip account can get 320 or lossless media, default is 128 + # TODO: Just support login with cookie file, use --cookies FILE. + api_download = self.get_api_with_signature(name_api='/download/get-streamings', video_id=video_id) + _json = self._download_json(url_or_request=api_download, video_id=video_id) + if _json and _json.get('msg') != 'Chỉ tài khoản VIP có thể tải bài hát này': + info['data']['streaming']['default'] = _json.get('data') + + def get_lyric(data): + """ + - Lyric is description and subtitle for media, use --get-description or --all-subs to get. + :param data: + :return: str + """ + lyric = data.get('lyric') or try_get(data, lambda x: x['lyrics'][0]['content']) + if url_or_none(lyric): + lyric = self._download_webpage(url_or_request=lyric, video_id=video_id) + if lyric: + return lyric + + def add_protocol(url): + if not url.startswith("http"): + return 'https:' + url + return url + + def convert_thumbnail(url): + if url: + return re.sub(r'w[0-9]+', 'w1080', url) + + if info.get('msg') == 'Success': + data = info.get('data') + title = data.get('title') + artists_names = data.get('artists_names') + thumbnail = convert_thumbnail(data.get('thumbnail')) + lyric = get_lyric(data) + like_count = int_or_none(data.get('like')) + comment_count = int_or_none(data.get('total_comment')) + view_count = int_or_none(data.get('listen')) + duration = int_or_none(data.get('duration')) + timestamp = int_or_none(data.get('timestamp')) + artists = [{ + "id": i.get('id'), + "name": i.get('name'), + 'link': urljoin(self._default_host, i.get('link')), + 'thumbnail': convert_thumbnail(i.get('thumbnail')), + 'follow_count': int_or_none(i.get('follow')) + } for i in data.get('artists', [])] + created_at = int_or_none(data.get('created_at')) + released_at = int_or_none(data.get('released_at')) + + streaming = data.get('streaming') + + if streaming.get('msg'): + if type == 'video-clip': + stream_data = streaming.get('data', dict) + for protocol, stream in stream_data.items(): + if protocol == 'default': + protocol = 'http' + for quality, url in stream.items(): + if url: + if protocol == 'hls': + m3u8_url = add_protocol(url) + m3u8_doc = self._download_webpage(m3u8_url, video_id) + m3u8 = self._parse_m3u8_formats(m3u8_doc, m3u8_url) + for i in m3u8: + i['ext'] = "mp4" + formats.extend(m3u8) + elif protocol == 'http': + formats.append({ + 'url': add_protocol(url), + 'ext': 'mp4', + 'protocol': protocol, + 'height': int_or_none(quality) or int_or_none(quality[:-1]) + }) + formats = sorted(formats, key=lambda x: x['height']) + else: + if streaming.get('msg') != "Success": + self.to_screen( + f" - {self.IE_NAME} requires authentication.\n" + f"\t\t- Because This media need VIP account to listen or watch.\n" + f"\t\t- You may want to use --cookies.\n\n" + ) + return + default = streaming.get('default') + for quality, url in default.items(): + if url: + if quality == 'lossless': + formats.append({ + 'url': add_protocol(url), + 'ext': 'flac', + 'protocol': 'http' + }) + else: + formats.append({ + 'url': add_protocol(url), + 'ext': 'mp3', + 'protocol': 'http' + }) + return { + 'id': video_id, + 'title': title, + 'artists_names': artists_names, + 'artists': artists, + 'thumbnail': thumbnail, + 'description': lyric, + "subtitles": { + "lyric": [ + { + "ext": "txt", + "data": lyric + } + ], + }, + 'created_at': created_at, + 'released_at': released_at, + 'like_count': like_count, + 'comment_count': comment_count, + 'view_count': view_count, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats + } + + def get_api_with_signature(self, name_api, video_id='', alias='', type='', new_release=False): + """ + - The api of this site has 1 param named sig => signature + - It uses the hash function of the variables ctime, id, and name_api. + - Sone api don't need id, just need ctime and name_api, + :param name_api: + :param video_id: + :param type: + :param new_release: + :return: api + """ + API_KEY = '38e8643fb0dc04e8d65b99994d3dafff' + SECRET_KEY = b'10a01dcf33762d3a204cb96429918ff6' + if not name_api: + return + + def get_hash256(string): + return hashlib.sha256(string.encode('utf-8')).hexdigest() + + def get_hmac512(string): + return hmac.new(SECRET_KEY, string.encode('utf-8'), hashlib.sha512).hexdigest() + + def get_request_path(data): + def mapping(key, value): + return quote(key) + "=" + quote(value) + + data = [mapping(k, v) for k, v in data.items()] + data = "&".join(data) + return data + + def get_api_by_id(id): + url = f"https://zingmp3.vn/api{name_api}?id={id}&" + time = str(int(datetime.datetime.now().timestamp())) + sha256 = get_hash256(f"ctime={time}id={id}") + + data = { + 'ctime': time, + 'api_key': API_KEY, + 'sig': get_hmac512(f"{name_api}{sha256}") + } + return url + get_request_path(data) + + def get_api_chart(type): + url = f"https://zingmp3.vn/api{name_api}?type={type}&" + time = str(int(datetime.datetime.now().timestamp())) + sha256 = get_hash256(f"ctime={time}") + + data = { + 'ctime': time, + 'api_key': API_KEY, + 'sig': get_hmac512(f"{name_api}{sha256}") + } + return url + get_request_path(data) + + def get_api_new_release(): + url = f"https://zingmp3.vn/api{name_api}?" + time = str(int(datetime.datetime.now().timestamp())) + sha256 = get_hash256(f"ctime={time}") + + data = { + 'ctime': time, + 'api_key': API_KEY, + 'sig': get_hmac512(f"{name_api}{sha256}") + } + return url + get_request_path(data) + + def get_api_download(id): + url = f"https://download.zingmp3.vn/api{name_api}?id={id}&" + time = str(int(datetime.datetime.now().timestamp())) + sha256 = get_hash256(f"ctime={time}id={id}") + + data = { + 'ctime': time, + 'api_key': API_KEY, + 'sig': get_hmac512(f"{name_api}{sha256}") + } + return url + get_request_path(data) + + def get_api_info_alias(alias): + url = f"https://zingmp3.vn/api{name_api}?alias={alias}&" + time = str(int(datetime.datetime.now().timestamp())) + sha256 = get_hash256(f"ctime={time}") + + data = { + 'ctime': time, + 'api_key': API_KEY, + 'sig': get_hmac512(f"{name_api}{sha256}") + } + return url + get_request_path(data) + + if 'download' in name_api: + return get_api_download(id=video_id) + if alias: + return get_api_info_alias(alias) + if video_id: + return get_api_by_id(video_id) + if type: + return get_api_chart(type) + if new_release: + return get_api_new_release() + return + + +class Zingmp3_vnPlaylistIE(Zingmp3_vnIE): + IE_NAME = "zingmp3_vn:playlist" + + _VALID_URL = r'''(?x)^ + ((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P + (?:(zingmp3\.vn)| + (mp3\.zing\.vn)) + )\/(?Palbum|playlist)\/(?P.*?)\/(?P.*?)\W + ''' + + _TESTS = [ + { + 'url': 'https://zingmp3.vn/album/Top-100-Nhac-Hoa-Hay-NhatVarious-Artists/ZWZB96EI.html', + "info_dict": { + 'id': "ZWZB96EI", + 'title': 'Top 100 Nhạc Hoa Hay Nhất' + }, + "playlist_count": 100 + }, + { + "url": "https://zingmp3.vn/album/Chi-Co-The-La-Mr-Siro-Mr-Siro/ZBWIA9DI.html", + "info_dict": { + "id": "ZBWIA9DI", + "title": 'Chỉ Có Thể Là Mr Siro' + }, + "playlist_mincount": 10 + }, + { + 'url': 'https://zingmp3.vn/playlist/Sofm-s-playlist/IWE606EA.html', + "info_dict": { + 'id': "IWE606EA", + 'title': "Sofm's playlist" + }, + "playlist_count": 69 + }, + { + "url": "https://zingmp3.vn/album/Top-100-Pop-Au-My-Hay-Nhat-Various-Artists/ZWZB96AB.html", + "info_dict": { + "id": "ZWZB96AB", + "title": "Top 100 Pop Âu Mỹ Hay Nhất" + }, + "playlist_count": 100, + } + ] + name_api = '/playlist/get-playlist-detail' + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + playlist_id = mobj.group('playlist_id') + return self._extract_playlist(id_playlist=playlist_id) + + def _extract_playlist(self, id_playlist): + api = self.get_api_with_signature(name_api=self.name_api, video_id=id_playlist) + info = self._download_json(url_or_request=api, video_id=id_playlist) + title_playlist = try_get(info, lambda x: x['data']['title']) + items = try_get(info, lambda x: x['data']['song']['items']) + entries = [] + for item in items: + url = urljoin(self._default_host, item.get('link')) + video_id = item.get('id') + entry = self.url_result( + url=url, + ie=Zingmp3_vnIE.ie_key(), + video_id=video_id + ) + entries.append(entry) + + return { + '_type': 'playlist', + 'id': id_playlist, + 'title': title_playlist, + 'entries': entries, + } + + +class Zingmp3_vnUserIE(Zingmp3_vnIE): + _VALID_URL = r'''(?x)^ + ((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P + (?:(zingmp3\.vn)| + (mp3\.zing\.vn)) + )\/(?Pnghe-si\/|) + (?P.*?) + (?:$|\/) + (?: + (?P.*?)\/(?P.*?\.) + | + (?P.*?$) + ) + ''' + IE_NAME = "zingmp3_vn:user" + _TESTS = [ + { + "url": "https://zingmp3.vn/Mr-Siro", + "info_dict": { + "id": "IWZ98609", + "title": "Mr-Siro-bai-hat" + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/onlyc", + "info_dict": { + "id": "IWZ9ZED8", + "title": 'onlyc-bai-hat' + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/nghe-si/Huong-Giang-Idol", + "info_dict": { + "id": "IWZ9CUWA", + "title": "Huong-Giang-Idol-bai-hat" + }, + "playlist_mincount": 5 + }, + { + 'url': "https://zingmp3.vn/nghe-si/Huong-Giang-Idol/bai-hat", + "info_dict": { + "id": "IWZ9CUWA", + "title": "Huong-Giang-Idol-bai-hat", + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/nghe-si/Huong-Giang-Idol/album", + "info_dict": { + "id": "IWZ9CUWA", + "title": "Huong-Giang-Idol-album", + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/nghe-si/Huong-Giang-Idol/video", + "info_dict": { + "id": "IWZ9CUWA", + "title": "Huong-Giang-Idol-video" + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/Mr-Siro/bai-hat", + "info_dict": { + "id": "IWZ98609", + "title": "Mr-Siro-bai-hat", + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/Mr-Siro/playlist", + "info_dict": { + "id": "IWZ98609", + "title": "Mr-Siro-playlist", + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/Mr-Siro/video", + "info_dict": { + "id": "IWZ98609", + "title": "Mr-Siro-video", + }, + "playlist_mincount": 5 + }, + { + "url": "https://zingmp3.vn/chu-de/Acoustic/IWZ977C8.html", + "info_dict": { + "id": "IWZ977C8", + "title": "Acoustic", + }, + "playlist_mincount": 3 + } + ] + list_name_api_user = { + 'bai-hat': "/song/get-list", + "playlist": "/playlist/get-list", + "album": "/playlist/get-list", + "video": "/video/get-list", + } + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + name = mobj.group('name') + slug_name = mobj.group('slug_name') + nghe_si = mobj.group('nghe_si') + if not slug_name: + slug_name = "bai-hat" + + name_api = self.list_name_api_user.get(slug_name) or None + self.id_artist = None + if nghe_si: + webpage = self._download_webpage(url_or_request=f"https://mp3.zing.vn/nghe-si/{name}", video_id=name) + self.id_artist = self._search_regex(r'''(?x) + \.*?)\" + \s+data-type=\"(?P.*?)\" + \s+data-name=\"(?P.*?)\".*?\> + ''', webpage, "artist id", group="id_artist") + else: + api = self.get_api_with_signature(name_api="/oa/get-artist-info", alias=name) + info = self._download_json(url_or_request=api, video_id=name) + if info.get('msg') == 'Success': + self.id_artist = try_get(info, lambda x: x['data']['artist_id']) + + if self.id_artist: + self.api = self.get_api_with_signature(name_api=name_api, video_id=self.id_artist) + return self.playlist_result( + entries=self._entries(), + playlist_id=self.id_artist, + playlist_title=f"{name}-{slug_name}" + ) + elif name == 'chu-de': + self.IE_NAME = "zingmp3_vn:chu-de" + name_chu_de = mobj.group('name_chu_de') + self.id_chu_de = mobj.group('id_chu_de') + self.api = self.get_api_with_signature(name_api="/topic/get-detail", video_id=self.id_chu_de) + return self.playlist_result( + entries=self._entries_for_chu_de(), + playlist_id=self.id_chu_de, + playlist_title=name_chu_de, + ) + + def _entries_for_chu_de(self): + info = self._download_json(url_or_request=self.api, video_id=self.id_chu_de) + if info.get('msg') != "Success": + return + items = try_get(info, lambda x: x['data']['playlist']['items']) + for item in items: + url = urljoin(self._default_host, item.get('link')) + media_id = item.get('id') + if 'album' in url or 'playlist' in url: + name_api = '/playlist/get-playlist-detail' + api = self.get_api_with_signature(name_api=name_api, video_id=media_id) + info_playlist = self._download_json(url_or_request=api, video_id=media_id) + items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items']) + for item_pl in items_playlist: + url = urljoin(self._default_host, item_pl.get('link')) + video_id = item_pl.get('id') + yield self.url_result( + url=url, + ie=Zingmp3_vnIE.ie_key(), + video_id=video_id + ) + + def _entries(self): + start = 0 + count = 30 + while True: + info = self._download_json(url_or_request=self.api, video_id=self.id_artist, query={ + 'type': 'artist', + 'start': start, + 'count': count, + 'sort': 'hot' + }) + if info.get('msg').lower() != "success": + break + items = try_get(info, lambda x: x['data']['items']) + for item in items: + url = urljoin(self._default_host, item.get('link')) + media_id = item.get('id') + if 'album' in url or 'playlist' in url: + name_api = '/playlist/get-playlist-detail' + api = self.get_api_with_signature(name_api=name_api, video_id=media_id) + info_playlist = self._download_json(url_or_request=api, video_id=media_id) + items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items']) + for item_pl in items_playlist: + url = urljoin(self._default_host, item_pl.get('link')) + video_id = item_pl.get('id') + yield self.url_result( + url=url, + ie=Zingmp3_vnIE.ie_key(), + video_id=video_id + ) + else: + yield self.url_result( + url=url, + ie=Zingmp3_vnIE.ie_key(), + video_id=media_id + ) + total = int_or_none(try_get(info, lambda x: x['data']['total'])) + start += count + + if total <= start: + break + + +class Zingmp3_vnChartIE(Zingmp3_vnIE): + IE_NAME = "zingmp3_vn:#zingchart" + _VALID_URL = r'''(?x)^ + ((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P + (?:(zingmp3\.vn)| + (mp3\.zing\.vn)) + )\/(?Pzing-chart-tuan|zing-chart|top-new-release)\/ + (?P.*?)(\.|\/)(?P.*?\.)? + ''' + _TESTS = [ + { + "url": "https://zingmp3.vn/zing-chart/bai-hat.html", + "info_dict": { + "title": "zing-chart-bai-hat", + }, + "playlist_count": 100 + }, + { + "url": "https://zingmp3.vn/zing-chart/video.html", + "info_dict": { + "title": "zing-chart-video", + }, + "playlist_count": 100 + }, + { + "url": "https://zingmp3.vn/zing-chart-tuan/bai-hat-US-UK/IWZ9Z0BW.html", + "info_dict": { + "title": "zing-chart-tuan-bai-hat-US-UK" + }, + "playlist_mincount": 20 + }, + { + "url": "https://zingmp3.vn/zing-chart-tuan/video-Kpop/IWZ9Z0BZ.html", + "info_dict": { + "title": "zing-chart-tuan-video-Kpop" + }, + "playlist_mincount": 20 + }, + { + "url": "https://zingmp3.vn/top-new-release/index.html", + "info_dict": { + "title": "top-new-release-index" + }, + "playlist_count": 100 + } + ] + list_name_api = { + 'zing-chart': { + 'name': '/chart-realtime/get-detail', + 'bai-hat': 'song', + 'index': 'song', + 'video': 'video', + }, + 'zing-chart-tuan': { + 'name': '/chart/get-chart', + }, + 'top-new-release': { + 'name': '/chart/get-chart-new-release' + } + } + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + name = mobj.group('name') + slug_name = mobj.group('slug_name') + + if name == 'zing-chart': + api = self.get_api_with_signature( + name_api=self.list_name_api.get(name).get('name'), + type=self.list_name_api.get(name).get(slug_name) + ) + elif name == 'zing-chart-tuan': + api = self.get_api_with_signature( + name_api=self.list_name_api.get(name).get('name'), + video_id=mobj.group('id_name') + ) + else: + api = self.get_api_with_signature( + name_api=self.list_name_api.get(name).get('name'), + new_release=True + ) + webpage = self._download_webpage(url_or_request=api, video_id=name) + info = self._parse_json(webpage, name, transform_source=js_to_json) + return self.playlist_result( + entries=self._entries(try_get(info, lambda x: x['data']['items'])), + playlist_title=f"{name}-{slug_name}" + ) + + def _entries(self, items): + for item in items: + url = urljoin(self._default_host, item.get('link')) + video_id = item.get('id') + yield self.url_result(url, ie=Zingmp3_vnIE.ie_key(), video_id=video_id) From 13f9633e6b16f00f2508a7e0d62c8047911785a5 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sat, 28 Mar 2020 17:33:17 +0700 Subject: [PATCH 02/12] [zingmp3_vn] Add new extractor --- youtube_dl/extractor/zingmp3_vn.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/youtube_dl/extractor/zingmp3_vn.py b/youtube_dl/extractor/zingmp3_vn.py index 73d8672af07..0258be522db 100644 --- a/youtube_dl/extractor/zingmp3_vn.py +++ b/youtube_dl/extractor/zingmp3_vn.py @@ -13,7 +13,6 @@ url_or_none, int_or_none, try_get, - ExtractorError, js_to_json ) @@ -111,10 +110,6 @@ class Zingmp3_vnIE(InfoExtractor): def _real_extract(self, url): mobj = re.search(self._VALID_URL, url) - if not mobj: - raise ExtractorError( - f'Zingmp3 no support for {url}.', expected=True - ) video_id = mobj.group('id') type = mobj.group('type') slug = mobj.group('slug') From 189186a9a6781cc907ff883cd8487a92c36a5535 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sat, 28 Mar 2020 18:11:57 +0700 Subject: [PATCH 03/12] [zingmp3_vn] Add new extractor --- youtube_dl/extractor/zingmp3_vn.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/zingmp3_vn.py b/youtube_dl/extractor/zingmp3_vn.py index 0258be522db..657e4ab90a1 100644 --- a/youtube_dl/extractor/zingmp3_vn.py +++ b/youtube_dl/extractor/zingmp3_vn.py @@ -108,13 +108,17 @@ class Zingmp3_vnIE(InfoExtractor): _default_host = "https://zingmp3.vn/" def _real_extract(self, url): - + self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) video_id = mobj.group('id') type = mobj.group('type') slug = mobj.group('slug') return self.extract_info_media(type, slug, video_id) + def convert_oldDomain_to_newDomain(self,url): + if 'mp3.zing.vn' in url: + url = url.replace('mp3.zing.vn','zingmp3.vn') + return url def extract_info_media(self, type, slug, video_id): formats = [] name_api = '' @@ -412,6 +416,7 @@ class Zingmp3_vnPlaylistIE(Zingmp3_vnIE): name_api = '/playlist/get-playlist-detail' def _real_extract(self, url): + self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) playlist_id = mobj.group('playlist_id') return self._extract_playlist(id_playlist=playlist_id) @@ -546,6 +551,7 @@ class Zingmp3_vnUserIE(Zingmp3_vnIE): } def _real_extract(self, url): + self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) name = mobj.group('name') slug_name = mobj.group('slug_name') @@ -714,6 +720,7 @@ class Zingmp3_vnChartIE(Zingmp3_vnIE): } def _real_extract(self, url): + self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) name = mobj.group('name') slug_name = mobj.group('slug_name') From 1eccbbbb3f4ad853cf70f7ad2cfa6532250f493c Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sat, 28 Mar 2020 21:08:00 +0700 Subject: [PATCH 04/12] [zingmp3_vn] Add new extractor --- youtube_dl/extractor/zingmp3_vn.py | 132 ++++++++++++++--------------- 1 file changed, 63 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/zingmp3_vn.py b/youtube_dl/extractor/zingmp3_vn.py index 657e4ab90a1..bee2fb1691a 100644 --- a/youtube_dl/extractor/zingmp3_vn.py +++ b/youtube_dl/extractor/zingmp3_vn.py @@ -21,8 +21,7 @@ class Zingmp3_vnIE(InfoExtractor): _VALID_URL = r'''(?x)^ ((http[s]?|fpt):)\/?\/(www\.|m\.|) (?P - (?:(zingmp3\.vn)| - (mp3\.zing\.vn)) + (zingmp3\.vn) )\/(?Pbai-hat|video-clip|embed)\/(?P.*?)\/(?P.*?)\W ''' IE_NAME = 'zingmp3_vn' @@ -108,31 +107,26 @@ class Zingmp3_vnIE(InfoExtractor): _default_host = "https://zingmp3.vn/" def _real_extract(self, url): - self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) video_id = mobj.group('id') - type = mobj.group('type') + _type = mobj.group('type') slug = mobj.group('slug') - return self.extract_info_media(type, slug, video_id) + return self.extract_info_media(_type, slug, video_id) - def convert_oldDomain_to_newDomain(self,url): - if 'mp3.zing.vn' in url: - url = url.replace('mp3.zing.vn','zingmp3.vn') - return url - def extract_info_media(self, type, slug, video_id): + def extract_info_media(self, _type, slug, video_id): formats = [] name_api = '' - if type == 'bai-hat': + if _type == 'bai-hat': name_api = '/song/get-song-info' - elif type == 'embed': + elif _type == 'embed': if slug and slug == 'song': name_api = '/song/get-song-info' - elif type == 'video-clip': + elif _type == 'video-clip': name_api = "/video/get-video-detail" api = self.get_api_with_signature(name_api=name_api, video_id=video_id) info = self._download_json(url_or_request=api, video_id=video_id) - if type == 'video-clip' and not self._downloader.params.get("cookiefile"): + if _type == 'video-clip' and not self._downloader.params.get("cookiefile"): # TODO: Have api can get best quality like 1080p, 720p, default if dont have VIP just 480p is best quality. # If requests are continuous without downtime, # you may be blocked IP for a short period of time, @@ -195,7 +189,7 @@ def convert_thumbnail(url): streaming = data.get('streaming') if streaming.get('msg'): - if type == 'video-clip': + if _type == 'video-clip': stream_data = streaming.get('data', dict) for protocol, stream in stream_data.items(): if protocol == 'default': @@ -220,9 +214,9 @@ def convert_thumbnail(url): else: if streaming.get('msg') != "Success": self.to_screen( - f" - {self.IE_NAME} requires authentication.\n" - f"\t\t- Because This media need VIP account to listen or watch.\n" - f"\t\t- You may want to use --cookies.\n\n" + "Zingmp3_vn requires authentication." + "Because This media need VIP account to listen or watch." + "You may want to use --cookies FILE." ) return default = streaming.get('default') @@ -265,11 +259,13 @@ def convert_thumbnail(url): 'formats': formats } - def get_api_with_signature(self, name_api, video_id='', alias='', type='', new_release=False): + def get_api_with_signature(self, name_api, video_id='', alias='', _type='', new_release=False): """ - The api of this site has 1 param named sig => signature - It uses the hash function of the variables ctime, id, and name_api. - Sone api don't need id, just need ctime and name_api, + :param _type: + :param alias: :param name_api: :param video_id: :param type: @@ -280,6 +276,7 @@ def get_api_with_signature(self, name_api, video_id='', alias='', type='', new_r SECRET_KEY = b'10a01dcf33762d3a204cb96429918ff6' if not name_api: return + _time = str(int(datetime.datetime.now().timestamp())) def get_hash256(string): return hashlib.sha256(string.encode('utf-8')).hexdigest() @@ -295,74 +292,69 @@ def mapping(key, value): data = "&".join(data) return data - def get_api_by_id(id): - url = f"https://zingmp3.vn/api{name_api}?id={id}&" - time = str(int(datetime.datetime.now().timestamp())) - sha256 = get_hash256(f"ctime={time}id={id}") + def get_api_by_id(_id): + url = r"https://zingmp3.vn/api%s?id=%s&" % (name_api, _id) + sha256 = get_hash256(r"ctime=%sid=%s" % (_time, _id)) data = { - 'ctime': time, + 'ctime': _time, 'api_key': API_KEY, - 'sig': get_hmac512(f"{name_api}{sha256}") + 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } return url + get_request_path(data) - def get_api_chart(type): - url = f"https://zingmp3.vn/api{name_api}?type={type}&" - time = str(int(datetime.datetime.now().timestamp())) - sha256 = get_hash256(f"ctime={time}") + def get_api_chart(_type): + url = r"https://zingmp3.vn/api%s?type=%s&" % (name_api, _type) + sha256 = get_hash256(r"ctime=%s" % (_time)) data = { - 'ctime': time, + 'ctime': _time, 'api_key': API_KEY, - 'sig': get_hmac512(f"{name_api}{sha256}") + 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } return url + get_request_path(data) def get_api_new_release(): - url = f"https://zingmp3.vn/api{name_api}?" - time = str(int(datetime.datetime.now().timestamp())) - sha256 = get_hash256(f"ctime={time}") + url = r"https://zingmp3.vn/api%s?" % (name_api) + sha256 = get_hash256(r"ctime=%s" % (_time)) data = { - 'ctime': time, + 'ctime': _time, 'api_key': API_KEY, - 'sig': get_hmac512(f"{name_api}{sha256}") + 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } return url + get_request_path(data) - def get_api_download(id): - url = f"https://download.zingmp3.vn/api{name_api}?id={id}&" - time = str(int(datetime.datetime.now().timestamp())) - sha256 = get_hash256(f"ctime={time}id={id}") + def get_api_download(_id): + url = r"https://download.zingmp3.vn/api%s?id=%s&" % (name_api, _id) + sha256 = get_hash256(r"ctime=%sid=%s" % (_time, _id)) data = { - 'ctime': time, + 'ctime': _time, 'api_key': API_KEY, - 'sig': get_hmac512(f"{name_api}{sha256}") + 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } return url + get_request_path(data) def get_api_info_alias(alias): - url = f"https://zingmp3.vn/api{name_api}?alias={alias}&" - time = str(int(datetime.datetime.now().timestamp())) - sha256 = get_hash256(f"ctime={time}") + url = r"https://zingmp3.vn/api%s?alias=%s&" % (name_api, alias) + sha256 = get_hash256(r"ctime=%s" % (_time)) data = { - 'ctime': time, + 'ctime': _time, 'api_key': API_KEY, - 'sig': get_hmac512(f"{name_api}{sha256}") + 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } return url + get_request_path(data) if 'download' in name_api: - return get_api_download(id=video_id) + return get_api_download(_id=video_id) if alias: return get_api_info_alias(alias) if video_id: return get_api_by_id(video_id) - if type: - return get_api_chart(type) + if _type: + return get_api_chart(_type) if new_release: return get_api_new_release() return @@ -374,8 +366,7 @@ class Zingmp3_vnPlaylistIE(Zingmp3_vnIE): _VALID_URL = r'''(?x)^ ((http[s]?|fpt):)\/?\/(www\.|m\.|) (?P - (?:(zingmp3\.vn)| - (mp3\.zing\.vn)) + (zingmp3\.vn) )\/(?Palbum|playlist)\/(?P.*?)\/(?P.*?)\W ''' @@ -416,7 +407,6 @@ class Zingmp3_vnPlaylistIE(Zingmp3_vnIE): name_api = '/playlist/get-playlist-detail' def _real_extract(self, url): - self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) playlist_id = mobj.group('playlist_id') return self._extract_playlist(id_playlist=playlist_id) @@ -449,13 +439,12 @@ class Zingmp3_vnUserIE(Zingmp3_vnIE): _VALID_URL = r'''(?x)^ ((http[s]?|fpt):)\/?\/(www\.|m\.|) (?P - (?:(zingmp3\.vn)| - (mp3\.zing\.vn)) + (zingmp3\.vn) )\/(?Pnghe-si\/|) (?P.*?) (?:$|\/) (?: - (?P.*?)\/(?P.*?\.) + (?P.*?)\/(?P.*?)\. | (?P.*?$) ) @@ -551,7 +540,6 @@ class Zingmp3_vnUserIE(Zingmp3_vnIE): } def _real_extract(self, url): - self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) name = mobj.group('name') slug_name = mobj.group('slug_name') @@ -562,7 +550,7 @@ def _real_extract(self, url): name_api = self.list_name_api_user.get(slug_name) or None self.id_artist = None if nghe_si: - webpage = self._download_webpage(url_or_request=f"https://mp3.zing.vn/nghe-si/{name}", video_id=name) + webpage = self._download_webpage(url_or_request=r"https://mp3.zing.vn/nghe-si/%s" % (name), video_id=name) self.id_artist = self._search_regex(r'''(?x) \.*?)\" @@ -580,7 +568,7 @@ def _real_extract(self, url): return self.playlist_result( entries=self._entries(), playlist_id=self.id_artist, - playlist_title=f"{name}-{slug_name}" + playlist_title=r"%s-%s" % (name, slug_name) ) elif name == 'chu-de': self.IE_NAME = "zingmp3_vn:chu-de" @@ -659,11 +647,11 @@ def _entries(self): class Zingmp3_vnChartIE(Zingmp3_vnIE): IE_NAME = "zingmp3_vn:#zingchart" + _VALID_URL = r'''(?x)^ ((http[s]?|fpt):)\/?\/(www\.|m\.|) (?P - (?:(zingmp3\.vn)| - (mp3\.zing\.vn)) + (zingmp3\.vn) )\/(?Pzing-chart-tuan|zing-chart|top-new-release)\/ (?P.*?)(\.|\/)(?P.*?\.)? ''' @@ -720,7 +708,6 @@ class Zingmp3_vnChartIE(Zingmp3_vnIE): } def _real_extract(self, url): - self.convert_oldDomain_to_newDomain(url) mobj = re.search(self._VALID_URL, url) name = mobj.group('name') slug_name = mobj.group('slug_name') @@ -728,7 +715,7 @@ def _real_extract(self, url): if name == 'zing-chart': api = self.get_api_with_signature( name_api=self.list_name_api.get(name).get('name'), - type=self.list_name_api.get(name).get(slug_name) + _type=self.list_name_api.get(name).get(slug_name) ) elif name == 'zing-chart-tuan': api = self.get_api_with_signature( @@ -740,12 +727,19 @@ def _real_extract(self, url): name_api=self.list_name_api.get(name).get('name'), new_release=True ) - webpage = self._download_webpage(url_or_request=api, video_id=name) - info = self._parse_json(webpage, name, transform_source=js_to_json) - return self.playlist_result( - entries=self._entries(try_get(info, lambda x: x['data']['items'])), - playlist_title=f"{name}-{slug_name}" - ) + count = 0 + info = None + while count != 3: + webpage = self._download_webpage(url_or_request=api, video_id=name) + if webpage: + info = self._parse_json(webpage, name, transform_source=js_to_json) + break + count += 1 + if info: + return self.playlist_result( + entries=self._entries(try_get(info, lambda x: x['data']['items'])), + playlist_title=r"%s-%s" % (name, slug_name) + ) def _entries(self, items): for item in items: From 7efe1860e9b2c6b8de2f1408491a16c555e89032 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sun, 29 Mar 2020 00:49:11 +0700 Subject: [PATCH 05/12] [zingmp3_vn] Add new extractor --- youtube_dl/extractor/zingmp3_vn.py | 53 +++++++++++++----------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/zingmp3_vn.py b/youtube_dl/extractor/zingmp3_vn.py index bee2fb1691a..5fd79ba8527 100644 --- a/youtube_dl/extractor/zingmp3_vn.py +++ b/youtube_dl/extractor/zingmp3_vn.py @@ -6,9 +6,12 @@ import hmac import re import time -from urllib.parse import quote, urljoin from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_urllib_parse +) from ..utils import ( url_or_none, int_or_none, @@ -179,7 +182,7 @@ def convert_thumbnail(url): artists = [{ "id": i.get('id'), "name": i.get('name'), - 'link': urljoin(self._default_host, i.get('link')), + 'link': compat_urllib_parse.urljoin(self._default_host, i.get('link')), 'thumbnail': convert_thumbnail(i.get('thumbnail')), 'follow_count': int_or_none(i.get('follow')) } for i in data.get('artists', [])] @@ -284,14 +287,6 @@ def get_hash256(string): def get_hmac512(string): return hmac.new(SECRET_KEY, string.encode('utf-8'), hashlib.sha512).hexdigest() - def get_request_path(data): - def mapping(key, value): - return quote(key) + "=" + quote(value) - - data = [mapping(k, v) for k, v in data.items()] - data = "&".join(data) - return data - def get_api_by_id(_id): url = r"https://zingmp3.vn/api%s?id=%s&" % (name_api, _id) sha256 = get_hash256(r"ctime=%sid=%s" % (_time, _id)) @@ -301,7 +296,7 @@ def get_api_by_id(_id): 'api_key': API_KEY, 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } - return url + get_request_path(data) + return url + compat_urllib_parse_urlencode(data) def get_api_chart(_type): url = r"https://zingmp3.vn/api%s?type=%s&" % (name_api, _type) @@ -312,7 +307,7 @@ def get_api_chart(_type): 'api_key': API_KEY, 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } - return url + get_request_path(data) + return url + compat_urllib_parse_urlencode(data) def get_api_new_release(): url = r"https://zingmp3.vn/api%s?" % (name_api) @@ -323,7 +318,7 @@ def get_api_new_release(): 'api_key': API_KEY, 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } - return url + get_request_path(data) + return url + compat_urllib_parse_urlencode(data) def get_api_download(_id): url = r"https://download.zingmp3.vn/api%s?id=%s&" % (name_api, _id) @@ -334,7 +329,7 @@ def get_api_download(_id): 'api_key': API_KEY, 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } - return url + get_request_path(data) + return url + compat_urllib_parse_urlencode(data) def get_api_info_alias(alias): url = r"https://zingmp3.vn/api%s?alias=%s&" % (name_api, alias) @@ -345,7 +340,7 @@ def get_api_info_alias(alias): 'api_key': API_KEY, 'sig': get_hmac512(r"%s%s" % (name_api, sha256)) } - return url + get_request_path(data) + return url + compat_urllib_parse_urlencode(data) if 'download' in name_api: return get_api_download(_id=video_id) @@ -418,7 +413,7 @@ def _extract_playlist(self, id_playlist): items = try_get(info, lambda x: x['data']['song']['items']) entries = [] for item in items: - url = urljoin(self._default_host, item.get('link')) + url = compat_urllib_parse.urljoin(self._default_host, item.get('link')) video_id = item.get('id') entry = self.url_result( url=url, @@ -442,12 +437,7 @@ class Zingmp3_vnUserIE(Zingmp3_vnIE): (zingmp3\.vn) )\/(?Pnghe-si\/|) (?P.*?) - (?:$|\/) - (?: - (?P.*?)\/(?P.*?)\. - | - (?P.*?$) - ) + (?:$|\/)(?P.*?$) ''' IE_NAME = "zingmp3_vn:user" _TESTS = [ @@ -530,7 +520,7 @@ class Zingmp3_vnUserIE(Zingmp3_vnIE): "title": "Acoustic", }, "playlist_mincount": 3 - } + }, ] list_name_api_user = { 'bai-hat': "/song/get-list", @@ -550,7 +540,7 @@ def _real_extract(self, url): name_api = self.list_name_api_user.get(slug_name) or None self.id_artist = None if nghe_si: - webpage = self._download_webpage(url_or_request=r"https://mp3.zing.vn/nghe-si/%s" % (name), video_id=name) + webpage = self._download_webpage(url_or_request=r"https://mp3.zing.vn/nghe-si/%s" % name, video_id=name) self.id_artist = self._search_regex(r'''(?x) \.*?)\" @@ -572,8 +562,9 @@ def _real_extract(self, url): ) elif name == 'chu-de': self.IE_NAME = "zingmp3_vn:chu-de" - name_chu_de = mobj.group('name_chu_de') - self.id_chu_de = mobj.group('id_chu_de') + rex = re.match(r"(?P.*)\/(?P.*?)\.", slug_name) + name_chu_de = rex.group('name_chu_de') + self.id_chu_de = rex.group('id_chu_de') self.api = self.get_api_with_signature(name_api="/topic/get-detail", video_id=self.id_chu_de) return self.playlist_result( entries=self._entries_for_chu_de(), @@ -587,7 +578,7 @@ def _entries_for_chu_de(self): return items = try_get(info, lambda x: x['data']['playlist']['items']) for item in items: - url = urljoin(self._default_host, item.get('link')) + url = compat_urllib_parse.urljoin(self._default_host, item.get('link')) media_id = item.get('id') if 'album' in url or 'playlist' in url: name_api = '/playlist/get-playlist-detail' @@ -595,7 +586,7 @@ def _entries_for_chu_de(self): info_playlist = self._download_json(url_or_request=api, video_id=media_id) items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items']) for item_pl in items_playlist: - url = urljoin(self._default_host, item_pl.get('link')) + url = compat_urllib_parse.urljoin(self._default_host, item_pl.get('link')) video_id = item_pl.get('id') yield self.url_result( url=url, @@ -617,7 +608,7 @@ def _entries(self): break items = try_get(info, lambda x: x['data']['items']) for item in items: - url = urljoin(self._default_host, item.get('link')) + url = compat_urllib_parse.urljoin(self._default_host, item.get('link')) media_id = item.get('id') if 'album' in url or 'playlist' in url: name_api = '/playlist/get-playlist-detail' @@ -625,7 +616,7 @@ def _entries(self): info_playlist = self._download_json(url_or_request=api, video_id=media_id) items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items']) for item_pl in items_playlist: - url = urljoin(self._default_host, item_pl.get('link')) + url = compat_urllib_parse.urljoin(self._default_host, item_pl.get('link')) video_id = item_pl.get('id') yield self.url_result( url=url, @@ -743,6 +734,6 @@ def _real_extract(self, url): def _entries(self, items): for item in items: - url = urljoin(self._default_host, item.get('link')) + url = compat_urllib_parse.urljoin(self._default_host, item.get('link')) video_id = item.get('id') yield self.url_result(url, ie=Zingmp3_vnIE.ie_key(), video_id=video_id) From 1ea535cd880181f3117d13eadcd8c6916f3548be Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sun, 29 Mar 2020 01:58:41 +0700 Subject: [PATCH 06/12] [zingmp3_vn] Add new extractor --- youtube_dl/extractor/zingmp3_vn.py | 275 +++++++++++++++++------------ 1 file changed, 162 insertions(+), 113 deletions(-) diff --git a/youtube_dl/extractor/zingmp3_vn.py b/youtube_dl/extractor/zingmp3_vn.py index 5fd79ba8527..aac1495f47f 100644 --- a/youtube_dl/extractor/zingmp3_vn.py +++ b/youtube_dl/extractor/zingmp3_vn.py @@ -97,13 +97,12 @@ class Zingmp3_vnIE(InfoExtractor): 'ext': 'mp3', 'title': "Life Is Good", 'thumbnail': r're:^https?://.*\.jpg$', - 'description': str, 'like_count': int, 'comment_count': int, 'view_count': int, } }, { - # This url can not listen or download if don't have account vip, use --cookies FILE to login + # This url can not listen if don't have account vip, use --cookies FILE to login 'url': 'https://zingmp3.vn/bai-hat/The-Box-Roddy-Ricch/ZWB0ZF9Z.html', "only_matching": True }] @@ -198,7 +197,7 @@ def convert_thumbnail(url): if protocol == 'default': protocol = 'http' for quality, url in stream.items(): - if url: + if url and url != "ERROR": if protocol == 'hls': m3u8_url = add_protocol(url) m3u8_doc = self._download_webpage(m3u8_url, video_id) @@ -300,7 +299,7 @@ def get_api_by_id(_id): def get_api_chart(_type): url = r"https://zingmp3.vn/api%s?type=%s&" % (name_api, _type) - sha256 = get_hash256(r"ctime=%s" % (_time)) + sha256 = get_hash256(r"ctime=%s" % _time) data = { 'ctime': _time, @@ -310,8 +309,8 @@ def get_api_chart(_type): return url + compat_urllib_parse_urlencode(data) def get_api_new_release(): - url = r"https://zingmp3.vn/api%s?" % (name_api) - sha256 = get_hash256(r"ctime=%s" % (_time)) + url = r"https://zingmp3.vn/api%s?" % name_api + sha256 = get_hash256(r"ctime=%s" % _time) data = { 'ctime': _time, @@ -362,7 +361,7 @@ class Zingmp3_vnPlaylistIE(Zingmp3_vnIE): ((http[s]?|fpt):)\/?\/(www\.|m\.|) (?P (zingmp3\.vn) - )\/(?Palbum|playlist)\/(?P.*?)\/(?P.*?)\W + )\/(?Palbum|playlist|chu-de)\/(?P.*?)\/(?P.*?)\W ''' _TESTS = [ @@ -397,17 +396,66 @@ class Zingmp3_vnPlaylistIE(Zingmp3_vnIE): "title": "Top 100 Pop Âu Mỹ Hay Nhất" }, "playlist_count": 100, + }, + { + "url": "https://zingmp3.vn/chu-de/Acoustic/IWZ977C8.html", + "info_dict": { + "id": "IWZ977C8", + "title": "Acoustic" + }, + "playlist_mincount": 3 + }, + { + "url": "https://zingmp3.vn/chu-de/Nhac-Khong-Loi/IWZ9ZIOI.html", + "info_dict": { + "id": "IWZ9ZIOI", + "title": "Nhac-Khong-Loi" + }, + "playlist_mincount": 10 } ] - name_api = '/playlist/get-playlist-detail' + name_api_album_or_playlist = '/playlist/get-playlist-detail' + name_api_topic = "/topic/get-detail" def _real_extract(self, url): mobj = re.search(self._VALID_URL, url) + _type = mobj.group('type') playlist_id = mobj.group('playlist_id') + slug = mobj.group('slug') + if _type == 'chu-de': + return self.playlist_result( + entries=self._entries_for_chu_de(id_chu_de=playlist_id), + playlist_id=playlist_id, + playlist_title=slug + ) return self._extract_playlist(id_playlist=playlist_id) + def _entries_for_chu_de(self, id_chu_de): + self.IE_NAME = "zingmp3_vn:chu-de" + api = self.get_api_with_signature(name_api=self.name_api_topic, video_id=id_chu_de) + info = self._download_json(url_or_request=api, video_id=id_chu_de) + if info.get('msg') != "Success": + return + items = try_get(info, lambda x: x['data']['playlist']['items']) + for item in items: + url = compat_urllib_parse.urljoin(self._default_host, item.get('link')) + media_id = item.get('id') + if 'album' in url or 'playlist' in url: + name_api = '/playlist/get-playlist-detail' + api = self.get_api_with_signature(name_api=name_api, video_id=media_id) + info_playlist = self._download_json(url_or_request=api, video_id=media_id) + items_playlist = try_get(info_playlist, lambda x: x['data']['song']['items']) + for item_pl in items_playlist: + url = compat_urllib_parse.urljoin(self._default_host, item_pl.get('link')) + video_id = item_pl.get('id') + yield self.url_result( + url=url, + ie=Zingmp3_vnIE.ie_key(), + video_id=video_id + ) + def _extract_playlist(self, id_playlist): - api = self.get_api_with_signature(name_api=self.name_api, video_id=id_playlist) + api = self.get_api_with_signature(name_api=self.name_api_album_or_playlist, video_id=id_playlist) info = self._download_json(url_or_request=api, video_id=id_playlist) title_playlist = try_get(info, lambda x: x['data']['title']) items = try_get(info, lambda x: x['data']['song']['items']) @@ -430,7 +478,110 @@ def _extract_playlist(self, id_playlist): } -class Zingmp3_vnUserIE(Zingmp3_vnIE): +class Zingmp3_vnChartIE(Zingmp3_vnIE): + IE_NAME = "zingmp3_vn:#zingchart" + + _VALID_URL = r'''(?x)^ + ((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P + (zingmp3\.vn) + )\/(?Pzing-chart-tuan|zing-chart|top-new-release)\/ + (?P.*?)(\.|\/)(?P.*?\.)? + ''' + _TESTS = [ + { + "url": "https://zingmp3.vn/zing-chart/bai-hat.html", + "info_dict": { + "title": "zing-chart-bai-hat", + }, + "playlist_count": 100 + }, + { + "url": "https://zingmp3.vn/zing-chart/video.html", + "info_dict": { + "title": "zing-chart-video", + }, + "playlist_count": 100 + }, + { + "url": "https://zingmp3.vn/zing-chart-tuan/bai-hat-US-UK/IWZ9Z0BW.html", + "info_dict": { + "title": "zing-chart-tuan-bai-hat-US-UK" + }, + "playlist_mincount": 20 + }, + { + "url": "https://zingmp3.vn/zing-chart-tuan/video-Kpop/IWZ9Z0BZ.html", + "info_dict": { + "title": "zing-chart-tuan-video-Kpop" + }, + "playlist_mincount": 20 + }, + { + "url": "https://zingmp3.vn/top-new-release/index.html", + "info_dict": { + "title": "top-new-release-index" + }, + "playlist_count": 100 + } + ] + list_name_api = { + 'zing-chart': { + 'name': '/chart-realtime/get-detail', + 'bai-hat': 'song', + 'index': 'song', + 'video': 'video', + }, + 'zing-chart-tuan': { + 'name': '/chart/get-chart', + }, + 'top-new-release': { + 'name': '/chart/get-chart-new-release' + } + } + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + name = mobj.group('name') + slug_name = mobj.group('slug_name') + + if name == 'zing-chart': + api = self.get_api_with_signature( + name_api=self.list_name_api.get(name).get('name'), + _type=self.list_name_api.get(name).get(slug_name) + ) + elif name == 'zing-chart-tuan': + api = self.get_api_with_signature( + name_api=self.list_name_api.get(name).get('name'), + video_id=mobj.group('id_name') + ) + else: + api = self.get_api_with_signature( + name_api=self.list_name_api.get(name).get('name'), + new_release=True + ) + count = 0 + info = None + while count != 3: + webpage = self._download_webpage(url_or_request=api, video_id=name) + if webpage: + info = self._parse_json(webpage, name, transform_source=js_to_json) + break + count += 1 + if info: + return self.playlist_result( + entries=self._entries(try_get(info, lambda x: x['data']['items'])), + playlist_title=r"%s-%s" % (name, slug_name) + ) + + def _entries(self, items): + for item in items: + url = compat_urllib_parse.urljoin(self._default_host, item.get('link')) + video_id = item.get('id') + yield self.url_result(url, ie=Zingmp3_vnIE.ie_key(), video_id=video_id) + + +r"""class Zingmp3_vnUserIE(Zingmp3_vnIE): _VALID_URL = r'''(?x)^ ((http[s]?|fpt):)\/?\/(www\.|m\.|) (?P @@ -634,106 +785,4 @@ def _entries(self): if total <= start: break - - -class Zingmp3_vnChartIE(Zingmp3_vnIE): - IE_NAME = "zingmp3_vn:#zingchart" - - _VALID_URL = r'''(?x)^ - ((http[s]?|fpt):)\/?\/(www\.|m\.|) - (?P - (zingmp3\.vn) - )\/(?Pzing-chart-tuan|zing-chart|top-new-release)\/ - (?P.*?)(\.|\/)(?P.*?\.)? - ''' - _TESTS = [ - { - "url": "https://zingmp3.vn/zing-chart/bai-hat.html", - "info_dict": { - "title": "zing-chart-bai-hat", - }, - "playlist_count": 100 - }, - { - "url": "https://zingmp3.vn/zing-chart/video.html", - "info_dict": { - "title": "zing-chart-video", - }, - "playlist_count": 100 - }, - { - "url": "https://zingmp3.vn/zing-chart-tuan/bai-hat-US-UK/IWZ9Z0BW.html", - "info_dict": { - "title": "zing-chart-tuan-bai-hat-US-UK" - }, - "playlist_mincount": 20 - }, - { - "url": "https://zingmp3.vn/zing-chart-tuan/video-Kpop/IWZ9Z0BZ.html", - "info_dict": { - "title": "zing-chart-tuan-video-Kpop" - }, - "playlist_mincount": 20 - }, - { - "url": "https://zingmp3.vn/top-new-release/index.html", - "info_dict": { - "title": "top-new-release-index" - }, - "playlist_count": 100 - } - ] - list_name_api = { - 'zing-chart': { - 'name': '/chart-realtime/get-detail', - 'bai-hat': 'song', - 'index': 'song', - 'video': 'video', - }, - 'zing-chart-tuan': { - 'name': '/chart/get-chart', - }, - 'top-new-release': { - 'name': '/chart/get-chart-new-release' - } - } - - def _real_extract(self, url): - mobj = re.search(self._VALID_URL, url) - name = mobj.group('name') - slug_name = mobj.group('slug_name') - - if name == 'zing-chart': - api = self.get_api_with_signature( - name_api=self.list_name_api.get(name).get('name'), - _type=self.list_name_api.get(name).get(slug_name) - ) - elif name == 'zing-chart-tuan': - api = self.get_api_with_signature( - name_api=self.list_name_api.get(name).get('name'), - video_id=mobj.group('id_name') - ) - else: - api = self.get_api_with_signature( - name_api=self.list_name_api.get(name).get('name'), - new_release=True - ) - count = 0 - info = None - while count != 3: - webpage = self._download_webpage(url_or_request=api, video_id=name) - if webpage: - info = self._parse_json(webpage, name, transform_source=js_to_json) - break - count += 1 - if info: - return self.playlist_result( - entries=self._entries(try_get(info, lambda x: x['data']['items'])), - playlist_title=r"%s-%s" % (name, slug_name) - ) - - def _entries(self, items): - for item in items: - url = compat_urllib_parse.urljoin(self._default_host, item.get('link')) - video_id = item.get('id') - yield self.url_result(url, ie=Zingmp3_vnIE.ie_key(), video_id=video_id) +""" From a49d4359b17ddabd8fa7aaa6edf2f4c5e2b5c221 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sun, 29 Mar 2020 01:59:15 +0700 Subject: [PATCH 07/12] [zingmp3_vn] Add new extractor --- youtube_dl/extractor/extractors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f3cfcb6c18a..3ad9dc3a3d9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1508,5 +1508,4 @@ Zingmp3_vnIE, Zingmp3_vnPlaylistIE, Zingmp3_vnChartIE, - Zingmp3_vnUserIE, ) \ No newline at end of file From cb2895076b066f581aae9516fc645509ddb23348 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Sun, 29 Mar 2020 14:46:57 +0700 Subject: [PATCH 08/12] Add extract User url --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/zingmp3_vn.py | 47 +++++------------------------- 2 files changed, 8 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3ad9dc3a3d9..f3cfcb6c18a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1508,4 +1508,5 @@ Zingmp3_vnIE, Zingmp3_vnPlaylistIE, Zingmp3_vnChartIE, + Zingmp3_vnUserIE, ) \ No newline at end of file diff --git a/youtube_dl/extractor/zingmp3_vn.py b/youtube_dl/extractor/zingmp3_vn.py index aac1495f47f..5cff7a74514 100644 --- a/youtube_dl/extractor/zingmp3_vn.py +++ b/youtube_dl/extractor/zingmp3_vn.py @@ -581,41 +581,17 @@ def _entries(self, items): yield self.url_result(url, ie=Zingmp3_vnIE.ie_key(), video_id=video_id) -r"""class Zingmp3_vnUserIE(Zingmp3_vnIE): +class Zingmp3_vnUserIE(Zingmp3_vnIE): _VALID_URL = r'''(?x)^ - ((http[s]?|fpt):)\/?\/(www\.|m\.|) - (?P - (zingmp3\.vn) - )\/(?Pnghe-si\/|) - (?P.*?) - (?:$|\/)(?P.*?$) + ((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P + (zingmp3\.vn) + )\/(?Pnghe-si\/|)(?P.*?) + (?:$|\/) + (?Pbai-hat|album|video|playlist)$ ''' IE_NAME = "zingmp3_vn:user" _TESTS = [ - { - "url": "https://zingmp3.vn/Mr-Siro", - "info_dict": { - "id": "IWZ98609", - "title": "Mr-Siro-bai-hat" - }, - "playlist_mincount": 5 - }, - { - "url": "https://zingmp3.vn/onlyc", - "info_dict": { - "id": "IWZ9ZED8", - "title": 'onlyc-bai-hat' - }, - "playlist_mincount": 5 - }, - { - "url": "https://zingmp3.vn/nghe-si/Huong-Giang-Idol", - "info_dict": { - "id": "IWZ9CUWA", - "title": "Huong-Giang-Idol-bai-hat" - }, - "playlist_mincount": 5 - }, { 'url': "https://zingmp3.vn/nghe-si/Huong-Giang-Idol/bai-hat", "info_dict": { @@ -664,14 +640,6 @@ def _entries(self, items): }, "playlist_mincount": 5 }, - { - "url": "https://zingmp3.vn/chu-de/Acoustic/IWZ977C8.html", - "info_dict": { - "id": "IWZ977C8", - "title": "Acoustic", - }, - "playlist_mincount": 3 - }, ] list_name_api_user = { 'bai-hat': "/song/get-list", @@ -785,4 +753,3 @@ def _entries(self): if total <= start: break -""" From 21d12c0be619fd539989d2b33e6a407fc0412ed8 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 Date: Mon, 30 Mar 2020 14:08:21 +0700 Subject: [PATCH 09/12] [axios] Add new extractor --- youtube_dl/extractor/axios.py | 137 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 8 ++ 2 files changed, 145 insertions(+) create mode 100644 youtube_dl/extractor/axios.py diff --git a/youtube_dl/extractor/axios.py b/youtube_dl/extractor/axios.py new file mode 100644 index 00000000000..5bdeafa9c4b --- /dev/null +++ b/youtube_dl/extractor/axios.py @@ -0,0 +1,137 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + try_get, + mimetype2ext, + clean_html +) + + +class AxiosIE(InfoExtractor): + IE_NAME = 'axios' + IE_DESC = 'www.axios.com' + _VALID_URL = r"""(?x)^ + ((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P + (www\.axios\.com) + )\/ + (?P.*?)\- + (?P[0-9a-z]{8}\-[0-9a-z]{4}-[a-z0-9]{4}.+?)\. + """ + __TESTS = [ + { + "url": r"https://www.axios.com/trump-coronavirus-restrictions-c3da2d28-b761-4b62-b6d6-734c059c6dba.html", + "info_dict": { + "id": "c3da2d28-b761-4b62-b6d6-734c059c6dba", + "title": '''Trump says he wants to "open" the country by Easter''', + "ext": "mp4", + "description": str, + 'thumbnails': [], + } + }, + { + "url": r"https://www.axios.com/coronavirus-texas-official-grandparents-die-172ca951-891c-44e7-a9ec-77c486e0c5c3.html", + "info_dict": { + "id": "172ca951-891c-44e7-a9ec-77c486e0c5c3", + "title": '''Texas Lt. Gov.: Grandparents would be willing to die to save the economy''', + "ext": "mp4", + "description": str, + 'thumbnails': [], + } + }, + { + "url": r"https://www.axios.com/cuomo-trump-mandatory-quarantine-panic-35ae54a1-0aa9-4a38-910d-647293002fc2.html", + "info_dict": { + "id": "35ae54a1-0aa9-4a38-910d-647293002fc2", + "title": '''Cuomo: Trump's mandatory quarantine comments "really panicked people"''', + "ext": "mp4", + "description": str, + 'thumbnails': [], + } + }, + { + "url": r"https://www.axios.com/coronavirus-louisiana-bel-edwards-ventilators-7810fc76-1825-41b2-8b22-f1cfc14e2ffe.html", + "info_dict": { + "id": "7810fc76-1825-41b2-8b22-f1cfc14e2ffe", + "title": '''Louisiana on track to exceed ventilator capacity this week, governor says''', + "ext": "mp4", + "description": str, + 'thumbnails': [], + } + }, + ] + api_jwplayer = r'http://content.jwplatform.com/v2/media/%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url_or_request=url, + video_id=video_id + ) + jwplayer_mobj = re.search( + r'''.+?)\".+?data-media-id=\"(?P.+?)\".+?\>\<\/amp-jwplayer>''', + webpage + ) + description = self._search_regex( + r'''(?P.*?)\<\/div\>''', + webpage, "Description", group="description" + ) + title = self._search_regex( + r'''(?P.*?)\<\/h1\>''', + webpage, "Title", group="title" + ) + description = clean_html(description) + # player_id = jwplayer_mobj.group("player_id") + media_id = jwplayer_mobj.group("media_id") + json_jwplayer = self._download_json( + url_or_request=self.api_jwplayer % media_id, + video_id=media_id, + ) + playlist = try_get(json_jwplayer, lambda x: x['playlist'][0]) + if playlist: + images = playlist.get('images') + thumbnails = [ + { + "url": img.get('src'), + "width": img.get('width') + } for img in images if img.get('src') + ] + sources = playlist.get('sources') or [] + formats = [] + for sour in sources: + if not sour: + continue + _type = sour.get('type') + ext = mimetype2ext(_type) + file = sour.get('file') + if ext == 'm3u8': + m3u8_doc = self._download_webpage(file, video_id=media_id) + formats.extend(self._parse_m3u8_formats(m3u8_doc, file)) + elif ext == 'mp4': + formats.append({ + "url": file, + "ext": ext, + "height": sour.get('height'), + "width": sour.get('width'), + 'protocol': 'http', + "label": sour.get("label") + }) + else: + formats.append({ + "url": file, + "ext": ext, + 'protocol': 'http', + "label": sour.get("label") + }) + formats.sort(key=lambda x: x.get("height") if x.get("height") else -1) + return { + "id": video_id, + "title": title.strip(), + "thumbnails": thumbnails, + "formats": formats, + "description": description + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f3cfcb6c18a..c75ca06c599 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1504,9 +1504,17 @@ from .zingmp3 import ZingMp3IE from .zype import ZypeIE +from .nhaccuatui import ( + NhaccuatuiIE +) + from .zingmp3_vn import ( Zingmp3_vnIE, Zingmp3_vnPlaylistIE, Zingmp3_vnChartIE, Zingmp3_vnUserIE, +) + +from .axios import ( + AxiosIE ) \ No newline at end of file From 54f314b7eeb7d474fdaaef49fa6eab4bd4ad5709 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 <hatienloi261299@gmail.com> Date: Tue, 31 Mar 2020 00:16:28 +0700 Subject: [PATCH 10/12] [axios] Add new extractor --- youtube_dl/extractor/extractors.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c75ca06c599..3f169056a6d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1504,8 +1504,8 @@ from .zingmp3 import ZingMp3IE from .zype import ZypeIE -from .nhaccuatui import ( - NhaccuatuiIE +from .axios import ( + AxiosIE ) from .zingmp3_vn import ( @@ -1514,7 +1514,3 @@ Zingmp3_vnChartIE, Zingmp3_vnUserIE, ) - -from .axios import ( - AxiosIE -) \ No newline at end of file From 687dd3be3f280e52d1e19b4f40e3ff0287ff6ff5 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 <hatienloi261299@gmail.com> Date: Tue, 31 Mar 2020 14:20:26 +0700 Subject: [PATCH 11/12] [likee] Add new extractor --- youtube_dl/extractor/extractors.py | 5 + youtube_dl/extractor/likee.py | 203 +++++++++++++++++++++++++++++ youtube_dl/extractor/lynda.py | 1 - 3 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/likee.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3f169056a6d..a208165530c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1508,6 +1508,11 @@ AxiosIE ) +from .likee import ( + LikeeIE, + LikeeUserIE +) + from .zingmp3_vn import ( Zingmp3_vnIE, Zingmp3_vnPlaylistIE, diff --git a/youtube_dl/extractor/likee.py b/youtube_dl/extractor/likee.py new file mode 100644 index 00000000000..17682e99db0 --- /dev/null +++ b/youtube_dl/extractor/likee.py @@ -0,0 +1,203 @@ +# coding: utf-8 +# Code by hatienl0i261299 - fb.com/100011734236090 - hatienloi261299@gmail.com +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode +) +from ..utils import ( + js_to_json, + int_or_none, + try_get +) + + +class LikeeIE(InfoExtractor): + _VALID_URL = r'''(?x)^((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P<site> + (likee\.com) + )\/(?P<user>@.+?)\/(video)\/(?P<id>[0-9]+)$ + ''' + IE_NAME = 'likee' + IE_DESC = 'likee.com' + _TESTS = [ + { + "url": "https://likee.com/@Inayat95/video/6808497581927578387", + "info_dict": { + "id": "6808497581927578387", + "ext": "mp4", + "title": "@Inayat95_6808497581927578387", + "description": str, + "thumbnail": r"re:^https?:.+?.jpg", + "uploader": str, + "uploader_id": int, + "like_count": int, + "comment_count": int, + "share_count": int, + "view_count": int, + "download_count": int + } + }, + { + "url": "https://likee.com/@Inayat95/video/6792552721999608595", + "info_dict": { + "id": "6792552721999608595", + "ext": "mp4", + "title": "@Inayat95_6792552721999608595", + "description": str, + "thumbnail": r"re:^https?:.+?.jpg", + "uploader": str, + "uploader_id": int, + "like_count": int, + "comment_count": int, + "share_count": int, + "view_count": int, + "download_count": int + } + }, + { + "url": "https://likee.com/@435421183/video/6802046076516688592", + "info_dict": { + "id": "6802046076516688592", + "ext": "mp4", + "title": "@435421183_6802046076516688592", + "description": str, + "thumbnail": r"re:^https?:.+?.jpg", + "uploader": str, + "uploader_id": int, + "like_count": int, + "comment_count": int, + "share_count": int, + "view_count": int, + "download_count": int + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group("id") + user = mobj.group("user") + webpage = self._download_webpage( + url_or_request=url, + video_id=video_id + ) + + info_video = self._regex_data(webpage, video_id) + + if info_video.get("post_id") == video_id: + formats = [{ + "url": info_video.get("video_url") or info_video.get("video_water_url"), + "ext": "mp4", + "height": int_or_none(info_video.get("video_height")), + "width": int_or_none(info_video.get("video_width")), + "protocol": "http", + }] + + def get_count(name): + return int_or_none(info_video.get(name), default=0) + + return { + "id": video_id, + "title": "%s_%s" % (user, video_id), + "description": info_video.get("msg_text") or '', + "thumbnail": info_video.get("image1") or info_video.get("image2") or info_video.get("image3"), + "like_count": get_count("like_count"), + "view_count": get_count("play_count"), + "share_count": get_count("share_count"), + "download_count": get_count("download_count"), + "comment_count": get_count("comment_count"), + "uploader": info_video.get("nick_name"), + "uploader_id": int_or_none(info_video.get("poster_uid")), + "formats": formats + } + + def _regex_data(self, webpage, video_id): + info_video = self._parse_json(self._search_regex( + r'''<script>window.data\s+=\s+(\{.+?\})\;''', + webpage, + "info video", + ), video_id, transform_source=js_to_json) + return info_video + + +class LikeeUserIE(LikeeIE): + _VALID_URL = r'''(?x)^((http[s]?|fpt):)\/?\/(www\.|m\.|) + (?P<site> + (likee\.com) + )\/(user)\/(?P<user>@.*?)(\W|$) + ''' + IE_NAME = "likee:user" + _TESTS = [ + { + "url": "https://likee.com/user/@Inayat95", + "info_dict": { + "id": "1357265683", + "title": "@Inayat95", + }, + "playlist_mincount": 10 + }, + { + "url": "https://likee.com/user/@435421183/", + "info_dict": { + "id": "681435856", + "title": "@435421183", + }, + "playlist_mincount": 5 + }, + { + "url": "https://likee.com/user/@52710468/", + "info_dict": { + "id": "1300330468", + "title": "@52710468", + }, + "playlist_mincount": 10 + } + ] + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + + user = mobj.group("user") + + webpage = self._download_webpage( + url_or_request=url, + video_id=user + ) + info_playlist = self._regex_data(webpage, user) + uid = try_get(info_playlist, lambda x: x['userinfo']['uid']) + + return self.playlist_result(entries=self._entries(uid,user),playlist_id=uid,playlist_title=user) + + def _entries(self,uid,user): + count = 50 + lastPostId = "" + while True: + info = self._download_json( + url_or_request="https://likee.com/official_website/VideoApi/getUserVideo", + video_id=lastPostId or uid, + data=compat_urllib_parse_urlencode({ + "uid": uid, + "count": count, + "lastPostId": lastPostId + }).encode() + ) + if info.get("msg") != "success": + break + videoList = try_get(info, lambda x: x['data']['videoList']) + video_id = '' + for video in videoList: + if not video: + continue + video_id = video.get("postId") + yield self.url_result( + url="https://likee.com/%s/video/%s" % (user,video_id), + ie=LikeeIE.ie_key(), + video_id=video_id + ) + lastPostId = video_id + if len(videoList) != count: + break diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index b3d8653d078..343b5eb423c 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -219,7 +219,6 @@ def _real_extract(self, url): self._sort_formats(formats) subtitles = self.extract_subtitles(video_id) - return { 'id': video_id, 'title': title, From 4311a4d6c364646c81efa7ef0a4e3eec998b57e2 Mon Sep 17 00:00:00 2001 From: hatienl0i261299 <hatienloi261299@gmail.com> Date: Tue, 31 Mar 2020 14:23:07 +0700 Subject: [PATCH 12/12] fix for flake8 --- youtube_dl/extractor/likee.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/likee.py b/youtube_dl/extractor/likee.py index 17682e99db0..c6e33b0e484 100644 --- a/youtube_dl/extractor/likee.py +++ b/youtube_dl/extractor/likee.py @@ -170,9 +170,9 @@ def _real_extract(self, url): info_playlist = self._regex_data(webpage, user) uid = try_get(info_playlist, lambda x: x['userinfo']['uid']) - return self.playlist_result(entries=self._entries(uid,user),playlist_id=uid,playlist_title=user) + return self.playlist_result(entries=self._entries(uid, user), playlist_id=uid, playlist_title=user) - def _entries(self,uid,user): + def _entries(self, uid, user): count = 50 lastPostId = "" while True: @@ -194,7 +194,7 @@ def _entries(self,uid,user): continue video_id = video.get("postId") yield self.url_result( - url="https://likee.com/%s/video/%s" % (user,video_id), + url="https://likee.com/%s/video/%s" % (user, video_id), ie=LikeeIE.ie_key(), video_id=video_id )