ytdl-org · hatienl0i2612 · Mar 28, 2020 · Mar 28, 2020 · Mar 28, 2020 · Mar 28, 2020
diff --git a/youtube_dl/extractor/axios.py b/youtube_dl/extractor/axios.py
@@ -0,0 +1,131 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    ExtractorError,
+    mimetype2ext,
+    try_get,
+)
+
+
+class AxiosIE(InfoExtractor):
+    """Extract the JW Player video embedded in a www.axios.com article."""
+
+    IE_NAME = 'axios'
+    IE_DESC = 'www.axios.com'
+    # Article URLs look like "<slug>-<uuid>.html"; the UUID is the video id.
+    _VALID_URL = r'https?://(?:www\.)?axios\.com/(?:[^/?#]+-)?(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
+    _TESTS = [{
+        'url': 'https://www.axios.com/trump-coronavirus-restrictions-c3da2d28-b761-4b62-b6d6-734c059c6dba.html',
+        'info_dict': {
+            'id': 'c3da2d28-b761-4b62-b6d6-734c059c6dba',
+            'title': 'Trump says he wants to "open" the country by Easter',
+            'ext': 'mp4',
+            'description': str,
+        },
+    }, {
+        'url': 'https://www.axios.com/coronavirus-texas-official-grandparents-die-172ca951-891c-44e7-a9ec-77c486e0c5c3.html',
+        'info_dict': {
+            'id': '172ca951-891c-44e7-a9ec-77c486e0c5c3',
+            'title': 'Texas Lt. Gov.: Grandparents would be willing to die to save the economy',
+            'ext': 'mp4',
+            'description': str,
+        },
+    }, {
+        'url': 'https://www.axios.com/cuomo-trump-mandatory-quarantine-panic-35ae54a1-0aa9-4a38-910d-647293002fc2.html',
+        'info_dict': {
+            'id': '35ae54a1-0aa9-4a38-910d-647293002fc2',
+            'title': 'Cuomo: Trump\'s mandatory quarantine comments "really panicked people"',
+            'ext': 'mp4',
+            'description': str,
+        },
+    }, {
+        'url': 'https://www.axios.com/coronavirus-louisiana-bel-edwards-ventilators-7810fc76-1825-41b2-8b22-f1cfc14e2ffe.html',
+        'info_dict': {
+            'id': '7810fc76-1825-41b2-8b22-f1cfc14e2ffe',
+            'title': 'Louisiana on track to exceed ventilator capacity this week, governor says',
+            'ext': 'mp4',
+            'description': str,
+        },
+    }]
+    # JW Platform media endpoint; %s is filled with the embed's media id.
+    api_jwplayer = 'https://content.jwplatform.com/v2/media/%s'
+
+    def _real_extract(self, url):
+        """Return the info dict for the video embedded in the article at url.
+
+        Raises ExtractorError when the page has no <amp-jwplayer> embed or
+        when the JW Platform response contains no playlist entry.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Only the media id is used, so match it on its own instead of
+        # depending on the order of the other embed attributes.
+        jwplayer_mobj = re.search(
+            r'<amp-jwplayer\b[^>]*?\bdata-media-id="(?P<media_id>[^"]+)"',
+            webpage)
+        if not jwplayer_mobj:
+            raise ExtractorError(
+                'Unable to find a JW Player embed', expected=True)
+        media_id = jwplayer_mobj.group('media_id')
+
+        # The class names in these patterns look auto-generated, so fall back
+        # to the Open Graph tags when they stop matching.
+        title = clean_html(self._search_regex(
+            r'''<h1\s+class="sc-31t5q3-0 sc-1fjk95c-2 guveJc"\>(?P<title>.*?)\<\/h1\>''',
+            webpage, 'title', group='title',
+            default=None)) or self._og_search_title(webpage)
+        description = clean_html(self._search_regex(
+            r'''<div\s+class=\"b0w77w-0 jctzOA gtm-story-text\"\>(?P<description>.*?)\<\/div\>''',
+            webpage, 'description', group='description',
+            default=None)) or self._og_search_description(webpage)
+
+        json_jwplayer = self._download_json(
+            self.api_jwplayer % media_id, media_id)
+        playlist = try_get(json_jwplayer, lambda x: x['playlist'][0], dict)
+        if not playlist:
+            raise ExtractorError(
+                'No playlist entry in the JW Platform response for %s' % media_id)
+
+        thumbnails = []
+        for img in playlist.get('images') or []:
+            if isinstance(img, dict) and img.get('src'):
+                thumbnails.append({
+                    'url': img['src'],
+                    'width': img.get('width'),
+                })
+
+        formats = []
+        for source in playlist.get('sources') or []:
+            file_url = source.get('file') if isinstance(source, dict) else None
+            if not file_url:
+                continue
+            ext = mimetype2ext(source.get('type'))
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    file_url, media_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
+            formats.append({
+                'url': file_url,
+                'ext': ext,
+                'height': source.get('height'),
+                'width': source.get('width'),
+                'protocol': 'http',
+                # NOTE(review): kept from the original; confirm anything reads it.
+                'label': source.get('label'),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'description': description,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
@@ -1503,3 +1503,19 @@
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE
 from .zype import ZypeIE
+
+from .axios import (
+    AxiosIE
+)
+
+from .likee import (
+    LikeeIE,
+    LikeeUserIE
+)
+
+from .zingmp3_vn import (
+    Zingmp3_vnIE,
+    Zingmp3_vnPlaylistIE,
+    Zingmp3_vnChartIE,
+    Zingmp3_vnUserIE,
+)
diff --git a/youtube_dl/extractor/likee.py b/youtube_dl/extractor/likee.py
@@ -0,0 +1,204 @@
+# coding: utf-8
+# Code by hatienl0i261299 - fb.com/100011734236090 - hatienloi261299@gmail.com
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_urlencode,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    js_to_json,
+    try_get,
+)
+
+
+class LikeeIE(InfoExtractor):
+    """Extract a single likee.com video from its page."""
+
+    _VALID_URL = r'https?://(?:www\.|m\.)?likee\.com/(?P<user>@[^/?#]+)/video/(?P<id>[0-9]+)'
+    IE_NAME = 'likee'
+    IE_DESC = 'likee.com'
+    _TESTS = [{
+        'url': 'https://likee.com/@Inayat95/video/6808497581927578387',
+        'info_dict': {
+            'id': '6808497581927578387',
+            'ext': 'mp4',
+            'title': '@Inayat95_6808497581927578387',
+            'description': str,
+            'thumbnail': r're:^https?://.+\.jpg',
+            'uploader': str,
+            'uploader_id': int,
+            'like_count': int,
+            'comment_count': int,
+            'share_count': int,
+            'view_count': int,
+            'download_count': int,
+        },
+    }, {
+        'url': 'https://likee.com/@Inayat95/video/6792552721999608595',
+        'info_dict': {
+            'id': '6792552721999608595',
+            'ext': 'mp4',
+            'title': '@Inayat95_6792552721999608595',
+            'description': str,
+            'thumbnail': r're:^https?://.+\.jpg',
+            'uploader': str,
+            'uploader_id': int,
+            'like_count': int,
+            'comment_count': int,
+            'share_count': int,
+            'view_count': int,
+            'download_count': int,
+        },
+    }, {
+        'url': 'https://likee.com/@435421183/video/6802046076516688592',
+        'info_dict': {
+            'id': '6802046076516688592',
+            'ext': 'mp4',
+            'title': '@435421183_6802046076516688592',
+            'description': str,
+            'thumbnail': r're:^https?://.+\.jpg',
+            'uploader': str,
+            'uploader_id': int,
+            'like_count': int,
+            'comment_count': int,
+            'share_count': int,
+            'view_count': int,
+            'download_count': int,
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video at url.
+
+        Raises ExtractorError when the page data belongs to another post or
+        carries no video URL.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id, user = mobj.group('id', 'user')
+        webpage = self._download_webpage(url, video_id)
+        info_video = self._regex_data(webpage, video_id)
+
+        # Refuse to describe a post other than the requested one.
+        if compat_str(info_video.get('post_id')) != video_id:
+            raise ExtractorError(
+                'Page data does not describe video %s' % video_id)
+
+        video_url = info_video.get('video_url') or info_video.get('video_water_url')
+        if not video_url:
+            raise ExtractorError('Unable to find a video URL for %s' % video_id)
+
+        def get_count(name):
+            # default=0: a counter absent from the page data becomes 0.
+            return int_or_none(info_video.get(name), default=0)
+
+        return {
+            'id': video_id,
+            'title': '%s_%s' % (user, video_id),
+            'description': info_video.get('msg_text') or '',
+            'thumbnail': info_video.get('image1') or info_video.get('image2') or info_video.get('image3'),
+            'like_count': get_count('like_count'),
+            'view_count': get_count('play_count'),
+            'share_count': get_count('share_count'),
+            'download_count': get_count('download_count'),
+            'comment_count': get_count('comment_count'),
+            'uploader': info_video.get('nick_name'),
+            'uploader_id': int_or_none(info_video.get('poster_uid')),
+            'formats': [{
+                'url': video_url,
+                'ext': 'mp4',
+                'height': int_or_none(info_video.get('video_height')),
+                'width': int_or_none(info_video.get('video_width')),
+                'protocol': 'http',
+            }],
+        }
+
+    def _regex_data(self, webpage, video_id):
+        """Return the object assigned to window.data in webpage, parsed as JSON."""
+        return self._parse_json(
+            self._search_regex(
+                r'''<script>window\.data\s+=\s+(\{.+?\})\;''',
+                webpage, 'info video'),
+            video_id, transform_source=js_to_json)
+
+
+class LikeeUserIE(LikeeIE):
+    """Extract all videos of a likee.com user as a playlist."""
+
+    _VALID_URL = r'https?://(?:www\.|m\.)?likee\.com/user/(?P<user>@[^/?#]+)'
+    IE_NAME = 'likee:user'
+    _TESTS = [{
+        'url': 'https://likee.com/user/@Inayat95',
+        'info_dict': {
+            'id': '1357265683',
+            'title': '@Inayat95',
+        },
+        'playlist_mincount': 10,
+    }, {
+        'url': 'https://likee.com/user/@435421183/',
+        'info_dict': {
+            'id': '681435856',
+            'title': '@435421183',
+        },
+        'playlist_mincount': 5,
+    }, {
+        'url': 'https://likee.com/user/@52710468/',
+        'info_dict': {
+            'id': '1300330468',
+            'title': '@52710468',
+        },
+        'playlist_mincount': 10,
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist of the user's videos.
+
+        Raises ExtractorError when the profile page exposes no uid.
+        """
+        user = re.match(self._VALID_URL, url).group('user')
+        webpage = self._download_webpage(url, user)
+        info_playlist = self._regex_data(webpage, user)
+        uid = try_get(info_playlist, lambda x: x['userinfo']['uid'])
+        if not uid:
+            raise ExtractorError('Unable to find the uid of %s' % user)
+        return self.playlist_result(
+            self._entries(uid, user),
+            playlist_id=compat_str(uid), playlist_title=user)
+
+    def _entries(self, uid, user):
+        """Yield one url_result per video of the user, page by page."""
+        page_size = 50
+        last_post_id = ''
+        while True:
+            info = self._download_json(
+                'https://likee.com/official_website/VideoApi/getUserVideo',
+                last_post_id or uid,
+                data=compat_urllib_parse_urlencode({
+                    'uid': uid,
+                    'count': page_size,
+                    'lastPostId': last_post_id,
+                }).encode())
+            if info.get('msg') != 'success':
+                break
+            video_list = try_get(
+                info, lambda x: x['data']['videoList'], list) or []
+            next_post_id = None
+            for video in video_list:
+                post_id = video.get('postId') if isinstance(video, dict) else None
+                if not post_id:
+                    continue
+                next_post_id = post_id
+                yield self.url_result(
+                    'https://likee.com/%s/video/%s' % (user, post_id),
+                    ie=LikeeIE.ie_key(), video_id=compat_str(post_id))
+            # A short page is the last one. Also stop when the cursor did not
+            # move, otherwise the same page would be requested forever.
+            if (len(video_list) != page_size or not next_post_id
+                    or next_post_id == last_post_id):
+                break
+            last_post_id = next_post_id
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
@@ -219,7 +219,6 @@ def _real_extract(self, url):
         self._sort_formats(formats)
 
         subtitles = self.extract_subtitles(video_id)
-
         return {
             'id': video_id,
             'title': title,