diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e407ab3d992..6121ed84b0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1106,6 +1106,7 @@ from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE +from .telegram import TelegramIE from .telemb import TeleMBIE from .telequebec import ( TeleQuebecIE, diff --git a/youtube_dl/extractor/telegram.py b/youtube_dl/extractor/telegram.py new file mode 100644 index 00000000000..2d0b0873808 --- /dev/null +++ b/youtube_dl/extractor/telegram.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_timestamp, + int_or_none, + parse_duration, +) + + +class TelegramIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?t\.me/s/(?P[\s\S]+)' + _TESTS = [ + { + 'url': 'https://t.me/s/telegram/86', + 'md5': 'afd6c7f574fead14e35ca83e3705e01d', + 'info_dict': { + 'id': 'telegram/86', + 'ext': 'mp4', + 'title': 'Telegram News – Telegram', + 'description': 'This video should give you an idea of how the new account switching feature works – available on Android today and coming soon to other platforms. ✨🌟⭐️ Happy holidays!', + 'thumbnail': 're:https://cdn(.*).telesco.pe/file/(.*)', + 'uploader': 'Telegram News', + 'upload_date': '20171230', + 'timestamp': 1514667477, + } + }, { + 'url': 'https://t.me/s/leehsienloong/382', + 'md5': '01559de5a145d0547f4c53bd5340549d', + 'info_dict': { + 'id': 'leehsienloong/382', + 'ext': 'mp4', + 'title': 'Lee Hsien Loong – Telegram', + 'description': 'Dropped by Sengkang West this morning to say hi to everyone, and see how residents were doing. I was happy to see many young families out spending time with their little ones!', + 'thumbnail': 're:https://cdn(.*).telesco.pe/file/(.*)', + 'uploader': 'Lee Hsien Loong', + 'upload_date': '20200315', + 'timestamp': 1584276195, + 'duration': 57, + } + }, { + 'url': 'https://t.me/s/durov/82', + 'only_matching': True, + }, { + 'url': 'https://t.me/s/durov/83', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + original_webpage = self._download_webpage(url, video_id) + + this_div = self._search_regex( + r'
([\s\S]*?)', + original_webpage, 'pageblock') + + title = self._html_search_regex( + r'(.+?)', + original_webpage, 'title') + + description = self._html_search_regex( + r'
(.*)', + this_div, 'uploader', default=None) + + timestamp = unified_timestamp( + self._search_regex( + r'datetime="(.*)"', + this_div, 'upload_time', default=None) + ) + + duration = self._html_search_regex( + r'