From 2302f32ced5f9ca1959926be326d527c8794e8fa Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Mon, 1 Feb 2021 16:00:24 +0800 Subject: [PATCH 01/10] [Loom] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/loom.py | 127 +++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 youtube_dl/extractor/loom.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ab8d6a5a561..78bc67f0924 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -605,6 +605,7 @@ ) from .lnkgo import LnkGoIE from .localnews8 import LocalNews8IE +from .loom import LoomIE from .lovehomeporn import LoveHomePornIE from .lrt import LRTIE from .lynda import ( diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py new file mode 100644 index 00000000000..1a22d7aeb9a --- /dev/null +++ b/youtube_dl/extractor/loom.py @@ -0,0 +1,127 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_urllib_parse_urlencode, + compat_urllib_request +) +from ..utils import ( + js_to_json, + try_get, + unified_timestamp, + url_or_none +) + + +class LoomBaseInfoIE(InfoExtractor): + _BASE_URL = 'https://www.loom.com/' + + def _extract_video_info_json(self, webpage, video_id): + info = self._html_search_regex( + r'window.loomSSRVideo = (.+?);', + webpage, + 'info') + return self._parse_json(info, 'json', js_to_json) + + def _get_url_by_id_type(self, video_id, type): + request = compat_urllib_request.Request( + self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type, + {}) + (json, _) = self._download_json_handle(request, video_id) + return (url_or_none(json.get('url')), json.get('part_credentials')) + + def _get_m3u8_formats(self, url, video_id, credentials): + format_list = self._extract_m3u8_formats(url, video_id) + for item in format_list: + item['protocol'] = 'm3u8_native' + item['url'] += '?' + credentials + item['ext'] = 'mp4' + item['format_id'] = 'hls-' + str(item.get('height', 0)) + item['extra_param_to_segment_url'] = credentials + return format_list + + +class LoomIE(LoomBaseInfoIE): + _VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?P[a-zA-Z0-9]+)' + _TESTS = [ + { + 'url': 'https://www.loom.com/share/31b41727a5b24dacb6c1417a565b2ebf', + 'md5': '8b94361aabff2075141dc60bd6d35453', + 'info_dict': { + 'id': '31b41727a5b24dacb6c1417a565b2ebf', + 'ext': 'mp4', + 'title': 'How to resize your camera bubble', + 'uploader': 'Allie Hitchcock', + 'upload_date': '20201007', + 'timestamp': 1602089241 + } + }, + { + 'url': 'https://www.loom.com/share/7e5168ec3b0744cab5e08a340cc7e086', + 'md5': '47dd14aa1d8054c249b68ca57ad9963f', + 'info_dict': { + 'id': '7e5168ec3b0744cab5e08a340cc7e086', + 'ext': 'mp4', + 'title': 'How to flip your camera ', + 'uploader': 'Matthew Flores', + 'upload_date': '20200423', + 'timestamp': 1587646164 + } + }, + { + 'url': 'https://www.loom.com/share/6670e3eba3c84dc09ada8306c7138075', + 'md5': 'bfad8181ed49d6252b10dfdeb46c535e', + 'info_dict': { + 'id': '6670e3eba3c84dc09ada8306c7138075', + 'ext': 'mp4', + 'title': 'How to record your first video on Loom', + 'uploader': 'Allie Hitchcock', + 'upload_date': '20201118', + 'timestamp': 1605729404 + } + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + info = self._extract_video_info_json(webpage, video_id) + + formats = [] + for type in ['transcoded-url', 'raw-url']: + (url, part_credentials) = self._get_url_by_id_type(video_id, type) + ext = self._search_regex( + r'\.([a-zA-Z0-9]+)\?', + url, 'ext', default=None) + if(ext != 'm3u8'): + formats.append({ + 'url': url, + 'ext': ext, + 'format_id': type, + 'width': try_get(info, lambda x: x['video_properties']['width']), + 'height': try_get(info, lambda x: x['video_properties']['height']) + }) + else: + credentials = compat_urllib_parse_urlencode(part_credentials) + m3u8_formats = self._get_m3u8_formats(url, video_id, credentials) + for i in range(len(m3u8_formats)): + formats.insert( + (-1, len(formats))[i == len(m3u8_formats) - 1], + m3u8_formats[i]) + + return { + 'id': info.get('id'), + 'title': info.get('name'), + 'formats': formats, + 'thumbnails': [ + { + 'id': key, + 'url': url_or_none(self._BASE_URL + value) + } for key, value in info.get('thumbnails').items() + ], + 'description': info.get('description'), + 'uploader': info.get('owner_full_name'), + 'timestamp': unified_timestamp(info.get('createdAt')) + } From 918f4f374a6e97acc9d0d3d556a9fd3c0544f154 Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 4 Feb 2021 00:06:40 +0800 Subject: [PATCH 02/10] [Loom] Update: Move related member functions into LoomIE --- youtube_dl/extractor/loom.py | 48 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index 1a22d7aeb9a..a386f6283a3 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -17,30 +17,6 @@ class LoomBaseInfoIE(InfoExtractor): _BASE_URL = 'https://www.loom.com/' - def _extract_video_info_json(self, webpage, video_id): - info = self._html_search_regex( - r'window.loomSSRVideo = (.+?);', - webpage, - 'info') - return self._parse_json(info, 'json', js_to_json) - - def _get_url_by_id_type(self, video_id, type): - request = compat_urllib_request.Request( - self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type, - {}) - (json, _) = self._download_json_handle(request, video_id) - return (url_or_none(json.get('url')), json.get('part_credentials')) - - def _get_m3u8_formats(self, url, video_id, credentials): - format_list = self._extract_m3u8_formats(url, video_id) - for item in format_list: - item['protocol'] = 'm3u8_native' - item['url'] += '?' + credentials - item['ext'] = 'mp4' - item['format_id'] = 'hls-' + str(item.get('height', 0)) - item['extra_param_to_segment_url'] = credentials - return format_list - class LoomIE(LoomBaseInfoIE): _VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?P[a-zA-Z0-9]+)' @@ -83,6 +59,30 @@ class LoomIE(LoomBaseInfoIE): } ] + def _extract_video_info_json(self, webpage, video_id): + info = self._html_search_regex( + r'window.loomSSRVideo = (.+?);', + webpage, + 'info') + return self._parse_json(info, 'json', js_to_json) + + def _get_url_by_id_type(self, video_id, type): + request = compat_urllib_request.Request( + self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type, + {}) + json = self._download_json(request, video_id) + return (url_or_none(json.get('url')), json.get('part_credentials')) + + def _get_m3u8_formats(self, url, video_id, credentials): + format_list = self._extract_m3u8_formats(url, video_id) + for item in format_list: + item['protocol'] = 'm3u8_native' + item['url'] += '?' + credentials + item['ext'] = 'mp4' + item['format_id'] = 'hls-' + str(item.get('height', 0)) + item['extra_param_to_segment_url'] = credentials + return format_list + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) From 287e710bff8b59d0e64d4a6bd1a5ffd683ee2862 Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 4 Feb 2021 00:18:10 +0800 Subject: [PATCH 03/10] [Loom] Add: Additional playlist extractor for folder support --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/loom.py | 81 ++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e5e48e5a42f..39fded35bb0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -605,7 +605,10 @@ ) from .lnkgo import LnkGoIE from .localnews8 import LocalNews8IE -from .loom import LoomIE +from .loom import ( + LoomIE, + LoomFolderIE +) from .lovehomeporn import LoveHomePornIE from .lrt import LRTIE from .lynda import ( diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index a386f6283a3..0f684f71820 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -1,8 +1,12 @@ from __future__ import unicode_literals +import json +import re + from .common import InfoExtractor from ..compat import ( + compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request ) @@ -19,7 +23,7 @@ class LoomBaseInfoIE(InfoExtractor): class LoomIE(LoomBaseInfoIE): - _VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?loom\.com/share/(?!folder)(?P[a-zA-Z0-9]+)' _TESTS = [ { 'url': 'https://www.loom.com/share/31b41727a5b24dacb6c1417a565b2ebf', @@ -70,8 +74,8 @@ def _get_url_by_id_type(self, video_id, type): request = compat_urllib_request.Request( self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type, {}) - json = self._download_json(request, video_id) - return (url_or_none(json.get('url')), json.get('part_credentials')) + json_doc = self._download_json(request, video_id) + return (url_or_none(json_doc.get('url')), json_doc.get('part_credentials')) def _get_m3u8_formats(self, url, video_id, credentials): format_list = self._extract_m3u8_formats(url, video_id) @@ -125,3 +129,74 @@ def _real_extract(self, url): 'uploader': info.get('owner_full_name'), 'timestamp': unified_timestamp(info.get('createdAt')) } + + +class LoomFolderIE(LoomBaseInfoIE): + _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P.+)/?' + _TESTS = [ + { + 'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c/List%20B-%20e%2C%20u', + 'info_dict': { + 'id': 'b14bf2c5ef434bca8ab3585b0c1e97d9', + 'title': 'List B- e, u' + }, + 'playlist_mincount': 4 + }, + { + 'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c', + 'info_dict': { + 'id': '997db4db046f43e5912f10dc5f817b5c', + 'title': 'Blending Lessons ' + }, + 'playlist_mincount': 16 + } + ] + + def _get_real_folder_id(self, path): + subfolders = re.match( + r'^([a-zA-Z0-9]+)(?:\/(.+))*$', + compat_urllib_parse_unquote(path)) + folder_names = subfolders.groups()[1:] + parent_folder_id = subfolders.group(1) + if(folder_names[0] is None): + return path + + # Fetch folder id + request = compat_urllib_request.Request( + self._BASE_URL + 'v1/folders/by_name', + json.dumps({ + 'folder_names': folder_names, + 'parent_folder_id': parent_folder_id + }).encode('utf-8')) + json_doc = self._download_json(request, parent_folder_id) + + return try_get(json_doc, lambda x: x['current_folder']['id']) + + def _get_folder_info(self, folder_id): + json_doc = self._download_json(url_or_none(self._BASE_URL + 'v1/folders/' + folder_id), folder_id) + videos = [] + + # Recursive call for subfolder + for folder in json_doc.get('folders'): + subfolder_info = self._get_folder_info(folder.get('id')) + videos.extend(subfolder_info.get('entries')) + videos.extend([val.get('id') for val in json_doc.get('videos')]) + + return { + 'id': folder_id, + 'title': json_doc.get('name'), + 'description': json_doc.get('description'), + 'entries': videos + } + + def _real_extract(self, url): + folder_id = self._match_id(url) + folder_id = self._get_real_folder_id(folder_id) + folder_info = self._get_folder_info(folder_id) + folder_info['_type'] = 'playlist' + + for i in range(len(folder_info['entries'])): + video_id = folder_info['entries'][i] + folder_info['entries'][i] = LoomIE(self._downloader)._real_extract(url_or_none(self._BASE_URL + 'share/' + video_id)) + + return folder_info From c9f3667e2efc9e8b77bab8364b38910fd4e6020a Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 4 Feb 2021 00:53:18 +0800 Subject: [PATCH 04/10] [Loom] Update: Change test case to avoid a false-positive result from test/test_unicode_literals.py --- youtube_dl/extractor/loom.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index 0f684f71820..0ef318725da 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -135,12 +135,12 @@ class LoomFolderIE(LoomBaseInfoIE): _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P.+)/?' _TESTS = [ { - 'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c/List%20B-%20e%2C%20u', + 'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c/List%20A-%20a%2C%20i%2C%20o', 'info_dict': { - 'id': 'b14bf2c5ef434bca8ab3585b0c1e97d9', - 'title': 'List B- e, u' + 'id': '9a8a87f6b6f546d9a400c8e7575ff7f2', + 'title': 'List A- a, i, o' }, - 'playlist_mincount': 4 + 'playlist_mincount': 12 }, { 'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c', From 34e6a6b559d06326f7a4ec6b37bf3067e1e71bc5 Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Mon, 15 Feb 2021 14:07:44 +0800 Subject: [PATCH 05/10] [Loom] Moved functions to inline Removed if statement parentheses --- youtube_dl/extractor/loom.py | 48 +++++++++++++++--------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index 0ef318725da..dc381616cdd 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -63,43 +63,29 @@ class LoomIE(LoomBaseInfoIE): } ] - def _extract_video_info_json(self, webpage, video_id): - info = self._html_search_regex( - r'window.loomSSRVideo = (.+?);', - webpage, - 'info') - return self._parse_json(info, 'json', js_to_json) - - def _get_url_by_id_type(self, video_id, type): - request = compat_urllib_request.Request( - self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type, - {}) - json_doc = self._download_json(request, video_id) - return (url_or_none(json_doc.get('url')), json_doc.get('part_credentials')) - - def _get_m3u8_formats(self, url, video_id, credentials): - format_list = self._extract_m3u8_formats(url, video_id) - for item in format_list: - item['protocol'] = 'm3u8_native' - item['url'] += '?' + credentials - item['ext'] = 'mp4' - item['format_id'] = 'hls-' + str(item.get('height', 0)) - item['extra_param_to_segment_url'] = credentials - return format_list - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - info = self._extract_video_info_json(webpage, video_id) + info_json = self._html_search_regex( + r'window.loomSSRVideo = (.+?);', + webpage, + 'info') + info = self._parse_json(info_json, 'json', js_to_json) formats = [] for type in ['transcoded-url', 'raw-url']: - (url, part_credentials) = self._get_url_by_id_type(video_id, type) + request = compat_urllib_request.Request( + self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type, + {}) + json_doc = self._download_json(request, video_id) + url = url_or_none(json_doc.get('url')) + part_credentials = json_doc.get('part_credentials') + ext = self._search_regex( r'\.([a-zA-Z0-9]+)\?', url, 'ext', default=None) - if(ext != 'm3u8'): + if ext != 'm3u8': formats.append({ 'url': url, 'ext': ext, @@ -109,7 +95,13 @@ def _real_extract(self, url): }) else: credentials = compat_urllib_parse_urlencode(part_credentials) - m3u8_formats = self._get_m3u8_formats(url, video_id, credentials) + m3u8_formats = self._extract_m3u8_formats(url, video_id) + for item in m3u8_formats: + item['protocol'] = 'm3u8_native' + item['url'] += '?' + credentials + item['ext'] = 'mp4' + item['format_id'] = 'hls-' + str(item.get('height', 0)) + item['extra_param_to_segment_url'] = credentials for i in range(len(m3u8_formats)): formats.insert( (-1, len(formats))[i == len(m3u8_formats) - 1], From 29c4168ceca7943fec933992358cd71a37965e64 Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 25 Feb 2021 02:24:46 +0800 Subject: [PATCH 06/10] [Loom] Add missing parsing function --- youtube_dl/extractor/loom.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index dc381616cdd..53137f5a51f 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -11,6 +11,7 @@ compat_urllib_request ) from ..utils import ( + int_or_none, js_to_json, try_get, unified_timestamp, @@ -90,8 +91,8 @@ def _real_extract(self, url): 'url': url, 'ext': ext, 'format_id': type, - 'width': try_get(info, lambda x: x['video_properties']['width']), - 'height': try_get(info, lambda x: x['video_properties']['height']) + 'width': int_or_none(try_get(info, lambda x: x['video_properties']['width'])), + 'height': int_or_none(try_get(info, lambda x: x['video_properties']['height'])) }) else: credentials = compat_urllib_parse_urlencode(part_credentials) From 81bd98a03faec5868e1aee8cfb8a8c01667b98cf Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 25 Feb 2021 02:55:37 +0800 Subject: [PATCH 07/10] [Loom] Add fallback to mandatory attribute --- youtube_dl/extractor/loom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index 53137f5a51f..76cf9d30e26 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -109,7 +109,7 @@ def _real_extract(self, url): m3u8_formats[i]) return { - 'id': info.get('id'), + 'id': info.get('id') or video_id, 'title': info.get('name'), 'formats': formats, 'thumbnails': [ From 70b804526cf51a7132b02d682721a880bfafcf52 Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 25 Feb 2021 03:15:13 +0800 Subject: [PATCH 08/10] [Loom] Move request back into _download_json --- youtube_dl/extractor/loom.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index 76cf9d30e26..d79bb2d8fd7 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -76,10 +76,9 @@ def _real_extract(self, url): formats = [] for type in ['transcoded-url', 'raw-url']: - request = compat_urllib_request.Request( + json_doc = self._download_json( self._BASE_URL + 'api/campaigns/sessions/' + video_id + '/' + type, - {}) - json_doc = self._download_json(request, video_id) + video_id, data={}) url = url_or_none(json_doc.get('url')) part_credentials = json_doc.get('part_credentials') From 1b2651ed3031ffb54380d1896de5ef9c7964a02c Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 25 Feb 2021 03:46:55 +0800 Subject: [PATCH 09/10] [Loom] Use url_result instead --- youtube_dl/extractor/loom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index d79bb2d8fd7..c344cc12b58 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -189,6 +189,6 @@ def _real_extract(self, url): for i in range(len(folder_info['entries'])): video_id = folder_info['entries'][i] - folder_info['entries'][i] = LoomIE(self._downloader)._real_extract(url_or_none(self._BASE_URL + 'share/' + video_id)) + folder_info['entries'][i] = self.url_result(self._BASE_URL + 'share/' + video_id, 'Loom', video_id) return folder_info From e218b26725c5ade061c7f570e459dec6259c0519 Mon Sep 17 00:00:00 2001 From: Wong Yiu Hang Date: Thu, 25 Feb 2021 04:11:18 +0800 Subject: [PATCH 10/10] [Loom] Add url_or_none back --- youtube_dl/extractor/loom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/loom.py b/youtube_dl/extractor/loom.py index c344cc12b58..8cb7f88305e 100644 --- a/youtube_dl/extractor/loom.py +++ b/youtube_dl/extractor/loom.py @@ -189,6 +189,6 @@ def _real_extract(self, url): for i in range(len(folder_info['entries'])): video_id = folder_info['entries'][i] - folder_info['entries'][i] = self.url_result(self._BASE_URL + 'share/' + video_id, 'Loom', video_id) + folder_info['entries'][i] = self.url_result(url_or_none(self._BASE_URL + 'share/' + video_id), 'Loom', video_id) return folder_info