From 06205e8813b001616f6603cc6f6a8f5d8eb7c3eb Mon Sep 17 00:00:00 2001 From: Isaac-the-Man Date: Sun, 10 Jan 2021 10:37:54 -0500 Subject: [PATCH 1/6] [samplefocus] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/samplefocus.py | 51 +++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/samplefocus.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74743a44915..78e04a0dd1a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1002,6 +1002,7 @@ SafariApiIE, SafariCourseIE, ) +from .samplefocus import SampleFocusIE from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py new file mode 100644 index 00000000000..86001929eb4 --- /dev/null +++ b/youtube_dl/extractor/samplefocus.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + get_element_by_class, + extract_attributes, + get_element_by_id) + + +class SampleFocusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P[\w,\-,0-9]+)' + _TESTS = [{ + 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar', + 'md5': '48c8d62d60be467293912e0e619a5120', + 'info_dict': { + 'id': 'lil-peep-sad-emo-guitar', + 'ext': 'mp3', + 'title': 'Lil Peep Sad Emo Guitar', + 'description': 'Listen to Lil Peep Sad Emo Guitar. Royalty-Free sound that is tagged as electric guitar, emo, guitar, and lil peep. Download for FREE + discover 1000\'s of sounds.', + 'thumbnail': r're:^https?://.*\.png', + 'license': 'Standard License' + } + }, { + 'url': 'https://samplefocus.com/samples/dababy-style-bass-808', + 'only_matching': True + }, { + 'url': 'https://samplefocus.com/samples/young-chop-kick', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) or self._html_search_regex(r'

(.+?)

', webpage, 'title') + tb = self._og_search_thumbnail(webpage) or extract_attributes(get_element_by_class('waveform')).get('src') + + mp3_url = self._html_search_regex( + r'', + webpage, 'mp3 url') or extract_attributes(get_element_by_id('sample_mp3')).get('value') + + return { + 'id': video_id, + 'title': title, + 'url': mp3_url, + 'ext': 'mp3', + 'thumbnail': tb, + 'description': self._html_search_meta('description', webpage), + 'license': self._html_search_regex(r'(.+?)', webpage, 'license') + } From f0f1bab15968073ea38b7c7ec809b79d5a2af730 Mon Sep 17 00:00:00 2001 From: Isaac-the-Man Date: Mon, 11 Jan 2021 20:14:01 -0500 Subject: [PATCH 2/6] [samplefocus] fixed fatal handling --- youtube_dl/extractor/samplefocus.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py index 86001929eb4..61e1c827ddb 100644 --- a/youtube_dl/extractor/samplefocus.py +++ b/youtube_dl/extractor/samplefocus.py @@ -33,12 +33,12 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) or self._html_search_regex(r'

(.+?)

', webpage, 'title') + title = self._og_search_title(webpage) or self._html_search_regex(r'

(.+?)

', webpage, 'title', default=video_id) tb = self._og_search_thumbnail(webpage) or extract_attributes(get_element_by_class('waveform')).get('src') mp3_url = self._html_search_regex( r'', - webpage, 'mp3 url') or extract_attributes(get_element_by_id('sample_mp3')).get('value') + webpage, 'mp3 url', fatal=False) or extract_attributes(get_element_by_id('sample_mp3')).get('value') return { 'id': video_id, @@ -46,6 +46,6 @@ def _real_extract(self, url): 'url': mp3_url, 'ext': 'mp3', 'thumbnail': tb, - 'description': self._html_search_meta('description', webpage), + 'description': self._html_search_meta('description', webpage, fatal=False), 'license': self._html_search_regex(r'(.+?)', webpage, 'license') } From 4277082394d56523eaf56cc648ebb471df88a1ff Mon Sep 17 00:00:00 2001 From: Isaac-the-Man Date: Sat, 30 Jan 2021 16:32:14 -0500 Subject: [PATCH 3/6] [samplefocus] fixed coding convention --- youtube_dl/extractor/samplefocus.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py index 61e1c827ddb..bdd25d5763e 100644 --- a/youtube_dl/extractor/samplefocus.py +++ b/youtube_dl/extractor/samplefocus.py @@ -2,14 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - get_element_by_class, - extract_attributes, - get_element_by_id) class SampleFocusIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P[\w,\-,0-9]+)' + _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar', 'md5': '48c8d62d60be467293912e0e619a5120', @@ -18,7 +14,7 @@ class SampleFocusIE(InfoExtractor): 'ext': 'mp3', 'title': 'Lil Peep Sad Emo Guitar', 'description': 'Listen to Lil Peep Sad Emo Guitar. Royalty-Free sound that is tagged as electric guitar, emo, guitar, and lil peep. Download for FREE + discover 1000\'s of sounds.', - 'thumbnail': r're:^https?://.*\.png', + 'thumbnail': r're:^https?://.+\.png', 'license': 'Standard License' } }, { @@ -34,11 +30,16 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) or self._html_search_regex(r'

(.+?)

', webpage, 'title', default=video_id) - tb = self._og_search_thumbnail(webpage) or extract_attributes(get_element_by_class('waveform')).get('src') mp3_url = self._html_search_regex( - r'', - webpage, 'mp3 url', fatal=False) or extract_attributes(get_element_by_id('sample_mp3')).get('value') + r'', + webpage, 'mp3', fatal=False) or self._html_search_regex( + r'', + webpage, 'mp3 url') + + tb = self._og_search_thumbnail(webpage) or self._html_search_regex( + r'', + webpage, 'mp3', fatal=False) return { 'id': video_id, @@ -46,6 +47,6 @@ def _real_extract(self, url): 'url': mp3_url, 'ext': 'mp3', 'thumbnail': tb, - 'description': self._html_search_meta('description', webpage, fatal=False), + 'description': self._html_search_meta('description', webpage), 'license': self._html_search_regex(r'(.+?)', webpage, 'license') } From e5a34486c30b12abd546380bbf71bca8a463f5e8 Mon Sep 17 00:00:00 2001 From: Isaac-the-Man Date: Wed, 10 Feb 2021 07:10:03 -0500 Subject: [PATCH 4/6] [samplefocus] fixed fallbacks. --- youtube_dl/extractor/samplefocus.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py index bdd25d5763e..635fd236134 100644 --- a/youtube_dl/extractor/samplefocus.py +++ b/youtube_dl/extractor/samplefocus.py @@ -29,16 +29,17 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) or self._html_search_regex(r'

(.+?)

', webpage, 'title', default=video_id) + title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(r'

(.+?)

', webpage, 'title', default=video_id) mp3_url = self._html_search_regex( - r'', + r']+type="hidden"[^>]+id=(?:["\'])sample_mp3(?:["\'])[^>]+value=(?:["\'])(.+\.mp3\?[0-9]+)(?:["\'])', webpage, 'mp3', fatal=False) or self._html_search_regex( - r'', + r']+itemprop=(?:["\'])contentUrl(?:["\'])[^>]+content=(?:["\'])?(.+\.mp3\?[0-9]+)(?:["\'])?', webpage, 'mp3 url') + # print(mp3_url) tb = self._og_search_thumbnail(webpage) or self._html_search_regex( - r'', + r']+class=(?:["\'])waveform responsive-img[^>]+src=(?:["\'])([^"\']+)', webpage, 'mp3', fatal=False) return { From 942160c0523050d92d8c3b6aaa0d1794821c56f5 Mon Sep 17 00:00:00 2001 From: Isaac-the-Man Date: Thu, 11 Feb 2021 18:05:52 -0500 Subject: [PATCH 5/6] [samplefocus] fixed coding covention --- youtube_dl/extractor/samplefocus.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py index 635fd236134..d9512369b7c 100644 --- a/youtube_dl/extractor/samplefocus.py +++ b/youtube_dl/extractor/samplefocus.py @@ -29,14 +29,14 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(r'

(.+?)

', webpage, 'title', default=video_id) + title = self._og_search_title(webpage, fatal=False) or self._html_search_regex( + r'

(.+?)

', webpage, 'title', default=video_id) mp3_url = self._html_search_regex( - r']+type="hidden"[^>]+id=(?:["\'])sample_mp3(?:["\'])[^>]+value=(?:["\'])(.+\.mp3\?[0-9]+)(?:["\'])', + r']+id=(?:["\'])sample_mp3(?:["\'])[^>]+value=(?:["\'])(.+\?[0-9]+)(?:["\'])[^>]*>', webpage, 'mp3', fatal=False) or self._html_search_regex( - r']+itemprop=(?:["\'])contentUrl(?:["\'])[^>]+content=(?:["\'])?(.+\.mp3\?[0-9]+)(?:["\'])?', + r']+itemprop=(?:["\'])contentUrl(?:["\'])[^>]+content=(?:["\'])(?:["\'])[^>]*?>', webpage, 'mp3 url') - # print(mp3_url) tb = self._og_search_thumbnail(webpage) or self._html_search_regex( r']+class=(?:["\'])waveform responsive-img[^>]+src=(?:["\'])([^"\']+)', @@ -49,5 +49,5 @@ def _real_extract(self, url): 'ext': 'mp3', 'thumbnail': tb, 'description': self._html_search_meta('description', webpage), - 'license': self._html_search_regex(r'(.+?)', webpage, 'license') + 'license': self._html_search_regex(r']+href=(?:["\'])/license(?:["\'])[^>]*>([^<]+)<', webpage, 'license') } From 3f7c377002cf8ac750c34d68c3c64390894ec7c1 Mon Sep 17 00:00:00 2001 From: Isaac-the-Man Date: Sun, 14 Feb 2021 11:22:22 -0500 Subject: [PATCH 6/6] [samplefocus] fixed fallback re --- youtube_dl/extractor/samplefocus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/samplefocus.py b/youtube_dl/extractor/samplefocus.py index d9512369b7c..40ea840ce1b 100644 --- a/youtube_dl/extractor/samplefocus.py +++ b/youtube_dl/extractor/samplefocus.py @@ -33,9 +33,9 @@ def _real_extract(self, url): r'

(.+?)

', webpage, 'title', default=video_id) mp3_url = self._html_search_regex( - r']+id=(?:["\'])sample_mp3(?:["\'])[^>]+value=(?:["\'])(.+\?[0-9]+)(?:["\'])[^>]*>', + r']+id=(?:["\'])sample_mp3(?:["\'])[^>]+value=(?:["\'])(.+?)(?:["\'])[^>]*>', webpage, 'mp3', fatal=False) or self._html_search_regex( - r']+itemprop=(?:["\'])contentUrl(?:["\'])[^>]+content=(?:["\'])(?:["\'])[^>]*?>', + r']+itemprop=(?:["\'])contentUrl(?:["\'])[^>]+content=(?:["\'])?([^>"\']+)(?:["\'])?[^>]*>', webpage, 'mp3 url') tb = self._og_search_thumbnail(webpage) or self._html_search_regex(