From 978b1f0c713dce88f7c7cca5717841536052e2e5 Mon Sep 17 00:00:00 2001 From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:21:51 -0400 Subject: [PATCH] fix(yt-dl): validate audio url and add retry logic --- Contents/Code/default_prefs.py | 3 +- Contents/Code/plex_api_helper.py | 2 +- Contents/Code/youtube_dl_helper.py | 128 +++++++++++++----------- Contents/DefaultPrefs.json | 7 ++ Contents/Strings/en.json | 3 +- docs/source/about/usage.rst | 18 +++- tests/conftest.py | 2 +- tests/unit/test_migration_helper.py | 4 +- tests/unit/test_youtube_dl_helper.py | 141 ++++++++++++++++++++++++--- 9 files changed, 228 insertions(+), 80 deletions(-) diff --git a/Contents/Code/default_prefs.py b/Contents/Code/default_prefs.py index abc561ac..02e1dd30 100644 --- a/Contents/Code/default_prefs.py +++ b/Contents/Code/default_prefs.py @@ -17,8 +17,9 @@ int_update_themes_interval='60', int_update_database_cache_interval='60', int_plexapi_plexapi_timeout='180', - int_plexapi_upload_retries_max='3', + int_plexapi_upload_retries_max='6', int_plexapi_upload_threads='3', + int_youtube_retries_max='8', str_youtube_cookies='', enum_webapp_locale='en', str_webapp_http_host='0.0.0.0', diff --git a/Contents/Code/plex_api_helper.py b/Contents/Code/plex_api_helper.py index 640a6399..106cfbb5 100644 --- a/Contents/Code/plex_api_helper.py +++ b/Contents/Code/plex_api_helper.py @@ -406,7 +406,7 @@ def upload_media(item, method, filepath=None, url=None): ... """ count = 0 - while count <= int(Prefs['int_plexapi_upload_retries_max']): + while count <= max(0, int(Prefs['int_plexapi_upload_retries_max'])): try: if filepath: if method == item.uploadTheme: diff --git a/Contents/Code/youtube_dl_helper.py b/Contents/Code/youtube_dl_helper.py index 34cc9cf0..ffca65d0 100644 --- a/Contents/Code/youtube_dl_helper.py +++ b/Contents/Code/youtube_dl_helper.py @@ -5,6 +5,7 @@ import json import os import tempfile +import time # plex debugging try: @@ -16,6 +17,7 @@ from plexhints.prefs_kit import Prefs # prefs kit # imports from Libraries\Shared +import requests from typing import Optional import youtube_dl @@ -26,7 +28,7 @@ plugin_logger = logging.getLogger(plugin_identifier) -def nsbool(value): +def ns_bool(value): # type: (bool) -> str """ Format a boolean value for a Netscape cookie jar file. @@ -69,7 +71,7 @@ def process_youtube(url): cookie_jar_file.write('# Netscape HTTP Cookie File\n') youtube_dl_params = dict( - cookiefile=cookie_jar_file.name, + cookiefile=cookie_jar_file.name if Prefs['str_youtube_cookies'] else None, logger=plugin_logger, socket_timeout=10, youtube_include_dash_manifest=False, @@ -83,9 +85,9 @@ def process_youtube(url): expiry = int(cookie.get('expiry', 0)) values = [ cookie['domain'], - nsbool(include_subdomain), + ns_bool(include_subdomain), cookie['path'], - nsbool(cookie['secure']), + ns_bool(cookie['secure']), str(expiry), cookie['name'], cookie['value'] @@ -100,65 +102,73 @@ def process_youtube(url): try: ydl = youtube_dl.YoutubeDL(params=youtube_dl_params) - with ydl: - try: - result = ydl.extract_info( - url=url, - download=False # We just want to extract the info - ) - except Exception as exc: - if isinstance(exc, youtube_dl.utils.ExtractorError) and exc.expected: - Log.Info('YDL returned YT error while downloading {}: {}'.format(url, exc)) - else: - Log.Exception('YDL returned an unexpected error while downloading {}: {}'.format(url, exc)) - return None - - if 'entries' in result: - # Can be a playlist or a list of videos - video_data = result['entries'][0] - else: - # Just a video - video_data = result - - selected = { - 'opus': { - 'size': 0, - 'audio_url': None - }, - 'mp4a': { - 'size': 0, - 'audio_url': None - }, - } - if video_data: - for fmt in video_data['formats']: # loop through formats, select largest audio size for better quality - if 'audio only' in fmt['format']: - if 'opus' == fmt['acodec']: - temp_codec = 'opus' - elif 'mp4a' == fmt['acodec'].split('.')[0]: - temp_codec = 'mp4a' - else: - Log.Debug('Unknown codec: %s' % fmt['acodec']) - continue # unknown codec - filesize = int(fmt['filesize']) - if filesize > selected[temp_codec]['size']: - selected[temp_codec]['size'] = filesize - selected[temp_codec]['audio_url'] = fmt['url'] - audio_url = None - if 0 < selected['opus']['size'] > selected['mp4a']['size']: - audio_url = selected['opus']['audio_url'] - elif 0 < selected['mp4a']['size'] > selected['opus']['size']: - audio_url = selected['mp4a']['audio_url'] - - if audio_url and Prefs['bool_prefer_mp4a_codec']: # mp4a codec is preferred - if selected['mp4a']['audio_url']: # mp4a codec is available - audio_url = selected['mp4a']['audio_url'] - elif selected['opus']['audio_url']: # fallback to opus :( + count = 0 + while count <= max(0, int(Prefs['int_youtube_retries_max'])): + sleep_time = 2 ** count + time.sleep(sleep_time) + with ydl: + try: + result = ydl.extract_info( + url=url, + download=False # We just want to extract the info + ) + except Exception as exc: + if isinstance(exc, youtube_dl.utils.ExtractorError) and exc.expected: + Log.Info('YDL returned YT error while downloading {}: {}'.format(url, exc)) + else: + Log.Exception('YDL returned an unexpected error while downloading {}: {}'.format(url, exc)) + count += 1 + continue + + # If a playlist was provided, select the first video + video_data = result['entries'][0] if 'entries' in result else result + + selected = { + 'opus': { + 'size': 0, + 'audio_url': None + }, + 'mp4a': { + 'size': 0, + 'audio_url': None + }, + } + if video_data: + for fmt in video_data['formats']: # loop through formats, select largest audio size for better quality + if 'audio only' in fmt['format']: + if 'opus' == fmt['acodec']: + temp_codec = 'opus' + elif 'mp4a' == fmt['acodec'].split('.')[0]: + temp_codec = 'mp4a' + else: + Log.Debug('Unknown codec: %s' % fmt['acodec']) + continue # unknown codec + filesize = int(fmt['filesize']) + if filesize > selected[temp_codec]['size']: + selected[temp_codec]['size'] = filesize + selected[temp_codec]['audio_url'] = fmt['url'] + + if 0 < selected['opus']['size'] > selected['mp4a']['size']: audio_url = selected['opus']['audio_url'] + elif 0 < selected['mp4a']['size'] > selected['opus']['size']: + audio_url = selected['mp4a']['audio_url'] + + if audio_url and Prefs['bool_prefer_mp4a_codec']: # mp4a codec is preferred + if selected['mp4a']['audio_url']: # mp4a codec is available + audio_url = selected['mp4a']['audio_url'] + elif selected['opus']['audio_url']: # fallback to opus :( + audio_url = selected['opus']['audio_url'] + + if audio_url: + validate = requests.get(url=audio_url, stream=True) + if validate.status_code != 200: + Log.Warn('Failed to validate audio URL for video {}'.format(url)) + count += 1 + continue - return audio_url # return None or url found + return audio_url # return None or url found finally: try: os.remove(cookie_jar_file.name) diff --git a/Contents/DefaultPrefs.json b/Contents/DefaultPrefs.json index 91203691..743c188b 100644 --- a/Contents/DefaultPrefs.json +++ b/Contents/DefaultPrefs.json @@ -125,6 +125,13 @@ "default": "3", "secure": "false" }, + { + "id": "int_youtube_retries_max", + "type": "text", + "label": "int_youtube_retries_max", + "default": "8", + "secure": "false" + }, { "id": "str_youtube_cookies", "type": "text", diff --git a/Contents/Strings/en.json b/Contents/Strings/en.json index 748a0d62..f68f58d2 100644 --- a/Contents/Strings/en.json +++ b/Contents/Strings/en.json @@ -15,8 +15,9 @@ "int_update_themes_interval": "Interval for automatic update task, in minutes (min: 15)", "int_update_database_cache_interval": "Interval for database cache update task, in minutes (min: 15)", "int_plexapi_plexapi_timeout": "PlexAPI Timeout, in seconds (min: 1)", - "int_plexapi_upload_retries_max": "Max Retries, integer (min: 0)", + "int_plexapi_upload_retries_max": "Max Retries (PlexAPI uploads), integer (min: 0)", "int_plexapi_upload_threads": "Multiprocessing Threads, integer (min: 1)", + "int_youtube_retries_max": "Max Retries (YouTube), integer (min: 0)", "str_youtube_cookies": "YouTube Cookies (JSON format)", "enum_webapp_locale": "Web UI Locale", "str_webapp_http_host": "Web UI Host Address (requires Plex Media Server restart)", diff --git a/docs/source/about/usage.rst b/docs/source/about/usage.rst index 54e756ce..848ee949 100644 --- a/docs/source/about/usage.rst +++ b/docs/source/about/usage.rst @@ -228,8 +228,8 @@ Default Minimum ``1`` -Max Retries -^^^^^^^^^^^ +Max Retries (PlexAPI uploads) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Description The number of times to retry uploading theme audio to the Plex server. The time between retries will increase @@ -242,6 +242,20 @@ Default Minimum ``0`` +Max Retries (YouTube) +^^^^^^^^^^^^^^^^^^^^^ + +Description + The number of times to retry getting an audio url from YouTube. The time between retries will increase + exponentially. The time between is calculated as ``2 ^ retry_number``. For example, the first retry will occur + after 2 seconds, the second retry will occur after 4 seconds, and the third retry will occur after 8 seconds. + +Default + ``8`` + +Minimum + ``0`` + Multiprocessing Threads ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/conftest.py b/tests/conftest.py index 6bd28a15..d5c256ec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -59,7 +59,7 @@ def wait_for_themes(section): timer = 0 with_themes = 0 total = len(section.all()) - while timer < 180 and with_themes < total: + while timer < 600 and with_themes < total: with_themes = 0 try: for item in section.all(): diff --git a/tests/unit/test_migration_helper.py b/tests/unit/test_migration_helper.py index 70cbaef1..b3e2fef4 100644 --- a/tests/unit/test_migration_helper.py +++ b/tests/unit/test_migration_helper.py @@ -54,7 +54,7 @@ def test_validate_migration_key(migration_helper_fixture, key, raise_exception, @pytest.mark.parametrize('key, expected', [ (migration_helper_object.LOCKED_THEMES, None), (migration_helper_object.LOCKED_COLLECTION_FIELDS, None), - pytest.param('invalid', None, marks=pytest.mark.xfail(raises=AttributeError)), + pytest.param('invalid', None, marks=pytest.mark.xfail(raises=AttributeError, reason="Cannot migrate in CI")), ]) def test_get_migration_status(migration_helper_fixture, migration_status_file, key, expected): migration_status = migration_helper_fixture.get_migration_status(key=key) @@ -64,7 +64,7 @@ def test_get_migration_status(migration_helper_fixture, migration_status_file, k @pytest.mark.parametrize('key', [ migration_helper_object.LOCKED_THEMES, migration_helper_object.LOCKED_COLLECTION_FIELDS, - pytest.param('invalid', marks=pytest.mark.xfail(raises=AttributeError)), + pytest.param('invalid', marks=pytest.mark.xfail(raises=AttributeError, reason="Cannot migrate in CI")), ]) def test_set_migration_status(migration_helper_fixture, migration_status_file, key): # perform the test twice, to load an existing migration file diff --git a/tests/unit/test_youtube_dl_helper.py b/tests/unit/test_youtube_dl_helper.py index 4a522d92..d9d71686 100644 --- a/tests/unit/test_youtube_dl_helper.py +++ b/tests/unit/test_youtube_dl_helper.py @@ -1,28 +1,143 @@ # -*- coding: utf-8 -*- +# standard imports +import binascii +import os +import tempfile +from urlparse import urlparse + # lib imports import pytest +import requests # local imports from Code import youtube_dl_helper +gh_url_prefix = 'https://raw.githubusercontent.com/LizardByte/ThemerrDB/gh-pages' + +xfail_rate_limit = "YouTube is probably rate limiting" + -@pytest.mark.parametrize('url', [ - 'https://www.youtube.com/watch?v=dQw4w9WgXcQ', - 'https://www.youtube.com/watch?v=Wb8j8Ojd4YQ&list=PLMYr5_xSeuXAbhxYHz86hA1eCDugoxXY0&pp=iAQB', # playlist test +# standard items setup by plexhints action +@pytest.fixture(scope='module', params=[ + pytest.param({ + 'url': 'https://www.youtube.com/watch?v=dQw4w9WgXcQ', + 'expected': True, + }), + pytest.param({ # playlist + 'url': 'https://www.youtube.com/watch?v=Wb8j8Ojd4YQ&list=PLMYr5_xSeuXAbhxYHz86hA1eCDugoxXY0&pp=iAQB', + 'expected': True, + }), + pytest.param({ + 'url': '{}/movies/themoviedb/9761.json'.format(gh_url_prefix), # Elephants Dream + 'expected': True, + }, marks=pytest.mark.xfail(reason=xfail_rate_limit)), + pytest.param({ + 'url': '{}/movies/themoviedb/20529.json'.format(gh_url_prefix), # Sita Sings the Blues + 'expected': True, + }, marks=pytest.mark.xfail(reason=xfail_rate_limit)), + pytest.param({ + 'url': '{}/movies/themoviedb/10378.json'.format(gh_url_prefix), # Big Buck Bunny + 'expected': True, + }, marks=pytest.mark.xfail(reason=xfail_rate_limit)), + pytest.param({ + 'url': '{}/movies/themoviedb/45745.json'.format(gh_url_prefix), # Sintel + 'expected': True, + }, marks=pytest.mark.xfail(reason=xfail_rate_limit)), + pytest.param({ + 'url': '{}/tv_shows/themoviedb/1399.json'.format(gh_url_prefix), # Game of Thrones + 'expected': True, + }, marks=pytest.mark.xfail(reason=xfail_rate_limit)), + pytest.param({ + 'url': '{}/tv_shows/themoviedb/48866.json'.format(gh_url_prefix), # The 100 + 'expected': True, + }, marks=pytest.mark.xfail(reason=xfail_rate_limit)), + pytest.param({ + 'url': 'https://www.youtube.com/watch?v=notavideoid', # invalid A + 'expected': False, + }), + pytest.param({ + 'url': 'https://blahblahblah', # invalid B + 'expected': False, + }), ]) -def test_process_youtube(url): +def youtube_url(request): + url = request.param['url'] + host = urlparse(url=url).hostname + if host and not host.endswith(".githubusercontent.com"): + return request.param + + # get the youtube_theme_url key from the JSON data + response = requests.get(url=url) + assert response.status_code == 200 + data = response.json() + request.param['url'] = data['youtube_theme_url'] + return request.param + + +@pytest.fixture +def temp_file(): + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + yield temp_file.name + os.remove(temp_file.name) + + +def test_process_youtube(youtube_url, temp_file): # test valid urls - audio_url = youtube_dl_helper.process_youtube(url=url) + audio_url = youtube_dl_helper.process_youtube(url=youtube_url['url']) + if not youtube_url['expected']: + assert audio_url is None + return + assert audio_url is not None assert audio_url.startswith('https://') + # valid signatures dictionary with offsets + valid_signatures = { + "1866747970": 3, # Mp4 container + "6674797069736F6D": 4, # Mp4 container + "494433": 0, # ID3 + "FFFB": 0, # MPEG-1 Layer 3 + "FFF3": 0, # MPEG-1 Layer 3 + "FFF2": 0, # MPEG-1 Layer 3 + } -@pytest.mark.parametrize('url', [ - 'https://www.youtube.com/watch?v=notavideoid', - 'https://blahblahblah', -]) -def test_process_youtube_invalid(url): - # test invalid urls - audio_url = youtube_dl_helper.process_youtube(url=url) - assert audio_url is None + chunk_size = 0 + for sig, offset in valid_signatures.items(): + chunk_size = max(chunk_size, len(sig) + offset) # get the max chunk size + + # validate the format of the audio file at the URL + # download the file + with requests.get(audio_url, stream=True) as r: + r.raise_for_status() + with open(temp_file, 'wb') as f: + for chunk in r.iter_content(chunk_size=chunk_size): + f.write(chunk) + break # only download the first chunk for unit testing + + # read the file bytes + with open(temp_file, 'rb') as f: + file_bytes = f.read() + + # convert bytes to hex + file_bytes_hex = binascii.hexlify(file_bytes) + print(file_bytes_hex[:30]) + + # check if the file is not WebM + is_webm = file_bytes_hex.startswith("1A45DFA3") + assert not is_webm, "File {} is WebM".format(temp_file) + + # check if the file is valid + is_valid = False + + # loop through valid_signatures + for signature, offset in valid_signatures.items(): + # remove the offset bytes + file_bytes_hex_without_offset = file_bytes_hex[offset * 2:] + + # check if the beginning of the file_bytes_hex_without_offset matches the signature + if file_bytes_hex_without_offset.startswith(signature): + is_valid = True + break + + assert is_valid, "File {} is not a valid format".format(temp_file)