diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index c8e85b5005a..fc5e9828e2c 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -82,6 +82,14 @@
         'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
         'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
     ),
+    (
+        'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
+        'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
+    ),
+    (
+        'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
+        'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
+    ),
 ]
 
 
@@ -110,10 +118,19 @@ def test_youtube_extract_player_info(self):
 class TestSignature(unittest.TestCase):
     def setUp(self):
         TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+        self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
         if not os.path.exists(self.TESTDATA_DIR):
             os.mkdir(self.TESTDATA_DIR)
 
+    def tearDown(self):
+        # Best-effort cleanup: remove whatever files are left in TESTDATA_DIR
+        try:
+            for f in os.listdir(self.TESTDATA_DIR):
+                # listdir() yields bare names; resolve them against the directory
+                os.remove(os.path.join(self.TESTDATA_DIR, f))
+        except OSError:
+            pass
+
 
 def t_factory(name, sig_func, url_pattern):
     def make_tfunc(url, sig_input, expected_sig):
@@ -145,12 +162,7 @@ def signature(jscode, sig_input):
 
 
 def n_sig(jscode, sig_input):
-    # Pending implementation of _extract_n_function_name() or similar in
-    # youtube.py, hard-code here
-    # funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
-    import re
-    funcname = re.search(r'[=(,&|](\w+)\(\w+\),\w+\.set\("n",', jscode)
-    funcname = funcname and funcname.group(1)
+    funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
     return JSInterpreter(jscode).call_function(funcname, sig_input)
 
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 63918924df8..7943b94f9d5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -28,6 +28,7 @@
     dict_get,
     float_or_none,
     int_or_none,
+    js_to_json,
     mimetype2ext,
     parse_codecs,
     parse_duration,
@@ -1391,9 +1392,21 @@ def _extract_player_url(self, webpage):
     # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
     # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
     def _extract_n_function_name(self, jscode):
-        return self._search_regex(
-            (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
-            jscode, 'Initial JS player n function name', group='nfunc')
+        # The pattern accepts a plain three-character name (`b=abc(x)`) or
+        # the same name followed by a numeric index (`b=abc[0](x)`)
+        target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
+        nfunc_and_idx = self._search_regex(
+            r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
+            jscode, 'Initial JS player n function name')
+        nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
+        if not idx:
+            return nfunc
+        # Indexed form: find the array literal `var <nfunc> = [...];` and
+        # return its idx-th element; nfunc may contain `$`, so escape it
+        return self._parse_json(self._search_regex(
+            r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
+            'Initial JS player n function list (%s[%s])' % (nfunc, idx)),
+            nfunc, transform_source=js_to_json)[int(idx)]
 
     def _extract_n_function(self, video_id, player_url):
         player_id = self._extract_player_info(player_url)