From 82e4eca711a128138ed0b84ddb4321e403d56340 Mon Sep 17 00:00:00 2001 From: Xiyue <113869642+xiyue077@users.noreply.github.com> Date: Tue, 11 Oct 2022 09:52:48 +1100 Subject: [PATCH 1/7] [motherless] Fixed the broken uploader_id in the extractor (#31243) * Fixed the broken uploader_id in the extractor. * Make uploader_id RE looser * Fix uploader_id in test Motherless_3 * Fix group pagination * # coding: utf-8 Co-authored-by: Andy Xuming Co-authored-by: dirkf --- youtube_dl/extractor/motherless.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index ef1e081f20e..35d2b46ed31 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import datetime @@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor): 'title': 'a/ Hot Teens', 'categories': list, 'upload_date': '20210104', - 'uploader_id': 'yonbiw', + 'uploader_id': 'anonymous', 'thumbnail': r're:https?://.*\.jpg', 'age_limit': 18, }, @@ -127,7 +128,7 @@ def _real_extract(self, url): comment_count = webpage.count('class="media-comment-contents"') uploader_id = self._html_search_regex( - r'"thumb-member-username">\s+]+>\s*]+\bhref\s*=\s*['"]/m/([^"']+)''', webpage, 'uploader_id') categories = self._html_search_meta('keywords', webpage, default=None) @@ -169,7 +170,7 @@ class MotherlessGroupIE(InfoExtractor): 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of ' 'any kind!' }, - 'playlist_mincount': 9, + 'playlist_mincount': 0, }] @classmethod @@ -208,9 +209,9 @@ def _real_extract(self, url): r'([\w\s]+\w)\s+-', webpage, 'title', fatal=False) description = self._html_search_meta( 'description', webpage, fatal=False) - page_count = self._int(self._search_regex( - r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT', - webpage, 'page_count'), 'page_count') + page_count = str_to_int(self._search_regex( + r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b', + webpage, 'page_count', default='1')) PAGE_SIZE = 80 def _get_page(idx): From 2ced5a79128f53faad94dc494d05925eb957c414 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 9 Aug 2022 19:34:34 +0100 Subject: [PATCH 2/7] [test] Implement string "lambda x: condition(x)" as an expected value Semantics equivalent to `assert condition(got)` --- test/helper.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index e62aab11e77..c6a2f06670d 100644 --- a/test/helper.py +++ b/test/helper.py @@ -128,6 +128,12 @@ def expect_value(self, got, expected, field): self.assertTrue( contains_str in got, 'field %s (value: %r) should contain %r' % (field, got, contains_str)) + elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected): + fn = eval(expected) + suite = expected.split(':', 1)[1].strip() + self.assertTrue( + fn(got), + 'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got)) elif isinstance(expected, type): self.assertTrue( isinstance(got, expected), @@ -137,7 +143,7 @@ def expect_value(self, got, expected, field): elif isinstance(expected, list) and isinstance(got, list): self.assertEqual( len(expected), len(got), - 'Expect a list of length %d, but got a list of length %d for field %s' % ( + 'Expected a list of length %d, but got a list of length %d for field %s' % ( len(expected), len(got), field)) for index, (item_got, item_expected) in enumerate(zip(got, expected)): type_got = type(item_got) From c282e5f8d723763ba88c521221e4535f46453949 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 9 Aug 2022 19:37:58 +0100 Subject: [PATCH 3/7] [ZDF] Overhaul ZDF extractors * pull some yt-dlp changes into ZDFBaseIE._extract_format() * add test cases from yt-dlp to ZDFIE * fix crash in ZDFIE._extract_mobile() when object had no `formitaeten` * improve title extraction in ZDFChannelIE (remove trailing station ident) * avoid extracting non-video playlist items (fixes #31149) --- youtube_dl/extractor/zdf.py | 175 ++++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 3d39bb33aec..fcc63ef52ca 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -8,13 +8,14 @@ from ..utils import ( determine_ext, ExtractorError, + extract_attributes, float_or_none, int_or_none, merge_dicts, NO_DEFAULT, - orderedSet, parse_codecs, qualities, + str_or_none, try_get, unified_timestamp, update_url_query, @@ -57,28 +58,39 @@ def _extract_format(self, video_id, formats, format_urls, meta): format_urls.add(format_url) mime_type = meta.get('mimeType') ext = determine_ext(format_url) + + join_nonempty = lambda s, l: s.join(filter(None, l)) + meta_map = lambda t: map(lambda x: str_or_none(meta.get(x)), t) + if mime_type == 'application/x-mpegURL' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + new_formats = self._extract_m3u8_formats( format_url, video_id, 'mp4', m3u8_id='hls', - entry_protocol='m3u8_native', fatal=False)) + entry_protocol='m3u8_native', fatal=False) elif mime_type == 'application/f4m+xml' or ext == 'f4m': - formats.extend(self._extract_f4m_formats( - update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)) + new_formats = self._extract_f4m_formats( + update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False) else: f = parse_codecs(meta.get('mimeCodec')) + if not f: + data = meta.get('type', '').split('_') + if try_get(data, lambda x: x[2]) == ext: + f = dict(zip(('vcodec', 'acodec'), data[1])) + format_id = ['http'] - for p in (meta.get('type'), meta.get('quality')): - if p and isinstance(p, compat_str): - format_id.append(p) + format_id.extend(join_nonempty('-', meta_map(('type', 'quality')))) f.update({ 'url': format_url, 'format_id': '-'.join(format_id), - 'format_note': meta.get('quality'), - 'language': meta.get('language'), - 'quality': qualities(self._QUALITIES)(meta.get('quality')), - 'preference': -10, + 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)) }) - formats.append(f) + new_formats = [f] + + formats.extend(merge_dicts(f, { + 'format_note': join_nonempty(',', meta_map(('quality', 'class'))), + 'language': meta.get('language'), + 'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1, + 'quality': qualities(self._QUALITIES)(meta.get('quality')), + }) for f in new_formats) def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer): ptmd = self._call_api( @@ -107,6 +119,7 @@ def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer): 'type': f.get('type'), 'mimeType': f.get('mimeType'), 'quality': quality.get('quality'), + 'class': track.get('class'), 'language': track.get('language'), }) self._sort_formats(formats) @@ -171,6 +184,20 @@ class ZDFIE(ZDFBaseIE): 'duration': 2615, 'timestamp': 1465021200, 'upload_date': '20160604', + 'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806', + }, + }, { + 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', + 'md5': '1b93bdec7d02fc0b703c5e7687461628', + 'info_dict': { + 'ext': 'mp4', + 'id': 'video_funk_1770473', + 'duration': 1278, + 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.', + 'title': 'Alles ist verzaubert', + 'timestamp': 1635520560, + 'upload_date': '20211029', + 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799', }, }, { # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche @@ -204,6 +231,19 @@ class ZDFIE(ZDFBaseIE): 'timestamp': 1641355200, 'upload_date': '20220105', }, + 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"' + }, { + 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', + 'info_dict': { + 'id': '191205_1800_sendung_sok8', + 'ext': 'mp4', + 'title': 'Das Geld anderer Leute', + 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d', + 'duration': 2581.0, + 'timestamp': 1654790700, + 'upload_date': '20220609', + 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350', + }, }] def _extract_entry(self, url, player, content, video_id): @@ -265,15 +305,16 @@ def _extract_mobile(self, video_id): 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, video_id) - document = video['document'] - - title = document['titel'] - content_id = document['basename'] - formats = [] - format_urls = set() - for f in document['formitaeten']: - self._extract_format(content_id, formats, format_urls, f) + formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) + document = formitaeten and video['document'] + if formitaeten: + title = document['titel'] + content_id = document['basename'] + + format_urls = set() + for f in formitaeten or []: + self._extract_format(content_id, formats, format_urls, f) self._sort_formats(formats) thumbnails = [] @@ -320,9 +361,9 @@ class ZDFChannelIE(ZDFBaseIE): 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', 'info_dict': { 'id': 'das-aktuelle-sportstudio', - 'title': 'das aktuelle sportstudio | ZDF', + 'title': 'das aktuelle sportstudio', }, - 'playlist_mincount': 23, + 'playlist_mincount': 18, }, { 'url': 'https://www.zdf.de/dokumentation/planet-e', 'info_dict': { @@ -330,6 +371,14 @@ class ZDFChannelIE(ZDFBaseIE): 'title': 'planet e.', }, 'playlist_mincount': 50, + }, { + 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest', + 'info_dict': { + 'id': 'aktenzeichen-xy-ungeloest', + 'title': 'Aktenzeichen XY... ungelöst', + 'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)", + }, + 'playlist_mincount': 2, }, { 'url': 'https://www.zdf.de/filme/taunuskrimi/', 'only_matching': True, @@ -339,60 +388,36 @@ class ZDFChannelIE(ZDFBaseIE): def suitable(cls, url): return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) + def _og_search_title(self, webpage, fatal=False): + title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal) + return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None + def _real_extract(self, url): channel_id = self._match_id(url) webpage = self._download_webpage(url, channel_id) - entries = [ - self.url_result(item_url, ie=ZDFIE.ie_key()) - for item_url in orderedSet(re.findall( - r'data-plusbar-url=["\'](http.+?\.html)', webpage))] - - return self.playlist_result( - entries, channel_id, self._og_search_title(webpage, fatal=False)) - - r""" - player = self._extract_player(webpage, channel_id) - - channel_id = self._search_regex( - r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage, - 'channel id', group='id') - - channel = self._call_api( - 'https://api.zdf.de/content/documents/%s.json' % channel_id, - player, url, channel_id) - - items = [] - for module in channel['module']: - for teaser in try_get(module, lambda x: x['teaser'], list) or []: - t = try_get( - teaser, lambda x: x['http://zdf.de/rels/target'], dict) - if not t: - continue - items.extend(try_get( - t, - lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'], - list) or []) - items.extend(try_get( - module, - lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'], - list) or []) - - entries = [] - entry_urls = set() - for item in items: - t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict) - if not t: - continue - sharing_url = t.get('http://zdf.de/rels/sharing-url') - if not sharing_url or not isinstance(sharing_url, compat_str): - continue - if sharing_url in entry_urls: - continue - entry_urls.add(sharing_url) - entries.append(self.url_result( - sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id'))) - - return self.playlist_result(entries, channel_id, channel.get('title')) - """ + matches = re.finditer( + r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL, + webpage) + + if self._downloader.params.get('noplaylist', False): + entry = next( + (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches), + None) + self.to_screen('Downloading just the main video because of --no-playlist') + if entry: + return entry + else: + self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, )) + + def check_video(m): + v_ref = self._search_regex( + r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ), + webpage, 'check id', default='') + v_ref = extract_attributes(v_ref) + return v_ref.get('data-target-video-type') != 'novideo' + + return self.playlist_from_matches( + (m.group('url') for m in matches if check_video(m)), + channel_id, self._og_search_title(webpage, fatal=False)) From 6e2626f092c63a5fa22a31df409610b5deaf3968 Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Oct 2022 05:58:10 +0100 Subject: [PATCH 4/7] [JSInterp] Improve separation logic Based on https://github.com/yt-dlp/yt-dlp/commit/0468a3b3253957bfbeb98b4a7c71542ff80e9e06 --- youtube_dl/jsinterp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 99dd9843549..530a705b4b9 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -214,7 +214,7 @@ class Exception(ExtractorError): def __init__(self, msg, *args, **kwargs): expr = kwargs.pop('expr', None) if expr is not None: - msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100]) + msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) @classmethod @@ -268,7 +268,7 @@ def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): elif in_quote == '/' and char in '[]': in_regex_char_group = char == '[' escaping = not escaping and in_quote and char == '\\' - after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op)) + after_op = not in_quote and (char in cls.OP_CHARS or char == '[' or (char.isspace() and after_op)) if char != delim[pos] or any(counters.values()) or in_quote: pos = skipping = 0 @@ -301,7 +301,7 @@ def _separate_at_paren(cls, expr, delim=None): separated = list(cls._separate(expr, delim, 1)) if len(separated) < 2: - raise cls.Exception('No terminating paren {delim} in {expr}'.format(**locals())) + raise cls.Exception('No terminating paren {delim} in {expr:.100}'.format(**locals())) return separated[0][1:].strip(), separated[1].strip() @staticmethod From c94a459a248352fd97dccc79ed6604a558459bfd Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Oct 2022 12:18:12 +0000 Subject: [PATCH 5/7] [utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when --restrict-filenames Implements https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which has a test. --- youtube_dl/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fea38ed32e9..23a65a81c2a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -33,6 +33,7 @@ import tempfile import time import traceback +import unicodedata import xml.etree.ElementTree import zlib @@ -2118,6 +2119,9 @@ def replace_insane(char): return '_' return char + # Replace look-alike Unicode glyphs + if restricted and not is_id: + s = unicodedata.normalize('NFKC', s) # Handle timestamps s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) result = ''.join(map(replace_insane, s)) From 11b284c81fe2988813c817918536fc3a5630870a Mon Sep 17 00:00:00 2001 From: dirkf <fieldhouse@gmx.net> Date: Tue, 11 Oct 2022 12:36:44 +0000 Subject: [PATCH 6/7] [Common:JWPlayer] Fix x1000 scaling error See https://github.com/yt-dlp/yt-dlp/issues/5106#issuecomment-1264625161 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 797c35fd52d..1f33a1e06da 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2844,7 +2844,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, 'url': source_url, 'width': int_or_none(source.get('width')), 'height': height, - 'tbr': int_or_none(source.get('bitrate')), + 'tbr': int_or_none(source.get('bitrate'), scale=1000), 'ext': ext, } if source_url.startswith('rtmp'): From c91cbf60729af93c4677864aa6c8b74b576146ca Mon Sep 17 00:00:00 2001 From: Xie Yanbo <xieyanbo@gmail.com> Date: Tue, 11 Oct 2022 20:55:09 +0800 Subject: [PATCH 7/7] [netease] Get netease music download url through player api (#31235) * remove unplayable song from test * compatible with python 2 * using standard User_Agent, fix imports * use hash instead of long description * fix lint * fix hash --- test/test_aes.py | 9 +- youtube_dl/aes.py | 37 +++++++- youtube_dl/extractor/neteasemusic.py | 123 +++++++++++++++++++-------- 3 files changed, 129 insertions(+), 40 deletions(-) diff --git a/test/test_aes.py b/test/test_aes.py index cc89fb6ab27..0f181466bcf 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -8,7 +8,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes import base64 @@ -58,6 +58,13 @@ def test_decrypt_text(self): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + def test_ecb_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) + self.assertEqual( + encrypted, + b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index d0de2d93f39..a94a410798b 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -8,6 +8,18 @@ BLOCK_SIZE_BYTES = 16 +def pkcs7_padding(data): + """ + PKCS#7 padding + + @param {int[]} data cleartext + @returns {int[]} padding data + """ + + remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES + return data + [remaining_length] * remaining_length + + def aes_ctr_decrypt(data, key, counter): """ Decrypt with aes in counter mode @@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv): previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - remaining_length = BLOCK_SIZE_BYTES - len(block) - block += [remaining_length] * remaining_length + block = pkcs7_padding(block) mixed_block = xor(block, previous_cipher_block) encrypted_block = aes_encrypt(mixed_block, expanded_key) @@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv): return encrypted_data +def aes_ecb_encrypt(data, key): + """ + Encrypt with aes in ECB mode. Using PKCS#7 padding + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block = pkcs7_padding(block) + + encrypted_block = aes_encrypt(block, expanded_key) + encrypted_data += encrypted_block + + return encrypted_data + + def key_expansion(data): """ Generate key schedule diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index 978a05841ce..fad22a2cd0b 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -1,20 +1,31 @@ # coding: utf-8 from __future__ import unicode_literals -from hashlib import md5 from base64 import b64encode +from binascii import hexlify from datetime import datetime +from hashlib import md5 +from random import randint +import json import re +import time from .common import InfoExtractor +from ..aes import aes_ecb_encrypt, pkcs7_padding from ..compat import ( compat_urllib_parse_urlencode, compat_str, compat_itertools_count, ) from ..utils import ( - sanitized_Request, + ExtractorError, + bytes_to_intlist, float_or_none, + int_or_none, + intlist_to_bytes, + sanitized_Request, + std_headers, + try_get, ) @@ -35,32 +46,85 @@ def _encrypt(cls, dfsid): result = b64encode(m.digest()).decode('ascii') return result.replace('/', '_').replace('+', '-') + @classmethod + def make_player_api_request_data_and_headers(cls, song_id, bitrate): + KEY = b'e82ckenh8dichen8' + URL = '/api/song/enhance/player/url' + now = int(time.time() * 1000) + rand = randint(0, 1000) + cookie = { + 'osver': None, + 'deviceId': None, + 'appver': '8.0.0', + 'versioncode': '140', + 'mobilename': None, + 'buildver': '1623435496', + 'resolution': '1920x1080', + '__csrf': '', + 'os': 'pc', + 'channel': None, + 'requestId': '{0}_{1:04}'.format(now, rand), + } + request_text = json.dumps( + {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie}, + separators=(',', ':')) + message = 'nobody{0}use{1}md5forencrypt'.format( + URL, request_text).encode('latin1') + msg_digest = md5(message).hexdigest() + + data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format( + URL, request_text, msg_digest) + data = pkcs7_padding(bytes_to_intlist(data)) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY))) + encrypted_params = hexlify(encrypted).decode('ascii').upper() + + cookie = '; '.join( + ['{0}={1}'.format(k, v if v is not None else 'undefined') + for [k, v] in cookie.items()]) + + headers = { + 'User-Agent': std_headers['User-Agent'], + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': 'https://music.163.com', + 'Cookie': cookie, + } + return ('params={0}'.format(encrypted_params), headers) + + def _call_player_api(self, song_id, bitrate): + url = 'https://interface3.music.163.com/eapi/song/enhance/player/url' + data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate) + try: + return self._download_json( + url, song_id, data=data.encode('ascii'), headers=headers) + except ExtractorError as e: + if type(e.cause) in (ValueError, TypeError): + # JSON load failure + raise + except Exception: + pass + return {} + def extract_formats(self, info): formats = [] + song_id = info['id'] for song_format in self._FORMATS: details = info.get(song_format) if not details: continue - song_file_path = '/%s/%s.%s' % ( - self._encrypt(details['dfsId']), details['dfsId'], details['extension']) - - # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature - # from NetEase's CDN provider that can be used if m5.music.126.net does not - # work, especially for users outside of Mainland China - # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880 - for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net', - 'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'): - song_url = host + song_file_path + + bitrate = int_or_none(details.get('bitrate')) or 999000 + data = self._call_player_api(song_id, bitrate) + for song in try_get(data, lambda x: x['data'], list) or []: + song_url = try_get(song, lambda x: x['url']) if self._is_valid_url(song_url, info['id'], 'song'): formats.append({ 'url': song_url, 'ext': details.get('extension'), - 'abr': float_or_none(details.get('bitrate'), scale=1000), + 'abr': float_or_none(song.get('br'), scale=1000), 'format_id': song_format, - 'filesize': details.get('size'), - 'asr': details.get('sr') + 'filesize': int_or_none(song.get('size')), + 'asr': int_or_none(details.get('sr')), }) - break return formats @classmethod @@ -79,30 +143,16 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://music.163.com/#/song?id=32102397', - 'md5': 'f2e97280e6345c74ba9d5677dd5dcb45', + 'md5': '3e909614ce09b1ccef4a3eb205441190', 'info_dict': { 'id': '32102397', 'ext': 'mp3', - 'title': 'Bad Blood (feat. Kendrick Lamar)', + 'title': 'Bad Blood', 'creator': 'Taylor Swift / Kendrick Lamar', - 'upload_date': '20150517', - 'timestamp': 1431878400, - 'description': 'md5:a10a54589c2860300d02e1de821eb2ef', + 'upload_date': '20150516', + 'timestamp': 1431792000, + 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c', }, - 'skip': 'Blocked outside Mainland China', - }, { - 'note': 'No lyrics translation.', - 'url': 'http://music.163.com/#/song?id=29822014', - 'info_dict': { - 'id': '29822014', - 'ext': 'mp3', - 'title': '听见下雨的声音', - 'creator': '周杰伦', - 'upload_date': '20141225', - 'timestamp': 1419523200, - 'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c', - }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'No lyrics.', 'url': 'http://music.163.com/song?id=17241424', @@ -112,9 +162,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'title': 'Opus 28', 'creator': 'Dustin O\'Halloran', 'upload_date': '20080211', + 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4', 'timestamp': 1202745600, }, - 'skip': 'Blocked outside Mainland China', }, { 'note': 'Has translated name.', 'url': 'http://music.163.com/#/song?id=22735043', @@ -128,7 +178,6 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'timestamp': 1264608000, 'alt_title': '说出愿望吧(Genie)', }, - 'skip': 'Blocked outside Mainland China', }] def _process_lyrics(self, lyrics_info):