Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fork Sync #39

Merged
merged 1 commit into from
Nov 23, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion youtube_dl/extractor/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -2162,7 +2162,7 @@ def extract_meta(field):
# YouTube Music auto-generated description
release_date = release_year = None
if video_description:
mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
if mobj:
if not track:
track = mobj.group('track').strip()
Expand All @@ -2179,6 +2179,34 @@ def extract_meta(field):
if release_year:
release_year = int(release_year)

yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
for content in contents:
rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
multiple_songs = False
for row in rows:
if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
multiple_songs = True
break
for row in rows:
mrr = row.get('metadataRowRenderer') or {}
mrr_title = try_get(
mrr, lambda x: x['title']['simpleText'], compat_str)
mrr_contents = try_get(
mrr, lambda x: x['contents'][0], dict) or {}
mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
if not (mrr_title and mrr_contents_text):
continue
if mrr_title == 'License':
video_license = mrr_contents_text
elif not multiple_songs:
if mrr_title == 'Album':
album = mrr_contents_text
elif mrr_title == 'Artist':
artist = mrr_contents_text
elif mrr_title == 'Song':
track = mrr_contents_text

m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
video_webpage)
Expand Down