Skip to content

Commit

Permalink
yt_dlp: 96a134dea6397a5f2131947c427aac52c8b4e677
Browse files: browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed May 26, 2024
1 parent 4cf74c7 commit 11cdfef
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 40 deletions.
47 changes: 16 additions & 31 deletions lib/yt_dlp/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3384,23 +3384,16 @@ def manifest_url(manifest):
return formats

def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
mobj = re.search(
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
webpage)
if mobj:
try:
jwplayer_data = self._parse_json(mobj.group('options'),
video_id=video_id,
transform_source=transform_source)
except ExtractorError:
pass
else:
if isinstance(jwplayer_data, dict):
return jwplayer_data
return self._search_json(
r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
webpage, 'JWPlayer data', video_id,
# must be a {...} or sequence, ending
contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
transform_source=transform_source, default=None)

def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
webpage, video_id, transform_source=transform_source)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)

Expand Down Expand Up @@ -3432,22 +3425,14 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)

subtitles = {}
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
if not track_kind or not isinstance(track_kind, str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
track_url = urljoin(base_url, track.get('file'))
if not track_url:
continue
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track_url)
})
for track in traverse_obj(video_data, (
'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
track_url = urljoin(base_url, track.get('file'))
if not track_url:
continue
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track_url)
})

entry = {
'id': this_video_id,
Expand Down
2 changes: 1 addition & 1 deletion lib/yt_dlp/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
}, 'aliases': {
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
'2023': [],
}
Expand Down
19 changes: 11 additions & 8 deletions lib/yt_dlp/utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))


def extract_timezone(date_str):
def extract_timezone(date_str, default=None):
m = re.search(
r'''(?x)
^.{8,}? # >=8 char non-TZ prefix, if present
Expand All @@ -1146,21 +1146,25 @@ def extract_timezone(date_str):
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
$)
''', date_str)
timezone = None

if not m:
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
if timezone is not None:
date_str = date_str[:-len(m.group('tz'))]
timezone = dt.timedelta(hours=timezone or 0)
timezone = dt.timedelta(hours=timezone)
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
timezone = dt.timedelta()
else:
if m.group('sign'):
sign = 1 if m.group('sign') == '+' else -1
timezone = dt.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))

if timezone is None and default is not NO_DEFAULT:
timezone = default or dt.timedelta()

return timezone, date_str


Expand All @@ -1172,10 +1176,9 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):

date_str = re.sub(r'\.[0-9]+', '', date_str)

if timezone is None:
timezone, date_str = extract_timezone(date_str)
timezone, date_str = extract_timezone(date_str, timezone)

with contextlib.suppress(ValueError):
with contextlib.suppress(ValueError, TypeError):
date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
dt_ = dt.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt_.timetuple())
Expand Down

0 comments on commit 11cdfef

Please sign in to comment.