Skip to content

Commit

Permalink
[archiveorg] Fix extraction (closes #21330, closes #23586, closes #23700)
Browse files Browse the repository at this point in the history
  • Loading branch information
TinyToweringTree committed Jan 24, 2020
1 parent 2a5c26c commit 8df0c2c
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions youtube_dl/extractor/archiveorg.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,12 @@ def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://archive.org/embed/' + video_id, video_id)
+ input_element_with_playlist = self._search_regex(
+ r'(<\s*input.*\s*class\s*=\s*[\'"].*\s*js-play8-playlist\s*.*[\'"]\s*.*>)',
+ webpage, 'jwplayer playlist')
  jwplayer_playlist = self._parse_json(self._search_regex(
- r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
- webpage, 'jwplayer playlist'), video_id)
+ r'.*\s+value\s*=\s*[\'"](.+)[\'"][\s/]',
+ input_element_with_playlist, 'playlist data'), video_id)
info = self._parse_jwplayer_data(
{'playlist': jwplayer_playlist}, video_id, base_url=url)

Expand All @@ -52,7 +55,7 @@ def get_optional(metadata, field):
metadata = self._download_json(
'http://archive.org/details/' + video_id, video_id, query={
'output': 'json',
- })['metadata']
+ }).get('metadata', {})
info.update({
'title': get_optional(metadata, 'title') or info.get('title'),
'description': clean_html(get_optional(metadata, 'description')),
Expand Down

0 comments on commit 8df0c2c

Please sign in to comment.