From f25c204ed7bc1e1062b6a2d7e3583cee462756ea Mon Sep 17 00:00:00 2001 From: Adrian Heine Date: Wed, 3 Feb 2021 23:42:51 +0100 Subject: [PATCH] [ArchiveOrg] Fix extractor Closes #21330, closes #26780, closes #25277, closes #23586. --- youtube_dl/extractor/archiveorg.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index c79c58e8281..a0763514c01 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -16,7 +16,7 @@ class ArchiveOrgIE(InfoExtractor): 'md5': '8af1d4cf447933ed3c7f4871162602db', 'info_dict': { 'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', - 'ext': 'ogg', + 'ext': 'ogv', 'title': '1968 Demo - FJCC Conference Presentation Reel #1', 'description': 'md5:da45c349df039f1cc8075268eb1b5c25', 'upload_date': '19681210', @@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor): } }, { 'url': 'https://archive.org/details/Cops1922', - 'md5': '0869000b4ce265e8ca62738b336b268a', + 'md5': '9b865bfdb0ca6b955b93f4a446ddce82', 'info_dict': { 'id': 'Cops1922', - 'ext': 'mp4', + 'ext': 'ogv', 'title': 'Buster Keaton\'s "Cops" (1922)', - 'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6', + 'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c', } }, { 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', @@ -40,11 +40,16 @@ def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( 'http://archive.org/embed/' + video_id, video_id) - jwplayer_playlist = self._parse_json(self._search_regex( + jwplayer_playlist_string = self._search_regex( r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)", - webpage, 'jwplayer playlist'), video_id) - info = self._parse_jwplayer_data( - {'playlist': jwplayer_playlist}, video_id, base_url=url) + webpage, 'jwplayer playlist', default=None) + info = {} + if jwplayer_playlist_string: + jwplayer_playlist = self._parse_json(jwplayer_playlist_string, video_id) + info = self._parse_jwplayer_data({'playlist': jwplayer_playlist}, video_id, base_url=url) + else: + info['id'] = video_id + info.update(self._parse_html5_media_entries("https://archive.org", webpage, video_id)[0]) def get_optional(metadata, field): return metadata.get(field, [None])[0]