From 3bf4f7268cacbe594db7364eba796506306e9312 Mon Sep 17 00:00:00 2001 From: Vasyl' Vavrychuk Date: Sun, 5 Dec 2010 21:09:14 +0200 Subject: [PATCH 1/2] Implemented depositfiles support --- youtube-dl | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/youtube-dl b/youtube-dl index 22dd230ee2b..8840c284082 100755 --- a/youtube-dl +++ b/youtube-dl @@ -110,6 +110,17 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) +def sanitize_html_message(message): + """Sanitize message so it will be printed as it supposed to look in html + (i.e. remove multiple spaces)""" + + # Remove repeated spaces + words = message.split(' ') + stripped_words = [] + for word in words: + if len(word.strip()) > 0: + stripped_words.append(word.strip()) + return ' '.join(stripped_words) class DownloadError(Exception): """Download Error exception. @@ -159,6 +170,13 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected +class DownloadRestrictionError(Exception): + """Download Restriction Error exception. + + Some services may restrict downloading in different ways. For example + they may limit amount of downloads per period of time. + """ + class FileDownloader(object): """File Downloader class. @@ -2075,6 +2093,84 @@ class YoutubeUserIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return +class DepositFilesIE(InfoExtractor): + """Information extractor for depositfiles.com""" + + _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DepositFilesIE._VALID_URL, url) is not None) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) + + def _real_initialize(self): + return + + def _real_extract(self, url): + # At this point we have a new file + self._downloader.increment_downloads() + + file_id = url.split('/')[-1] + # Rebuild url in english locale + url = 'http://depositfiles.com/en/files/' + file_id + + # Retrieve file webpage with 'Free download' button pressed + free_download_indication = { 'gateway_result' : '1' } + request = urllib2.Request(url, urllib.urlencode(free_download_indication)) + try: + self.report_download_webpage(file_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) + return + + # Search for the real file URL + mobj = re.search(r'
(Attention.*?)', webpage, re.DOTALL) + if (mobj is not None) and (mobj.group(1) is not None): + raise DownloadRestrictionError(sanitize_html_message(mobj.group(1))) + + self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) + return + + file_url = mobj.group(1) + file_extension = os.path.splitext(file_url)[1][1:] + + # Search for file title + mobj = re.search(r'', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + file_title = simple_title = mobj.group(1).decode('utf-8') + + try: + # Process file information + self._downloader.process_info({ + 'id': file_id.decode('utf-8'), + 'url': file_url.decode('utf-8'), + 'uploader': u'NA', + 'upload_date': u'NA', + 'title': file_title, + 'stitle': simple_title, + 'ext': file_extension.decode('utf-8'), + 'format': u'NA', + 'player_url': None, + }) + except UnavailableVideoError, err: + self._downloader.trouble(u'ERROR: unable to download file') + class PostProcessor(object): """Post Processor class. @@ -2308,6 +2404,7 @@ if __name__ == '__main__': photobucket_ie = PhotobucketIE() yahoo_ie = YahooIE() yahoo_search_ie = YahooSearchIE(yahoo_ie) + deposit_files_ie = DepositFilesIE() generic_ie = GenericIE() # File downloader @@ -2354,6 +2451,7 @@ if __name__ == '__main__': fd.add_info_extractor(photobucket_ie) fd.add_info_extractor(yahoo_ie) fd.add_info_extractor(yahoo_search_ie) + fd.add_info_extractor(deposit_files_ie) # This must come last since it's the # fallback if none of the others work From cefd22c648a629fb7d978bb02ebad7a2881eb495 Mon Sep 17 00:00:00 2001 From: Vasyl' Vavrychuk Date: Wed, 8 Dec 2010 11:10:28 +0200 Subject: [PATCH 2/2] Changes after code review --- youtube-dl | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/youtube-dl b/youtube-dl index 8840c284082..3de25f6b026 100755 --- a/youtube-dl +++ b/youtube-dl @@ -110,18 +110,6 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) -def sanitize_html_message(message): - """Sanitize message so it will be printed as it supposed to look in html - (i.e. remove multiple spaces)""" - - # Remove repeated spaces - words = message.split(' ') - stripped_words = [] - for word in words: - if len(word.strip()) > 0: - stripped_words.append(word.strip()) - return ' '.join(stripped_words) - class DownloadError(Exception): """Download Error exception. @@ -170,13 +158,6 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected -class DownloadRestrictionError(Exception): - """Download Restriction Error exception. - - Some services may restrict downloading in different ways. For example - they may limit amount of downloads per period of time. - """ - class FileDownloader(object): """File Downloader class. @@ -2126,7 +2107,7 @@ class DepositFilesIE(InfoExtractor): # Retrieve file webpage with 'Free download' button pressed free_download_indication = { 'gateway_result' : '1' } - request = urllib2.Request(url, urllib.urlencode(free_download_indication)) + request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) try: self.report_download_webpage(file_id) webpage = urllib2.urlopen(request).read() @@ -2140,9 +2121,10 @@ class DepositFilesIE(InfoExtractor): # Try to figure out reason of the error. mobj = re.search(r'(Attention.*?)', webpage, re.DOTALL) if (mobj is not None) and (mobj.group(1) is not None): - raise DownloadRestrictionError(sanitize_html_message(mobj.group(1))) - - self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) + restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() + self._downloader.trouble(u'ERROR: %s' % restriction_message) + else: + self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) return file_url = mobj.group(1) @@ -2153,7 +2135,7 @@ class DepositFilesIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return - file_title = simple_title = mobj.group(1).decode('utf-8') + file_title = mobj.group(1).decode('utf-8') try: # Process file information @@ -2163,7 +2145,7 @@ class DepositFilesIE(InfoExtractor): 'uploader': u'NA', 'upload_date': u'NA', 'title': file_title, - 'stitle': simple_title, + 'stitle': file_title, 'ext': file_extension.decode('utf-8'), 'format': u'NA', 'player_url': None,