Skip to content

Commit

Permalink
Merge pull request boto#1002 from jamesls/glacier-job-download
Browse files Browse the repository at this point in the history
Add a method to download an archive to a file
  • Loading branch information
garnaat committed Sep 20, 2012
2 parents 5b89d7e + e1c20f7 commit 8be4c17
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 5 deletions.
14 changes: 11 additions & 3 deletions boto/glacier/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#
import json


class UnexpectedHTTPResponseError(Exception):
def __init__(self, expected_responses, response):
self.status = response.status
Expand All @@ -31,11 +32,10 @@ def __init__(self, expected_responses, response):
body = json.loads(self.body)
self.code = body["code"]
msg = 'Expected %s, got ' % expected_responses
msg += '(%d, code=%s, message=%s)' % (expected_responses,
response.status,
msg += '(%d, code=%s, message=%s)' % (response.status,
self.code,
body["message"])
except:
except Exception:
msg = 'Expected %s, got (%d, %s)' % (expected_responses,
response.status,
self.body)
Expand All @@ -44,3 +44,11 @@ def __init__(self, expected_responses, response):

class UploadArchiveError(Exception):
    """Exception type for errors that occur while uploading an archive."""


class DownloadArchiveError(Exception):
    """Exception type for errors that occur while downloading an archive."""


class TreeHashDoesNotMatchError(DownloadArchiveError):
    """Download error for a chunk whose computed tree hash is not the expected one."""
63 changes: 61 additions & 2 deletions boto/glacier/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import urllib
import json
import math
import socket

from .exceptions import TreeHashDoesNotMatchError, DownloadArchiveError
from .writer import bytes_to_hex, chunk_hashes, tree_hash


class Job(object):

DefaultPartSize = 4 * 1024 * 1024

ResponseDataElements = (('Action', 'action', None),
('ArchiveId', 'archive_id', None),
('ArchiveSizeInBytes', 'archive_size', 0),
Expand Down Expand Up @@ -75,3 +80,57 @@ def get_output(self, byte_range=None):
return self.vault.layer1.get_job_output(self.vault.name,
self.id,
byte_range)

def download_to_file(self, filename, chunk_size=DefaultPartSize,
                     verify_hashes=True, retry_exceptions=(socket.error,)):
    """Save the archive's contents to a local file, chunk by chunk.

    The destination is opened in binary write mode and the chunked
    transfer itself is delegated to ``_download_to_fileob``.

    :type filename: str
    :param filename: The name of the file where the archive
        contents will be saved.

    :type chunk_size: int
    :param chunk_size: The chunk size to use when downloading
        the archive.

    :type verify_hashes: bool
    :param verify_hashes: Indicates whether or not to verify
        the tree hashes for each downloaded chunk.

    :type retry_exceptions: tuple
    :param retry_exceptions: Exception types handed through to the
        chunk downloader, which makes another attempt when one of
        them is raised.
    """
    # Float division so that a trailing partial chunk still counts as
    # one more chunk once the quotient is rounded up.
    chunks_needed = math.ceil(self.archive_size / float(chunk_size))
    total_chunks = int(chunks_needed)
    with open(filename, 'wb') as destination:
        self._download_to_fileob(destination, total_chunks, chunk_size,
                                 verify_hashes, retry_exceptions)

def _download_to_fileob(self, fileobj, num_chunks, chunk_size, verify_hashes,
                        retry_exceptions):
    """Download ``num_chunks`` consecutive byte ranges into ``fileobj``.

    :type fileobj: file-like object
    :param fileobj: Destination; each chunk is passed to its ``write``.

    :type num_chunks: int
    :param num_chunks: How many chunks to request.

    :type chunk_size: int
    :param chunk_size: Width of every requested byte range.

    :type verify_hashes: bool
    :param verify_hashes: When true, the tree hash computed from each
        chunk is compared with the one returned alongside the data.

    :type retry_exceptions: tuple
    :param retry_exceptions: Forwarded to ``_download_byte_range``.

    :raises TreeHashDoesNotMatchError: if verification is enabled and a
        chunk's computed tree hash differs from the expected one. The
        offending chunk is not written.
    """
    for chunk_index in xrange(num_chunks):
        # Inclusive (first, last) byte offsets of this chunk.
        first_byte = chunk_index * chunk_size
        last_byte = ((chunk_index + 1) * chunk_size) - 1
        byte_range = (first_byte, last_byte)

        downloaded = self._download_byte_range(byte_range, retry_exceptions)
        data, expected_tree_hash = downloaded

        if verify_hashes:
            hashes = chunk_hashes(data)
            actual_tree_hash = bytes_to_hex(tree_hash(hashes))
            if expected_tree_hash != actual_tree_hash:
                details = (actual_tree_hash, expected_tree_hash, byte_range)
                raise TreeHashDoesNotMatchError(
                    "The calculated tree hash %s does not match the "
                    "expected tree hash %s for the byte range %s" % details)

        fileobj.write(data)

def _download_byte_range(self, byte_range, retry_exceptions):
# You can occasionally get socket.errors when downloading
# chunks from Glacier, so each chunk can be retried up
# to 5 times.
for _ in xrange(5):
try:
response = self.get_output(byte_range)
data = response.read()
expected_tree_hash = response['TreeHash']
return data, expected_tree_hash
except retry_exceptions, e:
continue
else:
raise DownloadArchiveError("There was an error downloading"
"byte range %s: %s" % (byte_range,
e))

0 comments on commit 8be4c17

Please sign in to comment.