diff --git a/.gitignore b/.gitignore index edc76d0ca8..ecf63f949c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ MANIFEST .idea .tox .coverage +*flymake.py \ No newline at end of file diff --git a/README.rst b/README.rst index 59386a67b0..0b8e51e21e 100644 --- a/README.rst +++ b/README.rst @@ -37,6 +37,7 @@ At the moment, boto supports: * Amazon SimpleWorkflow * CloudSearch * Marketplace Web Services +* Glacier The goal of boto is to support the full breadth and depth of Amazon Web Services. In addition, boto provides support for other public diff --git a/bin/glacier b/bin/glacier new file mode 100755 index 0000000000..aad1e8befd --- /dev/null +++ b/bin/glacier @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Miguel Olivares http://moliware.com/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +""" + glacier + ~~~~~~~ + + Amazon Glacier tool built on top of boto. Look at the usage method to see + how to use it. 
+ + Author: Miguel Olivares +""" +import sys + +from boto.glacier import connect_to_region +from getopt import getopt, GetoptError +from os.path import isfile + + +COMMANDS = ('vaults', 'jobs', 'upload') + + +def usage(): + print """ +glacier <command> [args] + + Commands + vaults - Operations with vaults + jobs - Operations with jobs + upload - Upload files to a vault. If the vault doesn't exist, it is + created + + Common args: + access_key - Your AWS Access Key ID. If not supplied, boto will + use the value of the environment variable + AWS_ACCESS_KEY_ID + secret_key - Your AWS Secret Access Key. If not supplied, boto + will use the value of the environment variable + AWS_SECRET_ACCESS_KEY + region - AWS region to use. Possible values: us-east-1, us-west-1, + us-west-2, ap-northeast-1, eu-west-1. + Default: us-east-1 + + Vaults operations: + + List vaults: + glacier vaults + + Jobs operations: + + List jobs: + glacier jobs <vault name> + + Uploading files: + + glacier upload <vault name> <files> + + Examples : + glacier upload pics *.jpg + glacier upload pics a.jpg b.jpg +""" + sys.exit() + + +def connect(region, debug_level=0, access_key=None, secret_key=None): + """ Connect to a specific region """ + return connect_to_region(region, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + debug=debug_level) + + +def list_vaults(region, access_key=None, secret_key=None): + layer2 = connect(region, access_key=access_key, secret_key=secret_key) + for vault in layer2.list_vaults(): + print vault.arn + + +def list_jobs(vault_name, region, access_key=None, secret_key=None): + layer2 = connect(region, access_key=access_key, secret_key=secret_key) + print layer2.layer1.list_jobs(vault_name) + + +def upload_files(vault_name, filenames, region, access_key=None, secret_key=None): + layer2 = connect(region, access_key=access_key, secret_key=secret_key) + layer2.create_vault(vault_name) + glacier_vault = layer2.get_vault(vault_name) + for filename in filenames: + if isfile(filename): + print 'Uploading %s to %s' % (filename, vault_name) + 
glacier_vault.upload_archive(filename) + + +def main(): + if len(sys.argv) < 2: + usage() + + command = sys.argv[1] + if command not in COMMANDS: + usage() + + argv = sys.argv[2:] + options = 'a:s:r:' + long_options = ['access_key=', 'secret_key=', 'region='] + try: + opts, args = getopt(argv, options, long_options) + except GetoptError, e: + usage() + + # Parse arguments + access_key = secret_key = None + region = 'us-east-1' + for option, value in opts: + if option in ('-a', '--access_key'): + access_key = value + elif option in ('-s', '--secret_key'): + secret_key = value + elif option in ('-r', '--region'): + region = value + # handle each command + if command == 'vaults': + list_vaults(region, access_key, secret_key) + elif command == 'jobs': + if len(args) != 1: + usage() + list_jobs(args[0], region, access_key, secret_key) + elif command == 'upload': + if len(args) < 2: + usage() + upload_files(args[0], args[1:], region, access_key, secret_key) + + +if __name__ == '__main__': + main() diff --git a/boto/__init__.py b/boto/__init__.py index 5cc3f10a8d..4dd1f318a3 100644 --- a/boto/__init__.py +++ b/boto/__init__.py @@ -410,6 +410,24 @@ def connect_euca(host=None, aws_access_key_id=None, aws_secret_access_key=None, region=reg, port=port, path=path, is_secure=is_secure, **kwargs) +def connect_glacier(region=None, **kwargs): + """ + :type region: string + :param region: AWS Glacier region to connect to + + :type aws_access_key_id: string + :param aws_access_key_id: Your AWS Access Key ID + + :type aws_secret_access_key: string + :param aws_secret_access_key: Your AWS Secret Access Key + + :rtype: :class:`boto.glacier.layer2.Layer2` + :return: A connection to Amazon's Glacier Service + """ + + import boto.glacier + return boto.glacier.connect_to_region(region, **kwargs) + def connect_ec2_endpoint(url, aws_access_key_id=None, aws_secret_access_key=None, diff --git a/boto/auth.py b/boto/auth.py index 47c149af57..29f9ac53bc 100644 --- a/boto/auth.py +++ b/boto/auth.py @@ 
-366,6 +366,12 @@ def canonical_uri(self, http_request): return http_request.path def payload(self, http_request): + body = http_request.body + # If the body is a file like object, we can use + # boto.utils.compute_hash, which will avoid reading + # the entire body into memory. + if hasattr(body, 'seek') and hasattr(body, 'read'): + return boto.utils.compute_hash(body, hash_algorithm=sha256)[0] return sha256(http_request.body).hexdigest() def canonical_request(self, http_request): diff --git a/boto/glacier/__init__.py b/boto/glacier/__init__.py new file mode 100644 index 0000000000..a65733b274 --- /dev/null +++ b/boto/glacier/__init__.py @@ -0,0 +1,57 @@ +# Copyright (c) 2011 Mitch Garnaat http://garnaat.org/ +# Copyright (c) 2011 Amazon.com, Inc. or its affiliates. All Rights Reserved +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +from boto.ec2.regioninfo import RegionInfo + + +def regions(): + """ + Get all available regions for the Amazon Glacier service. 
+ + :rtype: list + :return: A list of :class:`boto.regioninfo.RegionInfo` + """ + from boto.glacier.layer2 import Layer2 + return [RegionInfo(name='us-east-1', + endpoint='glacier.us-east-1.amazonaws.com', + connection_cls=Layer2), + RegionInfo(name='us-west-1', + endpoint='glacier.us-west-1.amazonaws.com', + connection_cls=Layer2), + RegionInfo(name='us-west-2', + endpoint='glacier.us-west-2.amazonaws.com', + connection_cls=Layer2), + RegionInfo(name='ap-northeast-1', + endpoint='glacier.ap-northeast-1.amazonaws.com', + connection_cls=Layer2), + RegionInfo(name='eu-west-1', + endpoint='glacier.eu-west-1.amazonaws.com', + connection_cls=Layer2), + ] + + +def connect_to_region(region_name, **kw_params): + for region in regions(): + if region.name == region_name: + return region.connect(**kw_params) + return None diff --git a/boto/glacier/concurrent.py b/boto/glacier/concurrent.py new file mode 100644 index 0000000000..b993c67436 --- /dev/null +++ b/boto/glacier/concurrent.py @@ -0,0 +1,213 @@ +# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +import os +import math +import threading +import hashlib +import time +import logging +from Queue import Queue, Empty + +from .writer import chunk_hashes, tree_hash, bytes_to_hex +from .exceptions import UploadArchiveError + + +DEFAULT_PART_SIZE = 4 * 1024 * 1024 +_END_SENTINEL = object() +log = logging.getLogger('boto.glacier.concurrent') + + +class ConcurrentUploader(object): + """Concurrently upload an archive to glacier. + + This class uses a thread pool to concurrently upload an archive + to glacier using the multipart upload API. + + The threadpool is completely managed by this class and is + transparent to the users of this class. + + """ + def __init__(self, api, vault_name, part_size=DEFAULT_PART_SIZE, + num_threads=10): + """ + :type api: :class:`boto.glacier.layer1.Layer1` + :param api: A layer1 glacier object. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type part_size: int + :param part_size: The size, in bytes, of the chunks to use when uploading + the archive parts. The part size must be a megabyte multiplied by + a power of two. + + """ + self._api = api + self._vault_name = vault_name + self._part_size = part_size + self._num_threads = num_threads + self._threads = [] + + def upload(self, filename, description=None): + """Concurrently create an archive. + + :type file: str + :param file: The filename to upload + + :type description: str + :param description: The description of the archive. + + :rtype: str + :return: The archive id of the newly created archive. 
+ + """ + fileobj = open(filename, 'rb') + total_size = os.fstat(fileobj.fileno()).st_size + total_parts = int(math.ceil(total_size / float(self._part_size))) + hash_chunks = [None] * total_parts + worker_queue = Queue() + result_queue = Queue() + response = self._api.initiate_multipart_upload(self._vault_name, + self._part_size, + description) + upload_id = response['UploadId'] + # The basic idea is to add the chunks (the offsets not the actual + # contents) to a work queue, start up a thread pool, let the crank + # through the items in the work queue, and then place their results + # in a result queue which we use to complete the multipart upload. + self._add_work_items_to_queue(total_parts, worker_queue) + self._start_upload_threads(result_queue, upload_id, + worker_queue, filename) + try: + self._wait_for_upload_threads(hash_chunks, result_queue, total_parts) + except UploadArchiveError, e: + log.debug("An error occurred while uploading an archive, aborting " + "multipart upload.") + self._api.abort_multipart_upload(self._vault_name, upload_id) + raise e + log.debug("Completing upload.") + response = self._api.complete_multipart_upload( + self._vault_name, upload_id, bytes_to_hex(tree_hash(hash_chunks)), + total_size) + log.debug("Upload finished.") + return response['ArchiveId'] + + def _wait_for_upload_threads(self, hash_chunks, result_queue, total_parts): + for _ in xrange(total_parts): + result = result_queue.get() + if isinstance(result, Exception): + log.debug("An error was found in the result queue, terminating " + "threads: %s", result) + self._shutdown_threads() + raise UploadArchiveError("An error occurred while uploading " + "an archive: %s" % result) + # Each unit of work returns the tree hash for the given part + # number, which we use at the end to compute the tree hash of + # the entire archive. 
+ part_number, tree_sha256 = result + hash_chunks[part_number] = tree_sha256 + self._shutdown_threads() + + def _shutdown_threads(self): + log.debug("Shutting down threads.") + for thread in self._threads: + thread.should_continue = False + for thread in self._threads: + thread.join() + log.debug("Threads have exited.") + + def _start_upload_threads(self, result_queue, upload_id, worker_queue, filename): + log.debug("Starting threads.") + for _ in xrange(self._num_threads): + thread = UploadWorkerThread(self._api, self._vault_name, filename, + upload_id, worker_queue, result_queue) + time.sleep(0.2) + thread.start() + self._threads.append(thread) + + def _add_work_items_to_queue(self, total_parts, worker_queue): + log.debug("Adding work items to queue.") + for i in xrange(total_parts): + worker_queue.put((i, self._part_size)) + for i in xrange(self._num_threads): + worker_queue.put(_END_SENTINEL) + + +class UploadWorkerThread(threading.Thread): + def __init__(self, api, vault_name, filename, upload_id, + worker_queue, result_queue, num_retries=5, + time_between_retries=5, + retry_exceptions=Exception): + threading.Thread.__init__(self) + self._api = api + self._vault_name = vault_name + self._filename = filename + self._fileobj = open(filename, 'rb') + self._worker_queue = worker_queue + self._result_queue = result_queue + self._upload_id = upload_id + self._num_retries = num_retries + self._time_between_retries = time_between_retries + self._retry_exceptions = retry_exceptions + self.should_continue = True + + def run(self): + while self.should_continue: + try: + work = self._worker_queue.get(timeout=1) + except Empty: + continue + if work is _END_SENTINEL: + return + result = self._process_chunk(work) + self._result_queue.put(result) + + def _process_chunk(self, work): + result = None + for _ in xrange(self._num_retries): + try: + result = self._upload_chunk(work) + break + except self._retry_exceptions, e: + log.error("Exception caught uploading part number %s 
for " + "vault %s, filename: %s", work[0], self._vault_name, + self._filename) + time.sleep(self._time_between_retries) + result = e + return result + + def _upload_chunk(self, work): + part_number, part_size = work + start_byte = part_number * part_size + self._fileobj.seek(start_byte) + contents = self._fileobj.read(part_size) + linear_hash = hashlib.sha256(contents).hexdigest() + tree_hash_bytes = tree_hash(chunk_hashes(contents)) + byte_range = (start_byte, start_byte + len(contents) - 1) + log.debug("Uploading chunk %s of size %s", part_number, part_size) + response = self._api.upload_part(self._vault_name, self._upload_id, + linear_hash, + bytes_to_hex(tree_hash_bytes), + byte_range, contents) + # Reading the response allows the connection to be reused. + response.read() + return (part_number, tree_hash_bytes) diff --git a/boto/glacier/exceptions.py b/boto/glacier/exceptions.py new file mode 100644 index 0000000000..3942da686a --- /dev/null +++ b/boto/glacier/exceptions.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +import json + +class UnexpectedHTTPResponseError(Exception): + def __init__(self, expected_responses, response): + self.status = response.status + self.body = response.read() + self.code = None + try: + body = json.loads(self.body) + self.code = body["code"] + msg = 'Expected %s, got ' % expected_responses + msg += '(%d, code=%s, message=%s)' % ( + response.status, + self.code, + body["message"]) + except: + msg = 'Expected %s, got (%d, %s)' % (expected_responses, + response.status, + self.body) + super(UnexpectedHTTPResponseError, self).__init__(msg) + + +class UploadArchiveError(Exception): + pass diff --git a/boto/glacier/job.py b/boto/glacier/job.py new file mode 100644 index 0000000000..cdb53bc643 --- /dev/null +++ b/boto/glacier/job.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +import urllib +import json + + +class Job(object): + + ResponseDataElements = (('Action', 'action', None), + ('ArchiveId', 'archive_id', None), + ('ArchiveSizeInBytes', 'archive_size', 0), + ('Completed', 'completed', False), + ('CompletionDate', 'completion_date', None), + ('CreationDate', 'creation_date', None), + ('InventorySizeInBytes', 'inventory_size', 0), + ('JobDescription', 'description', None), + ('JobId', 'id', None), + ('SHA256TreeHash', 'sha256_treehash', None), + ('SNSTopic', 'sns_topic', None), + ('StatusCode', 'status_code', None), + ('StatusMessage', 'status_message', None), + ('VaultARN', 'arn', None)) + + def __init__(self, vault, response_data=None): + self.vault = vault + if response_data: + for response_name, attr_name, default in self.ResponseDataElements: + setattr(self, attr_name, response_data[response_name]) + else: + for response_name, attr_name, default in self.ResponseDataElements: + setattr(self, attr_name, default) + + def __repr__(self): + return 'Job(%s)' % self.arn + + def get_output(self, byte_range=None): + """ + This operation downloads the output of the job. Depending on + the job type you specified when you initiated the job, the + output will be either the content of an archive or a vault + inventory. + + You can download all the job output or download a portion of + the output by specifying a byte range. In the case of an + archive retrieval job, depending on the byte range you + specify, Amazon Glacier returns the checksum for the portion + of the data. You can compute the checksum on the client and + verify that the values match to ensure the portion you + downloaded is the correct data. 
+ + :type byte_range: tuple + :param range: A tuple of integer specifying the slice (in bytes) + of the archive you want to receive + """ + return self.vault.layer1.get_job_output(self.vault.name, + self.id, + byte_range) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py new file mode 100644 index 0000000000..480f426b96 --- /dev/null +++ b/boto/glacier/layer1.py @@ -0,0 +1,626 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ +# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# + +import os +import json +import urllib + +import boto.glacier +from boto.connection import AWSAuthConnection +from .exceptions import UnexpectedHTTPResponseError +from .response import GlacierResponse + + +class Layer1(AWSAuthConnection): + + Version = '2012-06-01' + """Glacier API version.""" + + def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, + account_id='-', is_secure=True, port=None, + proxy=None, proxy_port=None, + proxy_user=None, proxy_pass=None, debug=0, + https_connection_factory=None, path='/', + provider='aws', security_token=None, + suppress_consec_slashes=True, + region=None, region_name='us-east-1'): + + if not region: + for reg in boto.glacier.regions(): + if reg.name == region_name: + region = reg + break + + self.region = region + self.account_id = account_id + AWSAuthConnection.__init__(self, region.endpoint, + aws_access_key_id, aws_secret_access_key, + True, port, proxy, proxy_port, + proxy_user, proxy_pass, debug, + https_connection_factory, + path, provider, security_token, + suppress_consec_slashes) + + def _required_auth_capability(self): + return ['hmac-v4'] + + def make_request(self, verb, resource, headers=None, + data='', ok_responses=(200,), params=None, + response_headers=None): + if headers is None: + headers = {} + headers['x-amz-glacier-version'] = self.Version + uri = '/%s/%s' % (self.account_id, resource) + if params: + param_list = [] + for key, value in params.items(): + param_list.append('%s=%s' % (urllib.quote(key), + urllib.quote(str(value)))) + uri += '?' + '&'.join(param_list) + response = AWSAuthConnection.make_request(self, verb, uri, + headers=headers, + data=data) + if response.status in ok_responses: + return GlacierResponse(response, response_headers) + else: + # create glacier-specific exceptions + raise UnexpectedHTTPResponseError(ok_responses, response) + + # Vaults + + def list_vaults(self, limit=None, marker=None): + """ + This operation lists all vaults owned by the calling user’s 
The list returned in the response is ASCII-sorted by + vault name. + + By default, this operation returns up to 1,000 items. If there + are more vaults to list, the marker field in the response body + contains the vault Amazon Resource Name (ARN) at which to + continue the list with a new List Vaults request; otherwise, + the marker field is null. In your next List Vaults request you + set the marker parameter to the value Amazon Glacier returned + in the responses to your previous List Vaults request. You can + also limit the number of vaults returned in the response by + specifying the limit parameter in the request. + + :type limit: int + :param limit: The maximum number of items returned in the + response. If you don't specify a value, the List Vaults + operation returns up to 1,000 items. + + :type marker: str + :param marker: A string used for pagination. marker specifies + the vault ARN after which the listing of vaults should + begin. (The vault specified by marker is not included in + the returned list.) Get the marker value from a previous + List Vaults response. You need to include the marker only + if you are continuing the pagination of results started in + a previous List Vaults request. Specifying an empty value + ("") for the marker returns a list of vaults starting + from the first vault. + """ + params = {} + if limit: + params['limit'] = limit + if marker: + params['marker'] = marker + return self.make_request('GET', 'vaults', params=params) + + def describe_vault(self, vault_name): + """ + This operation returns information about a vault, including + the vault Amazon Resource Name (ARN), the date the vault was + created, the number of archives contained within the vault, + and the total size of all the archives in the vault. The + number of archives and their total size are as of the last + vault inventory Amazon Glacier generated. Amazon Glacier + generates vault inventories approximately daily. 
This means + that if you add or remove an archive from a vault, and then + immediately send a Describe Vault request, the response might + not reflect the changes. + + :type vault_name: str + :param vault_name: The name of the new vault + """ + uri = 'vaults/%s' % vault_name + return self.make_request('GET', uri) + + def create_vault(self, vault_name): + """ + This operation creates a new vault with the specified name. + The name of the vault must be unique within a region for an + AWS account. You can create up to 1,000 vaults per + account. For information on creating more vaults, go to the + Amazon Glacier product detail page. + + You must use the following guidelines when naming a vault. + + Names can be between 1 and 255 characters long. + + Allowed characters are a–z, A–Z, 0–9, '_' (underscore), + '-' (hyphen), and '.' (period). + + This operation is idempotent, you can send the same request + multiple times and it has no further effect after the first + time Amazon Glacier creates the specified vault. + + :type vault_name: str + :param vault_name: The name of the new vault + """ + uri = 'vaults/%s' % vault_name + return self.make_request('PUT', uri, ok_responses=(201,), + response_headers=[('Location', 'Location')]) + + def delete_vault(self, vault_name): + """ + This operation deletes a vault. Amazon Glacier will delete a + vault only if there are no archives in the vault as per the + last inventory and there have been no writes to the vault + since the last inventory. If either of these conditions is not + satisfied, the vault deletion fails (that is, the vault is not + removed) and Amazon Glacier returns an error. + + This operation is idempotent, you can send the same request + multiple times and it has no further effect after the first + time Amazon Glacier delete the specified vault. 
+ + :type vault_name: str + :param vault_name: The name of the new vault + """ + uri = 'vaults/%s' % vault_name + return self.make_request('DELETE', uri, ok_responses=(204,)) + + def get_vault_notifications(self, vault_name): + """ + This operation retrieves the notification-configuration + subresource set on the vault. + + :type vault_name: str + :param vault_name: The name of the new vault + """ + uri = 'vaults/%s/notification-configuration' % vault_name + return self.make_request('GET', uri) + + def set_vault_notifications(self, vault_name, notification_config): + """ + This operation retrieves the notification-configuration + subresource set on the vault. + + :type vault_name: str + :param vault_name: The name of the new vault + + :type notification_config: dict + :param notification_config: A Python dictionary containing + an SNS Topic and events for which you want Amazon Glacier + to send notifications to the topic. Possible events are: + + * ArchiveRetrievalCompleted - occurs when a job that was + initiated for an archive retrieval is completed. + * InventoryRetrievalCompleted - occurs when a job that was + initiated for an inventory retrieval is completed. + + The format of the dictionary is: + + {'SNSTopic': 'mytopic', + 'Events': [event1,...]} + """ + uri = 'vaults/%s/notification-configuration' % vault_name + json_config = json.dumps(notification_config) + return self.make_request('PUT', uri, data=json_config, + ok_responses=(204,)) + + def delete_vault_notifications(self, vault_name): + """ + This operation deletes the notification-configuration + subresource set on the vault. 
+ + :type vault_name: str + :param vault_name: The name of the new vault + """ + uri = 'vaults/%s/notification-configuration' % vault_name + return self.make_request('DELETE', uri, ok_responses=(204,)) + + # Jobs + + def list_jobs(self, vault_name, completed=None, status_code=None, + limit=None, marker=None): + """ + This operation lists jobs for a vault including jobs that are + in-progress and jobs that have recently finished. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type completed: boolean + :param completed: Specifies the state of the jobs to return. + If a value of True is passed, only completed jobs will + be returned. If a value of False is passed, only + uncompleted jobs will be returned. If no value is + passed, all jobs will be returned. + + :type status_code: string + :param status_code: Specifies the type of job status to return. + Valid values are: InProgress|Succeeded|Failed. If not + specified, jobs with all status codes are returned. + + :type limit: int + :param limit: The maximum number of items returned in the + response. If you don't specify a value, the List Jobs + operation returns up to 1,000 items. + + :type marker: str + :param marker: An opaque string used for pagination. marker + specifies the job at which the listing of jobs should + begin. Get the marker value from a previous List Jobs + response. You need only include the marker if you are + continuing the pagination of results started in a previous + List Jobs request. 
+ + """ + params = {} + if limit: + params['limit'] = limit + if marker: + params['marker'] = marker + uri = 'vaults/%s/jobs' % vault_name + return self.make_request('GET', uri, params=params) + + def describe_job(self, vault_name, job_id): + """ + This operation returns information about a job you previously + initiated, including the job initiation date, the user who + initiated the job, the job status code/message and the Amazon + Simple Notification Service (Amazon SNS) topic to notify after + Amazon Glacier completes the job. + + :type vault_name: str + :param vault_name: The name of the new vault + + :type job_id: str + :param job_id: The ID of the job. + """ + uri = 'vaults/%s/jobs/%s' % (vault_name, job_id) + return self.make_request('GET', uri, ok_responses=(200,)) + + def initiate_job(self, vault_name, job_data): + """ + This operation initiates a job of the specified + type. Retrieving an archive or a vault inventory are + asynchronous operations that require you to initiate a job. It + is a two-step process: + + * Initiate a retrieval job. + * After the job completes, download the bytes. + + The retrieval is executed asynchronously. When you initiate + a retrieval job, Amazon Glacier creates a job and returns a + job ID in the response. + + :type vault_name: str + :param vault_name: The name of the new vault + + :type job_data: dict + :param job_data: A Python dictionary containing the + information about the requested job. The dictionary + can contain the following attributes: + + * ArchiveId - The ID of the archive you want to retrieve. + This field is required only if the Type is set to + archive-retrieval. + * Description - The optional description for the job. + * Format - When initiating a job to retrieve a vault + inventory, you can optionally add this parameter to + specify the output format. Valid values are: CSV|JSON. 
+                * SNSTopic - The Amazon SNS topic ARN where Amazon Glacier
+                  sends a notification when the job is completed and the
+                  output is ready for you to download.
+                * Type - The job type. Valid values are:
+                  archive-retrieval|inventory-retrieval
+        """
+        uri = 'vaults/%s/jobs' % vault_name
+        response_headers = [('x-amz-job-id', u'JobId'),
+                            ('Location', u'Location')]
+        json_job_data = json.dumps(job_data)
+        return self.make_request('POST', uri, data=json_job_data,
+                                 ok_responses=(202,),
+                                 response_headers=response_headers)
+
+    def get_job_output(self, vault_name, job_id, byte_range=None):
+        """
+        This operation downloads the output of the job you initiated
+        using Initiate a Job. Depending on the job type
+        you specified when you initiated the job, the output will be
+        either the content of an archive or a vault inventory.
+
+        You can download all the job output or download a portion of
+        the output by specifying a byte range. In the case of an
+        archive retrieval job, depending on the byte range you
+        specify, Amazon Glacier returns the checksum for the portion
+        of the data. You can compute the checksum on the client and
+        verify that the values match to ensure the portion you
+        downloaded is the correct data.
+
+        :type vault_name: str
+        :param vault_name: The name of the vault
+
+        :type job_id: str
+        :param job_id: The ID of the job.
+ + :type byte_range: tuple + :param range: A tuple of integers specifying the slice (in bytes) + of the archive you want to receive + """ + response_headers = [('x-amz-sha256-tree-hash', u'TreeHash'), + ('Content-Range', u'ContentRange'), + ('Content-Type', u'ContentType')] + headers = None + if byte_range: + headers = {'Range': 'bytes=%d-%d' % byte_range} + uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id) + response = self.make_request('GET', uri, headers=headers, + ok_responses=(200, 206), + response_headers=response_headers) + return response + + # Archives + + def upload_archive(self, vault_name, archive, + linear_hash, tree_hash, description=None): + """ + This operation adds an archive to a vault. For a successful + upload, your data is durably persisted. In response, Amazon + Glacier returns the archive ID in the x-amz-archive-id header + of the response. You should save the archive ID returned so + that you can access the archive later. + + :type vault_name: str :param + :param vault_name: The name of the vault + + :type archive: bytes + :param archive: The data to upload. + + :type linear_hash: str + :param linear_hash: The SHA256 checksum (a linear hash) of the + payload. + + :type tree_hash: str + :param tree_hash: The user-computed SHA256 tree hash of the + payload. For more information on computing the + tree hash, see http://goo.gl/u7chF. + + :type description: str + :param description: An optional description of the archive. + """ + response_headers = [('x-amz-archive-id', u'ArchiveId'), + ('Location', u'Location'), + ('x-amz-sha256-tree-hash', u'TreeHash')] + uri = 'vaults/%s/archives' % vault_name + try: + content_length = str(len(archive)) + except TypeError: + # If a file like object is provided, try to retrieve + # the file size via fstat. 
+ content_length = str(os.fstat(archive.fileno()).st_size) + headers = {'x-amz-content-sha256': linear_hash, + 'x-amz-sha256-tree-hash': tree_hash, + 'Content-Length': content_length} + if description: + headers['x-amz-archive-description'] = description + return self.make_request('POST', uri, headers=headers, + data=archive, ok_responses=(201,), + response_headers=response_headers) + + def delete_archive(self, vault_name, archive_id): + """ + This operation deletes an archive from a vault. + + :type vault_name: str + :param vault_name: The name of the new vault + + :type archive_id: str + :param archive_id: The ID for the archive to be deleted. + """ + uri = 'vaults/%s/archives/%s' % (vault_name, archive_id) + return self.make_request('DELETE', uri, ok_responses=(204,)) + + # Multipart + + def initiate_multipart_upload(self, vault_name, part_size, + description=None): + """ + Initiate a multipart upload. Amazon Glacier creates a + multipart upload resource and returns it's ID. You use this + ID in subsequent multipart upload operations. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type description: str + :param description: An optional description of the archive. + + :type part_size: int + :param part_size: The size of each part except the last, in bytes. + The part size must be a multiple of 1024 KB multiplied by + a power of 2. The minimum allowable part size is 1MB and the + maximum is 4GB. 
+ """ + response_headers = [('x-amz-multipart-upload-id', u'UploadId'), + ('Location', u'Location')] + headers = {'x-amz-part-size': str(part_size)} + if description: + headers['x-amz-archive-description'] = description + uri = 'vaults/%s/multipart-uploads' % vault_name + response = self.make_request('POST', uri, headers=headers, + ok_responses=(201,), + response_headers=response_headers) + return response + + def complete_multipart_upload(self, vault_name, upload_id, + sha256_treehash, archive_size): + """ + Call this to inform Amazon Glacier that all of the archive parts + have been uploaded and Amazon Glacier can now assemble the archive + from the uploaded parts. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + + :type sha256_treehash: str + :param sha256_treehash: The SHA256 tree hash of the entire + archive. It is the tree hash of SHA256 tree hash of the + individual parts. If the value you specify in the request + does not match the SHA256 tree hash of the final assembled + archive as computed by Amazon Glacier, Amazon Glacier + returns an error and the request fails. + + :type archive_size: int + :param archive_size: The total size, in bytes, of the entire + archive. This value should be the sum of all the sizes of + the individual parts that you uploaded. + """ + response_headers = [('x-amz-archive-id', u'ArchiveId'), + ('Location', u'Location')] + headers = {'x-amz-sha256-tree-hash': sha256_treehash, + 'x-amz-archive-size': str(archive_size)} + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) + response = self.make_request('POST', uri, headers=headers, + ok_responses=(201,), + response_headers=response_headers) + return response + + def abort_multipart_upload(self, vault_name, upload_id): + """ + Call this to abort a multipart upload identified by the upload ID. 
+ + :type vault_name: str + :param vault_name: The name of the vault. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + """ + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) + return self.make_request('DELETE', uri, ok_responses=(204,)) + + def list_multipart_uploads(self, vault_name, limit=None, marker=None): + """ + Lists in-progress multipart uploads for the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type limit: int + :param limit: The maximum number of items returned in the + response. If you don't specify a value, the operation + returns up to 1,000 items. + + :type marker: str + :param marker: An opaque string used for pagination. marker + specifies the item at which the listing should + begin. Get the marker value from a previous + response. You need only include the marker if you are + continuing the pagination of results started in a previous + request. + """ + params = {} + if limit: + params['limit'] = limit + if marker: + params['marker'] = marker + uri = 'vaults/%s/multipart-uploads' % vault_name + return self.make_request('GET', uri, params=params) + + def list_parts(self, vault_name, upload_id, limit=None, marker=None): + """ + Lists in-progress multipart uploads for the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + + :type limit: int + :param limit: The maximum number of items returned in the + response. If you don't specify a value, the operation + returns up to 1,000 items. + + :type marker: str + :param marker: An opaque string used for pagination. marker + specifies the item at which the listing should + begin. Get the marker value from a previous + response. You need only include the marker if you are + continuing the pagination of results started in a previous + request. 
+ """ + params = {} + if limit: + params['limit'] = limit + if marker: + params['marker'] = marker + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) + return self.make_request('GET', uri, params=params) + + def upload_part(self, vault_name, upload_id, linear_hash, + tree_hash, byte_range, part_data): + """ + Lists in-progress multipart uploads for the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type linear_hash: str + :param linear_hash: The SHA256 checksum (a linear hash) of the + payload. + + :type tree_hash: str + :param tree_hash: The user-computed SHA256 tree hash of the + payload. For more information on computing the + tree hash, see http://goo.gl/u7chF. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + + :type byte_range: tuple of ints + :param byte_range: Identfies the range of bytes in the assembled + archive that will be uploaded in this part. + + :type part_data: bytes + :param part_data: The data to be uploaded for the part + """ + headers = {'x-amz-content-sha256': linear_hash, + 'x-amz-sha256-tree-hash': tree_hash, + 'Content-Range': 'bytes %d-%d/*' % byte_range} + response_headers = [('x-amz-sha256-tree-hash', u'TreeHash')] + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) + return self.make_request('PUT', uri, headers=headers, + data=part_data, ok_responses=(204,), + response_headers=response_headers) diff --git a/boto/glacier/layer2.py b/boto/glacier/layer2.py new file mode 100644 index 0000000000..e519ca896c --- /dev/null +++ b/boto/glacier/layer2.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, 
modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +from .layer1 import Layer1 +from .vault import Vault + + +class Layer2(object): + """ + Provides a more pythonic and friendly interface to Glacier based on Layer1 + """ + + def __init__(self, *args, **kwargs): + # Accept a passed in layer1, mainly to allow easier testing + if "layer1" in kwargs: + self.layer1 = kwargs["layer1"] + else: + self.layer1 = Layer1(*args, **kwargs) + + def create_vault(self, name): + """Creates a vault. + + :type name: str + :param name: The name of the vault + + :rtype: :class:`boto.glacier.vault.Vault` + :return: A Vault object representing the vault. + """ + self.layer1.create_vault(name) + return self.get_vault(name) + + def delete_vault(self, name): + """Delete a vault. + + This operation deletes a vault. Amazon Glacier will delete a + vault only if there are no archives in the vault as per the + last inventory and there have been no writes to the vault + since the last inventory. If either of these conditions is not + satisfied, the vault deletion fails (that is, the vault is not + removed) and Amazon Glacier returns an error. 
+ + This operation is idempotent, you can send the same request + multiple times and it has no further effect after the first + time Amazon Glacier delete the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault to delete. + """ + return self.layer1.delete_vault(name) + + def get_vault(self, name): + """ + Get an object representing a named vault from Glacier. This + operation does not check if the vault actually exists. + + :type name: str + :param name: The name of the vault + + :rtype: :class:`boto.glacier.vault.Vault` + :return: A Vault object representing the vault. + """ + response_data = self.layer1.describe_vault(name) + return Vault(self.layer1, response_data) + + def list_vaults(self): + """ + Return a list of all vaults associated with the account ID. + + :rtype: List of :class:`boto.glacier.vault.Vault` + :return: A list of Vault objects. + """ + response_data = self.layer1.list_vaults() + return [Vault(self.layer1, rd) for rd in response_data['VaultList']] diff --git a/boto/glacier/response.py b/boto/glacier/response.py new file mode 100644 index 0000000000..57bd4e4ee7 --- /dev/null +++ b/boto/glacier/response.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +import json + +class GlacierResponse(dict): + """ + Represents a response from Glacier layer1. It acts as a dictionary + containing the combined keys received via JSON in the body (if + supplied) and headers. + """ + def __init__(self, http_response, response_headers): + self.http_response = http_response + self.status = http_response.status + self[u'RequestId'] = http_response.getheader('x-amzn-requestid') + if response_headers: + for header_name, item_name in response_headers: + self[item_name] = http_response.getheader(header_name) + if http_response.getheader('Content-Type') == 'application/json': + body = json.loads(http_response.read()) + self.update(body) + size = http_response.getheader('Content-Length', None) + if size is not None: + self.size = size + + def read(self, amt=None): + "Reads and returns the response body, or up to the next amt bytes." 
+ return self.http_response.read(amt) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py new file mode 100644 index 0000000000..4d0e072334 --- /dev/null +++ b/boto/glacier/vault.py @@ -0,0 +1,271 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# + +from .job import Job +from .writer import Writer, compute_hashes_from_fileobj +from .concurrent import ConcurrentUploader +import os.path + +_MEGABYTE = 1024 * 1024 + + +class Vault(object): + + DefaultPartSize = 4 * _MEGABYTE + SingleOperationThreshold = 100 * _MEGABYTE + + ResponseDataElements = (('VaultName', 'name', None), + ('VaultARN', 'arn', None), + ('CreationDate', 'creation_date', None), + ('LastInventoryDate', 'last_inventory_date', None), + ('SizeInBytes', 'size', 0), + ('NumberOfArchives', 'number_of_archives', 0)) + + def __init__(self, layer1, response_data=None): + self.layer1 = layer1 + if response_data: + for response_name, attr_name, default in self.ResponseDataElements: + value = response_data[response_name] + if isinstance(value, unicode): + value = value.encode('utf8') + setattr(self, attr_name, value) + else: + for response_name, attr_name, default in self.ResponseDataElements: + setattr(self, attr_name, default) + + def __repr__(self): + return 'Vault("%s")' % self.arn + + def delete(self): + """ + Delete's this vault. WARNING! + """ + self.layer1.delete_vault(self.name) + + def upload_archive(self, filename): + """ + Adds an archive to a vault. For archives greater than 100MB the + multipart upload will be used. + + :type file: str + :param file: A filename to upload + + :rtype: str + :return: The archive id of the newly created archive + """ + if os.path.getsize(filename) > self.SingleOperationThreshold: + return self.create_archive_from_file(filename) + return self._upload_archive_single_operation(filename) + + def _upload_archive_single_operation(self, filename): + """ + Adds an archive to a vault in a single operation. 
It's recommended for + archives less than 100MB + :type file: str + :param file: A filename to upload + + :rtype: str + :return: The archive id of the newly created archive + """ + with open(filename, 'rb') as fileobj: + linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj) + fileobj.seek(0) + response = self.layer1.upload_archive(self.name, fileobj, + linear_hash, tree_hash) + return response['ArchiveId'] + + def create_archive_writer(self, part_size=DefaultPartSize, + description=None): + """ + Create a new archive and begin a multi-part upload to it. + Returns a file-like object to which the data for the archive + can be written. Once all the data is written the file-like + object should be closed, you can then call the get_archive_id + method on it to get the ID of the created archive. + + :type part_size: int + :param part_size: The part size for the multipart upload. + + :rtype: :class:`boto.glaicer.writer.Writer` + :return: A Writer object that to which the archive data + should be written. + """ + response = self.layer1.initiate_multipart_upload(self.name, + part_size, + description) + return Writer(self, response['UploadId'], part_size=part_size) + + def create_archive_from_file(self, filename=None, file_obj=None): + """ + Create a new archive and upload the data from the given file + or file-like object. + + :type filename: str + :param filename: A filename to upload + + :type file_obj: file + :param file_obj: A file-like object to upload + + :rtype: str + :return: The archive id of the newly created archive + """ + if not file_obj: + file_obj = open(filename, "rb") + + writer = self.create_archive_writer() + while True: + data = file_obj.read(self.DefaultPartSize) + if not data: + break + writer.write(data) + writer.close() + return writer.get_archive_id() + + def concurrent_create_archive_from_file(self, filename): + """ + Create a new archive from a file and upload the given + file. 
+ + This is a convenience method around the + :class:`boto.glacier.concurrent.ConcurrentUploader` + class. This method will perform a multipart upload + and upload the parts of the file concurrently. + + :type filename: str + :param filename: A filename to upload + + :raises: `boto.glacier.exception.UploadArchiveError` is an error + occurs during the upload process. + + :rtype: str + :return: The archive id of the newly created archive + + """ + uploader = ConcurrentUploader(self.layer1, self.name) + archive_id = uploader.upload(filename) + return archive_id + + def retrieve_archive(self, archive_id, sns_topic=None, + description=None): + """ + Initiate a archive retrieval job to download the data from an + archive. You will need to wait for the notification from + Amazon (via SNS) before you can actually download the data, + this takes around 4 hours. + + :type archive_id: str + :param archive_id: The id of the archive + + :type description: str + :param description: An optional description for the job. + + :type sns_topic: str + :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier + sends notification when the job is completed and the output + is ready for you to download. + + :rtype: :class:`boto.glacier.job.Job` + :return: A Job object representing the retrieval job. + """ + job_data = {'Type': 'archive-retrieval', + 'ArchiveId': archive_id} + if sns_topic is not None: + job_data['SNSTopic'] = sns_topic + if description is not None: + job_data['Description'] = description + + response = self.layer1.initiate_job(self.name, job_data) + return self.get_job(response['JobId']) + + def retrieve_inventory(self, sns_topic=None, + description=None): + """ + Initiate a inventory retrieval job to list the items in the + vault. You will need to wait for the notification from + Amazon (via SNS) before you can actually download the data, + this takes around 4 hours. + + :type description: str + :param description: An optional description for the job. 
+
+        :type sns_topic: str
+        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+            sends notification when the job is completed and the output
+            is ready for you to download.
+
+        :rtype: str
+        :return: The ID of the newly created inventory retrieval job.
+        """
+        job_data = {'Type': 'inventory-retrieval'}
+        if sns_topic is not None:
+            job_data['SNSTopic'] = sns_topic
+        if description is not None:
+            job_data['Description'] = description
+
+        response = self.layer1.initiate_job(self.name, job_data)
+        return response['JobId']
+
+    def delete_archive(self, archive_id):
+        """
+        This operation deletes an archive from the vault.
+
+        :type archive_id: str
+        :param archive_id: The ID for the archive to be deleted.
+        """
+        return self.layer1.delete_archive(self.name, archive_id)
+
+    def get_job(self, job_id):
+        """
+        Get an object representing a job in progress.
+
+        :type job_id: str
+        :param job_id: The ID of the job
+
+        :rtype: :class:`boto.glacier.job.Job`
+        :return: A Job object representing the job.
+        """
+        response_data = self.layer1.describe_job(self.name, job_id)
+        return Job(self, response_data)
+
+    def list_jobs(self, completed=None, status_code=None):
+        """
+        Return a list of Job objects related to this vault.
+
+        :type completed: boolean
+        :param completed: Specifies the state of the jobs to return.
+            If a value of True is passed, only completed jobs will
+            be returned. If a value of False is passed, only
+            uncompleted jobs will be returned. If no value is
+            passed, all jobs will be returned.
+
+        :type status_code: string
+        :param status_code: Specifies the type of job status to return.
+            Valid values are: InProgress|Succeeded|Failed. If not
+            specified, jobs with all status codes are returned.
+
+        :rtype: list of :class:`boto.glacier.job.Job`
+        :return: A list of Job objects related to this vault.
+ """ + response_data = self.layer1.list_jobs(self.name, completed, + status_code) + return [Job(self, jd) for jd in response_data['JobList']] diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py new file mode 100644 index 0000000000..42db99473a --- /dev/null +++ b/boto/glacier/writer.py @@ -0,0 +1,170 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# Tree hash implementation from Aaron Brady bradya@gmail.com +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# + +import urllib +import hashlib +import math +import json + + +_ONE_MEGABYTE = 1024 * 1024 + + +def chunk_hashes(bytestring, chunk_size=_ONE_MEGABYTE): + chunk_count = int(math.ceil(len(bytestring) / float(chunk_size))) + hashes = [] + for i in xrange(chunk_count): + start = i * chunk_size + end = (i + 1) * chunk_size + hashes.append(hashlib.sha256(bytestring[start:end]).digest()) + return hashes + + +def tree_hash(fo): + """ + Given a hash of each 1MB chunk (from chunk_hashes) this will hash + together adjacent hashes until it ends up with one big one. So a + tree of hashes. + """ + hashes = [] + hashes.extend(fo) + while len(hashes) > 1: + new_hashes = [] + while True: + if len(hashes) > 1: + first = hashes.pop(0) + second = hashes.pop(0) + new_hashes.append(hashlib.sha256(first + second).digest()) + elif len(hashes) == 1: + only = hashes.pop(0) + new_hashes.append(only) + else: + break + hashes.extend(new_hashes) + return hashes[0] + + +def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024): + """Compute the linear and tree hash from a fileobj. + + This function will compute the linear/tree hash of a fileobj + in a single pass through the fileobj. + + :param fileobj: A file like object. + + :param chunk_size: The size of the chunks to use for the tree + hash. This is also the buffer size used to read from + `fileobj`. + + :rtype: tuple + :return: A tuple of (linear_hash, tree_hash). Both hashes + are returned in hex. + + """ + linear_hash = hashlib.sha256() + chunks = [] + chunk = fileobj.read(chunk_size) + while chunk: + linear_hash.update(chunk) + chunks.append(hashlib.sha256(chunk).digest()) + chunk = fileobj.read(chunk_size) + return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks)) + + +def bytes_to_hex(str): + return ''.join(["%02x" % ord(x) for x in str]).strip() + + +class Writer(object): + """ + Presents a file-like object for writing to a Amazon Glacier + Archive. The data is written using the multi-part upload API. 
+ """ + def __init__(self, vault, upload_id, part_size): + self.vault = vault + self.upload_id = upload_id + self.part_size = part_size + + self._buffer_size = 0 + self._uploaded_size = 0 + self._buffer = [] + self._tree_hashes = [] + + self.archive_location = None + self.closed = False + + def send_part(self): + buf = "".join(self._buffer) + # Put back any data remaining over the part size into the + # buffer + if len(buf) > self.part_size: + self._buffer = [buf[self.part_size:]] + self._buffer_size = len(self._buffer[0]) + else: + self._buffer = [] + self._buffer_size = 0 + # The part we will send + part = buf[:self.part_size] + # Create a request and sign it + part_tree_hash = tree_hash(chunk_hashes(part)) + self._tree_hashes.append(part_tree_hash) + + hex_tree_hash = bytes_to_hex(part_tree_hash) + linear_hash = hashlib.sha256(part).hexdigest() + content_range = (self._uploaded_size, + (self._uploaded_size + len(part)) - 1) + response = self.vault.layer1.upload_part(self.vault.name, + self.upload_id, + linear_hash, + hex_tree_hash, + content_range, part) + self._uploaded_size += len(part) + + def write(self, str): + if self.closed: + raise ValueError("I/O operation on closed file") + if str == "": + return + self._buffer.append(str) + self._buffer_size += len(str) + while self._buffer_size > self.part_size: + self.send_part() + + def close(self): + if self.closed: + return + if self._buffer_size > 0: + self.send_part() + # Complete the multiplart glacier upload + hex_tree_hash = bytes_to_hex(tree_hash(self._tree_hashes)) + response = self.vault.layer1.complete_multipart_upload(self.vault.name, + self.upload_id, + hex_tree_hash, + self._uploaded_size) + self.archive_id = response['ArchiveId'] + self.closed = True + + def get_archive_id(self): + self.close() + return self.archive_id diff --git a/boto/utils.py b/boto/utils.py index 1d002e88e8..0945364f74 100644 --- a/boto/utils.py +++ b/boto/utils.py @@ -850,14 +850,18 @@ def compute_md5(fp, buf_size=8192, 
size=None): plain digest as the second element and the data size as the third element. """ - m = md5() + return compute_hash(fp, buf_size, size, hash_algorithm=md5) + + +def compute_hash(fp, buf_size=8192, size=None, hash_algorithm=md5): + hash_obj = hash_algorithm() spos = fp.tell() if size and size < buf_size: s = fp.read(size) else: s = fp.read(buf_size) while s: - m.update(s) + hash_obj.update(s) if size: size -= len(s) if size <= 0: @@ -866,11 +870,11 @@ def compute_md5(fp, buf_size=8192, size=None): s = fp.read(size) else: s = fp.read(buf_size) - hex_md5 = m.hexdigest() - base64md5 = base64.encodestring(m.digest()) - if base64md5[-1] == '\n': - base64md5 = base64md5[0:-1] + hex_digest = hash_obj.hexdigest() + base64_digest = base64.encodestring(hash_obj.digest()) + if base64_digest[-1] == '\n': + base64_digest = base64_digest[0:-1] # data_size based on bytes read. data_size = fp.tell() - spos fp.seek(spos) - return (hex_md5, base64md5, data_size) + return (hex_digest, base64_digest, data_size) diff --git a/docs/source/index.rst b/docs/source/index.rst index 90135d8839..1a7e930743 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -61,6 +61,7 @@ Currently Supported Services * **Storage** * :doc:`Simple Storage Service (S3) ` -- (:doc:`API Reference `) + * Amazon Glacier -- (:doc:`API Reference `) * Google Cloud Storage -- (:doc:`API Reference `) * **Workforce** diff --git a/docs/source/ref/glacier.rst b/docs/source/ref/glacier.rst new file mode 100644 index 0000000000..6f5ccbba27 --- /dev/null +++ b/docs/source/ref/glacier.rst @@ -0,0 +1,56 @@ +.. ref-glacier + +======= +Glaicer +======= + +boto.glacier +------------ + +.. automodule:: boto.glacier + :members: + :undoc-members: + +boto.glacier.layer1 +------------------ + +.. automodule:: boto.glacier.layer1 + :members: + :undoc-members: + +boto.glacier.layer2 +------------------- + +.. automodule:: boto.glacier.layer2 + :members: + :undoc-members: + +boto.glacier.vault +------------------ + +.. 
automodule:: boto.glacier.vault + :members: + :undoc-members: + +boto.glacier.job +---------------- + +.. automodule:: boto.glacier.job + :members: + :undoc-members: + +boto.glacier.writer +------------------- + +.. automodule:: boto.glacier.writer + :members: + :undoc-members: + +boto.glacier.exceptions +----------------------- + +.. automodule:: boto.glacier.exceptions + :members: + :undoc-members: + + diff --git a/docs/source/ref/index.rst b/docs/source/ref/index.rst index 4f36adf678..b13fc06bed 100644 --- a/docs/source/ref/index.rst +++ b/docs/source/ref/index.rst @@ -18,13 +18,14 @@ API Reference emr file fps + glacier gs iam manage mturk pyami rds - route53 + route53 s3 sdb services @@ -34,4 +35,4 @@ API Reference sts swf vpc - + diff --git a/setup.py b/setup.py index 41dc54f3da..662c5e1a00 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ "bin/list_instances", "bin/taskadmin", "bin/kill_instance", "bin/bundle_image", "bin/pyami_sendmail", "bin/lss3", "bin/cq", "bin/route53", "bin/s3multiput", "bin/cwutil", - "bin/instance_events", "bin/asadmin"], + "bin/instance_events", "bin/asadmin", "bin/glacier"], url = "https://github.com/boto/boto/", packages = ["boto", "boto.sqs", "boto.s3", "boto.gs", "boto.file", "boto.ec2", "boto.ec2.cloudwatch", "boto.ec2.autoscale", @@ -64,7 +64,7 @@ "boto.fps", "boto.emr", "boto.emr", "boto.sns", "boto.ecs", "boto.iam", "boto.route53", "boto.ses", "boto.cloudformation", "boto.sts", "boto.dynamodb", - "boto.swf", "boto.mws", "boto.cloudsearch"], + "boto.swf", "boto.mws", "boto.cloudsearch", "boto.glacier"], package_data = {"boto.cacerts": ["cacerts.txt"]}, license = "MIT", platforms = "Posix; MacOS X; Windows", diff --git a/tests/integration/glacier/__init__.py b/tests/integration/glacier/__init__.py new file mode 100644 index 0000000000..5326afc11b --- /dev/null +++ b/tests/integration/glacier/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# 
Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# diff --git a/tests/integration/glacier/test_layer2.py b/tests/integration/glacier/test_layer2.py new file mode 100644 index 0000000000..caa44fa522 --- /dev/null +++ b/tests/integration/glacier/test_layer2.py @@ -0,0 +1,45 @@ +# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +import time +from tests.unit import unittest + +from boto.glacier.layer2 import Layer1, Layer2 + + +class TestGlacierLayer2(unittest.TestCase): + glacier = True + + def setUp(self): + self.layer2 = Layer2() + self.vault_name = 'testvault%s' % int(time.time()) + + def test_create_delete_vault(self): + vault = self.layer2.create_vault(self.vault_name) + retrieved_vault = self.layer2.get_vault(self.vault_name) + self.layer2.delete_vault(self.vault_name) + self.assertEqual(vault.name, retrieved_vault.name) + self.assertEqual(vault.arn, retrieved_vault.arn) + self.assertEqual(vault.creation_date, retrieved_vault.creation_date) + self.assertEqual(vault.last_inventory_date, + retrieved_vault.last_inventory_date) + self.assertEqual(vault.number_of_archives, + retrieved_vault.number_of_archives) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index 598b0f420b..4e52b76621 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -36,13 +36,23 @@ def _mexe_spy(self, request, *args, **kwargs): self.actual_request = request return self.original_mexe(request, *args, **kwargs) - def create_response(self, status_code, reason='', body=None): + def create_response(self, status_code, reason='', header=[], body=None): if body is None: body = self.default_body() response = Mock(spec=httplib.HTTPResponse) response.status = status_code response.read.return_value = body response.reason = reason + + response.getheaders.return_value = header + def overwrite_header(arg, default=None): + header_dict = 
dict(header) + if header_dict.has_key(arg): + return header_dict[arg] + else: + return default + response.getheader.side_effect = overwrite_header + return response def assert_request_parameters(self, params, ignore_params_values=None): @@ -57,8 +67,8 @@ def assert_request_parameters(self, params, ignore_params_values=None): del request_params[param] self.assertDictEqual(request_params, params) - def set_http_response(self, status_code, reason='', body=None): - http_response = self.create_response(status_code, reason, body) + def set_http_response(self, status_code, reason='', header=[], body=None): + http_response = self.create_response(status_code, reason, header, body) self.https_connection.getresponse.return_value = http_response def default_body(self): diff --git a/tests/unit/glacier/__init__.py b/tests/unit/glacier/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/glacier/test_layer1.py b/tests/unit/glacier/test_layer1.py new file mode 100644 index 0000000000..7d7b6fc273 --- /dev/null +++ b/tests/unit/glacier/test_layer1.py @@ -0,0 +1,78 @@ +from tests.unit import AWSMockServiceTestCase +from boto.glacier.layer1 import Layer1 +import json +import copy + + +class GlacierLayer1ConnectionBase(AWSMockServiceTestCase): + connection_class = Layer1 + + def setUp(self): + super(GlacierLayer1ConnectionBase, self).setUp() + self.json_header = [('Content-Type', 'application/json')] + self.vault_name = u'examplevault' + self.vault_arn = 'arn:aws:glacier:us-east-1:012345678901:vaults/' + \ + self.vault_name + self.vault_info = {u'CreationDate': u'2012-03-16T22:22:47.214Z', + u'LastInventoryDate': u'2012-03-21T22:06:51.218Z', + u'NumberOfArchives': 2, + u'SizeInBytes': 12334, + u'VaultARN': self.vault_arn, + u'VaultName': self.vault_name} + + +class GlacierVaultsOperations(GlacierLayer1ConnectionBase): + + def test_create_vault_parameters(self): + self.set_http_response(status_code=201) + 
self.service_connection.create_vault(self.vault_name) + + def test_list_vaults(self): + content = {u'Marker': None, + u'RequestId': None, + u'VaultList': [self.vault_info]} + self.set_http_response(status_code=200, header=self.json_header, + body=json.dumps(content)) + api_response = self.service_connection.list_vaults() + self.assertDictEqual(content, api_response) + + def test_describe_vaults(self): + content = copy.copy(self.vault_info) + content[u'RequestId'] = None + self.set_http_response(status_code=200, header=self.json_header, + body=json.dumps(content)) + api_response = self.service_connection.describe_vault(self.vault_name) + self.assertDictEqual(content, api_response) + + def test_delete_vault(self): + self.set_http_response(status_code=204) + self.service_connection.delete_vault(self.vault_name) + + +class GlacierJobOperations(GlacierLayer1ConnectionBase): + + def setUp(self): + super(GlacierJobOperations, self).setUp() + self.job_content = 'abc' * 1024 + + def test_initiate_archive_job(self): + content = {u'Type': u'archive-retrieval', + u'ArchiveId': u'AAABZpJrTyioDC_HsOmHae8EZp_uBSJr6cnGOLKp_XJCl-Q', + u'Description': u'Test Archive', + u'SNSTopic': u'Topic', + u'JobId': None, + u'Location': None, + u'RequestId': None} + self.set_http_response(status_code=202, header=self.json_header, + body=json.dumps(content)) + api_response = self.service_connection.initiate_job(self.vault_name, + self.job_content) + self.assertDictEqual(content, api_response) + + def test_get_archive_output(self): + header = [('Content-Type', 'application/octet-stream')] + self.set_http_response(status_code=200, header=header, + body=self.job_content) + response = self.service_connection.get_job_output(self.vault_name, + 'example-job-id') + self.assertEqual(self.job_content, response.read()) diff --git a/tests/unit/glacier/test_layer2.py b/tests/unit/glacier/test_layer2.py new file mode 100644 index 0000000000..a82a3a2e2c --- /dev/null +++ b/tests/unit/glacier/test_layer2.py @@ 
-0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# + +from tests.unit import unittest + +from mock import Mock + +from boto.glacier.layer1 import Layer1 +from boto.glacier.layer2 import Layer2 +from boto.glacier.vault import Vault +from boto.glacier.vault import Job + +# Some fixture data from the Glacier docs +FIXTURE_VAULT = { + "CreationDate" : "2012-02-20T17:01:45.198Z", + "LastInventoryDate" : "2012-03-20T17:03:43.221Z", + "NumberOfArchives" : 192, + "SizeInBytes" : 78088912, + "VaultARN" : "arn:aws:glacier:us-east-1:012345678901:vaults/examplevault", + "VaultName" : "examplevault" +} + +FIXTURE_ARCHIVE_JOB = { + "Action": "ArchiveRetrieval", + "ArchiveId": ("NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUs" + "uhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqr" + "EXAMPLEArchiveId"), + "ArchiveSizeInBytes": 16777216, + "Completed": False, + "CreationDate": "2012-05-15T17:21:39.339Z", + "CompletionDate": "2012-05-15T17:21:43.561Z", + "InventorySizeInBytes": None, + "JobDescription": "My ArchiveRetrieval Job", + "JobId": ("HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5v" + "P54ZShjoQzQVVh7vEXAMPLEjobID"), + "SHA256TreeHash": ("beb0fe31a1c7ca8c6c04d574ea906e3f97b31fdca7571defb5b44dc" + "a89b5af60"), + "SNSTopic": "arn:aws:sns:us-east-1:012345678901:mytopic", + "StatusCode": "InProgress", + "StatusMessage": "Operation in progress.", + "VaultARN": "arn:aws:glacier:us-east-1:012345678901:vaults/examplevault" +} + + +class GlacierLayer2Base(unittest.TestCase): + def setUp(self): + self.mock_layer1 = Mock(spec=Layer1) + + +class TestGlacierLayer2Connection(GlacierLayer2Base): + def setUp(self): + GlacierLayer2Base.setUp(self) + self.layer2 = Layer2(layer1=self.mock_layer1) + + def test_create_vault(self): + self.mock_layer1.describe_vault.return_value = FIXTURE_VAULT + self.layer2.create_vault("My Vault") + self.mock_layer1.create_vault.assert_called_with("My Vault") + + def test_get_vault(self): + self.mock_layer1.describe_vault.return_value = FIXTURE_VAULT + vault = 
self.layer2.get_vault("examplevault") + self.assertEqual(vault.layer1, self.mock_layer1) + self.assertEqual(vault.name, "examplevault") + self.assertEqual(vault.size, 78088912) + self.assertEqual(vault.number_of_archives, 192) + + def list_vaults(self): + self.mock_layer1.list_vaults.return_value = [FIXTURE_VAULT] + vaults = self.layer2.list_vaults() + self.assertEqual(vaults[0].name, "examplevault") + + +class TestVault(GlacierLayer2Base): + def setUp(self): + GlacierLayer2Base.setUp(self) + self.vault = Vault(self.mock_layer1, FIXTURE_VAULT) + + # TODO: Tests for the other methods of uploading + + def test_create_archive_writer(self): + self.mock_layer1.initiate_multipart_upload.return_value = { + "UploadId": "UPLOADID"} + writer = self.vault.create_archive_writer(description="stuff") + self.mock_layer1.initiate_multipart_upload.assert_called_with( + "examplevault", self.vault.DefaultPartSize, "stuff") + self.assertEqual(writer.vault, self.vault) + self.assertEqual(writer.upload_id, "UPLOADID") + + def test_delete_vault(self): + self.vault.delete_archive("archive") + self.mock_layer1.delete_archive.assert_called_with("examplevault", + "archive") + + def test_get_job(self): + self.mock_layer1.describe_job.return_value = FIXTURE_ARCHIVE_JOB + job = self.vault.get_job( + "NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPA" + "dTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEA" + "rchiveId") + self.assertEqual(job.action, "ArchiveRetrieval") + + def test_list_jobs(self): + self.mock_layer1.list_jobs.return_value = { + "JobList": [FIXTURE_ARCHIVE_JOB]} + jobs = self.vault.list_jobs(False, "InProgress") + self.mock_layer1.list_jobs.assert_called_with("examplevault", + False, "InProgress") + self.assertEqual(jobs[0].archive_id, + "NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z" + "8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs0" + "1MNGntHEQL8MBfGlqrEXAMPLEArchiveId") + + +class TestJob(GlacierLayer2Base): + def setUp(self): + 
GlacierLayer2Base.setUp(self) + self.vault = Vault(self.mock_layer1, FIXTURE_VAULT) + self.job = Job(self.vault, FIXTURE_ARCHIVE_JOB) + + def test_get_job_output(self): + self.mock_layer1.get_job_output.return_value = "TEST_OUTPUT" + self.job.get_output((0,100)) + self.mock_layer1.get_job_output.assert_called_with( + "examplevault", + "HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5vP" + "54ZShjoQzQVVh7vEXAMPLEjobID", (0,100)) diff --git a/tests/unit/glacier/test_writer.py b/tests/unit/glacier/test_writer.py new file mode 100644 index 0000000000..216429fdc0 --- /dev/null +++ b/tests/unit/glacier/test_writer.py @@ -0,0 +1,26 @@ +from hashlib import sha256 + +from tests.unit import unittest +import mock + +from boto.glacier.writer import Writer, chunk_hashes + + +class TestChunking(unittest.TestCase): + def test_chunk_hashes_exact(self): + chunks = chunk_hashes('a' * (2 * 1024 * 1024)) + self.assertEqual(len(chunks), 2) + self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest()) + + def test_chunks_with_leftovers(self): + bytestring = 'a' * (2 * 1024 * 1024 + 20) + chunks = chunk_hashes(bytestring) + self.assertEqual(len(chunks), 3) + self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest()) + self.assertEqual(chunks[1], sha256('a' * 1024 * 1024).digest()) + self.assertEqual(chunks[2], sha256('a' * 20).digest()) + + def test_less_than_one_chunk(self): + chunks = chunk_hashes('aaaa') + self.assertEqual(len(chunks), 1) + self.assertEqual(chunks[0], sha256('aaaa').digest())