def regions():
    """
    Return the regions in which the Amazon Glacier service is offered.

    :rtype: list
    :return: A list of :class:`boto.ec2.regioninfo.RegionInfo` objects,
        one per region, each using
        :class:`boto.glacier.layer1.Layer1` as its connection class.
    """
    # Imported inside the function, exactly as before (presumably to
    # avoid a circular import with layer1 -- confirm).
    from boto.glacier.layer1 import Layer1

    # Order matters: callers receive the regions in this order.
    region_names = (
        'us-east-1',
        'us-west-1',
        'us-west-2',
        'ap-northeast-1',
        'ap-southeast-1',
        'eu-west-1',
    )
    return [RegionInfo(name=region_name,
                       endpoint='glacier.%s.amazonaws.com' % region_name,
                       connection_cls=Layer1)
            for region_name in region_names]


def connect_to_region(region_name, **kw_params):
    """
    Open a connection to the Glacier region called ``region_name``.

    :type region_name: str
    :param region_name: The name of the region to connect to.

    :param kw_params: Keyword arguments handed unchanged to the
        region's ``connect`` method.

    :return: Whatever ``connect`` returns for the first region whose
        name matches, or ``None`` when no region has that name.
    """
    candidates = [info for info in regions() if info.name == region_name]
    if not candidates:
        return None
    return candidates[0].connect(**kw_params)
class Layer1(AWSAuthConnection):
    """
    Thin connection class for the Amazon Glacier REST API.

    Every public method builds a resource path, hands it to
    :meth:`make_request` and returns the response body.  Only vault
    and job operations are implemented here.
    """

    # Region used when no ``region`` object is passed in and no region
    # name is configured.
    DefaultRegionName = 'us-east-1'

    # Glacier API version, sent as the ``x-amz-glacier-version`` header.
    Version = '2012-06-01'

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 account_id='-', is_secure=True, port=None,
                 proxy=None, proxy_port=None,
                 debug=2, security_token=None, region=None):
        """
        :param aws_access_key_id: Passed through to
            ``AWSAuthConnection``.
        :param aws_secret_access_key: Passed through to
            ``AWSAuthConnection``.

        :type account_id: str
        :param account_id: First path segment of every request URI.

        :param is_secure: Accepted for signature compatibility only;
            the connection is always created with ``is_secure=True``.

        :param port: Passed through to ``AWSAuthConnection``.
        :param proxy: Passed through to ``AWSAuthConnection``.
        :param proxy_port: Passed through to ``AWSAuthConnection``.
        :param debug: Passed through to ``AWSAuthConnection``.
        :param security_token: Passed through to ``AWSAuthConnection``.

        :param region: A region object exposing ``endpoint``.  When
            omitted, the region name is read from the ``region`` option
            of the ``Glacier`` config section (falling back to
            ``DefaultRegionName``) and looked up in
            ``boto.glacier.regions()``.

        :raises ValueError: if no ``region`` is given and the
            configured region name is not a known Glacier region.
        """
        if not region:
            # The original read the 'DynamoDB' section here, a
            # copy-paste slip; Glacier has its own section.
            region_name = boto.config.get('Glacier', 'region',
                                          self.DefaultRegionName)
            for reg in boto.glacier.regions():
                if reg.name == region_name:
                    region = reg
                    break
            else:
                # Fail with a clear message instead of an
                # AttributeError on ``None.endpoint`` below.
                raise ValueError('Unknown Glacier region: %r'
                                 % (region_name,))

        self.region = region
        self.account_id = account_id
        AWSAuthConnection.__init__(self, region.endpoint,
                                   aws_access_key_id, aws_secret_access_key,
                                   True, port, proxy, proxy_port, debug=debug,
                                   security_token=security_token)

    def _required_auth_capability(self):
        """Return the auth capabilities this connection requires."""
        return ['hmac-v4']

    def make_request(self, verb, resource, headers=None, data=''):
        """
        Send one request to Glacier and return its body.

        :type verb: str
        :param verb: The HTTP method.

        :type resource: str
        :param resource: Resource path relative to the account, e.g.
            ``vaults/myvault``; it is prefixed with ``/<account_id>/``.

        :type headers: dict
        :param headers: Optional extra request headers.  The dict is
            copied, not modified; ``x-amz-glacier-version`` is always
            set to :attr:`Version`.

        :param data: The request payload.

        :return: The decoded JSON document when the body parses as
            JSON, otherwise the raw body as read from the response
            (empty when there is no body).
        """
        # Copy so the caller's dict is left alone, then add the version
        # header.  The original replaced the dict wholesale, silently
        # dropping every caller-supplied header.
        request_headers = dict(headers) if headers else {}
        request_headers['x-amz-glacier-version'] = self.Version
        uri = '/%s/%s' % (self.account_id, resource)
        response = AWSAuthConnection.make_request(self, verb, uri,
                                                  headers=request_headers,
                                                  data=data)
        body = response.read()
        if not body:
            return body
        boto.log.debug(body)
        try:
            return json.loads(body)
        except ValueError:
            # Not JSON (e.g. archive bytes from get_job_output): hand
            # the raw body back instead of failing.
            return body

    # Vaults

    def list_vaults(self, limit=None, marker=None):
        """
        List the vaults owned by the calling user's account.  The
        list returned in the response is ASCII-sorted by vault name.

        By default the service returns up to 1,000 items.  If there
        are more vaults to list, the marker field in the response body
        contains the vault ARN at which to continue the list with a
        new List Vaults request; otherwise the marker field is null.

        NOTE(review): ``limit`` and ``marker`` are accepted but are not
        currently added to the request, so the service defaults always
        apply.

        :type limit: int
        :param limit: The maximum number of items returned in the
            response.

        :type marker: str
        :param marker: A string used for pagination: the vault ARN
            after which the listing should begin, taken from a
            previous List Vaults response.

        :return: The response body as returned by :meth:`make_request`.
        """
        return self.make_request('GET', 'vaults')

    def describe_vault(self, vault_name):
        """
        Return information about a vault, including the vault ARN,
        its creation date, the number of archives it contains and
        their total size.  The counts are as of the last vault
        inventory Amazon Glacier generated (approximately daily), so a
        request sent right after adding or removing an archive might
        not reflect the change.

        :type vault_name: str
        :param vault_name: The name of the vault to describe.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s' % vault_name
        return self.make_request('GET', uri)

    def create_vault(self, vault_name):
        """
        Create a new vault with the specified name.  The name must
        be unique within a region for an AWS account.  You can create
        up to 1,000 vaults per account.

        Naming guidelines:

        * Names can be between 1 and 255 characters long.
        * Allowed characters are a-z, A-Z, 0-9, '_' (underscore),
          '-' (hyphen), and '.' (period).

        This operation is idempotent: sending the same request again
        has no further effect after the vault has been created.

        :type vault_name: str
        :param vault_name: The name of the new vault.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s' % vault_name
        return self.make_request('PUT', uri)

    def delete_vault(self, vault_name):
        """
        Delete a vault.  Amazon Glacier deletes a vault only if
        there are no archives in it as of the last inventory and there
        have been no writes to it since that inventory.  If either
        condition is not satisfied the vault is not removed and Amazon
        Glacier returns an error.

        This operation is idempotent: sending the same request again
        has no further effect after Amazon Glacier deletes the vault.

        :type vault_name: str
        :param vault_name: The name of the vault to delete.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s' % vault_name
        return self.make_request('DELETE', uri)

    def get_vault_notifications(self, vault_name):
        """
        Retrieve the notification-configuration subresource set on
        the vault.

        :type vault_name: str
        :param vault_name: The name of the vault.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s/notification-configuration' % vault_name
        return self.make_request('GET', uri)

    def set_vault_notifications(self, vault_name, notification_config):
        """
        Set the notification-configuration subresource on the vault.

        :type vault_name: str
        :param vault_name: The name of the vault.

        :type notification_config: dict
        :param notification_config: A Python dictionary containing
            an SNS Topic and events for which you want Amazon Glacier
            to send notifications to the topic.  Possible events are:

            * ArchiveRetrievalCompleted - occurs when a job that was
              initiated for an archive retrieval is completed.
            * InventoryRetrievalCompleted - occurs when a job that was
              initiated for an inventory retrieval is completed.

            The format of the dictionary is:

                {'SNSTopic': 'mytopic',
                 'Events': [event1,...]}

            It is serialized with ``json.dumps`` and sent as the
            request payload.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s/notification-configuration' % vault_name
        json_config = json.dumps(notification_config)
        return self.make_request('PUT', uri, data=json_config)

    def delete_vault_notifications(self, vault_name):
        """
        Delete the notification-configuration subresource set on the
        vault.

        :type vault_name: str
        :param vault_name: The name of the vault.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s/notification-configuration' % vault_name
        return self.make_request('DELETE', uri)

    # Jobs

    def list_jobs(self, vault_name, completed=None, limit=None,
                  marker=None, status_code=None):
        """
        List jobs for a vault, including jobs that are in progress
        and jobs that have recently finished.

        NOTE(review): ``completed``, ``limit``, ``marker`` and
        ``status_code`` are accepted but are not currently added to
        the request, so the listing is never filtered or paginated by
        this method.

        :type vault_name: str
        :param vault_name: The name of the vault.

        :type completed: boolean
        :param completed: Specifies the state of the jobs to return:
            True for completed jobs only, False for uncompleted jobs
            only, no value for all jobs.

        :type limit: int
        :param limit: The maximum number of items returned in the
            response.

        :type marker: str
        :param marker: An opaque string used for pagination, taken
            from a previous List Jobs response.

        :type status_code: string
        :param status_code: Specifies the type of job status to return.
            Valid values are: InProgress|Succeeded|Failed.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s/jobs' % vault_name
        return self.make_request('GET', uri)

    def describe_job(self, vault_name, job_id):
        """
        Return information about a job you previously initiated,
        including the job initiation date, the user who initiated the
        job, the job status code/message and the Amazon SNS topic to
        notify after Amazon Glacier completes the job.

        :type vault_name: str
        :param vault_name: The name of the vault the job belongs to.

        :type job_id: str
        :param job_id: The ID of the job.

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s/jobs/%s' % (vault_name, job_id)
        return self.make_request('GET', uri)

    def initiate_job(self, vault_name, job_data):
        """
        Initiate a job of the specified type.  Retrieving an archive
        or a vault inventory is asynchronous and is a two-step
        process:

        * Initiate a retrieval job.
        * After the job completes, download the bytes.

        When you initiate a retrieval job, Amazon Glacier creates a
        job and returns a job ID in the response.

        :type vault_name: str
        :param vault_name: The name of the vault to run the job on.

        :type job_data: dict
        :param job_data: A Python dictionary describing the requested
            job; it is serialized with ``json.dumps`` and sent as the
            request payload.  The dictionary can contain the following
            attributes:

            * ArchiveId - The ID of the archive you want to retrieve.
              This field is required only if the Type is set to
              archive-retrieval.
            * Description - The optional description for the job.
            * Format - When initiating a job to retrieve a vault
              inventory, you can optionally add this parameter to
              specify the output format.  Valid values are: CSV|JSON.
            * SNSTopic - The Amazon SNS topic ARN where Amazon Glacier
              sends a notification when the job is completed and the
              output is ready for you to download.
            * Type - The job type.  Valid values are:
              archive-retrieval|inventory-retrieval

        :return: The response body as returned by :meth:`make_request`.
        """
        uri = 'vaults/%s/jobs' % vault_name
        json_job_data = json.dumps(job_data)
        return self.make_request('POST', uri, data=json_job_data)

    def get_job_output(self, vault_name, job_id):
        """
        Download the output of a job you initiated with
        :meth:`initiate_job`.  Depending on the job type, the output is
        either the content of an archive or a vault inventory.

        NOTE(review): the service also supports downloading a byte
        range of the output; this method always requests the whole
        output.

        :type vault_name: str
        :param vault_name: The name of the vault the job belongs to.

        :type job_id: str
        :param job_id: The ID of the job.

        :return: The response body as returned by :meth:`make_request`:
            decoded JSON when the output parses as JSON, the raw
            output otherwise.
        """
        uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id)
        return self.make_request('GET', uri)
uri) + return self.make_request('DELETE', uri, ok_responses=(204,)) def get_vault_notifications(self, vault_name): """ @@ -208,7 +216,8 @@ def set_vault_notifications(self, vault_name, notification_config): """ uri = 'vaults/%s/notification-configuration' % vault_name json_config = json.dumps(notification_config) - return self.make_request('PUT', uri, data=json_config) + return self.make_request('PUT', uri, data=json_config, + ok_responses=(204,)) def delete_vault_notifications(self, vault_name): """ @@ -219,7 +228,7 @@ def delete_vault_notifications(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s/notification-configuration' % vault_name - return self.make_request('DELETE', uri) + return self.make_request('DELETE', uri, ok_responses=(204,)) # Jobs @@ -275,7 +284,7 @@ def describe_job(self, vault_name, job_id): :param job_id: The ID of the job. """ uri = 'vaults/%s/jobs/%s' % (vault_name, job_id) - return self.make_request('GET', uri) + return self.make_request('GET', uri, ok_responses=(201,)) def initiate_job(self, vault_name, job_data): """ @@ -314,7 +323,8 @@ def initiate_job(self, vault_name, job_data): """ uri = 'vaults/%s/jobs' % vault_name json_job_data = json.dumps(job_data) - return self.make_request('POST', uri, data=json_job_data) + return self.make_request('POST', uri, data=json_job_data, + ok_responses=(202,)) def get_job_output(self, vault_name, job_id): """ From 70ea28f8828a9fe360839e82c43dd6fcf01aa365 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Wed, 22 Aug 2012 10:56:53 -0700 Subject: [PATCH 03/62] Correcting available regions. 
def upload_archive(self, vault_name, archive,
                   linear_hash, tree_hash, description=None):
    """
    Add an archive to a vault in a single operation.  According to
    the service documentation quoted by the original docstring, the
    archive ID comes back in the ``x-amz-archive-id`` response header;
    save it so that you can access the archive later.

    NOTE(review): this method returns whatever ``make_request``
    returns -- confirm that this exposes the archive ID header.

    :type vault_name: str
    :param vault_name: The name of the vault to upload into.

    :type archive: bytes
    :param archive: The data to upload.

    :type linear_hash: str
    :param linear_hash: The SHA256 checksum (a linear hash) of the
        payload.

    :type tree_hash: str
    :param tree_hash: The user-computed SHA256 tree hash of the
        payload.  For more information on computing the
        tree hash, see http://goo.gl/u7chF.

    :type description: str
    :param description: An optional description of the archive.  The
        description header is sent only when this is non-empty.

    :return: The result of ``make_request`` for the upload; 201 is
        the only status passed as acceptable.
    """
    uri = 'vaults/%s/archives' % vault_name
    headers = {'x-amz-content-sha256': linear_hash,
               'x-amz-sha256-tree-hash': tree_hash,
               # Header values must be strings, not ints.
               'x-amz-content-length': str(len(archive))}
    if description:
        headers['x-amz-archive-description'] = description
    # Uploading is a POST; the original sent GET, which cannot
    # create an archive.
    return self.make_request('POST', uri, headers=headers,
                             data=archive, ok_responses=(201,))


def delete_archive(self, vault_name, archive_id):
    """
    Delete an archive from a vault.

    :type vault_name: str
    :param vault_name: The name of the vault holding the archive.

    :type archive_id: str
    :param archive_id: The ID for the archive to be deleted.

    :return: The result of ``make_request`` for the deletion; 204 is
        the only status passed as acceptable.
    """
    uri = 'vaults/%s/archives/%s' % (vault_name, archive_id)
    return self.make_request('DELETE', uri, ok_responses=(204,))
--- boto/glacier/layer1.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index ad3a36bbe5..71d166db43 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -63,7 +63,7 @@ def make_request(self, verb, resource, headers=None, data='', ok_responses=(200,)): if headers is None: headers = {} - headers = {'x-amz-glacier-version': self.Version} + headers['x-amz-glacier-version'] = self.Version uri = '/%s/%s' % (self.account_id, resource) response = AWSAuthConnection.make_request(self, verb, uri, headers=headers, @@ -382,10 +382,10 @@ def upload_archive(self, vault_name, archive, uri = 'vaults/%s/archives' % vault_name headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, - 'x-amz-content-length': len(archive)} + 'x-amz-content-length': str(len(archive))} if description: headers['x-amz-archive-description'] = description - return self.make_request('GET', uri, headers=headers, + return self.make_request('POST', uri, headers=headers, data=archive, ok_responses=(201,)) def delete_archive(self, vault_name, archive_id): From 1ca92079da6ecb69f744646ee79a2feca4b99c98 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sun, 26 Aug 2012 20:08:20 +0200 Subject: [PATCH 06/62] Ability to mock HTTP response header in AWSMockServiceTestCase. We can now set the HTTP headers in AWSMockServiceTestCase.set_http_response(). This allows to test different behaviour in code as a result of different HTTP response headers. 
def create_response(self, status_code, reason='', header=None, body=None):
    """
    Build a mocked ``httplib.HTTPResponse``.

    :type status_code: int
    :param status_code: Value exposed as ``response.status``.

    :type reason: str
    :param reason: Value exposed as ``response.reason``.

    :type header: list
    :param header: ``(name, value)`` pairs returned by
        ``getheaders()`` and searched by ``getheader()``.  Defaults to
        an empty list.

    :param body: Value returned by ``read()``.  When ``None``, the
        result of ``self.default_body()`` is used.

    :return: The configured mock response.
    """
    # ``None`` stands in for "no headers" so that no mutable default
    # is shared between calls.
    if header is None:
        header = []
    if body is None:
        body = self.default_body()

    response = Mock(spec=httplib.HTTPResponse)
    response.status = status_code
    response.reason = reason
    response.read.return_value = body
    response.getheaders.return_value = header

    def lookup_header(name, default=None):
        # Mirrors HTTPResponse.getheader(name, default=None); the
        # mapping is rebuilt per call, as the original did.
        return dict(header).get(name, default)
    response.getheader.side_effect = lookup_header

    return response


def set_http_response(self, status_code, reason='', header=None, body=None):
    """
    Make the mocked HTTPS connection answer with a canned response.

    The arguments are forwarded to ``create_response``; the resulting
    mock becomes the return value of
    ``self.https_connection.getresponse``.
    """
    # Normalised here as well so this method does not rely on how
    # ``create_response`` treats ``None``.
    if header is None:
        header = []
    http_response = self.create_response(status_code, reason, header, body)
    self.https_connection.getresponse.return_value = http_response
make_request now looks at the Content-Type of the response argument to determine if we have a JSON response and transforms accordingly. --- boto/glacier/layer1.py | 54 ++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 71d166db43..8ac83985cb 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -23,7 +23,7 @@ # import json -import boto +import boto.glacier from boto.connection import AWSAuthConnection boto.set_stream_logger('glacier') @@ -39,7 +39,7 @@ class Layer1(AWSAuthConnection): def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, account_id='-', is_secure=True, port=None, - proxy=None, proxy_port=None, + proxy=None, proxy_port=None, https_connection_factory=None, debug=2, security_token=None, region=None): if not region: region_name = boto.config.get('DynamoDB', 'region', @@ -54,6 +54,8 @@ def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, AWSAuthConnection.__init__(self, region.endpoint, aws_access_key_id, aws_secret_access_key, True, port, proxy, proxy_port, debug=debug, + https_connection_factory=\ + https_connection_factory, security_token=security_token) def _required_auth_capability(self): @@ -68,16 +70,14 @@ def make_request(self, verb, resource, headers=None, response = AWSAuthConnection.make_request(self, verb, uri, headers=headers, data=data) - body = response.read() if response.status in ok_responses: - if body: - boto.log.debug(body) - body = json.loads(body) - return body + is_json = response.getheader('Content-Type') == 'application/json' + body = json.loads(response.read()) if is_json else response.read() + return dict(response.getheaders()), body else: msg = 'Expected %s, got (%d, %s)' % (ok_responses, response.status, - body) + response.read()) # create glacier-specific exceptions raise BaseException(msg) @@ -115,7 +115,7 @@ def list_vaults(self, limit=None, marker=None): ("") for 
the marker returns a list of vaults starting from the first vault. """ - return self.make_request('GET', 'vaults') + return self.make_request('GET', 'vaults')[1] def describe_vault(self, vault_name): """ @@ -134,7 +134,7 @@ def describe_vault(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s' % vault_name - return self.make_request('GET', uri) + return self.make_request('GET', uri)[1] def create_vault(self, vault_name): """ @@ -159,7 +159,7 @@ def create_vault(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s' % vault_name - return self.make_request('PUT', uri, ok_responses=(201,)) + return self.make_request('PUT', uri, ok_responses=(201,))[1] def delete_vault(self, vault_name): """ @@ -178,7 +178,7 @@ def delete_vault(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s' % vault_name - return self.make_request('DELETE', uri, ok_responses=(204,)) + return self.make_request('DELETE', uri, ok_responses=(204,))[1] def get_vault_notifications(self, vault_name): """ @@ -189,7 +189,7 @@ def get_vault_notifications(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s/notification-configuration' % vault_name - return self.make_request('GET', uri) + return self.make_request('GET', uri)[1] def set_vault_notifications(self, vault_name, notification_config): """ @@ -217,7 +217,7 @@ def set_vault_notifications(self, vault_name, notification_config): uri = 'vaults/%s/notification-configuration' % vault_name json_config = json.dumps(notification_config) return self.make_request('PUT', uri, data=json_config, - ok_responses=(204,)) + ok_responses=(204,))[1] def delete_vault_notifications(self, vault_name): """ @@ -228,7 +228,7 @@ def delete_vault_notifications(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s/notification-configuration' % vault_name - return self.make_request('DELETE', uri, ok_responses=(204,)) + 
return self.make_request('DELETE', uri, ok_responses=(204,))[1] # Jobs @@ -267,7 +267,7 @@ def list_jobs(self, vault_name, completed=None, limit=None, specified, jobs with all status codes are returned. """ uri = 'vaults/%s/jobs' % vault_name - return self.make_request('GET', uri) + return self.make_request('GET', uri)[1] def describe_job(self, vault_name, job_id): """ @@ -284,7 +284,7 @@ def describe_job(self, vault_name, job_id): :param job_id: The ID of the job. """ uri = 'vaults/%s/jobs/%s' % (vault_name, job_id) - return self.make_request('GET', uri, ok_responses=(201,)) + return self.make_request('GET', uri, ok_responses=(200,))[1] def initiate_job(self, vault_name, job_data): """ @@ -324,9 +324,9 @@ def initiate_job(self, vault_name, job_data): uri = 'vaults/%s/jobs' % vault_name json_job_data = json.dumps(job_data) return self.make_request('POST', uri, data=json_job_data, - ok_responses=(202,)) + ok_responses=(202,))[1] - def get_job_output(self, vault_name, job_id): + def get_job_output(self, vault_name, job_id, byte_range=None): """ This operation downloads the output of the job you initiated using Initiate a Job. Depending on the job type @@ -346,9 +346,21 @@ def get_job_output(self, vault_name, job_id): :type job_id: str :param job_id: The ID of the job. + + :type byte_range: tuple + :param range: A tuple of integer specifying the slice (in bytes) + of the archive you want to receive """ uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id) - return self.make_request('GET', uri) + headers = None + if byte_range: + headers = { 'Range': 'bytes=%s-%s' % (byte_range[0], byte_range[1]) } + header, body = self.make_request('GET', uri, headers=headers, + ok_responses=(200, 206)) + checksum = header.get('x-amz-sha256-tree-hash') + # TODO not sure if we want to verify checksum in this abstraction level + # and do a retry? 
class GlacierLayer1ConnectionBase(AWSMockServiceTestCase):
    """Shared fixture: a mocked Layer1 connection plus sample vault data."""

    connection_class = Layer1

    def setUp(self):
        super(GlacierLayer1ConnectionBase, self).setUp()
        name = u'examplevault'
        arn = 'arn:aws:glacier:us-east-1:012345678901:vaults/' + name
        self.json_header = [('Content-Type', 'application/json')]
        self.vault_name = name
        self.vault_arn = arn
        # Shaped like one vault entry in the service's responses.
        self.vault_info = {
            'CreationDate': '2012-03-16T22:22:47.214Z',
            'LastInventoryDate': '2012-03-21T22:06:51.218Z',
            'NumberOfArchives': 2,
            'SizeInBytes': 12334,
            'VaultARN': arn,
            'VaultName': name,
        }


class GlacierVaultsOperations(GlacierLayer1ConnectionBase):
    """Vault-level calls against canned HTTP responses."""

    def test_create_vault_parameters(self):
        # Smoke test: only checks that the call completes on a 201.
        self.set_http_response(status_code=201)
        self.service_connection.create_vault(self.vault_name)

    def test_list_vaults(self):
        expected = {'Marker': None,
                    'VaultList': [self.vault_info]}
        payload = json.dumps(expected)
        self.set_http_response(status_code=200, header=self.json_header,
                               body=payload)
        self.assertDictEqual(expected,
                             self.service_connection.list_vaults())

    def test_describe_vaults(self):
        expected = self.vault_info
        payload = json.dumps(expected)
        self.set_http_response(status_code=200, header=self.json_header,
                               body=payload)
        described = self.service_connection.describe_vault(self.vault_name)
        self.assertDictEqual(expected, described)

    def test_delete_vault(self):
        # Smoke test: only checks that the call completes on a 204.
        self.set_http_response(status_code=204)
        self.service_connection.delete_vault(self.vault_name)


class GlacierJobOperations(GlacierLayer1ConnectionBase):
    """Job-level calls against canned HTTP responses."""

    def setUp(self):
        super(GlacierJobOperations, self).setUp()
        # 3 KB of filler used as job payload and as archive output.
        self.job_content = 'abc'*1024

    def test_initiate_archive_job(self):
        expected = {'Type': 'archive-retrieval',
                    'ArchiveId': 'AAABZpJrTyioDC_HsOmHae8EZp_uBSJr6cnGOLKp_XJCl-Q',
                    'Description': 'Test Archive',
                    'SNSTopic': 'Topic'}
        payload = json.dumps(expected)
        self.set_http_response(status_code=202, header=self.json_header,
                               body=payload)
        started = self.service_connection.initiate_job(self.vault_name,
                                                       self.job_content)
        self.assertDictEqual(expected, started)

    def test_get_archive_output(self):
        binary_header = [('Content-Type', 'application/octet-stream')]
        self.set_http_response(status_code=200, header=binary_header,
                               body=self.job_content)
        output = self.service_connection.get_job_output(self.vault_name,
                                                        'example-job-id')
        # The second element of the result is the downloaded data.
        _, data = output
        self.assertEqual(self.job_content, data)
+++++++++++++++++++++++++++++++++++++++++ boto/glacier/writer.py | 125 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 354 insertions(+), 5 deletions(-) create mode 100644 boto/glacier/job.py create mode 100644 boto/glacier/layer2.py create mode 100644 boto/glacier/vault.py create mode 100644 boto/glacier/writer.py diff --git a/boto/glacier/job.py b/boto/glacier/job.py new file mode 100644 index 0000000000..2d7a1f61b8 --- /dev/null +++ b/boto/glacier/job.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# +import urllib + +class Job(object): + def __init__(self, vault, job_id): + self.vault = vault + self.job_id = job_id + + def make_request(self, verb, resource, headers=None, + data='', ok_responses=(200,)): + resource = "jobs/%s/%s" % (urllib.quote(self.job_id), resource) + return self.vault.make_request(verb,resource, headers, data,ok_responses) + + def get_output(self, range_from=None, range_to=None): + """ + Get the output of a job. In the case of an archive retrieval + job this will be the data of the archive itself. + + Optionally, a range can be specified to only get a part of the data. + + + :type range_from: int + :param range_from: The first byte to get + + :type range_to: int + :param range_to: The last byte to get + + :rtype: :class:`boto.connection.HttpResponse + :return: A response object from which the output can be read. + """ + headers = {} + if range_from is not None or range_to is not None: + assert range_from is not None and range_to is not None, "If you specify one of range_from or range_to you must specify the other" + headers["Range"] = "bytes %d-%d" % (range_from, range_to) + response = self.make_request("GET", "output", headers=headers) + return response diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 71d166db43..2572873206 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -68,13 +68,10 @@ def make_request(self, verb, resource, headers=None, response = AWSAuthConnection.make_request(self, verb, uri, headers=headers, data=data) - body = response.read() if response.status in ok_responses: - if body: - boto.log.debug(body) - body = json.loads(body) - return body + return response else: + body = response.read() msg = 'Expected %s, got (%d, %s)' % (ok_responses, response.status, body) diff --git a/boto/glacier/layer2.py b/boto/glacier/layer2.py new file mode 100644 index 0000000000..dca6d7aa38 --- /dev/null +++ b/boto/glacier/layer2.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas 
Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +from .layer1 import Layer1 +from .vault import Vault +class Layer2(object): + """ + Provides a more pythonic and friendly interface to Glacier based on Layer1 + """ + + def __init__(self, *args, **kwargs): + self.layer1 = Layer1(*args, **kwargs) + + def get_vault(self, name): + """ + Get an object representing a named vault from Glacier. This + operation does not check if the vault actually exists. + + :type name: syr + :param name: The name of the vault + + :rtype: :class:`boto.glaicer.vault.Vault` + :return: A Valut object representing the vault. 
+ """ + return Vault(self.layer1, name) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py new file mode 100644 index 0000000000..4f2df93886 --- /dev/null +++ b/boto/glacier/vault.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +from .job import Job +from .writer import Writer +import urllib +import json + +class Vault(object): + DefaultPartSize = 128*1024*1024 #128MB + + def __init__(self, layer1, name): + self.layer1 = layer1 + self.name = name + + def make_request(self, verb, resource, headers=None, + data='', ok_responses=(200,)): + resource = "vaults/%s/%s" % (urllib.quote(self.name), resource) + return self.layer1.make_request(verb,resource, headers, data,ok_responses) + + def create_archive_writer(self, part_size=DefaultPartSize): + """ + Create a new archive and begin a multi-part upload to it. 
+ Returns a file-like object to which the data for the archive + can be written. Once all the data is written the file-like + object should be closed, you can then call the get_archive_id + method on it to get the ID of the created archive. + + :type archive_name: str + :param archive_name: The name of the archive + + :type part_size: int + :param part_size: The part size for the multipart upload. + + :rtype: :class:`boto.glaicer.writer.Writer` + :return: A Writer object that to which the archive data should be written. + """ + + headers = { + "x-amz-part-size": str(part_size) + } + response = self.make_request("POST", "multipart-uploads", headers=headers, ok_responses=(201,)) + upload_id = response.getheader("x-amz-multipart-upload-id") + return Writer(self, upload_id, part_size=part_size) + + def create_archive_from_file(self, file=None, file_obj=None): + """ + Create a new archive and upload the data from the given file + or file-like object. + + :type file: str + :param file: A filename to upload + + :type file_obj: file + :param file_obj: A file-like object to upload + + :rtype: str + :return: The archive id of the newly created archive + """ + if not file_obj: + file_obj = open(file, "rb") + writer = self.create_archive_writer(archive_name) + while True: + data = file_obj.read(1024*1024*4) + if not data: + break + writer.write(data) + writer.close() + return writer.get_archive_id() + + def retrieve_archive(self, archive_name, sns_topic=None, description=None): + """ + Initiate a archive retrieval job to download the data from an + archive. You will need to wait for the notification from + Amazon (via SNS) before you can actually download the data, + this takes around 4 hours. + + :type archive_name: str + :param archive_name: The name of the archive + + :rtype: :class:`boto.glaicer.job.Job` + :return: A Job object representing the retrieval job. 
+ """ + params = {"Type": "archive-retrieval", "ArchiveId": archive_name} + if sns_topic is not None: + params["SNSTopic"] = sns_topic + if description is not None: + params["Description"] = description + + response = self.make_request("POST", "jobs", None, json.dumps(params), ok_responses=(202,)) + job_id = response.getheader("x-amz-job-id") + job = Job(self, job_id) + return job + + def get_job(self, job_id): + """ + Get an object representing a job in progress. + + :type job_id: str + :param job_id: The ID of the job + + :rtype: :class:`boto.glaicer.job.Job` + :return: A Job object representing the job. + """ + return Job(self, job_id) diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py new file mode 100644 index 0000000000..5756b79977 --- /dev/null +++ b/boto/glacier/writer.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# + +import urllib +import hashlib +import math +import json + +def chunk_hashes(str): + """ + Break up the byte-string into 1MB chunks and return sha256 hashes + for each. + """ + chunk = 1024*1024 + chunk_count = int(math.ceil(len(str)/float(chunk))) + chunks = [str[i*chunk:(i+1)*chunk] for i in range(chunk_count)] + return [hashlib.sha256(x).digest() for x in chunks] + +def tree_hash(hashes): + """ + Given a hash of each 1MB chunk (from chunk_hashes) this will hash + together adjacent hashes until it ends up with one big one. So a + tree of hashes. + """ + while len(hashes) > 1: + hashes = [hashlib.sha256("".join(h[i:i+1])).digest() for i in range(i,2)] + return hashes[0] + +def bytes_to_hex(str): + return ''.join( [ "%02x" % ord( x ) for x in str] ).strip() + +class Writer(object): + """ + Presents a file-like object for writing to a Amazon Glacier + Archive. The data is written using the multi-part upload API. + """ + def __init__(self, vault, upload_id, part_size): + self.vault = vault + self.upload_id = upload_id + self.part_size = part_size + self.buffer_size = 0 + self.uploaded_size = 0 + self.buffer = [] + self.vault = vault + self.tree_hashes = [] + self.archive_location = None + self.closed = False + + def make_request(self, verb, headers=None, + data='', ok_responses=(200,)): + resource = "multipart-uploads/%s" % (urllib.quote(self.upload_id),) + return self.vault.make_request(verb, resource, headers, data, ok_responses) + + def send_part(self): + buf = "".join(self.buffer) + # Put back any data remaining over the part size into the + # buffer + if len(buf) < self.part_size: + self.buffer = [buf[self.part_size:]] + self.buffer_size = len(self.buffer[0]) + else: + self.buffer = [] + self.buffer_size = 0 + # The part we will send + part = buf[:self.part_size] + # Create a request and sign it + part_tree_hash = tree_hash(chunk_hashes(part)) + self.tree_hashes.append(part_tree_hash) + + headers = { + "Content-Range": "bytes %d-%d/*" % (self.uploaded_size, 
(self.uploaded_size+len(part))-1), + "Content-Length": str(len(part)), + "Content-Type": "application/octet-stream", + "x-amz-sha256-tree-hash": bytes_to_hex(part_tree_hash), + "x-amz-content-sha256": hashlib.sha256(part).hexdigest() + } + + repsonse = self.make_request("PUT", headers, part, ok_responses=(204,)) + + self.uploaded_size += len(part) + + def write(self, str): + assert not self.closed, "Tried to write to a Writer that is already closed!" + self.buffer.append(str) + self.buffer_size += len(str) + while self.buffer_size > self.part_size: + self.send_part() + + def close(self): + if self.closed: + return + if self.buffer_size > 0: + self.send_part() + # Complete the multiplart glacier upload + headers = { + "x-amz-sha256-tree-hash": bytes_to_hex(tree_hash(self.tree_hashes)), + "x-amz-archive-size": str(self.uploaded_size) + } + response = self.make_request("POST", headers, ok_responses=(201,)) + self.archive_id = response.getheader("x-amz-archive-id") + self.closed = True + + def get_archive_id(self): + self.close() + return self.archive_id From fb1bb672bc72bd91622a74c82caeadd68336775a Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Mon, 27 Aug 2012 13:14:41 +0100 Subject: [PATCH 10/62] Removed additional indentation that got added just before the last commit --- boto/glacier/writer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 5756b79977..84e8b5e24e 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -116,10 +116,10 @@ def close(self): "x-amz-sha256-tree-hash": bytes_to_hex(tree_hash(self.tree_hashes)), "x-amz-archive-size": str(self.uploaded_size) } - response = self.make_request("POST", headers, ok_responses=(201,)) - self.archive_id = response.getheader("x-amz-archive-id") - self.closed = True + response = self.make_request("POST", headers, ok_responses=(201,)) + self.archive_id = response.getheader("x-amz-archive-id") + self.closed = True - 
def get_archive_id(self): - self.close() - return self.archive_id + def get_archive_id(self): + self.close() + return self.archive_id From e0c2b16d50575264e3a746297d0668a60b7c2154 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Mon, 27 Aug 2012 20:23:04 +0100 Subject: [PATCH 11/62] Added a more useful exception for bad http responses from Glacier --- boto/glacier/exceptions.py | 18 ++++++++++++++++++ boto/glacier/layer1.py | 12 ++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 boto/glacier/exceptions.py diff --git a/boto/glacier/exceptions.py b/boto/glacier/exceptions.py new file mode 100644 index 0000000000..d62e81f1b8 --- /dev/null +++ b/boto/glacier/exceptions.py @@ -0,0 +1,18 @@ +import json +class UnexpectedHTTPResponseError(Exception): + def __init__(self, expected_responses, response): + self.status = response.status + self.body = response.read() + self.code = None + try: + body = json.loads(self.body) + self.code = body["code"] + msg = 'Expected %s, got (%d, code=%s, message=%s)' % (expected_responses, + response.status, + self.code, + body["message"]) + except: + msg = 'Expected %s, got (%d, %s)' % (expected_responses, + response.status, + self.body) + super(UnexpectedHTTPResponseError, self).__init__(msg) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 2572873206..0b4c0f0e0f 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -25,10 +25,10 @@ import json import boto from boto.connection import AWSAuthConnection +from .exceptions import UnexpectedHTTPResponseError boto.set_stream_logger('glacier') - class Layer1(AWSAuthConnection): DefaultRegionName = 'us-east-1' @@ -40,7 +40,7 @@ class Layer1(AWSAuthConnection): def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, account_id='-', is_secure=True, port=None, proxy=None, proxy_port=None, - debug=2, security_token=None, region=None): + debug=0, security_token=None, region=None): if not region: region_name = 
boto.config.get('DynamoDB', 'region', self.DefaultRegionName) @@ -53,7 +53,7 @@ def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, self.account_id = account_id AWSAuthConnection.__init__(self, region.endpoint, aws_access_key_id, aws_secret_access_key, - True, port, proxy, proxy_port, debug=debug, + is_secure, port, proxy, proxy_port, debug=debug, security_token=security_token) def _required_auth_capability(self): @@ -71,12 +71,8 @@ def make_request(self, verb, resource, headers=None, if response.status in ok_responses: return response else: - body = response.read() - msg = 'Expected %s, got (%d, %s)' % (ok_responses, - response.status, - body) # create glacier-specific exceptions - raise BaseException(msg) + raise UnexpectedHTTPResponseError(ok_responses, response) # Vaults From f89fa27d6f77183b4652d4c69271c7024cf52962 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Mon, 27 Aug 2012 21:41:52 +0100 Subject: [PATCH 12/62] Fixed hash generation and ranged download --- boto/glacier/job.py | 3 ++- boto/glacier/layer1.py | 2 +- boto/glacier/writer.py | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/boto/glacier/job.py b/boto/glacier/job.py index 2d7a1f61b8..a3e9ffd4b3 100644 --- a/boto/glacier/job.py +++ b/boto/glacier/job.py @@ -21,6 +21,7 @@ # IN THE SOFTWARE. 
# import urllib +import json class Job(object): def __init__(self, vault, job_id): @@ -52,6 +53,6 @@ def get_output(self, range_from=None, range_to=None): headers = {} if range_from is not None or range_to is not None: assert range_from is not None and range_to is not None, "If you specify one of range_from or range_to you must specify the other" - headers["Range"] = "bytes %d-%d" % (range_from, range_to) + headers["Range"] = "bytes=%d-%d" % (range_from, range_to) response = self.make_request("GET", "output", headers=headers) return response diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 0b4c0f0e0f..288c0fc080 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -27,7 +27,7 @@ from boto.connection import AWSAuthConnection from .exceptions import UnexpectedHTTPResponseError -boto.set_stream_logger('glacier') +#boto.set_stream_logger('glacier') class Layer1(AWSAuthConnection): diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 84e8b5e24e..55e23bd3da 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -43,7 +43,7 @@ def tree_hash(hashes): tree of hashes. """ while len(hashes) > 1: - hashes = [hashlib.sha256("".join(h[i:i+1])).digest() for i in range(i,2)] + hashes = [hashlib.sha256("".join(hashes[i:i+2])).digest() for i in range(0, len(hashes),2)] return hashes[0] def bytes_to_hex(str): @@ -75,7 +75,7 @@ def send_part(self): buf = "".join(self.buffer) # Put back any data remaining over the part size into the # buffer - if len(buf) < self.part_size: + if len(buf) > self.part_size: self.buffer = [buf[self.part_size:]] self.buffer_size = len(self.buffer[0]) else: @@ -101,6 +101,8 @@ def send_part(self): def write(self, str): assert not self.closed, "Tried to write to a Writer that is already closed!" 
+ if str == "": + return self.buffer.append(str) self.buffer_size += len(str) while self.buffer_size > self.part_size: From 0e510d6d51a14259099e5923225f0ef0bf40b7bf Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Mon, 27 Aug 2012 22:17:39 +0100 Subject: [PATCH 13/62] Another fix to tree hash generation --- boto/glacier/vault.py | 2 +- boto/glacier/writer.py | 32 +++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 4f2df93886..916e27c528 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -27,7 +27,7 @@ import json class Vault(object): - DefaultPartSize = 128*1024*1024 #128MB + DefaultPartSize = 4*1024*1024 #128MB def __init__(self, layer1, name): self.layer1 = layer1 diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 55e23bd3da..9bb9f437f0 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# Some code from Urban Skudnik urban.skudnik@gmail.com # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the @@ -36,15 +37,28 @@ def chunk_hashes(str): chunks = [str[i*chunk:(i+1)*chunk] for i in range(chunk_count)] return [hashlib.sha256(x).digest() for x in chunks] -def tree_hash(hashes): - """ - Given a hash of each 1MB chunk (from chunk_hashes) this will hash - together adjacent hashes until it ends up with one big one. So a - tree of hashes. - """ - while len(hashes) > 1: - hashes = [hashlib.sha256("".join(hashes[i:i+2])).digest() for i in range(0, len(hashes),2)] - return hashes[0] +def tree_hash(fo): + """ + Given a hash of each 1MB chunk (from chunk_hashes) this will hash + together adjacent hashes until it ends up with one big one. So a + tree of hashes. 
+ """ + hashes = [] + hashes.extend(fo) + while len(hashes) > 1: + new_hashes = [] + while True: + if len(hashes) > 1: + first = hashes.pop(0) + second = hashes.pop(0) + new_hashes.append(hashlib.sha256(first + second).digest()) + elif len(hashes) == 1: + only = hashes.pop(0) + new_hashes.append(only) + else: + break + hashes.extend(new_hashes) + return hashes[0] def bytes_to_hex(str): return ''.join( [ "%02x" % ord( x ) for x in str] ).strip() From f32c2d0696d68f935391ee0af73389ac2866fcf0 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Mon, 27 Aug 2012 14:25:54 -0700 Subject: [PATCH 14/62] Merging local changes to Layer1 constructor. --- .gitignore | 1 + boto/glacier/layer1.py | 27 ++++++++++++++------------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index edc76d0ca8..ecf63f949c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ MANIFEST .idea .tox .coverage +*flymake.py \ No newline at end of file diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 8ac83985cb..f90e4c1842 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -31,19 +31,19 @@ class Layer1(AWSAuthConnection): - DefaultRegionName = 'us-east-1' - """The default region to connect to.""" - Version = '2012-06-01' """Glacier API version.""" def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, account_id='-', is_secure=True, port=None, - proxy=None, proxy_port=None, https_connection_factory=None, - debug=2, security_token=None, region=None): + proxy=None, proxy_port=None, + proxy_user=None, proxy_pass=None, debug=2, + https_connection_factory=None, path='/', + provider='aws', security_token=None, + suppress_consec_slashes=True, + region=None, region_name='us-east-1'): + if not region: - region_name = boto.config.get('DynamoDB', 'region', - self.DefaultRegionName) for reg in boto.glacier.regions(): if reg.name == region_name: region = reg @@ -53,10 +53,11 @@ def __init__(self, aws_access_key_id=None, 
aws_secret_access_key=None, self.account_id = account_id AWSAuthConnection.__init__(self, region.endpoint, aws_access_key_id, aws_secret_access_key, - True, port, proxy, proxy_port, debug=debug, - https_connection_factory=\ + True, port, proxy, proxy_port, + proxy_user, proxy_pass, debug, https_connection_factory, - security_token=security_token) + path, provider, security_token, + suppress_consec_slashes) def _required_auth_capability(self): return ['hmac-v4'] @@ -346,16 +347,16 @@ def get_job_output(self, vault_name, job_id, byte_range=None): :type job_id: str :param job_id: The ID of the job. - + :type byte_range: tuple - :param range: A tuple of integer specifying the slice (in bytes) + :param range: A tuple of integer specifying the slice (in bytes) of the archive you want to receive """ uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id) headers = None if byte_range: headers = { 'Range': 'bytes=%s-%s' % (byte_range[0], byte_range[1]) } - header, body = self.make_request('GET', uri, headers=headers, + header, body = self.make_request('GET', uri, headers=headers, ok_responses=(200, 206)) checksum = header.get('x-amz-sha256-tree-hash') # TODO not sure if we want to verify checksum in this abstraction level From e56958d8cb2349f9b356cdd599933a2a9371df3b Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Tue, 28 Aug 2012 10:48:40 +0100 Subject: [PATCH 15/62] Support partial returns from job output (for ranged queries) --- boto/glacier/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/job.py b/boto/glacier/job.py index a3e9ffd4b3..f01ce70b34 100644 --- a/boto/glacier/job.py +++ b/boto/glacier/job.py @@ -54,5 +54,5 @@ def get_output(self, range_from=None, range_to=None): if range_from is not None or range_to is not None: assert range_from is not None and range_to is not None, "If you specify one of range_from or range_to you must specify the other" headers["Range"] = "bytes=%d-%d" % (range_from, range_to) - response = 
self.make_request("GET", "output", headers=headers) + response = self.make_request("GET", "output", headers=headers, ok_responses=(200,206)) return response From dfb127d85adb0f61a29806f5911cd65ceb64c0c7 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Tue, 28 Aug 2012 10:49:38 +0100 Subject: [PATCH 16/62] Updated credit for tree hash code, I'd put the wrong name! --- boto/glacier/writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 9bb9f437f0..b02be8f303 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ -# Some code from Urban Skudnik urban.skudnik@gmail.com +# Tree hash implementation from Aaron Brady bradya@gmail.com # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the From b57485a984e3e6b77495b862c863678e6eac34e6 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Tue, 28 Aug 2012 12:58:39 -0700 Subject: [PATCH 17/62] Trying to get the layer separation sorted out. Also added multipart operations to layer1. 
--- boto/glacier/exceptions.py | 32 ++++- boto/glacier/job.py | 69 ++++++---- boto/glacier/layer1.py | 256 +++++++++++++++++++++++++++++++++---- boto/glacier/layer2.py | 22 +++- boto/glacier/vault.py | 94 ++++++++++---- boto/glacier/writer.py | 161 +++++++++++------------ 6 files changed, 465 insertions(+), 169 deletions(-) diff --git a/boto/glacier/exceptions.py b/boto/glacier/exceptions.py index 480129e994..459f0f123f 100644 --- a/boto/glacier/exceptions.py +++ b/boto/glacier/exceptions.py @@ -1,4 +1,25 @@ -import json +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# class UnexpectedHTTPResponseError(Exception): @@ -9,10 +30,11 @@ def __init__(self, expected_responses, response): try: body = json.loads(self.body) self.code = body["code"] - msg = 'Expected %s, got (%d, code=%s, message=%s)' % (expected_responses, - response.status, - self.code, - body["message"]) + msg = 'Expected %s, got ' % expected_responses + msg += '(%d, code=%s, message=%s)' % (expected_responses, + response.status, + self.code, + body["message"]) except: msg = 'Expected %s, got (%d, %s)' % (expected_responses, response.status, diff --git a/boto/glacier/job.py b/boto/glacier/job.py index a3e9ffd4b3..cdb53bc643 100644 --- a/boto/glacier/job.py +++ b/boto/glacier/job.py @@ -23,36 +23,55 @@ import urllib import json + class Job(object): - def __init__(self, vault, job_id): - self.vault = vault - self.job_id = job_id - def make_request(self, verb, resource, headers=None, - data='', ok_responses=(200,)): - resource = "jobs/%s/%s" % (urllib.quote(self.job_id), resource) - return self.vault.make_request(verb,resource, headers, data,ok_responses) + ResponseDataElements = (('Action', 'action', None), + ('ArchiveId', 'archive_id', None), + ('ArchiveSizeInBytes', 'archive_size', 0), + ('Completed', 'completed', False), + ('CompletionDate', 'completion_date', None), + ('CreationDate', 'creation_date', None), + ('InventorySizeInBytes', 'inventory_size', 0), + ('JobDescription', 'description', None), + ('JobId', 'id', None), + ('SHA256TreeHash', 'sha256_treehash', None), + ('SNSTopic', 'sns_topic', None), + ('StatusCode', 'status_code', None), + ('StatusMessage', 'status_message', None), + ('VaultARN', 'arn', None)) - def get_output(self, range_from=None, range_to=None): - """ - Get the output of a job. In the case of an archive retrieval - job this will be the data of the archive itself. 
+ def __init__(self, vault, response_data=None): + self.vault = vault + if response_data: + for response_name, attr_name, default in self.ResponseDataElements: + setattr(self, attr_name, response_data[response_name]) + else: + for response_name, attr_name, default in self.ResponseDataElements: + setattr(self, attr_name, default) - Optionally, a range can be specified to only get a part of the data. + def __repr__(self): + return 'Job(%s)' % self.arn - - :type range_from: int - :param range_from: The first byte to get + def get_output(self, byte_range=None): + """ + This operation downloads the output of the job. Depending on + the job type you specified when you initiated the job, the + output will be either the content of an archive or a vault + inventory. - :type range_to: int - :param range_to: The last byte to get + You can download all the job output or download a portion of + the output by specifying a byte range. In the case of an + archive retrieval job, depending on the byte range you + specify, Amazon Glacier returns the checksum for the portion + of the data. You can compute the checksum on the client and + verify that the values match to ensure the portion you + downloaded is the correct data. - :rtype: :class:`boto.connection.HttpResponse - :return: A response object from which the output can be read. 
+ :type byte_range: tuple + :param range: A tuple of integer specifying the slice (in bytes) + of the archive you want to receive """ - headers = {} - if range_from is not None or range_to is not None: - assert range_from is not None and range_to is not None, "If you specify one of range_from or range_to you must specify the other" - headers["Range"] = "bytes=%d-%d" % (range_from, range_to) - response = self.make_request("GET", "output", headers=headers) - return response + return self.vault.layer1.get_job_output(self.vault.name, + self.id, + byte_range) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index fc1c3aea71..6b95d4a819 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -64,18 +64,31 @@ def _required_auth_capability(self): return ['hmac-v4'] def make_request(self, verb, resource, headers=None, - data='', ok_responses=(200,)): + data='', ok_responses=(200,), params=None, + response_headers=None): if headers is None: headers = {} headers['x-amz-glacier-version'] = self.Version uri = '/%s/%s' % (self.account_id, resource) + if params: + param_list = [] + for key, value in params: + params_list.append('%s=%s' % (urllib.quote(key), + urllib.quote(value))) + uri += '?' 
+ '&'.join(param_list) response = AWSAuthConnection.make_request(self, verb, uri, headers=headers, data=data) if response.status in ok_responses: - is_json = response.getheader('Content-Type') == 'application/json' - body = json.loads(response.read()) if is_json else response.read() - return dict(response.getheaders()), body + if response.getheader('Content-Type') == 'application/json': + body = json.loads(response.read()) + else: + body = {'Response': response.read()} + body['RequestId'] = response.getheader('x-amzn-requestid') + if response_headers: + for header_name, item_name in response_headers: + body[item_name] = response.getheader(header_name) + return body else: # create glacier-specific exceptions raise UnexpectedHTTPResponseError(ok_responses, response) @@ -114,7 +127,12 @@ def list_vaults(self, limit=None, marker=None): ("") for the marker returns a list of vaults starting from the first vault. """ - return self.make_request('GET', 'vaults')[1] + params = {} + if limit: + params['limit': limit] + if marker: + params['marker': marker] + return self.make_request('GET', 'vaults', params=params) def describe_vault(self, vault_name): """ @@ -133,7 +151,7 @@ def describe_vault(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s' % vault_name - return self.make_request('GET', uri)[1] + return self.make_request('GET', uri) def create_vault(self, vault_name): """ @@ -158,7 +176,8 @@ def create_vault(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s' % vault_name - return self.make_request('PUT', uri, ok_responses=(201,))[1] + return self.make_request('PUT', uri, ok_responses=(201,), + response_headers=[('Location', 'Location')]) def delete_vault(self, vault_name): """ @@ -177,7 +196,7 @@ def delete_vault(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s' % vault_name - return self.make_request('DELETE', uri, ok_responses=(204,))[1] + return 
self.make_request('DELETE', uri, ok_responses=(204,)) def get_vault_notifications(self, vault_name): """ @@ -188,7 +207,7 @@ def get_vault_notifications(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s/notification-configuration' % vault_name - return self.make_request('GET', uri)[1] + return self.make_request('GET', uri) def set_vault_notifications(self, vault_name, notification_config): """ @@ -216,7 +235,7 @@ def set_vault_notifications(self, vault_name, notification_config): uri = 'vaults/%s/notification-configuration' % vault_name json_config = json.dumps(notification_config) return self.make_request('PUT', uri, data=json_config, - ok_responses=(204,))[1] + ok_responses=(204,)) def delete_vault_notifications(self, vault_name): """ @@ -227,12 +246,12 @@ def delete_vault_notifications(self, vault_name): :param vault_name: The name of the new vault """ uri = 'vaults/%s/notification-configuration' % vault_name - return self.make_request('DELETE', uri, ok_responses=(204,))[1] + return self.make_request('DELETE', uri, ok_responses=(204,)) # Jobs - def list_jobs(self, vault_name, completed=None, limit=None, - marker=None, status_code=None): + def list_jobs(self, vault_name, completed=None, status_code=None, + limit=None, marker=None): """ This operation lists jobs for a vault including jobs that are in-progress and jobs that have recently finished. @@ -247,6 +266,11 @@ def list_jobs(self, vault_name, completed=None, limit=None, uncompleted jobs will be returned. If no value is passed, all jobs will be returned. + :type status_code: string + :param status_code: Specifies the type of job status to return. + Valid values are: InProgress|Succeeded|Failed. If not + specified, jobs with all status codes are returned. + :type limit: int :param limit: The maximum number of items returned in the response. 
If you don't specify a value, the List Jobs @@ -260,13 +284,14 @@ def list_jobs(self, vault_name, completed=None, limit=None, continuing the pagination of results started in a previous List Jobs request. - :type status_code: string - :param status_code: Specifies the type of job status to return. - Valid values are: InProgress|Succeeded|Failed. If not - specified, jobs with all status codes are returned. """ + params = {} + if limit: + params['limit': limit] + if marker: + params['marker': marker] uri = 'vaults/%s/jobs' % vault_name - return self.make_request('GET', uri)[1] + return self.make_request('GET', uri, params=params) def describe_job(self, vault_name, job_id): """ @@ -283,7 +308,7 @@ def describe_job(self, vault_name, job_id): :param job_id: The ID of the job. """ uri = 'vaults/%s/jobs/%s' % (vault_name, job_id) - return self.make_request('GET', uri, ok_responses=(200,))[1] + return self.make_request('GET', uri, ok_responses=(200,)) def initiate_job(self, vault_name, job_data): """ @@ -321,9 +346,12 @@ def initiate_job(self, vault_name, job_data): archive-retrieval|inventory-retrieval """ uri = 'vaults/%s/jobs' % vault_name + response_headers = [('x-amz-job-id', 'JobId'), + ('Location', 'Location')] json_job_data = json.dumps(job_data) return self.make_request('POST', uri, data=json_job_data, - ok_responses=(202,))[1] + ok_responses=(202,), + response_headers=response_headers) def get_job_output(self, vault_name, job_id, byte_range=None): """ @@ -347,13 +375,14 @@ def get_job_output(self, vault_name, job_id, byte_range=None): :param job_id: The ID of the job. 
:type byte_range: tuple - :param range: A tuple of integer specifying the slice (in bytes) + :param range: A tuple of integers specifying the slice (in bytes) of the archive you want to receive """ uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id) headers = None if byte_range: - headers = { 'Range': 'bytes=%s-%s' % (byte_range[0], byte_range[1]) } + headers = {'Range': 'bytes=%d-%d' % (byte_range[0], + byte_range[1])} header, body = self.make_request('GET', uri, headers=headers, ok_responses=(200, 206)) checksum = header.get('x-amz-sha256-tree-hash') @@ -410,4 +439,185 @@ def delete_archive(self, vault_name, archive_id): :param archive_id: The ID for the archive to be deleted. """ uri = 'vaults/%s/archives/%s' % (vault_name, archive_id) - return self.make_request('DELETE', uri, ok_responses=(204,))[1] + return self.make_request('DELETE', uri, ok_responses=(204,)) + + # Multipart + + def initiate_multipart_upload(self, vault_name, part_size, + description=None): + """ + Initiate a multipart upload. Amazon Glacier creates a + multipart upload resource and returns it's ID. You use this + ID in subsequent multipart upload operations. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type description: str + :param description: An optional description of the archive. + + :type part_size: int + :param part_size: The size of each part except the last, in bytes. + The part size must be a multiple of 1024 KB multiplied by + a power of 2. The minimum allowable part size is 1MB and the + maximum is 4GB. 
+ """ + response_headers = [('x-amz-multipart-upload-id', 'UploadId'), + ('Location', 'Location')] + headers = {'x-amz-part-size': str(part_size)} + if description: + headers['x-amz-archive-description'] = description + uri = 'vaults/%s/%s/multipart-uploads' % vault_name + response = self.make_request('POST', uri, headers=headers, + ok_responses=(201,), + response_headers=response_headers) + return response + + def complete_multipart_upload(self, vault_name, upload_id, + sha256_treehash, archive_size): + """ + Call this to inform Amazon Glacier that all of the archive parts + have been uploaded and Amazon Glacier can now assemble the archive + from the uploaded parts. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + + :type sha256_treehash: str + :param sha256_treehash: The SHA256 tree hash of the entire + archive. It is the tree hash of SHA256 tree hash of the + individual parts. If the value you specify in the request + does not match the SHA256 tree hash of the final assembled + archive as computed by Amazon Glacier, Amazon Glacier + returns an error and the request fails. + + :type archive_size: int + :param archive_size: The total size, in bytes, of the entire + archive. This value should be the sum of all the sizes of + the individual parts that you uploaded. + """ + response_headers = [('x-amz-archive-id', 'ArchiveId'), + ('Location', 'Location')] + headers = {'x-amz-sha256-tree-hash': sha256_treehash, + 'x-amz-archive-size': str(part_size)} + uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + response = self.make_request('POST', uri, headers=headers, + ok_responses=(201,), + response_headers=response_headers) + return response + + def abort_multipart_upload(self, vault_name, upload_id): + """ + Call this to abort a multipart upload identified by the upload ID. 
+ + :type vault_name: str + :param vault_name: The name of the vault. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + """ + uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + return self.make_request('DELETE', uri, ok_responses=(204,)) + + def list_multipart_uploads(self, vault_name, limit=None, marker=None): + """ + Lists in-progress multipart uploads for the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type limit: int + :param limit: The maximum number of items returned in the + response. If you don't specify a value, the operation + returns up to 1,000 items. + + :type marker: str + :param marker: An opaque string used for pagination. marker + specifies the item at which the listing should + begin. Get the marker value from a previous + response. You need only include the marker if you are + continuing the pagination of results started in a previous + request. + """ + params = {} + if limit: + params['limit': limit] + if marker: + params['marker': marker] + uri = 'vaults/%s/%s/multipart-uploads' % vault_name + return self.make_request('GET', uri, params=params) + + def list_parts(self, vault_name, upload_id, limit=None, marker=None): + """ + Lists in-progress multipart uploads for the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + + :type limit: int + :param limit: The maximum number of items returned in the + response. If you don't specify a value, the operation + returns up to 1,000 items. + + :type marker: str + :param marker: An opaque string used for pagination. marker + specifies the item at which the listing should + begin. Get the marker value from a previous + response. You need only include the marker if you are + continuing the pagination of results started in a previous + request. 
+ """ + params = {} + if limit: + params['limit': limit] + if marker: + params['marker': marker] + uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + return self.make_request('GET', uri, params=params) + + def upload_part(self, vault_name, upload_id, linear_hash, + treehash, range, part_data): + """ + Lists in-progress multipart uploads for the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type linear_hash: str + :param linear_hash: The SHA256 checksum (a linear hash) of the + payload. + + :type tree_hash: str + :param tree_hash: The user-computed SHA256 tree hash of the + payload. For more information on computing the + tree hash, see http://goo.gl/u7chF. + + :type upload_id: str + :param upload_id: The unique ID associated with this upload + operation. + + :type range: tuple of ints + :param range: Identfies the range of bytes in the assembled + archive that will be uploaded in this part. + + :type part_data: bytes + :param part_data: The data to be uploaded for the part + """ + headers = {'x-amz-content-sha256': linear_hash, + 'x-amz-sha256-tree-hash': tree_hash, + 'Content-Range': 'bytes=%d-%d' % (byte_range[0], + byte_range[1])} + response_headers = [('x-amz-sha256-tree-hash', 'TreeHash')] + uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + return self.make_request('PUT', uri, headers=headers, + data=part_data, ok_responses=(204,), + response_headers=response_headers) diff --git a/boto/glacier/layer2.py b/boto/glacier/layer2.py index a5fee1c062..294480383d 100644 --- a/boto/glacier/layer2.py +++ b/boto/glacier/layer2.py @@ -33,15 +33,29 @@ class Layer2(object): def __init__(self, *args, **kwargs): self.layer1 = Layer1(*args, **kwargs) + def create_vault(self, name): + return self.layer1.create_vault(name) + def get_vault(self, name): """ Get an object representing a named vault from Glacier. This operation does not check if the vault actually exists. 
- :type name: syr + :type name: str :param name: The name of the vault - :rtype: :class:`boto.glaicer.vault.Vault` - :return: A Valut object representing the vault. + :rtype: :class:`boto.glacier.vault.Vault` + :return: A Vault object representing the vault. + """ + response_data = self.layer1.describe_vault(name) + return Vault(self.layer1, response_data) + + def list_vaults(self): + """ + Return a list of all vaults associated with the account ID. + + :rtype: List of :class:`boto.glacier.vault.Vault` + :return: A list of Vault objects. """ - return Vault(self.layer1, name) + response_data = self.layer1.list_vaults() + return [Vault(self.layer1, rd) for rd in response_data['VaultList']] diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 3a67afd00c..07ca2853b4 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -31,15 +31,24 @@ class Vault(object): DefaultPartSize = 4 * 1024 * 1024 #128MB - def __init__(self, layer1, name): + ResponseDataElements = (('VaultName', 'name', None), + ('VaultARN', 'arn', None), + ('CreationDate', 'creation_date', None), + ('LastInventoryDate', 'last_inventory_date', None), + ('SizeInBytes', 'size', 0), + ('NumberOfArchives', 'number_of_archives', 0)) + + def __init__(self, layer1, response_data=None): self.layer1 = layer1 - self.name = name + if response_data: + for response_name, attr_name, default in self.ResponseDataElements: + setattr(self, attr_name, response_data[response_name]) + else: + for response_name, attr_name, default in self.ResponseDataElements: + setattr(self, attr_name, default) - def make_request(self, verb, resource, headers=None, - data='', ok_responses=(200,)): - resource = "vaults/%s/%s" % (urllib.quote(self.name), resource) - return self.layer1.make_request(verb,resource, headers, - data,ok_responses) + def __repr__(self): + return 'Vault("%s")' % self.arn def create_archive_writer(self, part_size=DefaultPartSize): """ @@ -59,14 +68,10 @@ def create_archive_writer(self, 
part_size=DefaultPartSize): :return: A Writer object that to which the archive data should be written. """ - - headers = { - "x-amz-part-size": str(part_size) - } - response = self.make_request("POST", "multipart-uploads", - headers=headers, ok_responses=(201,)) - upload_id = response.getheader("x-amz-multipart-upload-id") - return Writer(self, upload_id, part_size=part_size) + response = self.layer1.initiate_multipart_upload(self.name, + part_size, + description) + return Writer(self, response['UploadId'], part_size=part_size) def create_archive_from_file(self, file=None, file_obj=None): """ @@ -93,7 +98,8 @@ def create_archive_from_file(self, file=None, file_obj=None): writer.close() return writer.get_archive_id() - def retrieve_archive(self, archive_name, sns_topic=None, description=None): + def retrieve_archive(self, archive_name, sns_topic=None, + description=None, format='JSON'): """ Initiate a archive retrieval job to download the data from an archive. You will need to wait for the notification from @@ -103,21 +109,31 @@ def retrieve_archive(self, archive_name, sns_topic=None, description=None): :type archive_name: str :param archive_name: The name of the archive - :rtype: :class:`boto.glaicer.job.Job` + :type description: str + :param description: An optional description for the job. + + :type sns_topic: str + :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier + sends notification when the job is completed and the output + is ready for you to download. + + :type format: str + :param format: Specify the output format. Valid values are: + CSV|JSON. Default is JSON. + + :rtype: :class:`boto.glacier.job.Job` :return: A Job object representing the retrieval job. 
""" - params = {"Type": "archive-retrieval", "ArchiveId": archive_name} + job_data = {'Type': 'archive-retrieval', + 'ArchiveId': archive_name, + 'Format': format} if sns_topic is not None: - params["SNSTopic"] = sns_topic + job_data['SNSTopic'] = sns_topic if description is not None: - params["Description"] = description + job_data['Description'] = description - response = self.make_request("POST", "jobs", None, - json.dumps(params), - ok_responses=(202,)) - job_id = response.getheader("x-amz-job-id") - job = Job(self, job_id) - return job + response = self.layer1.initiate_job(self.name, job_data) + return response['JobId'] def get_job(self, job_id): """ @@ -129,4 +145,28 @@ def get_job(self, job_id): :rtype: :class:`boto.glaicer.job.Job` :return: A Job object representing the job. """ - return Job(self, job_id) + response_data = self.layer1.describe_job(job_id) + return Job(self, response_data) + + def list_jobs(self, completed=None, status_code=None): + """ + Return a list of Job objects related to this vault. + + :type completed: boolean + :param completed: Specifies the state of the jobs to return. + If a value of True is passed, only completed jobs will + be returned. If a value of False is passed, only + uncompleted jobs will be returned. If no value is + passed, all jobs will be returned. + + :type status_code: string + :param status_code: Specifies the type of job status to return. + Valid values are: InProgress|Succeeded|Failed. If not + specified, jobs with all status codes are returned. + + :rtype: list of :class:`boto.glaicer.job.Job` + :return: A list of Job objects related to this vault. 
+ """ + response_data = self.layer1.list_jobs(self.name, completed, + status_code) + return [Job(self, jd) for jd in response_data['JobList']] diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 95a50829c4..0a8eea0931 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -29,14 +29,14 @@ def chunk_hashes(str): - """ - Break up the byte-string into 1MB chunks and return sha256 hashes - for each. - """ - chunk = 1024 * 1024 - chunk_count = int(math.ceil(len(str) / float(chunk))) - chunks = [str[i * chunk:(i + 1) * chunk] for i in range(chunk_count)] - return [hashlib.sha256(x).digest() for x in chunks] + """ + Break up the byte-string into 1MB chunks and return sha256 hashes + for each. + """ + chunk = 1024 * 1024 + chunk_count = int(math.ceil(len(str) / float(chunk))) + chunks = [str[i * chunk:(i + 1) * chunk] for i in range(chunk_count)] + return [hashlib.sha256(x).digest() for x in chunks] def tree_hash(fo): @@ -64,83 +64,74 @@ def tree_hash(fo): def bytes_to_hex(str): - return ''.join(["%02x" % ord(x) for x in str]).strip() + return ''.join(["%02x" % ord(x) for x in str]).strip() class Writer(object): - """ - Presents a file-like object for writing to a Amazon Glacier - Archive. The data is written using the multi-part upload API. 
- """ - def __init__(self, vault, upload_id, part_size): - self.vault = vault - self.upload_id = upload_id - self.part_size = part_size - self.buffer_size = 0 - self.uploaded_size = 0 - self.buffer = [] - self.vault = vault - self.tree_hashes = [] - self.archive_location = None - self.closed = False - - def make_request(self, verb, headers=None, - data='', ok_responses=(200,)): - resource = "multipart-uploads/%s" % (urllib.quote(self.upload_id),) - return self.vault.make_request(verb, resource, headers, data, - ok_responses) - - def send_part(self): - buf = "".join(self.buffer) - # Put back any data remaining over the part size into the - # buffer - if len(buf) > self.part_size: - self.buffer = [buf[self.part_size:]] - self.buffer_size = len(self.buffer[0]) - else: - self.buffer = [] - self.buffer_size = 0 - # The part we will send - part = buf[:self.part_size] - # Create a request and sign it - part_tree_hash = tree_hash(chunk_hashes(part)) - self.tree_hashes.append(part_tree_hash) - - headers = { - "Content-Range": "bytes %d-%d/*" % (self.uploaded_size, (self.uploaded_size+len(part))-1), - "Content-Length": str(len(part)), - "Content-Type": "application/octet-stream", - "x-amz-sha256-tree-hash": bytes_to_hex(part_tree_hash), - "x-amz-content-sha256": hashlib.sha256(part).hexdigest() - } - - repsonse = self.make_request("PUT", headers, part, ok_responses=(204,)) - - self.uploaded_size += len(part) - - def write(self, str): - assert not self.closed, "Tried to write to a Writer that is already closed!" 
- if str == "": - return - self.buffer.append(str) - self.buffer_size += len(str) - while self.buffer_size > self.part_size: - self.send_part() - - def close(self): - if self.closed: - return - if self.buffer_size > 0: - self.send_part() - # Complete the multiplart glacier upload - headers = { - "x-amz-sha256-tree-hash": bytes_to_hex(tree_hash(self.tree_hashes)), - "x-amz-archive-size": str(self.uploaded_size) - } - response = self.make_request("POST", headers, ok_responses=(201,)) - self.archive_id = response.getheader("x-amz-archive-id") - self.closed = True - - def get_archive_id(self): - self.close() - return self.archive_id + """ + Presents a file-like object for writing to a Amazon Glacier + Archive. The data is written using the multi-part upload API. + """ + def __init__(self, vault, upload_id, part_size): + self.vault = vault + self.upload_id = upload_id + self.part_size = part_size + self.buffer_size = 0 + self.uploaded_size = 0 + self.buffer = [] + self.vault = vault + self.tree_hashes = [] + self.archive_location = None + self.closed = False + + def send_part(self): + buf = "".join(self.buffer) + # Put back any data remaining over the part size into the + # buffer + if len(buf) > self.part_size: + self.buffer = [buf[self.part_size:]] + self.buffer_size = len(self.buffer[0]) + else: + self.buffer = [] + self.buffer_size = 0 + # The part we will send + part = buf[:self.part_size] + # Create a request and sign it + part_tree_hash = tree_hash(chunk_hashes(part)) + self.tree_hashes.append(part_tree_hash) + + tree_hash = bytes_to_hex(part_tree_hash) + linear_hash = hashlib.sha256(part).hexdigest() + content_range = (self.uploaded_size, + (self.uploaded_size+len(part))-1) + response = self.vault.layer1.upload_part(self.vault.name, + self.upload_id, + linear_hash, + tree_hash, + content_range, part) + self.uploaded_size += len(part) + + def write(self, str): + assert not self.closed, "Tried to write to a Writer that is already closed!" 
+ if str == "": + return + self.buffer.append(str) + self.buffer_size += len(str) + while self.buffer_size > self.part_size: + self.send_part() + + def close(self): + if self.closed: + return + if self.buffer_size > 0: + self.send_part() + # Complete the multiplart glacier upload + tree_hash = bytes_to_hex(tree_hash(self.tree_hashes)) + response = self.vault.layer1.complete_multipart_upload(tree_hash, + self.uploaded_size) + self.archive_id = response['ArchiveId'] + self.closed = True + + def get_archive_id(self): + self.close() + return self.archive_id From 54c84d14bc79c79e55f58a673dc77ebb1a33aa62 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Tue, 28 Aug 2012 14:14:10 -0700 Subject: [PATCH 18/62] Fixed a bug in get_job_output. It was still expecting the old-style response from make_request. --- boto/glacier/layer1.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 6b95d4a819..ff701642cf 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -378,17 +378,20 @@ def get_job_output(self, vault_name, job_id, byte_range=None): :param range: A tuple of integers specifying the slice (in bytes) of the archive you want to receive """ - uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id) + response_headers = [('x-amz-sha256-tree-hash', 'TreeHash'), + ('Content-Range': 'ContentRange'), + ('Content-Type': 'ContentType')] headers = None if byte_range: headers = {'Range': 'bytes=%d-%d' % (byte_range[0], byte_range[1])} - header, body = self.make_request('GET', uri, headers=headers, - ok_responses=(200, 206)) - checksum = header.get('x-amz-sha256-tree-hash') + uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id) + response = self.make_request('GET', uri, headers=headers, + ok_responses=(200, 206), + response_headers=response_headers) # TODO not sure if we want to verify checksum in this abstraction level # and do a retry? 
- return (checksum, body) + return response # Archives From b55c4f12dc99787b83a8fc11d306882d081f1b46 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Tue, 28 Aug 2012 14:48:31 -0700 Subject: [PATCH 19/62] Fixed a typo bug. --- boto/glacier/layer1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index ff701642cf..aacfd9bfdf 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -379,8 +379,8 @@ def get_job_output(self, vault_name, job_id, byte_range=None): of the archive you want to receive """ response_headers = [('x-amz-sha256-tree-hash', 'TreeHash'), - ('Content-Range': 'ContentRange'), - ('Content-Type': 'ContentType')] + ('Content-Range', 'ContentRange'), + ('Content-Type', 'ContentType')] headers = None if byte_range: headers = {'Range': 'bytes=%d-%d' % (byte_range[0], From 54914f41a072e8ed823789377a2816e145b80926 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Tue, 28 Aug 2012 16:51:43 -0700 Subject: [PATCH 20/62] Fixing typos. Fixes #946. 
--- boto/glacier/layer1.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index aacfd9bfdf..a0533ab09b 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -470,7 +470,7 @@ def initiate_multipart_upload(self, vault_name, part_size, headers = {'x-amz-part-size': str(part_size)} if description: headers['x-amz-archive-description'] = description - uri = 'vaults/%s/%s/multipart-uploads' % vault_name + uri = 'vaults/%s/multipart-uploads' % vault_name response = self.make_request('POST', uri, headers=headers, ok_responses=(201,), response_headers=response_headers) @@ -506,8 +506,8 @@ def complete_multipart_upload(self, vault_name, upload_id, response_headers = [('x-amz-archive-id', 'ArchiveId'), ('Location', 'Location')] headers = {'x-amz-sha256-tree-hash': sha256_treehash, - 'x-amz-archive-size': str(part_size)} - uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + 'x-amz-archive-size': str(archive_size)} + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) response = self.make_request('POST', uri, headers=headers, ok_responses=(201,), response_headers=response_headers) @@ -588,7 +588,7 @@ def list_parts(self, vault_name, upload_id, limit=None, marker=None): return self.make_request('GET', uri, params=params) def upload_part(self, vault_name, upload_id, linear_hash, - treehash, range, part_data): + tree_hash, byte_range, part_data): """ Lists in-progress multipart uploads for the specified vault. @@ -608,8 +608,8 @@ def upload_part(self, vault_name, upload_id, linear_hash, :param upload_id: The unique ID associated with this upload operation. - :type range: tuple of ints - :param range: Identfies the range of bytes in the assembled + :type byte_range: tuple of ints + :param byte_range: Identfies the range of bytes in the assembled archive that will be uploaded in this part. 
:type part_data: bytes @@ -620,7 +620,7 @@ def upload_part(self, vault_name, upload_id, linear_hash, 'Content-Range': 'bytes=%d-%d' % (byte_range[0], byte_range[1])} response_headers = [('x-amz-sha256-tree-hash', 'TreeHash')] - uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('PUT', uri, headers=headers, data=part_data, ok_responses=(204,), response_headers=response_headers) From 05ff4258541684c4c6aa36ddcdd112a6f17c68d2 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Tue, 28 Aug 2012 17:21:14 -0700 Subject: [PATCH 21/62] Fixing unit tests to work again. --- boto/glacier/layer1.py | 26 ++++++++-------- tests/unit/glacier/test_layer1.py | 50 ++++++++++++++++++------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index a0533ab09b..b981bb4271 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -83,8 +83,8 @@ def make_request(self, verb, resource, headers=None, if response.getheader('Content-Type') == 'application/json': body = json.loads(response.read()) else: - body = {'Response': response.read()} - body['RequestId'] = response.getheader('x-amzn-requestid') + body = {u'Response': response.read()} + body[u'RequestId'] = response.getheader('x-amzn-requestid') if response_headers: for header_name, item_name in response_headers: body[item_name] = response.getheader(header_name) @@ -177,7 +177,7 @@ def create_vault(self, vault_name): """ uri = 'vaults/%s' % vault_name return self.make_request('PUT', uri, ok_responses=(201,), - response_headers=[('Location', 'Location')]) + response_headers=[('uLocation', 'uLocation')]) def delete_vault(self, vault_name): """ @@ -346,8 +346,8 @@ def initiate_job(self, vault_name, job_data): archive-retrieval|inventory-retrieval """ uri = 'vaults/%s/jobs' % vault_name - response_headers = [('x-amz-job-id', 'JobId'), - ('Location', 'Location')] 
+ response_headers = [('x-amz-job-id', u'JobId'), + ('Location', u'Location')] json_job_data = json.dumps(job_data) return self.make_request('POST', uri, data=json_job_data, ok_responses=(202,), @@ -378,9 +378,9 @@ def get_job_output(self, vault_name, job_id, byte_range=None): :param range: A tuple of integers specifying the slice (in bytes) of the archive you want to receive """ - response_headers = [('x-amz-sha256-tree-hash', 'TreeHash'), - ('Content-Range', 'ContentRange'), - ('Content-Type', 'ContentType')] + response_headers = [('x-amz-sha256-tree-hash', u'TreeHash'), + ('Content-Range', u'ContentRange'), + ('Content-Type', u'ContentType')] headers = None if byte_range: headers = {'Range': 'bytes=%d-%d' % (byte_range[0], @@ -465,8 +465,8 @@ def initiate_multipart_upload(self, vault_name, part_size, a power of 2. The minimum allowable part size is 1MB and the maximum is 4GB. """ - response_headers = [('x-amz-multipart-upload-id', 'UploadId'), - ('Location', 'Location')] + response_headers = [('x-amz-multipart-upload-id', u'UploadId'), + ('Location', u'Location')] headers = {'x-amz-part-size': str(part_size)} if description: headers['x-amz-archive-description'] = description @@ -503,8 +503,8 @@ def complete_multipart_upload(self, vault_name, upload_id, archive. This value should be the sum of all the sizes of the individual parts that you uploaded. 
""" - response_headers = [('x-amz-archive-id', 'ArchiveId'), - ('Location', 'Location')] + response_headers = [('x-amz-archive-id', u'ArchiveId'), + ('Location', u'Location')] headers = {'x-amz-sha256-tree-hash': sha256_treehash, 'x-amz-archive-size': str(archive_size)} uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) @@ -619,7 +619,7 @@ def upload_part(self, vault_name, upload_id, linear_hash, 'x-amz-sha256-tree-hash': tree_hash, 'Content-Range': 'bytes=%d-%d' % (byte_range[0], byte_range[1])} - response_headers = [('x-amz-sha256-tree-hash', 'TreeHash')] + response_headers = [('x-amz-sha256-tree-hash', u'TreeHash')] uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('PUT', uri, headers=headers, data=part_data, ok_responses=(204,), diff --git a/tests/unit/glacier/test_layer1.py b/tests/unit/glacier/test_layer1.py index c121abf990..afad423881 100644 --- a/tests/unit/glacier/test_layer1.py +++ b/tests/unit/glacier/test_layer1.py @@ -1,6 +1,8 @@ from tests.unit import AWSMockServiceTestCase from boto.glacier.layer1 import Layer1 import json +import copy + class GlacierLayer1ConnectionBase(AWSMockServiceTestCase): connection_class = Layer1 @@ -11,12 +13,13 @@ def setUp(self): self.vault_name = u'examplevault' self.vault_arn = 'arn:aws:glacier:us-east-1:012345678901:vaults/' + \ self.vault_name - self.vault_info = {'CreationDate':'2012-03-16T22:22:47.214Z', - 'LastInventoryDate': '2012-03-21T22:06:51.218Z', - 'NumberOfArchives': 2, - 'SizeInBytes': 12334, - 'VaultARN': self.vault_arn, - 'VaultName': self.vault_name } + self.vault_info = {u'CreationDate': u'2012-03-16T22:22:47.214Z', + u'LastInventoryDate': u'2012-03-21T22:06:51.218Z', + u'NumberOfArchives': 2, + u'SizeInBytes': 12334, + u'VaultARN': self.vault_arn, + u'VaultName': self.vault_name} + class GlacierVaultsOperations(GlacierLayer1ConnectionBase): @@ -25,16 +28,18 @@ def test_create_vault_parameters(self): 
self.service_connection.create_vault(self.vault_name) def test_list_vaults(self): - content = {'Marker': None, - 'VaultList': [self.vault_info]} - self.set_http_response(status_code=200, header=self.json_header, + content = {u'Marker': None, + u'RequestId': None, + u'VaultList': [self.vault_info]} + self.set_http_response(status_code=200, header=self.json_header, body=json.dumps(content)) api_response = self.service_connection.list_vaults() self.assertDictEqual(content, api_response) def test_describe_vaults(self): - content = self.vault_info - self.set_http_response(status_code=200, header=self.json_header, + content = copy.copy(self.vault_info) + content[u'RequestId'] = None + self.set_http_response(status_code=200, header=self.json_header, body=json.dumps(content)) api_response = self.service_connection.describe_vault(self.vault_name) self.assertDictEqual(content, api_response) @@ -42,29 +47,32 @@ def test_describe_vaults(self): def test_delete_vault(self): self.set_http_response(status_code=204) self.service_connection.delete_vault(self.vault_name) - + + class GlacierJobOperations(GlacierLayer1ConnectionBase): def setUp(self): super(GlacierJobOperations, self).setUp() - self.job_content = 'abc'*1024 + self.job_content = 'abc' * 1024 def test_initiate_archive_job(self): - content = {'Type': 'archive-retrieval', - 'ArchiveId': 'AAABZpJrTyioDC_HsOmHae8EZp_uBSJr6cnGOLKp_XJCl-Q', - 'Description': 'Test Archive', - 'SNSTopic': 'Topic'} + content = {u'Type': u'archive-retrieval', + u'ArchiveId': u'AAABZpJrTyioDC_HsOmHae8EZp_uBSJr6cnGOLKp_XJCl-Q', + u'Description': u'Test Archive', + u'SNSTopic': u'Topic', + u'JobId': None, + u'Location': None, + u'RequestId': None} self.set_http_response(status_code=202, header=self.json_header, body=json.dumps(content)) api_response = self.service_connection.initiate_job(self.vault_name, self.job_content) self.assertDictEqual(content, api_response) - + def test_get_archive_output(self): header = [('Content-Type', 
'application/octet-stream')] self.set_http_response(status_code=200, header=header, body=self.job_content) - _, data = self.service_connection.get_job_output(self.vault_name, + response = self.service_connection.get_job_output(self.vault_name, 'example-job-id') - self.assertEqual(self.job_content, data) - + self.assertEqual(self.job_content, response['Response']) From 24d13c7ae6091791407798ed14ab48708399387f Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Wed, 29 Aug 2012 09:51:53 -0700 Subject: [PATCH 22/62] I have the layer1 upload_archive method working now, although it's difficult to test since archive inventories are only constructed once a day in Glacier. Have not been able to get the Writer to work at all yet. --- boto/glacier/layer1.py | 6 +++++- boto/glacier/vault.py | 8 +++----- boto/glacier/writer.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index b981bb4271..7f2faada8a 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -422,6 +422,9 @@ def upload_archive(self, vault_name, archive, :type description: str :param description: An optional description of the archive. 
""" + response_headers = [('x-amz-archive-id', u'ArchiveId'), + ('Location', u'Location'), + ('x-amz-sha256-tree-hash', u'TreeHash')] uri = 'vaults/%s/archives' % vault_name headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, @@ -429,7 +432,8 @@ def upload_archive(self, vault_name, archive, if description: headers['x-amz-archive-description'] = description return self.make_request('POST', uri, headers=headers, - data=archive, ok_responses=(201,)) + data=archive, ok_responses=(201,), + response_headers=response_headers) def delete_archive(self, vault_name, archive_id): """ diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 07ca2853b4..54d5341f13 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -50,7 +50,8 @@ def __init__(self, layer1, response_data=None): def __repr__(self): return 'Vault("%s")' % self.arn - def create_archive_writer(self, part_size=DefaultPartSize): + def create_archive_writer(self, part_size=DefaultPartSize, + description=None): """ Create a new archive and begin a multi-part upload to it. Returns a file-like object to which the data for the archive @@ -58,9 +59,6 @@ def create_archive_writer(self, part_size=DefaultPartSize): object should be closed, you can then call the get_archive_id method on it to get the ID of the created archive. - :type archive_name: str - :param archive_name: The name of the archive - :type part_size: int :param part_size: The part size for the multipart upload. 
@@ -89,7 +87,7 @@ def create_archive_from_file(self, file=None, file_obj=None): """ if not file_obj: file_obj = open(file, "rb") - writer = self.create_archive_writer(archive_name) + writer = self.create_archive_writer() while True: data = file_obj.read(1024 * 1024 * 4) if not data: diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 1713071332..e9ab676ce9 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -100,14 +100,14 @@ def send_part(self): part_tree_hash = tree_hash(chunk_hashes(part)) self.tree_hashes.append(part_tree_hash) - tree_hash = bytes_to_hex(part_tree_hash) + hex_tree_hash = bytes_to_hex(part_tree_hash) linear_hash = hashlib.sha256(part).hexdigest() content_range = (self.uploaded_size, (self.uploaded_size+len(part))-1) response = self.vault.layer1.upload_part(self.vault.name, self.upload_id, linear_hash, - tree_hash, + hex_tree_hash, content_range, part) self.uploaded_size += len(part) From b5ae5c805c984bd8a2db23293c536e56318ddd2e Mon Sep 17 00:00:00 2001 From: moliware Date: Fri, 31 Aug 2012 12:41:08 +0200 Subject: [PATCH 23/62] Single operation upload for Vault class. 
Also a method that decides the upload method (single operation / multipart) --- boto/glacier/layer1.py | 32 ++++++++++++++-------------- boto/glacier/vault.py | 47 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 7f2faada8a..9314f2a0ba 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -83,8 +83,8 @@ def make_request(self, verb, resource, headers=None, if response.getheader('Content-Type') == 'application/json': body = json.loads(response.read()) else: - body = {u'Response': response.read()} - body[u'RequestId'] = response.getheader('x-amzn-requestid') + body = {'Response': response.read()} + body['RequestId'] = response.getheader('x-amzn-requestid') if response_headers: for header_name, item_name in response_headers: body[item_name] = response.getheader(header_name) @@ -346,8 +346,8 @@ def initiate_job(self, vault_name, job_data): archive-retrieval|inventory-retrieval """ uri = 'vaults/%s/jobs' % vault_name - response_headers = [('x-amz-job-id', u'JobId'), - ('Location', u'Location')] + response_headers = [('x-amz-job-id', 'JobId'), + ('Location', 'Location')] json_job_data = json.dumps(job_data) return self.make_request('POST', uri, data=json_job_data, ok_responses=(202,), @@ -378,9 +378,9 @@ def get_job_output(self, vault_name, job_id, byte_range=None): :param range: A tuple of integers specifying the slice (in bytes) of the archive you want to receive """ - response_headers = [('x-amz-sha256-tree-hash', u'TreeHash'), - ('Content-Range', u'ContentRange'), - ('Content-Type', u'ContentType')] + response_headers = [('x-amz-sha256-tree-hash', 'TreeHash'), + ('Content-Range', 'ContentRange'), + ('Content-Type', 'ContentType')] headers = None if byte_range: headers = {'Range': 'bytes=%d-%d' % (byte_range[0], @@ -422,9 +422,9 @@ def upload_archive(self, vault_name, archive, :type description: str :param description: An optional description of 
the archive. """ - response_headers = [('x-amz-archive-id', u'ArchiveId'), - ('Location', u'Location'), - ('x-amz-sha256-tree-hash', u'TreeHash')] + response_headers = [('x-amz-archive-id', 'ArchiveId'), + ('Location', 'Location'), + ('x-amz-sha256-tree-hash', 'TreeHash')] uri = 'vaults/%s/archives' % vault_name headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, @@ -469,8 +469,8 @@ def initiate_multipart_upload(self, vault_name, part_size, a power of 2. The minimum allowable part size is 1MB and the maximum is 4GB. """ - response_headers = [('x-amz-multipart-upload-id', u'UploadId'), - ('Location', u'Location')] + response_headers = [('x-amz-multipart-upload-id', 'UploadId'), + ('Location', 'Location')] headers = {'x-amz-part-size': str(part_size)} if description: headers['x-amz-archive-description'] = description @@ -507,8 +507,8 @@ def complete_multipart_upload(self, vault_name, upload_id, archive. This value should be the sum of all the sizes of the individual parts that you uploaded. 
""" - response_headers = [('x-amz-archive-id', u'ArchiveId'), - ('Location', u'Location')] + response_headers = [('x-amz-archive-id', 'ArchiveId'), + ('Location', 'Location')] headers = {'x-amz-sha256-tree-hash': sha256_treehash, 'x-amz-archive-size': str(archive_size)} uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) @@ -621,9 +621,9 @@ def upload_part(self, vault_name, upload_id, linear_hash, """ headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, - 'Content-Range': 'bytes=%d-%d' % (byte_range[0], + 'Content-Range': 'bytes %d-%d/*' % (byte_range[0], byte_range[1])} - response_headers = [('x-amz-sha256-tree-hash', u'TreeHash')] + response_headers = [('x-amz-sha256-tree-hash', 'TreeHash')] uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('PUT', uri, headers=headers, data=part_data, ok_responses=(204,), diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 54d5341f13..500272c743 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -22,9 +22,11 @@ # from .job import Job -from .writer import Writer -import urllib +from .writer import Writer, bytes_to_hex, chunk_hashes, tree_hash +import hashlib import json +import os.path +import urllib class Vault(object): @@ -42,7 +44,10 @@ def __init__(self, layer1, response_data=None): self.layer1 = layer1 if response_data: for response_name, attr_name, default in self.ResponseDataElements: - setattr(self, attr_name, response_data[response_name]) + value = response_data[response_name] + if isinstance(value, unicode): + value.encode('utf8') + setattr(self, attr_name, value) else: for response_name, attr_name, default in self.ResponseDataElements: setattr(self, attr_name, default) @@ -50,6 +55,41 @@ def __init__(self, layer1, response_data=None): def __repr__(self): return 'Vault("%s")' % self.arn + def upload_archive(self, filename): + """ + Adds an archive to a vault. 
For archives greater than 100MB the + multipart upload will be used. + + :type file: str + :param file: A filename to upload + + :rtype: str + :return: The archive id of the newly created archive + """ + megabyte = 1024 * 1024 + if os.path.getsize(filename) > 100 * megabyte: + return self.create_archive_from_file(filename) + return self.upload_archive_single_operation(filename) + + def upload_archive_single_operation(self, filename): + """ + Adds an archive to a vault in a single operation. It's recommended for + archives less than 100MB + :type file: str + :param file: A filename to upload + + :rtype: str + :return: The archive id of the newly created archive + """ + archive = '' + with open(filename, 'rb') as fd: + archive = fd.read() + linear_hash = hashlib.sha256(archive).hexdigest() + hex_tree_hash = bytes_to_hex(tree_hash(chunk_hashes(archive))) + response = self.layer1.upload_archive(self.name, archive, linear_hash, + hex_tree_hash) + return response['ArchiveId'] + def create_archive_writer(self, part_size=DefaultPartSize, description=None): """ @@ -87,6 +127,7 @@ def create_archive_from_file(self, file=None, file_obj=None): """ if not file_obj: file_obj = open(file, "rb") + writer = self.create_archive_writer() while True: data = file_obj.read(1024 * 1024 * 4) From 77f2697b3ee0a435d340d9265372cc7205bbd23a Mon Sep 17 00:00:00 2001 From: moliware Date: Fri, 31 Aug 2012 18:28:55 +0200 Subject: [PATCH 24/62] Fixed syntax errors in Writer.close --- boto/glacier/writer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index e9ab676ce9..4efeda0e76 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -126,8 +126,10 @@ def close(self): if self.buffer_size > 0: self.send_part() # Complete the multiplart glacier upload - tree_hash = bytes_to_hex(tree_hash(self.tree_hashes)) - response = self.vault.layer1.complete_multipart_upload(tree_hash, + hex_tree_hash = 
bytes_to_hex(tree_hash(self.tree_hashes)) + response = self.vault.layer1.complete_multipart_upload(self.vault.name, + self.upload_id, + hex_tree_hash, self.uploaded_size) self.archive_id = response['ArchiveId'] self.closed = True From 1f282e06288cc479009f71e51545d00013b00eda Mon Sep 17 00:00:00 2001 From: Patrick Lucas Date: Fri, 31 Aug 2012 18:19:14 -0400 Subject: [PATCH 25/62] Fix small typo introduced in 05ff4258 --- boto/glacier/layer1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 7f2faada8a..e9e0a3490a 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -177,7 +177,7 @@ def create_vault(self, vault_name): """ uri = 'vaults/%s' % vault_name return self.make_request('PUT', uri, ok_responses=(201,), - response_headers=[('uLocation', 'uLocation')]) + response_headers=[(u'Location', u'Location')]) def delete_vault(self, vault_name): """ From 33e0c0803de7edd96b2d1d8fca4a14e5dc828ca3 Mon Sep 17 00:00:00 2001 From: Gerd Zellweger Date: Sat, 1 Sep 2012 14:26:41 +0200 Subject: [PATCH 26/62] Fixed some issues with multi-part upload. Remove equality sign in Content-Range header as this leads to InvalidParameterValueException. Also fix call to complete_multipart_upload and resolve name clash between local variable and function. 
--- boto/glacier/layer1.py | 6 ++---- boto/glacier/writer.py | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index e9e0a3490a..2a41cfe28f 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -383,8 +383,7 @@ def get_job_output(self, vault_name, job_id, byte_range=None): ('Content-Type', u'ContentType')] headers = None if byte_range: - headers = {'Range': 'bytes=%d-%d' % (byte_range[0], - byte_range[1])} + headers = {'Range': 'bytes=%d-%d' % byte_range} uri = 'vaults/%s/jobs/%s/output' % (vault_name, job_id) response = self.make_request('GET', uri, headers=headers, ok_responses=(200, 206), @@ -621,8 +620,7 @@ def upload_part(self, vault_name, upload_id, linear_hash, """ headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, - 'Content-Range': 'bytes=%d-%d' % (byte_range[0], - byte_range[1])} + 'Content-Range': 'bytes %d-%d/*' % byte_range} response_headers = [('x-amz-sha256-tree-hash', u'TreeHash')] uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('PUT', uri, headers=headers, diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index e9ab676ce9..c5594adb50 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -126,8 +126,9 @@ def close(self): if self.buffer_size > 0: self.send_part() # Complete the multiplart glacier upload - tree_hash = bytes_to_hex(tree_hash(self.tree_hashes)) - response = self.vault.layer1.complete_multipart_upload(tree_hash, + hash = bytes_to_hex(tree_hash(self.tree_hashes)) + response = self.vault.layer1.complete_multipart_upload(self.vault.name, + self.upload_id, hash, self.uploaded_size) self.archive_id = response['ArchiveId'] self.closed = True From 75932f71c0ee267bb306033a76cc3546592dbc66 Mon Sep 17 00:00:00 2001 From: moliware Date: Sun, 2 Sep 2012 12:48:42 +0200 Subject: [PATCH 27/62] Fixes UnicodeDecodeError raised when you upload binary files --- 
boto/glacier/vault.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 500272c743..ade37d77d8 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -46,7 +46,7 @@ def __init__(self, layer1, response_data=None): for response_name, attr_name, default in self.ResponseDataElements: value = response_data[response_name] if isinstance(value, unicode): - value.encode('utf8') + value = value.encode('utf8') setattr(self, attr_name, value) else: for response_name, attr_name, default in self.ResponseDataElements: From c492ecf8828ef86f160147e439fe15f506318216 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Mon, 3 Sep 2012 10:02:04 -0700 Subject: [PATCH 28/62] Adding a delete_archive method to Vault. Also a few PEP8 fixes. --- boto/glacier/vault.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index ade37d77d8..f3874d8710 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -24,14 +24,12 @@ from .job import Job from .writer import Writer, bytes_to_hex, chunk_hashes, tree_hash import hashlib -import json import os.path -import urllib class Vault(object): - DefaultPartSize = 4 * 1024 * 1024 #128MB + DefaultPartSize = 4 * 1024 * 1024 # 128MB ResponseDataElements = (('VaultName', 'name', None), ('VaultARN', 'arn', None), @@ -57,7 +55,7 @@ def __repr__(self): def upload_archive(self, filename): """ - Adds an archive to a vault. For archives greater than 100MB the + Adds an archive to a vault. For archives greater than 100MB the multipart upload will be used. :type file: str @@ -174,6 +172,15 @@ def retrieve_archive(self, archive_name, sns_topic=None, response = self.layer1.initiate_job(self.name, job_data) return response['JobId'] + def delete_archive(self, archive_id): + """ + This operation deletes an archive from the vault. 
+ + :type archive_id: str + :param archive_id: The ID for the archive to be deleted. + """ + return self.layer1.delete_archive(self.name, archive_id) + def get_job(self, job_id): """ Get an object representing a job in progress. From ec94d61680a6945dc646d06b90d9e82e542eb726 Mon Sep 17 00:00:00 2001 From: moliware Date: Mon, 3 Sep 2012 20:54:39 +0200 Subject: [PATCH 29/62] Connecting to Layer2 by default in glacier module --- boto/glacier/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/boto/glacier/__init__.py b/boto/glacier/__init__.py index ac519fc00e..a65733b274 100644 --- a/boto/glacier/__init__.py +++ b/boto/glacier/__init__.py @@ -31,22 +31,22 @@ def regions(): :rtype: list :return: A list of :class:`boto.regioninfo.RegionInfo` """ - from boto.glacier.layer1 import Layer1 + from boto.glacier.layer2 import Layer2 return [RegionInfo(name='us-east-1', endpoint='glacier.us-east-1.amazonaws.com', - connection_cls=Layer1), + connection_cls=Layer2), RegionInfo(name='us-west-1', endpoint='glacier.us-west-1.amazonaws.com', - connection_cls=Layer1), + connection_cls=Layer2), RegionInfo(name='us-west-2', endpoint='glacier.us-west-2.amazonaws.com', - connection_cls=Layer1), + connection_cls=Layer2), RegionInfo(name='ap-northeast-1', endpoint='glacier.ap-northeast-1.amazonaws.com', - connection_cls=Layer1), + connection_cls=Layer2), RegionInfo(name='eu-west-1', endpoint='glacier.eu-west-1.amazonaws.com', - connection_cls=Layer1), + connection_cls=Layer2), ] From 57dabee6ad6d58f71994adc6d246becc98c8056e Mon Sep 17 00:00:00 2001 From: moliware Date: Mon, 3 Sep 2012 21:19:50 +0200 Subject: [PATCH 30/62] unicode constants in the response dict were not responsible for the UnicodeDecodeError --- boto/glacier/layer1.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 25ebcc5bfd..e7fff7f416 100644 --- a/boto/glacier/layer1.py +++
b/boto/glacier/layer1.py @@ -83,8 +83,8 @@ def make_request(self, verb, resource, headers=None, if response.getheader('Content-Type') == 'application/json': body = json.loads(response.read()) else: - body = {'Response': response.read()} - body['RequestId'] = response.getheader('x-amzn-requestid') + body = {u'Response': response.read()} + body[u'RequestId'] = response.getheader('x-amzn-requestid') if response_headers: for header_name, item_name in response_headers: body[item_name] = response.getheader(header_name) @@ -346,8 +346,8 @@ def initiate_job(self, vault_name, job_data): archive-retrieval|inventory-retrieval """ uri = 'vaults/%s/jobs' % vault_name - response_headers = [('x-amz-job-id', 'JobId'), - ('Location', 'Location')] + response_headers = [('x-amz-job-id', u'JobId'), + ('Location', u'Location')] json_job_data = json.dumps(job_data) return self.make_request('POST', uri, data=json_job_data, ok_responses=(202,), @@ -378,9 +378,9 @@ def get_job_output(self, vault_name, job_id, byte_range=None): :param range: A tuple of integers specifying the slice (in bytes) of the archive you want to receive """ - response_headers = [('x-amz-sha256-tree-hash', 'TreeHash'), - ('Content-Range', 'ContentRange'), - ('Content-Type', 'ContentType')] + response_headers = [('x-amz-sha256-tree-hash', u'TreeHash'), + ('Content-Range', u'ContentRange'), + ('Content-Type', u'ContentType')] headers = None if byte_range: headers = {'Range': 'bytes=%d-%d' % byte_range} @@ -421,9 +421,9 @@ def upload_archive(self, vault_name, archive, :type description: str :param description: An optional description of the archive. 
""" - response_headers = [('x-amz-archive-id', 'ArchiveId'), - ('Location', 'Location'), - ('x-amz-sha256-tree-hash', 'TreeHash')] + response_headers = [('x-amz-archive-id', u'ArchiveId'), + ('Location', u'Location'), + ('x-amz-sha256-tree-hash', u'TreeHash')] uri = 'vaults/%s/archives' % vault_name headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, @@ -468,8 +468,8 @@ def initiate_multipart_upload(self, vault_name, part_size, a power of 2. The minimum allowable part size is 1MB and the maximum is 4GB. """ - response_headers = [('x-amz-multipart-upload-id', 'UploadId'), - ('Location', 'Location')] + response_headers = [('x-amz-multipart-upload-id', u'UploadId'), + ('Location', u'Location')] headers = {'x-amz-part-size': str(part_size)} if description: headers['x-amz-archive-description'] = description @@ -506,8 +506,8 @@ def complete_multipart_upload(self, vault_name, upload_id, archive. This value should be the sum of all the sizes of the individual parts that you uploaded. 
""" - response_headers = [('x-amz-archive-id', 'ArchiveId'), - ('Location', 'Location')] + response_headers = [('x-amz-archive-id', u'ArchiveId'), + ('Location', u'Location')] headers = {'x-amz-sha256-tree-hash': sha256_treehash, 'x-amz-archive-size': str(archive_size)} uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) @@ -621,7 +621,7 @@ def upload_part(self, vault_name, upload_id, linear_hash, headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, 'Content-Range': 'bytes %d-%d/*' % byte_range} - response_headers = [('x-amz-sha256-tree-hash', 'TreeHash')] + response_headers = [('x-amz-sha256-tree-hash', u'TreeHash')] uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('PUT', uri, headers=headers, data=part_data, ok_responses=(204,), From 795f5175f14b1c751678063d71747c7d60776097 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Mon, 3 Sep 2012 12:20:12 -0700 Subject: [PATCH 31/62] Adding glacier to the doc indexes. --- docs/source/index.rst | 1 + docs/source/ref/glacier.rst | 56 +++++++++++++++++++++++++++++++++++++ docs/source/ref/index.rst | 5 ++-- 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 docs/source/ref/glacier.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index e808f13d1d..82e435712f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -61,6 +61,7 @@ Currently Supported Services * **Storage** * :doc:`Simple Storage Service (S3) ` -- (:doc:`API Reference `) + * Amazon Glacier -- (:doc:`API Reference `) * Google Cloud Storage -- (:doc:`API Reference `) * **Workforce** diff --git a/docs/source/ref/glacier.rst b/docs/source/ref/glacier.rst new file mode 100644 index 0000000000..6f5ccbba27 --- /dev/null +++ b/docs/source/ref/glacier.rst @@ -0,0 +1,56 @@ +.. ref-glacier + +======= +Glaicer +======= + +boto.glacier +------------ + +.. 
automodule:: boto.glacier + :members: + :undoc-members: + +boto.glacier.layer1 +------------------ + +.. automodule:: boto.glacier.layer1 + :members: + :undoc-members: + +boto.glacier.layer2 +------------------- + +.. automodule:: boto.glacier.layer2 + :members: + :undoc-members: + +boto.glacier.vault +------------------ + +.. automodule:: boto.glacier.vault + :members: + :undoc-members: + +boto.glacier.job +---------------- + +.. automodule:: boto.glacier.job + :members: + :undoc-members: + +boto.glacier.writer +------------------- + +.. automodule:: boto.glacier.writer + :members: + :undoc-members: + +boto.glacier.exceptions +----------------------- + +.. automodule:: boto.glacier.exceptions + :members: + :undoc-members: + + diff --git a/docs/source/ref/index.rst b/docs/source/ref/index.rst index 4f36adf678..b13fc06bed 100644 --- a/docs/source/ref/index.rst +++ b/docs/source/ref/index.rst @@ -18,13 +18,14 @@ API Reference emr file fps + glacier gs iam manage mturk pyami rds - route53 + route53 s3 sdb services @@ -34,4 +35,4 @@ API Reference sts swf vpc - + From aca42e5a3901327662571be14af3d04cda2eaf8e Mon Sep 17 00:00:00 2001 From: moliware Date: Mon, 3 Sep 2012 21:21:43 +0200 Subject: [PATCH 32/62] Command line tool for amazon glacier --- bin/glacier | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 4 +- 2 files changed, 156 insertions(+), 2 deletions(-) create mode 100755 bin/glacier diff --git a/bin/glacier b/bin/glacier new file mode 100755 index 0000000000..aad1e8befd --- /dev/null +++ b/bin/glacier @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Miguel Olivares http://moliware.com/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or 
sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +""" + glacier + ~~~~~~~ + + Amazon Glacier tool built on top of boto. Look at the usage method to see + how to use it. + + Author: Miguel Olivares +""" +import sys + +from boto.glacier import connect_to_region +from getopt import getopt, GetoptError +from os.path import isfile + + +COMMANDS = ('vaults', 'jobs', 'upload') + + +def usage(): + print """ +glacier [args] + + Commands + vaults - Operations with vaults + jobs - Operations with jobs + upload - Upload files to a vault. If the vault doesn't exits, it is + created + + Common args: + access_key - Your AWS Access Key ID. If not supplied, boto will + use the value of the environment variable + AWS_ACCESS_KEY_ID + secret_key - Your AWS Secret Access Key. If not supplied, boto + will use the value of the environment variable + AWS_SECRET_ACCESS_KEY + region - AWS region to use. Possible vaules: us-east-1, us-west-1, + us-west-2, ap-northeast-1, eu-west-1. 
+ Default: us-east-1 + + Vaults operations: + + List vaults: + glacier vaults + + Jobs operations: + + List jobs: + glacier jobs + + Uploading files: + + glacier upload + + Examples : + glacier upload pics *.jpg + glacier upload pics a.jpg b.jpg +""" + sys.exit() + + +def connect(region, debug_level=0, access_key=None, secret_key=None): + """ Connect to a specific region """ + return connect_to_region(region, + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + debug=debug_level) + + +def list_vaults(region, access_key=None, secret_key=None): + layer2 = connect(region, access_key, secret_key) + for vault in layer2.list_vaults(): + print vault.arn + + +def list_jobs(vault_name, region, access_key=None, secret_key=None): + layer2 = connect(region, access_key, secret_key) + print layer2.layer1.list_jobs(vault_name) + + +def upload_files(vault_name, filenames, region, access_key=None, secret_key=None): + layer2 = connect(region, access_key, secret_key) + layer2.create_vault(vault_name) + glacier_vault = layer2.get_vault(vault_name) + for filename in filenames: + if isfile(filename): + print 'Uploading %s to %s' % (filename, vault_name) + glacier_vault.upload_archive(filename) + + +def main(): + if len(sys.argv) < 2: + usage() + + command = sys.argv[1] + if command not in COMMANDS: + usage() + + argv = sys.argv[2:] + options = 'a:s:r:' + long_options = ['access_key=', 'secret_key=', 'region='] + try: + opts, args = getopt(argv, options, long_options) + except GetoptError, e: + usage() + + # Parse agument + access_key = secret_key = None + region = 'us-east-1' + for option, value in opts: + if option in ('a', '--access_key'): + access_key = value + elif option in ('s', '--secret_key'): + secret_key = value + elif option in ('r', '--region'): + region = value + # handle each command + if command == 'vaults': + list_vaults(region, access_key, secret_key) + elif command == 'jobs': + if len(args) != 1: + usage() + list_jobs(args[0], region, access_key, 
secret_key) + elif command == 'upload': + if len(args) < 2: + usage() + upload_files(args[0], args[1:], region, access_key, secret_key) + + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index 41dc54f3da..662c5e1a00 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ "bin/list_instances", "bin/taskadmin", "bin/kill_instance", "bin/bundle_image", "bin/pyami_sendmail", "bin/lss3", "bin/cq", "bin/route53", "bin/s3multiput", "bin/cwutil", - "bin/instance_events", "bin/asadmin"], + "bin/instance_events", "bin/asadmin", "bin/glacier"], url = "https://github.com/boto/boto/", packages = ["boto", "boto.sqs", "boto.s3", "boto.gs", "boto.file", "boto.ec2", "boto.ec2.cloudwatch", "boto.ec2.autoscale", @@ -64,7 +64,7 @@ "boto.fps", "boto.emr", "boto.emr", "boto.sns", "boto.ecs", "boto.iam", "boto.route53", "boto.ses", "boto.cloudformation", "boto.sts", "boto.dynamodb", - "boto.swf", "boto.mws", "boto.cloudsearch"], + "boto.swf", "boto.mws", "boto.cloudsearch", "boto.glacier"], package_data = {"boto.cacerts": ["cacerts.txt"]}, license = "MIT", platforms = "Posix; MacOS X; Windows", From 95101f28c303ce0283534d3e2b80d68246475b89 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Wed, 5 Sep 2012 09:51:50 +0100 Subject: [PATCH 33/62] Archive retrieval job should not include a format (that's for listing operations) --- boto/glacier/vault.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index f3874d8710..e691e14306 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -136,7 +136,7 @@ def create_archive_from_file(self, file=None, file_obj=None): return writer.get_archive_id() def retrieve_archive(self, archive_name, sns_topic=None, - description=None, format='JSON'): + description=None): """ Initiate a archive retrieval job to download the data from an archive. 
You will need to wait for the notification from @@ -154,16 +154,11 @@ def retrieve_archive(self, archive_name, sns_topic=None, sends notification when the job is completed and the output is ready for you to download. - :type format: str - :param format: Specify the output format. Valid values are: - CSV|JSON. Default is JSON. - :rtype: :class:`boto.glacier.job.Job` :return: A Job object representing the retrieval job. """ job_data = {'Type': 'archive-retrieval', - 'ArchiveId': archive_name, - 'Format': format} + 'ArchiveId': archive_name} if sns_topic is not None: job_data['SNSTopic'] = sns_topic if description is not None: From a2a505a63ea12d1a908cafcd6378d99eb5b918fb Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Wed, 5 Sep 2012 09:52:12 +0100 Subject: [PATCH 34/62] describe_job needs an archive name as a well as a job id --- boto/glacier/vault.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index e691e14306..15ede7472e 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -186,7 +186,7 @@ def get_job(self, job_id): :rtype: :class:`boto.glaicer.job.Job` :return: A Job object representing the job. 
""" - response_data = self.layer1.describe_job(job_id) + response_data = self.layer1.describe_job(self.name, job_id) return Job(self, response_data) def list_jobs(self, completed=None, status_code=None): From 11b5fb614bf2a03b6c014cf968a178e3c6f6058f Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Wed, 5 Sep 2012 17:29:34 +0100 Subject: [PATCH 35/62] Return the response and not the fully read body for non-json responses (otherwise you get the whole output of a job read into memory) --- boto/glacier/layer1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index e7fff7f416..bb7623b9a6 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -83,7 +83,7 @@ def make_request(self, verb, resource, headers=None, if response.getheader('Content-Type') == 'application/json': body = json.loads(response.read()) else: - body = {u'Response': response.read()} + body = {'Response': response} body[u'RequestId'] = response.getheader('x-amzn-requestid') if response_headers: for header_name, item_name in response_headers: From 720a947a3c6e5c0bddaeb6de3906740296bc2870 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Wed, 5 Sep 2012 23:08:48 +0100 Subject: [PATCH 36/62] Added a GlacierResponse object --- boto/glacier/exceptions.py | 2 +- boto/glacier/layer1.py | 11 ++------- boto/glacier/response.py | 47 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 boto/glacier/response.py diff --git a/boto/glacier/exceptions.py b/boto/glacier/exceptions.py index 459f0f123f..0092c62e54 100644 --- a/boto/glacier/exceptions.py +++ b/boto/glacier/exceptions.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. 
# - +import json class UnexpectedHTTPResponseError(Exception): def __init__(self, expected_responses, response): diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index bb7623b9a6..dca5f06c89 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -26,6 +26,7 @@ import boto.glacier from boto.connection import AWSAuthConnection from .exceptions import UnexpectedHTTPResponseError +from .response import GlacierResponse #boto.set_stream_logger('glacier') @@ -80,15 +81,7 @@ def make_request(self, verb, resource, headers=None, headers=headers, data=data) if response.status in ok_responses: - if response.getheader('Content-Type') == 'application/json': - body = json.loads(response.read()) - else: - body = {'Response': response} - body[u'RequestId'] = response.getheader('x-amzn-requestid') - if response_headers: - for header_name, item_name in response_headers: - body[item_name] = response.getheader(header_name) - return body + return GlacierResponse(response, response_headers) else: # create glacier-specific exceptions raise UnexpectedHTTPResponseError(ok_responses, response) diff --git a/boto/glacier/response.py b/boto/glacier/response.py new file mode 100644 index 0000000000..57bd4e4ee7 --- /dev/null +++ b/boto/glacier/response.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +import json + +class GlacierResponse(dict): + """ + Represents a response from Glacier layer1. It acts as a dictionary + containing the combined keys received via JSON in the body (if + supplied) and headers. + """ + def __init__(self, http_response, response_headers): + self.http_response = http_response + self.status = http_response.status + self[u'RequestId'] = http_response.getheader('x-amzn-requestid') + if response_headers: + for header_name, item_name in response_headers: + self[item_name] = http_response.getheader(header_name) + if http_response.getheader('Content-Type') == 'application/json': + body = json.loads(http_response.read()) + self.update(body) + size = http_response.getheader('Content-Length', None) + if size is not None: + self.size = size + + def read(self, amt=None): + "Reads and returns the response body, or up to the next amt bytes." 
+ return self.http_response.read(amt) From 3ceeede15de8b769c2d20e9dc884df12b72f029f Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Thu, 6 Sep 2012 09:54:50 +0100 Subject: [PATCH 37/62] Fixed the tests to work with GlacierResponse --- tests/unit/__init__.py | 4 +++- tests/unit/glacier/test_layer1.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index 8c9a5ec3d0..4e52b76621 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -45,10 +45,12 @@ def create_response(self, status_code, reason='', header=[], body=None): response.reason = reason response.getheaders.return_value = header - def overwrite_header(arg): + def overwrite_header(arg, default=None): header_dict = dict(header) if header_dict.has_key(arg): return header_dict[arg] + else: + return default response.getheader.side_effect = overwrite_header return response diff --git a/tests/unit/glacier/test_layer1.py b/tests/unit/glacier/test_layer1.py index afad423881..7d7b6fc273 100644 --- a/tests/unit/glacier/test_layer1.py +++ b/tests/unit/glacier/test_layer1.py @@ -75,4 +75,4 @@ def test_get_archive_output(self): body=self.job_content) response = self.service_connection.get_job_output(self.vault_name, 'example-job-id') - self.assertEqual(self.job_content, response['Response']) + self.assertEqual(self.job_content, response.read()) From c8da962f22029d9ae84bedd884973fa2dbefa2c0 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 6 Sep 2012 09:24:31 -0700 Subject: [PATCH 38/62] Update Layer2.create_vault to return a Vault object This makes it consistent with Layer2.get_vault. 
--- boto/glacier/layer2.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/boto/glacier/layer2.py b/boto/glacier/layer2.py index 294480383d..00b98f0954 100644 --- a/boto/glacier/layer2.py +++ b/boto/glacier/layer2.py @@ -34,7 +34,16 @@ def __init__(self, *args, **kwargs): self.layer1 = Layer1(*args, **kwargs) def create_vault(self, name): - return self.layer1.create_vault(name) + """Creates a vault. + + :type name: str + :param name: The name of the vault + + :rtype: :class:`boto.glacier.vault.Vault` + :return: A Vault object representing the vault. + """ + self.layer1.create_vault(name) + return self.get_vault(name) def get_vault(self, name): """ From f85bc3c5a6b3c27002d6c91514c11d7cfee800be Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 6 Sep 2012 09:25:43 -0700 Subject: [PATCH 39/62] Add Layer2.delete_vault This allow layer2 to be used to both create and delete vaults. I've also added the start of integration tests for layer2 that will create, retrieve, and delete a vault. --- boto/glacier/layer2.py | 19 ++++++++++ tests/integration/glacier/__init__.py | 0 tests/integration/glacier/test_layer2.py | 45 ++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 tests/integration/glacier/__init__.py create mode 100644 tests/integration/glacier/test_layer2.py diff --git a/boto/glacier/layer2.py b/boto/glacier/layer2.py index 00b98f0954..84e3d56a4c 100644 --- a/boto/glacier/layer2.py +++ b/boto/glacier/layer2.py @@ -45,6 +45,25 @@ def create_vault(self, name): self.layer1.create_vault(name) return self.get_vault(name) + def delete_vault(self, name): + """Delete a vault. + + This operation deletes a vault. Amazon Glacier will delete a + vault only if there are no archives in the vault as per the + last inventory and there have been no writes to the vault + since the last inventory. 
If either of these conditions is not + satisfied, the vault deletion fails (that is, the vault is not + removed) and Amazon Glacier returns an error. + + This operation is idempotent, you can send the same request + multiple times and it has no further effect after the first + time Amazon Glacier delete the specified vault. + + :type vault_name: str + :param vault_name: The name of the vault to delete. + """ + return self.layer1.delete_vault(name) + def get_vault(self, name): """ Get an object representing a named vault from Glacier. This diff --git a/tests/integration/glacier/__init__.py b/tests/integration/glacier/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/integration/glacier/test_layer2.py b/tests/integration/glacier/test_layer2.py new file mode 100644 index 0000000000..caa44fa522 --- /dev/null +++ b/tests/integration/glacier/test_layer2.py @@ -0,0 +1,45 @@ +# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +import time +from tests.unit import unittest + +from boto.glacier.layer2 import Layer1, Layer2 + + +class TestGlacierLayer2(unittest.TestCase): + glacier = True + + def setUp(self): + self.layer2 = Layer2() + self.vault_name = 'testvault%s' % int(time.time()) + + def test_create_delete_vault(self): + vault = self.layer2.create_vault(self.vault_name) + retrieved_vault = self.layer2.get_vault(self.vault_name) + self.layer2.delete_vault(self.vault_name) + self.assertEqual(vault.name, retrieved_vault.name) + self.assertEqual(vault.arn, retrieved_vault.arn) + self.assertEqual(vault.creation_date, retrieved_vault.creation_date) + self.assertEqual(vault.last_inventory_date, + retrieved_vault.last_inventory_date) + self.assertEqual(vault.number_of_archives, + retrieved_vault.number_of_archives) From ae81ad0e9c3c8333f406854eafd228a32e85f6cb Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Thu, 6 Sep 2012 18:58:48 +0100 Subject: [PATCH 40/62] Added some tests for glacier layer 2 --- boto/glacier/job.py | 1 + boto/glacier/layer1.py | 2 +- boto/glacier/layer2.py | 12 ++- boto/glacier/vault.py | 19 +++-- boto/glacier/writer.py | 41 +++++----- tests/unit/glacier/test_layer2.py | 129 ++++++++++++++++++++++++++++++ 6 files changed, 176 insertions(+), 28 deletions(-) create mode 100644 tests/unit/glacier/test_layer2.py diff --git a/boto/glacier/job.py b/boto/glacier/job.py index cdb53bc643..26e861111b 100644 --- a/boto/glacier/job.py +++ b/boto/glacier/job.py @@ -45,6 +45,7 @@ def __init__(self, vault, response_data=None): self.vault = vault if response_data: for response_name, attr_name, default in self.ResponseDataElements: + print response_name, attr_name setattr(self, attr_name, response_data[response_name]) else: for 
response_name, attr_name, default in self.ResponseDataElements: diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index dca5f06c89..ab517ba6fe 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -39,7 +39,7 @@ class Layer1(AWSAuthConnection): def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, account_id='-', is_secure=True, port=None, proxy=None, proxy_port=None, - proxy_user=None, proxy_pass=None, debug=2, + proxy_user=None, proxy_pass=None, debug=0, https_connection_factory=None, path='/', provider='aws', security_token=None, suppress_consec_slashes=True, diff --git a/boto/glacier/layer2.py b/boto/glacier/layer2.py index 294480383d..fbfb184335 100644 --- a/boto/glacier/layer2.py +++ b/boto/glacier/layer2.py @@ -31,9 +31,19 @@ class Layer2(object): """ def __init__(self, *args, **kwargs): - self.layer1 = Layer1(*args, **kwargs) + # Accept a passed in layer1, mainly to allow easier testing + if "layer1" in kwargs: + self.layer1 = kwargs["layer1"] + else: + self.layer1 = Layer1(*args, **kwargs) def create_vault(self, name): + """ + Create a new vault. + + :type name: str + :param name: The name of the vault + """ return self.layer1.create_vault(name) def get_vault(self, name): diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 15ede7472e..9ba2c83fb7 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -53,6 +53,12 @@ def __init__(self, layer1, response_data=None): def __repr__(self): return 'Vault("%s")' % self.arn + def delete(self): + """ + Delete's this vault. WARNING! + """ + self.layer1.delete_vault(self.name) + def upload_archive(self, filename): """ Adds an archive to a vault. 
For archives greater than 100MB the @@ -67,9 +73,9 @@ def upload_archive(self, filename): megabyte = 1024 * 1024 if os.path.getsize(filename) > 100 * megabyte: return self.create_archive_from_file(filename) - return self.upload_archive_single_operation(filename) + return self._upload_archive_single_operation(filename) - def upload_archive_single_operation(self, filename): + def _upload_archive_single_operation(self, filename): """ Adds an archive to a vault in a single operation. It's recommended for archives less than 100MB @@ -135,7 +141,7 @@ def create_archive_from_file(self, file=None, file_obj=None): writer.close() return writer.get_archive_id() - def retrieve_archive(self, archive_name, sns_topic=None, + def retrieve_archive(self, archive_id, sns_topic=None, description=None): """ Initiate a archive retrieval job to download the data from an @@ -143,8 +149,8 @@ def retrieve_archive(self, archive_name, sns_topic=None, Amazon (via SNS) before you can actually download the data, this takes around 4 hours. - :type archive_name: str - :param archive_name: The name of the archive + :type archive_id: str + :param archive_id: The id of the archive :type description: str :param description: An optional description for the job. @@ -158,7 +164,7 @@ def retrieve_archive(self, archive_name, sns_topic=None, :return: A Job object representing the retrieval job. """ job_data = {'Type': 'archive-retrieval', - 'ArchiveId': archive_name} + 'ArchiveId': archive_id} if sns_topic is not None: job_data['SNSTopic'] = sns_topic if description is not None: @@ -187,6 +193,7 @@ def get_job(self, job_id): :return: A Job object representing the job. 
""" response_data = self.layer1.describe_job(self.name, job_id) + print response_data return Job(self, response_data) def list_jobs(self, completed=None, status_code=None): diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 2c0464148c..a62ef89c93 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -76,61 +76,62 @@ def __init__(self, vault, upload_id, part_size): self.vault = vault self.upload_id = upload_id self.part_size = part_size - self.buffer_size = 0 - self.uploaded_size = 0 - self.buffer = [] - self.vault = vault - self.tree_hashes = [] + + self._buffer_size = 0 + self._uploaded_size = 0 + self._buffer = [] + self._tree_hashes = [] + self.archive_location = None self.closed = False def send_part(self): - buf = "".join(self.buffer) + buf = "".join(self._buffer) # Put back any data remaining over the part size into the # buffer if len(buf) > self.part_size: - self.buffer = [buf[self.part_size:]] - self.buffer_size = len(self.buffer[0]) + self._buffer = [buf[self.part_size:]] + self._buffer_size = len(self._buffer[0]) else: - self.buffer = [] - self.buffer_size = 0 + self._buffer = [] + self._buffer_size = 0 # The part we will send part = buf[:self.part_size] # Create a request and sign it part_tree_hash = tree_hash(chunk_hashes(part)) - self.tree_hashes.append(part_tree_hash) + self._tree_hashes.append(part_tree_hash) hex_tree_hash = bytes_to_hex(part_tree_hash) linear_hash = hashlib.sha256(part).hexdigest() - content_range = (self.uploaded_size, - (self.uploaded_size + len(part)) - 1) + content_range = (self._uploaded_size, + (self._uploaded_size + len(part)) - 1) response = self.vault.layer1.upload_part(self.vault.name, self.upload_id, linear_hash, hex_tree_hash, content_range, part) - self.uploaded_size += len(part) + self._uploaded_size += len(part) def write(self, str): assert not self.closed, "Tried to write to a Writer that is already closed!" 
if str == "": return - self.buffer.append(str) - self.buffer_size += len(str) - while self.buffer_size > self.part_size: + self._buffer.append(str) + self._buffer_size += len(str) + while self._buffer_size > self.part_size: self.send_part() def close(self): if self.closed: return - if self.buffer_size > 0: + if self._buffer_size > 0: self.send_part() # Complete the multiplart glacier upload - hex_tree_hash = bytes_to_hex(tree_hash(self.tree_hashes)) + hex_tree_hash = bytes_to_hex(tree_hash(self._tree_hashes)) response = self.vault.layer1.complete_multipart_upload(self.vault.name, self.upload_id, hex_tree_hash, - self.uploaded_size) + self._uploaded_size) self.archive_id = response['ArchiveId'] self.closed = True diff --git a/tests/unit/glacier/test_layer2.py b/tests/unit/glacier/test_layer2.py new file mode 100644 index 0000000000..8ebb8202de --- /dev/null +++ b/tests/unit/glacier/test_layer2.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +try: + import unittest2 as unittest +except ImportError: + import unittest +import httplib + +from mock import Mock + +from boto.glacier.layer1 import Layer1 +from boto.glacier.layer2 import Layer2 +from boto.glacier.vault import Vault +from boto.glacier.vault import Job + +# Some fixture data from the Glacier docs +FIXTURE_VAULT = { + "CreationDate" : "2012-02-20T17:01:45.198Z", + "LastInventoryDate" : "2012-03-20T17:03:43.221Z", + "NumberOfArchives" : 192, + "SizeInBytes" : 78088912, + "VaultARN" : "arn:aws:glacier:us-east-1:012345678901:vaults/examplevault", + "VaultName" : "examplevault" +} + +FIXTURE_ARCHIVE_JOB = { + "Action": "ArchiveRetrieval", + "ArchiveId": "NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEArchiveId", + "ArchiveSizeInBytes": 16777216, + "Completed": False, + "CreationDate": "2012-05-15T17:21:39.339Z", + "CompletionDate": "2012-05-15T17:21:43.561Z", + "InventorySizeInBytes": None, + "JobDescription": "My ArchiveRetrieval Job", + "JobId": "HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5vP54ZShjoQzQVVh7vEXAMPLEjobID", + "SHA256TreeHash": "beb0fe31a1c7ca8c6c04d574ea906e3f97b31fdca7571defb5b44dca89b5af60", + "SNSTopic": "arn:aws:sns:us-east-1:012345678901:mytopic", + "StatusCode": "InProgress", + "StatusMessage": "Operation in progress.", + "VaultARN": "arn:aws:glacier:us-east-1:012345678901:vaults/examplevault" +} + +class GlacierLayer2Base(unittest.TestCase): + def setUp(self): + self.mock_layer1 = Mock(spec=Layer1) + +class TestGlacierLayer2Connection(GlacierLayer2Base): + def setUp(self): + GlacierLayer2Base.setUp(self) + self.layer2 = Layer2(layer1=self.mock_layer1) + + def 
test_create_vault(self): + self.layer2.create_vault("My Vault") + self.mock_layer1.create_vault.assert_called_with("My Vault") + + def test_get_vault(self): + self.mock_layer1.describe_vault.return_value = FIXTURE_VAULT + vault = self.layer2.get_vault("examplevault") + assert vault.layer1 == self.mock_layer1 + assert vault.name == "examplevault" + assert vault.size == 78088912 + assert vault.number_of_archives == 192 + + def list_vaults(self): + self.mock_layer1.list_vaults.return_value = [FIXTURE_VAULT] + vaults = self.layer2.list_vaults() + assert vaults[0].name == "examplevault" + +class TestVault(GlacierLayer2Base): + def setUp(self): + GlacierLayer2Base.setUp(self) + self.vault = Vault(self.mock_layer1, FIXTURE_VAULT) + + # TODO: Tests for the other methods of uploading + + def test_create_archive_writer(self): + self.mock_layer1.initiate_multipart_upload.return_value = {"UploadId": "UPLOADID"} + writer = self.vault.create_archive_writer(description="stuff") + self.mock_layer1.initiate_multipart_upload.assert_called_with("examplevault", self.vault.DefaultPartSize, "stuff") + assert writer.vault == self.vault + assert writer.upload_id == "UPLOADID" + + def test_delete_vault(self): + self.vault.delete_archive("archive") + self.mock_layer1.delete_archive.assert_called_with("examplevault","archive") + + def test_get_job(self): + self.mock_layer1.describe_job.return_value = FIXTURE_ARCHIVE_JOB + job = self.vault.get_job("NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEArchiveId") + assert job.action == "ArchiveRetrieval" + + def test_list_jobs(self): + self.mock_layer1.list_jobs.return_value = {"JobList": [FIXTURE_ARCHIVE_JOB]} + jobs = self.vault.list_jobs(False, "InProgress") + self.mock_layer1.list_jobs.assert_called_with("examplevault", False, "InProgress") + assert jobs[0].archive_id == 
"NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEArchiveId" + +class TestJob(GlacierLayer2Base): + def setUp(self): + GlacierLayer2Base.setUp(self) + self.vault = Vault(self.mock_layer1, FIXTURE_VAULT) + self.job = Job(self.vault, FIXTURE_ARCHIVE_JOB) + + def test_get_job_output(self): + self.mock_layer1.get_job_output.return_value = "TEST_OUTPUT" + self.job.get_output((0,100)) + self.mock_layer1.get_job_output.assert_called_with("examplevault", "HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5vP54ZShjoQzQVVh7vEXAMPLEjobID", (0,100)) + From 10736e4cf54ce0077aacedb9e40fb818dc1a89fe Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Thu, 6 Sep 2012 19:55:01 +0100 Subject: [PATCH 41/62] Remove print statements, whoops --- boto/glacier/job.py | 1 - boto/glacier/vault.py | 1 - 2 files changed, 2 deletions(-) diff --git a/boto/glacier/job.py b/boto/glacier/job.py index 26e861111b..cdb53bc643 100644 --- a/boto/glacier/job.py +++ b/boto/glacier/job.py @@ -45,7 +45,6 @@ def __init__(self, vault, response_data=None): self.vault = vault if response_data: for response_name, attr_name, default in self.ResponseDataElements: - print response_name, attr_name setattr(self, attr_name, response_data[response_name]) else: for response_name, attr_name, default in self.ResponseDataElements: diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 9ba2c83fb7..a10ac2413a 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -193,7 +193,6 @@ def get_job(self, job_id): :return: A Job object representing the job. 
""" response_data = self.layer1.describe_job(self.name, job_id) - print response_data return Job(self, response_data) def list_jobs(self, completed=None, status_code=None): From bac528e4d176b297448dc80ec5755c1100f57c24 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Thu, 6 Sep 2012 20:32:44 +0100 Subject: [PATCH 42/62] Not-yet-working integration test (commented out) --- .../glacier/test_glacier_layer2.py | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/integration/glacier/test_glacier_layer2.py diff --git a/tests/integration/glacier/test_glacier_layer2.py b/tests/integration/glacier/test_glacier_layer2.py new file mode 100644 index 0000000000..2395187389 --- /dev/null +++ b/tests/integration/glacier/test_glacier_layer2.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# + +from boto.glacier import connect_to_region +import uuid +import unittest + +class GlaicerTest (unittest.TestCase): + glacier = True + + def setUp(self): + self.conn = connect_to_region("us-east-1") + self.vault_name = 'boto-test-vault-%s' % (uuid.uuid1(),) + self.conn.create_vault(self.vault_name) + self.vault = self.conn.get_vault(self.vault_name) + + def tearDown(self): + self.vault.delete() + + ## Once you write to a vault you can't delete it for a few hours, + ## so this test doesn't work so well. + # def test_upload_vault(self): + # writer = self.vault.create_archive_writer(description="Hello world") + # # Would be nicer to write enough to splill over into a second + # # part, but that takes ages! + # for i in range(12): + # writer.write("X" * 1024) + # writer.close() + # archive_id = writer.get_archive_id() + + # job_id = self.vault.retrieve_archive(archive_id, description="my job") + + # # Usually at this point you;d wait for the notification via + # # SNS (which takes about 5 hours) + + # job = self.vault.get_job(job_id) + # assert job.description == "my job" + # assert job.archive_size == 1024*12 + + # self.vault.delete_archive(archive_id) From c0d1f76ad03bee6c1e82cbb19c3d2f89b00f1809 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Thu, 6 Sep 2012 20:34:09 +0100 Subject: [PATCH 43/62] Added a very basic integration test for glacier --- tests/integration/glacier/__init__.py | 22 +++++++++++++++++++ .../glacier/test_glacier_layer2.py | 4 ++++ 2 files changed, 26 insertions(+) create mode 100644 tests/integration/glacier/__init__.py diff --git a/tests/integration/glacier/__init__.py b/tests/integration/glacier/__init__.py new file mode 100644 index 0000000000..5326afc11b --- /dev/null +++ b/tests/integration/glacier/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/ +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated 
documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# diff --git a/tests/integration/glacier/test_glacier_layer2.py b/tests/integration/glacier/test_glacier_layer2.py index 2395187389..a0b84ee143 100644 --- a/tests/integration/glacier/test_glacier_layer2.py +++ b/tests/integration/glacier/test_glacier_layer2.py @@ -37,6 +37,10 @@ def setUp(self): def tearDown(self): self.vault.delete() + + def test_vault_name(self): + assert self.vault.name == self.vault_name + ## Once you write to a vault you can't delete it for a few hours, ## so this test doesn't work so well. 
# def test_upload_vault(self): From 263b57404ef777b41a3c0261b405fd2f734f1295 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Thu, 6 Sep 2012 20:39:12 +0100 Subject: [PATCH 44/62] Fixed tests --- tests/unit/glacier/test_layer2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/glacier/test_layer2.py b/tests/unit/glacier/test_layer2.py index 8ebb8202de..f6c9098a15 100644 --- a/tests/unit/glacier/test_layer2.py +++ b/tests/unit/glacier/test_layer2.py @@ -71,6 +71,7 @@ def setUp(self): self.layer2 = Layer2(layer1=self.mock_layer1) def test_create_vault(self): + self.mock_layer1.describe_vault.return_value = FIXTURE_VAULT self.layer2.create_vault("My Vault") self.mock_layer1.create_vault.assert_called_with("My Vault") From acf17b8f391dc4803487146517e80af246da8174 Mon Sep 17 00:00:00 2001 From: Thomas Parslow Date: Thu, 6 Sep 2012 20:49:41 +0100 Subject: [PATCH 45/62] Added glacier to the readme :) --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 2a16bb67ab..0ed8e07952 100644 --- a/README.rst +++ b/README.rst @@ -37,6 +37,7 @@ At the moment, boto supports: * Amazon SimpleWorkflow * CloudSearch * Marketplace Web Services +* Glacier The goal of boto is to support the full breadth and depth of Amazon Web Services. In addition, boto provides support for other public From 788ad932dd030c3468f4ec111277b75c57457b6a Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 6 Sep 2012 14:37:27 -0700 Subject: [PATCH 46/62] Pep8 cleanup to glacer l2 unit tests Also switched the assert statements to use assertEqual for better error diagnostics. --- tests/unit/glacier/test_layer2.py | 77 +++++++++++++++++++------------ 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/tests/unit/glacier/test_layer2.py b/tests/unit/glacier/test_layer2.py index f6c9098a15..a82a3a2e2c 100644 --- a/tests/unit/glacier/test_layer2.py +++ b/tests/unit/glacier/test_layer2.py @@ -21,11 +21,7 @@ # IN THE SOFTWARE. 
# -try: - import unittest2 as unittest -except ImportError: - import unittest -import httplib +from tests.unit import unittest from mock import Mock @@ -46,30 +42,36 @@ FIXTURE_ARCHIVE_JOB = { "Action": "ArchiveRetrieval", - "ArchiveId": "NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEArchiveId", + "ArchiveId": ("NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUs" + "uhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqr" + "EXAMPLEArchiveId"), "ArchiveSizeInBytes": 16777216, "Completed": False, "CreationDate": "2012-05-15T17:21:39.339Z", "CompletionDate": "2012-05-15T17:21:43.561Z", "InventorySizeInBytes": None, "JobDescription": "My ArchiveRetrieval Job", - "JobId": "HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5vP54ZShjoQzQVVh7vEXAMPLEjobID", - "SHA256TreeHash": "beb0fe31a1c7ca8c6c04d574ea906e3f97b31fdca7571defb5b44dca89b5af60", + "JobId": ("HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5v" + "P54ZShjoQzQVVh7vEXAMPLEjobID"), + "SHA256TreeHash": ("beb0fe31a1c7ca8c6c04d574ea906e3f97b31fdca7571defb5b44dc" + "a89b5af60"), "SNSTopic": "arn:aws:sns:us-east-1:012345678901:mytopic", "StatusCode": "InProgress", "StatusMessage": "Operation in progress.", "VaultARN": "arn:aws:glacier:us-east-1:012345678901:vaults/examplevault" -} +} + class GlacierLayer2Base(unittest.TestCase): def setUp(self): self.mock_layer1 = Mock(spec=Layer1) + class TestGlacierLayer2Connection(GlacierLayer2Base): def setUp(self): GlacierLayer2Base.setUp(self) self.layer2 = Layer2(layer1=self.mock_layer1) - + def test_create_vault(self): self.mock_layer1.describe_vault.return_value = FIXTURE_VAULT self.layer2.create_vault("My Vault") @@ -78,15 +80,16 @@ def test_create_vault(self): def test_get_vault(self): self.mock_layer1.describe_vault.return_value = FIXTURE_VAULT vault = self.layer2.get_vault("examplevault") - assert vault.layer1 == self.mock_layer1 - assert 
vault.name == "examplevault" - assert vault.size == 78088912 - assert vault.number_of_archives == 192 + self.assertEqual(vault.layer1, self.mock_layer1) + self.assertEqual(vault.name, "examplevault") + self.assertEqual(vault.size, 78088912) + self.assertEqual(vault.number_of_archives, 192) def list_vaults(self): self.mock_layer1.list_vaults.return_value = [FIXTURE_VAULT] vaults = self.layer2.list_vaults() - assert vaults[0].name == "examplevault" + self.assertEqual(vaults[0].name, "examplevault") + class TestVault(GlacierLayer2Base): def setUp(self): @@ -96,27 +99,39 @@ def setUp(self): # TODO: Tests for the other methods of uploading def test_create_archive_writer(self): - self.mock_layer1.initiate_multipart_upload.return_value = {"UploadId": "UPLOADID"} + self.mock_layer1.initiate_multipart_upload.return_value = { + "UploadId": "UPLOADID"} writer = self.vault.create_archive_writer(description="stuff") - self.mock_layer1.initiate_multipart_upload.assert_called_with("examplevault", self.vault.DefaultPartSize, "stuff") - assert writer.vault == self.vault - assert writer.upload_id == "UPLOADID" + self.mock_layer1.initiate_multipart_upload.assert_called_with( + "examplevault", self.vault.DefaultPartSize, "stuff") + self.assertEqual(writer.vault, self.vault) + self.assertEqual(writer.upload_id, "UPLOADID") def test_delete_vault(self): self.vault.delete_archive("archive") - self.mock_layer1.delete_archive.assert_called_with("examplevault","archive") - + self.mock_layer1.delete_archive.assert_called_with("examplevault", + "archive") + def test_get_job(self): self.mock_layer1.describe_job.return_value = FIXTURE_ARCHIVE_JOB - job = self.vault.get_job("NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEArchiveId") - assert job.action == "ArchiveRetrieval" - + job = self.vault.get_job( + "NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPA" + 
"dTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEA" + "rchiveId") + self.assertEqual(job.action, "ArchiveRetrieval") + def test_list_jobs(self): - self.mock_layer1.list_jobs.return_value = {"JobList": [FIXTURE_ARCHIVE_JOB]} + self.mock_layer1.list_jobs.return_value = { + "JobList": [FIXTURE_ARCHIVE_JOB]} jobs = self.vault.list_jobs(False, "InProgress") - self.mock_layer1.list_jobs.assert_called_with("examplevault", False, "InProgress") - assert jobs[0].archive_id == "NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs01MNGntHEQL8MBfGlqrEXAMPLEArchiveId" - + self.mock_layer1.list_jobs.assert_called_with("examplevault", + False, "InProgress") + self.assertEqual(jobs[0].archive_id, + "NkbByEejwEggmBz2fTHgJrg0XBoDfjP4q6iu87-TjhqG6eGoOY9Z" + "8i1_AUyUsuhPAdTqLHy8pTl5nfCFJmDl2yEZONi5L26Omw12vcs0" + "1MNGntHEQL8MBfGlqrEXAMPLEArchiveId") + + class TestJob(GlacierLayer2Base): def setUp(self): GlacierLayer2Base.setUp(self) @@ -126,5 +141,7 @@ def setUp(self): def test_get_job_output(self): self.mock_layer1.get_job_output.return_value = "TEST_OUTPUT" self.job.get_output((0,100)) - self.mock_layer1.get_job_output.assert_called_with("examplevault", "HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5vP54ZShjoQzQVVh7vEXAMPLEjobID", (0,100)) - + self.mock_layer1.get_job_output.assert_called_with( + "examplevault", + "HkF9p6o7yjhFx-K3CGl6fuSm6VzW9T7esGQfco8nUXVYwS0jlb5gq1JZ55yHgt5vP" + "54ZShjoQzQVVh7vEXAMPLEjobID", (0,100)) From f1b007e6c8f29096e5e20266c66df873b777dc4e Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 6 Sep 2012 15:06:50 -0700 Subject: [PATCH 47/62] Remove leading whitespace in glacier modules --- boto/glacier/vault.py | 2 +- boto/glacier/writer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index a10ac2413a..f2c5ef3ecd 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -58,7 +58,7 @@ 
def delete(self): Delete's this vault. WARNING! """ self.layer1.delete_vault(self.name) - + def upload_archive(self, filename): """ Adds an archive to a vault. For archives greater than 100MB the diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index a62ef89c93..aca94e7676 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -76,12 +76,12 @@ def __init__(self, vault, upload_id, part_size): self.vault = vault self.upload_id = upload_id self.part_size = part_size - + self._buffer_size = 0 self._uploaded_size = 0 self._buffer = [] self._tree_hashes = [] - + self.archive_location = None self.closed = False From f117db58ae25a788a0ab522e89986cea6bded31a Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 6 Sep 2012 10:55:18 -0700 Subject: [PATCH 48/62] Reduce memory usage for chunk_hashes On a 180MB file this reduced total memory usage by approximately 40%. This was also marginally faster (but not by much). I've also added the start of unittests for the writer module, and I've written some very basic unittests for the chunk_hashes function. --- boto/glacier/writer.py | 20 +++++++++++--------- tests/unit/glacier/test_writer.py | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 tests/unit/glacier/test_writer.py diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index aca94e7676..b57723c749 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -28,15 +28,17 @@ import json -def chunk_hashes(str): - """ - Break up the byte-string into 1MB chunks and return sha256 hashes - for each. 
- """ - chunk = 1024 * 1024 - chunk_count = int(math.ceil(len(str) / float(chunk))) - chunks = [str[i * chunk:(i + 1) * chunk] for i in range(chunk_count)] - return [hashlib.sha256(x).digest() for x in chunks] +_ONE_MEGABYTE = 1024 * 1024 + + +def chunk_hashes(bytestring, chunk_size=_ONE_MEGABYTE): + chunk_count = int(math.ceil(len(bytestring) / float(chunk_size))) + hashes = [] + for i in xrange(chunk_count): + start = i * chunk_size + end = (i + 1) * chunk_size + hashes.append(hashlib.sha256(bytestring[start:end]).digest()) + return hashes def tree_hash(fo): diff --git a/tests/unit/glacier/test_writer.py b/tests/unit/glacier/test_writer.py new file mode 100644 index 0000000000..216429fdc0 --- /dev/null +++ b/tests/unit/glacier/test_writer.py @@ -0,0 +1,26 @@ +from hashlib import sha256 + +from tests.unit import unittest +import mock + +from boto.glacier.writer import Writer, chunk_hashes + + +class TestChunking(unittest.TestCase): + def test_chunk_hashes_exact(self): + chunks = chunk_hashes('a' * (2 * 1024 * 1024)) + self.assertEqual(len(chunks), 2) + self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest()) + + def test_chunks_with_leftovers(self): + bytestring = 'a' * (2 * 1024 * 1024 + 20) + chunks = chunk_hashes(bytestring) + self.assertEqual(len(chunks), 3) + self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest()) + self.assertEqual(chunks[1], sha256('a' * 1024 * 1024).digest()) + self.assertEqual(chunks[2], sha256('a' * 20).digest()) + + def test_less_than_one_chunk(self): + chunks = chunk_hashes('aaaa') + self.assertEqual(len(chunks), 1) + self.assertEqual(chunks[0], sha256('aaaa').digest()) From 15a370b45e422dbd551bc2aafbefc194b51eed26 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 6 Sep 2012 17:03:13 -0700 Subject: [PATCH 49/62] Allow single op threshold to be configurable This is the threshold at which the Vault class will use a multipart upload instead of a single operation upload. 
--- boto/glacier/vault.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index f2c5ef3ecd..d51bcbf294 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -26,10 +26,12 @@ import hashlib import os.path +_MEGABYTE = 1024 * 1024 class Vault(object): - DefaultPartSize = 4 * 1024 * 1024 # 128MB + DefaultPartSize = 4 * _MEGABYTE + SingleOperationThreshold = 100 * _MEGABYTE ResponseDataElements = (('VaultName', 'name', None), ('VaultARN', 'arn', None), @@ -70,8 +72,7 @@ def upload_archive(self, filename): :rtype: str :return: The archive id of the newly created archive """ - megabyte = 1024 * 1024 - if os.path.getsize(filename) > 100 * megabyte: + if os.path.getsize(filename) > self.SingleOperationThreshold: return self.create_archive_from_file(filename) return self._upload_archive_single_operation(filename) @@ -134,7 +135,7 @@ def create_archive_from_file(self, file=None, file_obj=None): writer = self.create_archive_writer() while True: - data = file_obj.read(1024 * 1024 * 4) + data = file_obj.read(self.DefaultPartSize) if not data: break writer.write(data) From 8cd93058131df93932d8ea99e4e78ce0695a3ad8 Mon Sep 17 00:00:00 2001 From: tedder Date: Sat, 8 Sep 2012 20:40:34 -0700 Subject: [PATCH 50/62] add connect_glacier to the main boto object. 
--- boto/__init__.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/boto/__init__.py b/boto/__init__.py index 98df93644b..a76cf6b9df 100644 --- a/boto/__init__.py +++ b/boto/__init__.py @@ -408,6 +408,24 @@ def connect_euca(host=None, aws_access_key_id=None, aws_secret_access_key=None, region=reg, port=port, path=path, is_secure=is_secure, **kwargs) +def connect_glacier(region=None, **kwargs): + """ + :type region: string + :param region: AWS Glacier region to connect to + + :type aws_access_key_id: string + :param aws_access_key_id: Your AWS Access Key ID + + :type aws_secret_access_key: string + :param aws_secret_access_key: Your AWS Secret Access Key + + :rtype: :class:`boto.glacier.layer2.Layer2` + :return: A connection to Amazon's Glacier Service + """ + + import boto.glacier + return boto.glacier.connect_to_region(region, **kwargs) + def connect_ec2_endpoint(url, aws_access_key_id=None, aws_secret_access_key=None, From 75c474aacc0fba1a35c2a7696aa6d45f6d424dfb Mon Sep 17 00:00:00 2001 From: Matthew Copperwaite Date: Mon, 10 Sep 2012 20:39:01 +0100 Subject: [PATCH 51/62] Added retreive-inventory function --- boto/glacier/vault.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index a10ac2413a..037379e769 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -172,6 +172,34 @@ def retrieve_archive(self, archive_id, sns_topic=None, response = self.layer1.initiate_job(self.name, job_data) return response['JobId'] + + def retrieve_inventory(self, sns_topic=None, + description=None): + """ + Initiate a inventory retrieval job to list the items in the + vault. You will need to wait for the notification from + Amazon (via SNS) before you can actually download the data, + this takes around 4 hours. + + :type description: str + :param description: An optional description for the job. 
+ + :type sns_topic: str + :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier + sends notification when the job is completed and the output + is ready for you to download. + + :rtype: :class:`boto.glacier.job.Job` + :return: A Job object representing the retrieval job. + """ + job_data = {'Type': 'inventory-retrieval'} + if sns_topic is not None: + job_data['SNSTopic'] = sns_topic + if description is not None: + job_data['Description'] = description + + response = self.layer1.initiate_job(self.name, job_data) + return response['JobId'] def delete_archive(self, archive_id): """ From 64694a65c685a0a1c708a0b69a5bead52e1817a4 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Mon, 10 Sep 2012 15:08:30 -0700 Subject: [PATCH 52/62] Allow upload_archive to accept file like objects The entire file is never read into memory, only (configurable) single chunks are read at a single time. --- boto/auth.py | 6 ++++++ boto/glacier/layer1.py | 12 +++++++++--- boto/glacier/vault.py | 14 ++++++-------- boto/glacier/writer.py | 27 +++++++++++++++++++++++++++ boto/utils.py | 18 +++++++++++------- 5 files changed, 59 insertions(+), 18 deletions(-) diff --git a/boto/auth.py b/boto/auth.py index c0ca4f1023..8e0a4c1961 100644 --- a/boto/auth.py +++ b/boto/auth.py @@ -350,6 +350,12 @@ def canonical_uri(self, http_request): return http_request.path def payload(self, http_request): + body = http_request.body + # If the body is a file like object, we can use + # boto.utils.compute_hash, which will avoid reading + # the entire body into memory. + if hasattr(body, 'seek') and hasattr(body, 'read'): + return boto.utils.compute_hash(body, hash_algorithm=sha256)[0] return sha256(http_request.body).hexdigest() def canonical_request(self, http_request): diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index ab517ba6fe..20e4fcd89b 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -22,14 +22,14 @@ # IN THE SOFTWARE. 
# +import os import json + import boto.glacier from boto.connection import AWSAuthConnection from .exceptions import UnexpectedHTTPResponseError from .response import GlacierResponse -#boto.set_stream_logger('glacier') - class Layer1(AWSAuthConnection): @@ -418,9 +418,15 @@ def upload_archive(self, vault_name, archive, ('Location', u'Location'), ('x-amz-sha256-tree-hash', u'TreeHash')] uri = 'vaults/%s/archives' % vault_name + try: + content_length = str(len(archive)) + except TypeError: + # If a file like object is provided, try to retrieve + # the file size via fstat. + content_length = str(os.fstat(archive.fileno()).st_size) headers = {'x-amz-content-sha256': linear_hash, 'x-amz-sha256-tree-hash': tree_hash, - 'x-amz-content-length': str(len(archive))} + 'Content-Length': content_length} if description: headers['x-amz-archive-description'] = description return self.make_request('POST', uri, headers=headers, diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index d51bcbf294..dc5cab0ed4 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -22,7 +22,8 @@ # from .job import Job -from .writer import Writer, bytes_to_hex, chunk_hashes, tree_hash +from .writer import Writer, bytes_to_hex, chunk_hashes, tree_hash, \ + compute_hashes_from_fileobj import hashlib import os.path @@ -86,13 +87,10 @@ def _upload_archive_single_operation(self, filename): :rtype: str :return: The archive id of the newly created archive """ - archive = '' - with open(filename, 'rb') as fd: - archive = fd.read() - linear_hash = hashlib.sha256(archive).hexdigest() - hex_tree_hash = bytes_to_hex(tree_hash(chunk_hashes(archive))) - response = self.layer1.upload_archive(self.name, archive, linear_hash, - hex_tree_hash) + with open(filename, 'rb') as fileobj: + linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj) + response = self.layer1.upload_archive(self.name, open(filename), linear_hash, + tree_hash) return response['ArchiveId'] def create_archive_writer(self, 
part_size=DefaultPartSize, diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index aca94e7676..6811912382 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -63,6 +63,33 @@ def tree_hash(fo): return hashes[0] +def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024): + """Compute the linear and tree hash from a fileobj. + + This function will compute the linear/tree hash of a fileobj + in a single pass through the fileobj. + + :param fileobj: A file like object. + + :param chunk_size: The size of the chunks to use for the tree + hash. This is also the buffer size used to read from + `fileobj`. + + :rtype: tuple + :return: A tuple of (linear_hash, tree_hash). Both hashes + are returned in hex. + + """ + linear_hash = hashlib.sha256() + chunks = [] + chunk = fileobj.read(chunk_size) + while chunk: + linear_hash.update(chunk) + chunks.append(hashlib.sha256(chunk).digest()) + chunk = fileobj.read(chunk_size) + return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks)) + + def bytes_to_hex(str): return ''.join(["%02x" % ord(x) for x in str]).strip() diff --git a/boto/utils.py b/boto/utils.py index e74ce15b88..cf24b9bd4c 100644 --- a/boto/utils.py +++ b/boto/utils.py @@ -849,14 +849,18 @@ def compute_md5(fp, buf_size=8192, size=None): plain digest as the second element and the data size as the third element. 
""" - m = md5() + return compute_hash(fp, buf_size, size, hash_algorithm=md5) + + +def compute_hash(fp, buf_size=8192, size=None, hash_algorithm=md5): + hash_obj = hash_algorithm() spos = fp.tell() if size and size < buf_size: s = fp.read(size) else: s = fp.read(buf_size) while s: - m.update(s) + hash_obj.update(s) if size: size -= len(s) if size <= 0: @@ -865,11 +869,11 @@ def compute_md5(fp, buf_size=8192, size=None): s = fp.read(size) else: s = fp.read(buf_size) - hex_md5 = m.hexdigest() - base64md5 = base64.encodestring(m.digest()) - if base64md5[-1] == '\n': - base64md5 = base64md5[0:-1] + hex_digest = hash_obj.hexdigest() + base64_digest = base64.encodestring(hash_obj.digest()) + if base64_digest[-1] == '\n': + base64_digest = base64_digest[0:-1] # data_size based on bytes read. data_size = fp.tell() - spos fp.seek(spos) - return (hex_md5, base64md5, data_size) + return (hex_digest, base64_digest, data_size) From de28284cf0af58ead7cb74722bcb1888c2395fa9 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Mon, 10 Sep 2012 16:20:05 -0700 Subject: [PATCH 53/62] Raise a ValueError when writing to a closed file This is consistent with other file like objects including: * file * tempfile * StringIO --- boto/glacier/writer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py index 3210e26c6f..42db99473a 100644 --- a/boto/glacier/writer.py +++ b/boto/glacier/writer.py @@ -142,7 +142,8 @@ def send_part(self): self._uploaded_size += len(part) def write(self, str): - assert not self.closed, "Tried to write to a Writer that is already closed!" 
+ if self.closed: + raise ValueError("I/O operation on closed file") if str == "": return self._buffer.append(str) From 9c884d947fb01738c652e8e27a60a874f9a3988c Mon Sep 17 00:00:00 2001 From: Robie Basak Date: Sun, 16 Sep 2012 13:24:07 +0100 Subject: [PATCH 54/62] glacier: fix Vault.retrieve_archive result type retrieve_archive is documented to return a Job object, but currently it returns a job id string. As this is layer 2, it would make more sense to return a full Job object, so do this instead. --- boto/glacier/vault.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 5b4c36fe9b..c14b24e3bf 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -170,7 +170,7 @@ def retrieve_archive(self, archive_id, sns_topic=None, job_data['Description'] = description response = self.layer1.initiate_job(self.name, job_data) - return response['JobId'] + return self.get_job(response['JobId']) def retrieve_inventory(self, sns_topic=None, description=None): From 667f05a3bee66695fa88f4a44890a6f59836d0b9 Mon Sep 17 00:00:00 2001 From: Mitch Garnaat Date: Sun, 16 Sep 2012 13:02:18 -0700 Subject: [PATCH 55/62] A few PEP8 cleanups. 
--- boto/glacier/vault.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index c14b24e3bf..4db86dac58 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -29,6 +29,7 @@ _MEGABYTE = 1024 * 1024 + class Vault(object): DefaultPartSize = 4 * _MEGABYTE @@ -89,8 +90,8 @@ def _upload_archive_single_operation(self, filename): """ with open(filename, 'rb') as fileobj: linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj) - response = self.layer1.upload_archive(self.name, open(filename), linear_hash, - tree_hash) + response = self.layer1.upload_archive(self.name, open(filename), + linear_hash, tree_hash) return response['ArchiveId'] def create_archive_writer(self, part_size=DefaultPartSize, @@ -171,7 +172,7 @@ def retrieve_archive(self, archive_id, sns_topic=None, response = self.layer1.initiate_job(self.name, job_data) return self.get_job(response['JobId']) - + def retrieve_inventory(self, sns_topic=None, description=None): """ From 508e1c127e8523492b64ba10f658761403b8f5e1 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Mon, 17 Sep 2012 09:59:44 -0700 Subject: [PATCH 56/62] Ensure file is opened in binary mode, fixes #988 Was able to repro the issue by trying to submit a binary file, verified the fix works with binary files on windows. 
--- boto/glacier/vault.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 4db86dac58..48d2a40537 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -90,8 +90,9 @@ def _upload_archive_single_operation(self, filename): """ with open(filename, 'rb') as fileobj: linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj) - response = self.layer1.upload_archive(self.name, open(filename), - linear_hash, tree_hash) + fileobj.seek(0) + response = self.layer1.upload_archive(self.name, fileobj, + linear_hash, tree_hash) return response['ArchiveId'] def create_archive_writer(self, part_size=DefaultPartSize, From d632d1b3b50641d909b199df8eae8885bced7146 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Tue, 18 Sep 2012 12:24:51 -0700 Subject: [PATCH 57/62] Bug fix: import urllib The make_request method uses urllib yet it is not imported. --- boto/glacier/layer1.py | 1 + 1 file changed, 1 insertion(+) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 20e4fcd89b..2f5093619e 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -24,6 +24,7 @@ import os import json +import urllib import boto.glacier from boto.connection import AWSAuthConnection From d7057d994a9c1f68cebf9ffe89561ef7079f9d8a Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Tue, 18 Sep 2012 12:25:40 -0700 Subject: [PATCH 58/62] Bug fix: s/params_list/param_list/ --- boto/glacier/layer1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 2f5093619e..9c2047a00d 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -75,7 +75,7 @@ def make_request(self, verb, resource, headers=None, if params: param_list = [] for key, value in params: - params_list.append('%s=%s' % (urllib.quote(key), + param_list.append('%s=%s' % (urllib.quote(key), urllib.quote(value))) uri += '?' 
+ '&'.join(param_list) response = AWSAuthConnection.make_request(self, verb, uri, From 22f960b3eb587b248652bdae4e389c8c77dd62be Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Tue, 18 Sep 2012 12:27:33 -0700 Subject: [PATCH 59/62] Bug fix: limit/marker should be keys in params dict --- boto/glacier/layer1.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 9c2047a00d..109faf884a 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -123,9 +123,9 @@ def list_vaults(self, limit=None, marker=None): """ params = {} if limit: - params['limit': limit] + params['limit'] = limit if marker: - params['marker': marker] + params['marker'] = marker return self.make_request('GET', 'vaults', params=params) def describe_vault(self, vault_name): @@ -281,9 +281,9 @@ def list_jobs(self, vault_name, completed=None, status_code=None, """ params = {} if limit: - params['limit': limit] + params['limit'] = limit if marker: - params['marker': marker] + params['marker'] = marker uri = 'vaults/%s/jobs' % vault_name return self.make_request('GET', uri, params=params) @@ -552,9 +552,9 @@ def list_multipart_uploads(self, vault_name, limit=None, marker=None): """ params = {} if limit: - params['limit': limit] + params['limit'] = limit if marker: - params['marker': marker] + params['marker'] = marker uri = 'vaults/%s/%s/multipart-uploads' % vault_name return self.make_request('GET', uri, params=params) @@ -584,9 +584,9 @@ def list_parts(self, vault_name, upload_id, limit=None, marker=None): """ params = {} if limit: - params['limit': limit] + params['limit'] = limit if marker: - params['marker': marker] + params['marker'] = marker uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('GET', uri, params=params) From 82509839eb78b3340d28d0a2dff717e6d3ba9a80 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Tue, 18 Sep 2012 12:29:44 -0700 Subject: 
[PATCH 60/62] Pep 8 cleanup --- boto/glacier/layer1.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 109faf884a..88d202e1c8 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -76,7 +76,7 @@ def make_request(self, verb, resource, headers=None, param_list = [] for key, value in params: param_list.append('%s=%s' % (urllib.quote(key), - urllib.quote(value))) + urllib.quote(value))) uri += '?' + '&'.join(param_list) response = AWSAuthConnection.make_request(self, verb, uri, headers=headers, @@ -382,8 +382,6 @@ def get_job_output(self, vault_name, job_id, byte_range=None): response = self.make_request('GET', uri, headers=headers, ok_responses=(200, 206), response_headers=response_headers) - # TODO not sure if we want to verify checksum in this abstraction level - # and do a retry? return response # Archives From b0e557cb020a2372de6621679af23010e1904f8d Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Tue, 18 Sep 2012 12:31:40 -0700 Subject: [PATCH 61/62] Bug fix: format strings had an extra '%s' This was for abort_multipart_upload and list_parts, and list_multipart_uploads --- boto/glacier/layer1.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/boto/glacier/layer1.py b/boto/glacier/layer1.py index 88d202e1c8..480f426b96 100644 --- a/boto/glacier/layer1.py +++ b/boto/glacier/layer1.py @@ -525,7 +525,7 @@ def abort_multipart_upload(self, vault_name, upload_id): :param upload_id: The unique ID associated with this upload operation. 
""" - uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('DELETE', uri, ok_responses=(204,)) def list_multipart_uploads(self, vault_name, limit=None, marker=None): @@ -553,7 +553,7 @@ def list_multipart_uploads(self, vault_name, limit=None, marker=None): params['limit'] = limit if marker: params['marker'] = marker - uri = 'vaults/%s/%s/multipart-uploads' % vault_name + uri = 'vaults/%s/multipart-uploads' % vault_name return self.make_request('GET', uri, params=params) def list_parts(self, vault_name, upload_id, limit=None, marker=None): @@ -585,7 +585,7 @@ def list_parts(self, vault_name, upload_id, limit=None, marker=None): params['limit'] = limit if marker: params['marker'] = marker - uri = 'vaults/%s/%s/multipart-uploads/%s' % (vault_name, upload_id) + uri = 'vaults/%s/multipart-uploads/%s' % (vault_name, upload_id) return self.make_request('GET', uri, params=params) def upload_part(self, vault_name, upload_id, linear_hash, From 8cae83e8be1ba4f6575097b7bb6c4bbe72d34323 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Mon, 17 Sep 2012 10:46:36 -0700 Subject: [PATCH 62/62] Add threaded multipart archive upload for glacier This will internally create a thread pool that concurrently uploads the various chunks of a file using the multipart API for glacier. When testing upload speeds, this was approximately 13 times faster than the single threaded archive upload. It also is much better at sustaining a constant upload rate, in the single threaded upload, the upload rate fluctuated quite a bit (which makes sense given that it part of the time it's reading from disk and computing hashes instead of sending data). 
For comparison, for a 1GB file: Operation | Time (secs) | Average Upload rate (MB/s) | --------------------------------------------------------------- s3multiput 216 6.0 glacier-single 1291 2.0 glacier-concurrent 98 11.5 --- boto/glacier/concurrent.py | 213 +++++++++++++++++++++++++++++++++++++ boto/glacier/exceptions.py | 4 + boto/glacier/vault.py | 39 +++++-- 3 files changed, 248 insertions(+), 8 deletions(-) create mode 100644 boto/glacier/concurrent.py diff --git a/boto/glacier/concurrent.py b/boto/glacier/concurrent.py new file mode 100644 index 0000000000..b993c67436 --- /dev/null +++ b/boto/glacier/concurrent.py @@ -0,0 +1,213 @@ +# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, dis- +# tribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the fol- +# lowing conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- +# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# +import os +import math +import threading +import hashlib +import time +import logging +from Queue import Queue, Empty + +from .writer import chunk_hashes, tree_hash, bytes_to_hex +from .exceptions import UploadArchiveError + + +DEFAULT_PART_SIZE = 4 * 1024 * 1024 +_END_SENTINEL = object() +log = logging.getLogger('boto.glacier.concurrent') + + +class ConcurrentUploader(object): + """Concurrently upload an archive to glacier. + + This class uses a thread pool to concurrently upload an archive + to glacier using the multipart upload API. + + The threadpool is completely managed by this class and is + transparent to the users of this class. + + """ + def __init__(self, api, vault_name, part_size=DEFAULT_PART_SIZE, + num_threads=10): + """ + :type api: :class:`boto.glacier.layer1.Layer1` + :param api: A layer1 glacier object. + + :type vault_name: str + :param vault_name: The name of the vault. + + :type part_size: int + :param part_size: The size, in bytes, of the chunks to use when uploading + the archive parts. The part size must be a megabyte multiplied by + a power of two. + + """ + self._api = api + self._vault_name = vault_name + self._part_size = part_size + self._num_threads = num_threads + self._threads = [] + + def upload(self, filename, description=None): + """Concurrently create an archive. + + :type filename: str + :param filename: The filename to upload + + :type description: str + :param description: The description of the archive. + + :rtype: str + :return: The archive id of the newly created archive. 
+ + """ + fileobj = open(filename, 'rb') + total_size = os.fstat(fileobj.fileno()).st_size + total_parts = int(math.ceil(total_size / float(self._part_size))) + hash_chunks = [None] * total_parts + worker_queue = Queue() + result_queue = Queue() + response = self._api.initiate_multipart_upload(self._vault_name, + self._part_size, + description) + upload_id = response['UploadId'] + # The basic idea is to add the chunks (the offsets not the actual + # contents) to a work queue, start up a thread pool, let them crank + # through the items in the work queue, and then place their results + # in a result queue which we use to complete the multipart upload. + self._add_work_items_to_queue(total_parts, worker_queue) + self._start_upload_threads(result_queue, upload_id, + worker_queue, filename) + try: + self._wait_for_upload_threads(hash_chunks, result_queue, total_parts) + except UploadArchiveError, e: + log.debug("An error occurred while uploading an archive, aborting " + "multipart upload.") + self._api.abort_multipart_upload(self._vault_name, upload_id) + raise e + log.debug("Completing upload.") + response = self._api.complete_multipart_upload( + self._vault_name, upload_id, bytes_to_hex(tree_hash(hash_chunks)), + total_size) + log.debug("Upload finished.") + return response['ArchiveId'] + + def _wait_for_upload_threads(self, hash_chunks, result_queue, total_parts): + for _ in xrange(total_parts): + result = result_queue.get() + if isinstance(result, Exception): + log.debug("An error was found in the result queue, terminating " + "threads: %s", result) + self._shutdown_threads() + raise UploadArchiveError("An error occurred while uploading " + "an archive: %s" % result) + # Each unit of work returns the tree hash for the given part + # number, which we use at the end to compute the tree hash of + # the entire archive. 
+ part_number, tree_sha256 = result + hash_chunks[part_number] = tree_sha256 + self._shutdown_threads() + + def _shutdown_threads(self): + log.debug("Shutting down threads.") + for thread in self._threads: + thread.should_continue = False + for thread in self._threads: + thread.join() + log.debug("Threads have exited.") + + def _start_upload_threads(self, result_queue, upload_id, worker_queue, filename): + log.debug("Starting threads.") + for _ in xrange(self._num_threads): + thread = UploadWorkerThread(self._api, self._vault_name, filename, + upload_id, worker_queue, result_queue) + time.sleep(0.2) + thread.start() + self._threads.append(thread) + + def _add_work_items_to_queue(self, total_parts, worker_queue): + log.debug("Adding work items to queue.") + for i in xrange(total_parts): + worker_queue.put((i, self._part_size)) + for i in xrange(self._num_threads): + worker_queue.put(_END_SENTINEL) + + +class UploadWorkerThread(threading.Thread): + def __init__(self, api, vault_name, filename, upload_id, + worker_queue, result_queue, num_retries=5, + time_between_retries=5, + retry_exceptions=Exception): + threading.Thread.__init__(self) + self._api = api + self._vault_name = vault_name + self._filename = filename + self._fileobj = open(filename, 'rb') + self._worker_queue = worker_queue + self._result_queue = result_queue + self._upload_id = upload_id + self._num_retries = num_retries + self._time_between_retries = time_between_retries + self._retry_exceptions = retry_exceptions + self.should_continue = True + + def run(self): + while self.should_continue: + try: + work = self._worker_queue.get(timeout=1) + except Empty: + continue + if work is _END_SENTINEL: + return + result = self._process_chunk(work) + self._result_queue.put(result) + + def _process_chunk(self, work): + result = None + for _ in xrange(self._num_retries): + try: + result = self._upload_chunk(work) + break + except self._retry_exceptions, e: + log.error("Exception caught uploading part number %s 
for " + "vault %s, filename: %s", work[0], self._vault_name, + self._filename) + time.sleep(self._time_between_retries) + result = e + return result + + def _upload_chunk(self, work): + part_number, part_size = work + start_byte = part_number * part_size + self._fileobj.seek(start_byte) + contents = self._fileobj.read(part_size) + linear_hash = hashlib.sha256(contents).hexdigest() + tree_hash_bytes = tree_hash(chunk_hashes(contents)) + byte_range = (start_byte, start_byte + len(contents) - 1) + log.debug("Uploading chunk %s of size %s", part_number, part_size) + response = self._api.upload_part(self._vault_name, self._upload_id, + linear_hash, + bytes_to_hex(tree_hash_bytes), + byte_range, contents) + # Reading the response allows the connection to be reused. + response.read() + return (part_number, tree_hash_bytes) diff --git a/boto/glacier/exceptions.py b/boto/glacier/exceptions.py index 0092c62e54..3942da686a 100644 --- a/boto/glacier/exceptions.py +++ b/boto/glacier/exceptions.py @@ -40,3 +40,7 @@ def __init__(self, expected_responses, response): response.status, self.body) super(UnexpectedHTTPResponseError, self).__init__(msg) + + +class UploadArchiveError(Exception): + pass diff --git a/boto/glacier/vault.py b/boto/glacier/vault.py index 48d2a40537..4d0e072334 100644 --- a/boto/glacier/vault.py +++ b/boto/glacier/vault.py @@ -22,9 +22,8 @@ # from .job import Job -from .writer import Writer, bytes_to_hex, chunk_hashes, tree_hash, \ - compute_hashes_from_fileobj -import hashlib +from .writer import Writer, compute_hashes_from_fileobj +from .concurrent import ConcurrentUploader import os.path _MEGABYTE = 1024 * 1024 @@ -116,13 +115,13 @@ def create_archive_writer(self, part_size=DefaultPartSize, description) return Writer(self, response['UploadId'], part_size=part_size) - def create_archive_from_file(self, file=None, file_obj=None): + def create_archive_from_file(self, filename=None, file_obj=None): """ Create a new archive and upload the data from the given 
file or file-like object. - :type file: str - :param file: A filename to upload + :type filename: str + :param filename: A filename to upload :type file_obj: file :param file_obj: A file-like object to upload @@ -131,7 +130,7 @@ def create_archive_from_file(self, file=None, file_obj=None): :return: The archive id of the newly created archive """ if not file_obj: - file_obj = open(file, "rb") + file_obj = open(filename, "rb") writer = self.create_archive_writer() while True: @@ -142,6 +141,30 @@ def create_archive_from_file(self, file=None, file_obj=None): writer.close() return writer.get_archive_id() + def concurrent_create_archive_from_file(self, filename): + """ + Create a new archive from a file and upload the given + file. + + This is a convenience method around the + :class:`boto.glacier.concurrent.ConcurrentUploader` + class. This method will perform a multipart upload + and upload the parts of the file concurrently. + + :type filename: str + :param filename: A filename to upload + + :raises: `boto.glacier.exceptions.UploadArchiveError` if an error + occurs during the upload process. + + :rtype: str + :return: The archive id of the newly created archive + + """ + uploader = ConcurrentUploader(self.layer1, self.name) + archive_id = uploader.upload(filename) + return archive_id + def retrieve_archive(self, archive_id, sns_topic=None, description=None): """ @@ -175,7 +198,7 @@ def retrieve_archive(self, archive_id, sns_topic=None, return self.get_job(response['JobId']) def retrieve_inventory(self, sns_topic=None, - description=None): + description=None): """ Initiate a inventory retrieval job to list the items in the vault. You will need to wait for the notification from