Commit

Merge pull request #73 from meltmedia/develop
Update to the S3 multi-part upload
vincentbraun authored Nov 30, 2017
2 parents 423e97b + 4e78b6b commit 4af8827
Showing 6 changed files with 144 additions and 70 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
@@ -4,8 +4,8 @@ python:
 - "2.7"
 # command to install dependencies
 install:
-- "pip install -r requirements.txt --use-mirrors"
-- pip install coveralls --use-mirrors
+- "pip install -r requirements.txt"
+- pip install coveralls
 # command to run tests
 script:
 - "nosetests --with-coverage --cover-erase --cover-branches --cover-package=the_ark"
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
 boto==2.29.1
-coverage==3.7.1
+coverage==4.4.2
 flake8==2.3.0
 jsonschema
 mandrill==1.0.57
4 changes: 2 additions & 2 deletions setup.py
@@ -11,7 +11,7 @@ def readme():
 
 setup(
     name="theark",
-    version="0.0.6",
+    version="0.0.8",
     author="meltmedia QA Team",
     author_email="qa-d@meltmedia.com",
     description="meltQA Tools Common Library.",
@@ -23,7 +23,7 @@ def readme():
     install_requires=[
         "boto >= 2.29.1",
         "requests >= 2.3.0",
-        "selenium >= 2.45.0"
+        "selenium >= 2.53.0"
     ],
     classifiers=[
         "Development Status :: 1 - Planning",
91 changes: 73 additions & 18 deletions tests/test_s3_client.py
@@ -1,8 +1,9 @@
 import unittest
 import urlparse
-from the_ark.s3_client import S3Client, S3ClientException
-from mock import Mock, patch
+import the_ark.s3_client
 from boto.s3.key import Key
+from boto.s3.connection import S3ResponseError
+from mock import Mock, patch
 from cStringIO import StringIO
 
 __author__ = 'chaley'
@@ -18,19 +19,19 @@ class S3InitTestCase(unittest.TestCase):
     @patch('boto.s3.connection.S3Connection')
     def test_class_init(self, s3con):
         s3con.return_value = {}
-        client = S3Client(bucket)
+        client = the_ark.s3_client.S3Client(bucket)
         self.assertIsNotNone(client)
 
     @patch('boto.s3.connection.S3Connection')
     def test_class_init_fail(self, s3con):
         s3con.side_effect = Exception('Boom')
-        client = S3Client(bucket)
+        client = the_ark.s3_client.S3Client(bucket)
         self.assertIsNone(client.s3_connection)
 
 
 class S3MethodTestCase(unittest.TestCase):
     def setUp(self):
-        self.client = S3Client(bucket)
+        self.client = the_ark.s3_client.S3Client(bucket)
         self.client.s3_connection = Mock()
         self.client.bucket = Mock()
 
@@ -80,7 +81,7 @@ def test_verify_file(self):
     def test_verify_file_boom(self):
         self.client.bucket.get_key.side_effect = Exception(
             'Here Comes the Boom!')
-        with self.assertRaises(S3ClientException):
+        with self.assertRaises(the_ark.s3_client.S3ClientException):
             self.client.verify_file('stuff', 'more stuff')
 
     @patch('the_ark.s3_client.S3Client.verify_file')
@@ -98,21 +99,60 @@ def test_get_file_with_no_file(self, verify):
         mock_file = Mock()
         verify.return_value = False
         self.client.bucket.get_key.return_value = mock_file
-        with self.assertRaises(S3ClientException):
+        with self.assertRaises(the_ark.s3_client.S3ClientException):
             self.client.get_file('stuff', 'more stuff')
 
     def test_get_file_boom(self):
         self.client.bucket.get_key.side_effect = Exception(
             'Here Comes the Boom!')
-        with self.assertRaises(S3ClientException):
+        with self.assertRaises(the_ark.s3_client.S3ClientException):
             self.client.get_file('stuff', 'more stuff')
 
-    def test_store_file_boom(self):
-        with self.assertRaises(S3ClientException):
-            self.client.store_file('stuff', 'more stuff', 'file_name')
+    def test_store_file_s3_client_error(self):
+        with self.assertRaises(the_ark.s3_client.S3ClientException):
+            self.client.store_file(s3_path='path', file_to_store='file', filename="bob's file")
 
-        with self.assertRaises(S3ClientException):
-            self.client.store_file('stuff', 'more stuff', filename="bob's file")
+    @patch('the_ark.s3_client.S3Client._split_file')
+    @patch('shutil.rmtree')
+    @patch('urlparse.urlparse')
+    @patch('mimetypes.guess_type')
+    @patch('boto.s3.key.Key.set_contents_from_file')
+    @patch('os.path.getsize')
+    @patch('os.listdir')
+    def test_store_file_multipart_s3_error(self, listdir, get_size, set_contents, guess_type, url_parse, rmtree, split_file):
+        url_parse.return_value = url_parse_base.scheme, url_parse_base.netloc, url_parse_base.path, \
+            url_parse_base.params, url_parse_base.query, url_parse_base.fragment
+        guess_type.return_value("image/png")
+        mock_multipart_upload = Mock()
+        mock_file = Mock(spec_set=str)
+        temp_dir = Mock()
+        split_file.return_value = temp_dir
+        listdir.return_value = []
+        self.client.bucket.initiate_multipart_upload.return_value = mock_multipart_upload
+        mock_multipart_upload.complete_upload.side_effect = S3ResponseError("error", "S3 multipart exception")
+        get_size.return_value = the_ark.s3_client.DEFAULT_MINIMUM_SPLIT_AT_SIZE + 1000
+        set_contents.return_value(True)
+        self.client.store_file(s3_path='path', file_to_store=mock_file, filename="bob's file")
+        mock_multipart_upload.cancel_upload.assert_called_once_with()
+        rmtree.assert_called_once_with(temp_dir)
+
+    @patch('the_ark.s3_client.S3Client._split_file')
+    @patch('urlparse.urlparse')
+    @patch('mimetypes.guess_type')
+    @patch('boto.s3.key.Key.set_contents_from_file')
+    @patch('os.path.getsize')
+    def test_store_file_multipart_unexpected_error(self, get_size, set_contents, guess_type, url_parse, split_file):
+        url_parse.return_value = url_parse_base.scheme, url_parse_base.netloc, url_parse_base.path, \
+            url_parse_base.params, url_parse_base.query, url_parse_base.fragment
+        guess_type.return_value("image/png")
+        mock_multipart_upload = Mock()
+        mock_file = Mock(spec_set=str)
+        split_file.side_effect = the_ark.s3_client.S3ClientException("Unexpected split exception")
+        self.client.bucket.initiate_multipart_upload.return_value = mock_multipart_upload
+        get_size.return_value = the_ark.s3_client.DEFAULT_MINIMUM_SPLIT_AT_SIZE + 1000
+        set_contents.return_value(True)
+        self.client.store_file(s3_path='path', file_to_store=mock_file, filename="bob's file")
+        mock_multipart_upload.cancel_upload.assert_called_once_with()
 
     @patch('urlparse.urlparse')
     @patch('mimetypes.guess_type')
@@ -162,10 +202,25 @@ def test_get_most_recent_file_from_s3_key_list(self):
     @patch('os.path.getsize')
     def test_split_file_boom(self, get_size, make_dir):
         make_dir.side_effect = Exception('Here Comes the Boom?')
-        get_size.return_value = 9999999999999
-        with self.assertRaises(S3ClientException):
-            self.client.store_file(
-                'stuff', "./tests/etc/test.png", return_url=False, filename="this file")
+        get_size.return_value = 9999999999
+        with self.assertRaises(the_ark.s3_client.S3ClientException):
+            self.client._split_file("test")
+
+    @patch('tempfile.mkdtemp')
+    @patch('os.path.getsize')
+    def test_split_file_too_large(self, get_size, make_dir):
+        make_dir.side_effect = Exception('Here Comes the Boom?')
+        get_size.return_value = (the_ark.s3_client.DEFAULT_FILE_SPLIT_SIZE * the_ark.s3_client.MAX_FILE_SPLITS) + 100
+        with self.assertRaises(the_ark.s3_client.S3ClientException):
+            self.client._split_file("test")
+
+    @patch('tempfile.mkdtemp')
+    @patch('os.path.getsize')
+    def test_split_file_too_small(self, get_size, make_dir):
+        make_dir.side_effect = Exception('Here Comes the Boom?')
+        get_size.return_value = (the_ark.s3_client.DEFAULT_FILE_SPLIT_SIZE - 100)
+        with self.assertRaises(the_ark.s3_client.S3ClientException):
+            self.client._split_file("test")
 
     @patch('urlparse.urlparse')
     @patch('mimetypes.guess_type')
@@ -176,7 +231,7 @@ def test_store_file_with_split(self, get_size, set_contents, guess_type, url_parse):
             url_parse_sec_token.params, url_parse_sec_token.query, url_parse_sec_token.fragment
         guess_type.return_value("image/png")
         set_contents.return_value(True)
-        get_size.return_value = 9999999999999
+        get_size.return_value = the_ark.s3_client.DEFAULT_MINIMUM_SPLIT_AT_SIZE + 100
         self.client.store_file(
             'stuff', "./tests/etc/test.png", return_url=True, filename="this file")
 
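One note on the mocking pattern in the new multipart tests: Mock(spec_set=str) takes on the spec's __class__, so store_file's new isinstance(file_to_store, str) guard passes without a real file on disk (os.path.getsize is patched separately). A minimal sketch, outside the commit:

    from mock import Mock

    fake_path = Mock(spec_set=str)
    print isinstance(fake_path, str)  # True - the mock clears store_file's isinstance() check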
2 changes: 1 addition & 1 deletion tests/test_selenium_helpers.py
@@ -330,7 +330,7 @@ def test_visible_invalid(self):
 
     def test_get_valid(self):
         valid_css_selector = ".valid"
-        self.assertEqual(self.sh.get_element(valid_css_selector).location, {'y': 21, 'x': 48})
+        self.assertEqual(self.sh.get_element(valid_css_selector).location, {'y': 21.4375, 'x': 48})
 
     def test_get_invalid(self):
         self.assertRaises(selenium_helpers.ElementError, self.sh.get_element, ".invalid")
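The expected y coordinate moves from 21 to 21.4375 because current browsers report subpixel element positions. An alternative the commit does not take would be a tolerance-based check:

    # Hedged sketch only; the commit pins the exact fractional value instead.
    location = self.sh.get_element(valid_css_selector).location
    self.assertAlmostEqual(location['y'], 21.4375, delta=0.5)
    self.assertEqual(location['x'], 48)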
111 changes: 65 additions & 46 deletions the_ark/s3_client.py
@@ -5,10 +5,17 @@
 import tempfile
 import urllib
 import urlparse
+import logging
 
 from boto.s3.key import Key
 from StringIO import StringIO
 
+logger = logging.getLogger(__name__)
+
+MAX_FILE_SPLITS = 9999
+DEFAULT_FILE_SPLIT_SIZE = 6291456
+DEFAULT_MINIMUM_SPLIT_AT_SIZE = 20000000
+
 
 class S3Client(object):
     """A client that helps user to send and get files from S3"""
@@ -42,7 +49,8 @@ def connect(self):
             message = "Exception while connecting to S3: {0}".format(s3_connection_exception)
             raise S3ClientException(message)
 
-    def store_file(self, s3_path, file_to_store, filename, return_url=False, mime_type=None, chunk_at_size=20000000):
+    def store_file(self, s3_path, file_to_store, filename, return_url=False, mime_type=None,
+                   chunk_at_size=DEFAULT_MINIMUM_SPLIT_AT_SIZE):
         """
         Pushes the desired file up to S3 (e.g. log file).
         :param
@@ -66,30 +74,41 @@ def store_file(self, s3_path, file_to_store, filename, return_url=False, mime_type=None,
             s3_file.set_metadata('Content-Type', mime_type)
 
         # - Check if file is a buffer or disk file and if file that is getting uploaded is greater than
         # chunk_at_size then upload cool multi style
-        if type(file_to_store) == str and os.path.getsize(file_to_store) > chunk_at_size:
-            file_count = 0
-            # - Split the file and get it chunky
-            split_file_dir = self._split_file(file_to_store)
-
-            # - Initiate the file to be uploaded in parts
+        mutli_part_upload_successful = False
+        if isinstance(file_to_store, str) and os.path.getsize(file_to_store) > chunk_at_size:
+            split_file_dir = None
             multipart_file = self.bucket.initiate_multipart_upload(key_name=s3_file.key, metadata=s3_file.metadata)
 
-            # - Upload the file parts
-            for files in os.listdir(split_file_dir):
-                file_count += 1
-                file_part = open(os.path.join(split_file_dir, files), 'rb')
-                multipart_file.upload_part_from_file(file_part, file_count)
-
-            # - Complete the upload
-            multipart_file.complete_upload()
-
-            # - Remove the folder from splitting the file
-            shutil.rmtree(split_file_dir)
+            try:
+                # - Split the file and get it chunky
+                split_file_dir = self._split_file(file_to_store)
+
+                # - Upload the file parts
+                file_count = 0
+                for files in os.listdir(split_file_dir):
+                    file_count += 1
+                    file_part = open(os.path.join(split_file_dir, files), 'rb')
+                    multipart_file.upload_part_from_file(file_part, file_count)
+
+                # - Complete the upload
+                multipart_file.complete_upload()
+                mutli_part_upload_successful = True
+            except boto.s3.connection.S3ResponseError as s3_error:
+                logger.warning("A S3 Response error was caught while attempting to chunk and upload the PDF | {}\n"
+                               "Will now attempt to send the file as a whole...".format(s3_error))
+                multipart_file.cancel_upload()
+            except Exception as s3_error:
+                logger.warning("Unexpected Error encountered an issue while chunking and uploading the PDF | {}\n"
+                               "Will now attempt to send the file as a whole...".format(s3_error))
+                multipart_file.cancel_upload()
+            finally:
+                # - Remove the folder from splitting the file
+                if split_file_dir:
+                    shutil.rmtree(split_file_dir)
 
         # - Upload the file as a whole
-        else:
+        if not mutli_part_upload_successful:
             # - Determine whether the file_to_store is an object or file path/string
             file_type = type(file_to_store)
             if file_type in [str, unicode]:
                 s3_file.set_contents_from_filename(file_to_store)
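Taken together, the reworked block degrades gracefully: a failed multipart upload is cancelled so no orphaned parts linger in the bucket, the split directory is removed in the finally clause, and the mutli_part_upload_successful flag routes the same call into the whole-file path. A hedged usage sketch (bucket and file names are made up; only the signature comes from this diff):

    from the_ark.s3_client import S3Client

    client = S3Client("my-qa-bucket")  # hypothetical bucket
    client.connect()

    # Files over chunk_at_size (~20 MB by default) are split and multipart-uploaded;
    # any failure cancels the parts and the call falls back to one whole-file upload.
    url = client.store_file(s3_path="reports/2017", file_to_store="./big_report.pdf",
                            filename="big_report.pdf", return_url=True)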
@@ -204,42 +223,42 @@ def get_most_recent_file_from_s3_key_list(self, key_list):
                 most_recent_key = key
         return most_recent_key
 
-    def _split_file(self, from_file, file_chunk_size=5242880):
+    def _split_file(self, from_file, file_chunk_size=DEFAULT_FILE_SPLIT_SIZE):
         """
-        Split a given file into smaller chunks named partXXXX into a temp at a default size of ~ 5 mb. The temp
-        folder should be deleted after use.
-
-        WARNING: You cannot split into more than 9999 files.
-
+        Split a given file into smaller chunks named partXXXX into a temp at a default size of ~ 6 mb. The temp
+        folder should be deleted after use.
+        WARNING: You cannot split into more than 9999 files.
         :param
             - from_file: string - the file to split up
-            - file_chunk_size: int - the size the file should be chunked to (default ~ 5 mb for Amazon S3 minimum)
+            - file_chunk_size: int - number of Bytes each split should contain (Should be > 5 MB for Amazon S3 minimum)
         :return:
             - temp_dir: string - temp folder location of split file, use to iterate through the split files
         """
+        if os.path.getsize(from_file) > (MAX_FILE_SPLITS * file_chunk_size):
+            raise S3ClientException("Could not split the file.\nError: Input file is too large!\n")
+        elif os.path.getsize(from_file) < DEFAULT_FILE_SPLIT_SIZE:
+            raise S3ClientException("Could not split the file.\nError: Input file is too small!\n")
+
         try:
             temp_dir = tempfile.mkdtemp()
-            part_number = 0
-            input_file = open(from_file, 'rb')  # use binary mode on Windows
-            while 1:  # eof=empty string from read
-                chunk = input_file.read(file_chunk_size)  # get next part <= chunk size
-                if not chunk: break
-                part_number += 1
-                filename = os.path.join(temp_dir, ('part%04d' % part_number))
-                fileobject = open(filename, 'wb')
-                fileobject.write(chunk)
-                fileobject.close()  # or simply open( ).write( )
-            input_file.close()
-            assert part_number <= 9999  # join sort fails if 5 digits
+            part_num = 0
+            with open(from_file, 'rb') as input_file:
+                chunk = input_file.read(file_chunk_size)
+                while chunk:
+                    part_num += 1
+                    open(os.path.join(temp_dir, ('part%04d' % part_num)), 'wb').write(chunk)
+                    chunk = input_file.read(file_chunk_size)
+
             return temp_dir
         except Exception as e:
-            print "Could not split the file.\nError: {}\n".format(e)
-            raise e
+            raise S3ClientException("Could not split the file.\nError: {}\n".format(e))
 
 
 class S3ClientException(Exception):
     def __init__(self, message):
         self.msg = message
 
     def __str__(self):
-        return self.msg
+        return self.msg
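A round-trip sketch of the rewritten _split_file contract, assuming an S3Client instance and a stand-in file whose size falls between DEFAULT_FILE_SPLIT_SIZE and MAX_FILE_SPLITS * file_chunk_size (the new guards raise S3ClientException outside that range):

    import os
    import shutil

    temp_dir = client._split_file("big.bin")   # 'client' and 'big.bin' are stand-ins
    parts = sorted(os.listdir(temp_dir))       # part0001, part0002, ... sort lexically
    joined = "".join(open(os.path.join(temp_dir, part), 'rb').read() for part in parts)
    assert joined == open("big.bin", 'rb').read()
    shutil.rmtree(temp_dir)                    # the caller deletes the temp folder, per the docstring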
