diff --git a/awscli/customizations/s3/description.py b/awscli/customizations/s3/description.py index bd7f3267923f..7c8370fea661 100644 --- a/awscli/customizations/s3/description.py +++ b/awscli/customizations/s3/description.py @@ -65,6 +65,18 @@ def add_param_descriptions(params_dict): params_dict['delete']['documents'] = "Files that exist in the " \ "destination but not in the source are deleted during sync." + params_dict['follow-symlinks']['documents'] = "Symbolic links are " \ + "followed only when uploading to S3 from the local filesystem. Note" \ + " that S3 does not support symbolic links, so the contents of the link" \ + " target are uploaded under the name of the link. When neither " \ + "``--follow-symlinks`` nor ``--no-follow-symlinks`` is specified, " \ + "the default is to follow symlinks." + + params_dict['no-follow-symlinks']['documents'] = "Symbolic links are " \ + "ignored when uploading to S3 from the local filesystem. When neither " \ + "``--follow-symlinks`` nor ``--no-follow-symlinks`` is specified, " \ + "the default is to follow symlinks." + params_dict['exclude']['documents'] = "Exclude all files or objects" \ " from the command that matches the specified pattern." diff --git a/awscli/customizations/s3/filegenerator.py b/awscli/customizations/s3/filegenerator.py index 53660c3d11cc..f78169c80286 100644 --- a/awscli/customizations/s3/filegenerator.py +++ b/awscli/customizations/s3/filegenerator.py @@ -31,7 +31,7 @@ class FileDecodingError(Exception): ADVICE = ( "Please check your locale settings. The filename was decoded as: %s\n" "On posix platforms, check the LC_CTYPE environment variable." - % (sys.getfilesystemencoding()) + % (sys.getfilesystemencoding()) ) def __init__(self, directory, filename): @@ -54,10 +54,11 @@ class FileGenerator(object): under the same common prefix. The generator yields corresponding ``FileInfo`` objects to send to a ``Comparator`` or ``S3Handler``. """ - def __init__(self, service, endpoint, operation_name, parameters): + def __init__(self, service, endpoint, operation_name, follow_symlinks=True): self._service = service self._endpoint = endpoint self.operation_name = operation_name + self.follow_symlinks = follow_symlinks def call(self, files): """ @@ -103,38 +104,42 @@ def list_files(self, path, dir_op): """ join, isdir, isfile = os.path.join, os.path.isdir, os.path.isfile error, listdir = os.error, os.listdir - if not dir_op: - size, last_update = get_file_stat(path) - yield path, size, last_update - else: - # We need to list files in byte order based on the full - # expanded path of the key: 'test/1/2/3.txt' However, listdir() - # will only give us contents a single directory at a time, so we'll - # get 'test'. At the same time we don't want to load the entire - # list of files into memory. This is handled by first going - # through the current directory contents and adding the directory - # separator to any directories. We can then sort the contents, - # and ensure byte order. - names = listdir(path) - self._check_paths_decoded(path, names) - for i, name in enumerate(names): - file_path = join(path, name) - if isdir(file_path): - names[i] = name + os.path.sep - names.sort() - for name in names: - file_path = join(path, name) - if isdir(file_path): - # Anything in a directory will have a prefix of this - # current directory and will come before the - # remaining contents in this directory. This means we need - # to recurse into this sub directory before yielding the - # rest of this directory's contents.
- for x in self.list_files(file_path, dir_op): - yield x - else: - size, last_update = get_file_stat(file_path) - yield file_path, size, last_update + if not self.should_ignore_file(path): + if not dir_op: + size, last_update = get_file_stat(path) + yield path, size, last_update + else: + # We need to list files in byte order based on the full + # expanded path of the key: 'test/1/2/3.txt' However, + # listdir() will only give us contents of a single directory + # at a time, so we'll get 'test'. At the same time we don't + # want to load the entire list of files into memory. This + # is handled by first going through the current directory + # contents and adding the directory separator to any + # directories. We can then sort the contents, + # and ensure byte order. + names = listdir(path) + self._check_paths_decoded(path, names) + for i, name in enumerate(names): + file_path = join(path, name) + if isdir(file_path): + names[i] = name + os.path.sep + names.sort() + for name in names: + file_path = join(path, name) + if not self.should_ignore_file(file_path): + if isdir(file_path): + # Anything in a directory will have a prefix of + # this current directory and will come before the + # remaining contents in this directory. This + # means we need to recurse into this sub directory + # before yielding the rest of this directory's + # contents. + for x in self.list_files(file_path, dir_op): + yield x + else: + size, last_update = get_file_stat(file_path) + yield file_path, size, last_update def _check_paths_decoded(self, path, names): # We can get a UnicodeDecodeError if we try to listdir() and @@ -147,6 +152,20 @@ def _check_paths_decoded(self, path, names): if not isinstance(name, six.text_type): raise FileDecodingError(path, name) + def should_ignore_file(self, path): + """ + This function checks whether a file should be ignored in the + file generation process. This includes symlinks that are not to be + followed. + """ + if not self.follow_symlinks: + if os.path.isdir(path) and path.endswith(os.sep): + # Trailing slash must be removed to check if it is a symlink.
+ path = path[:-1] + if os.path.islink(path): + return True + return False + def list_objects(self, s3_path, dir_op): """ This function yields the appropriate object or objects under a diff --git a/awscli/customizations/s3/s3.py b/awscli/customizations/s3/s3.py index ecff9a3a2077..4a9e437c6de1 100644 --- a/awscli/customizations/s3/s3.py +++ b/awscli/customizations/s3/s3.py @@ -179,9 +179,9 @@ def _create_subcommand_table(self): self._session.emit('building-operation-table.%s' % self._name, operation_table=subcommand_table, session=self._session) - subcommand_table['help'] = S3HelpCommand(self._session, self, - command_table=subcommand_table, - arg_table=None) + subcommand_table['help'] = \ + S3HelpCommand(self._session, self, + command_table=subcommand_table, arg_table=None) return subcommand_table def _get_command_usage(self, cmd_class): @@ -312,9 +312,9 @@ def _create_parameter_table(self): def _populate_parameter_table(self): parameter_table = {} for param in CMD_DICT[self._name]['params']: - parameter_table[param] = S3Parameter(param, - PARAMS_DICT[param]['options'], - PARAMS_DICT[param]['documents']) + parameter_table[param] = \ + S3Parameter(param, PARAMS_DICT[param]['options'], + PARAMS_DICT[param]['documents']) return parameter_table def _build_call_parameters(self, args, service_params): @@ -414,7 +414,8 @@ def _make_last_mod_str(self, last_mod): last_mod = parse(last_mod) last_mod = last_mod.astimezone(tzlocal()) last_mod_tup = (str(last_mod.year), str(last_mod.month).zfill(2), - str(last_mod.day).zfill(2), str(last_mod.hour).zfill(2), + str(last_mod.day).zfill(2), + str(last_mod.hour).zfill(2), str(last_mod.minute).zfill(2), str(last_mod.second).zfill(2)) last_mod_str = "%s-%s-%s %s:%s:%s" % last_mod_tup @@ -445,9 +446,11 @@ def _do_command(self, parsed_args, parsed_globals): def _build_website_configuration(self, parsed_args): website_config = {} if parsed_args.index_document is not None: - website_config['IndexDocument'] = {'Suffix': parsed_args.index_document} + website_config['IndexDocument'] = \ + {'Suffix': parsed_args.index_document} if parsed_args.error_document is not None: - website_config['ErrorDocument'] = {'Key': parsed_args.error_document} + website_config['ErrorDocument'] = \ + {'Key': parsed_args.error_document} return website_config def _get_bucket_name(self, path): @@ -559,12 +562,11 @@ def run(self): 'rb': 'remove_bucket' } operation_name = cmd_translation[paths_type][self.cmd] - file_generator = FileGenerator(self._service, self._endpoint, operation_name, - self.parameters) + file_generator = FileGenerator(self._service, self._endpoint, + operation_name, + self.parameters['follow_symlinks']) rev_generator = FileGenerator(self._service, self._endpoint, '', - self.parameters) + self.parameters['follow_symlinks']) taskinfo = [TaskInfo(src=files['src']['path'], src_type='s3', operation_name=operation_name, @@ -575,36 +577,35 @@ command_dict = {} if self.cmd == 'sync': command_dict = {'setup': [files, rev_files], - 'file_generator': [file_generator, - rev_generator], - 'filters': [create_filter(self.parameters), - create_filter(self.parameters)], - 'comparator': [Comparator(self.parameters)], - 's3_handler': [s3handler]} + 'file_generator': [file_generator, + rev_generator], + 'filters': [create_filter(self.parameters), + create_filter(self.parameters)], + 'comparator': [Comparator(self.parameters)], + 's3_handler': [s3handler]} elif self.cmd == 'cp': command_dict = {'setup': [files], - 'file_generator': [file_generator], - 'filters': [create_filter(self.parameters)], - 's3_handler': [s3handler]} + 'file_generator':
[file_generator], + 'filters': [create_filter(self.parameters)], + 's3_handler': [s3handler]} elif self.cmd == 'rm': command_dict = {'setup': [files], - 'file_generator': [file_generator], - 'filters': [create_filter(self.parameters)], - 's3_handler': [s3handler]} + 'file_generator': [file_generator], + 'filters': [create_filter(self.parameters)], + 's3_handler': [s3handler]} elif self.cmd == 'mv': command_dict = {'setup': [files], - 'file_generator': [file_generator], - 'filters': [create_filter(self.parameters)], - 's3_handler': [s3handler]} + 'file_generator': [file_generator], + 'filters': [create_filter(self.parameters)], + 's3_handler': [s3handler]} elif self.cmd == 'mb': command_dict = {'setup': [taskinfo], - 's3_handler': [s3handler]} + 's3_handler': [s3handler]} elif self.cmd == 'rb': command_dict = {'setup': [taskinfo], - 's3_handler': [s3handler]} + 's3_handler': [s3handler]} files = command_dict['setup'] - while self.instructions: instruction = self.instructions.pop(0) file_list = [] @@ -644,6 +645,8 @@ def __init__(self, session, cmd, parameters): self.parameters = parameters if 'dir_op' not in parameters: self.parameters['dir_op'] = False + if 'follow_symlinks' not in parameters: + self.parameters['follow_symlinks'] = True if self.cmd in ['sync', 'mb', 'rb']: self.parameters['dir_op'] = True @@ -761,7 +764,8 @@ def check_force(self, parsed_globals): bucket = find_bucket_key(self.parameters['src'][5:])[0] path = 's3://' + bucket try: - del_objects = S3SubCommand('rm', self.session, {'nargs': 1}) + del_objects = S3SubCommand('rm', self.session, + {'nargs': 1}) del_objects([path, '--recursive'], parsed_globals) except: pass @@ -774,7 +778,8 @@ def add_endpoint_url(self, parsed_globals): Adds endpoint_url to the parameters. """ if 'endpoint_url' in parsed_globals: - self.parameters['endpoint_url'] = getattr(parsed_globals, 'endpoint_url') + self.parameters['endpoint_url'] = getattr(parsed_globals, + 'endpoint_url') else: self.parameters['endpoint_url'] = None @@ -788,8 +793,8 @@ def add_verify_ssl(self, parsed_globals): # keys for help command and doc generation. 
CMD_DICT = {'cp': {'options': {'nargs': 2}, 'params': ['dryrun', 'quiet', 'recursive', - 'include', 'exclude', 'acl', - 'no-guess-mime-type', + 'include', 'exclude', 'acl', 'follow-symlinks', + 'no-follow-symlinks', 'no-guess-mime-type', 'sse', 'storage-class', 'grants', 'website-redirect', 'content-type', 'cache-control', 'content-disposition', @@ -797,7 +802,8 @@ def add_verify_ssl(self, parsed_globals): 'expires']}, 'mv': {'options': {'nargs': 2}, 'params': ['dryrun', 'quiet', 'recursive', - 'include', 'exclude', 'acl', + 'include', 'exclude', 'acl', 'follow-symlinks', + 'no-follow-symlinks', 'sse', 'storage-class', 'grants', 'website-redirect', 'content-type', 'cache-control', 'content-disposition', @@ -809,7 +815,8 @@ def add_verify_ssl(self, parsed_globals): 'sync': {'options': {'nargs': 2}, 'params': ['dryrun', 'delete', 'exclude', 'include', 'quiet', 'acl', 'grants', - 'no-guess-mime-type', + 'no-guess-mime-type', 'follow-symlinks', + 'no-follow-symlinks', 'sse', 'storage-class', 'content-type', 'cache-control', 'content-disposition', 'content-encoding', 'content-language', @@ -836,6 +843,11 @@ def add_verify_ssl(self, parsed_globals): 'delete': {'options': {'action': 'store_true'}}, 'quiet': {'options': {'action': 'store_true'}}, 'force': {'options': {'action': 'store_true'}}, + 'follow-symlinks': {'options': {'action': 'store_true', + 'default': True}}, + 'no-follow-symlinks': {'options': {'action': 'store_false', + 'dest': 'follow_symlinks', + 'default': True}}, 'no-guess-mime-type': {'options': {'action': 'store_false', 'dest': 'guess_mime_type', 'default': True}}, diff --git a/tests/integration/customizations/s3/test_filegenerator.py b/tests/integration/customizations/s3/test_filegenerator.py index 52e206d70dec..08850ef50b22 100644 --- a/tests/integration/customizations/s3/test_filegenerator.py +++ b/tests/integration/customizations/s3/test_filegenerator.py @@ -48,10 +48,9 @@ def test_s3_file(self): input_s3_file = {'src': {'path': self.file1, 'type': 's3'}, 'dest': {'path': 'text1.txt', 'type': 'local'}, 'dir_op': False, 'use_src_name': False} - params = {'region': 'us-east-1'} expected_file_size = 15 result_list = list( - FileGenerator(self.service, self.endpoint, '', params).call( + FileGenerator(self.service, self.endpoint, '').call( input_s3_file)) file_info = FileInfo(src=self.file1, dest='text1.txt', compare_key='text1.txt', @@ -73,9 +72,8 @@ def test_s3_directory(self): input_s3_file = {'src': {'path': self.bucket+'/', 'type': 's3'}, 'dest': {'path': '', 'type': 'local'}, 'dir_op': True, 'use_src_name': True} - params = {'region': 'us-east-1'} result_list = list( - FileGenerator(self.service, self.endpoint, '', params).call( + FileGenerator(self.service, self.endpoint, '').call( input_s3_file)) file_info = FileInfo(src=self.file2, dest='another_directory' + os.sep + 'text2.txt', @@ -106,10 +104,9 @@ def test_s3_delete_directory(self): input_s3_file = {'src': {'path': self.bucket+'/', 'type': 's3'}, 'dest': {'path': '', 'type': 'local'}, 'dir_op': True, 'use_src_name': True} - params = {'region': 'us-east-1'} result_list = list( FileGenerator(self.service, self.endpoint, - 'delete', params).call( + 'delete').call( input_s3_file)) file_info1 = FileInfo( diff --git a/tests/integration/customizations/s3/test_plugin.py b/tests/integration/customizations/s3/test_plugin.py index fd6dd0a8e84a..bd7e897338bd 100644 --- a/tests/integration/customizations/s3/test_plugin.py +++ b/tests/integration/customizations/s3/test_plugin.py @@ -156,7 +156,8 @@ def test_mv_local_to_s3(self): # 
When we move an object, the local file is gone: self.assertTrue(not os.path.exists(full_path)) # And now resides in s3. - self.assert_key_contents_equal(bucket_name, 'foo.txt', 'this is foo.txt') + self.assert_key_contents_equal(bucket_name, 'foo.txt', + 'this is foo.txt') def test_mv_s3_to_local(self): bucket_name = self.create_bucket() @@ -210,8 +211,10 @@ def test_mv_s3_to_s3_multipart_recursive(self): to_bucket)) self.assert_no_errors(p) # Nothing's in the from_bucket. - self.assertTrue(not self.key_exists(from_bucket, key_name='largefile')) - self.assertTrue(not self.key_exists(from_bucket, key_name='smallfile')) + self.assertTrue(not self.key_exists(from_bucket, + key_name='largefile')) + self.assertTrue(not self.key_exists(from_bucket, + key_name='smallfile')) # And both files are in the to_bucket. self.assertTrue(self.key_exists(to_bucket, key_name='largefile')) @@ -240,7 +243,8 @@ def test_mv_with_large_file(self): p = aws('s3 mv s3://%s/foo.txt %s' % (bucket_name, foo_txt)) self.assert_no_errors(p) self.assertTrue(os.path.exists(foo_txt)) - self.assertEqual(os.path.getsize(foo_txt), len(file_contents.getvalue())) + self.assertEqual(os.path.getsize(foo_txt), + len(file_contents.getvalue())) def test_mv_to_nonexistent_bucket(self): full_path = self.files.create_file('foo.txt', 'this is foo.txt') @@ -252,7 +256,8 @@ def test_cant_move_file_onto_itself_small_file(self): # immediately validate that we can't move a file onto itself. bucket_name = self.create_bucket() self.put_object(bucket_name, key_name='key.txt', contents='foo') - p = aws('s3 mv s3://%s/key.txt s3://%s/key.txt' % (bucket_name, bucket_name)) + p = aws('s3 mv s3://%s/key.txt s3://%s/key.txt' % + (bucket_name, bucket_name)) self.assertEqual(p.rc, 255) self.assertIn('Cannot mv a file onto itself', p.stderr) @@ -263,17 +268,20 @@ def test_cant_move_large_file_onto_itself(self): # not allow large files to be mv'd onto themselves. file_contents = six.BytesIO(b'a' * (1024 * 1024 * 10)) bucket_name = self.create_bucket() - self.put_object(bucket_name, key_name='key.txt', contents=file_contents) + self.put_object(bucket_name, key_name='key.txt', + contents=file_contents) - p = aws('s3 mv s3://%s/key.txt s3://%s/key.txt' % (bucket_name, bucket_name)) + p = aws('s3 mv s3://%s/key.txt s3://%s/key.txt' % + (bucket_name, bucket_name)) self.assertEqual(p.rc, 255) self.assertIn('Cannot mv a file onto itself', p.stderr) class TestRm(BaseS3CLICommand): @unittest.skipIf(platform.system() not in ['Darwin', 'Linux'], - 'Newline in filename test not valid on windows.') + 'Newline in filename test not valid on windows.') # Windows won't let you do this. You'll get: - # [Errno 22] invalid mode ('w') or filename: 'c:\\windows\\temp\\tmp0fv8uu\\foo\r.txt' + # [Errno 22] invalid mode ('w') or filename: + # 'c:\\windows\\temp\\tmp0fv8uu\\foo\r.txt' def test_rm_with_newlines(self): bucket_name = self.create_bucket() @@ -346,7 +354,8 @@ def test_cp_s3_s3_multipart(self): file_contents = six.BytesIO(b'abcd' * (1024 * 1024 * 10)) self.put_object(from_bucket, 'foo.txt', file_contents) - p = aws('s3 cp s3://%s/foo.txt s3://%s/foo.txt' % (from_bucket, to_bucket)) + p = aws('s3 cp s3://%s/foo.txt s3://%s/foo.txt' % + (from_bucket, to_bucket)) self.assert_no_errors(p) self.assert_key_contents_equal(to_bucket, 'foo.txt', file_contents) self.assertTrue(self.key_exists(from_bucket, key_name='foo.txt')) @@ -367,7 +376,8 @@ def test_download_large_file(self): # This will force a multipart download.
bucket_name = self.create_bucket() foo_contents = six.BytesIO(b'abcd' * (1024 * 1024 * 10)) - self.put_object(bucket_name, key_name='foo.txt', contents=foo_contents) + self.put_object(bucket_name, key_name='foo.txt', + contents=foo_contents) local_foo_txt = self.files.full_path('foo.txt') p = aws('s3 cp s3://%s/foo.txt %s' % (bucket_name, local_foo_txt)) self.assert_no_errors(p) @@ -375,13 +385,15 @@ len(foo_contents.getvalue())) @unittest.skipIf(platform.system() not in ['Darwin', 'Linux'], - 'SIGINT not supported on Windows.') + 'SIGINT not supported on Windows.') def test_download_ctrl_c_does_not_hang(self): bucket_name = self.create_bucket() foo_contents = six.BytesIO(b'abcd' * (1024 * 1024 * 20)) - self.put_object(bucket_name, key_name='foo.txt', contents=foo_contents) + self.put_object(bucket_name, key_name='foo.txt', + contents=foo_contents) local_foo_txt = self.files.full_path('foo.txt') - process = aws('s3 cp s3://%s/foo.txt %s' % (bucket_name, local_foo_txt), wait_for_finish=False) + process = aws('s3 cp s3://%s/foo.txt %s' % + (bucket_name, local_foo_txt), wait_for_finish=False) # Give it some time to start up and enter it's main task loop. time.sleep(1) # The process has 30 seconds to finish after being sent a Ctrl+C, @@ -394,8 +406,10 @@ break else: process.kill() - self.fail("CLI did not exist within 30 seconds of receiving a Ctrl+C") + self.fail("CLI did not exit within 30 seconds of " + "receiving a Ctrl+C") - # A Ctrl+C should have a non-zero RC. We either caught the process in + # A Ctrl+C should have a non-zero RC. + # We either caught the process in # its main polling loop (rc=1), or it was successfully terminated by # the SIGINT (rc=-2). self.assertIn(process.returncode, [1, -2]) @@ -534,19 +548,81 @@ def test_sync_with_delete_option_with_same_prefix(self): @unittest.skipIf(platform.system() not in ['Darwin', 'Linux'], 'Symlink tests only supported on mac/linux') -class TestBadSymlinks(BaseS3CLICommand): - def test_bad_symlink_stops_sync_process(self): - bucket_name = self.create_bucket() - nested_dir = os.path.join(self.files.rootdir, 'realfiles') - os.mkdir(nested_dir) - full_path = self.files.create_file(os.path.join(nested_dir, 'foo.txt'), - contents='foo.txt contents') - symlink_dir = os.path.join(self.files.rootdir, 'symlinkdir') - os.mkdir(symlink_dir) - os.symlink(full_path, os.path.join(symlink_dir, 'a-goodsymlink')) - os.symlink('non-existent-file', os.path.join(symlink_dir, 'b-badsymlink')) - os.symlink(full_path, os.path.join(symlink_dir, 'c-goodsymlink')) - p = aws('s3 sync %s s3://%s/' % (symlink_dir, bucket_name)) +class TestSymlinks(BaseS3CLICommand): + """ + This class tests the ability to follow or not follow symlinks. + """ + def extra_setup(self): + self.bucket_name = self.create_bucket() + self.nested_dir = os.path.join(self.files.rootdir, 'realfiles') + os.mkdir(self.nested_dir) + self.sample_file = \ + self.files.create_file(os.path.join(self.nested_dir, 'foo.txt'), + contents='foo.txt contents') + # Create a symlink to foo.txt. + os.symlink(self.sample_file, os.path.join(self.files.rootdir, + 'a-goodsymlink')) + # Create a bad symlink. + os.symlink('non-existent-file', os.path.join(self.files.rootdir, + 'b-badsymlink')) + # Create a symlink to the directory where foo.txt is.
+ os.symlink(self.nested_dir, os.path.join(self.files.rootdir, + 'c-goodsymlink')) + + def test_no_follow_symlinks(self): + p = aws('s3 sync %s s3://%s/ --no-follow-symlinks' % (self.files.rootdir, + self.bucket_name)) + self.assert_no_errors(p) + self.assertTrue(not self.key_exists(self.bucket_name, + 'a-goodsymlink')) + self.assertTrue(not self.key_exists(self.bucket_name, + 'b-badsymlink')) + self.assertTrue(not self.key_exists(self.bucket_name, + 'c-goodsymlink/foo.txt')) + self.assertEqual(self.get_key_contents(self.bucket_name, + key_name='realfiles/foo.txt'), + 'foo.txt contents') + + def test_follow_symlinks(self): + # Get rid of the bad symlink first. + os.remove(os.path.join(self.files.rootdir, 'b-badsymlink')) + p = aws('s3 sync %s s3://%s/ --follow-symlinks' % + (self.files.rootdir, self.bucket_name)) + self.assert_no_errors(p) + self.assertEqual(self.get_key_contents(self.bucket_name, + key_name='a-goodsymlink'), + 'foo.txt contents') + self.assertTrue(not self.key_exists(self.bucket_name, + 'b-badsymlink')) + self.assertEqual( + self.get_key_contents(self.bucket_name, + key_name='c-goodsymlink/foo.txt'), + 'foo.txt contents') + self.assertEqual(self.get_key_contents(self.bucket_name, + key_name='realfiles/foo.txt'), + 'foo.txt contents') + + def test_follow_symlinks_default(self): + # Get rid of the bad symlink first. + os.remove(os.path.join(self.files.rootdir, 'b-badsymlink')) + p = aws('s3 sync %s s3://%s/' % + (self.files.rootdir, self.bucket_name)) + self.assert_no_errors(p) + self.assertEqual(self.get_key_contents(self.bucket_name, + key_name='a-goodsymlink'), + 'foo.txt contents') + self.assertTrue(not self.key_exists(self.bucket_name, + 'b-badsymlink')) + self.assertEqual( + self.get_key_contents(self.bucket_name, + key_name='c-goodsymlink/foo.txt'), + 'foo.txt contents') + self.assertEqual(self.get_key_contents(self.bucket_name, + key_name='realfiles/foo.txt'), + 'foo.txt contents') + + def test_bad_symlink(self): + p = aws('s3 sync %s s3://%s/' % (self.files.rootdir, self.bucket_name)) self.assertEqual(p.rc, 1, p.stdout) self.assertIn('[Errno 2] No such file or directory', p.stdout) @@ -560,7 +636,8 @@ class TestUnicode(BaseS3CLICommand): def test_cp(self): bucket_name = self.create_bucket() - local_example1_txt = self.files.create_file(u'\u00e9xample.txt', 'example1 contents') + local_example1_txt = \ + self.files.create_file(u'\u00e9xample.txt', 'example1 contents') s3_example1_txt = 's3://%s/%s' % (bucket_name, os.path.basename(local_example1_txt)) local_example2_txt = self.files.full_path(u'\u00e9xample2.txt') @@ -576,8 +653,10 @@ def test_cp(self): def test_recursive_cp(self): bucket_name = self.create_bucket() - local_example1_txt = self.files.create_file(u'\u00e9xample.txt', 'example1 contents') - local_example2_txt = self.files.create_file(u'\u00e9xample2.txt', 'example2 contents') + local_example1_txt = self.files.create_file(u'\u00e9xample.txt', + 'example1 contents') + local_example2_txt = self.files.create_file(u'\u00e9xample2.txt', + 'example2 contents') p = aws('s3 cp %s s3://%s --recursive --quiet' % ( self.files.rootdir, bucket_name)) self.assert_no_errors(p) @@ -669,7 +748,9 @@ def extra_setup(self): def test_mb_rb(self): p = aws('s3 mb s3://%s' % self.bucket_name) self.assert_no_errors(p) - + + # Give the bucket time to form. 
+ time.sleep(1) response = self.list_buckets() self.assertIn(self.bucket_name, [b['Name'] for b in response]) @@ -745,8 +826,8 @@ def assert_max_memory_used(self, process, max_mem_allowed, full_command): failure_message = ( 'Exceeded max memory allowed (%s MB) for command ' '"%s": %s MB' % (self.max_mem_allowed / 1024.0 / 1024.0, - full_command, - peak_memory / 1024.0 / 1024.0)) + full_command, + peak_memory / 1024.0 / 1024.0)) self.fail(failure_message) def test_transfer_single_large_file(self): @@ -764,14 +845,16 @@ def test_transfer_single_large_file(self): bucket_name, foo_txt) p = aws(download_full_command, collect_memory=True) self.assert_no_errors(p) - self.assert_max_memory_used(p, self.max_mem_allowed, download_full_command) + self.assert_max_memory_used(p, self.max_mem_allowed, + download_full_command) class TestWebsiteConfiguration(BaseS3CLICommand): def test_create_website_index_configuration(self): bucket_name = self.create_bucket() # Supply only --index-document argument. - full_command = 's3 website %s --index-document index.html' % (bucket_name) + full_command = 's3 website %s --index-document index.html' % \ + (bucket_name) p = aws(full_command) self.assertEqual(p.rc, 0) self.assert_no_errors(p) @@ -821,8 +904,9 @@ def test_basic_exclude_filter_for_single_file(self): def test_explicitly_exclude_single_file(self): full_path = self.files.create_file('foo.txt', 'this is foo.txt') - p = aws('s3 cp %s s3://random-bucket-name/ --dryrun --exclude foo.txt' - % full_path) + p = aws('s3 cp %s s3://random-bucket-name/' + ' --dryrun --exclude foo.txt' + % full_path) self.assert_no_files_would_be_uploaded(p) def test_cwd_doesnt_matter(self): @@ -901,7 +985,8 @@ def test_exclude_filter_with_delete(self): self.assertTrue( self.key_exists(bucket_name, key_name='bar.py'), ("The --delete flag was not applied to the receiving " - "end, the 'bar.py' file was deleted even though it was excluded.")) + "end, the 'bar.py' file was deleted even though it" + " was excluded.")) def test_exclude_filter_with_relative_path(self): # Same test as test_exclude_filter_with_delete, except we don't @@ -917,14 +1002,16 @@ def test_exclude_filter_with_relative_path(self): try: os.chdir(self.files.rootdir) # Note how we're using "." for the source directory. - p = aws("s3 sync . s3://%s/ --exclude '*.py' --delete" % bucket_name) + p = aws("s3 sync . s3://%s/ --exclude '*.py' --delete" + % bucket_name) finally: os.chdir(cwd) self.assert_no_errors(p) self.assertTrue( self.key_exists(bucket_name, key_name='bar.py'), ("The --delete flag was not applied to the receiving " - "end, the 'bar.py' file was deleted even though it was excluded.")) + "end, the 'bar.py' file was deleted even though" + " it was excluded.")) class TestFileWithSpaces(BaseS3CLICommand): diff --git a/tests/unit/customizations/s3/test_filegenerator.py b/tests/unit/customizations/s3/test_filegenerator.py index 32163bcabb24..8efa04a893c9 100644 --- a/tests/unit/customizations/s3/test_filegenerator.py +++ b/tests/unit/customizations/s3/test_filegenerator.py @@ -11,7 +11,8 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
import os -from awscli.testutils import unittest +import platform +from awscli.testutils import unittest, FileCreator import tempfile import shutil @@ -30,10 +31,12 @@ class LocalFileGeneratorTest(unittest.TestCase): def setUp(self): - self.local_file = six.text_type(os.path.abspath('.') + os.sep + 'some_directory' \ - + os.sep + 'text1.txt') - self.local_dir = six.text_type(os.path.abspath('.') + os.sep + 'some_directory' \ - + os.sep) + self.local_file = six.text_type(os.path.abspath('.') + + os.sep + 'some_directory' + + os.sep + 'text1.txt') + self.local_dir = six.text_type(os.path.abspath('.') + + os.sep + 'some_directory' + + os.sep) self.session = FakeSession() self.service = self.session.get_service('s3') self.endpoint = self.service.get_endpoint('us-east-1') @@ -53,7 +56,7 @@ def test_local_file(self): 'dir_op': False, 'use_src_name': False} params = {'region': 'us-east-1'} files = FileGenerator(self.service, - self.endpoint, '', params).call(input_local_file) + self.endpoint, '').call(input_local_file) result_list = [] for filename in files: result_list.append(filename) @@ -78,8 +81,8 @@ def test_local_directory(self): 'type': 's3'}, 'dir_op': True, 'use_src_name': True} params = {'region': 'us-east-1'} - files = FileGenerator(self.service, self.endpoint, - '', params).call(input_local_dir) + files = FileGenerator(self.service, + self.endpoint, '').call(input_local_dir) result_list = [] for filename in files: result_list.append(filename) @@ -105,6 +108,180 @@ def test_local_directory(self): compare_files(self, result_list[i], ref_list[i]) +@unittest.skipIf(platform.system() not in ['Darwin', 'Linux'], + 'Symlink tests only supported on mac/linux') +class TestIgnoreFilesLocally(unittest.TestCase): + """ + This class tests the ability to ignore particular files. This includes + skipping symlinks when desired.
+ """ + def setUp(self): + self.session = FakeSession() + self.service = self.session.get_service('s3') + self.endpoint = self.service.get_endpoint('us-east-1') + self.files = FileCreator() + + def tearDown(self): + self.files.remove_all() + + def test_bad_symlink(self): + path = os.path.join(self.files.rootdir, 'badsymlink') + os.symlink('non-existent-file', path) + filegenerator = FileGenerator(self.service, self.endpoint, + '', True) + self.assertFalse(filegenerator.should_ignore_file(path)) + + def test_skip_symlink(self): + filename = 'foo.txt' + self.files.create_file(os.path.join(self.files.rootdir, + filename), + contents='foo.txt contents') + sym_path = os.path.join(self.files.rootdir, 'symlink') + os.symlink(filename, sym_path) + filegenerator = FileGenerator(self.service, self.endpoint, + '', False) + self.assertTrue(filegenerator.should_ignore_file(sym_path)) + + def test_no_skip_symlink(self): + filename = 'foo.txt' + path = self.files.create_file(os.path.join(self.files.rootdir, + filename), + contents='foo.txt contents') + sym_path = os.path.join(self.files.rootdir, 'symlink') + os.symlink(path, sym_path) + filegenerator = FileGenerator(self.service, self.endpoint, + '', True) + self.assertFalse(filegenerator.should_ignore_file(sym_path)) + self.assertFalse(filegenerator.should_ignore_file(path)) + + def test_no_skip_symlink_dir(self): + filename = 'dir' + path = os.path.join(self.files.rootdir, 'dir/') + os.mkdir(path) + sym_path = os.path.join(self.files.rootdir, 'symlink') + os.symlink(path, sym_path) + filegenerator = FileGenerator(self.service, self.endpoint, + '', True) + self.assertFalse(filegenerator.should_ignore_file(sym_path)) + self.assertFalse(filegenerator.should_ignore_file(path)) + + +@unittest.skipIf(platform.system() not in ['Darwin', 'Linux'], + 'Symlink tests only supported on mac/linux') +class TestSymlinksIgnoreFiles(unittest.TestCase): + """ + This class tests the ability to list out the correct local files + depending on if symlinks are being followed. Also tests to ensure + broken symlinks fail. + """ + def setUp(self): + self.session = FakeSession() + self.service = self.session.get_service('s3') + self.endpoint = self.service.get_endpoint('us-east-1') + self.files = FileCreator() + # List of local filenames. + self.filenames = [] + self.root = self.files.rootdir + self.bucket = 'bucket/' + filename_1 = self.files.create_file('foo.txt', + contents='foo.txt contents') + self.filenames.append(filename_1) + nested_dir = os.path.join(self.root, 'realfiles') + os.mkdir(nested_dir) + filename_2 = self.files.create_file(os.path.join(nested_dir, + 'bar.txt'), + contents='bar.txt contents') + self.filenames.append(filename_2) + # Names of symlinks. + self.symlinks = [] + # Names of files if symlinks are followed. + self.symlink_files = [] + # Create symlink to file foo.txt. + symlink_1 = os.path.join(self.root, 'symlink_1') + os.symlink(filename_1, symlink_1) + self.symlinks.append(symlink_1) + self.symlink_files.append(symlink_1) + # Create a symlink to a file that does not exist. 
+ symlink_2 = os.path.join(self.root, 'symlink_2') + os.symlink('non-existent-file', symlink_2) + self.symlinks.append(symlink_2) + # Create a symlink to the directory realfiles. + symlink_3 = os.path.join(self.root, 'symlink_3') + os.symlink(nested_dir, symlink_3) + self.symlinks.append(symlink_3) + self.symlink_files.append(os.path.join(symlink_3, 'bar.txt')) + + def tearDown(self): + self.files.remove_all() + + def test_no_follow_symlink(self): + abs_root = six.text_type(os.path.abspath(self.root) + os.sep) + input_local_dir = {'src': {'path': abs_root, + 'type': 'local'}, + 'dest': {'path': self.bucket, + 'type': 's3'}, + 'dir_op': True, 'use_src_name': True} + file_infos = FileGenerator(self.service, self.endpoint, + '', False).call(input_local_dir) + self.filenames.sort() + result_list = [] + for file_info in file_infos: + result_list.append(getattr(file_info, 'src')) + self.assertEqual(len(result_list), len(self.filenames)) + # Just check to make sure the right local files are generated. + for i in range(len(result_list)): + filename = six.text_type(os.path.abspath(self.filenames[i])) + self.assertEqual(result_list[i], filename) + + def test_follow_bad_symlink(self): + """ + This tests to make sure it fails when following bad symlinks. + """ + abs_root = six.text_type(os.path.abspath(self.root) + os.sep) + input_local_dir = {'src': {'path': abs_root, + 'type': 'local'}, + 'dest': {'path': self.bucket, + 'type': 's3'}, + 'dir_op': True, 'use_src_name': True} + file_infos = FileGenerator(self.service, self.endpoint, + '', True).call(input_local_dir) + result_list = [] + rc = 0 + try: + for file_info in file_infos: + result_list.append(getattr(file_info, 'src')) + rc = 1 + except OSError: + pass + # Error shows up as ValueError in Python 3. + except ValueError: + pass + self.assertEqual(0, rc) + + def test_follow_symlink(self): + # First remove the bad symlink. + os.remove(os.path.join(self.root, 'symlink_2')) + abs_root = six.text_type(os.path.abspath(self.root) + os.sep) + input_local_dir = {'src': {'path': abs_root, + 'type': 'local'}, + 'dest': {'path': self.bucket, + 'type': 's3'}, + 'dir_op': True, 'use_src_name': True} + file_infos = FileGenerator(self.service, self.endpoint, + '', True).call(input_local_dir) + all_filenames = self.filenames + self.symlink_files + all_filenames.sort() + result_list = [] + for file_info in file_infos: + result_list.append(getattr(file_info, 'src')) + self.assertEqual(len(result_list), len(all_filenames)) + # Just check to make sure the right local files are generated. + for i in range(len(result_list)): + filename = six.text_type(os.path.abspath(all_filenames[i])) + self.assertEqual(result_list[i], filename) + + class TestListFilesLocally(unittest.TestCase): maxDiff = None @@ -117,7 +294,7 @@ def tearDown(self): @mock.patch('os.listdir') def test_error_raised_on_decoding_error(self, listdir_mock): # On Python3, sys.getdefaultencoding - file_generator = FileGenerator(None, None, None, None) + file_generator = FileGenerator(None, None, None) # utf-8 encoding for U+2713.
listdir_mock.return_value = [b'\xe2\x9c\x93'] with self.assertRaises(FileDecodingError): @@ -132,7 +309,7 @@ def test_list_files_is_in_sorted_order(self): os.mkdir(p(self.directory, 'test')) open(p(self.directory, 'test', 'foo.txt'), 'w').close() - file_generator = FileGenerator(None, None, None, None) + file_generator = FileGenerator(None, None, None) values = list(el[0] for el in file_generator.list_files( self.directory, dir_op=True)) self.assertEqual(values, list(sorted(values))) @@ -150,7 +327,7 @@ def test_list_local_files_with_unicode_chars(self): open(p(self.directory, u'a\u0300a', u'z'), 'w').close() open(p(self.directory, u'a\u0300a', u'\u00e6'), 'w').close() - file_generator = FileGenerator(None, None, None, None) + file_generator = FileGenerator(None, None, None) values = list(el[0] for el in file_generator.list_files( self.directory, dir_op=True)) expected_order = [os.path.join(self.directory, el) for el in [ @@ -188,7 +365,8 @@ def test_s3_file(self): 'dest': {'path': 'text1.txt', 'type': 'local'}, 'dir_op': False, 'use_src_name': False} params = {'region': 'us-east-1'} - files = FileGenerator(self.service, self.endpoint, '', params).call(input_s3_file) + files = FileGenerator(self.service, self.endpoint, + '').call(input_s3_file) result_list = [] for filename in files: result_list.append(filename) @@ -215,12 +393,14 @@ def test_s3_directory(self): 'dest': {'path': '', 'type': 'local'}, 'dir_op': True, 'use_src_name': True} params = {'region': 'us-east-1'} - files = FileGenerator(self.service, self.endpoint, '', params).call(input_s3_file) + files = FileGenerator(self.service, self.endpoint, + '').call(input_s3_file) result_list = [] for filename in files: result_list.append(filename) file_info = FileInfo(src=self.file2, - dest='another_directory' + os.sep + 'text2.txt', + dest='another_directory' + os.sep + + 'text2.txt', compare_key='another_directory/text2.txt', size=result_list[0].size, last_update=result_list[0].last_update, @@ -250,9 +430,8 @@ def test_s3_delete_directory(self): input_s3_file = {'src': {'path': self.bucket + '/', 'type': 's3'}, 'dest': {'path': '', 'type': 'local'}, 'dir_op': True, 'use_src_name': True} - params = {'region': 'us-east-1'} - files = FileGenerator(self.service, self.endpoint, 'delete', params).call( - input_s3_file) + files = FileGenerator(self.service, self.endpoint, + 'delete').call(input_s3_file) result_list = [] for filename in files: result_list.append(filename) diff --git a/tests/unit/customizations/s3/test_s3.py b/tests/unit/customizations/s3/test_s3.py index a160d9a53493..50839f35c86d 100644 --- a/tests/unit/customizations/s3/test_s3.py +++ b/tests/unit/customizations/s3/test_s3.py @@ -184,7 +184,8 @@ def test_run_cp_put(self): params = {'dir_op': False, 'dryrun': True, 'quiet': False, 'src': local_file, 'dest': s3_file, 'filters': filters, 'paths_type': 'locals3', 'region': 'us-east-1', - 'endpoint_url': None, 'verify_ssl': None} + 'endpoint_url': None, 'verify_ssl': None, + 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() cmd_arc.run() @@ -199,7 +200,8 @@ def test_error_on_same_line_as_status(self): params = {'dir_op': False, 'dryrun': False, 'quiet': False, 'src': local_file, 'dest': s3_file, 'filters': filters, 'paths_type': 'locals3', 'region': 'us-east-1', - 'endpoint_url': None, 'verify_ssl': None} + 'endpoint_url': None, 'verify_ssl': None, + 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() 
cmd_arc.run() @@ -221,7 +223,8 @@ def test_run_cp_get(self): params = {'dir_op': False, 'dryrun': True, 'quiet': False, 'src': s3_file, 'dest': local_file, 'filters': filters, 'paths_type': 's3local', 'region': 'us-east-1', - 'endpoint_url': None, 'verify_ssl': None} + 'endpoint_url': None, 'verify_ssl': None, + 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() cmd_arc.run() @@ -237,7 +240,8 @@ def test_run_cp_copy(self): params = {'dir_op': False, 'dryrun': True, 'quiet': False, 'src': s3_file, 'dest': s3_file, 'filters': filters, 'paths_type': 's3s3', 'region': 'us-east-1', - 'endpoint_url': None, 'verify_ssl': None} + 'endpoint_url': None, 'verify_ssl': None, + 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() cmd_arc.run() @@ -253,7 +257,8 @@ def test_run_mv(self): params = {'dir_op': False, 'dryrun': True, 'quiet': False, 'src': s3_file, 'dest': s3_file, 'filters': filters, 'paths_type': 's3s3', 'region': 'us-east-1', - 'endpoint_url': None, 'verify_ssl': None} + 'endpoint_url': None, 'verify_ssl': None, + 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'mv', params) cmd_arc.create_instructions() cmd_arc.run() @@ -269,7 +274,8 @@ def test_run_remove(self): params = {'dir_op': False, 'dryrun': True, 'quiet': False, 'src': s3_file, 'dest': s3_file, 'filters': filters, 'paths_type': 's3', 'region': 'us-east-1', - 'endpoint_url': None, 'verify_ssl': None} + 'endpoint_url': None, 'verify_ssl': None, + 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'rm', params) cmd_arc.create_instructions() cmd_arc.run() @@ -289,7 +295,8 @@ def test_run_sync(self): params = {'dir_op': True, 'dryrun': True, 'quiet': False, 'src': local_dir, 'dest': s3_prefix, 'filters': filters, 'paths_type': 'locals3', 'region': 'us-east-1', - 'endpoint_url': None, 'verify_ssl': None} + 'endpoint_url': None, 'verify_ssl': None, + 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'sync', params) cmd_arc.create_instructions() cmd_arc.run() @@ -304,7 +311,7 @@ def test_run_mb(self): params = {'dir_op': True, 'dryrun': True, 'quiet': False, 'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3', 'region': 'us-east-1', 'endpoint_url': None, - 'verify_ssl': None} + 'verify_ssl': None, 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'mb', params) cmd_arc.create_instructions() cmd_arc.run() @@ -319,7 +326,7 @@ def test_run_rb(self): params = {'dir_op': True, 'dryrun': True, 'quiet': False, 'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3', 'region': 'us-east-1', 'endpoint_url': None, - 'verify_ssl': None} + 'verify_ssl': None, 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'rb', params) cmd_arc.create_instructions() rc = cmd_arc.run() @@ -335,7 +342,7 @@ def test_run_rb_nonzero_rc(self): params = {'dir_op': True, 'dryrun': False, 'quiet': False, 'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3', 'region': 'us-east-1', 'endpoint_url': None, - 'verify_ssl': None} + 'verify_ssl': None, 'follow_symlinks': True} cmd_arc = CommandArchitecture(self.session, 'rb', params) cmd_arc.create_instructions() rc = cmd_arc.run() diff --git a/tests/unit/test_completer.py b/tests/unit/test_completer.py index 82a2d3392120..d4e79e52533b 100644 --- a/tests/unit/test_completer.py +++ b/tests/unit/test_completer.py @@ -69,12 +69,14 @@ '--recursive', '--website-redirect', '--quiet', '--acl', '--storage-class', 
'--sse', '--exclude', '--include', + '--follow-symlinks', '--no-follow-symlinks', '--cache-control', '--content-type', '--content-disposition', '--content-encoding', '--content-language', '--expires', '--grants'] + GLOBALOPTS)), ('aws s3 cp --quiet -', -1, set(['--no-guess-mime-type', '--dryrun', '--recursive', '--content-type', + '--follow-symlinks', '--no-follow-symlinks', '--content-disposition', '--cache-control', '--content-encoding', '--content-language', '--expires', '--website-redirect', '--acl',
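Note for reviewers: the heart of this change is the new should_ignore_file() check in filegenerator.py, plus the follow_symlinks flag threaded through FileGenerator and the CLI parameter tables. As a minimal standalone sketch of that check (illustrative only, simplified from the diff above, not the shipped code):

    import os

    def should_ignore_file(path, follow_symlinks=True):
        # When symlinks are not being followed, any path that is itself a
        # symlink is skipped. os.path.islink() returns False for a directory
        # path written with a trailing separator (the separator forces the
        # link to be resolved), so the separator is stripped first; this is
        # the "trailing slash" case called out in the diff.
        if not follow_symlinks:
            if os.path.isdir(path) and path.endswith(os.sep):
                path = path[:-1]
            return os.path.islink(path)
        return False

From the command line this matches the integration tests above, e.g. `aws s3 sync mydir s3://mybucket/ --no-follow-symlinks` (mydir and mybucket are placeholder names); when neither flag is given, symlinks are followed, per the store_true/store_false defaults registered in PARAMS_DICT.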