Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FSReader: add include dot files flag #314

Merged
merged 2 commits into from
Jul 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions exporters/readers/fs_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ def _get_input_files(cls, input_specification):
directory = cls._get_pointer(dir_pointer)

out.extend(cls._get_directory_files(
directory, input_unit.get('pattern')))
directory=directory,
pattern=input_unit.get('pattern'),
include_dot_files=input_unit.get('include_dot_files', False)))
else:
raise ConfigurationError('Input must only contain strings or dicts')
return out
Expand Down Expand Up @@ -145,18 +147,23 @@ def _get_pointer(cls, path_pointer):
return f.read().strip()

@classmethod
def _get_directory_files(cls, directory, pattern=None,
                         include_dot_files=False):
    """Return the paths of the files found under ``directory``.

    The tree is traversed with ``os.walk`` and every returned entry is
    ``os.path.join(dirpath, filename)``.

    :param directory: root directory to walk.
    :param pattern: optional regular expression. When given, only paths in
        which ``re.search`` finds a match are kept. The pattern is applied
        to the joined path, not just the file name.
    :param include_dot_files: when false (the default), files whose base
        name starts with ``'.'`` are left out. Only the file's own name is
        checked, so files living inside a dot-directory are not affected
        by this flag.
    :returns: list of matching file paths, in the order ``os.walk`` yields
        them.
    """
    # Every predicate in this list must accept a path for it to be kept.
    # An empty list keeps everything, because all() of nothing is True.
    match_funcs = []
    if pattern is not None:
        match_funcs.append(re.compile(pattern).search)

    if not include_dot_files:
        def is_non_dot_file(filepath):
            return not os.path.basename(filepath).startswith('.')

        match_funcs.append(is_non_dot_file)

    return [
        filepath
        for dirpath, directories, filenames in os.walk(directory)
        for filepath in (os.path.join(dirpath, f) for f in filenames)
        if all(mf(filepath) for mf in match_funcs)
    ]

def get_next_batch(self):
Expand Down
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[flake8]
max-line-length = 100
exclude = .git,venv
exclude = .git,venv

[pytest]
python_classes = Test* *Test
63 changes: 50 additions & 13 deletions tests/test_readers_fs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import unittest
from gzip import GzipFile

from exporters.readers import FSReader
from exporters.exceptions import ConfigurationError

Expand All @@ -7,21 +8,22 @@
import pytest


class FSReaderTest(unittest.TestCase):
def setUp(self):
self.options = {
class FSReaderTest(object):
@classmethod
def setup_class(cls):
cls.options = {
'input': {
'dir': './tests/data/fs_reader_test',
}
}

self.options_pointer = {
cls.options_pointer = {
'input': {
'dir_pointer': './tests/data/fs_reader_pointer',
}
}

self.options_empty_folder = {
cls.options_empty_folder = {
'input': {
'dir': './tests/data/fs_reader_empty_folder',
}
Expand All @@ -44,7 +46,7 @@ def test_read_from_folder(self):
]
reader = self._make_fs_reader(self.options)
batch = list(reader.get_next_batch())
self.assertEqual(expected, batch)
assert expected == batch

def test_read_from_pointer(self):
expected = [
Expand All @@ -53,12 +55,12 @@ def test_read_from_pointer(self):
]
reader = self._make_fs_reader(self.options_pointer)
batch = list(reader.get_next_batch())
self.assertEqual(expected, batch)
assert expected == batch

def test_read_from_empty_folder(self):
    """A reader pointed at the empty-folder options ends up finished."""
    reader = self._make_fs_reader(self.options_empty_folder)
    # Consume the batch first; the finished state is only checked afterwards.
    list(reader.get_next_batch())
    # Bare assert: the test class no longer provides unittest's assertTrue.
    assert reader.is_finished()

def test_read_from_file(self):
reader = self._make_fs_reader({
Expand All @@ -68,7 +70,7 @@ def test_read_from_file(self):
expected = [
{u'item': u'value1'}, {u'item': u'value2'}, {u'item': u'value3'}
]
self.assertEqual(expected, batch)
assert expected == batch

def test_read_from_multiple_files(self):
reader = self._make_fs_reader({
Expand All @@ -82,7 +84,7 @@ def test_read_from_multiple_files(self):
{u'item': u'value1'}, {u'item': u'value2'}, {u'item': u'value3'},
{u'item': u'value1'}, {u'item': u'value2'}, {u'item': u'value3'},
]
self.assertEqual(expected, batch)
assert expected == batch

def test_read_from_file_and_dir(self):
reader = self._make_fs_reader({
Expand All @@ -97,7 +99,7 @@ def test_read_from_file_and_dir(self):
{u'item': u'value1'}, {u'item': u'value2'}, {u'item': u'value3'},
{u'item2': u'value1'}, {u'item2': u'value2'}, {u'item2': u'value3'},
]
self.assertEqual(expected, batch)
assert expected == batch

def test_dir_specification_no_dir_or_dir_pointer(self):
with pytest.raises(ConfigurationError) as err:
Expand All @@ -124,4 +126,39 @@ def test_dir_specification_with_pattern(self):
{u'item2': u'value1'}, {u'item2': u'value2'}, {u'item2': u'value3'},
]
batch = list(reader.get_next_batch())
self.assertEqual(expected, batch)
assert expected == batch

def test_dot_files_ignored_by_default(self, tmpdir_with_dotfiles):
    """Without ``include_dot_files`` the dot file is never read."""
    root = tmpdir_with_dotfiles.strpath

    # No pattern: only the regular file's record comes back.
    plain_reader = self._make_fs_reader({'input': {'dir': root}})
    plain_batch = list(plain_reader.get_next_batch())
    assert plain_batch == [{"bar": 1}]

    # A pattern that can only match dot files yields nothing, because the
    # dot-file filter is still active by default.
    dot_only_spec = {'dir': root, 'pattern': r'/\.[^/]*$'}
    dot_only_reader = self._make_fs_reader({'input': dot_only_spec})
    dot_only_batch = list(dot_only_reader.get_next_batch())
    assert dot_only_batch == []

def test_dot_files_included_with_flag(self, tmpdir_with_dotfiles):
    """With ``include_dot_files`` set, the dot file is read as well."""
    reader = self._make_fs_reader({'input': {
        'dir': tmpdir_with_dotfiles.strpath,
        'pattern': r'/\.[^/]*$',
        'include_dot_files': True,
    }})
    assert list(reader.get_next_batch()) == [{"foo": 1}]

    reader = self._make_fs_reader({'input': {
        'dir': tmpdir_with_dotfiles.strpath,
        'include_dot_files': True,
    }})
    batch = list(reader.get_next_batch())
    # Both files sit in the same directory and os.walk lists a directory's
    # files in arbitrary order, so check the contents without pinning the
    # order in which the two records arrive.
    assert len(batch) == 2
    assert {"foo": 1} in batch
    assert {"bar": 1} in batch


@pytest.fixture
def tmpdir_with_dotfiles(tmpdir):
    """Populate ``tmpdir`` with one dot file and one regular file.

    Writes two gzip-compressed files, ``.foo.jl.gz`` containing
    ``{"foo": 1}`` and ``bar.jl.gz`` containing ``{"bar": 1}``, then returns
    the same ``tmpdir`` object.
    """
    # GzipFile.write() requires bytes on Python 3. Byte literals are plain
    # ``str`` on Python 2, so the same code works on both.
    with GzipFile(tmpdir.join('.foo.jl.gz').strpath, 'w') as zf:
        zf.write(b'{"foo": 1}')
    with GzipFile(tmpdir.join('bar.jl.gz').strpath, 'w') as zf:
        zf.write(b'{"bar": 1}')
    return tmpdir