Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IO-507: Fixing imports with spaces #501

Merged
merged 4 commits into from
Dec 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 63 additions & 5 deletions darwin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,30 @@
SUPPORTED_EXTENSIONS = SUPPORTED_IMAGE_EXTENSIONS + SUPPORTED_VIDEO_EXTENSIONS


def is_extension_allowed(extension: str) -> bool:
def is_extension_allowed_by_filename(filename: str) -> bool:
"""
Returns whether or not the given video or image extension is allowed.

Parameters
----------
filename : str
The filename.

Returns
-------
bool
Whether or not the given extension of the filename is allowed.
"""
return any([filename.lower().endswith(ext) for ext in SUPPORTED_EXTENSIONS])


@deprecation.deprecated(deprecated_in="0.8.4", current_version=__version__)
def is_extension_allowed(extension: str) -> bool:
"""
Returns whether or not the given extension is allowed.
@Deprecated. Use is_extension_allowed_by_filename instead, and pass full filename.
This is due to the fact that some extensions now include multiple dots, e.g. .nii.gz

Parameters
----------
extension : str
Expand All @@ -66,6 +86,24 @@ def is_extension_allowed(extension: str) -> bool:
return extension.lower() in SUPPORTED_EXTENSIONS


def is_image_extension_allowed_by_filename(filename: str) -> bool:
"""
Returns whether or not the given image extension is allowed.

Parameters
----------
filename : str
The image extension.

Returns
-------
bool
Whether or not the given extension is allowed.
"""
return any([filename.lower().endswith(ext) for ext in SUPPORTED_IMAGE_EXTENSIONS])


@deprecation.deprecated(deprecated_in="0.8.4", current_version=__version__)
def is_image_extension_allowed(extension: str) -> bool:
"""
Returns whether or not the given image extension is allowed.
Expand All @@ -83,6 +121,24 @@ def is_image_extension_allowed(extension: str) -> bool:
return extension.lower() in SUPPORTED_IMAGE_EXTENSIONS


def is_video_extension_allowed_by_filename(extension: str) -> bool:
"""
Returns whether or not the given image extension is allowed.

Parameters
----------
extension : str
The image extension.

Returns
-------
bool
Whether or not the given extension is allowed.
"""
return any([extension.lower().endswith(ext) for ext in SUPPORTED_VIDEO_EXTENSIONS])


@deprecation.deprecated(deprecated_in="0.8.4", current_version=__version__)
def is_video_extension_allowed(extension: str) -> bool:
"""
Returns whether or not the given video extension is allowed.
Expand Down Expand Up @@ -210,13 +266,15 @@ def find_files(
for f in files:
path = Path(f)
if path.is_dir():
found_files.extend([f for f in path.glob(pattern) if is_extension_allowed("".join(f.suffixes))])
elif is_extension_allowed("".join(path.suffixes)):
found_files.extend([f for f in path.glob(pattern) if is_extension_allowed_by_filename(str(path))])
elif is_extension_allowed_by_filename(str(path)):
found_files.append(path)
else:
raise UnsupportedFileType(path)

return [f for f in found_files if f not in map(Path, files_to_exclude)]
files_to_exclude_full_paths = [str(Path(f)) for f in files_to_exclude]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turned into two lines for readability, because applying the str as

[f for f in found file if str(f) not in map(str, map(Path, files_to_exclude))]

is not too readable, and import this will tell us it should be.

Comparing by string, because comparing PosixPath, WindowsPath, or generally PurePath objects is quite unreliable. Iteratively comparing path.parents and then path.name works, and casting to str provides ...parents/name in a single string - which is more efficient to compare.

Comparison of parents and then name is O(n)=n*(p+1) - where p is number of parents. Comparison of string output is O(n)=1 - at least on the surface - but under the hood, we should at least assume that pathlib is relatively optimised.


return [f for f in found_files if str(f) not in files_to_exclude_full_paths]


def secure_continue_request() -> bool:
Expand Down Expand Up @@ -919,7 +977,7 @@ def get_response_content(response: Response) -> Any:

def _parse_version(data) -> dt.AnnotationFileVersion:
version_string = data.get("version", "1.0")
major, minor, suffix = re.findall("^(\d+)\.(\d+)(.*)$", version_string)[0]
major, minor, suffix = re.findall(r"^(\d+)\.(\d+)(.*)$", version_string)[0]
return dt.AnnotationFileVersion(int(major), int(minor), suffix)


Expand Down
136 changes: 136 additions & 0 deletions tests/darwin/utils/find_files_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from dataclasses import dataclass
from pathlib import Path, PosixPath
from typing import Any, Callable, Dict, List, Optional
from unittest import TestCase, skip
from unittest.mock import MagicMock, patch

from darwin.exceptions import UnsupportedFileType
from darwin.utils import (
SUPPORTED_EXTENSIONS,
SUPPORTED_IMAGE_EXTENSIONS,
SUPPORTED_VIDEO_EXTENSIONS,
find_files,
is_extension_allowed,
)


class FindFileTestCase(TestCase):
fake_invalid_files = [
"/testdir.invalidextension",
"/testdir/testdir2.invalidextension",
]
fake_supported_files = [f"testdir/testfile{ext}" for ext in SUPPORTED_EXTENSIONS]
fake_supported_files_varied_case = [f"testdir/testdir2/testfile{ext.upper()}" for ext in SUPPORTED_EXTENSIONS]
fake_files = [
"testdir/testdir2/testfile.png",
"testdir/testdir2/testfile2.png",
"testdir/testfile.png",
*fake_supported_files,
*fake_supported_files_varied_case,
]
fake_file_expected_length = len(fake_files) - len(fake_invalid_files)

def setUp(self) -> None:
return super().setUp()

def tearDown(self) -> None:
return super().tearDown()


class TestFindFiles(FindFileTestCase):
@patch("darwin.utils.is_extension_allowed_by_filename", return_value=True)
def test_find_files_returns_a_list_of_files(self, mock_is_extension_allowed):
output = find_files(self.fake_files, files_to_exclude=[], recursive=False)

self.assertIsInstance(output, list)
[self.assertIsInstance(file, Path) for file in output]

@patch("darwin.utils.is_extension_allowed_by_filename", return_value=True)
def test_find_files_excludes_files_in_excluded_list(self, mock_is_extension_allowed):
output = find_files(
self.fake_files,
files_to_exclude=[
"testdir/testdir2/testfile.png",
"testdir/testdir2/testfile2.png",
],
recursive=False,
)

self.assertEqual(len(self.fake_files) - 2, len(output))

@patch("darwin.utils.is_extension_allowed_by_filename", return_value=False)
def test_raises_error_unsupported_filetype(self, mock_is_extension_allowed):
with self.assertRaises(UnsupportedFileType):
find_files(["1"], files_to_exclude=[], recursive=False)

@patch("darwin.utils.Path", autospec=True)
@patch("darwin.utils.is_extension_allowed_by_filename")
def test_uses_correct_glob_if_recursive(self, mock_is_extension_allowed, mock_path):
mock_path.is_dir.return_value = True
mock_path.glob.return_value = ["1"]

find_files(["1"], files_to_exclude=[], recursive=True)

mock_path.return_value.glob.assert_called_once_with("**/*")

@patch("darwin.utils.Path", autospec=True)
@patch("darwin.utils.is_extension_allowed_by_filename")
def test_uses_correct_glob_if_not_recursive(self, mock_is_extension_allowed, mock_path):
mock_path.is_dir.return_value = True
mock_path.glob.return_value = ["1"]

find_files(["1"], files_to_exclude=[], recursive=False)

mock_path.return_value.glob.assert_called_once_with("*")


class TestIsExtensionAllowedByFilenameFunctions(FindFileTestCase):
@dataclass
class Dependencies:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dependency injection approach used because pytest already has is_allowed_by_filename in memory from the file header, and this guarantees a new memory pointer for each test - pytest runs 4 in parallel as default.

ieabf: Optional[Callable[[str], bool]] = None
iveabf: Optional[Callable[[str], bool]] = None
iieabf: Optional[Callable[[str], bool]] = None

def dependency_factory(self) -> Dependencies:
"""
Dependency injection factory that allows different instantions of the dependencies
to avoid race condition failures and flaky tests.

returns: Dependencies
"""
from darwin.utils import is_extension_allowed_by_filename as ieabf
from darwin.utils import is_image_extension_allowed_by_filename as iieabf
from darwin.utils import is_video_extension_allowed_by_filename as iveabf

return self.Dependencies(ieabf=ieabf, iveabf=iveabf, iieabf=iieabf)

def test_ieabf_returns_true_for_a_valid_extension(self):
valid_extensions = [*self.fake_supported_files, *self.fake_supported_files_varied_case]
results = [self.dependency_factory().ieabf(file) for file in valid_extensions]

self.assertTrue(all(results))

def test_ieabf_returns_false_for_an_invalid_extension(self):
results = [self.dependency_factory().ieabf(file) for file in self.fake_invalid_files]

self.assertFalse(all(results))

def test_iveabf_returns_true_for_a_valid_extension(self):
results = [self.dependency_factory().iveabf(file) for file in SUPPORTED_VIDEO_EXTENSIONS]

self.assertTrue(all(results))

def test_iveabf_returns_false_for_an_invalid_extension(self):
results = [self.dependency_factory().iveabf(file) for file in self.fake_invalid_files]

self.assertFalse(all(results))

def test_iieabf_returns_true_for_a_valid_extension(self):
results = [self.dependency_factory().iieabf(file) for file in SUPPORTED_IMAGE_EXTENSIONS]

self.assertTrue(all(results))

def test_iieabf_returns_false_for_an_invalid_extension(self):
results = [self.dependency_factory().iieabf(file) for file in self.fake_invalid_files]

self.assertFalse(all(results))
6 changes: 3 additions & 3 deletions tests/darwin/utils_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from unittest.mock import MagicMock, patch
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formatted made changes to a file I saved in advertently.


from requests import Response

import darwin.datatypes as dt
Expand Down Expand Up @@ -470,11 +471,10 @@ def it_returns_json():
response: Response = Response()
response.headers["content-type"] = "application/json"
response._content = b'{"key":"a"}'
assert {'key':'a'} == get_response_content(response)
assert {"key": "a"} == get_response_content(response)

def it_returns_text():
response: Response = Response()
response.headers["content-type"] = "text/plain"
response._content = b'hello'
response._content = b"hello"
assert "hello" == get_response_content(response)