Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix to_checksums with None values in dicts and recursion #4579

Open
wants to merge 5 commits into
base: 5.0.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 68 additions & 30 deletions easybuild/framework/easyconfig/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,33 +510,51 @@ def to_dependencies(dep_list):
return [to_dependency(dep) for dep in dep_list]


def to_checksums(checksums):
"""Ensure correct element types for list of checksums: convert list elements to tuples."""
res = []
for checksum in checksums:
# each list entry can be:
# * None (indicates no checksum)
# * a string (SHA256 checksum)
# * a tuple with 2 elements: checksum type + checksum value
# * a list of checksums (i.e. multiple checksums for a single file)
# * a dict (filename to checksum mapping)
if isinstance(checksum, str):
res.append(checksum)
elif isinstance(checksum, (list, tuple)):
# 2 elements + only string/int values => a checksum tuple
if len(checksum) == 2 and all(isinstance(x, (str, int)) for x in checksum):
res.append(tuple(checksum))
def _to_checksum(checksum, list_level=0, allow_dict=True):
"""Ensure the correct element type for each checksum in the checksum list"""
# each entry can be:
# * None (indicates no checksum)
# * a string (SHA256 checksum)
# * a list or tuple with 2 elements: checksum type + checksum value
# * a list or tuple of checksums (i.e. multiple checksums for a single file)
# * a dict (filename to checksum mapping)
if checksum is None or isinstance(checksum, str):
return checksum
elif isinstance(checksum, (list, tuple)):
if len(checksum) == 2 and isinstance(checksum[0], str) and isinstance(checksum[1], (str, int)):
# 2 elements so either:
# - a checksum tuple (2nd element string or int)
# - 2 alternative checksums (tuple)
# - 2 checksums that must each match (list)
# --> Convert to tuple only if we can exclude the 3rd case
if not isinstance(checksum[1], str) or list_level > 0:
return tuple(checksum)
else:
res.append(to_checksums(checksum))
elif isinstance(checksum, dict):
validated_dict = {}
for key, value in checksum.items():
validated_dict[key] = to_checksums(value)
res.append(validated_dict)
else:
res.append(checksum)
return checksum
elif list_level < 2:
# Alternative checksums or multiple checksums for a single file
# Allowed to nest (at most) 2 times, e.g. [[[type, value]]] == [[(type, value)]]
# None is not allowed here
if any(x is None for x in checksum):
raise ValueError('Unexpected None in ' + str(checksum))
if isinstance(checksum, tuple) or list_level > 0:
# When we are already inside a tuple, no further recursion is allowed -> set list_level very high
return tuple(_to_checksum(x, list_level=99, allow_dict=allow_dict) for x in checksum)
else:
return list(_to_checksum(x, list_level=list_level+1, allow_dict=allow_dict) for x in checksum)
elif isinstance(checksum, dict) and allow_dict:
return {key: _to_checksum(value, allow_dict=False) for key, value in checksum.items()}

return res
# Not returned -> Wrong type/format
raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum)))


def to_checksums(checksums):
    """
    Ensure correct element types for a list of checksums.

    Every entry of the given list is passed through _to_checksum and the results are collected, in order,
    in a new list.

    :param checksums: iterable of checksum entries
    :return: list holding the result of _to_checksum for each entry
    :raises EasyBuildError: if a ValueError is raised while converting the entries
    """
    converted = []
    try:
        for entry in checksums:
            converted.append(_to_checksum(entry))
    except ValueError as err:
        # report the full original value along with the specific problem that was found
        raise EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, err)
    return converted


def ensure_iterable_license_specs(specs):
Expand Down Expand Up @@ -613,19 +631,39 @@ def ensure_iterable_license_specs(specs):
}))
# checksums is a list of checksums, one entry per file (source/patch)
# each entry can be:
# None
# a single checksum value (string)
# a single checksum value of a specified type (2-tuple, 1st element is checksum type, 2nd element is checksum)
# a list of checksums (of different types, perhaps different formats), which should *all* be valid
# a dictionary with a mapping from filename to checksum value
CHECKSUM_LIST = (list, as_hashable({'elem_types': [str, tuple, STRING_DICT]}))
CHECKSUMS = (list, as_hashable({'elem_types': [str, tuple, STRING_DICT, CHECKSUM_LIST]}))
# a tuple of checksums (of different types, perhaps different formats), where one should be valid
# a dictionary with a mapping from filename to checksum (None, value, type&value, alternatives)

# Type & value, value may be an int for type "size"
# This is a bit too permissive as it allows the first element to be an int and doesn't restrict the number of elements
CHECKSUM_AND_TYPE = (tuple, as_hashable({'elem_types': [str, int]}))
CHECKSUM_LIST = (list, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE]}))
CHECKSUM_TUPLE = (tuple, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE]}))
CHECKSUM_DICT = (dict, as_hashable(
{
'elem_types': [type(None), str, CHECKSUM_AND_TYPE, CHECKSUM_TUPLE, CHECKSUM_LIST],
'key_types': [str],
}
))
# At the top-level we allow tuples/lists containing a dict
CHECKSUM_LIST_W_DICT = (list, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE, CHECKSUM_DICT]}))
CHECKSUM_TUPLE_W_DICT = (tuple, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE, CHECKSUM_DICT]}))

CHECKSUMS = (list, as_hashable({'elem_types': [type(None), str, CHECKSUM_AND_TYPE,
CHECKSUM_LIST_W_DICT, CHECKSUM_TUPLE_W_DICT, CHECKSUM_DICT]}))

CHECKABLE_TYPES = [CHECKSUM_LIST, CHECKSUMS, DEPENDENCIES, DEPENDENCY_DICT, LIST_OF_STRINGS,
CHECKABLE_TYPES = [CHECKSUM_AND_TYPE, CHECKSUM_LIST, CHECKSUM_TUPLE,
CHECKSUM_LIST_W_DICT, CHECKSUM_TUPLE_W_DICT, CHECKSUM_DICT, CHECKSUMS,
DEPENDENCIES, DEPENDENCY_DICT, LIST_OF_STRINGS,
SANITY_CHECK_PATHS_DICT, SANITY_CHECK_PATHS_ENTRY, STRING_DICT, STRING_OR_TUPLE_LIST,
STRING_OR_TUPLE_DICT, STRING_OR_TUPLE_OR_DICT_LIST, TOOLCHAIN_DICT, TUPLE_OF_STRINGS]

# easy types, that can be verified with isinstance
EASY_TYPES = [str, bool, dict, int, list, str, tuple]
EASY_TYPES = [str, bool, dict, int, list, str, tuple, type(None)]

# type checking is skipped for easyconfig parameters names not listed in PARAMETER_TYPES
PARAMETER_TYPES = {
Expand Down
142 changes: 120 additions & 22 deletions test/framework/type_checking.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,16 +171,18 @@ def test_check_type_of_param_value_sanity_check_paths(self):
out = {'files': ['bin/foo', ('bin/bar', 'bin/baz')], 'dirs': [('lib', 'lib64', 'lib32')]}
self.assertEqual(check_type_of_param_value('sanity_check_paths', inp, auto_convert=True), (True, out))

def test_check_type_of_param_value_checksums(self):
"""Test check_type_of_param_value function for checksums."""

md5_checksum = 'fa618be8435447a017fd1bf2c7ae9224'
sha256_checksum1 = 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265'
sha256_checksum2 = 'b5f9cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551'
sha256_checksum3 = '033be54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde'

@staticmethod
def get_valid_checksums_values():
"""Return list of values valid for the 'checksums' EC parameter"""

# Using (actually invalid) prefix to better detect those in case of errors
md5_checksum = 'md518be8435447a017fd1bf2c7ae9224'
sha256_checksum1 = 'sha18be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265'
sha256_checksum2 = 'sha2cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551'
sha256_checksum3 = 'sha3e54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde'
filesize = 45617379
# valid values for 'checksums' easyconfig parameters
inputs = [
return [
[],
# single checksum (one file)
[md5_checksum],
Expand All @@ -190,6 +192,7 @@ def test_check_type_of_param_value_checksums(self):
# one checksum of specific type (as 2-tuple)
[('md5', md5_checksum)],
[('sha256', sha256_checksum1)],
[('size', filesize)],
# alternative checksums for a single file (n-tuple)
[(sha256_checksum1, sha256_checksum2)],
[(sha256_checksum1, sha256_checksum2, sha256_checksum3)],
Expand All @@ -213,17 +216,37 @@ def test_check_type_of_param_value_checksums(self):
# two checksums for a single file, *both* should match
[sha256_checksum1, md5_checksum],
# three checksums for a single file, *all* should match
[sha256_checksum1, ('md5', md5_checksum), {'foo.txt': sha256_checksum1}],
[sha256_checksum1, ('md5', md5_checksum), ('size', filesize)],
# single checksum for a single file
sha256_checksum1,
# filename-to-checksum mapping
{'foo.txt': sha256_checksum1, 'bar.txt': sha256_checksum2},
{'foo.txt': sha256_checksum1, 'bar.txt': sha256_checksum2, 'baz.txt': ('size', filesize)},
# 3 alternative checksums for a single file, one match is sufficient
(sha256_checksum1, sha256_checksum2, sha256_checksum3),
]
# two alternative checksums for a single file (not to be confused with a checksum-type & -value tuple)
(sha256_checksum1, md5_checksum),
# three alternative checksums for a single file of different types
(sha256_checksum1, ('md5', md5_checksum), ('size', filesize)),
# alternative checksums in dicts are also allowed
{'foo.txt': (sha256_checksum2, sha256_checksum3), 'bar.txt': (sha256_checksum1, md5_checksum)},
# Same but with lists -> all must match for each file
{'foo.txt': [sha256_checksum2, sha256_checksum3], 'bar.txt': [sha256_checksum1, md5_checksum]},
],
# None is allowed, meaning skip the checksum
[
None,
# Also in mappings
{'foo.txt': sha256_checksum1, 'bar.txt': None},
],
]
for inp in inputs:
self.assertEqual(check_type_of_param_value('checksums', inp), (True, inp))

def test_check_type_of_param_value_checksums(self):
    """Test check_type_of_param_value function for checksums."""
    # each value provided by get_valid_checksums_values must pass the type check and come back unmodified
    valid_values = TypeCheckingTest.get_valid_checksums_values()
    for value in valid_values:
        is_ok, checked_value = check_type_of_param_value('checksums', value)
        failure_msg = 'Failed for ' + str(value)
        self.assertIs(is_ok, True, failure_msg)
        self.assertEqual(checked_value, value)

def test_check_type_of_param_value_patches(self):
"""Test check_type_of_param_value function for patches."""
Expand Down Expand Up @@ -705,19 +728,94 @@ def test_to_sanity_check_paths_dict(self):

def test_to_checksums(self):
"""Test to_checksums function."""
# Some hand-crafted examples. Only the types are important, values are for easier verification
test_inputs = [
['be662daa971a640e40be5c804d9d7d10'],
['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')],
[['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]],
[('md5', 'be662daa971a640e40be5c804d9d7d10')],
['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'),
('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
('size', 273)],
['checksumvalue'],
[('md5', 'md5checksumvalue')],
['file_1_checksum', ('md5', 'file_2_md5_checksum')],
# One checksum per file, some with checksum type
[
'be662daa971a640e40be5c804d9d7d10',
('adler32', '0x998410035'),
('crc32', '0x1553842328'),
('md5', 'be662daa971a640e40be5c804d9d7d10'),
('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'),
# int type as the 2nd value
('size', 273),
],
# None values should not be filtered out, but left in place
[None, 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None],
[None, 'checksum', None],
# Alternative checksums, not to be confused with multiple checksums for a file
[('main_checksum', 'alternative_checksum')],
[('1st_of_3', '2nd_of_3', '3rd_of_3')],
# Lists must be kept: This means all must match
[['checksum_1_in_list']],
[['checksum_must_match', 'this_must_also_match']],
[['1st_of_3_list', '2nd_of_3_list', '3rd_of_3_list']],
# Alternative checksums with types
[
(('adler32', '1st_adler'), ('crc32', '1st_crc')),
(('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')),
],
# Entries can be dicts even containing `None`
[
{
'src-arm.tgz': 'arm_checksum',
'src-x86.tgz': ('mainchecksum', 'altchecksum'),
'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')),
'git-clone.tgz': None,
},
{
'src': ['checksum_must_match', 'this_must_also_match']
},
# 2nd required checksum a dict
['first_checksum', {'src-arm': 'arm_checksum'}]
],
]
for checksums in test_inputs:
self.assertEqual(to_checksums(checksums), checksums)
# Also reuse the checksums we use in test_check_type_of_param_value_checksums
# When a checksum is valid it must not be modified
for checksums in TypeCheckingTest.get_valid_checksums_values():
self.assertEqual(to_checksums(checksums), checksums)

# List in list converted to tuple -> alternatives or checksum with type
checksums = [['1stchecksum', ['md5', 'md5sum']]]
checksums_expected = [['1stchecksum', ('md5', 'md5sum')]]
self.assertEqual(to_checksums(checksums), checksums_expected)

# Error detection
wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))]
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting)
correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]
self.assertEqual(to_checksums(correct_nesting), correct_nesting)
# YEB (YAML EC) doesn't have tuples, so it uses lists instead, which need to be converted
correct_nesting_yeb = [[['1stchecksum', ['md5', 'md5sum'], ['md5', 'altmd5sum']]]]
correct_nesting_yeb_conv = [[('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]]
self.assertEqual(to_checksums(correct_nesting_yeb), correct_nesting_yeb_conv)
self.assertEqual(to_checksums(correct_nesting_yeb_conv), correct_nesting_yeb_conv)

unexpected_set = [('1stchecksum', {'md5', 'md5sum'})]
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set)
unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}]
self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict)
correct_dict = [{'src': ('md5sum', 'shasum')}]
self.assertEqual(to_checksums(correct_dict), correct_dict)
correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}]
correct_dict_2 = [{'src': [('md5', 'md5sum'), ('sha', 'shasum')]}]
self.assertEqual(to_checksums(correct_dict_2), correct_dict_2)
self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # inner lists to tuples

unexpected_Nones = [
[('1stchecksum', None)],
[['1stchecksum', None]],
[{'src': ('md5sum', None)}],
[{'src': ['md5sum', None]}],
]
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[0])
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[1])
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[2])
self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[3])

def test_ensure_iterable_license_specs(self):
"""Test ensure_iterable_license_specs function."""
Expand Down
Loading