diff --git a/easybuild/framework/easyconfig/types.py b/easybuild/framework/easyconfig/types.py index f902716dfa..51d58db36b 100644 --- a/easybuild/framework/easyconfig/types.py +++ b/easybuild/framework/easyconfig/types.py @@ -510,33 +510,51 @@ def to_dependencies(dep_list): return [to_dependency(dep) for dep in dep_list] -def to_checksums(checksums): - """Ensure correct element types for list of checksums: convert list elements to tuples.""" - res = [] - for checksum in checksums: - # each list entry can be: - # * None (indicates no checksum) - # * a string (SHA256 checksum) - # * a tuple with 2 elements: checksum type + checksum value - # * a list of checksums (i.e. multiple checksums for a single file) - # * a dict (filename to checksum mapping) - if isinstance(checksum, str): - res.append(checksum) - elif isinstance(checksum, (list, tuple)): - # 2 elements + only string/int values => a checksum tuple - if len(checksum) == 2 and all(isinstance(x, (str, int)) for x in checksum): - res.append(tuple(checksum)) +def _to_checksum(checksum, list_level=0, allow_dict=True): + """Ensure the correct element type for each checksum in the checksum list""" + # each entry can be: + # * None (indicates no checksum) + # * a string (SHA256 checksum) + # * a list or tuple with 2 elements: checksum type + checksum value + # * a list or tuple of checksums (i.e. 
multiple checksums for a single file) + # * a dict (filename to checksum mapping) + if checksum is None or isinstance(checksum, str): + return checksum + elif isinstance(checksum, (list, tuple)): + if len(checksum) == 2 and isinstance(checksum[0], str) and isinstance(checksum[1], (str, int)): + # 2 elements so either: + # - a checksum tuple (2nd element string or int) + # - 2 alternative checksums (tuple) + # - 2 checksums that must each match (list) + # --> Convert to tuple only if we can exclude the 3rd case + if not isinstance(checksum[1], str) or list_level > 0: + return tuple(checksum) else: - res.append(to_checksums(checksum)) - elif isinstance(checksum, dict): - validated_dict = {} - for key, value in checksum.items(): - validated_dict[key] = to_checksums(value) - res.append(validated_dict) - else: - res.append(checksum) + return checksum + elif list_level < 2: + # Alternative checksums or multiple checksums for a single file + # Allowed to nest (at most) 2 times, e.g. [[[type, value]]] == [[(type, value)]] + # None is not allowed here + if any(x is None for x in checksum): + raise ValueError('Unexpected None in ' + str(checksum)) + if isinstance(checksum, tuple) or list_level > 0: + # When we already are in a tuple no further recursion is allowed -> set list_level very high + return tuple(_to_checksum(x, list_level=99, allow_dict=allow_dict) for x in checksum) + else: + return list(_to_checksum(x, list_level=list_level+1, allow_dict=allow_dict) for x in checksum) + elif isinstance(checksum, dict) and allow_dict: + return {key: _to_checksum(value, allow_dict=False) for key, value in checksum.items()} - return res + # Not returned -> Wrong type/format + raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum))) + + +def to_checksums(checksums): + """Ensure correct element types for list of checksums: convert list elements to tuples.""" + try: + return [_to_checksum(checksum) for checksum in checksums] + except ValueError as e: + raise 
EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, e) def ensure_iterable_license_specs(specs): @@ -613,19 +631,39 @@ def ensure_iterable_license_specs(specs): })) # checksums is a list of checksums, one entry per file (source/patch) # each entry can be: +# None # a single checksum value (string) # a single checksum value of a specified type (2-tuple, 1st element is checksum type, 2nd element is checksum) # a list of checksums (of different types, perhaps different formats), which should *all* be valid -# a dictionary with a mapping from filename to checksum value -CHECKSUM_LIST = (list, as_hashable({'elem_types': [str, tuple, STRING_DICT]})) -CHECKSUMS = (list, as_hashable({'elem_types': [str, tuple, STRING_DICT, CHECKSUM_LIST]})) +# a tuple of checksums (of different types, perhaps different formats), where one should be valid +# a dictionary with a mapping from filename to checksum (None, value, type&value, alternatives) + +# Type & value, value may be an int for type "size" +# This is a bit too permissive as it allows the first element to be an int and doesn't restrict the number of elements +CHECKSUM_AND_TYPE = (tuple, as_hashable({'elem_types': [str, int]})) +CHECKSUM_LIST = (list, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE]})) +CHECKSUM_TUPLE = (tuple, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE]})) +CHECKSUM_DICT = (dict, as_hashable( + { + 'elem_types': [type(None), str, CHECKSUM_AND_TYPE, CHECKSUM_TUPLE, CHECKSUM_LIST], + 'key_types': [str], + } +)) +# At the top-level we allow tuples/lists containing a dict +CHECKSUM_LIST_W_DICT = (list, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE, CHECKSUM_DICT]})) +CHECKSUM_TUPLE_W_DICT = (tuple, as_hashable({'elem_types': [str, CHECKSUM_AND_TYPE, CHECKSUM_DICT]})) + +CHECKSUMS = (list, as_hashable({'elem_types': [type(None), str, CHECKSUM_AND_TYPE, + CHECKSUM_LIST_W_DICT, CHECKSUM_TUPLE_W_DICT, CHECKSUM_DICT]})) -CHECKABLE_TYPES = [CHECKSUM_LIST, CHECKSUMS, DEPENDENCIES, 
DEPENDENCY_DICT, LIST_OF_STRINGS, +CHECKABLE_TYPES = [CHECKSUM_AND_TYPE, CHECKSUM_LIST, CHECKSUM_TUPLE, + CHECKSUM_LIST_W_DICT, CHECKSUM_TUPLE_W_DICT, CHECKSUM_DICT, CHECKSUMS, + DEPENDENCIES, DEPENDENCY_DICT, LIST_OF_STRINGS, SANITY_CHECK_PATHS_DICT, SANITY_CHECK_PATHS_ENTRY, STRING_DICT, STRING_OR_TUPLE_LIST, STRING_OR_TUPLE_DICT, STRING_OR_TUPLE_OR_DICT_LIST, TOOLCHAIN_DICT, TUPLE_OF_STRINGS] # easy types, that can be verified with isinstance -EASY_TYPES = [str, bool, dict, int, list, str, tuple] +EASY_TYPES = [str, bool, dict, int, list, str, tuple, type(None)] # type checking is skipped for easyconfig parameters names not listed in PARAMETER_TYPES PARAMETER_TYPES = { diff --git a/test/framework/type_checking.py b/test/framework/type_checking.py index ded577ef69..d6e26964b2 100644 --- a/test/framework/type_checking.py +++ b/test/framework/type_checking.py @@ -171,16 +171,18 @@ def test_check_type_of_param_value_sanity_check_paths(self): out = {'files': ['bin/foo', ('bin/bar', 'bin/baz')], 'dirs': [('lib', 'lib64', 'lib32')]} self.assertEqual(check_type_of_param_value('sanity_check_paths', inp, auto_convert=True), (True, out)) - def test_check_type_of_param_value_checksums(self): - """Test check_type_of_param_value function for checksums.""" - - md5_checksum = 'fa618be8435447a017fd1bf2c7ae9224' - sha256_checksum1 = 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265' - sha256_checksum2 = 'b5f9cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551' - sha256_checksum3 = '033be54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde' - + @staticmethod + def get_valid_checksums_values(): + """Return list of values valid for the 'checksums' EC parameter""" + + # Using (actually invalid) prefix to better detect those in case of errors + md5_checksum = 'md518be8435447a017fd1bf2c7ae9224' + sha256_checksum1 = 'sha18be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265' + sha256_checksum2 = 
'sha2cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551' + sha256_checksum3 = 'sha3e54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde' + filesize = 45617379 # valid values for 'checksums' easyconfig parameters - inputs = [ + return [ [], # single checksum (one file) [md5_checksum], @@ -190,6 +192,7 @@ def test_check_type_of_param_value_checksums(self): # one checksum of specific type (as 2-tuple) [('md5', md5_checksum)], [('sha256', sha256_checksum1)], + [('size', filesize)], # alternative checksums for a single file (n-tuple) [(sha256_checksum1, sha256_checksum2)], [(sha256_checksum1, sha256_checksum2, sha256_checksum3)], @@ -213,17 +216,37 @@ def test_check_type_of_param_value_checksums(self): # two checksums for a single file, *both* should match [sha256_checksum1, md5_checksum], # three checksums for a single file, *all* should match - [sha256_checksum1, ('md5', md5_checksum), {'foo.txt': sha256_checksum1}], + [sha256_checksum1, ('md5', md5_checksum), ('size', filesize)], # single checksum for a single file sha256_checksum1, # filename-to-checksum mapping - {'foo.txt': sha256_checksum1, 'bar.txt': sha256_checksum2}, + {'foo.txt': sha256_checksum1, 'bar.txt': sha256_checksum2, 'baz.txt': ('size', filesize)}, # 3 alternative checksums for a single file, one match is sufficient (sha256_checksum1, sha256_checksum2, sha256_checksum3), - ] + # two alternative checksums for a single file (not to be confused with a checksum-type & -value tuple) + (sha256_checksum1, md5_checksum), + # three alternative checksums for a single file of different types + (sha256_checksum1, ('md5', md5_checksum), ('size', filesize)), + # alternative checksums in dicts are also allowed + {'foo.txt': (sha256_checksum2, sha256_checksum3), 'bar.txt': (sha256_checksum1, md5_checksum)}, + # Same but with lists -> all must match for each file + {'foo.txt': [sha256_checksum2, sha256_checksum3], 'bar.txt': [sha256_checksum1, md5_checksum]}, + ], + # None is allowed, meaning skip the
checksum + [ + None, + # Also in mappings + {'foo.txt': sha256_checksum1, 'bar.txt': None}, + ], ] - for inp in inputs: - self.assertEqual(check_type_of_param_value('checksums', inp), (True, inp)) + + def test_check_type_of_param_value_checksums(self): + """Test check_type_of_param_value function for checksums.""" + + for inp in TypeCheckingTest.get_valid_checksums_values(): + type_ok, newval = check_type_of_param_value('checksums', inp) + self.assertIs(type_ok, True, 'Failed for ' + str(inp)) + self.assertEqual(newval, inp) def test_check_type_of_param_value_patches(self): """Test check_type_of_param_value function for patches.""" @@ -705,19 +728,94 @@ def test_to_sanity_check_paths_dict(self): def test_to_checksums(self): """Test to_checksums function.""" + # Some hand-crafted examples. Only the types are important, values are for easier verification test_inputs = [ - ['be662daa971a640e40be5c804d9d7d10'], - ['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')], - [['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]], - [('md5', 'be662daa971a640e40be5c804d9d7d10')], - ['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'), - ('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), - ('size', 273)], + ['checksumvalue'], + [('md5', 'md5checksumvalue')], + ['file_1_checksum', ('md5', 'file_2_md5_checksum')], + # One checksum per file, some with checksum type + [ + 'be662daa971a640e40be5c804d9d7d10', + ('adler32', '0x998410035'), + ('crc32', '0x1553842328'), + ('md5', 'be662daa971a640e40be5c804d9d7d10'), + ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), + # int type as the 2nd value + ('size', 273), + ], # None values should not be filtered out, but left in place - [None, 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None], + [None, 'checksum', None], + # Alternative checksums, not to be confused with 
multiple checksums for a file + [('main_checksum', 'alternative_checksum')], + [('1st_of_3', '2nd_of_3', '3rd_of_3')], + # Lists must be kept: This means all must match + [['checksum_1_in_list']], + [['checksum_must_match', 'this_must_also_match']], + [['1st_of_3_list', '2nd_of_3_list', '3rd_of_3_list']], + # Alternative checksums with types + [ + (('adler32', '1st_adler'), ('crc32', '1st_crc')), + (('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')), + ], + # Entries can be dicts even containing `None` + [ + { + 'src-arm.tgz': 'arm_checksum', + 'src-x86.tgz': ('mainchecksum', 'altchecksum'), + 'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')), + 'git-clone.tgz': None, + }, + { + 'src': ['checksum_must_match', 'this_must_also_match'] + }, + # 2nd required checksum is a dict + ['first_checksum', {'src-arm': 'arm_checksum'}] + ], ] for checksums in test_inputs: self.assertEqual(to_checksums(checksums), checksums) + # Also reuse the checksums we use in test_check_type_of_param_value_checksums + # When a checksum is valid it must not be modified + for checksums in TypeCheckingTest.get_valid_checksums_values(): + self.assertEqual(to_checksums(checksums), checksums) + + # List in list converted to tuple -> alternatives or checksum with type + checksums = [['1stchecksum', ['md5', 'md5sum']]] + checksums_expected = [['1stchecksum', ('md5', 'md5sum')]] + self.assertEqual(to_checksums(checksums), checksums_expected) + + # Error detection + wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting) + correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))] + self.assertEqual(to_checksums(correct_nesting), correct_nesting) + # YEB (YAML EC) doesn't have tuples, so it uses lists instead, which need to be converted + correct_nesting_yeb = [[['1stchecksum', ['md5', 'md5sum'], ['md5', 'altmd5sum']]]] + correct_nesting_yeb_conv =
[[('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]] + self.assertEqual(to_checksums(correct_nesting_yeb), correct_nesting_yeb_conv) + self.assertEqual(to_checksums(correct_nesting_yeb_conv), correct_nesting_yeb_conv) + + unexpected_set = [('1stchecksum', {'md5', 'md5sum'})] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set) + unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict) + correct_dict = [{'src': ('md5sum', 'shasum')}] + self.assertEqual(to_checksums(correct_dict), correct_dict) + correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}] + correct_dict_2 = [{'src': [('md5', 'md5sum'), ('sha', 'shasum')]}] + self.assertEqual(to_checksums(correct_dict_2), correct_dict_2) + self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # inner lists to tuples + + unexpected_Nones = [ + [('1stchecksum', None)], + [['1stchecksum', None]], + [{'src': ('md5sum', None)}], + [{'src': ['md5sum', None]}], + ] + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[0]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[1]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[2]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[3]) def test_ensure_iterable_license_specs(self): """Test ensure_iterable_license_specs function."""