From 72ca38514cdfd21df593639920a8952ca19a4bc8 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 09:46:02 -0400 Subject: [PATCH 01/10] Add new MEG structure. --- src/schema/rules/datatypes/meg.yaml | 51 +++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/schema/rules/datatypes/meg.yaml b/src/schema/rules/datatypes/meg.yaml index 132a0b81e9..86d9a55dfc 100644 --- a/src/schema/rules/datatypes/meg.yaml +++ b/src/schema/rules/datatypes/meg.yaml @@ -1,4 +1,5 @@ --- +# MEG data files # First group - suffixes: - meg @@ -24,6 +25,49 @@ split: optional # Second group +# Specifically, it's dat files with "acq-calibration" +- suffixes: + - meg + extensions: + - .dat + entities: + subject: required + session: optional + acquisition: + requirement: required + type: string + enum: + - calibration + +# Third group +- suffixes: + - meg + extensions: + - .fif + entities: + subject: required + session: optional + task: required + acquisition: optional + run: optional + processing: optional + split: optional + +# Fourth group +- suffixes: + - meg + extensions: + - .fif + entities: + subject: required + session: optional + acquisition: + requirement: required + type: string + enum: + - crosstalk + +# Headshape files - suffixes: - headshape extensions: @@ -33,7 +77,7 @@ session: optional acquisition: optional -# Third group +# Marker files - suffixes: - markers extensions: @@ -46,7 +90,7 @@ acquisition: optional space: optional -# Fourth +# Coordinate systems - suffixes: - coordsystem extensions: @@ -56,6 +100,7 @@ session: optional acquisition: optional +# Channel files - suffixes: - channels extensions: @@ -69,6 +114,7 @@ run: optional processing: optional +# Events files - suffixes: - events extensions: @@ -81,6 +127,7 @@ acquisition: optional run: optional +# Head photos - suffixes: - photo extensions: From 84394e776c209062bd17676b8f2d01ded66835cc Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 09:46:08 
-0400 Subject: [PATCH 02/10] Document new rules format. --- src/schema/README.md | 51 +++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/src/schema/README.md b/src/schema/README.md index 948d687a52..4d9eb51283 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -383,58 +383,51 @@ The `extensions` entry is a list of valid file extensions. The `entities` entry is a dictionary in which the keys are entity names and the values are whether the entity is required or optional for that suffix. Any entities that are not present in this dictionary are not allowed in files with any of the suffixes in the group. +On rare occasions, there are restrictions on valid entity values +(for example, some suffixes may only allow an `acq` value of `calibration`). +In those cases, the entity's value will be another object, rather than a string indicating the requirement level. +This object will contain at least two keys: "requirement" and "type". **NOTE**: The order in which entities appear in these dictionaries does not reflect how they should appear in filenames. That information is present in `rules/entities.yaml`. 
-As an example, let us look at part of `func.yaml`: +As an example, let us look at part of `meg.yaml`: ```yaml - suffixes: - - bold - - cbv - - sbref + - meg extensions: - - .nii.gz - - .nii - - .json + - .fif entities: subject: required session: optional task: required acquisition: optional - ceagent: optional - reconstruction: optional - direction: optional run: optional - echo: optional - part: optional + processing: optional + split: optional -# Phase (deprecated) - suffixes: - - phase # deprecated + - meg extensions: - - .nii.gz - - .nii - - .json + - .fif entities: subject: required session: optional - task: required - acquisition: optional - ceagent: optional - reconstruction: optional - direction: optional - run: optional - echo: optional + acquisition: + requirement: required + type: string + enum: + - crosstalk ``` -In this case, the first group has three suffixes: `bold`, `cbv`, and `sbref`. -The second group has one suffix: `phase`. -While the valid extensions are the same for both groups (`.nii.gz`, `.nii`, and `.json`), the entities are not. +In this case, the first group has one suffix: `meg`. +The second group has the same suffix (`meg`), but describes different rules for files with that suffix. +While the valid extension is the same for both groups (`.fif`), the entities are not. -Specifically, files with the `phase` suffix may not have a `part` entity, -while this is an option for files with the `bold`, `cbv`, or `sbref` suffixes. +Specifically, files in the first group may have `task`, `run`, `processing`, and `split` entities, +while files in the second group may not. +Also, files in the second group MUST have the `acq` entity, and the associated value MUST be `crosstalk`. ### `entities.yaml` From 6a676aa0ca3a9a70f08b1f7a98a3e5a509bc0aa5 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 09:46:23 -0400 Subject: [PATCH 03/10] Support the new structure in the code. 
--- tools/schemacode/schema.py | 55 +++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/tools/schemacode/schema.py b/tools/schemacode/schema.py index 8a9c137ccf..f72f229419 100644 --- a/tools/schemacode/schema.py +++ b/tools/schemacode/schema.py @@ -208,6 +208,23 @@ def make_entity_definitions(schema): return text +def _add_entity(filename_template, entity_pattern, requirement_level): + if requirement_level == "required": + if len(filename_template.strip()): + filename_template += "_" + entity_pattern + else: + # Only the first entity doesn't need an underscore + filename_template += entity_pattern + else: + if len(filename_template.strip()): + filename_template += "[_" + entity_pattern + "]" + else: + # Only the first entity doesn't need an underscore + filename_template += "[" + entity_pattern + "]" + + return filename_template + + def make_filename_template(schema, **kwargs): """Create codeblocks containing example filename patterns for a given datatype. 
@@ -248,23 +265,31 @@ def make_filename_template(schema, **kwargs): for group in schema["rules"]["datatypes"][datatype]: string = "\t\t\t" for ent in entity_order: - ent_format = "{}-<{}>".format( - schema["objects"]["entities"][ent]["entity"], - schema["objects"]["entities"][ent].get("format", "label") - ) + if "enum" in schema["objects"]["entities"][ent].keys(): + # Entity key-value pattern with specific allowed values + ent_format = "{}-<{}>".format( + schema["objects"]["entities"][ent]["entity"], + "|".join(schema["objects"]["entities"][ent]["enum"]), + ) + else: + # Standard entity key-value pattern with simple label/index + ent_format = "{}-<{}>".format( + schema["objects"]["entities"][ent]["entity"], + schema["objects"]["entities"][ent].get("format", "label"), + ) + if ent in group["entities"]: - if group["entities"][ent] == "required": - if len(string.strip()): - string += "_" + ent_format - else: - # Only the first entity doesn't need an underscore - string += ent_format + if isinstance(group["entities"][ent], dict): + if "enum" in group["entities"][ent].keys(): + # Overwrite the filename pattern based on the valid values + ent_format = "{}-<{}>".format( + schema["objects"]["entities"][ent]["entity"], + "|".join(group["entities"][ent]["enum"]), + ) + + string = _add_entity(string, ent_format, group["entities"][ent]["requirement"]) else: - if len(string.strip()): - string += "[_" + ent_format + "]" - else: - # Only the first entity doesn't need an underscore - string += "[" + ent_format + "]" + string = _add_entity(string, ent_format, group["entities"][ent]) # In cases of large numbers of suffixes, # we use the "suffix" variable and expect a table later in the spec From f19ef56e41336e47fa7ae497faa9afbdb70603e4 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 10:04:30 -0400 Subject: [PATCH 04/10] Fix entity tables bug. I definitely made a mistake in #883. 
--- tools/schemacode/schema.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/schemacode/schema.py b/tools/schemacode/schema.py index f72f229419..c953021f76 100644 --- a/tools/schemacode/schema.py +++ b/tools/schemacode/schema.py @@ -357,18 +357,18 @@ def make_entity_table(schema, tablefmt="github", **kwargs): # import pdb; pdb.set_trace() header = ["Entity", "DataType"] formats = ["Format", "DataType"] - entity_to_col = {} table = [formats] # Compose header and formats first - for i, (entity, spec) in enumerate(schema["objects"]["entities"].items()): - entity_shorthand = schema["objects"]["entities"][entity]["entity"] - header.append(spec["name"]) + entity_to_col = {ent: i for i, ent in enumerate(schema["rules"]["entities"])} + for i_entity, entity in enumerate(schema["rules"]["entities"]): + entity_spec = schema["objects"]["entities"][entity] + entity_shorthand = entity_spec["entity"] + header.append(entity_spec["name"]) formats.append( - f'[`{entity_shorthand}-<{spec.get("format", "label")}>`]' + f'[`{entity_shorthand}-<{entity_spec.get("format", "label")}>`]' f"({ENTITIES_FILE}#{entity_shorthand})" ) - entity_to_col[entity] = i + 1 # Go through data types for dtype, dtype_specs in schema["rules"]["datatypes"].items(): @@ -386,8 +386,13 @@ def make_entity_table(schema, tablefmt="github", **kwargs): # TODO:
is specific for html form suffixes_str = " ".join(suffixes) if suffixes else "" dtype_row = [dtype] + ([""] * len(entity_to_col)) - for ent, req in spec.get("entities", []).items(): - dtype_row[entity_to_col[ent]] = req.upper() + for ent, ent_info in spec.get("entities", []).items(): + if isinstance(ent_info, dict): + requirement_level = ent_info["requirement"] + else: + requirement_level = ent_info + + dtype_row[entity_to_col[ent]] = requirement_level.upper() # Merge specs within dtypes if they share all of the same entities if dtype_row in dtype_rows.values(): From 6daac64ee28df23586c9e8f3d12165f1bfdc2ddb Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 10:04:40 -0400 Subject: [PATCH 05/10] Clean up code. --- tools/schemacode/schema.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/schemacode/schema.py b/tools/schemacode/schema.py index c953021f76..0551563f54 100644 --- a/tools/schemacode/schema.py +++ b/tools/schemacode/schema.py @@ -209,6 +209,7 @@ def make_entity_definitions(schema): def _add_entity(filename_template, entity_pattern, requirement_level): + """Add entity pattern to filename template based on requirement level.""" if requirement_level == "required": if len(filename_template.strip()): filename_template += "_" + entity_pattern @@ -287,7 +288,11 @@ def make_filename_template(schema, **kwargs): "|".join(group["entities"][ent]["enum"]), ) - string = _add_entity(string, ent_format, group["entities"][ent]["requirement"]) + string = _add_entity( + string, + ent_format, + group["entities"][ent]["requirement"], + ) else: string = _add_entity(string, ent_format, group["entities"][ent]) From 6970a876bc690cad860f6ef09f34b406cc32efa2 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 11:04:17 -0400 Subject: [PATCH 06/10] Fix the entity table more. 
--- tools/schemacode/schema.py | 45 ++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/tools/schemacode/schema.py b/tools/schemacode/schema.py index 0551563f54..4b4bbc3193 100644 --- a/tools/schemacode/schema.py +++ b/tools/schemacode/schema.py @@ -365,8 +365,8 @@ def make_entity_table(schema, tablefmt="github", **kwargs): table = [formats] # Compose header and formats first - entity_to_col = {ent: i for i, ent in enumerate(schema["rules"]["entities"])} - for i_entity, entity in enumerate(schema["rules"]["entities"]): + all_entities = schema["rules"]["entities"] + for entity in all_entities: entity_spec = schema["objects"]["entities"][entity] entity_shorthand = entity_spec["entity"] header.append(entity_spec["name"]) @@ -380,8 +380,8 @@ def make_entity_table(schema, tablefmt="github", **kwargs): dtype_rows = {} # each dtype could have multiple specs - for spec in dtype_specs: - suffixes = spec.get("suffixes") + for i_dtype_spec, dtype_spec in enumerate(dtype_specs): + suffixes = dtype_spec.get("suffixes") # Skip this part of the schema if no suffixes are found. # This is a hack to work around filter_schema's limitations. @@ -390,25 +390,41 @@ def make_entity_table(schema, tablefmt="github", **kwargs): # TODO:
is specific for html form suffixes_str = " ".join(suffixes) if suffixes else "" - dtype_row = [dtype] + ([""] * len(entity_to_col)) - for ent, ent_info in spec.get("entities", []).items(): + dtype_row = [dtype] + ([""] * len(all_entities)) + for ent, ent_info in dtype_spec.get("entities", {}).items(): if isinstance(ent_info, dict): requirement_level = ent_info["requirement"] else: requirement_level = ent_info - dtype_row[entity_to_col[ent]] = requirement_level.upper() + dtype_row[all_entities.index(ent) + 1] = requirement_level.upper() - # Merge specs within dtypes if they share all of the same entities if dtype_row in dtype_rows.values(): - for k, v in dtype_rows.items(): - if dtype_row == v: - dtype_rows.pop(k) - new_k = k + " " + suffixes_str - new_k = new_k.strip() - dtype_rows[new_k] = v + # Merge specs within dtypes if they share all of the same entities + for existing_suffixes_str, existing_entities in dtype_rows.items(): + if dtype_row == existing_entities: + dtype_rows.pop(existing_suffixes_str) + split_old_suffixes = existing_suffixes_str.split(" ") + split_suffixes = suffixes_str.split(" ") + split_new_suffixes = sorted(list(set(split_suffixes + split_old_suffixes))) + if "also" in split_new_suffixes: + split_new_suffixes.remove("also") + new_suffixes_str = " ".join(split_new_suffixes) + new_suffixes_str = "also " + new_suffixes_str + else: + new_suffixes_str = " ".join(split_new_suffixes) + + dtype_rows[new_suffixes_str] = existing_entities break + + elif suffixes_str in dtype_rows.keys(): + # Create new lines for multiple specs with the same dtype and suffix, + # but different entities + # Unfortunately, the keys need to be unique + dtype_rows["also " + suffixes_str] = dtype_row + else: + # Otherwise, just add the new suffix group dtype_rows[suffixes_str] = dtype_row # Reformat first column @@ -416,6 +432,7 @@ def make_entity_table(schema, tablefmt="github", **kwargs): dtype + "
({})".format(k): v for k, v in dtype_rows.items() } dtype_rows = [[k] + v for k, v in dtype_rows.items()] + table += dtype_rows # Create multi-level index because first two rows are headers From c556c076dcb18bcfe584209fcd4b0ac0959ebb52 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 14 Oct 2021 11:05:04 -0400 Subject: [PATCH 07/10] Drop unnecessary MEG group. --- src/schema/rules/datatypes/meg.yaml | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/schema/rules/datatypes/meg.yaml b/src/schema/rules/datatypes/meg.yaml index 86d9a55dfc..2b77b6b209 100644 --- a/src/schema/rules/datatypes/meg.yaml +++ b/src/schema/rules/datatypes/meg.yaml @@ -40,20 +40,7 @@ - calibration # Third group -- suffixes: - - meg - extensions: - - .fif - entities: - subject: required - session: optional - task: required - acquisition: optional - run: optional - processing: optional - split: optional - -# Fourth group +# fif files with "acq-crosstalk" - suffixes: - meg extensions: From 59d92c515c021f99e1c1baf488221fa1e999f0df Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 15 Oct 2021 13:27:38 -0400 Subject: [PATCH 08/10] Try something. 
--- tools/schemacode/schema.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tools/schemacode/schema.py b/tools/schemacode/schema.py index 4b4bbc3193..b36f95e95c 100644 --- a/tools/schemacode/schema.py +++ b/tools/schemacode/schema.py @@ -378,6 +378,7 @@ def make_entity_table(schema, tablefmt="github", **kwargs): # Go through data types for dtype, dtype_specs in schema["rules"]["datatypes"].items(): dtype_rows = {} + duplicate_row_counter = 0 # each dtype could have multiple specs for i_dtype_spec, dtype_spec in enumerate(dtype_specs): @@ -403,16 +404,21 @@ def make_entity_table(schema, tablefmt="github", **kwargs): # Merge specs within dtypes if they share all of the same entities for existing_suffixes_str, existing_entities in dtype_rows.items(): if dtype_row == existing_entities: + # Combine suffixes from the existing row with ones from the new row dtype_rows.pop(existing_suffixes_str) - split_old_suffixes = existing_suffixes_str.split(" ") - split_suffixes = suffixes_str.split(" ") - split_new_suffixes = sorted(list(set(split_suffixes + split_old_suffixes))) - if "also" in split_new_suffixes: - split_new_suffixes.remove("also") - new_suffixes_str = " ".join(split_new_suffixes) - new_suffixes_str = "also " + new_suffixes_str + old_suffix_list = existing_suffixes_str.split(" ") + new_suffix_list = suffixes_str.split(" ") + comb_suffix_list = sorted(list(set(new_suffix_list + old_suffix_list))) + number_suffixes = list(filter(str.isnumeric, comb_suffix_list)) + if len(number_suffixes) == 1: + number = number_suffixes[0] + comb_suffix_list.remove(number) + new_suffixes_str = " ".join(comb_suffix_list) + new_suffixes_str = number + " " + new_suffixes_str + elif len(number_suffixes) > 1: + raise Exception("Something's wrong here.") else: - new_suffixes_str = " ".join(split_new_suffixes) + new_suffixes_str = " ".join(comb_suffix_list) dtype_rows[new_suffixes_str] = existing_entities break @@ -421,7 +427,7 @@ def 
make_entity_table(schema, tablefmt="github", **kwargs): # Create new lines for multiple specs with the same dtype and suffix, # but different entities # Unfortunately, the keys need to be unique - dtype_rows["also " + suffixes_str] = dtype_row + dtype_rows[str(duplicate_row_counter) + " " + suffixes_str] = dtype_row else: # Otherwise, just add the new suffix group From 2c3853353bad767ccd0b22c9690a9952adc78c9b Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 15 Oct 2021 14:05:25 -0400 Subject: [PATCH 09/10] Handle arbitrary numbers of rows and just use duplicate name. --- tools/schemacode/schema.py | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/tools/schemacode/schema.py b/tools/schemacode/schema.py index b36f95e95c..48e57eb86f 100644 --- a/tools/schemacode/schema.py +++ b/tools/schemacode/schema.py @@ -11,7 +11,7 @@ import yaml from tabulate import tabulate -from . import utils +import utils lgr = utils.get_logger() # Basic settings for output, for now just basic @@ -409,15 +409,21 @@ def make_entity_table(schema, tablefmt="github", **kwargs): old_suffix_list = existing_suffixes_str.split(" ") new_suffix_list = suffixes_str.split(" ") comb_suffix_list = sorted(list(set(new_suffix_list + old_suffix_list))) + + # Identify if the list of suffixes comes from an existing alternate row number_suffixes = list(filter(str.isnumeric, comb_suffix_list)) if len(number_suffixes) == 1: + # Suffixes come from an existing alternate row number = number_suffixes[0] comb_suffix_list.remove(number) new_suffixes_str = " ".join(comb_suffix_list) + # Retain the old number new_suffixes_str = number + " " + new_suffixes_str elif len(number_suffixes) > 1: + # The row exists already, but contains multiple numbers raise Exception("Something's wrong here.") else: + # It's a new row new_suffixes_str = " ".join(comb_suffix_list) dtype_rows[new_suffixes_str] = existing_entities @@ -426,17 +432,17 @@ def make_entity_table(schema, 
tablefmt="github", **kwargs): elif suffixes_str in dtype_rows.keys(): # Create new lines for multiple specs with the same dtype and suffix, # but different entities - # Unfortunately, the keys need to be unique + # Unfortunately, the keys need to be unique, so we include a number + # NOTE: This assumes that no suffix in BIDS will ever be purely numeric. dtype_rows[str(duplicate_row_counter) + " " + suffixes_str] = dtype_row + duplicate_row_counter += 1 else: # Otherwise, just add the new suffix group dtype_rows[suffixes_str] = dtype_row - # Reformat first column - dtype_rows = { - dtype + "
({})".format(k): v for k, v in dtype_rows.items() - } + # Add datatype to first column and reformat it + dtype_rows = {dtype + "
({})".format(k): v for k, v in dtype_rows.items()} dtype_rows = [[k] + v for k, v in dtype_rows.items()] table += dtype_rows @@ -451,6 +457,26 @@ def make_entity_table(schema, tablefmt="github", **kwargs): table = utils.drop_unused_entities(table) table = utils.flatten_multiindexed_columns(table) + # Remove fake numeric suffixes from first column + def _remove_numeric_suffixes(string): + import re + + suffix_str = re.findall("\((.+)\)", string) + # The "Format" row should be skipped + if not suffix_str: + return string + + suffix_str = suffix_str[0] # Only one parenthesis should appear + suffixes = suffix_str.split(" ") + suffixes = list(filter(lambda v: not str.isnumeric(v), suffixes)) + suffix_str2 = " ".join(suffixes) + new_string = string.replace(f"({suffix_str})", f"({suffix_str2})") + return new_string + + table[table.index.name] = table.index + table[table.index.name] = table[table.index.name].apply(_remove_numeric_suffixes) + table = table.set_index(table.index.name, drop=True) + # Print it as markdown table_str = tabulate(table, headers="keys", tablefmt=tablefmt) return table_str From 6632126dc3ca1feb2fa7efb7fafff8d420c9f6eb Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 15 Oct 2021 14:08:18 -0400 Subject: [PATCH 10/10] Forgot to revert the import change. --- tools/schemacode/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/schemacode/schema.py b/tools/schemacode/schema.py index 48e57eb86f..daa9011a83 100644 --- a/tools/schemacode/schema.py +++ b/tools/schemacode/schema.py @@ -11,7 +11,7 @@ import yaml from tabulate import tabulate -import utils +from . import utils lgr = utils.get_logger() # Basic settings for output, for now just basic