[ENH] Link to filename element definitions in filename templates (#1228)

* Try linking to entities in make_filename_template.
* Update tools/schemacode/bidsschematools/render.py Co-authored-by: Chris Markiewicz <effigies@gmail.com>
* Update render.py
* Link datatype.
* Link to suffix as well.
* Extensions.
* Fix.
* Update render.py
* Run black.
* Make things wonderful.
* Update render.py
* Update test.
* Fix PDF build?
* Update macros.py
* Okay, fix more stuff.
* Le sigh
* Docstring improvements.
* Update tools/schemacode/bidsschematools/render.py Co-authored-by: Chris Markiewicz <effigies@gmail.com>
* Update render.py
* Glossary headings have to be lower case.
* Update render.py
* Update tools/schemacode/bidsschematools/render.py Co-authored-by: Chris Markiewicz <effigies@gmail.com> Co-authored-by: Chris Markiewicz <effigies@gmail.com> Co-authored-by: Chris Markiewicz <markiewicz@stanford.edu>
bids-standard · Aug 23, 2022 · ea25a5f · ea25a5f
1 parent fd0b7d4
commit ea25a5f
Show file tree

Hide file tree

Showing 6 changed files with 246 additions and 34 deletions.
diff --git a/pdf_build_src/process_markdowns.py b/pdf_build_src/process_markdowns.py
@@ -7,13 +7,13 @@
 well.
 """
 
-from datetime import datetime
 import json
 import os
 import posixpath
 import re
 import subprocess
 import sys
+from datetime import datetime
 
 import numpy as np
 
@@ -626,9 +626,14 @@ def process_macros(duplicated_src_dir_path):
                 # switch "use_pipe" flag OFF to render examples
                 if "make_filetree_example" in function_string:
                     function_string = function_string.replace(
-                    ")",
-                    ", False)"
+                        ")",
+                        ", False)",
                     )
+
+                # switch "pdf_format" ON to render filename templates
+                if "make_filename_template" in function_string:
+                    function_string = function_string.replace(")", ", pdf_format=True)")
+
                 # Run the function to get the output
                 new = eval(function_string)
                 # Replace the code snippet with the function output

diff --git a/tools/mkdocs_macros_bids/macros.py b/tools/mkdocs_macros_bids/macros.py
@@ -61,12 +61,17 @@ def _get_source_path(level=1):
     return caller.f_locals["_Context__self"]["page"].file.src_path
 
 
-def make_filename_template(**kwargs):
-    """Generate a filename template snippet from the schema, based on specific
-    filters.
+def make_filename_template(pdf_format=False, **kwargs):
+    """Generate a filename template snippet from the schema, based on specific filters.
 
     Parameters
     ----------
+    pdf_format : bool, optional
+        If True, the filename template will be compiled as a standard markdown code block,
+        without any hyperlinks, so that the specification's PDF build will look right.
+        If False, the filename template will use HTML and include hyperlinks.
+        This works on the website.
+        Default is False.
     kwargs : dict
         Keyword arguments used to filter the schema.
         Example kwargs that may be used include: "suffixes", "datatypes",
@@ -79,7 +84,11 @@ def make_filename_template(**kwargs):
         in the schema, after filtering.
     """
     schema_obj = schema.load_schema()
-    codeblock = render.make_filename_template(schema_obj, **kwargs)
+    codeblock = render.make_filename_template(
+        schema_obj,
+        pdf_format=pdf_format,
+        **kwargs,
+    )
     return codeblock
 
 

diff --git a/tools/schemacode/bidsschematools/render.py b/tools/schemacode/bidsschematools/render.py
@@ -10,7 +10,7 @@
 from tabulate import tabulate
 
 from . import utils
-from .schema import BIDSSchemaError, Namespace, filter_schema
+from .schema import BIDSSchemaError, Namespace, filter_schema, load_schema
 
 lgr = utils.get_logger()
 # Basic settings for output, for now just basic
@@ -239,17 +239,35 @@ def _add_entity(filename_template, entity_pattern, requirement_level):
     return filename_template
 
 
-def make_filename_template(schema, n_dupes_to_combine=6, **kwargs):
+def make_filename_template(
+    schema=None,
+    src_path=None,
+    n_dupes_to_combine=6,
+    pdf_format=False,
+    **kwargs,
+):
     """Create codeblocks containing example filename patterns for a given datatype.
 
+    By default, this function uses HTML, instead of direct Markdown codeblocks,
+    so that it can embed hyperlinks within the filenames.
+
     Parameters
     ----------
     schema : dict
         The schema object, which is a dictionary with nested dictionaries and
         lists stored within it.
+    src_path : str | None
+        The file where this macro is called, which may be explicitly provided
+        by the "page.file.src_path" variable.
     n_dupes_to_combine : int
         The minimum number of suffixes/extensions to combine in the template as
         <suffix>/<extension>.
+    pdf_format : bool, optional
+        If True, the filename template will be compiled as a standard markdown code block,
+        without any hyperlinks, so that the specification's PDF build will look right.
+        If False, the filename template will use HTML and include hyperlinks.
+        This works on the website.
+        Default is False.
     kwargs : dict
         Keyword arguments used to filter the schema.
         Example kwargs that may be used include: "suffixes", "datatypes",
@@ -260,49 +278,104 @@ def make_filename_template(schema, n_dupes_to_combine=6, **kwargs):
     codeblock : str
         A multiline string containing the filename templates for file types
         in the schema, after filtering.
+
+    Notes
+    -----
+    src_path is only used for the final "SPEC_ROOT" substitution; hyperlinks use absolute HTML
+    paths. It would be nice, at some point, to use src_path in conjunction with paths to
+    markdown files, like other functions do, instead.
     """
+    if not schema:
+        schema = load_schema()
+
     schema = Namespace(filter_schema(schema.to_dict(), **kwargs))
     entity_order = schema["rules"]["entities"]
+    entities_path = "/99-appendices/09-entities.html"
+    glossary_path = "/99-appendices/14-glossary.html"
 
     paragraph = ""
     # Parent directories
-    paragraph += "{}-<{}>/\n\t[{}-<{}>/]\n".format(
-        schema["objects"]["entities"]["subject"]["name"],
-        schema["objects"]["entities"]["subject"]["format"],
-        schema["objects"]["entities"]["session"]["name"],
-        schema["objects"]["entities"]["session"]["format"],
+    sub_string = (
+        f'{schema["objects"]["entities"]["subject"]["name"]}-'
+        f'<{schema["objects"]["entities"]["subject"]["format"]}>'
     )
+    paragraph += utils._link_with_html(
+        sub_string,
+        html_path=entities_path,
+        heading="sub",
+        pdf_format=pdf_format,
+    )
+    paragraph += "/\n\t["
+    ses_string = (
+        f'{schema["objects"]["entities"]["session"]["name"]}-'
+        f'<{schema["objects"]["entities"]["session"]["format"]}>'
+    )
+    paragraph += utils._link_with_html(
+        ses_string,
+        html_path=entities_path,
+        heading="ses",
+        pdf_format=pdf_format,
+    )
+    paragraph += "/]\n"
 
     datatypes = schema.rules.datatypes
 
     for datatype in datatypes:
-        # XXX We should have a full rethink of the schema hierarchy...
+        # NOTE: We should have a full rethink of the schema hierarchy
+        # so that derivatives aren't treated like a "datatype"
         if datatype == "derivatives":
             continue
-        paragraph += "\t\t{}/\n".format(datatype)
+
+        paragraph += "\t\t"
+        paragraph += utils._link_with_html(
+            datatype,
+            html_path=glossary_path,
+            heading=f"{datatype.lower()}-datatypes",
+            pdf_format=pdf_format,
+        )
+        paragraph += "/\n"
 
         # Unique filename patterns
         for group in datatypes[datatype].values():
             string = "\t\t\t"
             for ent in entity_order:
                 if "enum" in schema["objects"]["entities"][ent].keys():
                     # Entity key-value pattern with specific allowed values
-                    ent_format = "{}-<{}>".format(
-                        schema["objects"]["entities"][ent]["name"],
-                        "|".join(schema["objects"]["entities"][ent]["enum"]),
+                    ent_format = (
+                        f'{schema["objects"]["entities"][ent]["name"]}-'
+                        f'<{"|".join(schema["objects"]["entities"][ent]["enum"])}>'
+                    )
+                    ent_format = utils._link_with_html(
+                        ent_format,
+                        html_path=entities_path,
+                        heading=schema["objects"]["entities"][ent]["name"],
+                        pdf_format=pdf_format,
                     )
                 else:
                     # Standard entity key-value pattern with simple label/index
-                    ent_format = "{}-<{}>".format(
+                    ent_format = utils._link_with_html(
                         schema["objects"]["entities"][ent]["name"],
+                        html_path=entities_path,
+                        heading=schema["objects"]["entities"][ent]["name"],
+                        pdf_format=pdf_format,
+                    )
+                    ent_format += "-"
+                    ent_format += "<" if pdf_format else "&lt;"
+                    ent_format += utils._link_with_html(
                         schema["objects"]["entities"][ent].get("format", "label"),
+                        html_path=glossary_path,
+                        heading=(
+                            f'{schema["objects"]["entities"][ent].get("format", "label")}-formats'
+                        ),
+                        pdf_format=pdf_format,
                     )
+                    ent_format += ">" if pdf_format else "&gt;"
 
                 if ent in group["entities"]:
                     if isinstance(group["entities"][ent], dict):
                         if "enum" in group["entities"][ent].keys():
-                            # Overwrite the filename pattern based on the valid values
-                            ent_format = "{}-<{}>".format(
+                            # Overwrite the filename pattern using valid values
+                            ent_format = "{}-&lt;{}&gt;".format(
                                 schema["objects"]["entities"][ent]["name"],
                                 "|".join(group["entities"][ent]["enum"]),
                             )
@@ -318,36 +391,88 @@ def make_filename_template(schema, n_dupes_to_combine=6, **kwargs):
             # In cases of large numbers of suffixes,
             # we use the "suffix" variable and expect a table later in the spec
             if len(group["suffixes"]) >= n_dupes_to_combine:
-                suffix = "_<suffix>"
-                string += suffix
+                string += "_"
+                string += "<" if pdf_format else "&lt;"
+                string += utils._link_with_html(
+                    "suffix",
+                    html_path=glossary_path,
+                    heading="suffix-common_principles",
+                    pdf_format=pdf_format,
+                )
+                string += ">" if pdf_format else "&gt;"
                 strings = [string]
             else:
-                strings = [string + "_" + suffix for suffix in group["suffixes"]]
+                strings = []
+                for suffix in group["suffixes"]:
+                    # The glossary indexes by the suffix identifier (TwoPE instead of 2PE),
+                    # but the rules reference the actual suffix string (2PE instead of TwoPE),
+                    # so we need to look it up.
+                    suffix_id = [
+                        k for k, v in schema["objects"]["suffixes"].items() if v["value"] == suffix
+                    ][0]
+
+                    suffix_string = utils._link_with_html(
+                        suffix,
+                        html_path=glossary_path,
+                        heading=f"{suffix_id.lower()}-suffixes",
+                        pdf_format=pdf_format,
+                    )
+                    strings.append(f"{string}_{suffix_string}")
 
             # Add extensions
             full_strings = []
             extensions = group["extensions"]
             extensions = [ext if ext != "*" else ".<extension>" for ext in extensions]
-            extensions = utils.combine_extensions(extensions)
             if len(extensions) >= n_dupes_to_combine:
                 # Combine exts when there are many, but keep JSON separate
                 if ".json" in extensions:
                     extensions = [".<extension>", ".json"]
                 else:
                     extensions = [".<extension>"]
 
+            ext_headings = []
+            for extension in extensions:
+                # The glossary indexes by the extension identifier (niigz instead of .nii.gz),
+                # but the rules reference the actual extension string (.nii.gz instead of niigz),
+                # so we need to look it up.
+                ext_id = [
+                    k
+                    for k, v in schema["objects"]["extensions"].items()
+                    if v["value"] == extension
+                ]
+                if ext_id:
+                    ext_id = ext_id[0]
+                    ext_headings.append(f"{ext_id.lower()}-extensions")
+                else:
+                    ext_headings.append("extension-common_principles")
+
+            extensions = utils.combine_extensions(
+                extensions,
+                html_path=glossary_path,
+                heading_lst=ext_headings,
+                pdf_format=pdf_format,
+            )
+
             for extension in extensions:
                 for string in strings:
-                    new_string = string + extension
+                    new_string = f"{string}{extension}"
                     full_strings.append(new_string)
 
             full_strings = sorted(full_strings)
             if full_strings:
                 paragraph += "\n".join(full_strings) + "\n"
 
     paragraph = paragraph.rstrip()
-    codeblock = "Template:\n```Text\n" + paragraph + "\n```"
+    if pdf_format:
+        codeblock = f"Template:\n```Text\n{paragraph}\n```"
+    else:
+        codeblock = (
+            f'Template:\n<div class="highlight"><pre><code>{paragraph}\n</code></pre></div>'
+        )
+
     codeblock = codeblock.expandtabs(4)
+    codeblock = codeblock.replace("SPEC_ROOT", get_relpath(src_path))
+
     return codeblock
 
 

diff --git a/tools/schemacode/bidsschematools/tests/test_render.py b/tools/schemacode/bidsschematools/tests/test_render.py
@@ -72,7 +72,7 @@ def test_make_filename_template(schema_obj, schema_dir):
         * all files under the datatype rules subdirectory have corresponding entries.
         This may need to be updated for schema hierarchy changes.
     """
-    filename_template = render.make_filename_template(schema_obj)
+    filename_template = render.make_filename_template(schema_obj, pdf_format=True)
 
     # Test predefined substrings
     expected_template_part = """

diff --git a/tools/schemacode/bidsschematools/tests/test_utils.py b/tools/schemacode/bidsschematools/tests/test_utils.py
@@ -5,7 +5,7 @@ def test_combine_extensions():
     """A unit test for utils.combine_extensions."""
     test_extensions = ["nii.gz", "nii", "json"]
     target_combined = ["nii[.gz]", "json"]
-    test_combined = utils.combine_extensions(test_extensions)
+    test_combined = utils.combine_extensions(test_extensions, pdf_format=True)
     assert test_combined == target_combined