From fcf465633831d09446f12e38ff6b416c439e27cd Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 1 May 2023 23:13:19 +0200 Subject: [PATCH] Add `<include>` block for including non-code files (#370) * Add `<include>` for including normal text/markdown files * Also strip trailing characters for non-code blocks * Add test case for `convert_include` * Fix broken tests Was including the last newline (when this should be removed with .`rstrip()`) * Fix quality (reformat w/ `black`) * Replace canonical tests with comments --- src/doc_builder/convert_md_to_mdx.py | 59 +++++++++++++-------- tests/data/convert_include_dummy.txt | 14 +++++ tests/test_convert_md_to_mdx.py | 77 +++++++++++++++++++++++++--- 3 files changed, 122 insertions(+), 28 deletions(-) create mode 100644 tests/data/convert_include_dummy.txt diff --git a/src/doc_builder/convert_md_to_mdx.py b/src/doc_builder/convert_md_to_mdx.py index 31a7f8aa..63446edd 100644 --- a/src/doc_builder/convert_md_to_mdx.py +++ b/src/doc_builder/convert_md_to_mdx.py @@ -115,44 +115,58 @@ def clean_doctest_syntax(text): return text -_re_literalinclude = re.compile(r"([ \t]*)(((?!).)*)<\/literalinclude>", re.DOTALL) +_re_include_template = r"([ \t]*)<{include_name}>(((?!<{include_name}>).)*)<\/{include_name}>" +_re_include = re.compile(_re_include_template.format(include_name="include"), re.DOTALL) +_re_literalinclude = re.compile(_re_include_template.format(include_name="literalinclude"), re.DOTALL) -def convert_literalinclude_helper(match, page_info): +def convert_file_include_helper(match, page_info, is_code=True): """ - Convert a literalinclude regex match into markdown code blocks by opening a file and - copying specified start-end section into markdown code block. + Convert an `include` or `literalinclude` regex match into markdown blocks or markdown code blocks, + by opening a file and copying specified start-end section into markdown block. 
+ + If `is_code` is True, the block will be rendered as a code block, otherwise it will be rendered + as a markdown block. """ - literalinclude_info = json.loads(match[2].strip()) + include_info = json.loads(match[2].strip()) indent = match[1] + include_name = "literalinclude" if is_code else "include" if tempfile.gettempdir() in str(page_info["path"]): - return "\n`Please restart doc-builder preview commands to see literalinclude rendered`\n" - file = page_info["path"].parent / literalinclude_info["path"] + return f"\n`Please restart doc-builder preview commands to see {include_name} rendered`\n" + file = page_info["path"].parent / include_info["path"] with open(file, "r", encoding="utf-8-sig") as reader: lines = reader.readlines() - literalinclude = lines # defaults to entire file - if "start-after" in literalinclude_info or "end-before" in literalinclude_info: + include = lines # defaults to entire file + if "start-after" in include_info or "end-before" in include_info: start_after, end_before = -1, -1 for idx, line in enumerate(lines): line = line.strip() line = re.sub(r"\W+$", "", line) - if line.endswith(literalinclude_info["start-after"]): + if line.endswith(include_info["start-after"]): start_after = idx + 1 - if line.endswith(literalinclude_info["end-before"]): + if line.endswith(include_info["end-before"]): end_before = idx if start_after == -1 or end_before == -1: - raise ValueError(f"The following 'literalinclude' does NOT exist:\n{match[0]}") - literalinclude = lines[start_after:end_before] - literalinclude = [indent + line[literalinclude_info.get("dedent", 0) :] for line in literalinclude] - literalinclude = "".join(literalinclude) - return f"""{indent}```{literalinclude_info.get('language', '')}\n{literalinclude.rstrip()}\n{indent}```""" + raise ValueError(f"The following '{include_name}' does NOT exist:\n{match[0]}") + include = lines[start_after:end_before] + include = [indent + line[include_info.get("dedent", 0) :] for line in include] + include = 
"".join(include).rstrip() + return f"""{indent}```{include_info.get('language', '')}\n{include}\n{indent}```""" if is_code else include + + +def convert_include(text, page_info): + """ + Convert an `include` into markdown. + """ + text = _re_include.sub(lambda m: convert_file_include_helper(m, page_info, is_code=False), text) + return text def convert_literalinclude(text, page_info): """ - Convert a literalinclude into markdown code blocks. + Convert a `literalinclude` into markdown code blocks. """ - text = _re_literalinclude.sub(lambda m: convert_literalinclude_helper(m, page_info), text) + text = _re_literalinclude.sub(lambda m: convert_file_include_helper(m, page_info, is_code=True), text) return text @@ -168,10 +182,13 @@ def convert_md_docstring_to_mdx(docstring, page_info): def process_md(text, page_info): """ Processes markdown by: - 1. Converting literalinclude - 2. Converting special characters - 3. Converting image links + 1. Converting include + 2. Converting literalinclude + 3. Converting special characters + 4. Clean doctest syntax + 5. 
Converting image links """ + text = convert_include(text, page_info) text = convert_literalinclude(text, page_info) text = convert_special_chars(text) text = clean_doctest_syntax(text) diff --git a/tests/data/convert_include_dummy.txt b/tests/data/convert_include_dummy.txt new file mode 100644 index 00000000..8ead5bab --- /dev/null +++ b/tests/data/convert_include_dummy.txt @@ -0,0 +1,14 @@ + +# This is the first header +Other text 1 + + + +# This is the second header +Other text 2 + + + +# This is the third header +Other text 3 + \ No newline at end of file diff --git a/tests/test_convert_md_to_mdx.py b/tests/test_convert_md_to_mdx.py index cdd63ebb..1eab17b8 100644 --- a/tests/test_convert_md_to_mdx.py +++ b/tests/test_convert_md_to_mdx.py @@ -19,6 +19,7 @@ from doc_builder.convert_md_to_mdx import ( convert_img_links, + convert_include, convert_literalinclude, convert_md_to_mdx, convert_special_chars, @@ -129,16 +130,78 @@ def test_process_md(self): &lt;>""" self.assertEqual(process_md(text, page_info), expected_conversion) + def test_convert_include(self): + path = Path(__file__).resolve() + page_info = {"path": path} + + # canonical test: + # + # { + # "path": "./data/convert_include_dummy.txt", + # "start-after": "START header_1", + # "end-before": "END header_1" + # } + # + + # test entire file + text = """ +{"path": "./data/convert_include_dummy.txt"} +""" + expected_conversion = """ +# This is the first header +Other text 1 + + + +# This is the second header +Other text 2 + + + +# This is the third header +Other text 3 +""" + self.assertEqual(convert_include(text, page_info), expected_conversion) + + # test with indent + text = """Some text + +{"path": "./data/convert_include_dummy.txt", +"start-after": "START header_1", +"end-before": "END header_1"} +""" + expected_conversion = """Some text + # This is the first header + Other text 1""" + self.assertEqual(convert_include(text, page_info), expected_conversion) + + # test with dedent + text = """Some text 
+ +{"path": "./data/convert_include_dummy.txt", +"start-after": "START header_1", +"end-before": "END header_1", +"dedent": 10} +""" + expected_conversion = """Some text + the first header + 1""" + self.assertEqual(convert_include(text, page_info), expected_conversion) + def test_convert_literalinclude(self): path = Path(__file__).resolve() page_info = {"path": path} - # test canonical - text = """ -{"path": "./data/convert_literalinclude_dummy.txt", -"language": "python", -"start-after": "START python_import", -"end-before": "END python_import"} -""" + + # canonical test: + # + # { + # "path": "./data/convert_literalinclude_dummy.txt", + # "language": "python", + # "start-after": "START python_import", + # "end-before": "END python_import" + # } + # + # test entire file text = """ {"path": "./data/convert_literalinclude_dummy.txt",