From 3f7fcc6794f496d5d5e40335b361a20fc82ca3d1 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 7 Sep 2024 14:39:48 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=91=8C=20Improve=20parsing=20of=20nested?= =?UTF-8?q?=20amsmath=20(#119)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous logic was problematic for amsmath blocks nested in other blocks (such as blockquotes). The new parsing code now principally follows the logic in `markdown_it/rules_block/fence.py` (see also https://spec.commonmark.org/0.30/#fenced-code-blocks), except that: 1. it allows for a closing tag on the same line as the opening tag, and 2. it does not allow for an opening tag without a closing tag (i.e. no auto-closing) --- mdit_py_plugins/amsmath/__init__.py | 71 +++++++++++++++++------------ tests/fixtures/amsmath.md | 47 +++++++++++++++++-- 2 files changed, 85 insertions(+), 33 deletions(-) diff --git a/mdit_py_plugins/amsmath/__init__.py b/mdit_py_plugins/amsmath/__init__.py index 0aed5c1..0712618 100644 --- a/mdit_py_plugins/amsmath/__init__.py +++ b/mdit_py_plugins/amsmath/__init__.py @@ -54,7 +54,7 @@ # whose total width is the actual width of the contents; # thus they can be used as a component in a containing expression -RE_OPEN = re.compile(r"\\begin\{(" + "|".join(ENVIRONMENTS) + r")([\*]?)\}") +RE_OPEN = r"\\begin\{(" + "|".join(ENVIRONMENTS) + r")([\*]?)\}" def amsmath_plugin( @@ -95,47 +95,60 @@ def render_amsmath_block( md.add_render_rule("amsmath", render_amsmath_block) -def match_environment(string: str) -> None | tuple[str, str, int]: - match_open = RE_OPEN.match(string) - if not match_open: - return None - environment = match_open.group(1) - numbered = match_open.group(2) - match_close = re.search( - r"\\end\{" + environment + numbered.replace("*", r"\*") + "\\}", string - ) - if not match_close: - return None - return (environment, numbered, match_close.end()) - - def amsmath_block( state: StateBlock, startLine: int, endLine: 
int, silent: bool ) -> bool: + # note the code principally follows the logic in markdown_it/rules_block/fence.py, + # except that: + # (a) it allows for closing tag on same line as opening tag + # (b) it does not allow for opening tag without closing tag (i.e. no auto-closing) + if is_code_block(state, startLine): return False - begin = state.bMarks[startLine] + state.tShift[startLine] + # does the first line contain the beginning of an amsmath environment + first_start = state.bMarks[startLine] + state.tShift[startLine] + first_end = state.eMarks[startLine] + first_text = state.src[first_start:first_end] - outcome = match_environment(state.src[begin:]) - if not outcome: + if not (match_open := re.match(RE_OPEN, first_text)): return False - environment, numbered, endpos = outcome - endpos += begin - - line = startLine - while line < endLine: - if endpos >= state.bMarks[line] and endpos <= state.eMarks[line]: - # line for end of block math found ... - state.line = line + 1 + + # construct the closing tag + environment = match_open.group(1) + numbered = match_open.group(2) + closing = rf"\end{{{match_open.group(1)}{match_open.group(2)}}}" + + # start looking for the closing tag, including the current line + nextLine = startLine - 1 + + while True: + nextLine += 1 + if nextLine >= endLine: + # reached the end of the block without finding the closing tag + return False + + next_start = state.bMarks[nextLine] + state.tShift[nextLine] + next_end = state.eMarks[nextLine] + if next_start < first_end and state.sCount[nextLine] < state.blkIndent: + # non-empty line with negative indent should stop the list: + # - \begin{align} + # test + return False + + if state.src[next_start:next_end].rstrip().endswith(closing): + # found the closing tag break - line += 1 + + state.line = nextLine + 1 if not silent: token = state.push("amsmath", "math", 0) token.block = True - token.content = state.src[begin:endpos] + token.content = state.getLines( + startLine, state.line, 
state.sCount[startLine], False + ) token.meta = {"environment": environment, "numbered": numbered} - token.map = [startLine, line] + token.map = [startLine, nextLine] return True diff --git a/tests/fixtures/amsmath.md b/tests/fixtures/amsmath.md index ed83488..0bea6ab 100644 --- a/tests/fixtures/amsmath.md +++ b/tests/fixtures/amsmath.md @@ -11,6 +11,15 @@ a = 1 . +equation environment on one line: +. +\begin{equation}a = 1\end{equation} +. +
+\begin{equation}a = 1\end{equation} +
+. + equation* environment: . \begin{equation*} @@ -181,13 +190,43 @@ equation environment, in list:
  • \begin{equation} - a = 1 - \end{equation} +a = 1 +\end{equation}
  • . +equation environment, in block quote: +. +> \begin{matrix} +> -0.707 & 0.408 & 0.577 \\ +> -0.707 & -0.408 & -0.577 \\ +> -0. & -0.816 & 0.577 +> \end{matrix} + +> \begin{equation} +a = 1 +\end{equation} +. +
    +
    +\begin{matrix} +-0.707 & 0.408 & 0.577 \\ +-0.707 & -0.408 & -0.577 \\ +-0. & -0.816 & 0.577 +\end{matrix} +
    +
    +
    +
    +\begin{equation} +a = 1 +\end{equation} +
    +
    +. + `alignat` environment and HTML escaping . \begin{alignat}{3} @@ -242,7 +281,7 @@ Indented by 4 spaces, DISABLE-CODEBLOCKS .
    \begin{equation} - a = 1 - \end{equation} +a = 1 +\end{equation}
    .