From a7e53e7798a14234bc9212bd18e2930417dbbd41 Mon Sep 17 00:00:00 2001
From: Alessio Gravili <alessio@gravili.de>
Date: Wed, 16 Oct 2024 08:48:23 -0600
Subject: [PATCH] [lexical-markdown] Feature: add ability to hook into the
 import process for multiline element transformers (#6682)

Co-authored-by: Sherry <potatowagon@meta.com>
---
 .../lexical-markdown/src/MarkdownImport.ts    |  24 +++-
 .../src/MarkdownTransformers.ts               |  13 ++
 .../__tests__/unit/LexicalMarkdown.test.ts    | 117 ++++++++++++++++++
 3 files changed, 149 insertions(+), 5 deletions(-)
diff --git a/packages/lexical-markdown/src/MarkdownImport.ts b/packages/lexical-markdown/src/MarkdownImport.ts
index 99b7900b144..2f7dc27324b 100644
--- a/packages/lexical-markdown/src/MarkdownImport.ts
+++ b/packages/lexical-markdown/src/MarkdownImport.ts
@@ -117,16 +117,30 @@ function $importMultiline(
   multilineElementTransformers: Array<MultilineElementTransformer>,
   rootNode: ElementNode,
 ): [boolean, number] {
-  for (const {
-    regExpStart,
-    regExpEnd,
-    replace,
-  } of multilineElementTransformers) {
+  for (const transformer of multilineElementTransformers) {
+    const {handleImportAfterStartMatch, regExpEnd, regExpStart, replace} =
+      transformer;
+
     const startMatch = lines[startLineIndex].match(regExpStart);
     if (!startMatch) {
       continue; // Try next transformer
     }
 
+    if (handleImportAfterStartMatch) {
+      const result = handleImportAfterStartMatch({
+        lines,
+        rootNode,
+        startLineIndex,
+        startMatch,
+        transformer,
+      });
+      if (result === null) {
+        continue;
+      } else if (result) {
+        return result;
+      }
+    }
+
     const regexpEndRegex: RegExp | undefined =
       typeof regExpEnd === 'object' && 'regExp' in regExpEnd
         ? regExpEnd.regExp
diff --git a/packages/lexical-markdown/src/MarkdownTransformers.ts b/packages/lexical-markdown/src/MarkdownTransformers.ts
index b724b3ba0ca..2a335156213 100644
--- a/packages/lexical-markdown/src/MarkdownTransformers.ts
+++ b/packages/lexical-markdown/src/MarkdownTransformers.ts
@@ -75,6 +75,19 @@ export type ElementTransformer = {
 };
 
 export type MultilineElementTransformer = {
+  /**
+   * Use this function to manually handle the import process, once the `regExpStart` has matched successfully.
+   * Without providing this function, the default behavior is to match until `regExpEnd` is found, or until the end of the document if `regExpEnd.optional` is true.
+   *
+   * @returns a tuple, null, or undefined. The first element of the returned tuple is a boolean indicating if a multiline element was imported. The second element is the index of the last line that was processed. If null is returned, the next multilineElementTransformer will be tried. If undefined is returned, the default behavior will be used.
+   */
+  handleImportAfterStartMatch?: (args: {
+    lines: Array<string>;
+    rootNode: ElementNode;
+    startLineIndex: number;
+    startMatch: RegExpMatchArray;
+    transformer: MultilineElementTransformer;
+  }) => [boolean, number] | null | undefined;
   dependencies: Array<Klass<LexicalNode>>;
   /**
    * `export` is called when the `$convertToMarkdownString` is called to convert the editor state into markdown.
diff --git a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts
index d4c1a90148a..be7199eefab 100644
--- a/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts
+++ b/packages/lexical-markdown/src/__tests__/unit/LexicalMarkdown.test.ts
@@ -23,6 +23,7 @@ import {
   TRANSFORMERS,
 } from '../..';
 import {
+  CODE,
   MultilineElementTransformer,
   normalizeMarkdown,
 } from '../../MarkdownTransformers';
@@ -58,6 +59,115 @@ const MDX_HTML_TRANSFORMER: MultilineElementTransformer = {
   type: 'multiline-element',
 };
 
+const CODE_TAG_COUNTER_EXAMPLE: MultilineElementTransformer = {
+  dependencies: CODE.dependencies,
+  export: CODE.export,
+  handleImportAfterStartMatch({lines, rootNode, startLineIndex, startMatch}) {
+    const regexpEndRegex: RegExp | undefined = /[ \t]*```$/;
+
+    const isEndOptional = false;
+
+    let endLineIndex = startLineIndex;
+    const linesLength = lines.length;
+
+    let openedSubStartMatches = 0;
+
+    // Scan each line from the opening fence onward, counting nested fence openers so that only the fence closing the outermost block ends the scan. The closing match could also be on the same line as the opening match.
+    while (endLineIndex < linesLength) {
+      const potentialSubStartMatch =
+        lines[endLineIndex].match(/^[ \t]*```(\w+)?/);
+
+      const endMatch = regexpEndRegex
+        ? lines[endLineIndex].match(regexpEndRegex)
+        : null;
+
+      if (potentialSubStartMatch) {
+        if (endMatch) {
+          if ((potentialSubStartMatch.index ?? 0) < (endMatch.index ?? 0)) {
+            openedSubStartMatches++;
+          }
+        } else {
+          openedSubStartMatches++;
+        }
+      }
+
+      if (endMatch) {
+        openedSubStartMatches--;
+      }
+
+      if (!endMatch || openedSubStartMatches > 0) {
+        if (
+          !isEndOptional ||
+          (isEndOptional && endLineIndex < linesLength - 1) // Optional end, but didn't reach the end of the document yet => continue searching for potential closing match
+        ) {
+          endLineIndex++;
+          continue; // Search next line for closing match
+        }
+      }
+
+      // Now, check whether the closing match found is actually the opening match itself (same line, same index).
+      // If it is, we need to continue searching for the actual closing match.
+      if (
+        endMatch &&
+        startLineIndex === endLineIndex &&
+        endMatch.index === startMatch.index
+      ) {
+        endLineIndex++;
+        continue; // Search next line for closing match
+      }
+
+      // At this point, we have found the closing match. Next: collect the text between the opening and closing match,
+      // excluding the matched fences themselves, as one entry per line.
+      const linesInBetween: string[] = [];
+
+      if (endMatch && startLineIndex === endLineIndex) {
+        linesInBetween.push(
+          lines[startLineIndex].slice(
+            startMatch[0].length,
+            -endMatch[0].length,
+          ),
+        );
+      } else {
+        for (let i = startLineIndex; i <= endLineIndex; i++) {
+          if (i === startLineIndex) {
+            const text = lines[i].slice(startMatch[0].length);
+            linesInBetween.push(text); // Also include empty text
+          } else if (i === endLineIndex && endMatch) {
+            const text = lines[i].slice(0, -endMatch[0].length);
+            linesInBetween.push(text); // Also include empty text
+          } else {
+            linesInBetween.push(lines[i]);
+          }
+        }
+      }
+
+      if (
+        CODE.replace(
+          rootNode,
+          null,
+          startMatch,
+          endMatch,
+          linesInBetween,
+          true,
+        ) !== false
+      ) {
+        // Report success together with the index of the last consumed line; only a single multiline element is processed per call.
+        return [true, endLineIndex];
+      }
+
+      // The replace function returned false, despite finding the matching open and close tags => this transformer does not want to handle it.
+      // Stop scanning and fall through to the "not handled" result below. NOTE(review): $importMultiline returns any non-null tuple as-is, so later transformers are not tried here; return null instead if that is desired.
+      break;
+    }
+
+    // This transformer did not import anything: either no closing match was found or replace declined.
+    return [false, startLineIndex];
+  },
+  regExpStart: CODE.regExpStart,
+  replace: CODE.replace,
+  type: 'multiline-element',
+};
+
 describe('Markdown', () => {
   type Input = Array<{
     html: string;
@@ -344,6 +454,13 @@ describe('Markdown', () => {
       shouldMergeAdjacentLines: true,
       skipExport: true,
     },
+    {
+      customTransformers: [CODE_TAG_COUNTER_EXAMPLE],
+      // Ensure special ``` code block supports nested code blocks
+      html: '<pre spellcheck="false" data-language="ts" data-highlight-language="ts"><span style="white-space: pre-wrap;">Code\n```ts\nSub Code\n```</span></pre>',
+      md: '```ts\nCode\n```ts\nSub Code\n```\n```',
+      skipExport: true,
+    },
   ];
 
   const HIGHLIGHT_TEXT_MATCH_IMPORT: TextMatchTransformer = {