From c03724b550ab4735d39a8d6cd159631550e1e73c Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Wed, 24 Apr 2024 11:39:10 -0400
Subject: [PATCH 1/5] Refactor `abbr` Extension

A new `AbbrTreeprocessor` has been introduced, which replaces the now
deprecated `AbbrInlineProcessor`. Abbreviation processing now happens
after Attribute Lists, avoiding a conflict between the two extensions.
Fixes #1460.
---
 docs/changelog.md                         |  8 +++
 markdown/extensions/abbr.py               | 75 ++++++++++++++++++-----
 tests/test_syntax/extensions/test_abbr.py | 57 ++++++++++++++++-
 3 files changed, 124 insertions(+), 16 deletions(-)
diff --git a/docs/changelog.md b/docs/changelog.md
index 86b3b5fd..25476435 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -10,6 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [unreleased]
 
+### Changed
+
+#### Refactor `abbr` Extension
+
+A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated
+`AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists,
+avoiding a conflict between the two extensions (#1460).
+
 ### Fixed
 
 * Fixed links to source code on GitHub from the documentation (#1453).
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index 1c7185b2..6dbfcf51 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -25,7 +25,8 @@
 from . import Extension
 from ..blockprocessors import BlockProcessor
 from ..inlinepatterns import InlineProcessor
-from ..util import AtomicString
+from ..treeprocessors import Treeprocessor
+from ..util import AtomicString, deprecated
 import re
 import xml.etree.ElementTree as etree
 
@@ -34,22 +35,71 @@ class AbbrExtension(Extension):
     """ Abbreviation Extension for Python-Markdown. """
 
     def extendMarkdown(self, md):
-        """ Insert `AbbrPreprocessor` before `ReferencePreprocessor`. """
-        md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16)
-
-
-class AbbrPreprocessor(BlockProcessor):
-    """ Abbreviation Preprocessor - parse text for abbr references. """
+        """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
+        treeprocessor = AbbrTreeprocessor(md)
+        md.treeprocessors.register(treeprocessor, 'abbr', 7)
+        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, treeprocessor.abbrs), 'abbr', 16)
+
+
+class AbbrTreeprocessor(Treeprocessor):
+    """ Replace abbr text with `<abbr>` elements. """
+
+    def __init__(self, md: Markdown | None=None):
+        self.abbrs = {}
+        self.RE = None
+        super().__init__(md)
+
+    def iter_element(self, el, parent=None):
+        ''' Resursively iterate over elements, run regex on text and wrap matches in `abbr` tags. '''
+        for child in reversed(el):
+            self.iter_element(child, el)
+        if text := el.text:
+            for m in reversed(list(self.RE.finditer(text))):
+                abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]})
+                abbr.text = AtomicString(m.group(0))
+                abbr.tail = text[m.end():]
+                el.insert(0, abbr)
+                text = text[:m.start()]
+            el.text = text
+        if parent is not None and el.tail:  # explicit None test: Element truthiness is deprecated
+            tail = el.tail
+            index = list(parent).index(el) + 1
+            for m in reversed(list(self.RE.finditer(tail))):
+                abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]})
+                abbr.text = AtomicString(m.group(0))
+                abbr.tail = tail[m.end():]
+                parent.insert(index, abbr)
+                tail = tail[:m.start()]
+            el.tail = tail
+
+    def run(self, root: etree.Element) -> etree.Element | None:
+        ''' Step through tree to find known abbreviations. '''
+        if not self.abbrs:
+            # No abbrs defined. Skip running processor.
+            return
+        # Build and compile regex
+        self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in self.abbrs.keys()) })\\b")
+        # Step through tree and modify on matches
+        self.iter_element(root)
+        return
+
+
+class AbbrBlockprocessor(BlockProcessor):
+    """ Abbreviation Blockprocessor - parse text for abbr references. """
 
     RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
 
+    def __init__(self, parser, abbrs):
+        self.abbrs = abbrs
+        super().__init__(parser)
+
     def test(self, parent: etree.Element, block: str) -> bool:
         return True
 
     def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         """
         Find and remove all Abbreviation references from the text.
-        Each reference is set as a new `AbbrPattern` in the markdown instance.
+        Each reference is added to the abbrs collection.
 
         """
         block = blocks.pop(0)
@@ -57,9 +107,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         if m:
             abbr = m.group('abbr').strip()
             title = m.group('title').strip()
-            self.parser.md.inlinePatterns.register(
-                AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
-            )
+            self.abbrs[abbr] = title
             if block[m.end():].strip():
                 # Add any content after match back to blocks as separate block
                 blocks.insert(0, block[m.end():].lstrip('\n'))
@@ -71,11 +119,8 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         blocks.insert(0, block)
         return False
 
-    def _generate_pattern(self, text: str) -> str:
-        """ Given a string, returns a regex pattern to match that string. """
-        return f"(?P<abbr>\\b{ re.escape(text) }\\b)"
-
 
+@deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
 class AbbrInlineProcessor(InlineProcessor):
     """ Abbreviation inline pattern. """
 
diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py
index e11e8d30..61021368 100644
--- a/tests/test_syntax/extensions/test_abbr.py
+++ b/tests/test_syntax/extensions/test_abbr.py
@@ -60,7 +60,7 @@ def test_abbr_lower(self):
             )
         )
 
-    def test_abbr_multiple(self):
+    def test_abbr_multiple_in_text(self):
         self.assertMarkdownRenders(
             self.dedent(
                 """
@@ -79,6 +79,44 @@ def test_abbr_multiple(self):
             )
         )
 
+    def test_abbr_multiple_in_tail(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                *The* HTML specification
+                is maintained by the W3C.
+
+                *[HTML]: Hyper Text Markup Language
+                *[W3C]:  World Wide Web Consortium
+                """
+            ),
+            self.dedent(
+                """
+                <p><em>The</em> <abbr title="Hyper Text Markup Language">HTML</abbr> specification
+                is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>
+                """
+            )
+        )
+
+    def test_abbr_multiple_nested(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                The *HTML* specification
+                is maintained by the *W3C*.
+
+                *[HTML]: Hyper Text Markup Language
+                *[W3C]:  World Wide Web Consortium
+                """
+            ),
+            self.dedent(
+                """
+                <p>The <em><abbr title="Hyper Text Markup Language">HTML</abbr></em> specification
+                is maintained by the <em><abbr title="World Wide Web Consortium">W3C</abbr></em>.</p>
+                """
+            )
+        )
+
     def test_abbr_override(self):
         self.assertMarkdownRenders(
             self.dedent(
@@ -325,3 +363,20 @@ def test_abbr_bracket(self):
                 """
             )
         )
+
+    def test_abbr_with_attr_list(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                *[abbr]: Abbreviation Definition
+
+                ![Image with abbr in title](abbr.png){title="Image with abbr in title"}
+                """
+            ),
+            self.dedent(
+                """
+                <p><img alt="Image with abbr in title" src="abbr.png" title="Image with abbr in title" /></p>
+                """
+            ),
+            extensions = ['abbr', 'attr_list']
+        )

From 3110977699f4f229089f24dafcd7bd142a651318 Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Wed, 24 Apr 2024 13:47:59 -0400
Subject: [PATCH 2/5] Add type annotations and fix docstring typos in abbr

---
 markdown/extensions/abbr.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index 6dbfcf51..e8b827a8 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -27,9 +27,14 @@
 from ..inlinepatterns import InlineProcessor
 from ..treeprocessors import Treeprocessor
 from ..util import AtomicString, deprecated
+from typing import TYPE_CHECKING
 import re
 import xml.etree.ElementTree as etree
 
+if TYPE_CHECKING:  # pragma: no cover
+    from .. import Markdown
+    from ..blockparser import BlockParser
+
 
 class AbbrExtension(Extension):
     """ Abbreviation Extension for Python-Markdown. """
@@ -42,15 +47,15 @@ def extendMarkdown(self, md):
 
 
 class AbbrTreeprocessor(Treeprocessor):
-    """ Replace abbr text with `<abbr>` elements. """
+    """ Replace abbreviation text with `<abbr>` elements. """
 
-    def __init__(self, md: Markdown | None=None):
+    def __init__(self, md: Markdown | None = None):
         self.abbrs = {}
         self.RE = None
         super().__init__(md)
 
-    def iter_element(self, el, parent=None):
-        ''' Resursively iterate over elements, run regex on text and wrap matches in `abbr` tags. '''
+    def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
+        ''' Recursively iterate over elements, run regex on text and wrap matches in `abbr` tags. '''
         for child in reversed(el):
             self.iter_element(child, el)
         if text := el.text:
@@ -89,7 +94,7 @@ class AbbrBlockprocessor(BlockProcessor):
 
     RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
 
-    def __init__(self, parser, abbrs):
+    def __init__(self, parser: BlockParser, abbrs: dict):
         self.abbrs = abbrs
         super().__init__(parser)
 
@@ -98,8 +103,8 @@ def test(self, parent: etree.Element, block: str) -> bool:
 
     def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         """
-        Find and remove all Abbreviation references from the text.
-        Each reference is added to the abbrs collection.
+        Find and remove all abbreviation references from the text.
+        Each reference is added to the abbreviation collection.
 
         """
         block = blocks.pop(0)

From 7d9d4d2e7307eabc0a5ebee2caa8c15ff0efed6a Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Wed, 24 Apr 2024 14:37:23 -0400
Subject: [PATCH 3/5] Clear abbreviations on Markdown.reset()

---
 docs/changelog.md                         |  2 ++
 markdown/extensions/abbr.py               | 27 +++++++++++++++--------
 tests/test_syntax/extensions/test_abbr.py | 16 +++++++++++++-
 3 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 25476435..0802167c 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -18,6 +18,8 @@ A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated
 `AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists,
 avoiding a conflict between the two extensions (#1460).
 
+A call to `Markdown.reset()` now clears all previously defined abbreviations.
+
 ### Fixed
 
 * Fixed links to source code on GitHub from the documentation (#1453).
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index e8b827a8..ef157625 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -39,19 +39,28 @@
 class AbbrExtension(Extension):
     """ Abbreviation Extension for Python-Markdown. """
 
+    def __init__(self, **kwargs):
+        """ Initiate Extension and set up configs. """
+        super().__init__(**kwargs)
+        self.abbrs = {}
+
+    def reset(self):
+        """ Clear all previously defined abbreviations. """
+        self.abbrs.clear()
+
     def extendMarkdown(self, md):
         """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
-        treeprocessor = AbbrTreeprocessor(md)
-        md.treeprocessors.register(treeprocessor, 'abbr', 7)
-        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, treeprocessor.abbrs), 'abbr', 16)
+        md.registerExtension(self)
+        md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
+        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)
 
 
 class AbbrTreeprocessor(Treeprocessor):
     """ Replace abbreviation text with `<abbr>` elements. """
 
-    def __init__(self, md: Markdown | None = None):
-        self.abbrs = {}
-        self.RE = None
+    def __init__(self, md: Markdown | None = None, abbrs: dict | None = None):
+        self.abbrs: dict = abbrs if abbrs is not None else {}
+        self.RE: re.Pattern[str] | None = None
         super().__init__(md)
 
     def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -> None:
@@ -80,7 +89,7 @@ def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -
     def run(self, root: etree.Element) -> etree.Element | None:
         ''' Step through tree to find known abbreviations. '''
         if not self.abbrs:
-            # No abbrs defined. Skip running processor.
+            # No abbreviations defined. Skip running processor.
             return
         # Build and compile regex
         self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in self.abbrs.keys()) })\\b")
@@ -90,12 +99,12 @@ def run(self, root: etree.Element) -> etree.Element | None:
 
 
 class AbbrBlockprocessor(BlockProcessor):
-    """ Abbreviation Blockprocessor - parse text for abbr references. """
+    """ Parse text for abbreviation references. """
 
     RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
 
     def __init__(self, parser: BlockParser, abbrs: dict):
-        self.abbrs = abbrs
+        self.abbrs: dict = abbrs
         super().__init__(parser)
 
     def test(self, parent: etree.Element, block: str) -> bool:
diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py
index 61021368..012e5718 100644
--- a/tests/test_syntax/extensions/test_abbr.py
+++ b/tests/test_syntax/extensions/test_abbr.py
@@ -21,6 +21,8 @@
 """
 
 from markdown.test_tools import TestCase
+from markdown import Markdown
+from markdown.extensions.abbr import AbbrExtension
 
 
 class TestAbbr(TestCase):
@@ -378,5 +380,17 @@ def test_abbr_with_attr_list(self):
                 <p><img alt="Image with abbr in title" src="abbr.png" title="Image with abbr in title" /></p>
                 """
             ),
-            extensions = ['abbr', 'attr_list']
+            extensions=['abbr', 'attr_list']
         )
+
+    def test_abbr_reset(self):
+        ext = AbbrExtension()
+        md = Markdown(extensions=[ext])
+        md.convert('*[abbr]: Abbreviation Definition')
+        self.assertEqual(ext.abbrs, {'abbr': 'Abbreviation Definition'})
+        md.convert('*[ABBR]: Capitalised Abbreviation')
+        self.assertEqual(ext.abbrs, {'abbr': 'Abbreviation Definition', 'ABBR': 'Capitalised Abbreviation'})
+        md.reset()
+        self.assertEqual(ext.abbrs, {})
+        md.convert('*[foo]: Foo Definition')
+        self.assertEqual(ext.abbrs, {'foo': 'Foo Definition'})

From 3b757e09bd6b72c940ee474d410f6ce0f589bf4d Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Wed, 24 Apr 2024 15:23:56 -0400
Subject: [PATCH 4/5] Note name change AbbrPreprocessor=>AbbrBlockprocessor

---
 docs/changelog.md           | 2 ++
 markdown/extensions/abbr.py | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/docs/changelog.md b/docs/changelog.md
index 0802167c..7a91c8ae 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -18,6 +18,8 @@ A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated
 `AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists,
 avoiding a conflict between the two extensions (#1460).
 
+The `AbbrPreprocessor` class has been renamed to `AbbrBlockprocessor`, which better reflects what it is. `AbbrPreprocessor` has been deprecated.
+
 A call to `Markdown.reset()` now clears all previously defined abbreviations.
 
 ### Fixed
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index ef157625..8458ade6 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -134,6 +134,9 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         return False
 
 
+AbbrPreprocessor = deprecated("This class has been renamed to `AbbrBlockprocessor`.")(AbbrBlockprocessor)
+
+
 @deprecated("This class will be removed in the future; use `AbbrTreeprocessor` instead.")
 class AbbrInlineProcessor(InlineProcessor):
     """ Abbreviation inline pattern. """

From 684f05f469b96c443f748022e9cfe66ff3d046be Mon Sep 17 00:00:00 2001
From: Waylan Limberg <waylan.limberg@icloud.com>
Date: Thu, 25 Apr 2024 09:41:23 -0400
Subject: [PATCH 5/5] final cleanup

---
 docs/changelog.md           | 3 ++-
 markdown/extensions/abbr.py | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 7a91c8ae..4c308899 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -18,7 +18,8 @@ A new `AbbrTreeprocessor` has been introduced, which replaces the now deprecated
 `AbbrInlineProcessor`. Abbreviation processing now happens after Attribute Lists,
 avoiding a conflict between the two extensions (#1460).
 
-The `AbbrPreprocessor` class has been renamed to `AbbrBlockprocessor`, which better reflects what it is. `AbbrPreprocessor` has been deprecated.
+The `AbbrPreprocessor` class has been renamed to `AbbrBlockprocessor`, which
+better reflects what it is. `AbbrPreprocessor` has been deprecated.
 
 A call to `Markdown.reset()` now clears all previously defined abbreviations.
 
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index 8458ade6..1f81cab3 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -92,10 +92,9 @@ def run(self, root: etree.Element) -> etree.Element | None:
             # No abbreviations defined. Skip running processor.
             return
         # Build and compile regex
-        self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in self.abbrs.keys()) })\\b")
+        self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(k) for k in sorted(self.abbrs, key=len, reverse=True)) })\\b")
         # Step through tree and modify on matches
         self.iter_element(root)
-        return
 
 
 class AbbrBlockprocessor(BlockProcessor):