Python-Markdown · waylan · Jun 11, 2024 · May 22, 2024 · May 23, 2024 · Jun 3, 2024
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -23,6 +23,24 @@ better reflects what it is. `AbbrPreprocessor` has been deprecated.
 
 A call to `Markdown.reset()` now clears all previously defined abbreviations.
 
+Abbreviations are now sorted by length (longest first) before `AbbrTreeprocessor`
+runs, so that multi-word abbreviations are applied even if an abbreviation
+exists for one of their component words. (#1465)
+
+Empty abbreviations are now skipped by `AbbrTreeprocessor`. This avoids applying
+abbr tags to text without a title value. This also allows disabling an
+abbreviation, which may be useful for documents that use two terms with
+identical abbreviations.
+
+Added an optional `glossary` configuration option to the abbreviations extension.
+This provides a simple and efficient way to apply abbreviations to every page.
+
+Added an optional `use_last_abbr` configuration option to the abbreviations
+extension. Default (`True`) maintains the existing behavior. `False` causes
+the extension to only use the first instance of an abbreviation, rather than
+the last.
+
+
 ### Fixed
 
 * Fixed links to source code on GitHub from the documentation (#1453).

diff --git a/docs/extensions/abbreviations.md b/docs/extensions/abbreviations.md
@@ -1,5 +1,10 @@
 title: Abbreviations Extension
 
+ABBR
+
+*[ABBR]: Abbreviation
+*[ABBR]: Override Ignored
+
 Abbreviations
 =============
 
@@ -46,7 +51,20 @@ Usage
 See [Extensions](index.md) for general extension usage. Use `abbr` as the name
 of the extension.
 
-This extension does not accept any special configuration options.
+The following options are provided to configure the output:
+
+* **`use_last_abbr`**:
+    `True` (the default) to use the last instance of an abbreviation; `False` to use the first instance.
+
+    Setting this to `False` is useful when auto-appending glossary files to pages while still wanting the
+    page's abbreviations to take precedence. Not recommended for use with the `glossary` option.
+
+* **`glossary`**:
+    A dictionary where the key is the abbreviation and the value is the definition.
+
+    The abbreviations from this dictionary will be the default abbreviations applied to every page, with
+    abbreviations defined on the page taking precedence (unless `use_last_abbr` is set to `False`).
+    Abbreviations defined on the page itself use the Markdown syntax described on this page.
 
 A trivial example:
 

diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
@@ -23,6 +23,7 @@
 from __future__ import annotations
 
 from . import Extension
+from ..util import parseBoolValue
 from ..blockprocessors import BlockProcessor
 from ..inlinepatterns import InlineProcessor
 from ..treeprocessors import Treeprocessor
@@ -41,18 +42,41 @@ class AbbrExtension(Extension):
 
     def __init__(self, **kwargs):
         """ Initiate Extension and set up configs. """
+        self.config = {
+            'glossary': [
+                {},
+                'A dictionary where the `key` is the abbreviation and the `value` is the definition.'
+                "Default: `{}`"
+            ],
+        }
+        """ Default configuration options. """
         super().__init__(**kwargs)
         self.abbrs = {}
+        self.glossary = {}
 
     def reset(self):
         """ Clear all previously defined abbreviations. """
         self.abbrs.clear()
+        if (self.glossary):
+            self.abbrs.update(self.glossary)
+
+    def reset_glossary(self):
+        """ Clear all abbreviations from the glossary. """
+        self.glossary.clear()
+
+    def load_glossary(self, dictionary : dict[str, str]):
+        """Adds `dictionary` to our glossary. Any abbreviations that already exist will be overwritten."""
+        if dictionary:
+            self.glossary = {**dictionary, **self.glossary}
 
     def extendMarkdown(self, md):
         """ Insert `AbbrTreeprocessor` and `AbbrBlockprocessor`. """
+        if (self.config['glossary'][0]):
+            self.load_glossary(self.config['glossary'][0])
+        self.abbrs.update(self.glossary)
         md.registerExtension(self)
         md.treeprocessors.register(AbbrTreeprocessor(md, self.abbrs), 'abbr', 7)
-        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs), 'abbr', 16)
+        md.parser.blockprocessors.register(AbbrBlockprocessor(md.parser, self.abbrs, self.getConfigs()), 'abbr', 16)
 
 
 class AbbrTreeprocessor(Treeprocessor):
@@ -69,13 +93,14 @@ def iter_element(self, el: etree.Element, parent: etree.Element | None = None) -
             self.iter_element(child, el)
         if text := el.text:
             for m in reversed(list(self.RE.finditer(text))):
-                abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]})
-                abbr.text = AtomicString(m.group(0))
-                abbr.tail = text[m.end():]
-                el.insert(0, abbr)
-                text = text[:m.start()]
+                if self.abbrs[m.group(0)]:
+                    abbr = etree.Element('abbr', {'title': self.abbrs[m.group(0)]})
+                    abbr.text = AtomicString(m.group(0))
+                    abbr.tail = text[m.end():]
+                    el.insert(0, abbr)
+                    text = text[:m.start()]
             el.text = text
-        if parent and el.tail:
+        if parent is not None and el.tail:
             tail = el.tail
             index = list(parent).index(el) + 1
             for m in reversed(list(self.RE.finditer(tail))):
@@ -92,7 +117,9 @@ def run(self, root: etree.Element) -> etree.Element | None:
             # No abbreviations defined. Skip running processor.
             return
         # Build and compile regex
-        self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in self.abbrs) })\\b")
+        abbr_list = list(self.abbrs.keys())
+        abbr_list.sort(key=len, reverse=True)
+        self.RE = re.compile(f"\\b(?:{ '|'.join(re.escape(key) for key in abbr_list) })\\b")
         # Step through tree and modify on matches
         self.iter_element(root)
 
@@ -102,7 +129,7 @@ class AbbrBlockprocessor(BlockProcessor):
 
     RE = re.compile(r'^[*]\[(?P<abbr>[^\\]*?)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
 
-    def __init__(self, parser: BlockParser, abbrs: dict):
+    def __init__(self, parser: BlockParser, abbrs: dict, config: dict):
         self.abbrs: dict = abbrs
         super().__init__(parser)
 

diff --git a/tests/test_syntax/extensions/test_abbr.py b/tests/test_syntax/extensions/test_abbr.py
@@ -20,6 +20,7 @@
 License: BSD (see LICENSE.md for details).
 """
 
+import os
 from markdown.test_tools import TestCase
 from markdown import Markdown
 from markdown.extensions.abbr import AbbrExtension
@@ -119,21 +120,67 @@ def test_abbr_multiple_nested(self):
             )
         )
 
-    def test_abbr_override(self):
+    def test_abbr_glossary(self):
+
+        glossary = {
+            "ABBR" : "Abbreviation",
+            "abbr" : "Abbreviation",
+            "HTML" : "Hyper Text Markup Language",
+            "W3C" : "World Wide Web Consortium"
+        }
+
         self.assertMarkdownRenders(
             self.dedent(
                 """
                 ABBR
+                abbr
 
-                *[ABBR]: Ignored
-                *[ABBR]: The override
+                HTML
+                W3C
                 """
             ),
             self.dedent(
                 """
-                <p><abbr title="The override">ABBR</abbr></p>
+                <p><abbr title="Abbreviation">ABBR</abbr>
+                <abbr title="Abbreviation">abbr</abbr></p>
+                <p><abbr title="Hyper Text Markup Language">HTML</abbr>
+                <abbr title="World Wide Web Consortium">W3C</abbr></p>
                 """
-            )
+            ),
+            extensions=[AbbrExtension(glossary=glossary)]
+        )
+
+    def test_abbr_glossary_2(self):
+
+        glossary = {
+            "ABBR" : "Abbreviation",
+            "abbr" : "Abbreviation",
+            "HTML" : "Hyper Text Markup Language",
+            "W3C" : "World Wide Web Consortium"
+        }
+
+        glossary_2 = {
+            "ABBR" : "New Abbreviation"
+        }
+
+        abbr_ext = AbbrExtension(glossary=glossary)
+        abbr_ext.load_glossary(glossary_2)
+
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                ABBR abbr
+
+                HTML W3C
+                """
+            ),
+            self.dedent(
+                """
+                <p><abbr title="New Abbreviation">ABBR</abbr> <abbr title="Abbreviation">abbr</abbr></p>
+                <p><abbr title="Hyper Text Markup Language">HTML</abbr> <abbr title="World Wide Web Consortium">W3C</abbr></p>
+                """
+            ),
+            extensions=[abbr_ext]
         )
 
     def test_abbr_nested(self):
@@ -383,6 +430,47 @@ def test_abbr_with_attr_list(self):
             extensions=['abbr', 'attr_list']
         )
 
+    def test_abbr_superset_vs_subset(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                abbr, SS, and abbr-SS should have different definitions.
+
+                *[abbr]: Abbreviation Definition
+                *[abbr-SS]: Abbreviation Superset Definition
+                *[SS]: Superset Definition
+                """
+            ),
+            self.dedent(
+                """
+                <p><abbr title="Abbreviation Definition">abbr</abbr>, """
+                + """<abbr title="Superset Definition">SS</abbr>, """
+                + """and <abbr title="Abbreviation Superset Definition">abbr-SS</abbr> """
+                + """should have different definitions.</p>
+                """
+            )
+        )
+
+    def test_abbr_empty(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                """
+                *[abbr]: Abbreviation Definition
+
+                abbr
+
+                *[abbr]:
+
+                Testing document text.
+                """
+            ),
+            self.dedent(
+                """
+                <p>abbr</p>\n<p>Testing document text.</p>
+                """
+            )
+        )
+
     def test_abbr_reset(self):
         ext = AbbrExtension()
         md = Markdown(extensions=[ext])
@@ -394,3 +482,7 @@ def test_abbr_reset(self):
         self.assertEqual(ext.abbrs, {})
         md.convert('*[foo]: Foo Definition')
         self.assertEqual(ext.abbrs, {'foo': 'Foo Definition'})
+
+import unittest
+if __name__ == '__main__':
+    unittest.main()