Merge pull request #3535 from Textualize/regex-error

Regex error
Textualize · Oct 22, 2024 · afcc5c5
2 parents 0f2f51b + 60f3b61
commit afcc5c5
Show file tree

Hide file tree

Showing 5 changed files with 37 additions and 3 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## [13.9.3] - 2024-10-22
+
+### Fixed
+
+- Fixed broken regex that may have resulted in poor performance. https://github.com/Textualize/rich/pull/3535
+
 ## [13.9.2] - 2024-10-04
 
 ### Fixed
@@ -2097,6 +2104,7 @@ Major version bump for a breaking change to `Text.stylize signature`, which corr
 
 - First official release, API still to be stabilized
 
+[13.9.3]: https://github.com/textualize/rich/compare/v13.9.2...v13.9.3
 [13.9.2]: https://github.com/textualize/rich/compare/v13.9.1...v13.9.2
 [13.9.1]: https://github.com/textualize/rich/compare/v13.9.0...v13.9.1
 [13.9.0]: https://github.com/textualize/rich/compare/v13.8.1...v13.9.0

diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "rich"
 homepage = "https://github.com/Textualize/rich"
 documentation = "https://rich.readthedocs.io/en/latest/"
-version = "13.9.2"
+version = "13.9.3"
 description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
 authors = ["Will McGugan <willmcgugan@gmail.com>"]
 license = "MIT"

diff --git a/rich/cells.py b/rich/cells.py
@@ -7,7 +7,9 @@
 from ._cell_widths import CELL_WIDTHS
 
 # Regex to match sequence of the most common character ranges
-_is_single_cell_widths = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$").match
+_is_single_cell_widths = re.compile(
+    "^[\u0020-\u007e\u00a0-\u02ff\u0370-\u0482\u2500-\u25FF]*$"
+).match  # NOTE(review): "$" also accepts a trailing "\n"; "\Z" would not -- confirm this is intended
 
 
 @lru_cache(4096)

diff --git a/rich/segment.py b/rich/segment.py
@@ -161,10 +161,14 @@ def split_cells(self, cut: int) -> Tuple["Segment", "Segment"]:
         If the cut point falls in the middle of a 2-cell wide character then it is replaced
         by two spaces, to preserve the display width of the parent segment.
 
+        Args:
+            cut (int): Offset within the segment to cut.
+
         Returns:
             Tuple[Segment, Segment]: Two segments.
         """
         text, style, control = self
+        assert cut >= 0
 
         if _is_single_cell_widths(text):
             # Fast path with all 1 cell characters

diff --git a/tests/test_cells.py b/tests/test_cells.py
@@ -1,5 +1,7 @@
+import string
+
 from rich import cells
-from rich.cells import chop_cells
+from rich.cells import _is_single_cell_widths, chop_cells
 
 
 def test_cell_len_long_string():
@@ -59,3 +61,21 @@ def test_chop_cells_mixed_width():
     """Mixed single and double-width characters."""
     text = "あ1り234が5と6う78"
     assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"]
+
+
+def test_is_single_cell_widths() -> None:
+    # Printable ASCII (code point >= 32) and box-drawing characters must match; emoji and hiragana must not
+    for character in string.printable:
+        if ord(character) >= 32:
+            assert _is_single_cell_widths(character)
+
+    BOX = "┌─┬┐│ ││├─┼┤│ ││├─┼┤├─┼┤│ ││└─┴┘"
+
+    for character in BOX:
+        assert _is_single_cell_widths(character)
+
+    for character in "💩😽":
+        assert not _is_single_cell_widths(character)
+
+    for character in "わさび":
+        assert not _is_single_cell_widths(character)