Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize glob functionality. #108

Merged
merged 7 commits into from
Dec 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions newsfragments/108.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactored glob functionality to support a more generalized solution with support for platform-specific path separators.
5 changes: 3 additions & 2 deletions zipp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re

from .py310compat import text_encoding
from .glob import translate
from .glob import Translator


__all__ = ['Path']
Expand Down Expand Up @@ -397,7 +397,8 @@ def glob(self, pattern):
raise ValueError(f"Unacceptable pattern: {pattern!r}")

prefix = re.escape(self.at)
matches = re.compile(prefix + translate(pattern)).fullmatch
tr = Translator(seps='/')
matches = re.compile(prefix + tr.translate(pattern)).fullmatch
return map(self._next, filter(matches, self.root.namelist()))

def rglob(self, pattern):
Expand Down
112 changes: 89 additions & 23 deletions zipp/glob.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,97 @@
import os
import re


def translate(pattern):
r"""
Given a glob pattern, produce a regex that matches it.
# Separator characters recognized on this platform: ``os.sep``, followed by
# ``os.altsep`` only when it is truthy (the ``* bool(...)`` factor yields an
# empty string otherwise).
_default_seps = os.sep + str(os.altsep) * bool(os.altsep)

>>> translate('*.txt')
'[^/]*\\.txt'
>>> translate('a?txt')
'a[^/]txt'
>>> translate('**/*')
'.*/[^/]*'

class Translator:
"""
>>> Translator('xyz')
Traceback (most recent call last):
...
AssertionError: Invalid separators

>>> Translator('')
Traceback (most recent call last):
...
AssertionError: Invalid separators
"""
return ''.join(map(replace, separate(pattern)))

seps: str

def __init__(self, seps: str = _default_seps):
    """
    Create a translator for the given path separator characters.

    ``seps`` must be a non-empty string made up only of characters
    found in ``_default_seps``; it is stored as ``self.seps``.

    Raises ``AssertionError("Invalid separators")`` otherwise.
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts
    # are removed when Python runs with -O, which would silently disable
    # this check. The exception type and message are unchanged.
    if not (seps and set(seps) <= set(_default_seps)):
        raise AssertionError("Invalid separators")
    self.seps = seps

def translate(self, pattern):
    """
    Convert a glob pattern into a regular expression string.

    The pattern is first translated by :meth:`translate_core` and the
    result is then wrapped by :meth:`extend`.
    """
    core = self.translate_core(pattern)
    return self.extend(core)

def extend(self, pattern):
    r"""
    Wrap a translated regex with the pattern-wide adjustments.

    The regex is placed inside a '(?s:...)' non-capturing group so
    that '.' also matches newlines (which are valid in Unix names),
    and '\Z' is appended so that the expression must reach the end of
    the string even when ``match`` is used instead of ``fullmatch``.
    """
    dotall_group = rf'(?s:{pattern})'
    return dotall_group + r'\Z'

def translate_core(self, pattern):
    r"""
    Translate a glob pattern into the core of a regex (no anchoring).

    The pattern is validated by :meth:`restrict_rglob`, adjusted by
    :meth:`star_not_empty`, split by :func:`separate`, and each piece
    is rewritten by :meth:`replace` before the pieces are joined.

    >>> t = Translator()
    >>> t.translate_core('*.txt').replace('\\\\', '')
    '[^/]*\\.txt'
    >>> t.translate_core('a?txt')
    'a[^/]txt'
    >>> t.translate_core('**/*').replace('\\\\', '')
    '.*/[^/][^/]*'
    """
    self.restrict_rglob(pattern)
    adjusted = self.star_not_empty(pattern)
    return ''.join(self.replace(piece) for piece in separate(adjusted))

def replace(self, match):
    """
    Perform the replacements for a match from :func:`separate`.

    A bracketed set (the match's ``set`` group) is returned unchanged.
    Any other text is regex-escaped and its glob wildcards rewritten:
    ``**`` becomes ``.*``, ``*`` becomes any run of non-separator
    characters, and ``?`` becomes exactly one non-separator character.
    """
    # One character class excluding every configured separator, shared
    # by ``*`` and ``?`` so both wildcards agree on what a separator is
    # (``?`` previously excluded only '/', regardless of ``self.seps``).
    not_sep = rf'[^{re.escape(self.seps)}]'
    return match.group('set') or (
        re.escape(match.group(0))
        # Handle ``**`` first so it is not consumed as two single stars.
        .replace('\\*\\*', r'.*')
        .replace('\\*', not_sep + '*')
        .replace('\\?', not_sep)
    )

def restrict_rglob(self, pattern):
    """
    Reject patterns where ** shares a path segment with other text.

    The pattern is split on runs of separator characters and every
    resulting segment that contains ** must be exactly **.

    >>> Translator().translate('**foo')
    Traceback (most recent call last):
    ...
    ValueError: ** must appear alone in a path segment
    """
    splitter = rf'[{re.escape(self.seps)}]+'
    for part in re.split(splitter, pattern):
        if part != '**' and '**' in part:
            raise ValueError("** must appear alone in a path segment")

def star_not_empty(self, pattern):
    """
    Rewrite each path segment that is exactly ``*`` as ``?*``.

    This makes a lone star require at least one character, so it
    cannot match an empty segment. All other segments are unchanged.
    """
    segment_re = rf'[^{re.escape(self.seps)}]+'

    def expand(found):
        text = found.group(0)
        # Only a segment consisting solely of '*' is rewritten.
        return {'*': '?*'}.get(text, text)

    return re.sub(segment_re, expand, pattern)


def separate(pattern):
Expand All @@ -25,16 +104,3 @@ def separate(pattern):
['a', '[?]', 'txt']
"""
return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)


def replace(match):
    """
    Turn one match produced by :func:`separate` into regex text.

    A bracketed set (the ``set`` group) is passed through untouched;
    anything else is regex-escaped and its ``**``, ``*`` and ``?``
    wildcards are replaced with their regex equivalents.
    """
    bracket_set = match.group('set')
    if bracket_set:
        return bracket_set

    escaped = re.escape(match.group(0))
    # ``**`` goes first so it is not consumed as two single stars.
    escaped = escaped.replace('\\*\\*', r'.*')
    escaped = escaped.replace('\\*', r'[^/]*')
    return escaped.replace('\\?', r'[^/]')
Loading