From f10d098191b46292c3305c1dc084031499dc51c3 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 8 Oct 2022 15:28:49 -0400 Subject: [PATCH] gh-98098: Move zipfile into a package. --- Lib/{zipfile.py => zipfile/__init__.py} | 387 +----------------------- 1 file changed, 7 insertions(+), 380 deletions(-) rename Lib/{zipfile.py => zipfile/__init__.py} (88%) diff --git a/Lib/zipfile.py b/Lib/zipfile/__init__.py similarity index 88% rename from Lib/zipfile.py rename to Lib/zipfile/__init__.py index b6465373c107d50..ead0740a0eefec8 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile/__init__.py @@ -6,17 +6,13 @@ import binascii import importlib.util import io -import itertools import os -import posixpath import shutil import stat import struct import sys import threading import time -import contextlib -import pathlib try: import zlib # We may need its compression method @@ -2183,381 +2179,12 @@ def _compile(file, optimize=-1): return (fname, archivename) -def _parents(path): - """ - Given a path with elements separated by - posixpath.sep, generate all parents of that path. - - >>> list(_parents('b/d')) - ['b'] - >>> list(_parents('/b/d/')) - ['/b'] - >>> list(_parents('b/d/f/')) - ['b/d', 'b'] - >>> list(_parents('b')) - [] - >>> list(_parents('')) - [] - """ - return itertools.islice(_ancestry(path), 1, None) - - -def _ancestry(path): - """ - Given a path with elements separated by - posixpath.sep, generate all elements of that path - - >>> list(_ancestry('b/d')) - ['b/d', 'b'] - >>> list(_ancestry('/b/d/')) - ['/b/d', '/b'] - >>> list(_ancestry('b/d/f/')) - ['b/d/f', 'b/d', 'b'] - >>> list(_ancestry('b')) - ['b'] - >>> list(_ancestry('')) - [] - """ - path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: - yield path - path, tail = posixpath.split(path) - - -_dedupe = dict.fromkeys -"""Deduplicate an iterable in original order""" - - -def _difference(minuend, subtrahend): - """ - Return items in minuend not in subtrahend, retaining order - with O(1) lookup. - """ - return itertools.filterfalse(set(subtrahend).__contains__, minuend) - - -class CompleteDirs(ZipFile): - """ - A ZipFile subclass that ensures that implied directories - are always included in the namelist. - """ - - @staticmethod - def _implied_dirs(names): - parents = itertools.chain.from_iterable(map(_parents, names)) - as_dirs = (p + posixpath.sep for p in parents) - return _dedupe(_difference(as_dirs, names)) - - def namelist(self): - names = super(CompleteDirs, self).namelist() - return names + list(self._implied_dirs(names)) - - def _name_set(self): - return set(self.namelist()) - - def resolve_dir(self, name): - """ - If the name represents a directory, return that name - as a directory (with the trailing slash). - """ - names = self._name_set() - dirname = name + '/' - dir_match = name not in names and dirname in names - return dirname if dir_match else name - - @classmethod - def make(cls, source): - """ - Given a source (filename or zipfile), return an - appropriate CompleteDirs subclass. - """ - if isinstance(source, CompleteDirs): - return source - - if not isinstance(source, ZipFile): - return cls(source) - - # Only allow for FastLookup when supplied zipfile is read-only - if 'r' not in source.mode: - cls = CompleteDirs - - source.__class__ = cls - return source - - -class FastLookup(CompleteDirs): - """ - ZipFile subclass to ensure implicit - dirs exist and are resolved rapidly. - """ - - def namelist(self): - with contextlib.suppress(AttributeError): - return self.__names - self.__names = super(FastLookup, self).namelist() - return self.__names - - def _name_set(self): - with contextlib.suppress(AttributeError): - return self.__lookup - self.__lookup = super(FastLookup, self)._name_set() - return self.__lookup - - -class Path: - """ - A pathlib-compatible interface for zip files. - - Consider a zip file with this structure:: - - . - ├── a.txt - └── b - ├── c.txt - └── d - └── e.txt - - >>> data = io.BytesIO() - >>> zf = ZipFile(data, 'w') - >>> zf.writestr('a.txt', 'content of a') - >>> zf.writestr('b/c.txt', 'content of c') - >>> zf.writestr('b/d/e.txt', 'content of e') - >>> zf.filename = 'mem/abcde.zip' - - Path accepts the zipfile object itself or a filename - - >>> root = Path(zf) - - From there, several path operations are available. - - Directory iteration (including the zip file itself): +from ._path import ( # noqa: E402 + Path, - >>> a, b = root.iterdir() - >>> a - Path('mem/abcde.zip', 'a.txt') - >>> b - Path('mem/abcde.zip', 'b/') + # used privately for tests + CompleteDirs, # noqa: F401 +) - name property: - - >>> b.name - 'b' - - join with divide operator: - - >>> c = b / 'c.txt' - >>> c - Path('mem/abcde.zip', 'b/c.txt') - >>> c.name - 'c.txt' - - Read text: - - >>> c.read_text() - 'content of c' - - existence: - - >>> c.exists() - True - >>> (b / 'missing.txt').exists() - False - - Coercion to string: - - >>> import os - >>> str(c).replace(os.sep, posixpath.sep) - 'mem/abcde.zip/b/c.txt' - - At the root, ``name``, ``filename``, and ``parent`` - resolve to the zipfile. Note these attributes are not - valid and will raise a ``ValueError`` if the zipfile - has no filename. - - >>> root.name - 'abcde.zip' - >>> str(root.filename).replace(os.sep, posixpath.sep) - 'mem/abcde.zip' - >>> str(root.parent) - 'mem' - """ - - __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" - - def __init__(self, root, at=""): - """ - Construct a Path from a ZipFile or filename. - - Note: When the source is an existing ZipFile object, - its type (__class__) will be mutated to a - specialized type. If the caller wishes to retain the - original type, the caller should either create a - separate ZipFile object or pass a filename. - """ - self.root = FastLookup.make(root) - self.at = at - - def open(self, mode='r', *args, pwd=None, **kwargs): - """ - Open this entry as text or binary following the semantics - of ``pathlib.Path.open()`` by passing arguments through - to io.TextIOWrapper(). - """ - if self.is_dir(): - raise IsADirectoryError(self) - zip_mode = mode[0] - if not self.exists() and zip_mode == 'r': - raise FileNotFoundError(self) - stream = self.root.open(self.at, zip_mode, pwd=pwd) - if 'b' in mode: - if args or kwargs: - raise ValueError("encoding args invalid for binary operation") - return stream - else: - kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) - return io.TextIOWrapper(stream, *args, **kwargs) - - @property - def name(self): - return pathlib.Path(self.at).name or self.filename.name - - @property - def suffix(self): - return pathlib.Path(self.at).suffix or self.filename.suffix - - @property - def suffixes(self): - return pathlib.Path(self.at).suffixes or self.filename.suffixes - - @property - def stem(self): - return pathlib.Path(self.at).stem or self.filename.stem - - @property - def filename(self): - return pathlib.Path(self.root.filename).joinpath(self.at) - - def read_text(self, *args, **kwargs): - kwargs["encoding"] = io.text_encoding(kwargs.get("encoding")) - with self.open('r', *args, **kwargs) as strm: - return strm.read() - - def read_bytes(self): - with self.open('rb') as strm: - return strm.read() - - def _is_child(self, path): - return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") - - def _next(self, at): - return self.__class__(self.root, at) - - def is_dir(self): - return not self.at or self.at.endswith("/") - - def is_file(self): - return self.exists() and not self.is_dir() - - def exists(self): - return self.at in self.root._name_set() - - def iterdir(self): - if not self.is_dir(): - raise ValueError("Can't listdir a file") - subs = map(self._next, self.root.namelist()) - return filter(self._is_child, subs) - - def __str__(self): - return posixpath.join(self.root.filename, self.at) - - def __repr__(self): - return self.__repr.format(self=self) - - def joinpath(self, *other): - next = posixpath.join(self.at, *other) - return self._next(self.root.resolve_dir(next)) - - __truediv__ = joinpath - - @property - def parent(self): - if not self.at: - return self.filename.parent - parent_at = posixpath.dirname(self.at.rstrip('/')) - if parent_at: - parent_at += '/' - return self._next(parent_at) - - -def main(args=None): - import argparse - - description = 'A simple command-line interface for zipfile module.' - parser = argparse.ArgumentParser(description=description) - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('-l', '--list', metavar='', - help='Show listing of a zipfile') - group.add_argument('-e', '--extract', nargs=2, - metavar=('', ''), - help='Extract zipfile into target dir') - group.add_argument('-c', '--create', nargs='+', - metavar=('', ''), - help='Create zipfile from sources') - group.add_argument('-t', '--test', metavar='', - help='Test if a zipfile is valid') - parser.add_argument('--metadata-encoding', metavar='', - help='Specify encoding of member names for -l, -e and -t') - args = parser.parse_args(args) - - encoding = args.metadata_encoding - - if args.test is not None: - src = args.test - with ZipFile(src, 'r', metadata_encoding=encoding) as zf: - badfile = zf.testzip() - if badfile: - print("The following enclosed file is corrupted: {!r}".format(badfile)) - print("Done testing") - - elif args.list is not None: - src = args.list - with ZipFile(src, 'r', metadata_encoding=encoding) as zf: - zf.printdir() - - elif args.extract is not None: - src, curdir = args.extract - with ZipFile(src, 'r', metadata_encoding=encoding) as zf: - zf.extractall(curdir) - - elif args.create is not None: - if encoding: - print("Non-conforming encodings not supported with -c.", - file=sys.stderr) - sys.exit(1) - - zip_name = args.create.pop(0) - files = args.create - - def addToZip(zf, path, zippath): - if os.path.isfile(path): - zf.write(path, zippath, ZIP_DEFLATED) - elif os.path.isdir(path): - if zippath: - zf.write(path, zippath) - for nm in sorted(os.listdir(path)): - addToZip(zf, - os.path.join(path, nm), os.path.join(zippath, nm)) - # else: ignore - - with ZipFile(zip_name, 'w') as zf: - for path in files: - zippath = os.path.basename(path) - if not zippath: - zippath = os.path.basename(os.path.dirname(path)) - if zippath in ('', os.curdir, os.pardir): - zippath = '' - addToZip(zf, path, zippath) - - -if __name__ == "__main__": - main() +# used privately for tests +from .__main__ import main # noqa: F401, E402