From df8d4c83a6e1727e766191896aeabde886979587 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 25 Oct 2020 14:21:46 -0400 Subject: [PATCH] bpo-41490: ``path`` and ``contents`` to aggressively close handles (#22915) * bpo-41490: ``path`` method to aggressively close handles * Add blurb * In ZipReader.contents, eagerly evaluate the contents to release references to the zipfile. * Instead use _ensure_sequence to ensure any iterable from a reader is eagerly converted to a list if it's not already a sequence. --- Lib/importlib/_common.py | 5 +- Lib/importlib/readers.py | 6 +- Lib/importlib/resources.py | 37 +++++--- Lib/test/test_importlib/test_resource.py | 79 ++++++++++++++++++ .../test_importlib/zipdata01/ziptestdata.zip | Bin 876 -> 1131 bytes .../test_importlib/zipdata02/ziptestdata.zip | Bin 698 -> 698 bytes .../2020-10-23-08-54-47.bpo-41490.-Yk6OD.rst | 3 + 7 files changed, 114 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-10-23-08-54-47.bpo-41490.-Yk6OD.rst diff --git a/Lib/importlib/_common.py b/Lib/importlib/_common.py index b15c59eb9c98af..71ce6af8cc9851 100644 --- a/Lib/importlib/_common.py +++ b/Lib/importlib/_common.py @@ -88,6 +88,7 @@ def _tempfile(reader, suffix=''): try: os.write(fd, reader()) os.close(fd) + del reader yield pathlib.Path(raw_path) finally: try: @@ -97,14 +98,12 @@ def _tempfile(reader, suffix=''): @functools.singledispatch -@contextlib.contextmanager def as_file(path): """ Given a Traversable object, return that object as a path on the local file system in a context manager. """ - with _tempfile(path.read_bytes, suffix=path.name) as local: - yield local + return _tempfile(path.read_bytes, suffix=path.name) @as_file.register(pathlib.Path) diff --git a/Lib/importlib/readers.py b/Lib/importlib/readers.py index 6331e4daf4313c..74a63e4a7535bd 100644 --- a/Lib/importlib/readers.py +++ b/Lib/importlib/readers.py @@ -22,8 +22,8 @@ def files(self): class ZipReader(abc.TraversableResources): def __init__(self, loader, module): _, _, name = module.rpartition('.') - prefix = loader.prefix.replace('\\', '/') + name + '/' - self.path = zipfile.Path(loader.archive, prefix) + self.prefix = loader.prefix.replace('\\', '/') + name + '/' + self.archive = loader.archive def open_resource(self, resource): try: @@ -38,4 +38,4 @@ def is_resource(self, path): return target.is_file() and target.exists() def files(self): - return self.path + return zipfile.Path(self.archive, self.prefix) diff --git a/Lib/importlib/resources.py b/Lib/importlib/resources.py index 4535619f4f0143..4169171b189cca 100644 --- a/Lib/importlib/resources.py +++ b/Lib/importlib/resources.py @@ -1,8 +1,9 @@ import os +import io from . import _common from ._common import as_file, files -from contextlib import contextmanager, suppress +from contextlib import suppress from importlib.abc import ResourceLoader from io import BytesIO, TextIOWrapper from pathlib import Path @@ -10,6 +11,8 @@ from typing import ContextManager, Iterable, Union from typing import cast from typing.io import BinaryIO, TextIO +from collections.abc import Sequence +from functools import singledispatch __all__ = [ @@ -102,22 +105,26 @@ def path( """ reader = _common.get_resource_reader(_common.get_package(package)) return ( - _path_from_reader(reader, resource) + _path_from_reader(reader, _common.normalize_path(resource)) if reader else _common.as_file( _common.files(package).joinpath(_common.normalize_path(resource))) ) -@contextmanager def _path_from_reader(reader, resource): - norm_resource = _common.normalize_path(resource) + return _path_from_resource_path(reader, resource) or \ + _path_from_open_resource(reader, resource) + + +def _path_from_resource_path(reader, resource): with suppress(FileNotFoundError): - yield Path(reader.resource_path(norm_resource)) - return - opener_reader = reader.open_resource(norm_resource) - with _common._tempfile(opener_reader.read, suffix=norm_resource) as res: - yield res + return Path(reader.resource_path(resource)) + + +def _path_from_open_resource(reader, resource): + saved = io.BytesIO(reader.open_resource(resource).read()) + return _common._tempfile(saved.read, suffix=resource) def is_resource(package: Package, name: str) -> bool: @@ -146,7 +153,7 @@ def contents(package: Package) -> Iterable[str]: package = _common.get_package(package) reader = _common.get_resource_reader(package) if reader is not None: - return reader.contents() + return _ensure_sequence(reader.contents()) # Is the package a namespace package? By definition, namespace packages # cannot have resources. namespace = ( @@ -156,3 +163,13 @@ def contents(package: Package) -> Iterable[str]: if namespace or not package.__spec__.has_location: return () return list(item.name for item in _common.from_package(package).iterdir()) + + +@singledispatch +def _ensure_sequence(iterable): + return list(iterable) + + +@_ensure_sequence.register(Sequence) +def _(iterable): + return iterable diff --git a/Lib/test/test_importlib/test_resource.py b/Lib/test/test_importlib/test_resource.py index f88d92d1546729..1d1bdad1b218df 100644 --- a/Lib/test/test_importlib/test_resource.py +++ b/Lib/test/test_importlib/test_resource.py @@ -1,10 +1,14 @@ import sys import unittest +import uuid +import pathlib from . import data01 from . import zipdata01, zipdata02 from . import util from importlib import resources, import_module +from test.support import import_helper +from test.support.os_helper import unlink class ResourceTests: @@ -162,5 +166,80 @@ def test_namespaces_cannot_have_resources(self): 'test.test_importlib.data03.namespace', 'resource1.txt') +class DeletingZipsTest(unittest.TestCase): + """Having accessed resources in a zip file should not keep an open + reference to the zip. + """ + ZIP_MODULE = zipdata01 + + def setUp(self): + modules = import_helper.modules_setup() + self.addCleanup(import_helper.modules_cleanup, *modules) + + data_path = pathlib.Path(self.ZIP_MODULE.__file__) + data_dir = data_path.parent + self.source_zip_path = data_dir / 'ziptestdata.zip' + self.zip_path = pathlib.Path('{}.zip'.format(uuid.uuid4())).absolute() + self.zip_path.write_bytes(self.source_zip_path.read_bytes()) + sys.path.append(str(self.zip_path)) + self.data = import_module('ziptestdata') + + def tearDown(self): + try: + sys.path.remove(str(self.zip_path)) + except ValueError: + pass + + try: + del sys.path_importer_cache[str(self.zip_path)] + del sys.modules[self.data.__name__] + except KeyError: + pass + + try: + unlink(self.zip_path) + except OSError: + # If the test fails, this will probably fail too + pass + + def test_contents_does_not_keep_open(self): + c = resources.contents('ziptestdata') + self.zip_path.unlink() + del c + + def test_is_resource_does_not_keep_open(self): + c = resources.is_resource('ziptestdata', 'binary.file') + self.zip_path.unlink() + del c + + def test_is_resource_failure_does_not_keep_open(self): + c = resources.is_resource('ziptestdata', 'not-present') + self.zip_path.unlink() + del c + + @unittest.skip("Desired but not supported.") + def test_path_does_not_keep_open(self): + c = resources.path('ziptestdata', 'binary.file') + self.zip_path.unlink() + del c + + def test_entered_path_does_not_keep_open(self): + # This is what certifi does on import to make its bundle + # available for the process duration. + c = resources.path('ziptestdata', 'binary.file').__enter__() + self.zip_path.unlink() + del c + + def test_read_binary_does_not_keep_open(self): + c = resources.read_binary('ziptestdata', 'binary.file') + self.zip_path.unlink() + del c + + def test_read_text_does_not_keep_open(self): + c = resources.read_text('ziptestdata', 'utf-8.file', encoding='utf-8') + self.zip_path.unlink() + del c + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_importlib/zipdata01/ziptestdata.zip b/Lib/test/test_importlib/zipdata01/ziptestdata.zip index 8d8fa97f199cc29f6905404ea05f88926658ee2b..12f7872cd596ace39602223cc127ad6c13bbca89 100644 GIT binary patch literal 1131 zcmaJf(O}q#y>WP~hS1_KM0; zZZ9ZMoby*q+jcU^M!lRZCWnq|8>yJcgV5iQg=(7BvzVsk`Xdpvb$K(1B8*Z7wXI&u z9nGslC>#lZwb`ATC>_0DAc%4YledMdrbk{kVrS>iO&!0PM+nyvrbi_Wfa5fs)Y7dbLRJa2Gynh4&re;NK57z)QD%O}fMH67EA*Tv15S`|s;=?O?$3rK!uN#^h| z^rj*3V>+JUmKVZ#EtBxBh$Jr(31EuADn$6uxHab9Y5Jpv#H&I$@@?0h!h$U;m zW}lx}BPkHz&B!FejBx4XgUk{vTN=d~C+o9FPyWFyu7Ga23e098y`?b{q)?BKje(6} NJ0kGw*zAHbvki50HhIS zpApCbKmlY2FfxfS;|@5WZ@^$lBZ$HojJOQ}1tbhCX*2~gkqyBJR#bz~0~TRW9u|XO zz61wzfH$g9Fs&dO)lJ*bjKT&?E(z02lENC@Ig!?SR;Polaa10BMBT zX9RKpPypEhj7%cTxC0L88!*_?2%>NXBW^=L0SN