From 6a830016d7512cf1183b325b827629eeb22b56a1 Mon Sep 17 00:00:00 2001 From: Sean Gillies Date: Wed, 28 Feb 2024 20:01:53 -0700 Subject: [PATCH 1/2] Add rasterio's python opener from 1.4a1 and supporting things * Modern _path module and updated usage * A setup.cfg for editable installs * A Dockerfile and Makefile for testing Resolves #1328 --- Dockerfile | 30 +++ Makefile | 48 ++++ fiona/__init__.py | 134 ++++++----- fiona/_env.pyx | 4 +- fiona/_path.py | 217 ++++++++++++++++++ fiona/_vsiopener.pxd | 4 + fiona/_vsiopener.pyx | 491 +++++++++++++++++++++++++++++++++++++++++ fiona/collection.py | 11 +- fiona/errors.py | 8 + fiona/gdal.pxi | 181 +++++++++------ fiona/ogrext.pyx | 2 +- fiona/path.py | 201 ++--------------- fiona/session.py | 6 +- pyproject.toml | 9 +- requirements-dev.txt | 4 + requirements.txt | 1 + setup.cfg | 17 ++ setup.py | 7 + tests/test_pyopener.py | 27 +++ 19 files changed, 1080 insertions(+), 322 deletions(-) create mode 100644 Dockerfile create mode 100644 Makefile create mode 100644 fiona/_path.py create mode 100644 fiona/_vsiopener.pxd create mode 100644 fiona/_vsiopener.pyx create mode 100644 setup.cfg create mode 100644 tests/test_pyopener.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..e5eaa453e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +ARG GDAL=ubuntu-small-3.6.4 +FROM ghcr.io/osgeo/gdal:${GDAL} AS gdal +ARG PYTHON_VERSION=3.9 +ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" +RUN apt-get update && apt-get install -y software-properties-common +RUN add-apt-repository -y ppa:deadsnakes/ppa +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + g++ \ + gdb \ + make \ + python3-pip \ + python${PYTHON_VERSION} \ + python${PYTHON_VERSION}-dev \ + python${PYTHON_VERSION}-venv \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY requirements*.txt ./ +RUN python${PYTHON_VERSION} -m venv /venv && \ + /venv/bin/python -m pip install -U build pip && \ + /venv/bin/python 
-m pip install -r requirements-dev.txt && \ + /venv/bin/python -m pip list + +FROM gdal +COPY . . +RUN /venv/bin/python -m build -o wheels +RUN /venv/bin/python -m pip install --no-index -f wheels fiona[test] +ENTRYPOINT ["/venv/bin/fio"] +CMD ["--help"] diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..bcd5874c6 --- /dev/null +++ b/Makefile @@ -0,0 +1,48 @@ +PYTHON_VERSION ?= 3.9 +GDAL ?= ubuntu-small-3.6.4 +all: deps clean install test + +.PHONY: docs + +install: + python setup.py build_ext + pip install -e .[all] + +deps: + pip install -r requirements-dev.txt + +clean: + pip uninstall -y fiona || echo "no need to uninstall" + python setup.py clean --all + find . -name '__pycache__' -delete -print -o -name '*.pyc' -delete -print + touch fiona/*.pyx + +sdist: + python setup.py sdist + +test: + python -m pytest --maxfail 1 -v --cov fiona --cov-report html --pdb tests + +docs: + cd docs && make apidocs && make html + +doctest: + py.test --doctest-modules fiona --doctest-glob='*.rst' docs/*.rst + +dockertestimage: + docker build --target gdal --build-arg GDAL=$(GDAL) --build-arg PYTHON_VERSION=$(PYTHON_VERSION) -t fiona:$(GDAL)-py$(PYTHON_VERSION) . + +dockertest: dockertestimage + docker run -it -v $(shell pwd):/app -v /tmp:/tmp --env AWS_ACCESS_KEY_ID --env AWS_SECRET_ACCESS_KEY --entrypoint=/bin/bash fiona:$(GDAL)-py$(PYTHON_VERSION) -c '/venv/bin/python -m pip install --editable . --no-build-isolation && /venv/bin/python -B -m pytest -m "not wheel" --cov fiona --cov-report term-missing $(OPTS)' + +dockershell: dockertestimage + docker run -it -v $(shell pwd):/app --env AWS_ACCESS_KEY_ID --env AWS_SECRET_ACCESS_KEY --entrypoint=/bin/bash fiona:$(GDAL)-py$(PYTHON_VERSION) -c '/venv/bin/python -m pip install --editable . 
--no-build-isolation && /bin/bash' + +dockersdist: dockertestimage + docker run -it -v $(shell pwd):/app --env AWS_ACCESS_KEY_ID --env AWS_SECRET_ACCESS_KEY --entrypoint=/bin/bash fiona:$(GDAL)-py$(PYTHON_VERSION) -c '/venv/bin/python -m build --sdist' + +dockergdb: dockertestimage + docker run -it -v $(shell pwd):/app --env AWS_ACCESS_KEY_ID --env AWS_SECRET_ACCESS_KEY --entrypoint=/bin/bash fiona:$(GDAL)-py$(PYTHON_VERSION) -c '/venv/bin/python -m pip install --editable . --no-build-isolation && gdb -ex=r --args /venv/bin/python -B -m pytest -m "not wheel" --cov fiona --cov-report term-missing $(OPTS)' + +dockerdocs: dockertestimage + docker run -it -v $(shell pwd):/app --entrypoint=/bin/bash fiona:$(GDAL)-py$(PYTHON_VERSION) -c 'source /venv/bin/activate && cd docs && make clean && make html' diff --git a/fiona/__init__.py b/fiona/__init__.py index e49597eb4..ab1e4cc93 100644 --- a/fiona/__init__.py +++ b/fiona/__init__.py @@ -20,6 +20,7 @@ """ +from contextlib import ExitStack import glob import logging import os @@ -46,6 +47,7 @@ ) from fiona._env import driver_count from fiona._show_versions import show_versions +from fiona._vsiopener import _opener_registration from fiona.collection import BytesCollection, Collection from fiona.drvsupport import supported_drivers from fiona.env import ensure_env_with_credentials, Env @@ -60,7 +62,7 @@ _remove, _remove_layer, ) -from fiona.path import ParsedPath, parse_path, vsi_path +from fiona._path import _ParsedPath, _UnparsedPath, _parse_path, _vsi_path from fiona.vfs import parse_paths as vfs_parse_paths # These modules are imported by fiona.ogrext, but are also import here to @@ -82,7 +84,7 @@ "remove", ] -__version__ = "2.0dev" +__version__ = "1.10dev" __gdal_version__ = get_gdal_release_name() gdal_version = get_gdal_version_tuple() @@ -104,6 +106,7 @@ def open( enabled_drivers=None, crs_wkt=None, allow_unsupported_drivers=False, + opener=None, **kwargs ): """Open a collection for read, append, or write @@ -191,6 
+194,19 @@ def open( Defaults to GDAL's default (WKT1_GDAL for GDAL 3). allow_unsupported_drivers : bool If set to true do not limit GDAL drivers to set set of known working. + opener : callable or obj, optional + A custom dataset opener which can serve GDAL's virtual + filesystem machinery via Python file-like objects. The + underlying file-like object is obtained by calling *opener* with + (*fp*, *mode*) or (*fp*, *mode* + "b") depending on the format + driver's native mode. *opener* must return a Python file-like + object that provides read, seek, tell, and close methods. Note: + only one opener at a time per fp, mode pair is allowed. + + Alternatively, opener may be a filesystem object from a package + like fsspec that provides the following methods: isdir(), + isfile(), ls(), mtime(), open(), and size(). The exact interface + is defined in the fiona._vsiopener._AbstractOpener class. kwargs : mapping Other driver-specific parameters that will be interpreted by the OGR library as layer creation or opening options. @@ -273,49 +289,67 @@ def func(*args, **kwds): # At this point, the fp argument is a string or path-like object # which can be converted to a string. else: - # If a pathlib.Path instance is given, convert it to a string path. - if isinstance(fp, Path): - fp = str(fp) + stack = ExitStack() - if vfs: - warnings.warn( - "The vfs keyword argument is deprecated and will be removed in version 2.0.0. 
Instead, pass a URL that uses a zip or tar (for example) scheme.", - FionaDeprecationWarning, - stacklevel=2, - ) - path, scheme, archive = vfs_parse_paths(fp, vfs=vfs) - path = ParsedPath(path, archive, scheme) + if hasattr(fp, "path") and hasattr(fp, "fs"): + log.debug("Detected fp is an OpenFile: fp=%r", fp) + raw_dataset_path = fp.path + opener = fp.fs.open else: - path = parse_path(fp) - - if mode in ("a", "r"): - colxn = Collection( - path, - mode, - driver=driver, - encoding=encoding, - layer=layer, - enabled_drivers=enabled_drivers, - allow_unsupported_drivers=allow_unsupported_drivers, - **kwargs - ) - elif mode == "w": - colxn = Collection( - path, - mode, - crs=crs, - driver=driver, - schema=schema, - encoding=encoding, - layer=layer, - enabled_drivers=enabled_drivers, - crs_wkt=crs_wkt, - allow_unsupported_drivers=allow_unsupported_drivers, - **kwargs - ) - else: - raise ValueError("mode string must be one of {'r', 'w', 'a'}") - + raw_dataset_path = os.fspath(fp) + + try: + if opener: + log.debug("Registering opener: raw_dataset_path=%r, mode=%r, opener=%r", raw_dataset_path, mode, opener) + vsi_path_ctx = _opener_registration(raw_dataset_path, mode[0], opener) + registered_vsi_path = stack.enter_context(vsi_path_ctx) + log.debug("Registered vsi path: registered_vsi_path%r", registered_vsi_path) + path = _UnparsedPath(registered_vsi_path) + else: + if vfs: + warnings.warn( + "The vfs keyword argument is deprecated and will be removed in version 2.0.0. 
Instead, pass a URL that uses a zip or tar (for example) scheme.", + FionaDeprecationWarning, + stacklevel=2, + ) + path, scheme, archive = vfs_parse_paths(fp, vfs=vfs) + path = _ParsedPath(path, archive, scheme) + else: + path = _parse_path(fp) + + if mode in ("a", "r"): + colxn = Collection( + path, + mode, + driver=driver, + encoding=encoding, + layer=layer, + enabled_drivers=enabled_drivers, + allow_unsupported_drivers=allow_unsupported_drivers, + **kwargs + ) + elif mode == "w": + colxn = Collection( + path, + mode, + crs=crs, + driver=driver, + schema=schema, + encoding=encoding, + layer=layer, + enabled_drivers=enabled_drivers, + crs_wkt=crs_wkt, + allow_unsupported_drivers=allow_unsupported_drivers, + **kwargs + ) + else: + raise ValueError("mode string must be one of {'r', 'w', 'a'}") + + except Exception: + stack.close() + raise + + colxn._env = stack return colxn @@ -392,8 +426,8 @@ def listdir(fp): if not isinstance(fp, str): raise TypeError("invalid path: %r" % fp) - pobj = parse_path(fp) - return _listdir(vsi_path(pobj)) + pobj = _parse_path(fp) + return _listdir(_vsi_path(pobj)) @ensure_env_with_credentials @@ -442,13 +476,13 @@ def listlayers(fp, vfs=None, **kwargs): FionaDeprecationWarning, stacklevel=2, ) - pobj_vfs = parse_path(vfs) - pobj_path = parse_path(fp) - pobj = ParsedPath(pobj_path.path, pobj_vfs.path, pobj_vfs.scheme) + pobj_vfs = _parse_path(vfs) + pobj_path = _parse_path(fp) + pobj = _ParsedPath(pobj_path.path, pobj_vfs.path, pobj_vfs.scheme) else: - pobj = parse_path(fp) + pobj = _parse_path(fp) - return _listlayers(vsi_path(pobj), **kwargs) + return _listlayers(_vsi_path(pobj), **kwargs) def prop_width(val): diff --git a/fiona/_env.pyx b/fiona/_env.pyx index af0603cc6..ea3f47bd4 100644 --- a/fiona/_env.pyx +++ b/fiona/_env.pyx @@ -17,6 +17,7 @@ import threading from fiona._err cimport exc_wrap_int, exc_wrap_ogrerr from fiona._err import CPLE_BaseError +from fiona._vsiopener cimport install_pyopener_plugin from fiona.errors import 
EnvError level_map = { @@ -60,7 +61,7 @@ except ImportError: pass - +cdef VSIFilesystemPluginCallbacksStruct* pyopener_plugin = NULL cdef bint is_64bit = sys.maxsize > 2 ** 32 cdef void set_proj_search_path(object path): @@ -408,6 +409,7 @@ cdef class GDALEnv(ConfigEnv): GDALAllRegister() OGRRegisterAll() + install_pyopener_plugin(pyopener_plugin) if 'GDAL_DATA' in os.environ: log.debug("GDAL_DATA found in environment.") diff --git a/fiona/_path.py b/fiona/_path.py new file mode 100644 index 000000000..045a62a09 --- /dev/null +++ b/fiona/_path.py @@ -0,0 +1,217 @@ +"""Dataset paths, identifiers, and filenames + +Note: this module is not part of Fiona's API. It is for internal use +only. + +""" + +import pathlib +import re +import sys +from urllib.parse import urlparse + +import attr + +from fiona.errors import PathError + +# Supported URI schemes and their mapping to GDAL's VSI suffix. +# TODO: extend for other cloud platforms. +SCHEMES = { + 'ftp': 'curl', + 'gzip': 'gzip', + 'http': 'curl', + 'https': 'curl', + 's3': 's3', + 'tar': 'tar', + 'zip': 'zip', + 'file': 'file', + 'oss': 'oss', + 'gs': 'gs', + 'az': 'az', +} + +ARCHIVESCHEMES = set +CURLSCHEMES = set([k for k, v in SCHEMES.items() if v == 'curl']) + +# TODO: extend for other cloud platforms. +REMOTESCHEMES = set([k for k, v in SCHEMES.items() if v in ('curl', 's3', 'oss', 'gs', 'az',)]) + + +class _Path: + """Base class for dataset paths""" + + def as_vsi(self): + return _vsi_path(self) + + +@attr.s(slots=True) +class _ParsedPath(_Path): + """Result of parsing a dataset URI/Path + + Attributes + ---------- + path : str + Parsed path. Includes the hostname and query string in the case + of a URI. + archive : str + Parsed archive path. + scheme : str + URI scheme such as "https" or "zip+s3". 
+ """ + path = attr.ib() + archive = attr.ib() + scheme = attr.ib() + + @classmethod + def from_uri(cls, uri): + parts = urlparse(uri) + path = pathlib.Path(parts.path).as_posix() if parts.path else parts.path + scheme = parts.scheme or None + + if parts.query: + path += "?" + parts.query + + if scheme and scheme.startswith(("gzip", "tar", "zip")): + path_parts = path.split('!') + path = path_parts.pop() if path_parts else None + archive = path_parts.pop() if path_parts else None + else: + archive = None + + if parts.scheme and parts.netloc: + if archive: + archive = parts.netloc + archive + else: + path = parts.netloc + path + + return _ParsedPath(path, archive, scheme) + + @property + def name(self): + """The parsed path's original URI""" + if not self.scheme: + return self.path + elif self.archive: + return "{}://{}!{}".format(self.scheme, self.archive, self.path) + else: + return "{}://{}".format(self.scheme, self.path) + + @property + def is_remote(self): + """Test if the path is a remote, network URI""" + return bool(self.scheme) and self.scheme.split("+")[-1] in REMOTESCHEMES + + @property + def is_local(self): + """Test if the path is a local URI""" + return not self.scheme or (self.scheme and self.scheme.split('+')[-1] not in REMOTESCHEMES) + + +@attr.s(slots=True) +class _UnparsedPath(_Path): + """Encapsulates legacy GDAL filenames + + Attributes + ---------- + path : str + The legacy GDAL filename. + """ + path = attr.ib() + + @property + def name(self): + """The unparsed path's original path""" + return self.path + + +def _parse_path(path): + """Parse a dataset's identifier or path into its parts + + Parameters + ---------- + path : str or path-like object + The path to be parsed. + + Returns + ------- + ParsedPath or UnparsedPath + + Notes + ----- + When legacy GDAL filenames are encountered, they will be returned + in a UnparsedPath. 
+ + """ + if isinstance(path, _Path): + return path + + elif pathlib and isinstance(path, pathlib.PurePath): + return _ParsedPath(path.as_posix(), None, None) + + elif isinstance(path, str): + + if sys.platform == "win32" and re.match(r"^[a-zA-Z]\:", path): + if pathlib: + return _ParsedPath(pathlib.Path(path).as_posix(), None, None) + else: + return _UnparsedPath(path) + + elif path.startswith('/vsi'): + return _UnparsedPath(path) + + else: + parts = urlparse(path) + + else: + raise PathError("invalid path '{!r}'".format(path)) + + # if the scheme is not one of Fiona's supported schemes, we + # return an UnparsedPath. + if parts.scheme: + + if all(p in SCHEMES for p in parts.scheme.split('+')): + return _ParsedPath.from_uri(path) + + return _UnparsedPath(path) + + +def _vsi_path(path): + """Convert a parsed path to a GDAL VSI path + + Parameters + ---------- + path : Path + A ParsedPath or UnparsedPath object. + + Returns + ------- + str + + """ + if isinstance(path, _UnparsedPath): + return path.path + + elif isinstance(path, _ParsedPath): + + if not path.scheme: + return path.path + + else: + if path.scheme.split('+')[-1] in CURLSCHEMES: + suffix = '{}://'.format(path.scheme.split('+')[-1]) + else: + suffix = '' + + prefix = '/'.join('vsi{0}'.format(SCHEMES[p]) for p in path.scheme.split('+') if p != 'file') + + if prefix: + if path.archive: + result = '/{}/{}{}/{}'.format(prefix, suffix, path.archive, path.path.lstrip('/')) + else: + result = '/{}/{}{}'.format(prefix, suffix, path.path) + else: + result = path.path + return result + + else: + raise ValueError("path must be a ParsedPath or UnparsedPath object") diff --git a/fiona/_vsiopener.pxd b/fiona/_vsiopener.pxd new file mode 100644 index 000000000..983e23b8b --- /dev/null +++ b/fiona/_vsiopener.pxd @@ -0,0 +1,4 @@ +include "gdal.pxi" + +cdef int install_pyopener_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct) +cdef void uninstall_pyopener_plugin(VSIFilesystemPluginCallbacksStruct 
*callbacks_struct) diff --git a/fiona/_vsiopener.pyx b/fiona/_vsiopener.pyx new file mode 100644 index 000000000..c94be1f24 --- /dev/null +++ b/fiona/_vsiopener.pyx @@ -0,0 +1,491 @@ +# cython: language_level=3, boundscheck=False +# distutils: language = c++ +"""Bridge between Python file openers and GDAL VSI. + +Based on _filepath.pyx. +""" + +# include "gdal.pxi" + +import contextlib +from contextvars import ContextVar +import logging +import os +from pathlib import Path + +import stat + +from urllib.parse import urlparse +from uuid import uuid4 + +from libc.string cimport memcpy +cimport numpy as np + +from fiona.errors import OpenerRegistrationError + +log = logging.getLogger(__name__) + + +# NOTE: This has to be defined outside of gdal.pxi or other C extensions will +# try to compile C++ only code included in this header. +cdef extern from "cpl_vsi_virtual.h": + cdef cppclass VSIFileManager: + @staticmethod + void* GetHandler(const char*) + + +# Prefix for all in-memory paths used by GDAL's VSI system +# Except for errors and log messages this shouldn't really be seen by the user +cdef str PREFIX = "/vsipyopener/" +cdef bytes PREFIX_BYTES = PREFIX.encode("utf-8") + +# This is global state for the Python filesystem plugin. It currently only +# contains path -> PyOpenerBase (or subclass) instances. This is used by +# the plugin to determine what "files" exist on "disk". +# Currently the only way to "create" a file in the filesystem is to add +# an entry to this dictionary. GDAL will then Open the path later. 
+_OPENER_REGISTRY = ContextVar("opener_registery") +_OPENER_REGISTRY.set({}) +_OPEN_FILE_EXIT_STACKS = ContextVar("open_file_exit_stacks") +_OPEN_FILE_EXIT_STACKS.set({}) + + +cdef int install_pyopener_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct): + """Install handlers for python file openers if it isn't already installed.""" + cdef char **registered_prefixes = VSIGetFileSystemsPrefixes() + cdef int prefix_index = CSLFindString(registered_prefixes, PREFIX_BYTES) + CSLDestroy(registered_prefixes) + + if prefix_index < 0: + log.debug("Installing Python opener handler plugin...") + callbacks_struct = VSIAllocFilesystemPluginCallbacksStruct() + callbacks_struct.open = pyopener_open + callbacks_struct.eof = pyopener_eof + callbacks_struct.tell = pyopener_tell + callbacks_struct.seek = pyopener_seek + callbacks_struct.read = pyopener_read + callbacks_struct.write = pyopener_write + callbacks_struct.close = pyopener_close + callbacks_struct.read_dir = pyopener_read_dir + callbacks_struct.stat = pyopener_stat + callbacks_struct.pUserData = _OPENER_REGISTRY + retval = VSIInstallPluginHandler(PREFIX_BYTES, callbacks_struct) + VSIFreeFilesystemPluginCallbacksStruct(callbacks_struct) + return retval + else: + return 0 + + +cdef void uninstall_pyopener_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct): + if callbacks_struct is not NULL: + callbacks_struct.pUserData = NULL + VSIFreeFilesystemPluginCallbacksStruct(callbacks_struct) + callbacks_struct = NULL + + +cdef int pyopener_stat( + void *pUserData, + const char *pszFilename, + VSIStatBufL *pStatBuf, + int nFlags +) with gil: + """Provides POSIX stat data to GDAL from a Python filesystem.""" + # Convert the given filename to a registry key. + # Reminder: openers are registered by URI scheme, authority, and + # *directory* path. 
+ urlpath = pszFilename.decode("utf-8") + parsed_uri = urlparse(urlpath) + parent = Path(parsed_uri.path).parent + + # Note that "r" mode is used here under the assumption that GDAL + # doesn't read_dir when writing data. Could be wrong! + mode = "r" + key = ((parsed_uri.scheme, parsed_uri.netloc, parent.as_posix()), mode) + + registry = _OPENER_REGISTRY.get() + log.debug("Looking up opener in pyopener_stat: registry=%r, key=%r", registry, key) + try: + file_opener = registry[key] + except KeyError as err: + errmsg = f"Opener not found: {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return -1 + + try: + if file_opener.isfile(urlpath): + fmode = 0o170000 | stat.S_IFREG + elif file_opener.isdir(urlpath): + fmode = 0o170000 | stat.S_IFDIR + else: + # No such file or directory. + return -1 + size = file_opener.size(urlpath) + mtime = file_opener.mtime(urlpath) + except (FileNotFoundError, KeyError): + # No such file or directory. + return -1 + except Exception as err: + errmsg = f"Opener failed to determine file info: {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return -1 + + pStatBuf.st_size = size + pStatBuf.st_mode = fmode + pStatBuf.st_mtime = mtime + return 0 + + +cdef char ** pyopener_read_dir( + void *pUserData, + const char *pszDirname, + int nMaxFiles +) with gil: + """Provides a directory listing to GDAL from a Python filesystem.""" + urlpath = pszDirname.decode("utf-8") + parsed_uri = urlparse(urlpath) + + # Note that "r" mode is used here under the assumption that GDAL + # doesn't read_dir when writing data. Could be wrong! 
+ mode = "r" + key = ((parsed_uri.scheme, parsed_uri.netloc, parsed_uri.path), mode[0]) + + registry = _OPENER_REGISTRY.get() + log.debug("Looking up opener in pyopener_read_dir: registry=%r, key=%r", registry, key) + try: + file_opener = registry[key] + except KeyError as err: + errmsg = f"Opener not found: {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return NULL + + try: + # GDAL wants relative file names. + contents = [Path(item).name for item in file_opener.ls(urlpath)] + log.debug("Looking for dir contents: urlpath=%r, contents=%r", urlpath, contents) + except (FileNotFoundError, KeyError): + # No such file or directory. + return NULL + except Exception as err: + errmsg = f"Opener failed to determine directory contents: {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return NULL + + cdef char **name_list = NULL + + for name in contents: + fname = name.encode("utf-8") + name_list = CSLAddString(name_list, fname) + + return name_list + + +cdef void* pyopener_open( + void *pUserData, + const char *pszFilename, + const char *pszAccess +) with gil: + """Access files in the virtual filesystem. + + This function is mandatory in the GDAL Filesystem Plugin API. + GDAL may call this function multiple times per filename and each + result must be separately seekable. 
+ """ + urlpath = pszFilename.decode("utf-8") + mode = pszAccess.decode("utf-8") + parsed_uri = urlparse(urlpath) + path_to_check = Path(parsed_uri.path) + parent = path_to_check.parent + key = ((parsed_uri.scheme, parsed_uri.netloc, parent.as_posix()), mode[0]) + + registry = _OPENER_REGISTRY.get() + log.debug("Looking up opener in pyopener_open: registry=%r, key=%r", registry, key) + try: + file_opener = registry[key] + except KeyError as err: + errmsg = f"Opener not found: {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return NULL + + cdef object file_obj + + try: + file_obj = file_opener.open(urlpath, mode) + except ValueError as err: + # ZipFile.open doesn't accept binary modes like "rb" and will + # raise ValueError if given one. We strip the mode in this case. + try: + file_obj = file_opener.open(urlpath, mode.rstrip("b")) + except Exception as err: + return NULL + except Exception as err: + return NULL + + log.debug("Opened file object: file_obj=%r, mode=%r", file_obj, mode) + + # Before we return, we attempt to enter the file object's context + # and store an exit callback stack for it. 
+ stack = contextlib.ExitStack() + + try: + file_obj = stack.enter_context(file_obj) + except (AttributeError, TypeError) as err: + log.error("File object is not a context manager: file_obj=%r", file_obj) + errmsg = f"Opener failed to open file with arguments ({repr(urlpath)}, {repr(mode)}): {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return NULL + except FileNotFoundError as err: + errmsg = "OpenFile didn't resolve".encode("utf-8") + return NULL + else: + exit_stacks = _OPEN_FILE_EXIT_STACKS.get() + exit_stacks[file_obj] = stack + _OPEN_FILE_EXIT_STACKS.set(exit_stacks) + log.debug("Returning: file_obj=%r", file_obj) + return file_obj + + +cdef int pyopener_eof(void *pFile) with gil: + cdef object file_obj = pFile + if file_obj.read(1): + file_obj.seek(-1, 1) + return 1 + else: + return 0 + +cdef vsi_l_offset pyopener_tell(void *pFile) with gil: + cdef object file_obj = pFile + return file_obj.tell() + + +cdef int pyopener_seek(void *pFile, vsi_l_offset nOffset, int nWhence) with gil: + cdef object file_obj = pFile + # TODO: Add "seekable" check? 
+ file_obj.seek(nOffset, nWhence) + return 0 + + +cdef size_t pyopener_read(void *pFile, void *pBuffer, size_t nSize, size_t nCount) with gil: + cdef object file_obj = pFile + cdef bytes python_data = file_obj.read(nSize * nCount) + cdef int num_bytes = len(python_data) + # NOTE: We have to cast to char* first, otherwise Cython doesn't do the conversion properly + memcpy(pBuffer, python_data, num_bytes) + return (num_bytes / nSize) + + +cdef size_t pyopener_write(void *pFile, void *pBuffer, size_t nSize, size_t nCount) with gil: + cdef object file_obj = pFile + buffer_len = nSize * nCount + cdef np.uint8_t [:] buff_view = pBuffer + log.debug("Writing data: buff_view=%r", buff_view) + return file_obj.write(buff_view) + + +cdef int pyopener_close(void *pFile) with gil: + cdef object file_obj = pFile + log.debug("Closing: file_obj=%r", file_obj) + exit_stacks = _OPEN_FILE_EXIT_STACKS.get() + stack = exit_stacks.pop(file_obj) + stack.close() + _OPEN_FILE_EXIT_STACKS.set(exit_stacks) + return 0 + + +@contextlib.contextmanager +def _opener_registration(urlpath, mode, obj): + parsed_uri = urlparse(urlpath) + path_to_check = Path(parsed_uri.path) + parent = path_to_check.parent + key = ((parsed_uri.scheme, parsed_uri.netloc, parent.as_posix()), mode[0]) + # Might raise. 
+ opener = _create_opener(obj) + + registry = _OPENER_REGISTRY.get() + if key in registry: + if registry[key] != opener: + raise OpenerRegistrationError(f"Opener already registered for urlpath and mode.") + else: + try: + yield f"{PREFIX}{urlpath}" + finally: + registry = _OPENER_REGISTRY.get() + _ = registry.pop(key, None) + _OPENER_REGISTRY.set(registry) + else: + registry[key] = opener + _OPENER_REGISTRY.set(registry) + try: + yield f"{PREFIX}{urlpath}" + finally: + registry = _OPENER_REGISTRY.get() + _ = registry.pop(key, None) + _OPENER_REGISTRY.set(registry) + + +class _AbstractOpener: + """Adapts a Python object to the opener interface.""" + def open(self, path, mode="r", **kwds): + """Get a Python file object for a resource. + + Parameters + ---------- + path : str + The identifier/locator for a resource within a filesystem. + mode : str + Opening mode. + kwds : dict + Opener specific options. Encoding, etc. + + Returns + ------- + obj + A Python 'file' object with methods read/write, seek, tell, + etc. + """ + raise NotImplementedError + def isfile(self, path): + """Test if the resource is a 'file', a sequence of bytes. + + Parameters + ---------- + path : str + The identifier/locator for a resource within a filesystem. + + Returns + ------- + bool + """ + raise NotImplementedError + def isdir(self, path): + """Test if the resource is a 'directory', a container. + + Parameters + ---------- + path : str + The identifier/locator for a resource within a filesystem. + + Returns + ------- + bool + """ + raise NotImplementedError + def ls(self, path): + """Get a 'directory' listing. + + Parameters + ---------- + path : str + The identifier/locator for a directory within a filesystem. + + Returns + ------- + list of str + List of 'path' paths relative to the directory. + """ + raise NotImplementedError + def mtime(self, path): + """Get the mtime of a resource. 
+ + Parameters + ---------- + path : str + The identifier/locator for a resource within a filesystem. 
+ + Returns + ------- + int + Modification timestamp in seconds. + """ + raise NotImplementedError + def size(self, path): + """Get the size, in bytes, of a resource. + + Parameters + ---------- + path : str + The identifier/locator for a resource within a filesystem. + + Returns + ------- + int + """ + raise NotImplementedError + + +class _FileOpener(_AbstractOpener): + """Adapts a Python file object to the opener interface.""" + def __init__(self, obj): + self._obj = obj + def open(self, path, mode="r", **kwds): + return self._obj(path, mode=mode, **kwds) + def isfile(self, path): + return True + def isdir(self, path): + return False + def ls(self, path): + return [] + def mtime(self, path): + return 0 + def size(self, path): + with self._obj(path) as f: + f.seek(0, os.SEEK_END) + return f.tell() + + +class _FilesystemOpener(_AbstractOpener): + """Adapts an fsspec filesystem object to the opener interface.""" + def __init__(self, obj): + self._obj = obj + def open(self, path, mode="r", **kwds): + return self._obj.open(path, mode=mode, **kwds) + def isfile(self, path): + return self._obj.isfile(path) + def isdir(self, path): + return self._obj.isdir(path) + def ls(self, path): + return self._obj.ls(path) + def mtime(self, path): + try: + mtime = int(self._obj.modified(path).timestamp()) + except NotImplementedError: + mtime = 0 + log.debug("Modification time: mtime=%r", mtime) + return mtime + def size(self, path): + return self._obj.size(path) + + +class _AltFilesystemOpener(_FilesystemOpener): + """Adapts a tiledb virtual filesystem object to the opener interface.""" + def isfile(self, path): + return self._obj.is_file(path) + def isdir(self, path): + return self._obj.is_dir(path) + def mtime(self, path): + return 0 + def size(self, path): + return self._obj.file_size(path) + + +def _create_opener(obj): + """Adapt Python file and fsspec objects to the opener interface.""" + if isinstance(obj, _AbstractOpener): + opener = obj + elif callable(obj): + opener = 
_FileOpener(obj) + elif hasattr(obj, "file_size"): + opener = _AltFilesystemOpener(obj) + else: + opener = _FilesystemOpener(obj) + + # Before returning we do a quick check that the opener will + # plausibly function. + try: + _ = opener.size("test") + except (AttributeError, TypeError, ValueError) as err: + raise OpenerRegistrationError(f"Opener is invalid.") from err + except Exception: + # We expect the path to not resolve. + pass + + return opener diff --git a/fiona/collection.py b/fiona/collection.py index 91a79991b..7e2fca97b 100644 --- a/fiona/collection.py +++ b/fiona/collection.py @@ -29,7 +29,7 @@ _driver_converts_field_type_silently_to_str, _driver_supports_field, ) -from fiona.path import Path, vsi_path, parse_path +from fiona._path import _Path, _vsi_path, _parse_path _GDAL_VERSION_TUPLE = get_gdal_version_tuple() @@ -85,7 +85,7 @@ def __init__( """ self._closed = True - if not isinstance(path, (str, Path)): + if not isinstance(path, (str, _Path)): raise TypeError(f"invalid path: {path!r}") if not isinstance(mode, str) or mode not in ("r", "w", "a"): raise TypeError(f"invalid mode: {mode!r}") @@ -149,7 +149,6 @@ def __init__( self.ignore_fields = ignore_fields self.ignore_geometry = bool(ignore_geometry) self._allow_unsupported_drivers = allow_unsupported_drivers - self._env = None self._closed = True # Check GDAL version against drivers @@ -169,10 +168,10 @@ def __init__( if vsi: self.path = vfs.vsi_path(path, vsi, archive) - path = parse_path(self.path) + path = _parse_path(self.path) else: - path = parse_path(path) - self.path = vsi_path(path) + path = _parse_path(path) + self.path = _vsi_path(path) if mode == "w": if layer and not isinstance(layer, str): diff --git a/fiona/errors.py b/fiona/errors.py index d13839624..66e8d86d6 100644 --- a/fiona/errors.py +++ b/fiona/errors.py @@ -75,6 +75,14 @@ class TransformError(FionaError): """Raised if a coordinate transformation fails.""" +class OpenerRegistrationError(FionaError): + """Raised when a Python 
file opener can not be registered.""" + + +class PathError(FionaError): + """Raised when a dataset path is malformed or invalid""" + + class FionaDeprecationWarning(DeprecationWarning): """A warning about deprecation of Fiona features""" diff --git a/fiona/gdal.pxi b/fiona/gdal.pxi index 2559e4ed2..b2b3d5a31 100644 --- a/fiona/gdal.pxi +++ b/fiona/gdal.pxi @@ -24,17 +24,13 @@ cdef extern from "cpl_string.h": int CSLCount(char **papszStrList) char **CSLDuplicate(char **papszStrList) int CSLFindName(char **papszStrList, const char *pszName) + int CSLFindString(char **papszStrList, const char *pszString) int CSLFetchBoolean(char **papszStrList, const char *pszName, int default) const char *CSLFetchNameValue(char **papszStrList, const char *pszName) char **CSLMerge(char **first, char **second) -cdef extern from "sys/stat.h" nogil: - struct stat: - int st_mode - cdef extern from "cpl_error.h" nogil: - ctypedef enum CPLErr: CE_None CE_Debug @@ -42,10 +38,10 @@ cdef extern from "cpl_error.h" nogil: CE_Failure CE_Fatal - # CPLErrorNum eludes me at the moment, I'm calling it 'int' - # for now. + ctypedef int CPLErrorNum ctypedef void (*CPLErrorHandler)(CPLErr, int, const char*) + void CPLError(CPLErr eErrClass, CPLErrorNum err_no, const char *template, ...) 
void CPLErrorReset() int CPLGetLastErrorNo() const char* CPLGetLastErrorMsg() @@ -55,10 +51,66 @@ cdef extern from "cpl_error.h" nogil: cdef extern from "cpl_vsi.h" nogil: - - ctypedef int vsi_l_offset + ctypedef unsigned long long vsi_l_offset ctypedef FILE VSILFILE - ctypedef stat VSIStatBufL + ctypedef struct VSIStatBufL: + long st_size + long st_mode + int st_mtime + ctypedef enum VSIRangeStatus: + VSI_RANGE_STATUS_UNKNOWN, + VSI_RANGE_STATUS_DATA, + VSI_RANGE_STATUS_HOLE, + + # GDAL Plugin System (GDAL 3.0+) + # Filesystem functions + ctypedef int (*VSIFilesystemPluginStatCallback)(void*, const char*, VSIStatBufL*, int) # Optional + ctypedef int (*VSIFilesystemPluginUnlinkCallback)(void*, const char*) # Optional + ctypedef int (*VSIFilesystemPluginRenameCallback)(void*, const char*, const char*) # Optional + ctypedef int (*VSIFilesystemPluginMkdirCallback)(void*, const char*, long) # Optional + ctypedef int (*VSIFilesystemPluginRmdirCallback)(void*, const char*) # Optional + ctypedef char** (*VSIFilesystemPluginReadDirCallback)(void*, const char*, int) # Optional + ctypedef char** (*VSIFilesystemPluginSiblingFilesCallback)(void*, const char*) # Optional (GDAL 3.2+) + ctypedef void* (*VSIFilesystemPluginOpenCallback)(void*, const char*, const char*) + # File functions + ctypedef vsi_l_offset (*VSIFilesystemPluginTellCallback)(void*) + ctypedef int (*VSIFilesystemPluginSeekCallback)(void*, vsi_l_offset, int) + ctypedef size_t (*VSIFilesystemPluginReadCallback)(void*, void*, size_t, size_t) + ctypedef int (*VSIFilesystemPluginReadMultiRangeCallback)(void*, int, void**, const vsi_l_offset*, const size_t*) # Optional + ctypedef VSIRangeStatus (*VSIFilesystemPluginGetRangeStatusCallback)(void*, vsi_l_offset, vsi_l_offset) # Optional + ctypedef int (*VSIFilesystemPluginEofCallback)(void*) # Mandatory? 
+ ctypedef size_t (*VSIFilesystemPluginWriteCallback)(void*, const void*, size_t, size_t) + ctypedef int (*VSIFilesystemPluginFlushCallback)(void*) # Optional + ctypedef int (*VSIFilesystemPluginTruncateCallback)(void*, vsi_l_offset) + ctypedef int (*VSIFilesystemPluginCloseCallback)(void*) # Optional + # Plugin function container struct + ctypedef struct VSIFilesystemPluginCallbacksStruct: + void *pUserData + VSIFilesystemPluginStatCallback stat + VSIFilesystemPluginUnlinkCallback unlink + VSIFilesystemPluginRenameCallback rename + VSIFilesystemPluginMkdirCallback mkdir + VSIFilesystemPluginRmdirCallback rmdir + VSIFilesystemPluginReadDirCallback read_dir + VSIFilesystemPluginOpenCallback open + VSIFilesystemPluginTellCallback tell + VSIFilesystemPluginSeekCallback seek + VSIFilesystemPluginReadCallback read + VSIFilesystemPluginReadMultiRangeCallback read_multi_range + VSIFilesystemPluginGetRangeStatusCallback get_range_status + VSIFilesystemPluginEofCallback eof + VSIFilesystemPluginWriteCallback write + VSIFilesystemPluginFlushCallback flush + VSIFilesystemPluginTruncateCallback truncate + VSIFilesystemPluginCloseCallback close + size_t nBufferSize + size_t nCacheSize + VSIFilesystemPluginSiblingFilesCallback sibling_files + + int VSIInstallPluginHandler(const char*, const VSIFilesystemPluginCallbacksStruct*) + VSIFilesystemPluginCallbacksStruct* VSIAllocFilesystemPluginCallbacksStruct() + void VSIFreeFilesystemPluginCallbacksStruct(VSIFilesystemPluginCallbacksStruct*) + char** VSIGetFileSystemsPrefixes() unsigned char *VSIGetMemFileBuffer(const char *path, vsi_l_offset *data_len, @@ -68,72 +120,24 @@ cdef extern from "cpl_vsi.h" nogil: VSILFILE* VSIFOpenL(const char *path, const char *mode) int VSIFCloseL(VSILFILE *fp) int VSIUnlink(const char *path) - + int VSIMkdir(const char *path, long mode) + char** VSIReadDir(const char *path) + int VSIRmdir(const char *path) + int VSIRmdirRecursive(const char *path) int VSIFFlushL(VSILFILE *fp) size_t VSIFReadL(void 
*buffer, size_t nSize, size_t nCount, VSILFILE *fp) - char** VSIReadDir(const char* pszPath) int VSIFSeekL(VSILFILE *fp, vsi_l_offset nOffset, int nWhence) vsi_l_offset VSIFTellL(VSILFILE *fp) int VSIFTruncateL(VSILFILE *fp, vsi_l_offset nNewSize) size_t VSIFWriteL(void *buffer, size_t nSize, size_t nCount, VSILFILE *fp) - + int VSIStatL(const char *pszFilename, VSIStatBufL *psStatBuf) int VSIMkdir(const char *path, long mode) int VSIRmdir(const char *path) int VSIStatL(const char *pszFilename, VSIStatBufL *psStatBuf) - int VSI_ISDIR(int mode) - - -cdef extern from "ogr_srs_api.h" nogil: - - ctypedef int OGRErr - ctypedef void * OGRCoordinateTransformationH - ctypedef void * OGRSpatialReferenceH - - OGRCoordinateTransformationH OCTNewCoordinateTransformation( - OGRSpatialReferenceH source, - OGRSpatialReferenceH dest) - void OCTDestroyCoordinateTransformation( - OGRCoordinateTransformationH source) - int OCTTransform(OGRCoordinateTransformationH ct, int nCount, double *x, - double *y, double *z) - int OSRAutoIdentifyEPSG(OGRSpatialReferenceH srs) - void OSRCleanup() - OGRSpatialReferenceH OSRClone(OGRSpatialReferenceH srs) - int OSRExportToProj4(OGRSpatialReferenceH srs, char **params) - int OSRExportToWkt(OGRSpatialReferenceH srs, char **params) - const char *OSRGetAuthorityName(OGRSpatialReferenceH srs, const char *key) - const char *OSRGetAuthorityCode(OGRSpatialReferenceH srs, const char *key) - int OSRImportFromEPSG(OGRSpatialReferenceH srs, int code) - int OSRImportFromProj4(OGRSpatialReferenceH srs, const char *proj) - int OSRImportFromWkt(OGRSpatialReferenceH srs, char **wkt) - int OSRIsGeographic(OGRSpatialReferenceH srs) - int OSRIsProjected(OGRSpatialReferenceH srs) - int OSRIsSame(OGRSpatialReferenceH srs1, OGRSpatialReferenceH srs2) - OGRSpatialReferenceH OSRNewSpatialReference(const char *wkt) - void OSRRelease(OGRSpatialReferenceH srs) - int OSRSetFromUserInput(OGRSpatialReferenceH srs, const char *input) - double 
OSRGetLinearUnits(OGRSpatialReferenceH srs, char **ppszName) - double OSRGetAngularUnits(OGRSpatialReferenceH srs, char **ppszName) - int OSREPSGTreatsAsLatLong(OGRSpatialReferenceH srs) - int OSREPSGTreatsAsNorthingEasting(OGRSpatialReferenceH srs) - OGRSpatialReferenceH *OSRFindMatches(OGRSpatialReferenceH srs, char **options, int *entries, int **matchConfidence) - void OSRFreeSRSArray(OGRSpatialReferenceH *srs) - ctypedef enum OSRAxisMappingStrategy: - OAMS_TRADITIONAL_GIS_ORDER - - const char* OSRGetName(OGRSpatialReferenceH hSRS) - void OSRSetAxisMappingStrategy(OGRSpatialReferenceH hSRS, OSRAxisMappingStrategy) - void OSRSetPROJSearchPaths(const char *const *papszPaths) - char ** OSRGetPROJSearchPaths() - OGRErr OSRExportToWktEx(OGRSpatialReferenceH, char ** ppszResult, - const char* const* papszOptions) - OGRErr OSRExportToPROJJSON(OGRSpatialReferenceH hSRS, - char ** ppszReturn, - const char* const* papszOptions) + int VSI_ISDIR(int mode) cdef extern from "ogr_core.h" nogil: - ctypedef int OGRErr char *OGRGeometryTypeToName(int type) @@ -241,13 +245,58 @@ cdef extern from "ogr_core.h" nogil: double MaxY char * OGRGeometryTypeToName(int) - - char * ODsCCreateLayer = "CreateLayer" char * ODsCDeleteLayer = "DeleteLayer" char * ODsCTransactions = "Transactions" +cdef extern from "ogr_srs_api.h" nogil: + ctypedef void * OGRCoordinateTransformationH + ctypedef void * OGRSpatialReferenceH + + OGRCoordinateTransformationH OCTNewCoordinateTransformation( + OGRSpatialReferenceH source, + OGRSpatialReferenceH dest) + void OCTDestroyCoordinateTransformation( + OGRCoordinateTransformationH source) + int OCTTransform(OGRCoordinateTransformationH ct, int nCount, double *x, + double *y, double *z) + int OSRAutoIdentifyEPSG(OGRSpatialReferenceH srs) + void OSRCleanup() + OGRSpatialReferenceH OSRClone(OGRSpatialReferenceH srs) + int OSRExportToProj4(OGRSpatialReferenceH srs, char **params) + int OSRExportToWkt(OGRSpatialReferenceH srs, char **params) + const char 
*OSRGetAuthorityName(OGRSpatialReferenceH srs, const char *key) + const char *OSRGetAuthorityCode(OGRSpatialReferenceH srs, const char *key) + int OSRImportFromEPSG(OGRSpatialReferenceH srs, int code) + int OSRImportFromProj4(OGRSpatialReferenceH srs, const char *proj) + int OSRImportFromWkt(OGRSpatialReferenceH srs, char **wkt) + int OSRIsGeographic(OGRSpatialReferenceH srs) + int OSRIsProjected(OGRSpatialReferenceH srs) + int OSRIsSame(OGRSpatialReferenceH srs1, OGRSpatialReferenceH srs2) + OGRSpatialReferenceH OSRNewSpatialReference(const char *wkt) + void OSRRelease(OGRSpatialReferenceH srs) + int OSRSetFromUserInput(OGRSpatialReferenceH srs, const char *input) + double OSRGetLinearUnits(OGRSpatialReferenceH srs, char **ppszName) + double OSRGetAngularUnits(OGRSpatialReferenceH srs, char **ppszName) + int OSREPSGTreatsAsLatLong(OGRSpatialReferenceH srs) + int OSREPSGTreatsAsNorthingEasting(OGRSpatialReferenceH srs) + OGRSpatialReferenceH *OSRFindMatches(OGRSpatialReferenceH srs, char **options, int *entries, int **matchConfidence) + void OSRFreeSRSArray(OGRSpatialReferenceH *srs) + ctypedef enum OSRAxisMappingStrategy: + OAMS_TRADITIONAL_GIS_ORDER + + const char* OSRGetName(OGRSpatialReferenceH hSRS) + void OSRSetAxisMappingStrategy(OGRSpatialReferenceH hSRS, OSRAxisMappingStrategy) + void OSRSetPROJSearchPaths(const char *const *papszPaths) + char ** OSRGetPROJSearchPaths() + OGRErr OSRExportToWktEx(OGRSpatialReferenceH, char ** ppszResult, + const char* const* papszOptions) + OGRErr OSRExportToPROJJSON(OGRSpatialReferenceH hSRS, + char ** ppszReturn, + const char* const* papszOptions) + + cdef extern from "gdal.h" nogil: ctypedef void * GDALMajorObjectH diff --git a/fiona/ogrext.pyx b/fiona/ogrext.pyx index bb21de005..c63004ae5 100644 --- a/fiona/ogrext.pyx +++ b/fiona/ogrext.pyx @@ -33,7 +33,7 @@ from fiona.errors import ( TransactionError, GeometryTypeValidationError, DatasetDeleteError, AttributeFilterError, FeatureWarning, FionaDeprecationWarning, 
UnsupportedGeometryTypeError) from fiona.model import decode_object, Feature, Geometry, Properties -from fiona.path import vsi_path +from fiona._path import _vsi_path from fiona.rfc3339 import parse_date, parse_datetime, parse_time from fiona.rfc3339 import FionaDateType, FionaDateTimeType, FionaTimeType from fiona.schema import FIELD_TYPES, FIELD_TYPES_MAP, normalize_field_type diff --git a/fiona/path.py b/fiona/path.py index 2087bc5b9..72f25ac8b 100644 --- a/fiona/path.py +++ b/fiona/path.py @@ -1,192 +1,17 @@ -"""Dataset paths, identifiers, and filenames""" +"""Dataset paths, identifiers, and filenames -import re -import sys +Note well: this module is deprecated in 1.3.0 and will be removed in a +future version. +""" -import attr +import warnings -from urllib.parse import urlparse +from fiona._path import _ParsedPath as ParsedPath +from fiona._path import _UnparsedPath as UnparsedPath +from fiona._path import _parse_path as parse_path +from fiona._path import _vsi_path as vsi_path +from fiona.errors import FionaDeprecationWarning -# Supported URI schemes and their mapping to GDAL's VSI suffix. -# TODO: extend for other cloud plaforms. -SCHEMES = { - "ftp": "curl", - "gzip": "gzip", - "http": "curl", - "https": "curl", - "s3": "s3", - "tar": "tar", - "zip": "zip", - "file": "file", - "gs": "gs", - "oss": "oss", - "az": "az", -} - -CURLSCHEMES = {k for k, v in SCHEMES.items() if v == 'curl'} - -# TODO: extend for other cloud plaforms. -REMOTESCHEMES = { - k for k, v in SCHEMES.items() if v in ("curl", "s3", "gs", "oss", "az") -} - - -class Path: - """Base class for dataset paths""" - - -@attr.s(slots=True) -class ParsedPath(Path): - """Result of parsing a dataset URI/Path - - Attributes - ---------- - path : str - Parsed path. Includes the hostname and query string in the case - of a URI. - archive : str - Parsed archive path. - scheme : str - URI scheme such as "https" or "zip+s3". 
- """ - path = attr.ib() - archive = attr.ib() - scheme = attr.ib() - - @classmethod - def from_uri(cls, uri): - parts = urlparse(uri) - path = parts.path - scheme = parts.scheme or None - - if parts.query: - path += "?" + parts.query - - if parts.scheme and parts.netloc: - path = parts.netloc + path - - parts = path.split('!') - path = parts.pop() if parts else None - archive = parts.pop() if parts else None - return ParsedPath(path, archive, scheme) - - @property - def name(self): - """The parsed path's original URI""" - if not self.scheme: - return self.path - elif self.archive: - return f"{self.scheme}://{self.archive}!{self.path}" - else: - return f"{self.scheme}://{self.path}" - - @property - def is_remote(self): - """Test if the path is a remote, network URI""" - return self.scheme and self.scheme.split('+')[-1] in REMOTESCHEMES - - @property - def is_local(self): - """Test if the path is a local URI""" - return not self.scheme or (self.scheme and self.scheme.split('+')[-1] not in REMOTESCHEMES) - - -@attr.s(slots=True) -class UnparsedPath(Path): - """Encapsulates legacy GDAL filenames - - Attributes - ---------- - path : str - The legacy GDAL filename. - """ - path = attr.ib() - - @property - def name(self): - """The unparsed path's original path""" - return self.path - - -def parse_path(path): - """Parse a dataset's identifier or path into its parts - - Parameters - ---------- - path : str or path-like object - The path to be parsed. - - Returns - ------- - ParsedPath or UnparsedPath - - Notes - ----- - When legacy GDAL filenames are encountered, they will be returned - in a UnparsedPath. - """ - if isinstance(path, Path): - return path - - # Windows drive letters (e.g. 
"C:\") confuse `urlparse` as they look like - # URL schemes - elif sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path): - return UnparsedPath(path) - - elif path.startswith('/vsi'): - return UnparsedPath(path) - - elif re.match("^[a-z0-9\\+]*://", path): - parts = urlparse(path) - - # if the scheme is not one of Rasterio's supported schemes, we - # return an UnparsedPath. - if parts.scheme and not all(p in SCHEMES for p in parts.scheme.split('+')): - return UnparsedPath(path) - - else: - return ParsedPath.from_uri(path) - - else: - return UnparsedPath(path) - - -def vsi_path(path): - """Convert a parsed path to a GDAL VSI path - - Parameters - ---------- - path : Path - A ParsedPath or UnparsedPath object. - - Returns - ------- - str - """ - if isinstance(path, UnparsedPath): - return path.path - - elif isinstance(path, ParsedPath): - - if not path.scheme: - return path.path - - else: - if path.scheme.split('+')[-1] in CURLSCHEMES: - suffix = f"{path.scheme.split('+')[-1]}://" - else: - suffix = '' - - prefix = '/'.join(f'vsi{SCHEMES[p]}' for p in path.scheme.split('+') if p != 'file') - - if prefix: - if path.archive: - result = f"/{prefix}/{suffix}{path.archive}/{path.path.lstrip('/')}" - else: - result = f'/{prefix}/{suffix}{path.path}' - else: - result = path.path - return result - - else: - raise ValueError("path must be a ParsedPath or UnparsedPath object") +warnings.warn( + "fiona.path will be removed in version 2.0.", FionaDeprecationWarning +) diff --git a/fiona/session.py b/fiona/session.py index 30ac0b5fc..9d3571d73 100644 --- a/fiona/session.py +++ b/fiona/session.py @@ -4,7 +4,7 @@ import os import warnings -from fiona.path import parse_path, UnparsedPath +from fiona._path import _parse_path, _UnparsedPath log = logging.getLogger(__name__) @@ -97,9 +97,9 @@ def cls_from_path(path): if not path: return DummySession - path = parse_path(path) + path = _parse_path(path) - if isinstance(path, UnparsedPath) or path.is_local: + if isinstance(path, 
_UnparsedPath) or path.is_local: return DummySession elif ( diff --git a/pyproject.toml b/pyproject.toml index 94f84af2d..7f9c8359f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,5 @@ [build-system] -requires = [ - "cython~=3.0.2", - "oldest-supported-numpy", - "setuptools>=67.8", - "wheel", -] +requires = ["setuptools>=67.8", "cython~=3.0.2", "numpy>=1.25,<2"] build-backend = "setuptools.build_meta" [project] @@ -32,7 +27,6 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: GIS", ] - requires-python = ">=3.7" dependencies = [ "attrs>=19.2.0", @@ -41,6 +35,7 @@ dependencies = [ "click-plugins>=1.0", "cligj>=0.5", 'importlib-metadata;python_version<"3.10"', + "numpy>=1.25,<2", ] [project.optional-dependencies] diff --git a/requirements-dev.txt b/requirements-dev.txt index ab7d06d0a..637a1cff9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,9 +1,13 @@ -r requirements.txt + +aiohttp boto3>=1.3.1 coverage~=6.5 cython>=3 +fsspec pytest~=7.2 pytest-cov~=4.0 pytz==2022.6 +requests setuptools wheel diff --git a/requirements.txt b/requirements.txt index 8d5f9bcef..d95684658 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ click-plugins cligj>=0.5.0 importlib-metadata;python_version<"3.10" munch>=2.3.2 +numpy>=1.25,<2 certifi diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..f1c7afc83 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,17 @@ +[options.entry_points] +console_scripts = + fio = fiona.fio.main:main_group +fiona.fio_commands = + bounds = fiona.fio.bounds:bounds + calc = fiona.fio.calc:calc + cat = fiona.fio.cat:cat + collect = fiona.fio.collect:collect + distrib = fiona.fio.distrib:distrib + dump = fiona.fio.dump:dump + env = fiona.fio.env:env + filter = fiona.fio.filter:filter + info = fiona.fio.info:info + insp = fiona.fio.insp:insp + load = fiona.fio.load:load + ls = fiona.fio.ls:ls + rm = fiona.fio.rm:rm diff --git a/setup.py b/setup.py index 
6cc63e11f..140e588e5 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,12 @@ def copy_data_tree(datadir, destdir): gdal_major_version = 0 gdal_minor_version = 0 +try: + import numpy as np + include_dirs.append(np.get_include()) +except ImportError: + raise SystemExit("ERROR: Numpy and its headers are required to run setup().") + if 'clean' not in sys.argv: try: gdal_config = os.environ.get('GDAL_CONFIG', 'gdal-config') @@ -191,6 +197,7 @@ def copy_data_tree(datadir, destdir): ext_modules = cythonize( [ Extension("fiona._geometry", ["fiona/_geometry.pyx"], **ext_options), + Extension("fiona._vsiopener", ["fiona/_vsiopener.pyx"], **ext_options), Extension("fiona.schema", ["fiona/schema.pyx"], **ext_options), Extension("fiona._transform", ["fiona/_transform.pyx"], **ext_options_cpp), Extension("fiona.crs", ["fiona/crs.pyx"], **ext_options), diff --git a/tests/test_pyopener.py b/tests/test_pyopener.py new file mode 100644 index 000000000..8170a893d --- /dev/null +++ b/tests/test_pyopener.py @@ -0,0 +1,27 @@ +"""Tests of the Python opener VSI plugin.""" + +import io + +import fsspec +import pytest + +import fiona + + +def test_opener_io_open(path_grenada_geojson): + """Use io.open as opener.""" + with fiona.open(path_grenada_geojson, opener=io.open) as colxn: + profile = colxn.profile + assert profile["driver"] == "GeoJSON" + assert len(colxn) == 1 + + +def test_opener_fsspec_zip_fs(): + """Use fsspec zip filesystem as opener.""" + fs = fsspec.filesystem("zip", fo="tests/data/coutwildrnp.zip") + with fiona.open("coutwildrnp.shp", opener=fs) as colxn: + profile = colxn.profile + assert profile["driver"] == "ESRI Shapefile" + assert len(colxn) == 67 + assert colxn.schema["geometry"] == "Polygon" + assert "AGBUR" in colxn.schema["properties"] From 62b1a2f37c406082e5eb05496b7397127f8d9d46 Mon Sep 17 00:00:00 2001 From: Sean Gillies Date: Thu, 29 Feb 2024 10:26:05 -0700 Subject: [PATCH 2/2] Add two more virtual filesystem tests --- Makefile | 6 ++++++ tests/test_pyopener.py 
| 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/Makefile b/Makefile index bcd5874c6..242fa6398 100644 --- a/Makefile +++ b/Makefile @@ -46,3 +46,9 @@ dockergdb: dockertestimage dockerdocs: dockertestimage docker run -it -v $(shell pwd):/app --entrypoint=/bin/bash fiona:$(GDAL)-py$(PYTHON_VERSION) -c 'source /venv/bin/activate && cd docs && make clean && make html' + +dockertestimage-amd64: + docker build --platform linux/amd64 --target gdal --build-arg GDAL=$(GDAL) --build-arg PYTHON_VERSION=$(PYTHON_VERSION) -t fiona-amd64:$(GDAL)-py$(PYTHON_VERSION) . + +dockertest-amd64: dockertestimage-amd64 + docker run -it -v $(shell pwd):/app -v /tmp:/tmp --env AWS_ACCESS_KEY_ID --env AWS_SECRET_ACCESS_KEY --entrypoint=/bin/bash fiona-amd64:$(GDAL)-py$(PYTHON_VERSION) -c '/venv/bin/python -m pip install --editable . --no-build-isolation && /venv/bin/python -B -m pytest -m "not wheel" --cov fiona --cov-report term-missing $(OPTS)' diff --git a/tests/test_pyopener.py b/tests/test_pyopener.py index 8170a893d..b92f25d5a 100644 --- a/tests/test_pyopener.py +++ b/tests/test_pyopener.py @@ -25,3 +25,30 @@ def test_opener_fsspec_zip_fs(): assert len(colxn) == 67 assert colxn.schema["geometry"] == "Polygon" assert "AGBUR" in colxn.schema["properties"] + + +def test_opener_fsspec_zip_http_fs(): + """Use fsspec zip+http filesystem as opener.""" + fs = fsspec.filesystem( + "zip", + target_protocol="http", + fo="https://github.com/Toblerity/Fiona/files/11151652/coutwildrnp.zip", + ) + with fiona.open("coutwildrnp.shp", opener=fs) as colxn: + profile = colxn.profile + assert profile["driver"] == "ESRI Shapefile" + assert len(colxn) == 67 + assert colxn.schema["geometry"] == "Polygon" + assert "AGBUR" in colxn.schema["properties"] + + +def test_opener_tiledb_file(): + """Use tiledb vfs as opener.""" + tiledb = pytest.importorskip("tiledb") + fs = tiledb.VFS() + with fiona.open("tests/data/coutwildrnp.shp", opener=fs) as colxn: + profile = colxn.profile + 
assert profile["driver"] == "ESRI Shapefile" + assert len(colxn) == 67 + assert colxn.schema["geometry"] == "Polygon" + assert "AGBUR" in colxn.schema["properties"]