From ecfaa54d494954e90d807dce59f951615c4e39ff Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 17 Feb 2023 01:23:41 +0000 Subject: [PATCH 01/17] a little cleanup + inclusion of deprecat library for deprecation warnings --- datacube/drivers/postgis/_fields.py | 4 +- datacube/drivers/postgres/_fields.py | 4 +- datacube/model/__init__.py | 10 +++- datacube/utils/__init__.py | 2 - datacube/utils/documents.py | 73 ++++++++++------------------ setup.py | 1 + tests/test_utils_docs.py | 12 ++--- 7 files changed, 44 insertions(+), 62 deletions(-) diff --git a/datacube/drivers/postgis/_fields.py b/datacube/drivers/postgis/_fields.py index c0066d4149..f17f702f45 100755 --- a/datacube/drivers/postgis/_fields.py +++ b/datacube/drivers/postgis/_fields.py @@ -22,7 +22,7 @@ from datacube import utils from datacube.model.fields import Expression, Field from datacube.model import Range -from datacube.utils import get_doc_offset_safe +from datacube.utils import get_doc_offset from datacube.drivers.postgis._schema import Dataset, search_field_index_map from datacube.utils import cached_property @@ -197,7 +197,7 @@ def _extract_offset_value(self, doc, doc_offsets, agg_function): # It's a single offset. doc_offsets = [doc_offsets] - values = (get_doc_offset_safe(offset, doc) for offset in doc_offsets) + values = (get_doc_offset(offset, doc) for offset in doc_offsets) values = [self.parse_value(v) for v in values if v is not None] if not values: diff --git a/datacube/drivers/postgres/_fields.py b/datacube/drivers/postgres/_fields.py index 68819e7ab9..c4a464f4c9 100755 --- a/datacube/drivers/postgres/_fields.py +++ b/datacube/drivers/postgres/_fields.py @@ -20,7 +20,7 @@ from datacube import utils from datacube.model.fields import Expression, Field from datacube.model import Range -from datacube.utils import get_doc_offset_safe +from datacube.utils import get_doc_offset from .sql import FLOAT8RANGE from datacube.utils.dates import tz_aware @@ -167,7 +167,7 @@ def _extract_offset_value(self, doc, doc_offsets, agg_function): # It's a single offset. doc_offsets = [doc_offsets] - values = (get_doc_offset_safe(offset, doc) for offset in doc_offsets) + values = (get_doc_offset(offset, doc) for offset in doc_offsets) values = [self.parse_value(v) for v in values if v is not None] if not values: diff --git a/datacube/model/__init__.py b/datacube/model/__init__.py index c2db5cfcdc..39a87c7254 100644 --- a/datacube/model/__init__.py +++ b/datacube/model/__init__.py @@ -34,6 +34,8 @@ "ExtraDimensions", "IngestorConfig" ] +from deprecat import deprecat + _LOG = logging.getLogger(__name__) DEFAULT_SPATIAL_DIMS = ('y', 'x') # Used when product lacks grid_spec @@ -93,6 +95,9 @@ def __init__(self, self.archived_time = archived_time @property + @deprecat( + reason="The 'type' attribute has been deprecated. Please use the 'product' attribute instead.", + version='1.9.0') def type(self) -> "Product": # For compatibility return self.product @@ -231,6 +236,9 @@ def is_archived(self) -> bool: return self.archived_time is not None @property + @deprecat( + reason="The 'is_active' attribute has been deprecated. Please use 'is_archived' instead.", + version="1.9.0") def is_active(self) -> bool: """ Is this dataset active? 
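The two hunks above stack `deprecat` under `@property`, so each getter keeps its old behaviour and every access additionally emits a deprecation warning. A minimal standalone sketch of that pattern follows; the `Sample` class and the captured-warning check are illustrative assumptions rather than datacube code, and it presumes deprecat's default `DeprecationWarning` category:

    import warnings
    from deprecat import deprecat

    class Sample:
        def __init__(self, product):
            self.product = product

        @property
        @deprecat(reason="Use 'product' instead.", version="1.9.0")
        def type(self):
            # behaviour is unchanged; the decorator only adds the warning
            return self.product

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        value = Sample("ls8_ard").type
    print(value)                          # ls8_ard -- the property still works
    print(caught[0].category.__name__)    # expected: DeprecationWarning
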
@@ -307,7 +315,7 @@ def __repr__(self) -> str: @property def metadata(self) -> DocReader: - return self.metadata_type.dataset_reader(self.metadata_doc) + return self.metadata_type.dataset_reader(self._metadata_doc) def metadata_doc_without_lineage(self) -> Dict[str, Any]: """ Return metadata document without nested lineage datasets diff --git a/datacube/utils/__init__.py b/datacube/utils/__init__.py index 4c064d6b98..1596a1e7bf 100644 --- a/datacube/utils/__init__.py +++ b/datacube/utils/__init__.py @@ -21,7 +21,6 @@ validate_document, NoDatesSafeLoader, get_doc_offset, - get_doc_offset_safe, netcdf_extract_string, without_lineage_sources, schema_validated, @@ -61,7 +60,6 @@ "validate_document", "NoDatesSafeLoader", "get_doc_offset", - "get_doc_offset_safe", "netcdf_extract_string", "without_lineage_sources", "unsqueeze_data_array", diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index 1fe8724121..2089ee46d5 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -13,7 +13,6 @@ import collections.abc from collections import OrderedDict from contextlib import contextmanager -from itertools import chain from pathlib import Path from urllib.parse import urlparse from urllib.request import urlopen @@ -21,6 +20,8 @@ from copy import deepcopy from uuid import UUID +from deprecat import deprecat + import numpy import toolz # type: ignore[import] import yaml @@ -275,22 +276,13 @@ class UnknownMetadataType(InvalidDocException): pass -def get_doc_offset(offset, document): - """ - :type offset: list[str] - :type document: dict - - """ - return toolz.get_in(offset, document, no_default=True) - - -def get_doc_offset_safe(offset, document, value_if_missing=None): +def get_doc_offset(offset, document, default=None): """ :type offset: list[str] :type document: dict """ - return toolz.get_in(offset, document, default=value_if_missing) + return toolz.get_in(offset, document, default=default) def documents_equal(d1, d2): @@ -440,7 +432,7 @@ def id(self): def sources(self): if self._sources is None: self._sources = {k: SimpleDocNav(v) - for k, v in get_doc_offset_safe(self._sources_path, self._doc, {}).items()} + for k, v in get_doc_offset(self._sources_path, self._doc, {}).items()} return self._sources @property @@ -483,21 +475,17 @@ def __init__(self, type_definition, search_fields, doc): # The field offsets that the datacube itself understands: id, format, sources etc. # (See the metadata-type-schema.yaml or the comments in default-metadata-types.yaml) - self.__dict__['_system_offsets'] = {name: field - for name, field in type_definition.items() + self.__dict__['_system_offsets'] = {name: offset + for name, offset in type_definition.items() if name != 'search_fields'} def __getattr__(self, name): - offset = self._system_offsets.get(name) - field = self._search_fields.get(name) - if offset: - return get_doc_offset_safe(offset, self._doc) - elif field: - return field.extract(self._doc) + if name in self.fields.keys(): + return self.fields[name] else: raise AttributeError( 'Unknown field %r. Expected one of %r' % ( - name, list(chain(self._system_offsets.keys(), self._search_fields.keys())) + name, list(self.fields.keys()) ) ) @@ -506,42 +494,35 @@ def __setattr__(self, name, val): if offset is None: raise AttributeError( 'Unknown field offset %r. 
Expected one of %r' % ( - name, list(self._fields.keys()) + name, list(self.system_fields.keys()) ) ) - return _set_doc_offset(offset, self._doc, val) + return _set_doc_offset(offset, self.doc, val) + + def __dir__(self): + return list(self.fields) @property - def fields(self): - fields = {} - fields.update(self.search_fields) - fields.update(self.system_fields) - return fields + def doc(self): + return self._doc @property def search_fields(self): - fields = {} - for name, field in self._search_fields.items(): - try: - fields[name] = field.extract(self._doc) - except (AttributeError, KeyError, ValueError): - continue - return fields + return {name: field.extract(self.doc) + for name, field in self._search_fields.items()} @property def system_fields(self): - fields = {} - for name, offset in self._system_offsets.items(): - try: - fields[name] = get_doc_offset(offset, self._doc) - except (AttributeError, KeyError, ValueError): - continue - return fields + return {name: field + for name, offset in self._system_offsets.items() + if (field := get_doc_offset(offset, self.doc) is not None)} - def __dir__(self): - return list(self.fields) + @property + def fields(self): + return {**self.system_fields, **self.search_fields} +@deprecat(deprecated_args={'inplace': {'version': '1.9.0', 'reason': 'not being used'}}) def without_lineage_sources(doc: Dict[str, Any], spec, inplace: bool = False) -> Dict[str, Any]: @@ -551,7 +532,7 @@ def without_lineage_sources(doc: Dict[str, Any], :param spec: Product or MetadataType according to which `doc` to be interpreted :param bool inplace: If True modify `doc` in place """ - + # TODO: the inplace param doesn't seem to be used if not inplace: doc = deepcopy(doc) diff --git a/setup.py b/setup.py index 0802a713f3..7821b02e4c 100755 --- a/setup.py +++ b/setup.py @@ -111,6 +111,7 @@ 'toolz', 'xarray>=0.9', # >0.9 fixes most problems with `crs` attributes being lost 'packaging', + 'deprecat', ], extras_require=extras_require, tests_require=tests_require, diff --git a/tests/test_utils_docs.py b/tests/test_utils_docs.py index 80d4f0e010..8b8f50c9e2 100644 --- a/tests/test_utils_docs.py +++ b/tests/test_utils_docs.py @@ -32,7 +32,6 @@ DocReader, is_supported_document_type, get_doc_offset, - get_doc_offset_safe, _set_doc_offset, transform_object_tree, metadata_subset, @@ -583,14 +582,9 @@ def test_is_supported_doc_type(): def test_doc_offset(): assert get_doc_offset(['a'], {'a': 4}) == 4 assert get_doc_offset(['a', 'b'], {'a': {'b': 4}}) == 4 - with pytest.raises(KeyError): - get_doc_offset(['a'], {}) - - assert get_doc_offset_safe(['a'], {'a': 4}) == 4 - assert get_doc_offset_safe(['a', 'b'], {'a': {'b': 4}}) == 4 - assert get_doc_offset_safe(['a'], {}) is None - assert get_doc_offset_safe(['a', 'b', 'c'], {'a': {'b': {}}}, 10) == 10 - assert get_doc_offset_safe(['a', 'b', 'c'], {'a': {'b': []}}, 11) == 11 + assert get_doc_offset(['a'], {}) is None + assert get_doc_offset(['a', 'b', 'c'], {'a': {'b': {}}}, 10) == 10 + assert get_doc_offset(['a', 'b', 'c'], {'a': {'b': []}}, 11) == 11 doc = {'a': 4} _set_doc_offset(['a'], doc, 5) From 7da071aa430321b7a1f0745969c19167d3f5a069 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 17 Feb 2023 02:26:08 +0000 Subject: [PATCH 02/17] update docker constraints to include deprecat --- docker/constraints.in | 2 ++ docker/constraints.txt | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/docker/constraints.in b/docker/constraints.in index be24f578a2..8937c2f9fa 100644 --- a/docker/constraints.in 
+++ b/docker/constraints.in @@ -57,3 +57,5 @@ setuptools_scm>=3.4 toml wheel twine + +deprecat diff --git a/docker/constraints.txt b/docker/constraints.txt index 114dd381a9..84c4b70d05 100644 --- a/docker/constraints.txt +++ b/docker/constraints.txt @@ -103,6 +103,8 @@ dask==2023.2.0 # distributed decorator==5.1.1 # via validators +deprecat==2.1.1 + # via -r constraints.in distributed==2023.2.0 # via -r constraints.in docutils==0.18.1 @@ -137,7 +139,12 @@ imagesize==1.4.1 importlib-metadata==6.0.0 # via # keyring + # sphinx # twine +importlib-resources==5.10.2 + # via + # jsonschema + # keyring iniconfig==2.0.0 # via pytest isodate==0.6.1 @@ -237,6 +244,8 @@ pillow==9.4.0 # via matplotlib pkginfo==1.9.6 # via twine +pkgutil-resolve-name==1.3.10 + # via jsonschema pluggy==1.0.0 # via pytest psutil==5.9.4 @@ -410,6 +419,7 @@ types-toml==0.10.8.3 typing-extensions==4.4.0 # via # pygeoif + # rich # setuptools-scm # sqlalchemy urllib3==1.26.14 @@ -430,7 +440,9 @@ werkzeug==2.2.2 wheel==0.38.4 # via -r constraints.in wrapt==1.11.2 - # via astroid + # via + # astroid + # deprecat xarray==2023.2.0 # via -r constraints.in xmltodict==0.13.0 From b58c2f080eb57748768b3f974566c9fb6433d658 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 17 Feb 2023 02:35:12 +0000 Subject: [PATCH 03/17] fix typo --- datacube/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacube/model/__init__.py b/datacube/model/__init__.py index 39a87c7254..a54356940c 100644 --- a/datacube/model/__init__.py +++ b/datacube/model/__init__.py @@ -315,7 +315,7 @@ def __repr__(self) -> str: @property def metadata(self) -> DocReader: - return self.metadata_type.dataset_reader(self._metadata_doc) + return self.metadata_type.dataset_reader(self.metadata_doc) def metadata_doc_without_lineage(self) -> Dict[str, Any]: """ Return metadata document without nested lineage datasets From b73c4ed3c928372b93e70f41b9d53be156e1bccc Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 20 Feb 2023 01:50:01 +0000 Subject: [PATCH 04/17] replace _set_doc_offset with toolz.update_in --- datacube/utils/documents.py | 13 +------------ tests/test_utils_docs.py | 8 -------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index 2089ee46d5..175dd93abe 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -449,17 +449,6 @@ def without_location(self): return SimpleDocNav(toolz.dissoc(self._doc, 'location')) -def _set_doc_offset(offset, document, value): - """ - :type offset: list[str] - :type document: dict - - """ - read_offset = offset[:-1] - sub_doc = get_doc_offset(read_offset, document) - sub_doc[offset[-1]] = value - - class DocReader(object): def __init__(self, type_definition, search_fields, doc): """ @@ -497,7 +486,7 @@ def __setattr__(self, name, val): name, list(self.system_fields.keys()) ) ) - return _set_doc_offset(offset, self.doc, val) + self.doc = toolz.update_in(self.doc, offset, lambda _: val) def __dir__(self): return list(self.fields) diff --git a/tests/test_utils_docs.py b/tests/test_utils_docs.py index 8b8f50c9e2..4eacd394f7 100644 --- a/tests/test_utils_docs.py +++ b/tests/test_utils_docs.py @@ -32,7 +32,6 @@ DocReader, is_supported_document_type, get_doc_offset, - _set_doc_offset, transform_object_tree, metadata_subset, ) @@ -586,13 +585,6 @@ def test_doc_offset(): assert get_doc_offset(['a', 'b', 'c'], {'a': {'b': {}}}, 10) == 10 assert get_doc_offset(['a', 'b', 'c'], {'a': {'b': []}}, 11) == 
11 - doc = {'a': 4} - _set_doc_offset(['a'], doc, 5) - assert doc == {'a': 5} - doc = {'a': {'b': 4}} - _set_doc_offset(['a', 'b'], doc, 'c') - assert doc == {'a': {'b': 'c'}} - def test_transform_object_tree(): def add_one(a): From a0fba3f0b80d43d1e35a8cd7fac86a498c290efe Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 20 Feb 2023 04:43:05 +0000 Subject: [PATCH 05/17] remove unnecessary param; import --- datacube/model/__init__.py | 2 +- datacube/model/properties.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/datacube/model/__init__.py b/datacube/model/__init__.py index a54356940c..e6be9b82f3 100644 --- a/datacube/model/__init__.py +++ b/datacube/model/__init__.py @@ -21,7 +21,7 @@ schema_validated, DocReader from datacube.index.eo3 import is_doc_eo3 from .fields import Field, get_dataset_fields -from ._base import Range, ranges_overlap # noqa: F401 +from ._base import Range from .eo3 import validate_eo3_compatible_type from .lineage import LineageDirection, LineageTree, LineageRelation, InconsistentLineageException # noqa: F401 diff --git a/datacube/model/properties.py b/datacube/model/properties.py index 01bec1bdfc..6408057f93 100644 --- a/datacube/model/properties.py +++ b/datacube/model/properties.py @@ -63,7 +63,7 @@ def datetime_type(value): def of_enum_type( - vals: Union[EnumMeta, Tuple[str, ...]] = None, lower=False, upper=False, strict=True + vals: Union[EnumMeta, Tuple[str, ...]] = None, lower=False, strict=True ) -> Callable[[str], str]: if isinstance(vals, EnumMeta): vals = tuple(vals.__members__.keys()) @@ -72,8 +72,6 @@ def normalise(v: str): if isinstance(v, Enum): v = v.name - if upper: - v = v.upper() if lower: v = v.lower() From 8c8e8dfbe10a86c5012f5f7fec3991e1a2cb273b Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 24 Feb 2023 03:38:16 +0000 Subject: [PATCH 06/17] Replace DatasetType with Product --- datacube/api/grid_workflow.py | 2 +- datacube/index/memory/_products.py | 2 +- datacube/index/null/_datasets.py | 4 ++-- datacube/index/null/_products.py | 6 +++--- datacube/index/postgres/index.py | 2 +- datacube/model/utils.py | 2 +- datacube/scripts/ingest.py | 10 +++++----- datacube/testutils/__init__.py | 4 ++-- datacube/virtual/impl.py | 20 ++++++++++---------- 9 files changed, 26 insertions(+), 26 deletions(-) diff --git a/datacube/api/grid_workflow.py b/datacube/api/grid_workflow.py index d61595b38d..3c86451a8d 100644 --- a/datacube/api/grid_workflow.py +++ b/datacube/api/grid_workflow.py @@ -70,7 +70,7 @@ def shape(self): @property def product(self): """ - :rtype: datacube.model.DatasetType + :rtype: datacube.model.Product """ return self.sources.values[0][0].product diff --git a/datacube/index/memory/_products.py b/datacube/index/memory/_products.py index bad72c5371..d0cce50152 100644 --- a/datacube/index/memory/_products.py +++ b/datacube/index/memory/_products.py @@ -9,7 +9,7 @@ from datacube.index.fields import as_expression from datacube.index.abstract import AbstractProductResource, QueryField from datacube.index.memory._metadata_types import MetadataTypeResource -from datacube.model import DatasetType as Product +from datacube.model import Product from datacube.utils import changes, jsonify_document, _readable_offset from datacube.utils.changes import AllowPolicy, Change, Offset, check_doc_unchanged, get_doc_changes, classify_changes from datacube.utils.documents import metadata_subset diff --git a/datacube/index/null/_datasets.py b/datacube/index/null/_datasets.py index f8686669d0..42c9ec351e 100755 --- 
a/datacube/index/null/_datasets.py +++ b/datacube/index/null/_datasets.py @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 from datacube.index.abstract import AbstractDatasetResource, DSID -from datacube.model import Dataset, DatasetType +from datacube.model import Dataset, Product from typing import Iterable @@ -31,7 +31,7 @@ def add(self, dataset: Dataset, with_lineage: bool = True) -> Dataset: raise NotImplementedError() - def search_product_duplicates(self, product: DatasetType, *args): + def search_product_duplicates(self, product: Product, *args): return [] def can_update(self, dataset, updates_allowed=None): diff --git a/datacube/index/null/_products.py b/datacube/index/null/_products.py index 6e4f3892b2..a2788a283b 100644 --- a/datacube/index/null/_products.py +++ b/datacube/index/null/_products.py @@ -5,7 +5,7 @@ import logging from datacube.index.abstract import AbstractProductResource -from datacube.model import DatasetType +from datacube.model import Product from typing import Iterable @@ -22,7 +22,7 @@ def add(self, product, allow_table_lock=False): def can_update(self, product, allow_unsafe_updates=False, allow_table_lock=False): raise NotImplementedError() - def update(self, product: DatasetType, allow_unsafe_updates=False, allow_table_lock=False): + def update(self, product: Product, allow_unsafe_updates=False, allow_table_lock=False): raise NotImplementedError() def get_unsafe(self, id_): @@ -40,5 +40,5 @@ def search_robust(self, **query): def search_by_metadata(self, metadata): return [] - def get_all(self) -> Iterable[DatasetType]: + def get_all(self) -> Iterable[Product]: return [] diff --git a/datacube/index/postgres/index.py b/datacube/index/postgres/index.py index 786e4615b3..e2ea9b7544 100644 --- a/datacube/index/postgres/index.py +++ b/datacube/index/postgres/index.py @@ -32,7 +32,7 @@ class Index(AbstractIndex): other connections are active. Or else use a separate instance of this class in each process. 
:ivar datacube.index._datasets.DatasetResource datasets: store and retrieve :class:`datacube.model.Dataset` - :ivar datacube.index._products.ProductResource products: store and retrieve :class:`datacube.model.DatasetType`\ + :ivar datacube.index._products.ProductResource products: store and retrieve :class:`datacube.model.Product`\ (should really be called Product) :ivar datacube.index._metadata_types.MetadataTypeResource metadata_types: store and retrieve \ :class:`datacube.model.MetadataType` diff --git a/datacube/model/utils.py b/datacube/model/utils.py index 90746d5197..21160cf9fb 100644 --- a/datacube/model/utils.py +++ b/datacube/model/utils.py @@ -197,7 +197,7 @@ def make_dataset(product, sources, extent, center_time, valid_data=None, uri=Non """ Create :class:`datacube.model.Dataset` for the data - :param DatasetType product: Product the dataset is part of + :param Product product: Product the dataset is part of :param list[:class:`Dataset`] sources: datasets used to produce the dataset :param Geometry extent: extent of the dataset :param Geometry valid_data: extent of the valid data diff --git a/datacube/scripts/ingest.py b/datacube/scripts/ingest.py index 184edacc69..468999c17d 100644 --- a/datacube/scripts/ingest.py +++ b/datacube/scripts/ingest.py @@ -17,7 +17,7 @@ import datacube from datacube.api.core import Datacube from datacube.index import Index -from datacube.model import DatasetType, Range, Measurement, IngestorConfig +from datacube.model import Product, Range, Measurement, IngestorConfig from datacube.utils import geometry from datacube.model.utils import make_dataset, xr_apply, datasets_to_doc from datacube.ui import click as ui @@ -57,7 +57,7 @@ def morph_dataset_type(source_type, config, index, storage_format): if 'metadata_type' in config: output_metadata_type = index.metadata_types.get_by_name(config['metadata_type']) - output_type = DatasetType(output_metadata_type, deepcopy(source_type.definition)) + output_type = Product(output_metadata_type, deepcopy(source_type.definition)) output_type.definition['name'] = config['output_type'] output_type.definition['managed'] = True output_type.definition['description'] = config['description'] @@ -151,7 +151,7 @@ def get_resampling(config): def ensure_output_type(index: Index, config: dict, storage_format: str, - allow_product_changes: bool = False) -> Tuple[DatasetType, DatasetType]: + allow_product_changes: bool = False) -> Tuple[Product, Product]: """ Create the output product for the given ingest config if it doesn't already exist. 
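This commit is a mechanical rename of `DatasetType` to `Product` in annotations, docstrings, and messages. For downstream code that has to run against both spellings, a hedged compatibility shim such as the following can work; it assumes, as in earlier releases, that at least one of the two names is importable, and the `describe` helper is purely illustrative:

    try:
        from datacube.model import Product
    except ImportError:                     # older datacube exposing only DatasetType
        from datacube.model import DatasetType as Product

    def describe(product: Product) -> str:
        # Product.name and Product.measurements are part of the existing model API
        return f"{product.name}: {sorted(product.measurements)}"
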
@@ -160,11 +160,11 @@ def ensure_output_type(index: Index, """ source_type = index.products.get_by_name(config['source_type']) if not source_type: - click.echo("Source DatasetType %s does not exist" % config['source_type']) + click.echo("Source Product %s does not exist" % config['source_type']) click.get_current_context().exit(1) output_type = morph_dataset_type(source_type, config, index, storage_format) - _LOG.info('Created DatasetType %s', output_type.name) + _LOG.info('Created Product %s', output_type.name) existing = index.products.get_by_name(output_type.name) if existing: diff --git a/datacube/testutils/__init__.py b/datacube/testutils/__init__.py index 097e89c589..854d00acca 100644 --- a/datacube/testutils/__init__.py +++ b/datacube/testutils/__init__.py @@ -23,7 +23,7 @@ from datacube.model import Measurement from datacube.utils.dates import mk_time_coord from datacube.utils.documents import parse_yaml -from datacube.model import Dataset, DatasetType, MetadataType +from datacube.model import Dataset, Product, MetadataType from datacube.ui.common import get_metadata_path from datacube.utils import read_documents, SimpleDocNav from datacube.utils.geometry import GeoBox, CRS @@ -206,7 +206,7 @@ def mk_measurement(m): if load is not None: definition['load'] = load - return DatasetType(metadata_type, definition) + return Product(metadata_type, definition) def mk_sample_dataset(bands, diff --git a/datacube/virtual/impl.py b/datacube/virtual/impl.py index 6a7bb23e3d..56619fbc27 100644 --- a/datacube/virtual/impl.py +++ b/datacube/virtual/impl.py @@ -25,7 +25,7 @@ from datacube.api.core import output_geobox from datacube.api.grid_workflow import _fast_slice from datacube.api.query import Query, query_group_by -from datacube.model import Measurement, DatasetType +from datacube.model import Measurement, Product from datacube.model.utils import xr_apply, xr_iter, SafeDumper from datacube.testutils.io import native_geobox from datacube.utils.geometry import GeoBox, rio_reproject, geobox_union_conservative @@ -275,10 +275,10 @@ def __init__(self, settings: Dict[str, Any]) -> None: """ self._settings = settings - def output_measurements(self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]: + def output_measurements(self, product_definitions: Dict[str, Product]) -> Dict[str, Measurement]: """ A dictionary mapping names to measurement metadata. 
- :param product_definitions: a dictionary mapping product names to products (`DatasetType` objects) + :param product_definitions: a dictionary mapping product names to products (`Product` objects) """ raise NotImplementedError @@ -322,7 +322,7 @@ def _reconstruct(self): return {key: value if key not in ['fuse_func', 'dataset_predicate'] else qualified_name(value) for key, value in self.items()} - def output_measurements(self, product_definitions: TypeMapping[str, DatasetType], # type: ignore[override] + def output_measurements(self, product_definitions: TypeMapping[str, Product], # type: ignore[override] measurements: Optional[List[str]] = None) -> TypeMapping[str, Measurement]: self._assert(self._product in product_definitions, "product {} not found in definitions".format(self._product)) @@ -455,7 +455,7 @@ def _reconstruct(self): return dict(transform=qualified_name(self['transform']), input=self._input._reconstruct(), **reject_keys(self, ['input', 'transform'])) - def output_measurements(self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]: + def output_measurements(self, product_definitions: Dict[str, Product]) -> Dict[str, Measurement]: input_measurements = self._input.output_measurements(product_definitions) return self._transformation.measurements(input_measurements) @@ -505,7 +505,7 @@ def _reconstruct(self): input=self._input._reconstruct(), **reject_keys(self, ['input', 'aggregate', 'group_by'])) - def output_measurements(self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]: + def output_measurements(self, product_definitions: Dict[str, Product]) -> Dict[str, Measurement]: input_measurements = self._input.output_measurements(product_definitions) return self._statistic.measurements(input_measurements) @@ -565,7 +565,7 @@ def _reconstruct(self): children = [child._reconstruct() for child in self._children] return dict(collate=children, **reject_keys(self, ['collate'])) - def output_measurements(self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]: + def output_measurements(self, product_definitions: Dict[str, Product]) -> Dict[str, Measurement]: input_measurement_list = [child.output_measurements(product_definitions) for child in self._children] @@ -679,7 +679,7 @@ def _reconstruct(self): children = [child._reconstruct() for child in self._children] return dict(juxtapose=children, **reject_keys(self, ['juxtapose'])) - def output_measurements(self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]: + def output_measurements(self, product_definitions: Dict[str, Product]) -> Dict[str, Measurement]: input_measurement_list = [child.output_measurements(product_definitions) for child in self._children] @@ -753,10 +753,10 @@ def _reconstruct(self): # pylint: disable=protected-access return dict(input=self._input._reconstruct(), **reject_keys(self, ["input"])) - def output_measurements(self, product_definitions: Dict[str, DatasetType]) -> Dict[str, Measurement]: + def output_measurements(self, product_definitions: Dict[str, Product]) -> Dict[str, Measurement]: """ A dictionary mapping names to measurement metadata. 
- :param product_definitions: a dictionary mapping product names to products (`DatasetType` objects) + :param product_definitions: a dictionary mapping product names to products (`Product` objects) """ return self._input.output_measurements(product_definitions) From 4b3e31dc57b2dd649b89116d263719645e98cd1d Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 6 Mar 2023 02:16:40 +0000 Subject: [PATCH 07/17] put back ranges_overlap import --- datacube/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacube/model/__init__.py b/datacube/model/__init__.py index e6be9b82f3..a54356940c 100644 --- a/datacube/model/__init__.py +++ b/datacube/model/__init__.py @@ -21,7 +21,7 @@ schema_validated, DocReader from datacube.index.eo3 import is_doc_eo3 from .fields import Field, get_dataset_fields -from ._base import Range +from ._base import Range, ranges_overlap # noqa: F401 from .eo3 import validate_eo3_compatible_type from .lineage import LineageDirection, LineageTree, LineageRelation, InconsistentLineageException # noqa: F401 From 0caed186ef33cd8a5c58d5622473d7f21f0f67ec Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 28 Apr 2023 01:19:22 +0000 Subject: [PATCH 08/17] combine docker build actions into main workflow --- .github/workflows/main.yml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d9408c786a..b447e84538 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -35,7 +35,6 @@ jobs: filters: | docker: - 'docker/**' - - name: Pull Docker if: steps.changes.outputs.docker == 'false' run: | @@ -76,12 +75,14 @@ jobs: run: | echo "Verify that twine is installed" docker run --rm opendatacube/datacube-tests:latest twine --version +<<<<<<< HEAD +======= +>>>>>>> d6b62081 (combine docker build actions into main workflow) echo "Run tests" cat <>>>>>> d6b62081 (combine docker build actions into main workflow) - name: Build Packages run: | cat <>>>>>> d6b62081 (combine docker build actions into main workflow) - name: Publish to PyPi if: | github.event_name == 'push' @@ -135,7 +149,6 @@ jobs: else echo "Skipping upload as 'PyPiToken' is not set" fi - env: TWINE_PASSWORD: ${{ secrets.PyPiToken }} From c5725aefc12fca835d708867b99f462f72d68644 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 28 Apr 2023 05:32:21 +0000 Subject: [PATCH 09/17] fix field extraction error --- datacube/api/core.py | 2 +- datacube/utils/documents.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datacube/api/core.py b/datacube/api/core.py index 8d99419951..4ded4e3806 100644 --- a/datacube/api/core.py +++ b/datacube/api/core.py @@ -295,7 +295,7 @@ def load(self, product=None, measurements=None, output_crs=None, resolution=None If a list is specified, the measurements will be returned in the order requested. By default all available measurements are included. - :param \*\*query: + :param **query: Search parameters for products and dimension ranges as described above. For example: ``'x', 'y', 'time', 'crs'``. 
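The documents.py hunk below fixes an operator-precedence bug around the walrus operator: without the extra parentheses, `:=` captures the result of the whole `is not None` comparison (a boolean) rather than the looked-up value. A standalone illustration of the difference, with a plain dict lookup standing in for `get_doc_offset`:

    doc = {"id": "abc"}

    # Buggy form: field ends up holding the boolean result of the comparison.
    if (field := doc.get("id") is not None):
        print(field)        # True

    # Fixed form: parenthesising the assignment keeps the looked-up value.
    if ((field := doc.get("id")) is not None):
        print(field)        # abc
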
diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index 175dd93abe..998705ede1 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -504,7 +504,7 @@ def search_fields(self): def system_fields(self): return {name: field for name, offset in self._system_offsets.items() - if (field := get_doc_offset(offset, self.doc) is not None)} + if ((field := get_doc_offset(offset, self._doc)) is not None)} @property def fields(self): From ff03425c034d7c1193f71c1f0d66f4d40fdaf96b Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 28 Apr 2023 06:10:47 +0000 Subject: [PATCH 10/17] fix doc field offset error --- datacube/utils/documents.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index 998705ede1..1ee0402f62 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -486,7 +486,7 @@ def __setattr__(self, name, val): name, list(self.system_fields.keys()) ) ) - self.doc = toolz.update_in(self.doc, offset, lambda _: val) + self._doc = toolz.update_in(self._doc, offset, lambda _: val) def __dir__(self): return list(self.fields) @@ -497,7 +497,7 @@ def doc(self): @property def search_fields(self): - return {name: field.extract(self.doc) + return {name: field.extract(self._doc) for name, field in self._search_fields.items()} @property From 4279cc0e22c571a8925ef8d1ba9e4d8f3ff02b09 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 28 Apr 2023 07:42:28 +0000 Subject: [PATCH 11/17] actually fix doc field offset issue --- datacube/utils/documents.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index 1ee0402f62..300109ac6e 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -486,25 +486,25 @@ def __setattr__(self, name, val): name, list(self.system_fields.keys()) ) ) - self._doc = toolz.update_in(self._doc, offset, lambda _: val) + self.__dict__['_doc'] = toolz.update_in(self.__dict__['_doc'], offset, lambda _: val) def __dir__(self): return list(self.fields) @property def doc(self): - return self._doc + return self.__dict__['_doc'] @property def search_fields(self): - return {name: field.extract(self._doc) + return {name: field.extract(self.__dict__['_doc']) for name, field in self._search_fields.items()} @property def system_fields(self): return {name: field for name, offset in self._system_offsets.items() - if ((field := get_doc_offset(offset, self._doc)) is not None)} + if ((field := get_doc_offset(offset, self.__dict__['_doc'])) is not None)} @property def fields(self): From 084f4f7623aa8db3a9cb509f45088b71a2b95b55 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 2 May 2023 00:13:44 +0000 Subject: [PATCH 12/17] allow for None value fields --- datacube/utils/documents.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index 300109ac6e..3bd53d3af9 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -502,9 +502,8 @@ def search_fields(self): @property def system_fields(self): - return {name: field - for name, offset in self._system_offsets.items() - if ((field := get_doc_offset(offset, self.__dict__['_doc'])) is not None)} + return {name: get_doc_offset(field, self.__dict__['_doc']) + for name, field in self._system_offsets.items()} @property def fields(self): From 0e372526ee04cec03542fea619d18fea423c3b5c Mon Sep 17 00:00:00 
2001 From: Ariana Barzinpour Date: Wed, 3 May 2023 01:18:11 +0000 Subject: [PATCH 13/17] fix remaining tests --- datacube/api/core.py | 2 +- datacube/utils/documents.py | 15 +++++++++------ tests/test_utils_docs.py | 8 -------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/datacube/api/core.py b/datacube/api/core.py index 4ded4e3806..8ca5160f65 100644 --- a/datacube/api/core.py +++ b/datacube/api/core.py @@ -1052,7 +1052,7 @@ def _mk_empty(shape: Tuple[int, ...]) -> str: dss = tiled_dss.get(idx, None) if dss is None: - val = _mk_empty(gbt.chunk_shape(idx)) + val = _mk_empty(gbt.chunk_shape(idx).xy) # 3D case if 'extra_dim' in measurement: index_subset = extra_dims.measurements_index(measurement.extra_dim) diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index 3bd53d3af9..f1acd069cc 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -276,12 +276,14 @@ class UnknownMetadataType(InvalidDocException): pass -def get_doc_offset(offset, document, default=None): +def get_doc_offset(offset, document, default=None, no_default=False): """ :type offset: list[str] :type document: dict """ + if no_default: + return toolz.get_in(offset, document, no_default=True) return toolz.get_in(offset, document, default=default) @@ -432,7 +434,7 @@ def id(self): def sources(self): if self._sources is None: self._sources = {k: SimpleDocNav(v) - for k, v in get_doc_offset(self._sources_path, self._doc, {}).items()} + for k, v in get_doc_offset(self._sources_path, self._doc, default={}).items()} return self._sources @property @@ -486,7 +488,7 @@ def __setattr__(self, name, val): name, list(self.system_fields.keys()) ) ) - self.__dict__['_doc'] = toolz.update_in(self.__dict__['_doc'], offset, lambda _: val) + self.__dict__['_doc'] = toolz.assoc_in(self._doc, offset, val) def __dir__(self): return list(self.fields) @@ -520,16 +522,17 @@ def without_lineage_sources(doc: Dict[str, Any], :param spec: Product or MetadataType according to which `doc` to be interpreted :param bool inplace: If True modify `doc` in place """ - # TODO: the inplace param doesn't seem to be used + if not inplace: doc = deepcopy(doc) doc_view = spec.dataset_reader(doc) if 'sources' in doc_view.fields: - doc_view.sources = {} + if doc_view.sources is not None: + doc_view.sources = {} - return doc + return doc_view.doc def schema_validated(schema): diff --git a/tests/test_utils_docs.py b/tests/test_utils_docs.py index 4eacd394f7..4ae0811ce8 100644 --- a/tests/test_utils_docs.py +++ b/tests/test_utils_docs.py @@ -116,23 +116,16 @@ def mk_sample(v): x = {'a': 1} assert without_lineage_sources(x, spec) == x - assert without_lineage_sources(x, spec, inplace=True) == x x = {'a': 1, 'lineage': {}} assert without_lineage_sources(x, spec) == x - assert without_lineage_sources(x, spec, inplace=True) == x x = mk_sample(1) assert without_lineage_sources(x, spec) != x assert x['lineage']['source_datasets'] == 1 - x = mk_sample(2) - assert without_lineage_sources(x, spec, inplace=True) == x - assert x['lineage']['source_datasets'] == {} - assert mk_sample(10) != mk_sample({}) assert without_lineage_sources(mk_sample(10), spec) == mk_sample({}) - assert without_lineage_sources(mk_sample(10), spec, inplace=True) == mk_sample({}) # check behaviour when `sources` is not defined for the type no_sources_type = MetadataType({ @@ -148,7 +141,6 @@ def mk_sample(v): }, dataset_search_fields={}) assert without_lineage_sources(mk_sample(10), no_sources_type) == mk_sample(10) - assert 
without_lineage_sources(mk_sample(10), no_sources_type, inplace=True) == mk_sample(10) def test_parse_yaml(): From 9d9cdcc0331117ebba4a85cb53efba4e122bb803 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Wed, 3 May 2023 03:51:32 +0000 Subject: [PATCH 14/17] put back _set_doc_offset --- datacube/utils/documents.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/datacube/utils/documents.py b/datacube/utils/documents.py index f1acd069cc..13006d1fb7 100644 --- a/datacube/utils/documents.py +++ b/datacube/utils/documents.py @@ -276,14 +276,12 @@ class UnknownMetadataType(InvalidDocException): pass -def get_doc_offset(offset, document, default=None, no_default=False): +def get_doc_offset(offset, document, default=None): """ :type offset: list[str] :type document: dict """ - if no_default: - return toolz.get_in(offset, document, no_default=True) return toolz.get_in(offset, document, default=default) @@ -434,7 +432,7 @@ def id(self): def sources(self): if self._sources is None: self._sources = {k: SimpleDocNav(v) - for k, v in get_doc_offset(self._sources_path, self._doc, default={}).items()} + for k, v in get_doc_offset(self._sources_path, self._doc, {}).items()} return self._sources @property @@ -451,6 +449,16 @@ def without_location(self): return SimpleDocNav(toolz.dissoc(self._doc, 'location')) +def _set_doc_offset(offset, document, value): + """ + :type offset: list[str] + :type document: dict + """ + read_offset = offset[:-1] + sub_doc = get_doc_offset(read_offset, document, {}) + sub_doc[offset[-1]] = value + + class DocReader(object): def __init__(self, type_definition, search_fields, doc): """ @@ -488,7 +496,7 @@ def __setattr__(self, name, val): name, list(self.system_fields.keys()) ) ) - self.__dict__['_doc'] = toolz.assoc_in(self._doc, offset, val) + return _set_doc_offset(offset, self._doc, val) def __dir__(self): return list(self.fields) @@ -522,7 +530,6 @@ def without_lineage_sources(doc: Dict[str, Any], :param spec: Product or MetadataType according to which `doc` to be interpreted :param bool inplace: If True modify `doc` in place """ - if not inplace: doc = deepcopy(doc) @@ -532,7 +539,7 @@ def without_lineage_sources(doc: Dict[str, Any], if doc_view.sources is not None: doc_view.sources = {} - return doc_view.doc + return doc def schema_validated(schema): From d8e8057312176482fb97cf973f8339fc46086b02 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Wed, 3 May 2023 04:29:23 +0000 Subject: [PATCH 15/17] fix last error --- datacube/api/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacube/api/core.py b/datacube/api/core.py index 8ca5160f65..4ded4e3806 100644 --- a/datacube/api/core.py +++ b/datacube/api/core.py @@ -1052,7 +1052,7 @@ def _mk_empty(shape: Tuple[int, ...]) -> str: dss = tiled_dss.get(idx, None) if dss is None: - val = _mk_empty(gbt.chunk_shape(idx).xy) + val = _mk_empty(gbt.chunk_shape(idx)) # 3D case if 'extra_dim' in measurement: index_subset = extra_dims.measurements_index(measurement.extra_dim) From 87956600f053e322ba0150c0d31c80c2ca6a84a3 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Wed, 3 May 2023 06:41:59 +0000 Subject: [PATCH 16/17] fix merge conflict markers --- .github/workflows/main.yml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b447e84538..8390760501 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -75,10 +75,7 @@ jobs: run: | echo "Verify that twine is 
installed" docker run --rm opendatacube/datacube-tests:latest twine --version -<<<<<<< HEAD -======= ->>>>>>> d6b62081 (combine docker build actions into main workflow) echo "Run tests" cat <>>>>>> d6b62081 (combine docker build actions into main workflow) - name: Build Packages run: | cat <>>>>>> d6b62081 (combine docker build actions into main workflow) - name: Publish to PyPi if: | github.event_name == 'push' From a12ab1a90a57f6817c6a0fe40bd4e9695778475f Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 4 May 2023 00:31:56 +0000 Subject: [PATCH 17/17] update whats_new --- docs/about/whats_new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst index 4f3d6bc249..abc764bea6 100644 --- a/docs/about/whats_new.rst +++ b/docs/about/whats_new.rst @@ -11,6 +11,7 @@ v1.9.next - External Lineage API (:pull:`#1401`) - Add lineage support to index clone operation (:pull:`#1429`) - Migrate to SQLAlchemy 2.0 (:pull:`#1432`) +- Clean up deprecated code and add deprecation warnings to legacy methods, simplify DocReader logic (:pull:`#1406`) v1.8.next