From 930aa9d07b0bdd23453b69a31e91449e2a869c59 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 18 Oct 2019 09:23:53 -0500 Subject: [PATCH] Squashed commit of the following: commit 67a32635359c591a16e43b0f763177b0da4a042c Author: Tom Augspurger Date: Fri Oct 18 08:05:04 2019 -0500 fixup name commit e6183cd5647e26670fd7012134d3c093c4d59658 Author: Tom Augspurger Date: Fri Oct 18 07:05:33 2019 -0500 fixup Index.name commit d1826bba808ab043db14a4a7697611887e8cdac2 Author: Tom Augspurger Date: Thu Oct 17 13:45:30 2019 -0500 REF: Store metadata in attrs dict This aids in the implementation of https://github.com/pandas-dev/pandas/pull/28394. Over there, I'm having issues with using `NDFrame.__finalize__` to copy attributes, in part because getattribute on NDFrame is so complicated. Storing the metadata in an attrs dict simplifies that, because we only need to look in NDFrame.attrs, which is just a plain dictionary. Aside from the addition of a public NDFrame.attrs dictionary, there aren't any user-facing API changes. --- doc/source/reference/frame.rst | 13 +++++++++++ doc/source/reference/series.rst | 13 +++++++++++ pandas/core/generic.py | 39 ++++++++++++++++++++++++++++++++- pandas/core/series.py | 29 +++++++++++------------- 4 files changed, 77 insertions(+), 17 deletions(-) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 411eb3dbbedc8d..2eeb11a72df274 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -275,6 +275,19 @@ Time series-related DataFrame.tz_convert DataFrame.tz_localize +.. _api.frame.metadata: + +Metadata +~~~~~~~~ + +:attr:`DataFrame.attrs` is a dictionary for storing global metadata for this DataFrame. + +.. autosummary:: + :toctree: api/ + + DataFrame.attrs + + .. 
_api.dataframe.plotting: Plotting diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index bb53f45d96303d..a160893a97db2a 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -532,6 +532,19 @@ Sparse-dtype specific methods and attributes are provided under the Series.sparse.to_coo +.. _api.series.metadata: + +Metadata +~~~~~~~~ + +:attr:`Series.attrs` is a dictionary for storing global metadata for this Series. + +.. autosummary:: + :toctree: api/ + + Series.attrs + + Plotting -------- ``Series.plot`` is both a callable method and a namespace attribute for diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e1968e966d689f..dbbf630b692da4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8,12 +8,14 @@ import re from textwrap import dedent from typing import ( + TYPE_CHECKING, Any, Callable, Dict, FrozenSet, Hashable, List, + Mapping, Optional, Sequence, Set, @@ -189,6 +191,12 @@ class NDFrame(PandasObject, SelectionMixin): _is_copy = None _data = None # type: BlockManager + if TYPE_CHECKING: + # TODO(PY36): replace with _attrs : Dict[Hashable, Any] + # We need the TYPE_CHECKING, because _attrs is not a class attribute + # and Py35 doesn't support the new syntax. 
+ _attrs = {} # type: Dict[Hashable, Any] + # ---------------------------------------------------------------------- # Constructors @@ -199,6 +207,7 @@ def __init__( copy: bool = False, dtype: Optional[Dtype] = None, allow_duplicate_labels: bool = True, + attrs: Optional[Mapping[Hashable, Any]] = None, fastpath: bool = False, ): @@ -216,6 +225,11 @@ def __init__( object.__setattr__(self, "_data", data) object.__setattr__(self, "_item_cache", {}) object.__setattr__(self, "allows_duplicate_labels", allow_duplicate_labels) + if attrs is None: + attrs = {} + else: + attrs = dict(attrs) + object.__setattr__(self, "_attrs", attrs) def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): """ passed a manager and a axes dict """ @@ -252,6 +266,18 @@ def allows_duplicate_labels(self, value: bool): self._allows_duplicate_labels = value + def attrs(self) -> Dict[Hashable, Any]: + """ + Dictionary of global attributes on this object. + """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[Hashable, Any]) -> None: + self._attrs = dict(value) + @property def is_copy(self): """ @@ -2048,7 +2074,13 @@ def to_dense(self): def __getstate__(self): meta = {k: getattr(self, k, None) for k in self._metadata} - return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata, **meta) + return dict( + _data=self._data, + _typ=self._typ, + _metadata=self._metadata, + attrs=self.attrs, + **meta + ) def __setstate__(self, state): @@ -2057,6 +2089,8 @@ def __setstate__(self, state): elif isinstance(state, dict): typ = state.get("_typ") if typ is not None: + attrs = state.get("_attrs", {}) + object.__setattr__(self, "_attrs", attrs) # set in the order of internal names # to avoid definitional recursion @@ -5255,6 +5289,9 @@ def finalize_name(objs): # import pdb; pdb.set_trace() if isinstance(other, NDFrame): + for name in other.attrs: + self.attrs[name] = other.attrs[name] + # For subclasses using _metadata. 
for name in self._metadata: if name == "name" and getattr(other, "ndim", None) == 1: # Calling hasattr(other, 'name') is bad for DataFrames with diff --git a/pandas/core/series.py b/pandas/core/series.py index 96f506b64d19d9..c9447677713a66 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,7 +5,7 @@ from io import StringIO from shutil import get_terminal_size from textwrap import dedent -from typing import Any, Callable +from typing import Any, Callable, Hashable, List import warnings import numpy as np @@ -29,7 +29,6 @@ is_dict_like, is_extension_array_dtype, is_extension_type, - is_hashable, is_integer, is_iterator, is_list_like, @@ -45,6 +44,7 @@ ABCSeries, ABCSparseArray, ) +from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import ( isna, na_value_for_dtype, @@ -179,7 +179,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): introduces duplicates. See :ref:`duplictes.disallow` for more. """ - _metadata = ["name", "allows_duplicate_labels"] + _metadata = [] # type: List[str] _accessors = {"dt", "cat", "str", "sparse"} _deprecations = ( base.IndexOpsMixin._deprecations @@ -472,19 +472,6 @@ def _update_inplace(self, result, **kwargs): # we want to call the generic version and not the IndexOpsMixin return generic.NDFrame._update_inplace(self, result, **kwargs) - @property - def name(self): - """ - Return name of the Series. 
- """ - return self._name - - @name.setter - def name(self, value): - if value is not None and not is_hashable(value): - raise TypeError("Series.name must be a hashable type") - object.__setattr__(self, "_name", value) - # ndarray compatibility @property def dtype(self): @@ -500,6 +487,16 @@ def dtypes(self): """ return self._data.dtype + @property + def name(self) -> Hashable: + return self.attrs.get("name", None) + + @name.setter + def name(self, value: Hashable) -> None: + if not is_hashable(value): + raise TypeError("Series.name must be a hashable type") + self.attrs["name"] = value + @property def ftype(self): """