diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 1659d57b33b847..53a259ad6eb158 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -227,10 +227,6 @@ as an attribute: dfa.A panel.one -You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; -if you try to use attribute access to create a new column, it fails silently, creating a new attribute rather than a -new column. - .. ipython:: python sa.a = 5 @@ -267,6 +263,37 @@ You can also assign a ``dict`` to a row of a ``DataFrame``: x.iloc[1] = dict(x=9, y=99) x +You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; +if you try to use attribute access to create a new column, it creates a new attribute rather than a +new column. In 0.21.0 and later, this will raise a ``UserWarning``: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Similarly, it is possible to create a column with a name which collides with one of Pandas's +built-in methods or attributes, which can cause confusion later when attempting to access +that column as an attribute. This behavior now warns: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] 
+ UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior + In[5]: df.sum + Out[5]: + + Slicing ranges -------------- diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9d5710c80d12b1..3a9cb13d6f8910 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -29,7 +29,6 @@ New features - Added ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to support type inference in the presence of missing values (:issue:`17059`). - .. _whatsnew_0210.enhancements.infer_objects: ``infer_objects`` type conversion @@ -62,6 +61,51 @@ using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedel df['C'] = pd.to_numeric(df['C'], errors='coerce') df.dtypes +.. _whatsnew_0210.enhancements.attribute_access: + +Improved warnings when attempting to create columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +New users are often flummoxed by the relationship between column operations and attribute +access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances +of this confusion include attempting to create a new column by setting into an attribute: + +.. code-block:: ipython + + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] + +This does not raise any obvious exceptions, but also does not create a new column: + +.. code-block:: ipython + + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +The second source of confusion is creating a column whose name collides with a method or +attribute already in the instance namespace: + +.. code-block:: ipython + + In[4]: df['sum'] = [5., 7., 9.] + +This does not permit that column to be accessed as an attribute: + +.. code-block:: ipython + + In[5]: df.sum + Out[5]: + + +Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access <indexing.attribute_access>`. + .. 
_whatsnew_0210.enhancements.other: Other Enhancements diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 442ec93d940235..2d52eed81d22b0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,7 +27,7 @@ pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.generic import ABCSeries, ABCPanel +from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame from pandas.core.common import (_values_from_object, _maybe_box_datetimelike, @@ -1907,6 +1907,10 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): + if isinstance(key, str) and callable(getattr(self, key, None)): + warnings.warn("Column name '{key}' collides with a built-in " + "method, which will cause unexpected attribute " + "behavior".format(key=key), stacklevel=3) self._data.set(key, value) self._clear_item_cache() @@ -3357,6 +3361,12 @@ def __setattr__(self, name, value): else: object.__setattr__(self, name, value) except (AttributeError, TypeError): + if isinstance(self, ABCDataFrame) and (is_list_like(value)): + warnings.warn("Pandas doesn't allow Series to be assigned " + "into nonexistent columns - see " + "https://pandas.pydata.org/pandas-docs/" + "stable/indexing.html#attribute-access", + stacklevel=2) object.__setattr__(self, name, value) # ---------------------------------------------------------------------- diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 653d7d3082c082..ec850cc34e23b0 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd from pandas.core.dtypes import generic as gt +from pandas.util import testing as tm class TestABCClasses(object): @@ -38,3 +39,40 @@ def test_abc_types(self): assert isinstance(self.sparse_array, gt.ABCSparseArray) assert 
isinstance(self.categorical, gt.ABCCategorical) assert isinstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod) + + +def test_setattr_warnings(): + # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash + # GH7175 - GOTCHA: You can't use dot notation to add a column... + d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} + df = pd.DataFrame(d) + + with catch_warnings(record=True) as w: + # successfully add new column + # this should not raise a warning + df['three'] = df.two + 1 + assert len(w) == 0 + assert df.three.sum() > df.two.sum() + + with catch_warnings(record=True) as w: + # successfully modify column in place + # this should not raise a warning + df.one += 1 + assert len(w) == 0 + assert df.one.iloc[0] == 2 + + with catch_warnings(record=True) as w: + # successfully add an attribute to a series + # this should not raise a warning + df.two.not_an_index = [1, 2] + assert len(w) == 0 + + with tm.assert_produces_warning(UserWarning): + # warn when setting column to nonexistent name + df.four = df.two + 2 + assert df.four.sum() > df.two.sum() + + with tm.assert_produces_warning(UserWarning): + # warn when column has same name as method + df['sum'] = df.two diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index fc17b5f85b68c1..f33ba7627101e9 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2011,7 +2011,7 @@ def check(obj, comparator): df['string'] = 'foo' df['float322'] = 1. df['float322'] = df['float322'].astype('float32') - df['bool'] = df['float322'] > 0 + df['boolean'] = df['float322'] > 0 df['time1'] = Timestamp('20130101') df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) @@ -2141,7 +2141,7 @@ def test_table_values_dtypes_roundtrip(self): df1['string'] = 'foo' df1['float322'] = 1. 
df1['float322'] = df1['float322'].astype('float32') - df1['bool'] = df1['float32'] > 0 + df1['boolean'] = df1['float32'] > 0 df1['time1'] = Timestamp('20130101') df1['time2'] = Timestamp('20130102')