diff --git a/ci/deps/azure-35-compat.yaml b/ci/deps/azure-35-compat.yaml index fe207d122657b..97c45b2be27d7 100644 --- a/ci/deps/azure-35-compat.yaml +++ b/ci/deps/azure-35-compat.yaml @@ -11,7 +11,7 @@ dependencies: - openpyxl=2.4.8 - pytables=3.4.2 - python-dateutil=2.6.1 - - python=3.5.* + - python=3.5.3 - pytz=2017.2 - scipy=0.19.0 - xlrd=1.1.0 diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml index bd8ba912d5298..05adbf0c924dc 100644 --- a/ci/deps/azure-37-locale.yaml +++ b/ci/deps/azure-37-locale.yaml @@ -10,6 +10,7 @@ dependencies: - jinja2 - lxml - matplotlib + - moto - nomkl - numexpr - numpy @@ -32,4 +33,3 @@ dependencies: - pip - pip: - hypothesis>=3.58.0 - - moto # latest moto in conda-forge fails with 3.7, move to conda dependencies when this is fixed diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml index 43504dec26953..08208d1e2d59a 100644 --- a/ci/deps/azure-windows-37.yaml +++ b/ci/deps/azure-windows-37.yaml @@ -10,6 +10,7 @@ dependencies: - jinja2 - lxml - matplotlib=2.2.* + - moto - numexpr - numpy=1.14.* - openpyxl @@ -29,6 +30,5 @@ dependencies: - pytest-xdist - pytest-mock - pytest-azurepipelines - - moto - hypothesis>=3.58.0 - pyreadstat diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml index c497495553e8b..fead806fc8e1c 100644 --- a/ci/deps/travis-36-cov.yaml +++ b/ci/deps/travis-36-cov.yaml @@ -12,6 +12,7 @@ dependencies: - geopandas - html5lib - matplotlib + - moto - nomkl - numexpr - numpy=1.15.* @@ -46,6 +47,5 @@ dependencies: - pip: - brotlipy - coverage - - moto - pandas-datareader - python-dateutil diff --git a/ci/deps/travis-36-locale.yaml b/ci/deps/travis-36-locale.yaml index 75e3348adab7c..0d9a760914dab 100644 --- a/ci/deps/travis-36-locale.yaml +++ b/ci/deps/travis-36-locale.yaml @@ -14,6 +14,7 @@ dependencies: - jinja2 - lxml=3.8.0 - matplotlib=3.0.* + - moto - nomkl - numexpr - numpy @@ -36,7 +37,6 @@ dependencies: - pytest>=4.0.2 - pytest-xdist - pytest-mock - - moto - pip - pip: - hypothesis>=3.58.0 diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index 26e9b2fdb07a6..dde1db7e693de 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -178,7 +178,6 @@ We'll now kick off a three-step process: # Create and activate the build environment conda env create -f environment.yml conda activate pandas-dev - conda uninstall --force pandas # or with older versions of Anaconda: source activate pandas-dev diff --git a/doc/source/getting_started/10min.rst b/doc/source/getting_started/10min.rst index 68ba777ec2c2a..510c7ef97aa98 100644 --- a/doc/source/getting_started/10min.rst +++ b/doc/source/getting_started/10min.rst @@ -712,7 +712,6 @@ See the :ref:`Plotting ` docs. plt.close('all') .. ipython:: python - :okwarning: ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000)) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 6ae2ea6e392e6..1a316c2f25ec6 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -198,7 +198,6 @@ Reindexing / selection / label manipulation DataFrame.idxmin DataFrame.last DataFrame.reindex - DataFrame.reindex_axis DataFrame.reindex_like DataFrame.rename DataFrame.rename_axis @@ -337,7 +336,6 @@ Serialization / IO / conversion .. 
autosummary:: :toctree: api/ - DataFrame.from_csv - DataFrame.from_dict - DataFrame.from_items - DataFrame.from_records diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 280eb05964787..6a2620635445d 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -965,7 +965,8 @@ If you select a label *contained* within an interval, this will also select the df.loc[2.5] df.loc[[2.5, 3.5]] -``Interval`` and ``IntervalIndex`` are used by ``cut`` and ``qcut``: +:func:`cut` and :func:`qcut` both return a ``Categorical`` object, and the bins they +create are stored as an ``IntervalIndex`` in its ``.categories`` attribute. .. ipython:: python @@ -973,13 +974,17 @@ If you select a label *contained* within an interval, this will also select the c c.categories -Furthermore, ``IntervalIndex`` allows one to bin *other* data with these same -bins, with ``NaN`` representing a missing value similar to other dtypes. +:func:`cut` also accepts an ``IntervalIndex`` for its ``bins`` argument, which enables +a useful pandas idiom. First, we call :func:`cut` with some data and ``bins`` set to a +fixed number, to generate the bins. Then, we pass the values of ``.categories`` as the +``bins`` argument in subsequent calls to :func:`cut`, supplying new data which will be +binned into the same bins. .. ipython:: python pd.cut([0, 3, 5, 1], bins=c.categories) +Any value which falls outside all bins will be assigned a ``NaN`` value. Generating ranges of intervals ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1108,6 +1113,8 @@ the :meth:`~Index.is_unique` attribute. weakly_monotonic.is_monotonic_increasing weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique +.. _advanced.endpoints_are_inclusive: + Endpoints are inclusive ~~~~~~~~~~~~~~~~~~~~~~~ @@ -1137,7 +1144,7 @@ index can be somewhat complicated. For example, the following does not work: s.loc['c':'e' + 1] A very common use case is to limit a time series to start and end at two -specific dates. To enable this, we made the design choice to make label-based +specific dates. To enable this, we made the design choice to make label-based slicing include both endpoints: .. ipython:: python diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index c09eb87df0368..888266c3cfa55 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -61,8 +61,8 @@ of multi-axis indexing. * A list or array of labels ``['a', 'b', 'c']``. * A slice object with labels ``'a':'f'`` (Note that contrary to usual python slices, **both** the start and the stop are included, when present in the - index! See :ref:`Slicing with labels - `.). + index! See :ref:`Slicing with labels ` + and :ref:`Endpoints are inclusive `.) * A boolean array * A ``callable`` function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above). @@ -335,8 +335,7 @@ The ``.loc`` attribute is the primary access method. The following are valid inp * A list or array of labels ``['a', 'b', 'c']``. * A slice object with labels ``'a':'f'`` (Note that contrary to usual python slices, **both** the start and the stop are included, when present in the - index! See :ref:`Slicing with labels - `.). + index! See :ref:`Slicing with labels `.) * A boolean array. * A ``callable``, see :ref:`Selection By Callable `. 
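To make the cross-referenced behavior concrete, here is a minimal illustration of the inclusive label-based endpoints described above (an editorial sketch in plain Python, not part of the patch):

    import pandas as pd

    s = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
    # .loc slicing is label-based and includes BOTH endpoints:
    s.loc['a':'c']   # rows 'a', 'b' and 'c'
    # .iloc slicing is positional and half-open, like plain Python slices:
    s.iloc[0:2]      # rows 0 and 1 only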
@@ -418,6 +417,9 @@ error will be raised (since doing otherwise would be computationally expensive, as well as potentially ambiguous for mixed type indexes). For instance, in the above example, ``s.loc[1:6]`` would raise ``KeyError``. +For the rationale behind this behavior, see +:ref:`Endpoints are inclusive `. + .. _indexing.integer: Selection by position diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index e32bb0f110252..9af6c36cc4e4d 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -340,13 +340,6 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None`` `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to override values, a ParserWarning will be issued. See :class:`python:csv.Dialect` documentation for more details. -tupleize_cols : boolean, default ``False`` - .. deprecated:: 0.21.0 - - This argument will be removed and will always convert to MultiIndex - - Leave a list of tuples on columns as is (default is to convert to a MultiIndex - on the columns). Error handling ++++++++++++++ @@ -1718,8 +1711,6 @@ function takes a number of arguments. Only the first is required. * ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when appropriate (default None) * ``chunksize``: Number of rows to write at a time -* ``tupleize_cols``: If False (default), write as a list of tuples, otherwise - write in an expanded line format suitable for ``read_csv`` * ``date_format``: Format string for datetime objects Writing a formatted string @@ -3393,15 +3384,15 @@ both on the writing (serialization), and reading (deserialization). .. warning:: - This is a very new feature of pandas. We intend to provide certain - optimizations in the io of the ``msgpack`` data. Since this is marked - as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. + The msgpack format is deprecated as of 0.25 and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of pandas objects. .. warning:: :func:`read_msgpack` is only guaranteed backwards compatible back to pandas version 0.20.3 .. ipython:: python + :okwarning: df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) df.to_msgpack('foo.msg') @@ -3411,6 +3402,7 @@ both on the writing (serialization), and reading (deserialization). You can pass a list of objects and you will receive them back on deserialization. .. ipython:: python + :okwarning: pd.to_msgpack('foo.msg', df, 'foo', np.array([1, 2, 3]), s) pd.read_msgpack('foo.msg') @@ -3418,6 +3410,7 @@ You can pass a list of objects and you will receive them back on deserialization You can pass ``iterator=True`` to iterate over the unpacked results: .. ipython:: python + :okwarning: for o in pd.read_msgpack('foo.msg', iterator=True): print(o) @@ -3425,6 +3418,7 @@ You can pass ``iterator=True`` to iterate over the unpacked results: You can pass ``append=True`` to the writer to append to an existing pack: .. ipython:: python + :okwarning: df.to_msgpack('foo.msg', append=True) pd.read_msgpack('foo.msg') @@ -3435,6 +3429,7 @@ can pack arbitrary collections of Python lists, dicts, scalars, while intermixin pandas objects. .. ipython:: python + :okwarning: pd.to_msgpack('foo2.msg', {'dict': [{'df': df}, {'string': 'foo'}, {'scalar': 1.}, {'s': s}]}) @@ -3453,14 +3448,16 @@ Read/write API Msgpacks can also be read from and written to strings. .. 
ipython:: python + :okwarning: df.to_msgpack() Furthermore you can concatenate the strings to produce a list of the original objects. .. ipython:: python + :okwarning: - pd.read_msgpack(df.to_msgpack() + s.to_msgpack()) + pd.read_msgpack(df.to_msgpack() + s.to_msgpack()) .. _io.hdf5: diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 1439296fb8296..ef77826e9a444 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -458,7 +458,6 @@ You can mix pandas' ``reindex`` and ``interpolate`` methods to interpolate at the new values. .. ipython:: python - :okexcept: ser = pd.Series(np.sort(np.random.uniform(size=100))) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index fcad6db945981..ce02059cd421f 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -474,16 +474,6 @@ resulting ``DatetimeIndex``: Custom frequency ranges ~~~~~~~~~~~~~~~~~~~~~~~ -.. warning:: - - This functionality was originally exclusive to ``cdate_range``, which is - deprecated as of version 0.21.0 in favor of ``bdate_range``. Note that - ``cdate_range`` only utilizes the ``weekmask`` and ``holidays`` parameters - when custom business day, 'C', is passed as the frequency string. Support has - been expanded with ``bdate_range`` to work with any custom frequency string. - -.. versionadded:: 0.21.0 - ``bdate_range`` can also generate a range of custom frequency dates by using the ``weekmask`` and ``holidays`` parameters. These parameters will only be used if a custom frequency string is passed. diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index 0614de82cbcd0..ab48594ddadab 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -829,6 +829,7 @@ Experimental Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. .. ipython:: python + :okwarning: df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) df.to_msgpack('foo.msg') @@ -841,6 +842,7 @@ Experimental You can pass ``iterator=True`` to iterator over the unpacked results .. ipython:: python + :okwarning: for o in pd.read_msgpack('foo.msg', iterator=True): print(o) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 403b4908d36e3..a66056f661de3 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1298,7 +1298,7 @@ Deprecations - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) - The signature of :meth:`Series.to_csv` has been uniformed to that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, the ``header`` argument now defaults to ``True``. (:issue:`19715`) - :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) -- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`) +- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary. This deprecation has been removed in 0.25.0. (:issue:`21948`) - :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. 
(:issue:`21950`) - :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1fe808e098860..3b237592122a4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -5,9 +5,13 @@ What's new in 0.25.0 (April XX, 2019) .. warning:: - Starting with the 0.25.x series of releases, pandas only supports Python 3.5 and higher. + Starting with the 0.25.x series of releases, pandas only supports Python 3.5.3 and higher. See :ref:`install.dropping-27` for more details. +.. warning:: + + The minimum supported Python version will be bumped to 3.6 in a future release. + .. warning:: `Panel` has been fully removed. For N-D labeled data structures, please @@ -561,6 +565,8 @@ Other API changes - The ``.str``-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) - Removed support of gtk package for clipboards (:issue:`26563`) - Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`) +- :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`) +- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extension data types for a ``fixed`` format. (:issue:`7775`) .. _whatsnew_0250.deprecations: @@ -590,6 +596,12 @@ by a ``Series`` or ``DataFrame`` with sparse values. The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`). +msgpack format +^^^^^^^^^^^^^^ + +The msgpack format is deprecated as of 0.25 and will be removed in a future version. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. (:issue:`27084`) + + Other deprecations ^^^^^^^^^^^^^^^^^^ @@ -603,7 +615,15 @@ Other deprecations Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`). - The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version. Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`). +- The :meth:`Series.get_values`, :meth:`DataFrame.get_values`, :meth:`Index.get_values`, + :meth:`SparseArray.get_values` and :meth:`Categorical.get_values` methods are deprecated. + One of ``np.asarray(..)`` or :meth:`~Series.to_numpy` can be used instead (:issue:`19617`). - :meth:`Timedelta.resolution` is deprecated and replaced with :meth:`Timedelta.resolution_string`. In a future version, :meth:`Timedelta.resolution` will be changed to behave like the standard library :attr:`timedelta.resolution` (:issue:`21344`) +- :func:`read_table` has been undeprecated. (:issue:`25220`) +- :attr:`Index.dtype_str` is deprecated. (:issue:`18262`) +- :attr:`Series.imag` and :attr:`Series.real` are deprecated. (:issue:`18262`) +- :meth:`Series.put` is deprecated. (:issue:`18262`) +- :meth:`Index.item` and :meth:`Series.item` are deprecated. (:issue:`18262`) .. 
_whatsnew_0250.prior_deprecations: @@ -616,6 +636,14 @@ Removal of prior version deprecations/changes - Removed the previously deprecated ``pd.options.html.border`` (:issue:`16970`) - Removed the previously deprecated ``convert_objects`` (:issue:`11221`) - Removed the previously deprecated ``select`` method of ``DataFrame`` and ``Series`` (:issue:`17633`) +- Removed the previously deprecated behavior of :class:`Series` treated as list-like in :meth:`~Series.cat.rename_categories` (:issue:`17982`) +- Removed the previously deprecated ``DataFrame.reindex_axis`` and ``Series.reindex_axis`` (:issue:`17842`) +- Removed the previously deprecated behavior of altering column or index labels with :meth:`Series.rename_axis` or :meth:`DataFrame.rename_axis` (:issue:`17842`) +- Removed the previously deprecated ``tupleize_cols`` keyword argument in :meth:`read_html`, :meth:`read_csv`, and :meth:`DataFrame.to_csv` (:issue:`17877`, :issue:`17820`) +- Removed the previously deprecated ``DataFrame.from_csv`` and ``Series.from_csv`` (:issue:`17812`) +- Removed the previously deprecated ``raise_on_error`` keyword argument in :meth:`DataFrame.where` and :meth:`DataFrame.mask` (:issue:`17744`) +- Removed the previously deprecated ``ordered`` and ``categories`` keyword arguments in ``astype`` (:issue:`17742`) +- Removed the previously deprecated ``cdate_range`` (:issue:`17691`) .. _whatsnew_0250.performance: @@ -628,6 +656,7 @@ Performance improvements int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`) - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) - Improved performance of slicing and other selected operation on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`, :issue:`26722`) +- :class:`RangeIndex` now performs standard lookup without instantiating an actual hashtable, hence saving memory (:issue:`16685`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) - Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) @@ -694,6 +723,7 @@ Timezones - Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`) - Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`) - Bug in :func:`date_range` where ambiguous or nonexistent start or end times were not handled by the ``ambiguous`` or ``nonexistent`` keywords respectively (:issue:`27088`) +- Bug in :meth:`DatetimeIndex.union` when combining a timezone aware and timezone unaware :class:`DatetimeIndex` (:issue:`21671`) Numeric ^^^^^^^ @@ -789,6 +819,7 @@ I/O - :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`) - Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`). - Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. 
`PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) +- Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`) Plotting ^^^^^^^^ @@ -843,6 +874,7 @@ Reshaping - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) - Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) - Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`) +- Bug in :func:`pivot_table` when pivoting a timezone aware column as the ``values`` would remove timezone information (:issue:`14948`) Sparse ^^^^^^ diff --git a/mypy.ini b/mypy.ini index d29beeca73f1b..cba20d2775fbe 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,6 @@ [mypy] ignore_missing_imports=True +no_implicit_optional=True [mypy-pandas.conftest,pandas.tests.*] ignore_errors=True \ No newline at end of file diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c09fb96eb9182..990ac7c96a73e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -76,7 +76,10 @@ def values_from_object(obj: object): """ return my values or the object if we are say an ndarray """ func: object - func = getattr(obj, 'get_values', None) + if getattr(obj, '_typ', '') == 'dataframe': + return obj.values + + func = getattr(obj, '_internal_get_values', None) if func is not None: obj = func() diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index b73b70caf1597..cafc31dad3568 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -297,7 +297,6 @@ cdef class TextReader: object encoding object compression object mangle_dupe_cols - object tupleize_cols object usecols list dtype_cast_order set unnamed_cols @@ -351,7 +350,6 @@ cdef class TextReader: skipfooter=0, verbose=False, mangle_dupe_cols=True, - tupleize_cols=False, float_precision=None, skip_blank_lines=True): @@ -370,7 +368,6 @@ cdef class TextReader: self.parser.chunksize = tokenize_chunksize self.mangle_dupe_cols = mangle_dupe_cols - self.tupleize_cols = tupleize_cols # For timekeeping self.clocks = [] diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index cc87d95bf35d8..926440218b5d9 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -269,9 +269,19 @@ static PyObject *get_values(PyObject *obj) { } } - if (!values && PyObject_HasAttrString(obj, "get_values")) { + if (!values && PyObject_HasAttrString(obj, "_internal_get_values")) { PRINTMARK(); - values = PyObject_CallMethod(obj, "get_values", NULL); + values = PyObject_CallMethod(obj, "_internal_get_values", NULL); + if (values && !PyArray_CheckExact(values)) { + PRINTMARK(); + Py_DECREF(values); + values = NULL; + } + } + + if (!values && PyObject_HasAttrString(obj, "get_block_values")) { + PRINTMARK(); + values = PyObject_CallMethod(obj, "get_block_values", NULL); if (values && !PyArray_CheckExact(values)) { PRINTMARK(); Py_DECREF(values); diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 98daae076fbc1..4e84d7b26b707 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1590,7 +1590,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) if is_sparse(arr): 
- arr = arr.get_values() + arr = arr.to_dense() elif isinstance(arr, (ABCIndexClass, ABCSeries)): arr = arr.values diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9f5e3e8ee77f0..68c7b79becb55 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -887,11 +887,6 @@ def rename_categories(self, new_categories, inplace=False): .. versionadded:: 0.23.0 - .. warning:: - - Currently, Series are considered list like. In a future version - of pandas they'll be considered dict-like. - inplace : bool, default False Whether or not to rename the categories inplace or return a copy of this categorical with renamed categories. @@ -939,15 +934,6 @@ def rename_categories(self, new_categories, inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() - if isinstance(new_categories, ABCSeries): - msg = ("Treating Series 'new_categories' as a list-like and using " - "the values. In a future version, 'rename_categories' will " - "treat Series like a dictionary.\n" - "For dict-like, use 'new_categories.to_dict()'\n" - "For list-like, use 'new_categories.values'.") - warn(msg, FutureWarning, stacklevel=2) - new_categories = list(new_categories) - if is_dict_like(new_categories): cat.categories = [new_categories.get(item, item) for item in cat.categories] @@ -1497,6 +1483,8 @@ def get_values(self): """ Return the values. + .. deprecated:: 0.25.0 + For internal compatibility with pandas formatting. Returns @@ -1505,6 +1493,11 @@ def get_values(self): A numpy array of the same dtype as categorical.categories.dtype or Index if datetime / periods. """ + warn("The 'get_values' method is deprecated and will be removed in a " + "future version", FutureWarning, stacklevel=2) + return self._internal_get_values() + + def _internal_get_values(self): # if we are a datetime and period index, return Index to keep metadata if is_datetimelike(self.categories): return self.categories.take(self._codes, fill_value=np.nan) @@ -1937,7 +1930,7 @@ def __iter__(self): """ Returns an Iterator over the values of this Categorical. 
""" - return iter(self.get_values().tolist()) + return iter(self._internal_get_values().tolist()) def __contains__(self, key): """ @@ -1989,9 +1982,7 @@ def _repr_categories_info(self): """ category_strs = self._repr_categories() - dtype = getattr(self.categories, 'dtype_str', - str(self.categories.dtype)) - + dtype = str(self.categories.dtype) levheader = "Categories ({length}, {dtype}): ".format( length=len(self.categories), dtype=dtype) width, height = get_terminal_size() diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 3a9322773fc69..bb144764a26fc 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -195,7 +195,7 @@ def _simple_new(cls, values, freq=None, **kwargs): def _from_sequence( cls, scalars: Sequence[Optional[Period]], - dtype: PeriodDtype = None, + dtype: Optional[PeriodDtype] = None, copy: bool = False, ) -> ABCPeriodArray: if dtype: diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 3512d4e9e29db..97ab6ec8235ef 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -454,7 +454,7 @@ def _sparse_array_op( if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: with np.errstate(all='ignore'): - result = op(left.get_values(), right.get_values()) + result = op(left.to_dense(), right.to_dense()) fill = op(_get_fill(left), _get_fill(right)) if left.sp_index.ngaps == 0: @@ -1468,8 +1468,21 @@ def to_dense(self): """ return np.asarray(self, dtype=self.sp_values.dtype) - # TODO: Look into deprecating this in favor of `to_dense`. - get_values = to_dense + def get_values(self): + """ + Convert SparseArray to a NumPy array. + + .. deprecated:: 0.25.0 + Use `to_dense` instead. + + """ + warnings.warn( + "The 'get_values' method is deprecated and will be removed in a " + "future version. Use the 'to_dense' method instead.", + FutureWarning, stacklevel=2) + return self._internal_get_values() + + _internal_get_values = to_dense # ------------------------------------------------------------------------ # IO diff --git a/pandas/core/base.py b/pandas/core/base.py index 30e800cb9bd73..93db65deff820 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -693,11 +693,15 @@ def item(self): """ Return the first element of the underlying data as a python scalar. + .. deprecated 0.25.0 + Returns ------- scalar The first element of %(klass)s. 
""" + warnings.warn('`item` has been deprecated and will be removed in a ' + 'future version', FutureWarning, stacklevel=2) return self.values.item() @property diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 242885c7a9679..66f7a6365fe41 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -194,7 +194,7 @@ def _concat_categorical(to_concat, axis=0): return union_categoricals(categoricals) # extract the categoricals & coerce to object if needed - to_concat = [x.get_values() if is_categorical_dtype(x.dtype) + to_concat = [x._internal_get_values() if is_categorical_dtype(x.dtype) else np.asarray(x).ravel() if not is_datetime64tz_dtype(x) else np.asarray(x.astype(object)) for x in to_concat] result = _concat_compat(to_concat) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 9da6fb84ee18b..81e061a0fc7b4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -214,13 +214,13 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): _metadata = ('categories', 'ordered') _cache = {} # type: Dict[str_type, PandasExtensionDtype] - def __init__(self, categories=None, ordered: bool = None): + def __init__(self, categories=None, ordered: Optional[bool] = None): self._finalize(categories, ordered, fastpath=False) @classmethod def _from_fastpath(cls, categories=None, - ordered: bool = None + ordered: Optional[bool] = None ) -> 'CategoricalDtype': self = cls.__new__(cls) self._finalize(categories, ordered, fastpath=True) @@ -230,7 +230,7 @@ def _from_fastpath(cls, def _from_categorical_dtype(cls, dtype: 'CategoricalDtype', categories=None, - ordered: bool = None, + ordered: Optional[bool] = None, ) -> 'CategoricalDtype': if categories is ordered is None: return dtype @@ -244,8 +244,8 @@ def _from_categorical_dtype(cls, def _from_values_or_dtype(cls, values=None, categories=None, - ordered: bool = None, - dtype: 'CategoricalDtype' = None, + ordered: Optional[bool] = None, + dtype: Optional['CategoricalDtype'] = None, ) -> 'CategoricalDtype': """ Construct dtype from the input parameters used in :class:`Categorical`. 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df7003ecf000e..3ff3fff22f4f0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -331,7 +331,7 @@ def _constructor(self): _constructor_sliced = Series # type: Type[Series] _deprecations = NDFrame._deprecations | frozenset([ - 'get_value', 'set_value', 'from_csv', 'from_items' + 'get_value', 'set_value', 'from_items' ]) # type: FrozenSet[str] _accessors = set() # type: Set[str] @@ -1616,7 +1616,8 @@ def to_records(self, index=True, convert_datetime64=None, else: ix_vals = [self.index.values] - arrays = ix_vals + [self[c].get_values() for c in self.columns] + arrays = ix_vals + [self[c]._internal_get_values() + for c in self.columns] count = 0 index_names = list(self.index.names) @@ -1632,7 +1633,7 @@ def to_records(self, index=True, convert_datetime64=None, names = [str(name) for name in itertools.chain(index_names, self.columns)] else: - arrays = [self[c].get_values() for c in self.columns] + arrays = [self[c]._internal_get_values() for c in self.columns] names = [str(c) for c in self.columns] index_names = [] @@ -1786,73 +1787,6 @@ def _from_arrays(cls, arrays, columns, index, dtype=None): mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) return cls(mgr) - @classmethod - def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, - encoding=None, tupleize_cols=None, - infer_datetime_format=False): - """ - Read CSV file. - - .. deprecated:: 0.21.0 - Use :func:`read_csv` instead. - - It is preferable to use the more powerful :func:`read_csv` - for most general purposes, but ``from_csv`` makes for an easy - roundtrip to and from a file (the exact counterpart of - ``to_csv``), especially with a DataFrame of time series data. - - This method only differs from the preferred :func:`read_csv` - in some defaults: - - - `index_col` is ``0`` instead of ``None`` (take first column as index - by default) - - `parse_dates` is ``True`` instead of ``False`` (try parsing the index - as datetime by default) - - So a ``pd.DataFrame.from_csv(path)`` can be replaced by - ``pd.read_csv(path, index_col=0, parse_dates=True)``. - - Parameters - ---------- - path : string file path or file handle / StringIO - header : int, default 0 - Row to use as header (skip prior rows) - sep : string, default ',' - Field delimiter - index_col : int or sequence, default 0 - Column to use for index. If a sequence is given, a MultiIndex - is used. Different default from read_table - parse_dates : boolean, default True - Parse dates. Different default from read_table - tupleize_cols : boolean, default False - write multi_index columns as a list of tuples (if True) - or new (expanded format) if False) - infer_datetime_format : boolean, default False - If True and `parse_dates` is True for a column, try to infer the - datetime format based on the first datetime string. If the format - can be inferred, there often will be a large parsing speed-up. - - Returns - ------- - DataFrame - - See Also - -------- - read_csv - """ - - warnings.warn("from_csv is deprecated. Please use read_csv(...) " - "instead. 
Note that some of the default arguments are " - "different, so please refer to the documentation " - "for from_csv when changing your function calls", - FutureWarning, stacklevel=2) - - from pandas.io.parsers import read_csv - return read_csv(path, header=header, sep=sep, - parse_dates=parse_dates, index_col=index_col, - encoding=encoding, tupleize_cols=tupleize_cols, - infer_datetime_format=infer_datetime_format) - def to_sparse(self, fill_value=None, kind='block'): """ Convert to SparseDataFrame. @@ -3768,13 +3702,6 @@ def reindex(self, *args, **kwargs): kwargs.pop('labels', None) return super().reindex(**kwargs) - @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) - def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, - limit=None, fill_value=np.nan): - return super().reindex_axis(labels=labels, axis=axis, method=method, - level=level, copy=copy, limit=limit, - fill_value=fill_value) - def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1af3e9449f3da..957efa402346e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1176,11 +1176,6 @@ def rename_axis(self, mapper=sentinel, **kwargs): Notes ----- - Prior to version 0.21.0, ``rename_axis`` could also be used to change - the axis *labels* by passing a mapping or scalar. This behavior is - deprecated and will be removed in a future version. Use ``rename`` - instead. - ``DataFrame.rename_axis`` supports two calling conventions * ``(index=index_mapper, columns=columns_mapper, ...)`` @@ -1280,22 +1275,15 @@ class name inplace = validate_bool_kwarg(inplace, 'inplace') - if (mapper is not sentinel): + if mapper is not sentinel: # Use v0.23 behavior if a scalar or list non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not is_dict_like(mapper)) if non_mapper: return self._set_axis_name(mapper, axis=axis, inplace=inplace) else: - # Deprecated (v0.21) behavior is if mapper is specified, - # and not a list or scalar, then call rename - msg = ("Using 'rename_axis' to alter labels is deprecated. " - "Use '.rename' instead") - warnings.warn(msg, FutureWarning, stacklevel=3) - axis = self._get_axis_name(axis) - d = {'copy': copy, 'inplace': inplace} - d[axis] = mapper - return self.rename(**d) + raise ValueError("Use `.rename` to alter labels " + "with a mapper.") else: # Use new behavior. Means that index and/or columns # is specified @@ -2418,8 +2406,11 @@ def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs): """ Serialize object to input file path using msgpack format. - THIS IS AN EXPERIMENTAL LIBRARY and the storage format - may not be stable until a future release. + .. deprecated:: 0.25.0 + + to_msgpack is deprecated and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of + pandas objects. Parameters ---------- @@ -2912,7 +2903,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression='infer', quoting=None, quotechar='"', line_terminator=None, chunksize=None, - tupleize_cols=None, date_format=None, doublequote=True, + date_format=None, doublequote=True, escapechar=None, decimal='.'): r""" Write object to a comma-separated values (csv) file. @@ -2985,14 +2976,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, .. 
versionchanged:: 0.24.0 chunksize : int or None Rows to write at a time. - tupleize_cols : bool, default False - Write MultiIndex columns as a list of tuples (if True) or in - the new, expanded format, where each MultiIndex column is a row - in the CSV (if False). - - .. deprecated:: 0.21.0 - This argument will be removed and will always write each row - of the multi-index as a separate row in the CSV file. date_format : str, default None Format string for datetime objects. doublequote : bool, default True @@ -3026,13 +3009,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, df = self if isinstance(self, ABCDataFrame) else self.to_frame() - if tupleize_cols is not None: - warnings.warn("The 'tupleize_cols' parameter is deprecated and " - "will be removed in a future version", - FutureWarning, stacklevel=2) - else: - tupleize_cols = False - from pandas.io.formats.csvs import CSVFormatter formatter = CSVFormatter(df, path_or_buf, line_terminator=line_terminator, sep=sep, @@ -3042,7 +3018,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=columns, header=header, index=index, index_label=index_label, mode=mode, chunksize=chunksize, quotechar=quotechar, - tupleize_cols=tupleize_cols, date_format=date_format, doublequote=doublequote, escapechar=escapechar, decimal=decimal) @@ -4375,89 +4350,6 @@ def _needs_reindex_multi(self, axes, method, level): def _reindex_multi(self, axes, copy, fill_value): return NotImplemented - _shared_docs['reindex_axis'] = (""" - Conform input object to new index. - - .. deprecated:: 0.21.0 - Use `reindex` instead. - - By default, places NaN in locations having no value in the - previous index. A new object is produced unless the new index - is equivalent to the current one and copy=False. - - Parameters - ---------- - labels : array-like - New labels / index to conform to. Preferably an Index object to - avoid duplicating data. - axis : %(axes_single_arg)s - Indicate whether to use rows or columns. - method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional - Method to use for filling holes in reindexed DataFrame: - - * default: don't fill gaps. - * pad / ffill: propagate last valid observation forward to next - valid. - * backfill / bfill: use next valid observation to fill gap. - * nearest: use nearest valid observations to fill gap. - - level : int or str - Broadcast across a level, matching Index values on the - passed MultiIndex level. - copy : bool, default True - Return a new object, even if the passed indexes are the same. - limit : int, optional - Maximum number of consecutive elements to forward or backward fill. - fill_value : float, default NaN - Value used to fill in locations having no value in the previous - index. - - .. versionadded:: 0.21.0 (list-like tolerance) - - Returns - ------- - %(klass)s - Returns a new DataFrame object with new indices, unless the new - index is equivalent to the current one and copy=False. - - See Also - -------- - DataFrame.set_index : Set row labels. - DataFrame.reset_index : Remove row labels or move them to new columns. - DataFrame.reindex : Change to new indices or expand indices. - DataFrame.reindex_like : Change to same indices as other DataFrame. - - Examples - -------- - >>> df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]}, - ... index=['dog', 'hawk']) - >>> df - num_legs num_wings - dog 4 0 - hawk 2 2 - >>> df.reindex(['num_wings', 'num_legs', 'num_heads'], - ... 
axis='columns') - num_wings num_legs num_heads - dog 0 4 NaN - hawk 2 2 NaN - """) - - @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) - def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, - limit=None, fill_value=None): - msg = ("'.reindex_axis' is deprecated and will be removed in a future " - "version. Use '.reindex' instead.") - self._consolidate_inplace() - - axis_name = self._get_axis_name(axis) - axis_values = self._get_axis(axis_name) - method = missing.clean_reindex_fill_method(method) - warnings.warn(msg, FutureWarning, stacklevel=3) - new_index, indexer = axis_values.reindex(labels, method, level, - limit=limit) - return self._reindex_with_indexers({axis: [new_index, indexer]}, - fill_value=fill_value, copy=copy) - def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False, allow_dups=False): """allow_dups indicates an internal call here """ @@ -5328,6 +5220,9 @@ def get_values(self): """ Return an ndarray after converting sparse values to dense. + .. deprecated:: 0.25.0 + Use ``np.asarray(..)`` or :meth:`DataFrame.values` instead. + This is the same as ``.values`` for non-sparse data. For sparse data contained in a `SparseArray`, the data are first converted to a dense representation. @@ -5367,6 +5262,13 @@ def get_values(self): [nan, 2.], [nan, 3.]]) """ + warnings.warn( + "The 'get_values' method is deprecated and will be removed in a " + "future version. Use '.values' or 'np.asarray(..)' instead.", + FutureWarning, stacklevel=2) + return self._internal_get_values() + + def _internal_get_values(self): return self.values def get_dtype_counts(self): @@ -8752,13 +8654,6 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, try_cast : bool, default False Try to cast the result back to the input type (if possible). - raise_on_error : bool, default True - Whether to raise on invalid data types (e.g. trying to where on - strings). - - .. deprecated:: 0.21.0 - - Use `errors`. 
Returns ------- @@ -8846,18 +8741,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, cond_rev="False", name='where', name_other='mask')) def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, - errors='raise', try_cast=False, raise_on_error=None): - - if raise_on_error is not None: - warnings.warn( - "raise_on_error is deprecated in " - "favor of errors='raise|ignore'", - FutureWarning, stacklevel=2) - - if raise_on_error: - errors = 'raise' - else: - errors = 'ignore' + errors='raise', try_cast=False): other = com.apply_if_callable(other, self) return self._where(cond, other, inplace, axis, level, @@ -8867,18 +8751,7 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, cond_rev="True", name='mask', name_other='where')) def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, - errors='raise', try_cast=False, raise_on_error=None): - - if raise_on_error is not None: - warnings.warn( - "raise_on_error is deprecated in " - "favor of errors='raise|ignore'", - FutureWarning, stacklevel=2) - - if raise_on_error: - errors = 'raise' - else: - errors = 'ignore' + errors='raise', try_cast=False): inplace = validate_bool_kwarg(inplace, 'inplace') cond = com.apply_if_callable(cond, self) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 72c8d330170d4..210e82837118c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1118,7 +1118,7 @@ def nunique(self, dropna=True): """ ids, _, _ = self.grouper.group_info - val = self.obj.get_values() + val = self.obj._internal_get_values() try: sorter = np.lexsort((val, ids)) @@ -1192,7 +1192,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, bins=bins) ids, _, _ = self.grouper.group_info - val = self.obj.get_values() + val = self.obj._internal_get_values() # groupby removes null keys from groupings mask = ids != -1 @@ -1306,7 +1306,7 @@ def count(self): Count of values within each group. """ ids, _, ngroups = self.grouper.group_info - val = self.obj.get_values() + val = self.obj._internal_get_values() mask = (ids != -1) & ~isna(val) ids = ensure_platform_int(ids) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cb5b4a6c8993c..0123e6a5f1065 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -57,13 +57,6 @@ _index_shared_docs = dict() -def _try_get_item(x): - try: - return x.item() - except AttributeError: - return x - - def _make_comparison_op(op, cls): def cmp_method(self, other): if isinstance(other, (np.ndarray, Index, ABCSeries)): @@ -686,11 +679,16 @@ def dtype(self): """ return self._data.dtype - @cache_readonly + @property def dtype_str(self): """ Return the dtype str of the underlying data. + + .. deprecated:: 0.25.0 """ + warnings.warn('`dtype_str` has been deprecated. Call `str` on the ' + 'dtype attribute instead.', FutureWarning, + stacklevel=2) return str(self.dtype) def ravel(self, order='C'): @@ -2726,17 +2724,11 @@ def _convert_can_do_setop(self, other): * backfill / bfill: use NEXT index value if no exact match * nearest: use the NEAREST index value if no exact match. Tied distances are broken by preferring the larger index value. - tolerance : optional + tolerance : int or float, optional Maximum distance from index value for inexact matches. The value of the index at the matching location most satisfy the equation ``abs(index[loc] - key) <= tolerance``. 
- Tolerance may be a scalar - value, which applies the same tolerance to all values, or - list-like, which applies variable tolerance per element. List-like - includes list, tuple, array, Series, and must be the same size as - the index and its dtype must exactly match the index's type. - .. versionadded:: 0.21.0 (list-like tolerance) Returns @@ -3772,6 +3764,9 @@ def get_values(self): """ Return `Index` data as an `numpy.ndarray`. + .. deprecated:: 0.25.0 + Use :meth:`Index.to_numpy` or :attr:`Index.array` instead. + Returns ------- numpy.ndarray @@ -3810,6 +3805,13 @@ def get_values(self): >>> midx.get_values().ndim 1 """ + warnings.warn( + "The 'get_values' method is deprecated and will be removed in a " + "future version. Use '.to_numpy()' or '.array' instead.", + FutureWarning, stacklevel=2) + return self._internal_get_values() + + def _internal_get_values(self): return self.values @Appender(IndexOpsMixin.memory_usage.__doc__) @@ -4170,7 +4172,8 @@ def equals(self, other): Returns ------- bool - If two Index objects have equal elements True, otherwise False. + True if "other" is an Index and it has the same elements as the calling + index; False otherwise. """ if self.is_(other): return True diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 3d3774ce48e8b..db4778f5e375f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -355,9 +355,10 @@ def _wrap_setop_result(self, other, result): name = get_op_result_name(self, other) return self._shallow_copy(result, name=name) - def get_values(self): - """ return the underlying data as an ndarray """ - return self._data.get_values() + def _internal_get_values(self): + # override base Index version to get the numpy array representation of + # the underlying Categorical + return self._data._internal_get_values() def tolist(self): return self._data.tolist() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5ce670d9fe33e..e2658b66f83ba 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -29,7 +29,7 @@ import pandas.core.tools.datetimes as tools from pandas.tseries.frequencies import Resolution, to_offset -from pandas.tseries.offsets import CDay, Nano, prefix_mapping +from pandas.tseries.offsets import Nano, prefix_mapping def _new_DatetimeIndex(cls, d): @@ -1568,66 +1568,6 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, closed=closed, **kwargs) -def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, - normalize=True, name=None, closed=None, **kwargs): - """ - Return a fixed frequency DatetimeIndex, with CustomBusinessDay as the - default frequency - - .. deprecated:: 0.21.0 - - Parameters - ---------- - start : string or datetime-like, default None - Left bound for generating dates - end : string or datetime-like, default None - Right bound for generating dates - periods : integer, default None - Number of periods to generate - freq : string or DateOffset, default 'C' (CustomBusinessDay) - Frequency strings can have multiples, e.g. 
'5H' - tz : string, default None - Time zone name for returning localized DatetimeIndex, for example - Asia/Beijing - normalize : bool, default False - Normalize start/end dates to midnight before generating date range - name : string, default None - Name of the resulting DatetimeIndex - weekmask : string, Default 'Mon Tue Wed Thu Fri' - weekmask of valid business days, passed to ``numpy.busdaycalendar`` - holidays : list - list/array of dates to exclude from the set of valid business days, - passed to ``numpy.busdaycalendar`` - closed : string, default None - Make the interval closed with respect to the given frequency to - the 'left', 'right', or both sides (None) - - Notes - ----- - Of the three parameters: ``start``, ``end``, and ``periods``, exactly two - must be specified. - - To learn more about the frequency strings, please see `this link - `__. - - Returns - ------- - rng : DatetimeIndex - """ - warnings.warn("cdate_range is deprecated and will be removed in a future " - "version, instead use pd.bdate_range(..., freq='{freq}')" - .format(freq=freq), FutureWarning, stacklevel=2) - - if freq == 'C': - holidays = kwargs.pop('holidays', []) - weekmask = kwargs.pop('weekmask', 'Mon Tue Wed Thu Fri') - freq = CDay(holidays=holidays, weekmask=weekmask) - - return date_range(start=start, end=end, periods=periods, freq=freq, - tz=tz, normalize=normalize, name=name, - closed=closed, **kwargs) - - def _time_to_micros(time): seconds = time.hour * 60 * 60 + 60 * time.minute + time.second return 1000000 * seconds + time.microsecond diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a06d304fb5a22..19ba147fe9a27 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1246,7 +1246,7 @@ def values(self): for i in range(self.nlevels): vals = self._get_level_values(i) if is_categorical_dtype(vals): - vals = vals.get_values() + vals = vals._internal_get_values() if (isinstance(vals.dtype, ExtensionDtype) or hasattr(vals, '_box_values')): vals = vals.astype(object) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index a228895e527aa..5f9c1f22887cc 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -16,7 +16,6 @@ from pandas.core import algorithms import pandas.core.common as com -import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, InvalidIndexError, _index_shared_docs) from pandas.core.ops import get_op_result_name @@ -442,7 +441,9 @@ def __contains__(self, other): return np.isnan(other) and self.hasnans except ValueError: try: - return len(other) <= 1 and ibase._try_get_item(other) in self + return len(other) <= 1 and other.item() in self + except AttributeError: + return len(other) <= 1 and other in self except TypeError: pass except TypeError: @@ -457,9 +458,7 @@ def get_loc(self, key, method=None, tolerance=None): nan_idxs = self._nan_idxs try: return nan_idxs.item() - except (ValueError, IndexError): - # should only need to catch ValueError here but on numpy - # 1.7 .item() can raise IndexError when NaNs are present + except ValueError: if not len(nan_idxs): raise KeyError(key) return nan_idxs diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b20b0c6f853d9..dc11099c3e903 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -874,7 +874,12 @@ def item(self): """ return the first element of the underlying data as a python scalar + + .. 
deprecated:: 0.25.0 + """ + warnings.warn('`item` has been deprecated and will be removed in a ' 'future version', FutureWarning, stacklevel=2) # TODO(DatetimeArray): remove if len(self) == 1: return self[0] diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 47dad1788e021..70ca0b349e7ed 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -13,8 +13,8 @@ from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.common import ( - ensure_python_int, is_int64_dtype, is_integer, is_scalar, - is_timedelta64_dtype) + ensure_platform_int, ensure_python_int, is_int64_dtype, is_integer, + is_integer_dtype, is_list_like, is_scalar, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, ABCTimedeltaIndex) @@ -348,6 +348,36 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) return super().get_loc(key, method=method, tolerance=tolerance) + @Appender(_index_shared_docs['get_indexer']) + def get_indexer(self, target, method=None, limit=None, tolerance=None): + if not (method is None and tolerance is None and is_list_like(target)): + return super().get_indexer(target, method=method, + tolerance=tolerance) + + if self.step > 0: + start, stop, step = self.start, self.stop, self.step + else: + # Work on reversed range for simplicity: + start, stop, step = (self.stop - self.step, + self.start + 1, + - self.step) + + target_array = np.asarray(target) + if not (is_integer_dtype(target_array) and target_array.ndim == 1): + # checks/conversions/roundings are delegated to general method + return super().get_indexer(target, method=method, + tolerance=tolerance) + + locs = target_array - start + valid = (locs % step == 0) & (locs >= 0) & (target_array < stop) + locs[~valid] = -1 + locs[valid] = locs[valid] / step + + if step != self.step: + # We reversed this range: transform to original locs + locs[valid] = len(self) - 1 - locs[valid] + return ensure_platform_int(locs) + def tolist(self): return list(self._range) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a131509a4ed10..b79f87461093d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -177,6 +177,12 @@ def get_values(self, dtype=None): return self.values.astype(object) return self.values + def get_block_values(self, dtype=None): + """ + This is used in the JSON C code + """ + return self.get_values(dtype=dtype) + def to_dense(self): return self.values.view() @@ -220,7 +226,7 @@ def make_block_same_class(self, values, placement=None, ndim=None, if dtype is not None: # issue 19431 fastparquet is passing this warnings.warn("dtype argument is deprecated, will be removed " - "in a future release.", DeprecationWarning) + "in a future release.", FutureWarning) if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, @@ -542,17 +548,10 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, if self.is_categorical_astype(dtype): # deprecated 17636 - if ('categories' in kwargs or 'ordered' in kwargs): - if isinstance(dtype, CategoricalDtype): - raise TypeError( - "Cannot specify a CategoricalDtype and also " - "`categories` or `ordered`. 
Use " - "`dtype=CategoricalDtype(categories, ordered)`" - " instead.") - warnings.warn("specifying 'categories' or 'ordered' in " - ".astype() is deprecated; pass a " - "CategoricalDtype instead", - FutureWarning, stacklevel=7) + for deprecated_arg in ('categories', 'ordered'): + if deprecated_arg in kwargs: + raise ValueError('Got an unexpected argument: {}'.format( + deprecated_arg)) categories = kwargs.get('categories', None) ordered = kwargs.get('ordered', None) @@ -1794,7 +1793,7 @@ def formatting_values(self): "'ExtensionArray._formatting_values' is deprecated. " "Specify 'ExtensionArray._formatter' instead." ) - warnings.warn(msg, DeprecationWarning, stacklevel=10) + warnings.warn(msg, FutureWarning, stacklevel=10) return self.values._formatting_values() return self.values @@ -2928,7 +2927,7 @@ def to_dense(self): # Categorical.get_values returns a DatetimeIndex for datetime # categories, so we can't simply use `np.asarray(self.values)` like # other types. - return self.values.get_values() + return self.values._internal_get_values() def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): """ convert to our native types format, slicing if desired """ @@ -3056,7 +3055,7 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, if fastpath is not None: # GH#19265 pyarrow is passing this warnings.warn("fastpath argument is deprecated, will be removed " - "in a future release.", DeprecationWarning) + "in a future release.", FutureWarning) if klass is None: dtype = dtype or values.dtype klass = get_block_type(values, dtype) @@ -3229,7 +3228,7 @@ def _putmask_preserve(nv, n): dtype, _ = maybe_promote(n.dtype) if is_extension_type(v.dtype) and is_object_dtype(dtype): - v = v.get_values(dtype) + v = v._internal_get_values(dtype) else: v = v.astype(dtype) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 24a28bf0005cb..cc8b241bedba1 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -228,7 +228,8 @@ def _maybe_get_mask(values: np.ndarray, skipna: bool, def _get_values(values: np.ndarray, skipna: bool, fill_value: Any = None, - fill_value_typ: str = None, mask: Optional[np.ndarray] = None + fill_value_typ: Optional[str] = None, + mask: Optional[np.ndarray] = None ) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]: """ Utility to get the values view, mask, dtype, dtype_max, and fill_value. 
@@ -1274,7 +1275,7 @@ def _ensure_numeric(x):
         except (TypeError, ValueError):
             x = x.astype(np.float64)
         else:
-            if not np.any(x.imag):
+            if not np.any(np.imag(x)):
                 x = x.real
     elif not (is_float(x) or is_integer(x) or is_complex(x)):
         try:
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 0b9e56fd19556..a4d31cb227f19 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -1891,7 +1891,7 @@ def wrapper(self, other, axis=None):
                               name=res_name, dtype='bool')

         else:
-            values = self.get_values()
+            values = self.to_numpy()

             with np.errstate(all='ignore'):
                 res = na_op(values, other)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 730a96f5435a1..f415bc9fd3561 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -137,7 +137,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
     # tolist is not actually deprecated, just suppressed in the __dir__
     _deprecations = generic.NDFrame._deprecations | frozenset(
         ['asobject', 'reshape', 'get_value', 'set_value',
-         'from_csv', 'valid', 'tolist'])
+         'valid', 'tolist'])

     # Override cache_readonly bc Series is mutable
     hasnans = property(base.IndexOpsMixin.hasnans.func,
@@ -506,11 +506,21 @@ def get_values(self):
         """
         Same as values (but handles sparseness conversions); is a view.

+        .. deprecated:: 0.25.0
+            Use :meth:`Series.to_numpy` or :attr:`Series.array` instead.
+
         Returns
         -------
         numpy.ndarray
             Data of the Series.
         """
+        warnings.warn(
+            "The 'get_values' method is deprecated and will be removed in a "
+            "future version. Use '.to_numpy()' or '.array' instead.",
+            FutureWarning, stacklevel=2)
+        return self._internal_get_values()
+
+    def _internal_get_values(self):
         return self._data.get_values()

     @property
@@ -617,10 +627,14 @@ def put(self, *args, **kwargs):
         """
         Apply the `put` method to its `values` attribute if it has one.

+        .. deprecated:: 0.25.0
+
         See Also
         --------
         numpy.ndarray.put
         """
+        warnings.warn('`put` has been deprecated and will be removed in a '
+                      'future version.', FutureWarning, stacklevel=2)
         self._values.put(*args, **kwargs)

     def __len__(self):
@@ -793,7 +807,11 @@ def __array_prepare__(self, result, context=None):
     def real(self):
         """
         Return the real value of vector.
+
+        .. deprecated:: 0.25.0
         """
+        warnings.warn("`real` has been deprecated and will be removed in a "
+                      "future version", FutureWarning, stacklevel=2)
         return self.values.real

     @real.setter
@@ -804,7 +822,11 @@ def real(self, v):
     def imag(self):
         """
         Return imag value of vector.
+
+        .. deprecated:: 0.25.0
         """
+        warnings.warn("`imag` has been deprecated and will be removed in a "
+                      "future version", FutureWarning, stacklevel=2)
         return self.values.imag

     @imag.setter
@@ -3986,27 +4008,6 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         return super().shift(periods=periods, freq=freq, axis=axis,
                              fill_value=fill_value)

-    def reindex_axis(self, labels, axis=0, **kwargs):
-        """
-        Conform Series to new index with optional filling logic.
-
-        .. deprecated:: 0.21.0
-            Use ``Series.reindex`` instead.
-
-        Returns
-        -------
-        Series
-            Reindexed Series.
-        """
-        # for compatibility with higher dims
-        if axis != 0:
-            raise ValueError("cannot reindex series on non-zero axis!")
-        msg = ("'.reindex_axis' is deprecated and will be removed in a future "
-               "version. Use '.reindex' instead.")
-        warnings.warn(msg, FutureWarning, stacklevel=2)
-
-        return self.reindex(index=labels, **kwargs)
-
     def memory_usage(self, index=True, deep=False):
         """
         Return the memory usage of the Series.
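The `series.py` changes above all follow the same deprecation pattern: the public method gains a `FutureWarning` with `stacklevel=2` (so the warning is attributed to the caller's code rather than pandas internals), while internal call sites are rerouted to a private `_internal_*` twin that stays silent. A toy sketch of that split, with hypothetical names:

import warnings

def get_data():
    # Public, deprecated entry point: warn, then delegate.
    # stacklevel=2 attributes the warning to the caller of get_data().
    warnings.warn("get_data is deprecated and will be removed in a "
                  "future version.", FutureWarning, stacklevel=2)
    return _internal_get_data()

def _internal_get_data():
    # Internal callers use this directly and never trigger the warning.
    return [1, 2, 3]

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    get_data()
assert caught[0].category is FutureWarning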
@@ -4221,81 +4222,13 @@ def between(self, left, right, inclusive=True): return lmask & rmask - @classmethod - def from_csv(cls, path, sep=',', parse_dates=True, header=None, - index_col=0, encoding=None, infer_datetime_format=False): - """ - Read CSV file. - - .. deprecated:: 0.21.0 - Use :func:`pandas.read_csv` instead. - - It is preferable to use the more powerful :func:`pandas.read_csv` - for most general purposes, but ``from_csv`` makes for an easy - roundtrip to and from a file (the exact counterpart of - ``to_csv``), especially with a time Series. - - This method only differs from :func:`pandas.read_csv` in some defaults: - - - `index_col` is ``0`` instead of ``None`` (take first column as index - by default) - - `header` is ``None`` instead of ``0`` (the first row is not used as - the column names) - - `parse_dates` is ``True`` instead of ``False`` (try parsing the index - as datetime by default) - - With :func:`pandas.read_csv`, the option ``squeeze=True`` can be used - to return a Series like ``from_csv``. - - Parameters - ---------- - path : str, file path, or file handle / StringIO - sep : str, default ',' - Field delimiter. - parse_dates : bool, default True - Parse dates. Different default from read_table. - header : int, default None - Row to use as header (skip prior rows). - index_col : int or sequence, default 0 - Column to use for index. If a sequence is given, a MultiIndex - is used. Different default from read_table. - encoding : str, optional - A string representing the encoding to use if the contents are - non-ascii, for python versions prior to 3. - infer_datetime_format : bool, default False - If True and `parse_dates` is True for a column, try to infer the - datetime format based on the first datetime string. If the format - can be inferred, there often will be a large parsing speed-up. - - Returns - ------- - Series - - See Also - -------- - read_csv - """ - - # We're calling `DataFrame.from_csv` in the implementation, - # which will propagate a warning regarding `from_csv` deprecation. 
- from pandas.core.frame import DataFrame - df = DataFrame.from_csv(path, header=header, index_col=index_col, - sep=sep, parse_dates=parse_dates, - encoding=encoding, - infer_datetime_format=infer_datetime_format) - result = df.iloc[:, 0] - if header is None: - result.index.name = result.name = None - - return result - @Appender(generic.NDFrame.to_csv.__doc__) def to_csv(self, *args, **kwargs): names = ["path_or_buf", "sep", "na_rep", "float_format", "columns", "header", "index", "index_label", "mode", "encoding", "compression", "quoting", "quotechar", "line_terminator", - "chunksize", "tupleize_cols", "date_format", "doublequote", + "chunksize", "date_format", "doublequote", "escapechar", "decimal"] old_names = ["path_or_buf", "index", "sep", "na_rep", "float_format", diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 67ecbcbea67f9..6a0ba5f93c509 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -493,7 +493,7 @@ def xs(self, key, axis=0, copy=False): return data i = self.index.get_loc(key) - data = self.take([i]).get_values()[0] + data = self.take([i])._internal_get_values()[0] return Series(data, index=self.columns) # ---------------------------------------------------------------------- @@ -694,9 +694,10 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None, if col not in self: continue if row_indexer is not None: - new_arrays[col] = algos.take_1d(self[col].get_values(), - row_indexer, - fill_value=fill_value) + new_arrays[col] = algos.take_1d( + self[col]._internal_get_values(), + row_indexer, + fill_value=fill_value) else: new_arrays[col] = self[col] diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 2e740c0acc465..88b6634db92b6 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -224,7 +224,7 @@ def __repr__(self): def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): """ perform a reduction operation """ - return op(self.get_values(), skipna=skipna, **kwds) + return op(self.array.to_dense(), skipna=skipna, **kwds) def __getstate__(self): # pickling diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 93074f5afa2b3..a916f2f06df21 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -269,7 +269,7 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): # we'll be working with everything as 64-bit values, so handle this # 128-bit value early if np.issubdtype(dtype, np.complex128): - return hash_array(vals.real) + 23 * hash_array(vals.imag) + return hash_array(np.real(vals)) + 23 * hash_array(np.imag(vals)) # First, turn whatever array this is into unsigned 64-bit ints, if we can # manage it. 
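The `hash_array` change above keeps the existing scheme for 128-bit complex input: hash the real and imaginary 64-bit halves separately and mix them with an odd multiplier so that conjugate pairs do not collide; switching to `np.real`/`np.imag` presumably sidesteps the `.real`/`.imag` accessors deprecated elsewhere in this patch. The combination rule can be reproduced with the public `pandas.util.hash_array` (a demonstration, not the internal code path):

import numpy as np
from pandas.util import hash_array

vals = np.array([1 + 2j, 1 - 2j], dtype=np.complex128)
combined = hash_array(np.real(vals)) + 23 * hash_array(np.imag(vals))
# The conjugates hash differently because the imaginary parts enter
# the sum with a different weight (23) than the real parts (1).
assert combined[0] != combined[1]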
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index ec42acf987737..7b1e203bd33ad 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -482,7 +482,7 @@ def _workbook_class(self): def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): from openpyxl import load_workbook return load_workbook(filepath_or_buffer, - read_only=True, data_only=True) + read_only=True, data_only=True, keep_links=False) @property def sheet_names(self) -> List[str]: diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 120eb4612fc9b..e1d95862ec872 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -24,9 +24,9 @@ class CSVFormatter: def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, - index_label=None, mode='w', nanRep=None, encoding=None, + index_label=None, mode='w', encoding=None, compression='infer', quoting=None, line_terminator='\n', - chunksize=None, tupleize_cols=False, quotechar='"', + chunksize=None, quotechar='"', date_format=None, doublequote=True, escapechar=None, decimal='.'): @@ -68,9 +68,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.date_format = date_format - self.tupleize_cols = tupleize_cols - self.has_mi_columns = (isinstance(obj.columns, ABCMultiIndex) and - not self.tupleize_cols) + self.has_mi_columns = isinstance(obj.columns, ABCMultiIndex) # validate mi options if self.has_mi_columns: diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 5792f6e2a5a08..66a00bf9ab054 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -402,6 +402,10 @@ def _format_value(self, val): val = '-{inf}'.format(inf=self.inf_rep) elif self.float_format is not None: val = float(self.float_format % val) + if getattr(val, 'tzinfo', None) is not None: + raise ValueError('Excel does not support datetimes with ' + 'timezones. Please ensure that datetimes ' + 'are timezone unaware before writing to Excel.') return val def _format_header_mi(self): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index c709ff876b3a0..3f98fc235b2c5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -129,7 +129,7 @@ def _get_footer(self): return str(footer) def _get_formatted_values(self): - return format_array(self.categorical.get_values(), None, + return format_array(self.categorical._internal_get_values(), None, float_format=None, na_rep=self.na_rep) def to_string(self): @@ -1196,7 +1196,7 @@ def _format_strings(self): if is_categorical_dtype(values.dtype): # Categorical is special for now, so that we can preserve tzinfo - array = values.get_values() + array = values._internal_get_values() else: array = np.asarray(values) diff --git a/pandas/io/html.py b/pandas/io/html.py index d54489aabf1ed..f080e1d1fc188 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -912,7 +912,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): def read_html(io, match='.+', flavor=None, header=None, index_col=None, skiprows=None, attrs=None, parse_dates=False, - tupleize_cols=None, thousands=',', encoding=None, + thousands=',', encoding=None, decimal='.', converters=None, na_values=None, keep_default_na=True, displayed_only=True): r"""Read HTML tables into a ``list`` of ``DataFrame`` objects. @@ -976,14 +976,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, parse_dates : bool, optional See :func:`~read_csv` for more details. 
- tupleize_cols : bool, optional - If ``False`` try to parse multiple header rows into a - :class:`~pandas.MultiIndex`, otherwise return raw tuples. Defaults to - ``False``. - - .. deprecated:: 0.21.0 - This argument will be removed and will always convert to MultiIndex - thousands : str, optional Separator to use to parse thousands. Defaults to ``','``. @@ -1073,7 +1065,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, _validate_header_arg(header) return _parse(flavor=flavor, io=io, match=match, header=header, index_col=index_col, skiprows=skiprows, - parse_dates=parse_dates, tupleize_cols=tupleize_cols, + parse_dates=parse_dates, thousands=thousands, attrs=attrs, encoding=encoding, decimal=decimal, converters=converters, na_values=na_values, keep_default_na=keep_default_na, diff --git a/pandas/io/packers.py b/pandas/io/packers.py index db56f8b9da190..e619bec8ce47d 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -78,8 +78,11 @@ def to_msgpack(path_or_buf, *args, **kwargs): """ msgpack (serialize) object to input file path - THIS IS AN EXPERIMENTAL LIBRARY and the storage format - may not be stable until a future release. + .. deprecated:: 0.25.0 + + to_msgpack is deprecated and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of + pandas objects. Parameters ---------- @@ -92,6 +95,12 @@ def to_msgpack(path_or_buf, *args, **kwargs): compress : type of compressor (zlib or blosc), default to None (no compression) """ + warnings.warn("to_msgpack is deprecated and will be removed in a " + "future version.\n" + "It is recommended to use pyarrow for on-the-wire " + "transmission of pandas objects.", + FutureWarning, stacklevel=3) + global compressor compressor = kwargs.pop('compress', None) append = kwargs.pop('append', None) @@ -121,8 +130,11 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): Load msgpack pandas object from the specified file path - THIS IS AN EXPERIMENTAL LIBRARY and the storage format - may not be stable until a future release. + .. deprecated:: 0.25.0 + + read_msgpack is deprecated and will be removed in a future version. + It is recommended to use pyarrow for on-the-wire transmission of + pandas objects. Parameters ---------- @@ -140,6 +152,12 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs): read_msgpack is only guaranteed to be backwards compatible to pandas 0.20.3. 
""" + warnings.warn("The read_msgpack is deprecated and will be removed in a " + "future version.\n" + "It is recommended to use pyarrow for on-the-wire " + "transmission of pandas objects.", + FutureWarning, stacklevel=3) + path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf) if iterator: return Iterator(path_or_buf) @@ -523,16 +541,16 @@ def encode(obj): return {'typ': 'np_scalar', 'sub_typ': 'np_complex', 'dtype': obj.dtype.name, - 'real': obj.real.__repr__(), - 'imag': obj.imag.__repr__()} + 'real': np.real(obj).__repr__(), + 'imag': np.imag(obj).__repr__()} else: return {'typ': 'np_scalar', 'dtype': obj.dtype.name, 'data': obj.__repr__()} elif isinstance(obj, complex): return {'typ': 'np_complex', - 'real': obj.real.__repr__(), - 'imag': obj.imag.__repr__()} + 'real': np.real(obj).__repr__(), + 'imag': np.imag(obj).__repr__()} return obj diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3b16544e72233..73d47af5922f7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -9,6 +9,7 @@ import re import sys from textwrap import fill +from typing import Any, Dict, Set import warnings import numpy as np @@ -293,13 +294,6 @@ `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to override values, a ParserWarning will be issued. See csv.Dialect documentation for more details. -tupleize_cols : bool, default False - Leave a list of tuples on columns as is (default is to convert to - a MultiIndex on the columns). - - .. deprecated:: 0.21.0 - This argument will be removed and will always convert to MultiIndex - error_bad_lines : bool, default True Lines with too many fields (e.g. a csv line with too many commas) will by default cause an exception to be raised, and no DataFrame will be returned. @@ -501,7 +495,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): 'squeeze': False, 'compression': None, 'mangle_dupe_cols': True, - 'tupleize_cols': False, 'infer_datetime_format': False, 'skip_blank_lines': True } @@ -514,7 +507,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): 'memory_map': False, 'error_bad_lines': True, 'warn_bad_lines': True, - 'tupleize_cols': False, 'float_precision': None } @@ -530,24 +522,14 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): 'float_precision', } -_deprecated_defaults = { - 'tupleize_cols': None -} -_deprecated_args = { - 'tupleize_cols', -} +_deprecated_defaults = {} # type: Dict[str, Any] +_deprecated_args = set() # type: Set[str] def _make_parser_function(name, default_sep=','): - # prepare read_table deprecation - if name == "read_table": - sep = False - else: - sep = default_sep - def parser_f(filepath_or_buffer: FilePathOrBuffer, - sep=sep, + sep=default_sep, delimiter=None, # Column and Index Locations and Names @@ -601,7 +583,6 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer, comment=None, encoding=None, dialect=None, - tupleize_cols=None, # Error Handling error_bad_lines=True, @@ -613,19 +594,6 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer, memory_map=False, float_precision=None): - # deprecate read_table GH21948 - if name == "read_table": - if sep is False and delimiter is None: - warnings.warn("read_table is deprecated, use read_csv " - "instead, passing sep='\\t'.", - FutureWarning, stacklevel=2) - else: - warnings.warn("read_table is deprecated, use read_csv " - "instead.", - FutureWarning, stacklevel=2) - if sep is False: - sep = default_sep - # gh-23761 # # When a dialect is passed, it overrides any of the overlapping @@ -710,7 +678,6 @@ def 
parser_f(filepath_or_buffer: FilePathOrBuffer, error_bad_lines=error_bad_lines, low_memory=low_memory, mangle_dupe_cols=mangle_dupe_cols, - tupleize_cols=tupleize_cols, infer_datetime_format=infer_datetime_format, skip_blank_lines=skip_blank_lines) @@ -732,10 +699,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer, read_table = _make_parser_function('read_table', default_sep='\t') read_table = Appender(_doc_read_csv_and_table.format( func_name='read_table', - summary="""Read general delimited file into DataFrame. - -.. deprecated:: 0.24.0 - Use :func:`pandas.read_csv` instead, passing ``sep='\\t'`` if necessary.""", + summary='Read general delimited file into DataFrame.', _default_sep=r"'\\t' (tab-stop)") )(read_table) @@ -1074,10 +1038,6 @@ def _clean_options(self, options, engine): "and will be removed in a future version." .format(arg=arg)) - if arg == 'tupleize_cols': - msg += (' Column tuples will then ' - 'always be converted to MultiIndex.') - if result.get(arg, depr_default) != depr_default: # raise Exception(result.get(arg, depr_default), depr_default) depr_warning += msg + '\n\n' @@ -1384,7 +1344,6 @@ def __init__(self, kwds): self.true_values = kwds.get('true_values') self.false_values = kwds.get('false_values') - self.tupleize_cols = kwds.get('tupleize_cols', False) self.mangle_dupe_cols = kwds.get('mangle_dupe_cols', True) self.infer_datetime_format = kwds.pop('infer_datetime_format', False) self.cache_dates = kwds.pop('cache_dates', True) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c8c27f62cef34..f439e365fbcf0 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -23,7 +23,8 @@ from pandas.core.dtypes.common import ( ensure_object, is_categorical_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_list_like, is_timedelta64_dtype) + is_datetime64tz_dtype, is_extension_type, is_list_like, + is_timedelta64_dtype) from pandas.core.dtypes.missing import array_equivalent from pandas import ( @@ -2647,6 +2648,9 @@ def write_multi_index(self, key, index): index.codes, index.names)): # write the level + if is_extension_type(lev): + raise NotImplementedError("Saving a MultiIndex with an " + "extension dtype is not supported.") level_key = '{key}_level{idx}'.format(key=key, idx=i) conv_level = _convert_index(lev, self.encoding, self.errors, self.format_type).set_name(level_key) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 7f33d7fb102b0..368298537a1df 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -131,13 +131,3 @@ def test_testing(self): from pandas import testing self.check(testing, self.funcs) - - -class TestCDateRange: - - def test_deprecation_cdaterange(self): - # GH17596 - from pandas.core.indexes.datetimes import cdate_range - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - cdate_range('2017-01-01', '2017-12-31') diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 15e4bbab8f649..d2f63268e5a12 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -91,12 +91,7 @@ def test_rename_categories(self): def test_rename_categories_series(self): # https://github.com/pandas-dev/pandas/issues/17981 c = Categorical(['a', 'b']) - xpr = "Treating Series 'new_categories' as a list-like " - with tm.assert_produces_warning(FutureWarning) as rec: - result = c.rename_categories(Series([0, 1])) - - assert len(rec) == 1 - assert xpr in str(rec[0].message) + 
result = c.rename_categories(Series([0, 1], index=['a', 'b'])) expected = Categorical([0, 1]) tm.assert_categorical_equal(result, expected) @@ -248,7 +243,7 @@ def test_set_categories(self): tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) - tm.assert_numpy_array_equal(c.get_values(), exp) + tm.assert_numpy_array_equal(c.to_dense(), exp) # all "pointers" to '4' must be changed from 3 to 0,... c = c.set_categories([4, 3, 2, 1]) @@ -262,7 +257,7 @@ def test_set_categories(self): # output is the same exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) - tm.assert_numpy_array_equal(c.get_values(), exp) + tm.assert_numpy_array_equal(c.to_dense(), exp) assert c.min() == 4 assert c.max() == 1 @@ -270,13 +265,13 @@ def test_set_categories(self): c2 = c.set_categories([4, 3, 2, 1], ordered=False) assert not c2.ordered - tm.assert_numpy_array_equal(c.get_values(), c2.get_values()) + tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense()) # set_categories should pass thru the ordering c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) assert not c2.ordered - tm.assert_numpy_array_equal(c.get_values(), c2.get_values()) + tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense()) @pytest.mark.parametrize('values, categories, new_categories', [ # No NaNs, same cats, same order @@ -383,7 +378,7 @@ def test_remove_unused_categories(self): tm.assert_index_equal(out.categories, Index(['B', 'D', 'F'])) exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) tm.assert_numpy_array_equal(out.codes, exp_codes) - assert out.get_values().tolist() == val + assert out.tolist() == val alpha = list('abcdefghijklmnopqrstuvwxyz') val = np.random.choice(alpha[::2], 10000).astype('object') @@ -391,7 +386,7 @@ def test_remove_unused_categories(self): cat = Categorical(values=val, categories=alpha) out = cat.remove_unused_categories() - assert out.get_values().tolist() == val.tolist() + assert out.tolist() == val.tolist() class TestCategoricalAPIWithFactor(TestCategorical): @@ -504,3 +499,9 @@ def test_recode_to_categories_large(self): new = Index(expected) result = _recode_for_categories(codes, old, new) tm.assert_numpy_array_equal(result, expected) + + def test_deprecated_get_values(self): + cat = Categorical(["a", "b", "c", "a"]) + with tm.assert_produces_warning(FutureWarning): + res = cat.get_values() + tm.assert_numpy_array_equal(res, np.array(cat)) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index fbf86f66e437f..8a51704732d7f 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -615,16 +615,19 @@ def test_shape(self, data, shape, dtype): [1, np.nan, np.nan, 3, np.nan], [1, np.nan, 0, 3, 0], ]) - @pytest.mark.parametrize("method", ["to_dense", "get_values"]) @pytest.mark.parametrize("fill_value", [None, 0]) - def test_dense_repr(self, vals, fill_value, method): + def test_dense_repr(self, vals, fill_value): vals = np.array(vals) arr = SparseArray(vals, fill_value=fill_value) - dense_func = getattr(arr, method) - res = dense_func() + res = arr.to_dense() tm.assert_numpy_array_equal(res, vals) + with tm.assert_produces_warning(FutureWarning): + res2 = arr.get_values() + + tm.assert_numpy_array_equal(res2, vals) + def test_getitem(self): def _checkit(i): assert_almost_equal(self.arr[i], self.arr.to_dense()[i]) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index ecef835a9c797..4625c79e1bc3d 100644 
--- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -397,6 +397,6 @@ def _formatting_values(self): ser = pd.Series(DecimalArray2([decimal.Decimal('1.0')])) - with tm.assert_produces_warning(DeprecationWarning, + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): repr(ser) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 303604ba7d7ea..e7b4c2c65b842 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -668,24 +668,20 @@ def test_rename_axis_inplace(self, float_frame): assert no_return is None tm.assert_frame_equal(result, expected) - def test_rename_axis_warns(self): + def test_rename_axis_raises(self): # https://github.com/pandas-dev/pandas/issues/17833 df = DataFrame({"A": [1, 2], "B": [1, 2]}) - with tm.assert_produces_warning(FutureWarning) as w: + with pytest.raises(ValueError, match="Use `.rename`"): df.rename_axis(id, axis=0) - assert 'rename' in str(w[0].message) - with tm.assert_produces_warning(FutureWarning) as w: + with pytest.raises(ValueError, match="Use `.rename`"): df.rename_axis({0: 10, 1: 20}, axis=0) - assert 'rename' in str(w[0].message) - with tm.assert_produces_warning(FutureWarning) as w: + with pytest.raises(ValueError, match="Use `.rename`"): df.rename_axis(id, axis=1) - assert 'rename' in str(w[0].message) - with tm.assert_produces_warning(FutureWarning) as w: + with pytest.raises(ValueError, match="Use `.rename`"): df['A'].rename_axis(id) - assert 'rename' in str(w[0].message) def test_rename_axis_mapper(self): # GH 19978 diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index ce841b302a037..ed224e23fbe20 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -547,3 +547,9 @@ def test_tab_complete_warning(self, ip): with tm.assert_produces_warning(None): with provisionalcompleter('ignore'): list(ip.Completer.completions('df.', 1)) + + def test_get_values_deprecated(self): + df = DataFrame({'a': [1, 2], 'b': [.1, .2]}) + with tm.assert_produces_warning(FutureWarning): + res = df.get_values() + tm.assert_numpy_array_equal(res, df.values) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index b4fde43ff3055..18c95beb62a13 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -416,17 +416,6 @@ def test_reindex_fill_value(self): expected[4] = 'foo' assert_frame_equal(result, expected) - # reindex_axis - with tm.assert_produces_warning(FutureWarning): - result = df.reindex_axis(range(15), fill_value=0., axis=0) - expected = df.reindex(range(15)).fillna(0) - assert_frame_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = df.reindex_axis(range(5), fill_value=0., axis=1) - expected = df.reindex(columns=range(5)).fillna(0) - assert_frame_equal(result, expected) - # other dtypes df['foo'] = 'foo' result = df.reindex(range(15), fill_value=0) @@ -1026,33 +1015,6 @@ def test_reindex_corner(self, int_frame): smaller = int_frame.reindex(columns=['A', 'B', 'E']) assert smaller['E'].dtype == np.float64 - def test_reindex_axis(self, float_frame, int_frame): - cols = ['A', 'B', 'E'] - with tm.assert_produces_warning(FutureWarning) as m: - reindexed1 = int_frame.reindex_axis(cols, axis=1) - assert 'reindex' in str(m[0].message) - reindexed2 = int_frame.reindex(columns=cols) - assert_frame_equal(reindexed1, 
reindexed2) - - rows = int_frame.index[0:5] - with tm.assert_produces_warning(FutureWarning) as m: - reindexed1 = int_frame.reindex_axis(rows, axis=0) - assert 'reindex' in str(m[0].message) - reindexed2 = int_frame.reindex(index=rows) - assert_frame_equal(reindexed1, reindexed2) - - msg = ("No axis named 2 for object type" - " ") - with pytest.raises(ValueError, match=msg): - int_frame.reindex_axis(rows, axis=2) - - # no-op case - cols = float_frame.columns.copy() - with tm.assert_produces_warning(FutureWarning) as m: - newFrame = float_frame.reindex_axis(cols, axis=1) - assert 'reindex' in str(m[0].message) - assert_frame_equal(newFrame, float_frame) - def test_reindex_with_nans(self): df = DataFrame([[1, 2], [3, 4], [np.nan, np.nan], [7, 8], [9, 10]], columns=['a', 'b'], diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 3b8daa28227f8..4c1abfb1a7f6f 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -76,8 +76,9 @@ def test_get_none(self, df): # see gh-5652 assert df.get(None) is None - def test_loc_iterable(self, float_frame): - idx = iter(['A', 'B', 'C']) + @pytest.mark.parametrize('key_type', [iter, np.array, Series, Index]) + def test_loc_iterable(self, float_frame, key_type): + idx = key_type(['A', 'B', 'C']) result = float_frame.loc[:, idx] expected = float_frame.loc[:, ['A', 'B', 'C']] assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index a3b9e529431e5..ac8d1557a4c43 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -804,7 +804,7 @@ def _test_stack_with_multiindex(multiindex): else: assert_frame_equal(result, expected) - df.columns = MultiIndex.from_tuples(df.columns.get_values(), + df.columns = MultiIndex.from_tuples(df.columns.to_numpy(), names=df.columns.names) expected = df.stack(level=level, dropna=False) if isinstance(expected, Series): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 354826a4b3e7b..cfe9e00a47db5 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -33,15 +33,6 @@ def read_csv(self, path, **kwargs): return pd.read_csv(path, **params) - def test_from_csv_deprecation(self): - # see gh-17812 - with ensure_clean('__tmp_from_csv_deprecation__') as path: - self.tsframe.to_csv(path) - - with tm.assert_produces_warning(FutureWarning): - depr_recons = DataFrame.from_csv(path) - assert_frame_equal(self.tsframe, depr_recons) - def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: @@ -582,19 +573,6 @@ def _make_frame(names=None): result.columns.names = df.columns.names assert_frame_equal(df, result) - # tupleize_cols=True and index=False - df = _make_frame(True) - with tm.assert_produces_warning(FutureWarning): - df.to_csv(path, tupleize_cols=True, index=False) - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = read_csv(path, header=0, - tupleize_cols=True, - index_col=None) - result.columns = df.columns - assert_frame_equal(df, result) - # whatsnew example df = _make_frame() df.to_csv(path) @@ -608,18 +586,6 @@ def _make_frame(names=None): index_col=[0]) assert_frame_equal(df, result) - # column & index are multi-index (compatibility) - df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) - with tm.assert_produces_warning(FutureWarning): - df.to_csv(path, tupleize_cols=True) - - with tm.assert_produces_warning(FutureWarning, - 
check_stacklevel=False): - result = read_csv(path, header=0, index_col=[0, 1], - tupleize_cols=True) - result.columns = df.columns - assert_frame_equal(df, result) - # invalid options df = _make_frame(True) df.to_csv(path) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 088007ba6af4b..af0183379790a 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1094,6 +1094,18 @@ def test_dti_union_aware(self): assert result[0].tz.zone == 'US/Central' assert result[-1].tz.zone == 'US/Eastern' + def test_dti_union_mixed(self): + # GH 21671 + rng = DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT]) + rng2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'], tz='Asia/Tokyo') + result = rng.union(rng2) + expected = Index([pd.Timestamp('2011-01-01'), + pd.NaT, + pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), + pd.Timestamp('2012-01-02', tz='Asia/Tokyo')], + dtype=object) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('tz', [None, 'UTC', "US/Central", dateutil.tz.tzoffset(None, -28800)]) @pytest.mark.usefixtures("datetime_tz_utc") diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 5ac73a3c5b940..f886d78da6da2 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -20,7 +20,7 @@ def test_shift(idx): def test_groupby(idx): groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) - labels = idx.get_values().tolist() + labels = idx.tolist() exp = {1: labels[:3], 2: labels[3:]} tm.assert_dict_equal(groups, exp) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index 8315478d85125..8413fc1318d0b 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -8,7 +8,8 @@ def test_dtype_str(indices): - dtype = indices.dtype_str + with tm.assert_produces_warning(FutureWarning): + dtype = indices.dtype_str assert isinstance(dtype, str) assert dtype == str(indices.dtype) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 2f3f15101e7ca..b33982f3d62f3 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -132,12 +132,14 @@ def test_shallow_copy_changing_freq_raises(self): def test_dtype_str(self): pi = pd.PeriodIndex([], freq='M') - assert pi.dtype_str == 'period[M]' - assert pi.dtype_str == str(pi.dtype) + with tm.assert_produces_warning(FutureWarning): + assert pi.dtype_str == 'period[M]' + assert pi.dtype_str == str(pi.dtype) - pi = pd.PeriodIndex([], freq='3M') - assert pi.dtype_str == 'period[3M]' - assert pi.dtype_str == str(pi.dtype) + with tm.assert_produces_warning(FutureWarning): + pi = pd.PeriodIndex([], freq='3M') + assert pi.dtype_str == 'period[3M]' + assert pi.dtype_str == str(pi.dtype) def test_view_asi8(self): idx = pd.PeriodIndex([], freq='M') @@ -162,7 +164,9 @@ def test_values(self): exp = np.array([], dtype=np.object) tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + with tm.assert_produces_warning(FutureWarning): + tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) @@ -170,7 +174,7 @@ def test_values(self): exp = np.array([pd.Period('2011-01', 
freq='M'), pd.NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) @@ -179,7 +183,7 @@ def test_values(self): exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 1de20dc765655..c618b9b05a942 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1439,9 +1439,12 @@ def test_get_indexer_strings_raises(self): index.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=[2, 2, 2, 2]) - def test_get_indexer_numeric_index_boolean_target(self): + @pytest.mark.parametrize("idx_class", [Int64Index, RangeIndex, + Float64Index]) + def test_get_indexer_numeric_index_boolean_target(self, idx_class): # GH 16877 - numeric_index = pd.Index(range(4)) + + numeric_index = idx_class(RangeIndex((4))) result = numeric_index.get_indexer([True, False, True]) expected = np.array([-1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 451fb2ed7906d..3cb907c6f5844 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -155,9 +155,10 @@ def test_set_name_methods(self, indices): assert indices.names == [name] def test_dtype_str(self, indices): - dtype = indices.dtype_str - assert isinstance(dtype, str) - assert dtype == str(indices.dtype) + with tm.assert_produces_warning(FutureWarning): + dtype = indices.dtype_str + assert isinstance(dtype, str) + assert dtype == str(indices.dtype) def test_hash_error(self, indices): index = indices diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 3f474b0166b15..e9fe1278d7827 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.core.dtypes.common import ensure_platform_int + import pandas as pd from pandas import Float64Index, Index, Int64Index, RangeIndex, Series import pandas.util.testing as tm @@ -171,12 +173,12 @@ def test_start_stop_step_attrs(self, index, start, stop, step): assert index.stop == stop assert index.step == step - def test_deprecated_start_stop_step_attrs(self): + @pytest.mark.parametrize('attr_name', ['_start', '_stop', '_step']) + def test_deprecated_start_stop_step_attrs(self, attr_name): # GH 26581 idx = self.create_index() - for attr_name in ['_start', '_stop', '_step']: - with tm.assert_produces_warning(DeprecationWarning): - getattr(idx, attr_name) + with tm.assert_produces_warning(DeprecationWarning): + getattr(idx, attr_name) def test_copy(self): i = RangeIndex(5, name='Foo') @@ -965,3 +967,23 @@ def test_append(self, appends): # Append single item rather than list result2 = indices[0].append(indices[1]) tm.assert_index_equal(result2, expected, exact=True) + + def test_engineless_lookup(self): + # GH 16685 + # Standard lookup on RangeIndex should not require the engine to be + # created + idx = RangeIndex(2, 10, 3) + + assert 
idx.get_loc(5) == 1 + tm.assert_numpy_array_equal(idx.get_indexer([2, 8]), + ensure_platform_int(np.array([0, 2]))) + with pytest.raises(KeyError): + idx.get_loc(3) + + assert '_engine' not in idx._cache + + # The engine is still required for lookup of a different dtype scalar: + with pytest.raises(KeyError): + assert idx.get_loc('a') == -1 + + assert '_engine' in idx._cache diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 3394c4c06d45a..2431f27bff78a 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -21,7 +21,7 @@ def test_per_axis_per_level_getitem(self): # example test case ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7), _mklbl( 'C', 4), _mklbl('D', 2)]) - df = DataFrame(np.arange(len(ix.get_values())), index=ix) + df = DataFrame(np.arange(len(ix.to_numpy())), index=ix) result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] expected = df.loc[[tuple([a, b, c, d]) @@ -88,7 +88,7 @@ def test_per_axis_per_level_getitem(self): tm.assert_frame_equal(result, expected) # multi-level series - s = Series(np.arange(len(ix.get_values())), index=ix) + s = Series(np.arange(len(ix.to_numpy())), index=ix) result = s.loc['A1':'A3', :, ['C1', 'C3']] expected = s.loc[[tuple([a, b, c, d]) for a, b, c, d in s.index.values diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b997e2b6eec8f..697c0b5280589 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -287,7 +287,7 @@ def test_delete(self): def test_make_block_same_class(self): # issue 19431 block = create_block('M8[ns, US/Eastern]', [3]) - with tm.assert_produces_warning(DeprecationWarning, + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): block.make_block_same_class(block.values, dtype=block.values.dtype) @@ -1254,7 +1254,7 @@ def test_holder(typestr, holder): def test_deprecated_fastpath(): # GH#19265 values = np.random.rand(3, 3) - with tm.assert_produces_warning(DeprecationWarning, + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): make_block(values, placement=np.arange(3), fastpath=True) diff --git a/pandas/tests/io/data/test4.xls b/pandas/tests/io/data/test4.xls index 0e6f4331e2547..10a6ab1cca6a4 100644 Binary files a/pandas/tests/io/data/test4.xls and b/pandas/tests/io/data/test4.xls differ diff --git a/pandas/tests/io/data/test4.xlsm b/pandas/tests/io/data/test4.xlsm index 52328c7b28be9..a5a2ff8caadd6 100644 Binary files a/pandas/tests/io/data/test4.xlsm and b/pandas/tests/io/data/test4.xlsm differ diff --git a/pandas/tests/io/data/test4.xlsx b/pandas/tests/io/data/test4.xlsx index 441db5e55e666..6fb62272caf09 100644 Binary files a/pandas/tests/io/data/test4.xlsx and b/pandas/tests/io/data/test4.xlsx differ diff --git a/pandas/tests/io/data/test5.xls b/pandas/tests/io/data/test5.xls index 4bb7cd4767dd7..f484f54a60c61 100644 Binary files a/pandas/tests/io/data/test5.xls and b/pandas/tests/io/data/test5.xls differ diff --git a/pandas/tests/io/data/test5.xlsm b/pandas/tests/io/data/test5.xlsm index 845cec785b498..7b8d489f24321 100644 Binary files a/pandas/tests/io/data/test5.xlsm and b/pandas/tests/io/data/test5.xlsm differ diff --git a/pandas/tests/io/data/test5.xlsx b/pandas/tests/io/data/test5.xlsx index 13781bb06048f..8660913c74812 100644 Binary files a/pandas/tests/io/data/test5.xlsx and b/pandas/tests/io/data/test5.xlsx differ diff --git 
a/pandas/tests/io/data/test_multisheet.xls b/pandas/tests/io/data/test_multisheet.xls index 7b4b9759a1a94..bb6cb35e34a57 100644 Binary files a/pandas/tests/io/data/test_multisheet.xls and b/pandas/tests/io/data/test_multisheet.xls differ diff --git a/pandas/tests/io/data/test_multisheet.xlsm b/pandas/tests/io/data/test_multisheet.xlsm index c6191bc61bc49..eaee98a801cf0 100644 Binary files a/pandas/tests/io/data/test_multisheet.xlsm and b/pandas/tests/io/data/test_multisheet.xlsm differ diff --git a/pandas/tests/io/data/test_multisheet.xlsx b/pandas/tests/io/data/test_multisheet.xlsx index dc424a9963253..c22771232961f 100644 Binary files a/pandas/tests/io/data/test_multisheet.xlsx and b/pandas/tests/io/data/test_multisheet.xlsx differ diff --git a/pandas/tests/io/data/test_squeeze.xls b/pandas/tests/io/data/test_squeeze.xls index 7261f4df13f08..2524b975bdce6 100644 Binary files a/pandas/tests/io/data/test_squeeze.xls and b/pandas/tests/io/data/test_squeeze.xls differ diff --git a/pandas/tests/io/data/test_squeeze.xlsm b/pandas/tests/io/data/test_squeeze.xlsm index d7fabe802ff52..fb19b5a40efb7 100644 Binary files a/pandas/tests/io/data/test_squeeze.xlsm and b/pandas/tests/io/data/test_squeeze.xlsm differ diff --git a/pandas/tests/io/data/test_squeeze.xlsx b/pandas/tests/io/data/test_squeeze.xlsx index 89fc590cebcc7..84141fc80ef11 100644 Binary files a/pandas/tests/io/data/test_squeeze.xlsx and b/pandas/tests/io/data/test_squeeze.xlsx differ diff --git a/pandas/tests/io/data/test_types.xlsm b/pandas/tests/io/data/test_types.xlsm index c66fdc82dfb67..4c8c10e40effe 100644 Binary files a/pandas/tests/io/data/test_types.xlsm and b/pandas/tests/io/data/test_types.xlsm differ diff --git a/pandas/tests/io/data/test_types.xlsx b/pandas/tests/io/data/test_types.xlsx index ef749e04ff3b5..561bb2c5d6714 100644 Binary files a/pandas/tests/io/data/test_types.xlsx and b/pandas/tests/io/data/test_types.xlsx differ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 579f39e21d3c1..be5951fe12b46 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -15,7 +15,6 @@ import pandas.util.testing as tm from pandas.io.common import URLError -from pandas.io.excel import ExcelFile @contextlib.contextmanager @@ -32,24 +31,29 @@ def ignore_xlrd_time_clock_warning(): yield +@pytest.fixture(params=[ + # Add any engines to test here + pytest.param('xlrd', marks=td.skip_if_no('xlrd')), + pytest.param('openpyxl', marks=td.skip_if_no('openpyxl')), + pytest.param(None, marks=td.skip_if_no('xlrd')), +]) +def engine(request): + """ + A fixture for Excel reader engines. + """ + return request.param + + class TestReaders: - @pytest.fixture(autouse=True, params=[ - # Add any engines to test here - pytest.param('xlrd', marks=pytest.mark.skipif( - not td.safe_import("xlrd"), reason="no xlrd")), - pytest.param('openpyxl', marks=pytest.mark.skipif( - not td.safe_import("openpyxl"), reason="no openpyxl")), - pytest.param(None, marks=pytest.mark.skipif( - not td.safe_import("xlrd"), reason="no xlrd")), - ]) - def cd_and_set_engine(self, request, datapath, monkeypatch, read_ext): + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext): """ Change directory and set engine for read_excel calls. 
""" - if request.param == 'openpyxl' and read_ext == '.xls': + if engine == 'openpyxl' and read_ext == '.xls': pytest.skip() - func = partial(pd.read_excel, engine=request.param) + func = partial(pd.read_excel, engine=engine) monkeypatch.chdir(datapath("io", "data")) monkeypatch.setattr(pd, 'read_excel', func) @@ -727,50 +731,45 @@ def test_read_excel_squeeze(self, read_ext): class TestExcelFileRead: - @pytest.fixture(autouse=True, params=[ - # Add any engines to test here - pytest.param('xlrd', marks=pytest.mark.skipif( - not td.safe_import("xlrd"), reason="no xlrd")), - pytest.param('openpyxl', marks=pytest.mark.skipif( - not td.safe_import("openpyxl"), reason="no openpyxl")), - pytest.param(None, marks=pytest.mark.skipif( - not td.safe_import("xlrd"), reason="no xlrd")), - ]) - def cd_and_set_engine(self, request, datapath, monkeypatch): + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext): """ Change directory and set engine for ExcelFile objects. """ - func = partial(pd.ExcelFile, engine=request.param) + if engine == 'openpyxl' and read_ext == '.xls': + pytest.skip() + + func = partial(pd.ExcelFile, engine=engine) monkeypatch.chdir(datapath("io", "data")) monkeypatch.setattr(pd, 'ExcelFile', func) def test_excel_passes_na(self, read_ext): - excel = ExcelFile('test4' + read_ext) - - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) + with pd.ExcelFile('test4' + read_ext) as excel: + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, + na_values=['apple']) expected = DataFrame([['NA'], [1], ['NA'], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, - na_values=['apple']) + with pd.ExcelFile('test4' + read_ext) as excel: + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, + na_values=['apple']) expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) # 13967 - excel = ExcelFile('test5' + read_ext) - - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, - na_values=['apple']) + with pd.ExcelFile('test5' + read_ext) as excel: + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=False, + na_values=['apple']) expected = DataFrame([['1.#QNAN'], [1], ['nan'], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) - parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, - na_values=['apple']) + with pd.ExcelFile('test5' + read_ext) as excel: + parsed = pd.read_excel(excel, 'Sheet1', keep_default_na=True, + na_values=['apple']) expected = DataFrame([[np.nan], [1], [np.nan], [np.nan], ['rabbit']], columns=['Test']) tm.assert_frame_equal(parsed, expected) @@ -778,79 +777,78 @@ def test_excel_passes_na(self, read_ext): @pytest.mark.parametrize('arg', ['sheet', 'sheetname', 'parse_cols']) def test_unexpected_kwargs_raises(self, read_ext, arg): # gh-17964 - excel = ExcelFile('test1' + read_ext) - kwarg = {arg: 'Sheet1'} msg = "unexpected keyword argument `{}`".format(arg) - with pytest.raises(TypeError, match=msg): - pd.read_excel(excel, **kwarg) - def test_excel_table_sheet_by_index(self, read_ext, df_ref): + with pd.ExcelFile('test1' + read_ext) as excel: + with pytest.raises(TypeError, match=msg): + pd.read_excel(excel, **kwarg) - excel = ExcelFile('test1' + read_ext) + def test_excel_table_sheet_by_index(self, read_ext, df_ref): - df1 = pd.read_excel(excel, 0, index_col=0) - df2 
= pd.read_excel(excel, 1, skiprows=[1], index_col=0) + with pd.ExcelFile('test1' + read_ext) as excel: + df1 = pd.read_excel(excel, 0, index_col=0) + df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) - df1 = excel.parse(0, index_col=0) - df2 = excel.parse(1, skiprows=[1], index_col=0) + with pd.ExcelFile('test1' + read_ext) as excel: + df1 = excel.parse(0, index_col=0) + df2 = excel.parse(1, skiprows=[1], index_col=0) tm.assert_frame_equal(df1, df_ref, check_names=False) tm.assert_frame_equal(df2, df_ref, check_names=False) - df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) + with pd.ExcelFile('test1' + read_ext) as excel: + df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) tm.assert_frame_equal(df3, df1.iloc[:-1]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1) + with pd.ExcelFile('test1' + read_ext) as excel: + df4 = pd.read_excel(excel, 0, index_col=0, skip_footer=1) + tm.assert_frame_equal(df3, df4) - df3 = excel.parse(0, index_col=0, skipfooter=1) - tm.assert_frame_equal(df3, df1.iloc[:-1]) + with pd.ExcelFile('test1' + read_ext) as excel: + df3 = excel.parse(0, index_col=0, skipfooter=1) - import xlrd # will move to engine-specific tests as new ones are added - with pytest.raises(xlrd.XLRDError): - pd.read_excel(excel, 'asdf') + tm.assert_frame_equal(df3, df1.iloc[:-1]) def test_sheet_name(self, read_ext, df_ref): filename = "test1" sheet_name = "Sheet1" - excel = ExcelFile(filename + read_ext) - df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc - df2_parse = excel.parse(index_col=0, - sheet_name=sheet_name) + with pd.ExcelFile(filename + read_ext) as excel: + df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc + + with pd.ExcelFile(filename + read_ext) as excel: + df2_parse = excel.parse(index_col=0, + sheet_name=sheet_name) tm.assert_frame_equal(df1_parse, df_ref, check_names=False) tm.assert_frame_equal(df2_parse, df_ref, check_names=False) - def test_excel_read_buffer(self, read_ext): - + def test_excel_read_buffer(self, engine, read_ext): pth = 'test1' + read_ext - expected = pd.read_excel(pth, 'Sheet1', index_col=0) + expected = pd.read_excel(pth, 'Sheet1', index_col=0, engine=engine) with open(pth, 'rb') as f: - xls = ExcelFile(f) - actual = pd.read_excel(xls, 'Sheet1', index_col=0) - tm.assert_frame_equal(expected, actual) + with pd.ExcelFile(f) as xls: + actual = pd.read_excel(xls, 'Sheet1', index_col=0) - def test_reader_closes_file(self, read_ext): + tm.assert_frame_equal(expected, actual) + def test_reader_closes_file(self, engine, read_ext): f = open('test1' + read_ext, 'rb') - with ExcelFile(f) as xlsx: + with pd.ExcelFile(f) as xlsx: # parses okay - pd.read_excel(xlsx, 'Sheet1', index_col=0) + pd.read_excel(xlsx, 'Sheet1', index_col=0, engine=engine) assert f.closed - @pytest.mark.parametrize('excel_engine', [ - 'xlrd', - None - ]) - def test_read_excel_engine_value(self, read_ext, excel_engine): + def test_conflicting_excel_engines(self, read_ext): # GH 26566 - xl = ExcelFile("test1" + read_ext, engine=excel_engine) msg = "Engine should not be specified when passing an ExcelFile" - with pytest.raises(ValueError, match=msg): - pd.read_excel(xl, engine='openpyxl') + + with pd.ExcelFile("test1" + read_ext) as xl: + with pytest.raises(ValueError, match=msg): + pd.read_excel(xl, engine='foo') diff --git 
a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index a4fdcdf70a3ea..d65bebe16804c 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -224,7 +224,7 @@ def test_read_excel_parse_dates(self, ext): class _WriterBase: @pytest.fixture(autouse=True) - def set_engine_and_path(self, request, engine, ext): + def set_engine_and_path(self, engine, ext): """Fixture to set engine and open file for use in each test case Rather than requiring `engine=...` to be provided explicitly as an @@ -250,15 +250,12 @@ class and any subclasses, on account of the `autouse=True` set_option(option_name, prev_engine) # Roll back option change +@td.skip_if_no('xlrd') @pytest.mark.parametrize("engine,ext", [ - pytest.param('openpyxl', '.xlsx', marks=pytest.mark.skipif( - not td.safe_import('openpyxl'), reason='No openpyxl')), - pytest.param('openpyxl', '.xlsm', marks=pytest.mark.skipif( - not td.safe_import('openpyxl'), reason='No openpyxl')), - pytest.param('xlwt', '.xls', marks=pytest.mark.skipif( - not td.safe_import('xlwt'), reason='No xlwt')), - pytest.param('xlsxwriter', '.xlsx', marks=pytest.mark.skipif( - not td.safe_import('xlsxwriter'), reason='No xlsxwriter')) + pytest.param('openpyxl', '.xlsx', marks=td.skip_if_no('openpyxl')), + pytest.param('openpyxl', '.xlsm', marks=td.skip_if_no('openpyxl')), + pytest.param('xlwt', '.xls', marks=td.skip_if_no('xlwt')), + pytest.param('xlsxwriter', '.xlsx', marks=td.skip_if_no('xlsxwriter')) ]) class TestExcelWriter(_WriterBase): # Base class for test cases to run with different Excel writers. @@ -1178,16 +1175,29 @@ def test_merged_cell_custom_objects(self, engine, merge_cells, ext): expected.index = expected.index.astype(np.float64) tm.assert_frame_equal(expected, result) + @pytest.mark.parametrize('dtype', [None, object]) + def test_raise_when_saving_timezones(self, engine, ext, dtype, + tz_aware_fixture): + # GH 27008, GH 7056 + tz = tz_aware_fixture + data = pd.Timestamp('2019', tz=tz) + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(self.path) + + data = data.to_pydatetime() + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(self.path) + class TestExcelWriterEngineTests: @pytest.mark.parametrize('klass,ext', [ - pytest.param(_XlsxWriter, '.xlsx', marks=pytest.mark.skipif( - not td.safe_import('xlsxwriter'), reason='No xlsxwriter')), - pytest.param(_OpenpyxlWriter, '.xlsx', marks=pytest.mark.skipif( - not td.safe_import('openpyxl'), reason='No openpyxl')), - pytest.param(_XlwtWriter, '.xls', marks=pytest.mark.skipif( - not td.safe_import('xlwt'), reason='No xlwt')) + pytest.param(_XlsxWriter, '.xlsx', marks=td.skip_if_no('xlsxwriter')), + pytest.param( + _OpenpyxlWriter, '.xlsx', marks=td.skip_if_no('openpyxl')), + pytest.param(_XlwtWriter, '.xls', marks=td.skip_if_no('xlwt')) ]) def test_ExcelWriter_dispatch(self, klass, ext): with ensure_clean(ext) as path: @@ -1237,6 +1247,7 @@ def check_called(func): 'something.xls', engine='dummy')) +@td.skip_if_no('xlrd') @td.skip_if_no('openpyxl') @pytest.mark.skipif(not PY36, reason='requires fspath') class TestFSPath: diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index b9fc9305a4033..94e1435d4dfab 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -27,3 +27,11 @@ def test_read_xlrd_book(read_ext, frame): result = pd.read_excel(book, 
sheet_name=sheet_name, engine=engine, index_col=0) tm.assert_frame_equal(df, result) + + +# TODO: test for openpyxl as well +def test_excel_table_sheet_by_index(datapath, read_ext): + path = datapath("io", "data", 'test1{}'.format(read_ext)) + with pd.ExcelFile(path) as excel: + with pytest.raises(xlrd.XLRDError): + pd.read_excel(excel, 'asdf') diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 28ea90f005f3f..c74e57627d679 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -1917,16 +1917,14 @@ def test_read_csv_memory_growth_chunksize(all_parsers): pass -def test_read_table_deprecated(all_parsers): +def test_read_table_equivalency_to_read_csv(all_parsers): # see gh-21948 + # As of 0.25.0, read_table is undeprecated parser = all_parsers data = "a\tb\n1\t2\n3\t4" expected = parser.read_csv(StringIO(data), sep="\t") - - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - result = parser.read_table(StringIO(data)) - tm.assert_frame_equal(result, expected) + result = parser.read_table(StringIO(data)) + tm.assert_frame_equal(result, expected) def test_first_row_bom(all_parsers): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 4447a5580ae60..a8748c88e0e55 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -122,17 +122,3 @@ def read(self): with pytest.raises(ValueError, match=msg): read_csv(NoNextBuffer(data), engine=python_engine) - - -class TestDeprecatedFeatures: - - @pytest.mark.parametrize("engine", ["c", "python"]) - @pytest.mark.parametrize("kwargs", [{"tupleize_cols": True}, - {"tupleize_cols": False}]) - def test_deprecated_args(self, engine, kwargs): - data = "1,2,3" - arg, _ = list(kwargs.items())[0] - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - read_csv(StringIO(data), engine=engine, **kwargs) diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 40cc05c317471..ec34739672718 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -17,9 +17,9 @@ import pandas as pd from pandas import ( - Categorical, DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex, - RangeIndex, Series, Timestamp, bdate_range, concat, date_range, isna, - timedelta_range) + Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Index, Int64Index, + MultiIndex, RangeIndex, Series, Timestamp, bdate_range, concat, date_range, + isna, timedelta_range) import pandas.util.testing as tm from pandas.util.testing import ( assert_frame_equal, assert_series_equal, set_timezone) @@ -4749,6 +4749,19 @@ def test_select_empty_where(self, where): result = pd.read_hdf(store, "df", where=where) assert_frame_equal(result, df) + @pytest.mark.parametrize('idx', [ + date_range('2019', freq='D', periods=3, tz='UTC'), + CategoricalIndex(list('abc')) + ]) + def test_to_hdf_multiindex_extension_dtype(self, idx): + # GH 7775 + mi = MultiIndex.from_arrays([idx, idx]) + df = pd.DataFrame(0, index=mi, columns=['a']) + with ensure_clean_path(self.path) as path: + with pytest.raises(NotImplementedError, + match="Saving a MultiIndex"): + df.to_hdf(path, 'df') + class TestHDFComplexValues(Base): # GH10447 @@ -5170,3 +5183,20 @@ def test_dst_transitions(self): store.append('df', df) result = store.select('df') assert_frame_equal(result, df) + + def 
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 0ea87d9d961f2..04faf5aee4b6d 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -45,6 +45,7 @@ def __fspath__(self):
 
 # https://github.com/cython/cython/issues/1720
 @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestCommonIOCapabilities:
     data1 = """index,A,B,C,D
 foo,2,3,4,5
@@ -160,6 +161,7 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
 
     @pytest.mark.parametrize('reader, module, error_class, fn_ext', [
         (pd.read_csv, 'os', FileNotFoundError, 'csv'),
+        (pd.read_table, 'os', FileNotFoundError, 'csv'),
         (pd.read_fwf, 'os', FileNotFoundError, 'txt'),
         (pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
         (pd.read_feather, 'feather', Exception, 'feather'),
@@ -191,18 +193,9 @@ def test_read_expands_user_home_dir(self, reader, module,
                                     msg1, msg2, msg3, msg4, msg5)):
             reader(path)
 
-    def test_read_non_existant_read_table(self):
-        path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv')
-        msg1 = r"File b'.+does_not_exist\.csv' does not exist"
-        msg2 = (r"\[Errno 2\] File .+does_not_exist\.csv does not exist:"
-                r" '.+does_not_exist\.csv'")
-        with pytest.raises(FileNotFoundError, match=r"({}|{})".format(
-                msg1, msg2)):
-            with tm.assert_produces_warning(FutureWarning):
-                pd.read_table(path)
-
     @pytest.mark.parametrize('reader, module, path', [
         (pd.read_csv, 'os', ('io', 'data', 'iris.csv')),
+        (pd.read_table, 'os', ('io', 'data', 'iris.csv')),
         (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')),
         (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')),
         (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')),
@@ -228,21 +221,6 @@ def test_read_fspath_all(self, reader, module, path, datapath):
         else:
             tm.assert_frame_equal(result, expected)
 
-    def test_read_fspath_all_read_table(self, datapath):
-        path = datapath('io', 'data', 'iris.csv')
-
-        mypath = CustomFSPath(path)
-        with tm.assert_produces_warning(FutureWarning):
-            result = pd.read_table(mypath)
-        with tm.assert_produces_warning(FutureWarning):
-            expected = pd.read_table(path)
-
-        if path.endswith('.pickle'):
-            # categorical
-            tm.assert_categorical_equal(result, expected)
-        else:
-            tm.assert_frame_equal(result, expected)
-
     @pytest.mark.parametrize('writer_name, writer_kwargs, module', [
         ('to_csv', {}, 'os'),
         ('to_excel', {'engine': 'xlwt'}, 'xlwt'),
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 33268b637d44a..63184dd1a8f83 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -310,10 +310,8 @@ def test_multiindex_header_index(self):
 
     @pytest.mark.slow
     def test_multiindex_header_skiprows_tuples(self):
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            df = self._bank_data(header=[0, 1], skiprows=1,
-                                 tupleize_cols=True)[0]
-        assert isinstance(df.columns, Index)
+        df = self._bank_data(header=[0, 1], skiprows=1)[0]
+        assert isinstance(df.columns, MultiIndex)
 
     @pytest.mark.slow
     def test_multiindex_header_skiprows(self):
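With `tupleize_cols` removed, a multi-row header read through `read_html` now
always lands as a `MultiIndex`, which is what the rewritten
`test_multiindex_header_skiprows_tuples` asserts. A minimal sketch of the
behaviour; it requires an HTML parser such as lxml or html5lib to be installed,
and the table is made up for illustration:

import pandas as pd

html = """<table>
  <thead>
    <tr><th>group</th><th>group</th></tr>
    <tr><th>a</th><th>b</th></tr>
  </thead>
  <tbody>
    <tr><td>1</td><td>2</td></tr>
  </tbody>
</table>"""

# header=[0, 1] consumes both <thead> rows as column levels
df = pd.read_html(html, header=[0, 1])[0]
assert isinstance(df.columns, pd.MultiIndex)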
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 59fa9fbd02da1..203b550b8936a 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -2,7 +2,7 @@
 import glob
 from io import BytesIO
 import os
-from warnings import catch_warnings
+from warnings import catch_warnings, filterwarnings
 
 import numpy as np
 import pytest
@@ -83,6 +83,7 @@ def check_arbitrary(a, b):
         assert(a == b)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestPackers:
 
     def setup_method(self, method):
@@ -97,6 +98,7 @@ def encode_decode(self, x, compress=None, **kwargs):
         return read_msgpack(p, **kwargs)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestAPI(TestPackers):
 
     def test_string_io(self):
@@ -159,6 +161,7 @@ def __init__(self):
             read_msgpack(path_or_buf=A())
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestNumpy(TestPackers):
 
     def test_numpy_scalar_float(self):
@@ -277,6 +280,7 @@ def test_list_mixed(self):
         tm.assert_almost_equal(tuple(x), x_rec)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestBasic(TestPackers):
 
     def test_timestamp(self):
@@ -322,6 +326,7 @@ def test_intervals(self):
         assert i == i_rec
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestIndex(TestPackers):
 
     def setup_method(self, method):
@@ -387,6 +392,7 @@ def categorical_index(self):
         tm.assert_frame_equal(result, df)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestSeries(TestPackers):
 
     def setup_method(self, method):
@@ -437,6 +443,7 @@ def test_basic(self):
         assert_series_equal(i, i_rec)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestCategorical(TestPackers):
 
     def setup_method(self, method):
@@ -460,6 +467,7 @@ def test_basic(self):
         assert_categorical_equal(i, i_rec)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestNDFrame(TestPackers):
 
     def setup_method(self, method):
@@ -549,6 +557,7 @@ def test_dataframe_duplicate_column_names(self):
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestSparse(TestPackers):
 
     def _check_roundtrip(self, obj, comparator, **kwargs):
@@ -595,6 +604,7 @@ def test_sparse_frame(self):
                               check_frame_type=True)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestCompression(TestPackers):
     """See https://github.com/pandas-dev/pandas/pull/9783
     """
@@ -676,18 +686,21 @@ def decompress(ob):
         with monkeypatch.context() as m, \
                 tm.assert_produces_warning(PerformanceWarning) as ws:
             m.setattr(compress_module, 'decompress', decompress)
-            i_rec = self.encode_decode(self.frame, compress=compress)
-            for k in self.frame.keys():
-
-                value = i_rec[k]
-                expected = self.frame[k]
-                assert_frame_equal(value, expected)
-                # make sure that we can write to the new frames even though
-                # we needed to copy the data
-                for block in value._data.blocks:
-                    assert block.values.flags.writeable
-                    # mutate the data in some way
-                    block.values[0] += rhs[block.dtype]
+
+            with catch_warnings():
+                filterwarnings('ignore', category=FutureWarning)
+                i_rec = self.encode_decode(self.frame, compress=compress)
+                for k in self.frame.keys():
+
+                    value = i_rec[k]
+                    expected = self.frame[k]
+                    assert_frame_equal(value, expected)
+                    # make sure that we can write to the new frames even though
+                    # we needed to copy the data
+                    for block in value._data.blocks:
+                        assert block.values.flags.writeable
+                        # mutate the data in some way
+                        block.values[0] += rhs[block.dtype]
 
         for w in ws:
             # check the messages from our warnings
@@ -715,14 +728,18 @@ def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch):
     def _test_small_strings_no_warn(self, compress):
         empty = np.array([], dtype='uint8')
         with tm.assert_produces_warning(None):
-            empty_unpacked = self.encode_decode(empty, compress=compress)
+            with catch_warnings():
+                filterwarnings('ignore', category=FutureWarning)
+                empty_unpacked = self.encode_decode(empty, compress=compress)
 
         tm.assert_numpy_array_equal(empty_unpacked, empty)
         assert empty_unpacked.flags.writeable
 
         char = np.array([ord(b'a')], dtype='uint8')
         with tm.assert_produces_warning(None):
-            char_unpacked = self.encode_decode(char, compress=compress)
+            with catch_warnings():
+                filterwarnings('ignore', category=FutureWarning)
+                char_unpacked = self.encode_decode(char, compress=compress)
 
         tm.assert_numpy_array_equal(char_unpacked, char)
         assert char_unpacked.flags.writeable
@@ -794,6 +811,7 @@ def test_readonly_axis_zlib_to_sql(self):
         assert_frame_equal(expected, result)
 
 
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestEncoding(TestPackers):
 
     def setup_method(self, method):
@@ -839,6 +857,7 @@ def legacy_packer(request, datapath):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning")
 class TestMsgpack:
     """
    How to add msgpack tests:
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 8543d2c2df7d6..7def8e53859c7 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -429,6 +429,46 @@ def test_pivot_with_tz(self, method):
         pv = pd.pivot(df, index='dt1', columns='dt2', values='data1')
         tm.assert_frame_equal(pv, expected)
 
+    def test_pivot_tz_in_values(self):
+        # GH 14948
+        df = pd.DataFrame([{'uid': u'aa',
+                            'ts': pd.Timestamp('2016-08-12 13:00:00-0700',
+                                               tz='US/Pacific')},
+                           {'uid': u'aa',
+                            'ts': pd.Timestamp('2016-08-12 08:00:00-0700',
+                                               tz='US/Pacific')},
+                           {'uid': u'aa',
+                            'ts': pd.Timestamp('2016-08-12 14:00:00-0700',
+                                               tz='US/Pacific')},
+                           {'uid': u'aa',
+                            'ts': pd.Timestamp('2016-08-25 11:00:00-0700',
+                                               tz='US/Pacific')},
+                           {'uid': u'aa',
+                            'ts': pd.Timestamp('2016-08-25 13:00:00-0700',
+                                               tz='US/Pacific')}])
+
+        df = df.set_index('ts').reset_index()
+        mins = df.ts.map(lambda x: x.replace(hour=0, minute=0,
+                                             second=0, microsecond=0))
+
+        result = pd.pivot_table(df.set_index('ts').reset_index(),
+                                values='ts', index=['uid'], columns=[mins],
+                                aggfunc=np.min)
+        expected = pd.DataFrame(
+            [
+                [pd.Timestamp('2016-08-12 08:00:00-0700', tz='US/Pacific'),
+                 pd.Timestamp('2016-08-25 11:00:00-0700', tz='US/Pacific')]
+            ],
+            index=pd.Index(['aa'], name='uid'),
+            columns=pd.DatetimeIndex(
+                [
+                    pd.Timestamp('2016-08-12 00:00:00', tz='US/Pacific'),
+                    pd.Timestamp('2016-08-25 00:00:00', tz='US/Pacific')
+                ],
+                name='ts')
+        )
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize('method', [True, False])
     def test_pivot_periods(self, method):
         df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),
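The new `test_pivot_tz_in_values` boils down to: tz-aware timestamps can now sit
in `values` and be aggregated. A condensed variant of the same scenario, where
`.dt.normalize()` stands in for the `map(lambda ...)` day-bucketing used in the
test:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'uid': ['aa', 'aa', 'aa'],
    'ts': pd.to_datetime(['2016-08-12 13:00', '2016-08-12 08:00',
                          '2016-08-25 11:00']).tz_localize('US/Pacific'),
})
day = df['ts'].dt.normalize()  # bucket each timestamp by calendar day

# Earliest timestamp per uid per day; the values stay tz-aware throughout.
table = pd.pivot_table(df, values='ts', index=['uid'], columns=[day],
                       aggfunc=np.min)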
diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py
index 43dc292652519..ef7312616250d
--- a/pandas/tests/series/indexing/test_boolean.py
+++ b/pandas/tests/series/indexing/test_boolean.py
@@ -229,17 +229,6 @@ def test_where_unsafe():
     assert_series_equal(result, expected)
 
 
-def test_where_raise_on_error_deprecation():
-    # gh-14968
-    # deprecation of raise_on_error
-    s = Series(np.random.randn(5))
-    cond = s > 0
-    with tm.assert_produces_warning(FutureWarning):
-        s.where(cond, raise_on_error=True)
-    with tm.assert_produces_warning(FutureWarning):
-        s.mask(cond, raise_on_error=True)
-
-
 def test_where():
     s = Series(np.random.randn(5))
     cond = s > 0
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index fac796fbf325a..71b0a2d9d74eb 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -419,10 +419,11 @@ def f(x):
         tm.assert_series_equal(result, expected)
 
         # .item()
-        s = Series([1])
-        result = s.item()
-        assert result == 1
-        assert s.item() == s.iloc[0]
+        with tm.assert_produces_warning(FutureWarning):
+            s = Series([1])
+            result = s.item()
+            assert result == 1
+            assert s.item() == s.iloc[0]
 
         # using an ndarray like function
         s = Series(np.random.randn(10))
@@ -501,6 +502,12 @@ def test_integer_series_size(self):
         s = Series(range(9), dtype="Int64")
         assert s.size == 9
 
+    def test_get_values_deprecation(self):
+        s = Series(range(9))
+        with tm.assert_produces_warning(FutureWarning):
+            res = s.get_values()
+        tm.assert_numpy_array_equal(res, s.values)
+
 
 class TestCategoricalSeries:
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index f2345a0822f6d..49417942a3598 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -224,6 +224,13 @@ def test_constructor_list_like(self):
             result = Series(obj, index=[0, 1, 2])
             assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize('dtype', ['bool', 'int32', 'int64', 'float64'])
+    def test_constructor_index_dtype(self, dtype):
+        # GH 17088
+
+        s = Series(Index([0, 2, 4]), dtype=dtype)
+        assert s.dtype == dtype
+
     @pytest.mark.parametrize('input_vals', [
         ([1, 2]),
         (['1', '2']),
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 392163228398b..59566ad3232c7 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -223,15 +223,12 @@ def test_astype_dict_like(self, dtype_class):
         with pytest.raises(KeyError, match=msg):
             s.astype(dt5)
 
-    def test_astype_categories_deprecation(self):
+    def test_astype_categories_deprecation_raises(self):
         # deprecated 17636
         s = Series(['a', 'b', 'a'])
-        expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            result = s.astype('category', categories=['a', 'b'], ordered=True)
-        tm.assert_series_equal(result, expected)
+        with pytest.raises(ValueError, match="Got an unexpected"):
+            s.astype('category', categories=['a', 'b'], ordered=True)
 
     def test_astype_from_categorical(self):
         items = ["a", "b", "c", "a"]
@@ -349,21 +346,12 @@ def test_astype_categorical_to_categorical(self, name, dtype_ordered,
         expected = Series(s_data, name=name, dtype=exp_dtype)
         tm.assert_series_equal(result, expected)
 
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = s.astype('category', ordered=dtype_ordered)
-        tm.assert_series_equal(result, expected)
-
         # different categories
         dtype = CategoricalDtype(list('adc'), dtype_ordered)
         result = s.astype(dtype)
         expected = Series(s_data, name=name, dtype=dtype)
         tm.assert_series_equal(result, expected)
 
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            result = s.astype(
-                'category', categories=list('adc'), ordered=dtype_ordered)
-        tm.assert_series_equal(result, expected)
-
         if dtype_ordered is False:
             # not specifying ordered, so only test once
             expected = s
@@ -387,20 +375,6 @@ def test_astype_categoricaldtype(self):
         tm.assert_series_equal(result, expected)
         tm.assert_index_equal(result.cat.categories, Index(['a', 'b', 'c']))
 
-    def test_astype_categoricaldtype_with_args(self):
-        s = Series(['a', 'b'])
-        type_ = CategoricalDtype(['a', 'b'])
-
-        msg = (r"Cannot specify a CategoricalDtype and also `categories` or"
-               r" `ordered`\. Use `dtype=CategoricalDtype\(categories,"
-               r" ordered\)` instead\.")
-        with pytest.raises(TypeError, match=msg):
-            s.astype(type_, ordered=True)
-        with pytest.raises(TypeError, match=msg):
-            s.astype(type_, categories=['a', 'b'])
-        with pytest.raises(TypeError, match=msg):
-            s.astype(type_, categories=['a', 'b'], ordered=False)
-
     @pytest.mark.parametrize("dtype", [
         np.datetime64,
         np.timedelta64,
@@ -428,17 +402,25 @@ def test_astype_empty_constructor_equality(self, dtype):
         as_type_empty = Series([]).astype(dtype)
         tm.assert_series_equal(init_empty, as_type_empty)
 
+    @pytest.mark.filterwarnings('ignore::FutureWarning')
     def test_complex(self):
         # see gh-4819: complex access for ndarray compat
         a = np.arange(5, dtype=np.float64)
         b = Series(a + 4j * a)
 
-        tm.assert_numpy_array_equal(a, b.real)
-        tm.assert_numpy_array_equal(4 * a, b.imag)
+        tm.assert_numpy_array_equal(a, np.real(b))
+        tm.assert_numpy_array_equal(4 * a, np.imag(b))
 
         b.real = np.arange(5) + 5
-        tm.assert_numpy_array_equal(a + 5, b.real)
-        tm.assert_numpy_array_equal(4 * a, b.imag)
+        tm.assert_numpy_array_equal(a + 5, np.real(b))
+        tm.assert_numpy_array_equal(4 * a, np.imag(b))
+
+    def test_real_imag_deprecated(self):
+        # GH 18262
+        s = pd.Series([1])
+        with tm.assert_produces_warning(FutureWarning):
+            s.imag
+            s.real
 
     def test_arg_for_errors_in_astype(self):
         # see gh-14878
diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py
index 29846f10dae33..0b62624ad2696 100644
--- a/pandas/tests/series/test_internals.py
+++ b/pandas/tests/series/test_internals.py
@@ -221,3 +221,10 @@ def test_hasnans_unchached_for_series():
     ser.iloc[-1] = np.nan
     assert ser.hasnans is True
     assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__
+
+
+def test_put_deprecated():
+    # GH 18262
+    s = pd.Series([1])
+    with tm.assert_produces_warning(FutureWarning):
+        s.put(0, 0)
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
index 90949f6bfab91..39c217e7d95b1 100644
--- a/pandas/tests/series/test_io.py
+++ b/pandas/tests/series/test_io.py
@@ -29,17 +29,6 @@ def read_csv(self, path, **kwargs):
 
         return out
 
-    def test_from_csv_deprecation(self, datetime_series):
-        # see gh-17812
-        with ensure_clean() as path:
-            datetime_series.to_csv(path, header=False)
-
-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
-                ts = self.read_csv(path)
-                depr_ts = Series.from_csv(path)
-                assert_series_equal(depr_ts, ts)
-
     @pytest.mark.parametrize("arg", ["path", "header", "both"])
     def test_to_csv_deprecation(self, arg, datetime_series):
         # see gh-19715
@@ -68,11 +57,6 @@ def test_from_csv(self, datetime_series, string_series):
             assert ts.name is None
             assert ts.index.name is None
 
-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
-                depr_ts = Series.from_csv(path)
-                assert_series_equal(depr_ts, ts)
-
             # see gh-10483
             datetime_series.to_csv(path, header=True)
             ts_h = self.read_csv(path, header=0)
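Since `Series.from_csv` is gone from the round-trip tests above, the supported
way to get a Series back from a headerless CSV is `read_csv` plus `squeeze`. A
sketch; 'series.csv' is a throwaway path:

import pandas as pd

ser = pd.Series([1.0, 2.0, 3.0],
                index=pd.date_range('2000-01-01', periods=3))
ser.to_csv('series.csv', header=False)

# squeeze=True collapses the single-column DataFrame into a Series,
# matching what Series.from_csv used to return.
out = pd.read_csv('series.csv', header=None, index_col=0,
                  parse_dates=True, squeeze=True)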
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
index 9ce1133cb39ca..290e0203567db 100644
--- a/pandas/tests/sparse/series/test_series.py
+++ b/pandas/tests/sparse/series/test_series.py
@@ -1516,14 +1516,6 @@ def test_deprecated_numpy_func_call(self):
                                     raise_on_extra_warnings=False):
                 getattr(getattr(self, series), func)()
 
-    def test_deprecated_reindex_axis(self):
-        # https://github.com/pandas-dev/pandas/issues/17833
-        # Multiple FutureWarnings, can't check stacklevel
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False) as m:
-            self.bseries.reindex_axis([0, 1, 2])
-        assert 'reindex' in str(m[0].message)
-
     @pytest.mark.parametrize(
         'datetime_type', (np.datetime64,
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index d82b205803b09..f9a1bb97cc48c 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -326,13 +326,15 @@ def test_ndarray_compat_properties(self):
             pass
 
         with pytest.raises(ValueError):
-            o.item()  # len > 1
+            with tm.assert_produces_warning(FutureWarning):
+                o.item()  # len > 1
 
         assert o.ndim == 1
         assert o.size == len(o)
 
-        assert Index([1]).item() == 1
-        assert Series([1]).item() == 1
+        with tm.assert_produces_warning(FutureWarning):
+            assert Index([1]).item() == 1
+            assert Series([1]).item() == 1
 
     def test_value_counts_unique_nunique(self):
         for orig in self.objs:
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index ad035f9c0158d..6e7b34a0632ad 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -144,9 +144,9 @@ def _coerce_tds(targ, res):
                 # but nanops doesn't, so make that an exception
                 elif targ.dtype.kind == 'O':
                     raise
-            tm.assert_almost_equal(targ.real, res.real,
+            tm.assert_almost_equal(np.real(targ), np.real(res),
                                    check_dtype=check_dtype)
-            tm.assert_almost_equal(targ.imag, res.imag,
+            tm.assert_almost_equal(np.imag(targ), np.imag(res),
                                    check_dtype=check_dtype)
 
     def check_fun_data(self, testfunc, targfunc, testarval, targarval,
diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py
index 7aa8f1d527d39..735d16f7ad0db 100644
--- a/pandas/tests/util/test_assert_frame_equal.py
+++ b/pandas/tests/util/test_assert_frame_equal.py
@@ -5,7 +5,12 @@
 
 
 @pytest.fixture(params=[True, False])
-def by_blocks(request):
+def by_blocks_fixture(request):
+    return request.param
+
+
+@pytest.fixture(params=['DataFrame', 'Series'])
+def obj_fixture(request):
     return request.param
 
 
@@ -70,29 +75,35 @@ def _assert_not_frame_equal_both(a, b, **kwargs):
 
 
 @pytest.mark.parametrize("check_like", [True, False])
-def test_frame_equal_row_order_mismatch(check_like):
+def test_frame_equal_row_order_mismatch(check_like, obj_fixture):
     df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]},
                     index=["a", "b", "c"])
     df2 = DataFrame({"A": [3, 2, 1], "B": [6, 5, 4]},
                     index=["c", "b", "a"])
 
     if not check_like:
         # Do not ignore row-column orderings.
-        msg = "DataFrame.index are different"
+        msg = "{obj}.index are different".format(obj=obj_fixture)
         with pytest.raises(AssertionError, match=msg):
-            assert_frame_equal(df1, df2, check_like=check_like)
+            assert_frame_equal(df1,
+                               df2,
+                               check_like=check_like,
+                               obj=obj_fixture)
     else:
-        _assert_frame_equal_both(df1, df2, check_like=check_like)
+        _assert_frame_equal_both(df1,
+                                 df2,
+                                 check_like=check_like,
+                                 obj=obj_fixture)
 
 
 @pytest.mark.parametrize("df1,df2", [
     (DataFrame({"A": [1, 2, 3]}), DataFrame({"A": [1, 2, 3, 4]})),
     (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), DataFrame({"A": [1, 2, 3]})),
 ])
-def test_frame_equal_shape_mismatch(df1, df2):
-    msg = "DataFrame are different"
+def test_frame_equal_shape_mismatch(df1, df2, obj_fixture):
+    msg = "{obj} are different".format(obj=obj_fixture)
 
     with pytest.raises(AssertionError, match=msg):
-        assert_frame_equal(df1, df2)
+        assert_frame_equal(df1, df2, obj=obj_fixture)
 
 
 @pytest.mark.parametrize("df1,df2,msg", [
@@ -136,12 +147,13 @@ def test_empty_dtypes(check_dtype):
     assert_frame_equal(df1, df2, **kwargs)
 
 
-def test_frame_equal_index_mismatch():
-    msg = """DataFrame\\.index are different
+def test_frame_equal_index_mismatch(obj_fixture):
+    msg = """{obj}\\.index are different
 
-DataFrame\\.index values are different \\(33\\.33333 %\\)
+{obj}\\.index values are different \\(33\\.33333 %\\)
 \\[left\\]:  Index\\(\\['a', 'b', 'c'\\], dtype='object'\\)
-\\[right\\]: Index\\(\\['a', 'b', 'd'\\], dtype='object'\\)"""
+\\[right\\]: Index\\(\\['a', 'b', 'd'\\], dtype='object'\\)""".format(
+    obj=obj_fixture)
 
     df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]},
                     index=["a", "b", "c"])
@@ -149,15 +161,16 @@ def test_frame_equal_index_mismatch():
                     index=["a", "b", "d"])
 
     with pytest.raises(AssertionError, match=msg):
-        assert_frame_equal(df1, df2)
+        assert_frame_equal(df1, df2, obj=obj_fixture)
 
 
-def test_frame_equal_columns_mismatch():
-    msg = """DataFrame\\.columns are different
+def test_frame_equal_columns_mismatch(obj_fixture):
+    msg = """{obj}\\.columns are different
 
-DataFrame\\.columns values are different \\(50\\.0 %\\)
+{obj}\\.columns values are different \\(50\\.0 %\\)
 \\[left\\]:  Index\\(\\['A', 'B'\\], dtype='object'\\)
-\\[right\\]: Index\\(\\['A', 'b'\\], dtype='object'\\)"""
+\\[right\\]: Index\\(\\['A', 'b'\\], dtype='object'\\)""".format(
+    obj=obj_fixture)
 
     df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]},
                     index=["a", "b", "c"])
@@ -165,43 +178,50 @@ def test_frame_equal_columns_mismatch():
                     index=["a", "b", "c"])
 
     with pytest.raises(AssertionError, match=msg):
-        assert_frame_equal(df1, df2)
+        assert_frame_equal(df1, df2, obj=obj_fixture)
 
 
-def test_frame_equal_block_mismatch(by_blocks):
-    msg = """DataFrame\\.iloc\\[:, 1\\] are different
+def test_frame_equal_block_mismatch(by_blocks_fixture, obj_fixture):
+    msg = """{obj}\\.iloc\\[:, 1\\] are different
 
-DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
+{obj}\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
 \\[left\\]:  \\[4, 5, 6\\]
-\\[right\\]: \\[4, 5, 7\\]"""
+\\[right\\]: \\[4, 5, 7\\]""".format(obj=obj_fixture)
 
     df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
     df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 7]})
 
     with pytest.raises(AssertionError, match=msg):
-        assert_frame_equal(df1, df2, by_blocks=by_blocks)
+        assert_frame_equal(df1,
+                           df2,
+                           by_blocks=by_blocks_fixture,
+                           obj=obj_fixture)
 
 
 @pytest.mark.parametrize("df1,df2,msg", [
     (DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "ë"]}),
      DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "e̊"]}),
"""DataFrame\\.iloc\\[:, 1\\] are different + """{obj}\\.iloc\\[:, 1\\] are different -DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\) +{obj}\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\) \\[left\\]: \\[é, è, ë\\] \\[right\\]: \\[é, è, e̊\\]"""), (DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "ë"]}), DataFrame({"A": ["a", "a", "a"], "E": ["e", "e", "e"]}), - """DataFrame\\.iloc\\[:, 0\\] are different + """{obj}\\.iloc\\[:, 0\\] are different -DataFrame\\.iloc\\[:, 0\\] values are different \\(100\\.0 %\\) +{obj}\\.iloc\\[:, 0\\] values are different \\(100\\.0 %\\) \\[left\\]: \\[á, à, ä\\] \\[right\\]: \\[a, a, a\\]"""), ]) -def test_frame_equal_unicode(df1, df2, msg, by_blocks): +def test_frame_equal_unicode(df1, df2, msg, by_blocks_fixture, obj_fixture): # see gh-20503 # # Test ensures that `assert_frame_equals` raises the right exception # when comparing DataFrames containing differing unicode objects. + msg = msg.format(obj=obj_fixture) with pytest.raises(AssertionError, match=msg): - assert_frame_equal(df1, df2, by_blocks=by_blocks) + assert_frame_equal(df1, + df2, + by_blocks=by_blocks_fixture, + obj=obj_fixture) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f14b202b034d6..cec9416e5d2c5 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1052,7 +1052,8 @@ def assert_series_equal(left, right, check_dtype=True, assert_attr_equal('dtype', left, right) if check_exact: - assert_numpy_array_equal(left.get_values(), right.get_values(), + assert_numpy_array_equal(left._internal_get_values(), + right._internal_get_values(), check_dtype=check_dtype, obj='{obj}'.format(obj=obj),) elif check_datetimelike_compat: @@ -1071,11 +1072,11 @@ def assert_series_equal(left, right, check_dtype=True, '{right}.').format(left=left.values, right=right.values) raise AssertionError(msg) else: - assert_numpy_array_equal(left.get_values(), right.get_values(), + assert_numpy_array_equal(left._internal_get_values(), + right._internal_get_values(), check_dtype=check_dtype) elif is_interval_dtype(left) or is_interval_dtype(right): assert_interval_array_equal(left.array, right.array) - elif (is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype)): # .values is an ndarray, but ._values is the ExtensionArray. 
@@ -1086,7 +1087,8 @@ def assert_series_equal(left, right, check_dtype=True,
           is_extension_array_dtype(right) and not is_categorical_dtype(right)):
         assert_extension_array_equal(left.array, right.array)
     else:
-        _testing.assert_almost_equal(left.get_values(), right.get_values(),
+        _testing.assert_almost_equal(left._internal_get_values(),
+                                     right._internal_get_values(),
                                      check_less_precise=check_less_precise,
                                      check_dtype=check_dtype,
                                      obj='{obj}'.format(obj=obj))
@@ -1218,7 +1220,7 @@ def assert_frame_equal(left, right, check_dtype=True,
     # shape comparison
     if left.shape != right.shape:
         raise_assert_detail(obj,
-                            'DataFrame shape mismatch',
+                            '{obj} shape mismatch'.format(obj=obj),
                             '{shape!r}'.format(shape=left.shape),
                             '{shape!r}'.format(shape=right.shape))
 
@@ -1249,7 +1251,7 @@ def assert_frame_equal(left, right, check_dtype=True,
             assert dtype in lblocks
             assert dtype in rblocks
             assert_frame_equal(lblocks[dtype], rblocks[dtype],
-                               check_dtype=check_dtype, obj='DataFrame.blocks')
+                               check_dtype=check_dtype, obj=obj)
 
     # compare by columns
     else:
@@ -1264,7 +1266,7 @@ def assert_frame_equal(left, right, check_dtype=True,
             check_exact=check_exact, check_names=check_names,
             check_datetimelike_compat=check_datetimelike_compat,
             check_categorical=check_categorical,
-            obj='DataFrame.iloc[:, {idx}]'.format(idx=i))
+            obj='{obj}.iloc[:, {idx}]'.format(obj=obj, idx=i))
 
 
 def assert_equal(left, right, **kwargs):
diff --git a/setup.py b/setup.py
index 0380c717ecb41..19c22fc25733d 100755
--- a/setup.py
+++ b/setup.py
@@ -783,7 +783,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
     long_description=LONG_DESCRIPTION,
     classifiers=CLASSIFIERS,
     platforms='any',
-    python_requires='>=3.5',
+    python_requires='>=3.5.3',
     extras_require={
         'test': [
             # sync with setup.cfg minversion & install.rst
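Threading `obj=` through `assert_frame_equal` in the testing.py hunks above only
changes how failures are reported, not what is compared. A quick demonstration
of the effect; the 'Series' label here is arbitrary:

import pandas as pd
from pandas.util.testing import assert_frame_equal

left = pd.DataFrame({'A': [1, 2, 3]})
right = pd.DataFrame({'A': [1, 2, 4]})

try:
    assert_frame_equal(left, right, obj='Series')
except AssertionError as exc:
    # The message is built from obj, e.g. "Series.iloc[:, 0] are different"
    print(exc)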