Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into bug/categorical-i…
Browse files Browse the repository at this point in the history
…ndexing-1row-df
  • Loading branch information
keechongtan committed Nov 26, 2019
2 parents 39c95f4 + db60ab6 commit 2b71592
Show file tree
Hide file tree
Showing 65 changed files with 626 additions and 876 deletions.
42 changes: 27 additions & 15 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,6 @@
pass


class Concat:
    """Benchmarks for concatenating and unioning categorical data."""

    def setup(self):
        """Create a categorical Series and two Categoricals to combine."""
        repeats = 10 ** 5
        first_letters = list("aabbcd") * repeats
        second_letters = list("bbcdjk") * repeats

        self.s = pd.Series(first_letters).astype("category")

        # The two Categoricals share only some of their categories.
        self.a = pd.Categorical(first_letters)
        self.b = pd.Categorical(second_letters)

    def time_concat(self):
        """Concatenate the categorical Series with itself."""
        series = self.s
        pd.concat([series, series])

    def time_union(self):
        """Union the two Categoricals."""
        operands = [self.a, self.b]
        union_categoricals(operands)


class Constructor:
def setup(self):
N = 10 ** 5
Expand Down Expand Up @@ -77,6 +62,33 @@ def time_existing_series(self):
pd.Categorical(self.series)


class CategoricalOps:
    """Benchmark comparison operators between an ordered Categorical and a scalar."""

    params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
    param_names = ["op"]

    def setup(self, op):
        """Build the ordered Categorical; ``op`` is not used for the fixture."""
        repeats = 10 ** 5
        letters = list("aabbcd") * repeats
        self.cat = pd.Categorical(letters, ordered=True)

    def time_categorical_op(self, op):
        """Apply the comparison method named by ``op`` against the scalar "b"."""
        compare = getattr(self.cat, op)
        compare("b")


class Concat:
    """Benchmarks for concatenating and unioning categorical data."""

    def setup(self):
        """Create a categorical Series and two Categoricals to combine."""
        repeats = 10 ** 5
        first_letters = list("aabbcd") * repeats
        second_letters = list("bbcdjk") * repeats

        self.s = pd.Series(first_letters).astype("category")

        # The two Categoricals share only some of their categories.
        self.a = pd.Categorical(first_letters)
        self.b = pd.Categorical(second_letters)

    def time_concat(self):
        """Concatenate the categorical Series with itself."""
        series = self.s
        pd.concat([series, series])

    def time_union(self):
        """Union the two Categoricals."""
        operands = [self.a, self.b]
        union_categoricals(operands)


class ValueCounts:

params = [True, False]
Expand Down
17 changes: 7 additions & 10 deletions ci/azure/posix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,13 @@ jobs:
PATTERN: "not slow and not network"
LOCALE_OVERRIDE: "zh_CN.UTF-8"

# https://github.com/pandas-dev/pandas/issues/29432
# py37_np_dev:
# ENV_FILE: ci/deps/azure-37-numpydev.yaml
# CONDA_PY: "37"
# PATTERN: "not slow and not network"
# TEST_ARGS: "-W error"
# PANDAS_TESTING_MODE: "deprecate"
# EXTRA_APT: "xsel"
# # TODO:
# continueOnError: true
py37_np_dev:
ENV_FILE: ci/deps/azure-37-numpydev.yaml
CONDA_PY: "37"
PATTERN: "not slow and not network"
TEST_ARGS: "-W error"
PANDAS_TESTING_MODE: "deprecate"
EXTRA_APT: "xsel"

steps:
- script: |
Expand Down
4 changes: 2 additions & 2 deletions ci/deps/azure-macos-36.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ dependencies:
- matplotlib=2.2.3
- nomkl
- numexpr
- numpy=1.13.3
- numpy=1.14
- openpyxl
- pyarrow
- pyarrow>=0.12.0
- pytables
- python-dateutil==2.6.1
- pytz
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/azure-windows-36.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dependencies:
- numexpr
- numpy=1.15.*
- openpyxl
- pyarrow
- pyarrow>=0.12.0
- pytables
- python-dateutil
- pytz
Expand Down
1 change: 0 additions & 1 deletion doc/redirects.csv
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,6 @@ generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlev
generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel
generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index
generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame
generated/pandas.MultiIndex.to_hierarchical,../reference/api/pandas.MultiIndex.to_hierarchical
generated/pandas.notna,../reference/api/pandas.notna
generated/pandas.notnull,../reference/api/pandas.notnull
generated/pandas.option_context,../reference/api/pandas.option_context
Expand Down
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ matplotlib 2.2.2 Visualization
openpyxl 2.4.8 Reading / writing for xlsx files
pandas-gbq 0.8.0 Google Big Query access
psycopg2 PostgreSQL engine for sqlalchemy
pyarrow 0.9.0 Parquet and feather reading / writing
pyarrow 0.12.0 Parquet and feather reading / writing
pymysql 0.7.11 MySQL engine for sqlalchemy
pyreadstat SPSS files (.sav) reading
pytables 3.4.2 HDF5 reading / writing
Expand Down
1 change: 0 additions & 1 deletion doc/source/reference/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ MultiIndex components

MultiIndex.set_levels
MultiIndex.set_codes
MultiIndex.to_hierarchical
MultiIndex.to_flat_index
MultiIndex.to_frame
MultiIndex.is_lexsorted
Expand Down
6 changes: 3 additions & 3 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ Use efficient datatypes
-----------------------

The default pandas data types are not the most memory efficient. This is
especially true for high-cardinality text data (columns with relatively few
unique values). By using more efficient data types you can store larger datasets
in memory.
especially true for text data columns with relatively few unique values (commonly
referred to as "low-cardinality" data). By using more efficient data types you
can store larger datasets in memory.

.. ipython:: python
Expand Down
120 changes: 68 additions & 52 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,62 +265,62 @@ The following methods now also correctly output values for unobserved categories
Increased minimum versions for dependencies
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Some minimum supported versions of dependencies were updated (:issue:`29723`).
Some minimum supported versions of dependencies were updated (:issue:`29766`, :issue:`29723`).
If installed, we now require:

+-----------------+-----------------+----------+
| Package | Minimum Version | Required |
+=================+=================+==========+
| numpy | 1.13.3 | X |
+-----------------+-----------------+----------+
| pytz | 2015.4 | X |
+-----------------+-----------------+----------+
| python-dateutil | 2.6.1 | X |
+-----------------+-----------------+----------+
| bottleneck | 1.2.1 | |
+-----------------+-----------------+----------+
| numexpr | 2.6.2 | |
+-----------------+-----------------+----------+
| pytest (dev) | 4.0.2 | |
+-----------------+-----------------+----------+
+-----------------+-----------------+----------+---------+
| Package | Minimum Version | Required | Changed |
+=================+=================+==========+=========+
| numpy | 1.13.3 | X | |
+-----------------+-----------------+----------+---------+
| pytz | 2015.4 | X | |
+-----------------+-----------------+----------+---------+
| python-dateutil | 2.6.1 | X | |
+-----------------+-----------------+----------+---------+
| bottleneck | 1.2.1 | | |
+-----------------+-----------------+----------+---------+
| numexpr | 2.6.2 | | |
+-----------------+-----------------+----------+---------+
| pytest (dev) | 4.0.2 | | |
+-----------------+-----------------+----------+---------+

For `optional libraries <https://dev.pandas.io/docs/install.html#dependencies>`_ the general recommendation is to use the latest version.
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
Optional libraries below the lowest tested version may still work, but are not considered supported.

+-----------------+-----------------+
| Package | Minimum Version |
+=================+=================+
| beautifulsoup4 | 4.6.0 |
+-----------------+-----------------+
| fastparquet | 0.3.2 |
+-----------------+-----------------+
| gcsfs | 0.2.2 |
+-----------------+-----------------+
| lxml | 3.8.0 |
+-----------------+-----------------+
| matplotlib | 2.2.2 |
+-----------------+-----------------+
| openpyxl | 2.4.8 |
+-----------------+-----------------+
| pyarrow | 0.9.0 |
+-----------------+-----------------+
| pymysql | 0.7.1 |
+-----------------+-----------------+
| pytables | 3.4.2 |
+-----------------+-----------------+
| scipy | 0.19.0 |
+-----------------+-----------------+
| sqlalchemy | 1.1.4 |
+-----------------+-----------------+
| xarray | 0.8.2 |
+-----------------+-----------------+
| xlrd | 1.1.0 |
+-----------------+-----------------+
| xlsxwriter | 0.9.8 |
+-----------------+-----------------+
| xlwt | 1.2.0 |
+-----------------+-----------------+
+-----------------+-----------------+---------+
| Package | Minimum Version | Changed |
+=================+=================+=========+
| beautifulsoup4 | 4.6.0 | |
+-----------------+-----------------+---------+
| fastparquet | 0.3.2 | X |
+-----------------+-----------------+---------+
| gcsfs | 0.2.2 | |
+-----------------+-----------------+---------+
| lxml | 3.8.0 | |
+-----------------+-----------------+---------+
| matplotlib | 2.2.2 | |
+-----------------+-----------------+---------+
| openpyxl | 2.4.8 | |
+-----------------+-----------------+---------+
| pyarrow | 0.12.0 | X |
+-----------------+-----------------+---------+
| pymysql | 0.7.1 | |
+-----------------+-----------------+---------+
| pytables | 3.4.2 | |
+-----------------+-----------------+---------+
| scipy | 0.19.0 | |
+-----------------+-----------------+---------+
| sqlalchemy | 1.1.4 | |
+-----------------+-----------------+---------+
| xarray | 0.8.2 | |
+-----------------+-----------------+---------+
| xlrd | 1.1.0 | |
+-----------------+-----------------+---------+
| xlsxwriter | 0.9.8 | |
+-----------------+-----------------+---------+
| xlwt | 1.2.0 | |
+-----------------+-----------------+---------+

See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.

Expand Down Expand Up @@ -364,7 +364,7 @@ Deprecations
value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)``
is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`).
- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`)

- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`)

.. _whatsnew_1000.prior_deprecations:

Expand Down Expand Up @@ -401,10 +401,12 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.

**Other removals**

- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`)
- Removed the previously deprecated :meth:`Index.summary` (:issue:`18217`)
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`)
- Removed :meth:`Series.from_array` (:issue:`18258`)
- Removed :meth:`DataFrame.from_items` (:issue:`18458`)
- Removed :meth:`DataFrame.as_matrix`, :meth:`Series.as_matrix` (:issue:`18458`)
Expand All @@ -415,6 +417,11 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- :func:`core.internals.blocks.make_block` no longer accepts the "fastpath" keyword (:issue:`19265`)
- :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword (:issue:`19434`)
- Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`)
- Removed the previously deprecated :meth:`MultiIndex.to_hierarchical` (:issue:`21613`)
- Removed the previously deprecated :attr:`MultiIndex.labels`, use :attr:`MultiIndex.codes` instead (:issue:`23752`)
- Removed the previously deprecated "labels" keyword from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`)
- Removed the previously deprecated :meth:`MultiIndex.set_labels`, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`)
- Removed the previously deprecated "labels" keyword from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`)
- Removed support for legacy HDF5 formats (:issue:`29787`)
- :func:`read_excel` removed support for "skip_footer" argument, use "skipfooter" instead (:issue:`18836`)
- :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`)
Expand All @@ -434,11 +441,17 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
- Removed the previously deprecated :meth:`DataFrame.get_ftype_counts`, :meth:`Series.get_ftype_counts` (:issue:`18243`)
- Removed the previously deprecated :meth:`Index.get_duplicated`, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`)
- Removed the previously deprecated :meth:`Series.clip_upper`, :meth:`Series.clip_lower`, :meth:`DataFrame.clip_upper`, :meth:`DataFrame.clip_lower` (:issue:`24203`)
- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`)
- Removed the previously deprecated :attr:`DatetimeIndex.offset` (:issue:`20730`)
- Removed the previously deprecated :meth:`DatetimeIndex.asobject`, :meth:`TimedeltaIndex.asobject`, :meth:`PeriodIndex.asobject`, use ``astype(object)`` instead (:issue:`29801`)
- Removed previously deprecated "order" argument from :func:`factorize` (:issue:`19751`)
- Removed previously deprecated "v" argument from :meth:`FrozenNDarray.searchsorted`, use "value" instead (:issue:`22672`)
- :func:`read_stata` and :meth:`DataFrame.to_stata` no longer support the "encoding" argument (:issue:`21400`)
- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`)
- Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`)
- Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`)
- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() <pandas.core.window.Rolling.apply>`, :func:`DataFrame.rolling().apply() <pandas.core.window.Rolling.apply>`,
  :func:`Series.expanding().apply() <pandas.core.window.Expanding.apply>`, and :func:`DataFrame.expanding().apply() <pandas.core.window.Expanding.apply>` to ``False`` (:issue:`20584`)
-

.. _whatsnew_1000.performance:
Expand All @@ -453,7 +466,9 @@ Performance improvements
- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`)
- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`)
- Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`)
- Performance improvement when comparing a :meth:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`)
- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`)
- Performance improvement when checking if values in a :class:`Categorical` are equal to, greater than or equal to, or greater than a given scalar.
  The improvement is not present when checking if the :class:`Categorical` is less than, or less than or equal to, the scalar (:issue:`29820`)

.. _whatsnew_1000.bug_fixes:

Expand Down Expand Up @@ -549,6 +564,7 @@ Indexing
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`)
- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`)
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`)

Missing
Expand Down Expand Up @@ -664,4 +680,4 @@ Other
.. _whatsnew_1000.contributors:

Contributors
~~~~~~~~~~~~
~~~~~~~~~~~~
8 changes: 6 additions & 2 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,12 @@ cdef class IndexEngine:

if self.is_monotonic_increasing:
values = self._get_index_values()
left = values.searchsorted(val, side='left')
right = values.searchsorted(val, side='right')
try:
left = values.searchsorted(val, side='left')
right = values.searchsorted(val, side='right')
except TypeError:
# e.g. GH#29189 get_loc(None) with a Float64Index
raise KeyError(val)

diff = right - left
if diff == 0:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2201,7 +2201,7 @@ cdef class _Period:
return self.days_in_month

@property
def is_leap_year(self):
def is_leap_year(self) -> bool:
return bool(is_leapyear(self.year))

@classmethod
Expand Down
14 changes: 2 additions & 12 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1509,18 +1509,8 @@ class Timedelta(_Timedelta):
if other.dtype.kind == 'm':
# also timedelta-like
return _broadcast_floordiv_td64(self.value, other, _rfloordiv)
elif other.dtype.kind == 'i':
# Backwards compatibility
# GH-19761
msg = textwrap.dedent("""\
Floor division between integer array and Timedelta is
deprecated. Use 'array // timedelta.value' instead.
If you want to obtain epochs from an array of timestamps,
you can rather use
'(array - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")'.
""")
warnings.warn(msg, FutureWarning)
return other // self.value

# Includes integer array // Timedelta, deprecated in GH#19761
raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__')

elif is_float_object(other) and util.is_nan(other):
Expand Down
Loading

0 comments on commit 2b71592

Please sign in to comment.