From 2310faa109bdfd9ff3ef4fc19a163d790d60c645 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 2 Oct 2017 08:32:52 -0500
Subject: [PATCH 01/76] Delay import (#17710)

closes #16764
---
 .travis.yml                             |  2 +
 ci/check_imports.py                     | 36 +++++++++++++
 doc/source/whatsnew/v0.21.0.txt         |  3 ++
 pandas/core/computation/__init__.py     | 23 ---------
 pandas/core/computation/check.py        | 22 ++++++++
 pandas/core/computation/eval.py         |  8 ++-
 pandas/core/computation/expressions.py  |  2 +-
 pandas/core/config_init.py              | 58 ++++++++++-----------
 pandas/core/frame.py                    |  7 ++-
 pandas/core/internals.py                |  3 +-
 pandas/core/ops.py                      | 10 +++-
 pandas/core/panel.py                    |  3 +-
 pandas/io/common.py                     | 37 +++++++-------
 pandas/io/excel.py                      | 16 +++++-
 pandas/plotting/__init__.py             |  6 ---
 pandas/plotting/_core.py                | 21 +++++---
 pandas/plotting/_style.py               | 67 -------------------------
 pandas/tests/computation/test_compat.py |  4 +-
 pandas/tests/frame/test_query_eval.py   |  2 +-
 pandas/util/_tester.py                  | 26 +++++-----
 pandas/util/testing.py                  |  8 +--
 21 files changed, 183 insertions(+), 181 deletions(-)
 create mode 100644 ci/check_imports.py
 create mode 100644 pandas/core/computation/check.py

diff --git a/.travis.yml b/.travis.yml
index 034e2a32bb75c7..fe1a2950dbf081 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -121,6 +121,8 @@ script:
   - ci/script_single.sh
   - ci/script_multi.sh
   - ci/lint.sh
+  - echo "checking imports"
+  - source activate pandas && python ci/check_imports.py
   - echo "script done"

 after_success:
diff --git a/ci/check_imports.py b/ci/check_imports.py
new file mode 100644
index 00000000000000..a83436e7d258c2
--- /dev/null
+++ b/ci/check_imports.py
@@ -0,0 +1,36 @@
+"""
+Check that certain modules are not loaded by `import pandas`
+"""
+import sys
+
+blacklist = {
+    'bs4',
+    'html5lib',
+    'ipython',
+    'jinja2',
+    'lxml',
+    'matplotlib',
+    'numexpr',
+    'openpyxl',
+    'py',
+    'pytest',
+    's3fs',
+    'scipy',
+    'tables',
+    'xlrd',
+    'xlsxwriter',
+    'xlwt',
+}
+
+
+def main():
+    import pandas  # noqa
+
+    modules = set(x.split('.')[0] for x in sys.modules)
+    imported = modules & blacklist
+    if imported:
+        sys.exit("Imported {}".format(imported))
+
+
+if __name__ == '__main__':
+    main()
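(Note the original `blacklist` was missing a comma after `'jinja2'`, so implicit
string concatenation produced `'jinja2lxml'` and neither jinja2 nor lxml was
actually checked; that is corrected above.)

The rest of this patch applies one idiom over and over: imports of optional or
heavy dependencies move from module scope into the functions that first need
them, so `import pandas` stops paying for them up front. A minimal sketch of
the idiom, with illustrative names rather than actual pandas API:

    def evaluate(expr):
        # Previously `import numexpr` sat at module scope and ran on every
        # `import mypkg`, even if this function was never called. Moved here,
        # the import runs on first use only; Python caches modules in
        # sys.modules, so later calls pay a cheap dict lookup, not a re-import.
        import numexpr
        return numexpr.evaluate(expr)
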
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index d69a5c22acc035..24e2281035c6bf 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -164,6 +164,7 @@ Other Enhancements
 - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`)
 - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
+- Improved the import time of pandas by about 2.25x (:issue:`16764`)

 .. _whatsnew_0210.api_breaking:

@@ -559,6 +560,8 @@ Other API Changes
 - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`)
 - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
 - Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
+- Pandas no longer registers matplotlib converters on import. The converters
+  will be registered and used when the first plot is drawn (:issue:`17710`)

 .. _whatsnew_0210.deprecations:

diff --git a/pandas/core/computation/__init__.py b/pandas/core/computation/__init__.py
index e13faf890d1f8c..e69de29bb2d1d6 100644
--- a/pandas/core/computation/__init__.py
+++ b/pandas/core/computation/__init__.py
@@ -1,23 +0,0 @@
-
-import warnings
-from distutils.version import LooseVersion
-
-_NUMEXPR_INSTALLED = False
-_MIN_NUMEXPR_VERSION = "2.4.6"
-
-try:
-    import numexpr as ne
-    ver = ne.__version__
-    _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION)
-
-    if not _NUMEXPR_INSTALLED:
-        warnings.warn(
-            "The installed version of numexpr {ver} is not supported "
-            "in pandas and will be not be used\nThe minimum supported "
-            "version is {min_ver}\n".format(
-                ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)
-
-except ImportError:  # pragma: no cover
-    pass
-
-__all__ = ['_NUMEXPR_INSTALLED']
diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py
new file mode 100644
index 00000000000000..bb8cc74bad3c21
--- /dev/null
+++ b/pandas/core/computation/check.py
@@ -0,0 +1,22 @@
+import warnings
+from distutils.version import LooseVersion
+
+_NUMEXPR_INSTALLED = False
+_MIN_NUMEXPR_VERSION = "2.4.6"
+
+try:
+    import numexpr as ne
+    ver = ne.__version__
+    _NUMEXPR_INSTALLED = ver >= LooseVersion(_MIN_NUMEXPR_VERSION)
+
+    if not _NUMEXPR_INSTALLED:
+        warnings.warn(
+            "The installed version of numexpr {ver} is not supported "
+            "in pandas and will not be used\nThe minimum supported "
+            "version is {min_ver}\n".format(
+                ver=ver, min_ver=_MIN_NUMEXPR_VERSION), UserWarning)
+
+except ImportError:  # pragma: no cover
+    pass
+
+__all__ = ['_NUMEXPR_INSTALLED']
diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
index d391764794c1cb..a5df6aea055ab3 100644
--- a/pandas/core/computation/eval.py
+++ b/pandas/core/computation/eval.py
@@ -5,8 +5,6 @@
 import tokenize

 from pandas.io.formats.printing import pprint_thing
-from pandas.core.computation import _NUMEXPR_INSTALLED
-from pandas.core.computation.expr import Expr, _parsers, tokenize_string
 from pandas.core.computation.scope import _ensure_scope
 from pandas.compat import string_types
 from pandas.core.computation.engines import _engines
@@ -32,6 +30,7 @@ def _check_engine(engine):
     string engine

     """
+    from pandas.core.computation.check import _NUMEXPR_INSTALLED

     if engine is None:
         if _NUMEXPR_INSTALLED:
@@ -69,6 +68,8 @@ def _check_parser(parser):
     KeyError
       * If an invalid parser is passed
     """
+    from pandas.core.computation.expr import _parsers
+
     if parser not in _parsers:
         raise KeyError('Invalid parser {parser!r} passed, valid parsers are'
                        ' {valid}'.format(parser=parser, valid=_parsers.keys()))
@@ -129,6 +130,8 @@ def _convert_expression(expr):


 def _check_for_locals(expr, stack_level, parser):
+    from pandas.core.computation.expr import tokenize_string
+
     at_top_of_stack = stack_level == 0
     not_pandas_parser = parser != 'pandas'

@@ -252,6 +255,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
     pandas.DataFrame.query
     pandas.DataFrame.eval
     """
+    from pandas.core.computation.expr import Expr

     inplace = validate_bool_kwarg(inplace, "inplace")
diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py
index 8ddc625887a511..2196fb5917a440 100644
--- a/pandas/core/computation/expressions.py
+++ 
b/pandas/core/computation/expressions.py @@ -9,7 +9,7 @@ import warnings import numpy as np from pandas.core.common import _values_from_object -from pandas.core.computation import _NUMEXPR_INSTALLED +from pandas.core.computation.check import _NUMEXPR_INSTALLED from pandas.core.config import get_option if _NUMEXPR_INSTALLED: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5652424a8f75b7..33531e80449d88 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -437,34 +437,36 @@ def use_inf_as_na_cb(key): writer_engine_doc = """ : string The default Excel writer engine for '{ext}' files. Available options: - '{default}' (the default){others}. -""" - -with cf.config_prefix('io.excel'): - # going forward, will be additional writers - for ext, options in [('xls', ['xlwt']), ('xlsm', ['openpyxl'])]: - default = options.pop(0) - if options: - options = " " + ", ".join(options) - else: - options = "" - doc = writer_engine_doc.format(ext=ext, default=default, - others=options) - cf.register_option(ext + '.writer', default, doc, validator=str) - - def _register_xlsx(engine, other): - others = ", '{other}'".format(other=other) - doc = writer_engine_doc.format(ext='xlsx', default=engine, - others=others) - cf.register_option('xlsx.writer', engine, doc, validator=str) - - try: - # better memory footprint - import xlsxwriter # noqa - _register_xlsx('xlsxwriter', 'openpyxl') - except ImportError: - # fallback - _register_xlsx('openpyxl', 'xlsxwriter') + auto, {others}. +""" + +_xls_options = ['xlwt'] +_xlsm_options = ['openpyxl'] +_xlsx_options = ['openpyxl', 'xlsxwriter'] + + +with cf.config_prefix("io.excel.xls"): + cf.register_option("writer", "auto", + writer_engine_doc.format( + ext='xls', + others=', '.join(_xls_options)), + validator=str) + +with cf.config_prefix("io.excel.xlsm"): + cf.register_option("writer", "auto", + writer_engine_doc.format( + ext='xlsm', + others=', '.join(_xlsm_options)), + validator=str) + + +with cf.config_prefix("io.excel.xlsx"): + cf.register_option("writer", "auto", + writer_engine_doc.format( + ext='xlsx', + others=', '.join(_xlsx_options)), + validator=str) + # Set up the io.parquet specific configuration. 
parquet_engine_doc = """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d439f88bca15a..01e83821d45248 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -76,9 +76,7 @@ create_block_manager_from_blocks) from pandas.core.series import Series from pandas.core.categorical import Categorical -import pandas.core.computation.expressions as expressions import pandas.core.algorithms as algorithms -from pandas.core.computation.eval import eval as _eval from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat @@ -2296,6 +2294,8 @@ def eval(self, expr, inplace=False, **kwargs): >>> df.eval('a + b') >>> df.eval('c = a + b') """ + from pandas.core.computation.eval import eval as _eval + inplace = validate_bool_kwarg(inplace, 'inplace') resolvers = kwargs.pop('resolvers', None) kwargs['level'] = kwargs.pop('level', 0) + 1 @@ -3840,6 +3840,7 @@ def _combine_const(self, other, func, raise_on_error=True, try_cast=True): def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True): + import pandas.core.computation.expressions as expressions # unique if self.columns.is_unique: @@ -3992,6 +3993,7 @@ def combine_first(self, other): ------- combined : DataFrame """ + import pandas.core.computation.expressions as expressions def combiner(x, y, needs_i8_conversion=False): x_values = x.values if hasattr(x, 'values') else x @@ -4027,6 +4029,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, If True, will raise an error if the DataFrame and other both contain data in the same place. """ + import pandas.core.computation.expressions as expressions # TODO: Support other joins if join != 'left': # pragma: no cover raise NotImplementedError("Only left join is supported") diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 9e348819ce5a3c..12ac7a5fd9f207 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -66,7 +66,6 @@ from pandas._libs.tslib import Timedelta from pandas._libs.lib import BlockPlacement -import pandas.core.computation.expressions as expressions from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_bool_kwarg from pandas import compat @@ -1395,6 +1394,8 @@ def where(self, other, cond, align=True, raise_on_error=True, ------- a new block(s), the result of the func """ + import pandas.core.computation.expressions as expressions + values = self.values orig_other = other if transpose: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index d37acf48ed9c28..506b9267f32b4f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -16,7 +16,6 @@ from pandas import compat from pandas.util._decorators import Appender -import pandas.core.computation.expressions as expressions from pandas.compat import bind_method import pandas.core.missing as missing @@ -668,8 +667,9 @@ def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None, Wrapper function for Series arithmetic operations, to avoid code duplication. 
""" - def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) @@ -1193,6 +1193,8 @@ def to_series(right): def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns', fill_zeros=None, **eval_kwargs): def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) @@ -1349,6 +1351,8 @@ def _arith_method_PANEL(op, name, str_rep=None, fill_zeros=None, # copied from Series na_op above, but without unnecessary branch for # non-scalar def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) @@ -1378,6 +1382,8 @@ def f(self, other): def _comp_method_PANEL(op, name, str_rep=None, masker=False): def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index a3e35492ad9af7..68733a3a8b94ed 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -15,7 +15,6 @@ is_string_like, is_scalar) from pandas.core.dtypes.missing import notna -import pandas.core.computation.expressions as expressions import pandas.core.common as com import pandas.core.ops as ops import pandas.core.missing as missing @@ -1500,6 +1499,8 @@ def _add_aggregate_operations(cls, use_numexpr=True): def _panel_arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, **eval_kwargs): def na_op(x, y): + import pandas.core.computation.expressions as expressions + try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, diff --git a/pandas/io/common.py b/pandas/io/common.py index 69a7e69ea724b7..534c1e06711501 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -19,13 +19,6 @@ # gh-12665: Alias for now and remove later. CParserError = ParserError - -try: - from s3fs import S3File - need_text_wrapping = (BytesIO, S3File) -except ImportError: - need_text_wrapping = (BytesIO,) - # common NA values # no longer excluding inf representations # '1.#INF','-1.#INF', '1.#INF000000', @@ -34,19 +27,6 @@ 'N/A', 'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '' ]) -try: - import pathlib - _PATHLIB_INSTALLED = True -except ImportError: - _PATHLIB_INSTALLED = False - - -try: - from py.path import local as LocalPath - _PY_PATH_INSTALLED = True -except: - _PY_PATH_INSTALLED = False - if compat.PY3: from urllib.request import urlopen, pathname2url @@ -167,6 +147,18 @@ def _stringify_path(filepath_or_buffer): Any other object is passed through unchanged, which includes bytes, strings, buffers, or anything else that's not even path-like. """ + try: + import pathlib + _PATHLIB_INSTALLED = True + except ImportError: + _PATHLIB_INSTALLED = False + + try: + from py.path import local as LocalPath + _PY_PATH_INSTALLED = True + except ImportError: + _PY_PATH_INSTALLED = False + if hasattr(filepath_or_buffer, '__fspath__'): return filepath_or_buffer.__fspath__() if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): @@ -322,6 +314,11 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, handles : list of file-like objects A list of file-like object that were openned in this function. 
""" + try: + from s3fs import S3File + need_text_wrapping = (BytesIO, S3File) + except ImportError: + need_text_wrapping = (BytesIO,) handles = list() f = path_or_buf diff --git a/pandas/io/excel.py b/pandas/io/excel.py index afecd76c498efa..41e3b5283a532e 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -170,6 +170,16 @@ def register_writer(klass): _writer_extensions.append(ext) +def _get_default_writer(ext): + _default_writers = {'xlsx': 'openpyxl', 'xlsm': 'openpyxl', 'xls': 'xlwt'} + try: + import xlsxwriter # noqa + _default_writers['xlsx'] = 'xlsxwriter' + except ImportError: + pass + return _default_writers[ext] + + def get_writer(engine_name): if engine_name == 'openpyxl': try: @@ -690,8 +700,10 @@ class ExcelWriter(object): # ExcelWriter. def __new__(cls, path, engine=None, **kwargs): # only switch class if generic(ExcelWriter) + if issubclass(cls, ExcelWriter): - if engine is None: + if engine is None or (isinstance(engine, string_types) and + engine == 'auto'): if isinstance(path, string_types): ext = os.path.splitext(path)[-1][1:] else: @@ -700,6 +712,8 @@ def __new__(cls, path, engine=None, **kwargs): try: engine = config.get_option('io.excel.{ext}.writer' .format(ext=ext)) + if engine == 'auto': + engine = _get_default_writer(ext) except KeyError: error = ValueError("No engine for filetype: '{ext}'" .format(ext=ext)) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index c3cbedb0fc28c1..8f98e297e3e66a 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -4,12 +4,6 @@ # flake8: noqa -try: # mpl optional - from pandas.plotting import _converter - _converter.register() # needs to override so set_xlim works with str/number -except ImportError: - pass - from pandas.plotting._misc import (scatter_matrix, radviz, andrews_curves, bootstrap_plot, parallel_coordinates, lag_plot, diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index aa919d600ec526..211d9777e7515d 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -33,19 +33,23 @@ from pandas.plotting._compat import (_mpl_ge_1_3_1, _mpl_ge_1_5_0, _mpl_ge_2_0_0) -from pandas.plotting._style import (mpl_stylesheet, plot_params, +from pandas.plotting._style import (plot_params, _get_standard_colors) from pandas.plotting._tools import (_subplots, _flatten, table, _handle_shared_axes, _get_all_lines, _get_xlim, _set_ticks_props, format_date_labels) +_registered = False -if _mpl_ge_1_5_0(): - # Compat with mp 1.5, which uses cycler. 
-    import cycler
-    colors = mpl_stylesheet.pop('axes.color_cycle')
-    mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors)
+
+def _setup():
+    # delay the import of matplotlib until necessary
+    global _registered
+    if not _registered:
+        from pandas.plotting import _converter
+        _converter.register()
+        _registered = True


 def _get_standard_kind(kind):
@@ -95,6 +99,7 @@ def __init__(self, data, kind=None, by=None, subplots=False, sharex=None,
                  secondary_y=False, colormap=None,
                  table=False, layout=None, **kwds):

+        _setup()
         self.data = data
         self.by = by

@@ -2056,6 +2061,7 @@ def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0,
                   grid=True, figsize=None, layout=None,
                   return_type=None, **kwds):
     import matplotlib.pyplot as plt
+    _setup()
     ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize,
                  grid=grid, rot=rot, figsize=figsize, layout=layout,
                  return_type=return_type, **kwds)
@@ -2151,7 +2157,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None,
     kwds : other plotting keyword arguments
         To be passed to hist function
     """
-
+    _setup()
     if by is not None:
         axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid,
                             figsize=figsize, sharex=sharex, sharey=sharey,
@@ -2348,6 +2354,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
     >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1)
     >>> boxplot_frame_groupby(grouped, subplots=False)
     """
+    _setup()
     if subplots is True:
         naxes = len(grouped)
         fig, axes = _subplots(naxes=naxes, squeeze=False,
diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py
index 8cb4e30e0d91c4..f1d53da5f1396d 100644
--- a/pandas/plotting/_style.py
+++ b/pandas/plotting/_style.py
@@ -14,73 +14,6 @@

 from pandas.plotting._compat import _mpl_ge_2_0_0

-# Extracted from https://gist.github.com/huyng/816622
-# this is the rcParams set when setting display.with_mpl_style
-# to True.
-mpl_stylesheet = { - 'axes.axisbelow': True, - 'axes.color_cycle': ['#348ABD', - '#7A68A6', - '#A60628', - '#467821', - '#CF4457', - '#188487', - '#E24A33'], - 'axes.edgecolor': '#bcbcbc', - 'axes.facecolor': '#eeeeee', - 'axes.grid': True, - 'axes.labelcolor': '#555555', - 'axes.labelsize': 'large', - 'axes.linewidth': 1.0, - 'axes.titlesize': 'x-large', - 'figure.edgecolor': 'white', - 'figure.facecolor': 'white', - 'figure.figsize': (6.0, 4.0), - 'figure.subplot.hspace': 0.5, - 'font.family': 'monospace', - 'font.monospace': ['Andale Mono', - 'Nimbus Mono L', - 'Courier New', - 'Courier', - 'Fixed', - 'Terminal', - 'monospace'], - 'font.size': 10, - 'interactive': True, - 'keymap.all_axes': ['a'], - 'keymap.back': ['left', 'c', 'backspace'], - 'keymap.forward': ['right', 'v'], - 'keymap.fullscreen': ['f'], - 'keymap.grid': ['g'], - 'keymap.home': ['h', 'r', 'home'], - 'keymap.pan': ['p'], - 'keymap.save': ['s'], - 'keymap.xscale': ['L', 'k'], - 'keymap.yscale': ['l'], - 'keymap.zoom': ['o'], - 'legend.fancybox': True, - 'lines.antialiased': True, - 'lines.linewidth': 1.0, - 'patch.antialiased': True, - 'patch.edgecolor': '#EEEEEE', - 'patch.facecolor': '#348ABD', - 'patch.linewidth': 0.5, - 'toolbar': 'toolbar2', - 'xtick.color': '#555555', - 'xtick.direction': 'in', - 'xtick.major.pad': 6.0, - 'xtick.major.size': 0.0, - 'xtick.minor.pad': 6.0, - 'xtick.minor.size': 0.0, - 'ytick.color': '#555555', - 'ytick.direction': 'in', - 'ytick.major.pad': 6.0, - 'ytick.major.size': 0.0, - 'ytick.minor.pad': 6.0, - 'ytick.minor.size': 0.0 -} - - def _get_standard_colors(num_colors=None, colormap=None, color_type='default', color=None): import matplotlib.pyplot as plt diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py index ed569625177d3d..af39ee9815313a 100644 --- a/pandas/tests/computation/test_compat.py +++ b/pandas/tests/computation/test_compat.py @@ -5,13 +5,13 @@ from pandas.core.computation.engines import _engines import pandas.core.computation.expr as expr -from pandas.core.computation import _MIN_NUMEXPR_VERSION +from pandas.core.computation.check import _MIN_NUMEXPR_VERSION def test_compat(): # test we have compat with our version of nu - from pandas.core.computation import _NUMEXPR_INSTALLED + from pandas.core.computation.check import _NUMEXPR_INSTALLED try: import numexpr as ne ver = ne.__version__ diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index f0f1a2df27e936..a6c36792ef074e 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -17,7 +17,7 @@ makeCustomDataframe as mkdf) import pandas.util.testing as tm -from pandas.core.computation import _NUMEXPR_INSTALLED +from pandas.core.computation.check import _NUMEXPR_INSTALLED from pandas.tests.frame.common import TestData diff --git a/pandas/util/_tester.py b/pandas/util/_tester.py index aeb4259a9edae6..d18467f17ec5bb 100644 --- a/pandas/util/_tester.py +++ b/pandas/util/_tester.py @@ -7,21 +7,19 @@ PKG = os.path.dirname(os.path.dirname(__file__)) -try: - import pytest -except ImportError: - def test(): +def test(extra_args=None): + try: + import pytest + except ImportError: raise ImportError("Need pytest>=3.0 to run tests") -else: - def test(extra_args=None): - cmd = ['--skip-slow', '--skip-network'] - if extra_args: - if not isinstance(extra_args, list): - extra_args = [extra_args] - cmd = extra_args - cmd += [PKG] - print("running: pytest {}".format(' '.join(cmd))) - sys.exit(pytest.main(cmd)) + cmd = 
['--skip-slow', '--skip-network']
+    if extra_args:
+        if not isinstance(extra_args, list):
+            extra_args = [extra_args]
+        cmd = extra_args
+    cmd += [PKG]
+    print("running: pytest {}".format(' '.join(cmd)))
+    sys.exit(pytest.main(cmd))


 __all__ = ['test']
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index c5f73ca0e885bb..202c9473eea12e 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -41,8 +41,6 @@
     StringIO, PY3
 )

-from pandas.core.computation import expressions as expr
-
 from pandas import (bdate_range, CategoricalIndex, Categorical, IntervalIndex,
                     DatetimeIndex, TimedeltaIndex, PeriodIndex, RangeIndex,
                     Index, MultiIndex,
@@ -2660,7 +2658,11 @@ def __exit__(self, exc_type, exc_value, traceback):


 @contextmanager
-def use_numexpr(use, min_elements=expr._MIN_ELEMENTS):
+def use_numexpr(use, min_elements=None):
+    from pandas.core.computation import expressions as expr
+    if min_elements is None:
+        min_elements = expr._MIN_ELEMENTS
+
     olduse = expr._USE_NUMEXPR
     oldmin = expr._MIN_ELEMENTS
     expr.set_use_numexpr(use)
From def3bce010eb0eaea2580ad6b6f44c0318314296 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 2 Oct 2017 09:10:42 -0500
Subject: [PATCH 02/76] ENH: Accept CategoricalDtype in read_csv (#17643)

* ENH: Accept CategoricalDtype in CSV reader
* rework
* Fixed basic implementation
* Added casting
* Doc and cleanup
* Fixed assignment of categoricals
* Doc and test unexpected values
* DOC: fixups
* More coercion, use _recode_for_categories
* Refactor with maybe_convert_for_categorical
* PEP8
* Type for 32bit
* REF: refactor to new method
* py2 compat
* Refactored
* More in Categorical
* fixup! More in Categorical
---
 doc/source/io.rst                | 39 +++++++++++--
 doc/source/whatsnew/v0.21.0.txt  | 33 ++++++++++-
 pandas/_libs/parsers.pyx         | 24 ++++----
 pandas/core/categorical.py       | 55 ++++++++++++++++++
 pandas/io/parsers.py             | 19 ++++--
 pandas/tests/io/parser/dtypes.py | 99 ++++++++++++++++++++++++++++++++
 pandas/tests/test_categorical.py | 34 +++++++++++
 7 files changed, 278 insertions(+), 25 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 4eba9687efc58e..8fe5685b33aff6 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -452,7 +452,8 @@ Specifying Categorical dtype

 .. versionadded:: 0.19.0

-``Categorical`` columns can be parsed directly by specifying ``dtype='category'``
+``Categorical`` columns can be parsed directly by specifying ``dtype='category'`` or
+``dtype=CategoricalDtype(categories, ordered)``.

 .. ipython:: python

@@ -468,12 +469,40 @@ Individual columns can be parsed as a ``Categorical`` using a dict specification

    pd.read_csv(StringIO(data), dtype={'col1': 'category'}).dtypes

+.. versionadded:: 0.21.0
+
+Specifying ``dtype='category'`` will result in an unordered ``Categorical``
+whose ``categories`` are the unique values observed in the data. For more
+control over the categories and order, create a
+:class:`~pandas.api.types.CategoricalDtype` ahead of time, and pass that for
+that column's ``dtype``.
+
+.. ipython:: python
+
+   from pandas.api.types import CategoricalDtype
+
+   dtype = CategoricalDtype(['d', 'c', 'b', 'a'], ordered=True)
+   pd.read_csv(StringIO(data), dtype={'col1': dtype}).dtypes
+
+When using ``dtype=CategoricalDtype``, "unexpected" values outside of
+``dtype.categories`` are treated as missing values.
+
+.. ipython:: python
+
+   dtype = CategoricalDtype(['a', 'b', 'd'])  # No 'c'
+   pd.read_csv(StringIO(data), dtype={'col1': dtype}).col1
+
+This matches the behavior of :meth:`Categorical.set_categories`.
+
 .. note::

-   The resulting categories will always be parsed as strings (object dtype).
-   If the categories are numeric they can be converted using the
-   :func:`to_numeric` function, or as appropriate, another converter
-   such as :func:`to_datetime`.
+   With ``dtype='category'``, the resulting categories will always be parsed
+   as strings (object dtype). If the categories are numeric they can be
+   converted using the :func:`to_numeric` function, or as appropriate, another
+   converter such as :func:`to_datetime`.
+
+   When ``dtype`` is a ``CategoricalDtype`` with homogeneous ``categories`` (
+   all numeric, all datetimes, etc.), the conversion is done automatically.

 .. ipython:: python

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 24e2281035c6bf..8807d8aa09e36b 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -119,7 +119,7 @@ expanded to include the ``categories`` and ``ordered`` attributes. A
 ``CategoricalDtype`` can be used to specify the set of categories and
 orderedness of an array, independent of the data themselves. This can be useful,
 e.g., when converting string data to a ``Categorical`` (:issue:`14711`,
-:issue:`15078`, :issue:`16015`):
+:issue:`15078`, :issue:`16015`, :issue:`17643`):

 .. ipython:: python

@@ -129,8 +129,37 @@ e.g., when converting string data to a ``Categorical`` (:issue:`14711`,
     dtype = CategoricalDtype(categories=['a', 'b', 'c', 'd'], ordered=True)
     s.astype(dtype)

+One place that deserves special mention is in :meth:`read_csv`. Previously, with
+``dtype={'col': 'category'}``, the returned values and categories would always
+be strings.
+
+.. ipython:: python
+   :suppress:
+
+   from pandas.compat import StringIO
+
+.. ipython:: python
+
+   data = 'A,B\na,1\nb,2\nc,3'
+   pd.read_csv(StringIO(data), dtype={'B': 'category'}).B.cat.categories
+
+Notice the "object" dtype.
+
+With a ``CategoricalDtype`` of all numerics, datetimes, or
+timedeltas, we can automatically convert to the correct type
+
+   dtype = {'B': CategoricalDtype([1, 2, 3])}
+   pd.read_csv(StringIO(data), dtype=dtype).B.cat.categories
+
+The values have been correctly interpreted as integers.
+
 The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a
-``Series`` with categorical type will now return an instance of ``CategoricalDtype``.
+``Series`` with categorical type will now return an instance of
+``CategoricalDtype``. For the most part, this is backwards compatible, though
+the string repr has changed. If you were previously using ``str(s.dtype) ==
+'category'`` to detect categorical data, switch to
+:func:`pandas.api.types.is_categorical_dtype`, which is compatible with the old
+and new ``CategoricalDtype``.

 See the :ref:`CategoricalDtype docs ` for more.
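A rough standalone sketch of the coercion step that the new
`Categorical._from_inferred_categories` (added below) performs when the
requested `CategoricalDtype` has numeric categories -- simplified, with
made-up inputs; the real method also recodes the integer codes via
`_recode_for_categories`:

    import pandas as pd
    from pandas.api.types import CategoricalDtype

    inferred = pd.Index(['1', '2', 'bad'])   # categories as the parser saw them
    dtype = CategoricalDtype([1, 2])         # user-requested numeric categories

    # mirrors the is_numeric() branch: unparseable strings coerce to NaN
    # and so end up as missing values rather than categories
    coerced = pd.to_numeric(inferred, errors='coerce')
    print(list(coerced))                     # [1.0, 2.0, nan]
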
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 5bf9f4ce83cbfa..60a646769dd1a9 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -45,7 +45,7 @@ from pandas.core.dtypes.common import ( is_bool_dtype, is_object_dtype, is_string_dtype, is_datetime64_dtype, pandas_dtype) -from pandas.core.categorical import Categorical +from pandas.core.categorical import Categorical, _recode_for_categories from pandas.core.algorithms import take_1d from pandas.core.dtypes.concat import union_categoricals from pandas import Index @@ -1267,19 +1267,14 @@ cdef class TextReader: return self._string_convert(i, start, end, na_filter, na_hashset) elif is_categorical_dtype(dtype): + # TODO: I suspect that _categorical_convert could be + # optimized when dtype is an instance of CategoricalDtype codes, cats, na_count = _categorical_convert( self.parser, i, start, end, na_filter, na_hashset, self.c_encoding) - # sort categories and recode if necessary - cats = Index(cats) - if not cats.is_monotonic_increasing: - unsorted = cats.copy() - cats = cats.sort_values() - indexer = cats.get_indexer(unsorted) - codes = take_1d(indexer, codes, fill_value=-1) - - return Categorical(codes, categories=cats, ordered=False, - fastpath=True), na_count + cat = Categorical._from_inferred_categories(cats, codes, dtype) + return cat, na_count + elif is_object_dtype(dtype): return self._string_convert(i, start, end, na_filter, na_hashset) @@ -2230,8 +2225,11 @@ def _concatenate_chunks(list chunks): if common_type == np.object: warning_columns.append(str(name)) - if is_categorical_dtype(dtypes.pop()): - result[name] = union_categoricals(arrs, sort_categories=True) + dtype = dtypes.pop() + if is_categorical_dtype(dtype): + sort_categories = isinstance(dtype, str) + result[name] = union_categoricals(arrs, + sort_categories=sort_categories) else: result[name] = np.concatenate(arrs) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 5619f15ac85d99..e8537fb5765361 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -21,6 +21,8 @@ _ensure_platform_int, is_dtype_equal, is_datetimelike, + is_datetime64_dtype, + is_timedelta64_dtype, is_categorical, is_categorical_dtype, is_integer_dtype, @@ -510,6 +512,59 @@ def base(self): """ compat, we are always our own object """ return None + @classmethod + def _from_inferred_categories(cls, inferred_categories, inferred_codes, + dtype): + """Construct a Categorical from inferred values + + For inferred categories (`dtype` is None) the categories are sorted. + For explicit `dtype`, the `inferred_categories` are cast to the + appropriate type. 
+
+        Parameters
+        ----------
+
+        inferred_categories : Index
+        inferred_codes : Index
+        dtype : CategoricalDtype or 'category'
+
+        Returns
+        -------
+        Categorical
+        """
+        from pandas import Index, to_numeric, to_datetime, to_timedelta
+
+        cats = Index(inferred_categories)
+
+        known_categories = (isinstance(dtype, CategoricalDtype) and
+                            dtype.categories is not None)
+
+        if known_categories:
+            # Convert to a specialized type with `dtype` if specified
+            if dtype.categories.is_numeric():
+                cats = to_numeric(inferred_categories, errors='coerce')
+            elif is_datetime64_dtype(dtype.categories):
+                cats = to_datetime(inferred_categories, errors='coerce')
+            elif is_timedelta64_dtype(dtype.categories):
+                cats = to_timedelta(inferred_categories, errors='coerce')
+
+        if known_categories:
+            # recode from observation order to dtype.categories order
+            categories = dtype.categories
+            codes = _recode_for_categories(inferred_codes, cats, categories)
+        elif not cats.is_monotonic_increasing:
+            # sort categories and recode for unknown categories
+            unsorted = cats.copy()
+            categories = cats.sort_values()
+            codes = _recode_for_categories(inferred_codes, unsorted,
+                                           categories)
+            dtype = CategoricalDtype(categories, ordered=False)
+        else:
+            dtype = CategoricalDtype(cats, ordered=False)
+            codes = inferred_codes
+
+        return cls(codes, dtype=dtype, fastpath=True)
+
     @classmethod
     def from_array(cls, data, **kwargs):
         """
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index eeb79552477e12..c8b2987d591efb 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -21,6 +21,7 @@
     is_float, is_dtype_equal,
     is_object_dtype, is_string_dtype,
     is_scalar, is_categorical_dtype)
+from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.index import (Index, MultiIndex, RangeIndex,
@@ -1602,12 +1603,20 @@ def _cast_types(self, values, cast_type, column):

         """
         if is_categorical_dtype(cast_type):
-            # XXX this is for consistency with
-            # c-parser which parses all categories
-            # as strings
-            if not is_object_dtype(values):
+            known_cats = (isinstance(cast_type, CategoricalDtype) and
+                          cast_type.categories is not None)
+
+            if not is_object_dtype(values) and not known_cats:
+                # XXX this is for consistency with
+                # c-parser which parses all categories
+                # as strings
                 values = astype_nansafe(values, str)
-            values = Categorical(values)
+
+            cats = Index(values).unique().dropna()
+            values = Categorical._from_inferred_categories(
+                cats, cats.get_indexer(values), cast_type
+            )
+
         else:
             try:
                 values = astype_nansafe(values, cast_type, copy=True)
diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
index 402fa0817595c7..7d3df6201a3908 100644
--- a/pandas/tests/io/parser/dtypes.py
+++ b/pandas/tests/io/parser/dtypes.py
@@ -149,6 +149,105 @@ def test_categorical_dtype_chunksize(self):
         for actual, expected in zip(actuals, expecteds):
             tm.assert_frame_equal(actual, expected)

+    @pytest.mark.parametrize('ordered', [False, True])
+    @pytest.mark.parametrize('categories', [
+        ['a', 'b', 'c'],
+        ['a', 'c', 'b'],
+        ['a', 'b', 'c', 'd'],
+        ['c', 'b', 'a'],
+    ])
+    def test_categorical_categoricaldtype(self, categories, ordered):
+        data = """a,b
+1,a
+1,b
+1,b
+2,c"""
+        expected = pd.DataFrame({
+            "a": [1, 1, 1, 2],
+            "b": Categorical(['a', 'b', 'b', 'c'],
+                             categories=categories,
+                             ordered=ordered)
+        })
+        dtype = {"b": CategoricalDtype(categories=categories,
+                                       ordered=ordered)}
+        result = self.read_csv(StringIO(data), 
dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categorical_categoricaldtype_unsorted(self): + data = """a,b +1,a +1,b +1,b +2,c""" + dtype = CategoricalDtype(['c', 'b', 'a']) + expected = pd.DataFrame({ + 'a': [1, 1, 1, 2], + 'b': Categorical(['a', 'b', 'b', 'c'], categories=['c', 'b', 'a']) + }) + result = self.read_csv(StringIO(data), dtype={'b': dtype}) + tm.assert_frame_equal(result, expected) + + def test_categoricaldtype_coerces_numeric(self): + dtype = {'b': CategoricalDtype([1, 2, 3])} + data = "b\n1\n1\n2\n3" + expected = pd.DataFrame({'b': Categorical([1, 1, 2, 3])}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categoricaldtype_coerces_datetime(self): + dtype = { + 'b': CategoricalDtype(pd.date_range('2017', '2019', freq='AS')) + } + data = "b\n2017-01-01\n2018-01-01\n2019-01-01" + expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + dtype = { + 'b': CategoricalDtype([pd.Timestamp("2014")]) + } + data = "b\n2014-01-01\n2014-01-01T00:00:00" + expected = pd.DataFrame({'b': Categorical([pd.Timestamp('2014')] * 2)}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categoricaldtype_coerces_timedelta(self): + dtype = {'b': CategoricalDtype(pd.to_timedelta(['1H', '2H', '3H']))} + data = "b\n1H\n2H\n3H" + expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categoricaldtype_unexpected_categories(self): + dtype = {'b': CategoricalDtype(['a', 'b', 'd', 'e'])} + data = "b\nd\na\nc\nd" # Unexpected c + expected = pd.DataFrame({"b": Categorical(list('dacd'), + dtype=dtype['b'])}) + result = self.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_categorical_categoricaldtype_chunksize(self): + # GH 10153 + data = """a,b +1,a +1,b +1,b +2,c""" + cats = ['a', 'b', 'c'] + expecteds = [pd.DataFrame({'a': [1, 1], + 'b': Categorical(['a', 'b'], + categories=cats)}), + pd.DataFrame({'a': [1, 2], + 'b': Categorical(['b', 'c'], + categories=cats)}, + index=[2, 3])] + dtype = CategoricalDtype(cats) + actuals = self.read_csv(StringIO(data), dtype={'b': dtype}, + chunksize=2) + + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + def test_empty_pass_dtype(self): data = 'one,two' result = self.read_csv(StringIO(data), dtype={'one': 'u1'}) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index d43901ea091b7a..9e3bd40dc275a7 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -560,6 +560,40 @@ def f(): codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) pd.Categorical.from_codes(codes, categories=["train", "test"]) + @pytest.mark.parametrize('dtype', [None, 'category']) + def test_from_inferred_categories(self, dtype): + cats = ['a', 'b'] + codes = np.array([0, 0, 1, 1], dtype='i8') + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes(codes, cats) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize('dtype', [None, 'category']) + def test_from_inferred_categories_sorts(self, dtype): + cats = ['b', 'a'] + codes = np.array([0, 1, 1, 1], dtype='i8') + result = Categorical._from_inferred_categories(cats, codes, 
dtype)
+        expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
+        tm.assert_categorical_equal(result, expected)
+
+    def test_from_inferred_categories_dtype(self):
+        cats = ['a', 'b', 'd']
+        codes = np.array([0, 1, 0, 2], dtype='i8')
+        dtype = CategoricalDtype(['c', 'b', 'a'], ordered=True)
+        result = Categorical._from_inferred_categories(cats, codes, dtype)
+        expected = Categorical(['a', 'b', 'a', 'd'],
+                               categories=['c', 'b', 'a'],
+                               ordered=True)
+        tm.assert_categorical_equal(result, expected)
+
+    def test_from_inferred_categories_coerces(self):
+        cats = ['1', '2', 'bad']
+        codes = np.array([0, 0, 1, 2], dtype='i8')
+        dtype = CategoricalDtype([1, 2])
+        result = Categorical._from_inferred_categories(cats, codes, dtype)
+        expected = Categorical([1, 1, 2, np.nan])
+        tm.assert_categorical_equal(result, expected)
+
     def test_validate_ordered(self):
         # see gh-14058
         exp_msg = "'ordered' must either be 'True' or 'False'"
From 72c7a396fbd10559f0862e59f55a93beb52c35db Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Mon, 2 Oct 2017 12:33:11 -0400
Subject: [PATCH 03/76] DOC: whatsnew typos

---
 doc/source/whatsnew/v0.21.0.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 8807d8aa09e36b..9477c39fb4c2b0 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -148,8 +148,10 @@ Notice the "object" dtype.
 With a ``CategoricalDtype`` of all numerics, datetimes, or
 timedeltas, we can automatically convert to the correct type

-   dtype = {'B': CategoricalDtype([1, 2, 3])}
-   pd.read_csv(StringIO(data), dtype=dtype).B.cat.categories
+.. ipython:: python
+
+   dtype = {'B': CategoricalDtype([1, 2, 3])}
+   pd.read_csv(StringIO(data), dtype=dtype).B.cat.categories

 The values have been correctly interpreted as integers.

@@ -283,8 +285,8 @@ New Behavior:
 Dependencies have increased minimum versions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`)
-). If installed, we now require:
+We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`).
+If installed, we now require:

    +--------------+-----------------+----------+
    | Package      | Minimum Version | Required |
From 97fea48945163ec27349918c14ec9bdca2335460 Mon Sep 17 00:00:00 2001
From: Bob Haffner
Date: Mon, 2 Oct 2017 14:21:41 -0500
Subject: [PATCH 04/76] allow neg index on str_get (#17741)

---
 doc/source/whatsnew/v0.21.0.txt |  1 +
 pandas/core/strings.py          |  2 +-
 pandas/tests/test_strings.py    | 13 +++++++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 9477c39fb4c2b0..70b14ed1dc214e 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -702,6 +702,7 @@ Indexing
 - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`)
 - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`)
 - Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`)
+- Bug in :meth:`Series.str.get` raising ``index out of range`` instead of inserting ``NaN`` when using a negative index (:issue:`17704`)

 I/O
 ^^^
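The one-line fix below simply widens the bounds check in the ``str_get``
lambda so that in-range negative indices are accepted. A quick sketch of the
resulting behavior (illustrative data, pandas >= 0.21):

    import pandas as pd

    parts = pd.Series(['1_2_3', '4_5']).str.split('_')

    # in-bounds negative indices count from the end, like list indexing
    parts.str.get(-1)   # ['3', '5']

    # out-of-bounds indices, positive or negative, now yield NaN
    # instead of raising IndexError
    parts.str.get(-3)   # ['1', NaN]
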
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 021f88d1aec002..abef6f6086dbd6 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -1260,7 +1260,7 @@ def str_get(arr, i):
     -------
     items : Series/Index of objects
     """
-    f = lambda x: x[i] if len(x) > i else np.nan
+    f = lambda x: x[i] if len(x) > i >= -len(x) else np.nan
     return _na_map(f, arr)


diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index ec2b0b75b9eedd..f1b97081b6d93a 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -2484,6 +2484,19 @@ def test_get(self):
         expected = Series([u('b'), u('d'), np.nan, u('g')])
         tm.assert_series_equal(result, expected)

+        # bounds testing
+        values = Series(['1_2_3_4_5', '6_7_8_9_10', '11_12'])
+
+        # positive index
+        result = values.str.split('_').str.get(2)
+        expected = Series(['3', '8', np.nan])
+        tm.assert_series_equal(result, expected)
+
+        # negative index
+        result = values.str.split('_').str.get(-3)
+        expected = Series(['3', '8', np.nan])
+        tm.assert_series_equal(result, expected)
+
     def test_more_contains(self):
         # PR #1179
         s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA,
From d944bb913cae993d78fcce47838ca94032c9bf7c Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Mon, 2 Oct 2017 21:14:18 -0400
Subject: [PATCH 05/76] DEPR: passing categories or ordered kwargs to
 Series.astype is deprecated (#17742)

closes #17636
---
 doc/source/whatsnew/v0.21.0.txt    |  1 +
 pandas/core/internals.py           | 21 +++++++++----
 pandas/tests/frame/test_sorting.py |  3 +-
 pandas/tests/series/test_dtypes.py | 10 ++++++
 pandas/tests/series/test_rank.py   | 50 +++++++++---------------------
 pandas/tests/test_categorical.py   | 30 +++++++++---------
 6 files changed, 58 insertions(+), 57 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 70b14ed1dc214e..74620accd68758 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -606,6 +606,7 @@ Deprecations
 - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`)
 - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`)
 - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`)
+- Passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`)

 .. _whatsnew_0210.deprecations.argmin_min:

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 12ac7a5fd9f207..1fddf985f0cdbb 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1,3 +1,4 @@
+import warnings
 import copy
 from warnings import catch_warnings
 import itertools
@@ -547,12 +548,20 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
         # may need to convert to categorical
         # this is only called for non-categoricals
         if self.is_categorical_astype(dtype):
-            if (('categories' in kwargs or 'ordered' in kwargs) and
-                    isinstance(dtype, CategoricalDtype)):
-                raise TypeError("Cannot specify a CategoricalDtype and also "
-                                "`categories` or `ordered`. 
Use " - "`dtype=CategoricalDtype(categories, ordered)`" - " instead.") + + # deprecated 17636 + if ('categories' in kwargs or 'ordered' in kwargs): + if isinstance(dtype, CategoricalDtype): + raise TypeError( + "Cannot specify a CategoricalDtype and also " + "`categories` or `ordered`. Use " + "`dtype=CategoricalDtype(categories, ordered)`" + " instead.") + warnings.warn("specifying 'categories' or 'ordered' in " + ".astype() is deprecated; pass a " + "CategoricalDtype instead", + FutureWarning, stacklevel=7) + kwargs = kwargs.copy() categories = getattr(dtype, 'categories', None) ordered = getattr(dtype, 'ordered', False) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index e6f823bf6fac22..a98439797dc287 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -8,6 +8,7 @@ import pandas as pd from pandas.compat import lrange +from pandas.api.types import CategoricalDtype from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range, NaT, IntervalIndex) @@ -513,7 +514,7 @@ def test_sort_index_categorical_index(self): df = (DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')) - .astype('category', categories=list('cab'))}) + .astype(CategoricalDtype(list('cab')))}) .set_index('B')) result = df.sort_index() diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 3099c02e4aabd3..b20c1817e56711 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -186,6 +186,16 @@ def test_astype_dict_like(self, dtype_class): with pytest.raises(KeyError): s.astype(dt5) + def test_astype_categories_deprecation(self): + + # deprecated 17636 + s = Series(['a', 'b', 'a']) + expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.astype('category', categories=['a', 'b'], ordered=True) + tm.assert_series_equal(result, expected) + def test_astype_categoricaldtype(self): s = Series(['a', 'b', 'a']) result = s.astype(CategoricalDtype(['a', 'b'], ordered=True)) diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 128a4cdd845e6e..e45acdedbd2a92 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -7,7 +7,8 @@ from numpy import nan import numpy as np -from pandas import (Series, date_range, NaT) +from pandas import Series, date_range, NaT +from pandas.api.types import CategoricalDtype from pandas.compat import product from pandas.util.testing import assert_series_equal @@ -123,35 +124,25 @@ def test_rank_categorical(self): exp_desc = Series([6., 5., 4., 3., 2., 1.]) ordered = Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] - ).astype( - 'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=True - ) + ).astype(CategoricalDtype(categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=True)) assert_series_equal(ordered.rank(), exp) assert_series_equal(ordered.rank(ascending=False), exp_desc) # Unordered categoricals should be ranked as objects - unordered = Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], - ).astype( - 'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=False - ) + unordered = Series(['first', 'second', 'third', 'fourth', + 'fifth', 'sixth']).astype( + CategoricalDtype(categories=['first', 'second', 'third', + 'fourth', 'fifth', 
'sixth'], + ordered=False)) exp_unordered = Series([2., 4., 6., 3., 1., 5.]) res = unordered.rank() assert_series_equal(res, exp_unordered) unordered1 = Series( [1, 2, 3, 4, 5, 6], - ).astype( - 'category', - categories=[1, 2, 3, 4, 5, 6], - ordered=False - ) + ).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False)) exp_unordered1 = Series([1., 2., 3., 4., 5., 6.]) res1 = unordered1.rank() assert_series_equal(res1, exp_unordered1) @@ -159,14 +150,8 @@ def test_rank_categorical(self): # Test na_option for rank data na_ser = Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] - ).astype( - 'category', - categories=[ - 'first', 'second', 'third', 'fourth', - 'fifth', 'sixth', 'seventh' - ], - ordered=True - ) + ).astype(CategoricalDtype(['first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh'], True)) exp_top = Series([2., 3., 4., 5., 6., 7., 1.]) exp_bot = Series([1., 2., 3., 4., 5., 6., 7.]) @@ -195,13 +180,8 @@ def test_rank_categorical(self): ) # Test with pct=True - na_ser = Series( - ['first', 'second', 'third', 'fourth', np.NaN], - ).astype( - 'category', - categories=['first', 'second', 'third', 'fourth'], - ordered=True - ) + na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype( + CategoricalDtype(['first', 'second', 'third', 'fourth'], True)) exp_top = Series([0.4, 0.6, 0.8, 1., 0.2]) exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.]) exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN]) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 9e3bd40dc275a7..64c89dbdd0aa48 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -84,17 +84,17 @@ def test_getitem_category_type(self): # get slice result = s.iloc[0:2] - expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) + expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get list of indexes result = s.iloc[[0, 1]] - expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) + expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get boolean array result = s.iloc[[True, False, False]] - expected = pd.Series([1]).astype('category', categories=[1, 2, 3]) + expected = pd.Series([1]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) def test_setitem(self): @@ -2076,12 +2076,12 @@ def test_creation_astype(self): l = ["a", "b", "c", "a"] s = pd.Series(l) exp = pd.Series(Categorical(l, ordered=True)) - res = s.astype('category', ordered=True) + res = s.astype(CategoricalDtype(None, ordered=True)) tm.assert_series_equal(res, exp) exp = pd.Series(Categorical( l, categories=list('abcdef'), ordered=True)) - res = s.astype('category', categories=list('abcdef'), ordered=True) + res = s.astype(CategoricalDtype(list('abcdef'), ordered=True)) tm.assert_series_equal(res, exp) def test_construction_series(self): @@ -4262,11 +4262,11 @@ def test_concat_preserve(self): b = Series(list('aabbca')) df2 = DataFrame({'A': a, - 'B': b.astype('category', categories=list('cab'))}) + 'B': b.astype(CategoricalDtype(list('cab')))}) res = pd.concat([df2, df2]) - exp = DataFrame({'A': pd.concat([a, a]), - 'B': pd.concat([b, b]).astype( - 'category', categories=list('cab'))}) + exp = DataFrame( + {'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))}) tm.assert_frame_equal(res, exp) def test_categorical_index_preserver(self): @@ -4275,13 +4275,13 @@ def 
test_categorical_index_preserver(self): b = Series(list('aabbca')) df2 = DataFrame({'A': a, - 'B': b.astype('category', categories=list('cab')) + 'B': b.astype(CategoricalDtype(list('cab'))) }).set_index('B') result = pd.concat([df2, df2]) - expected = DataFrame({'A': pd.concat([a, a]), - 'B': pd.concat([b, b]).astype( - 'category', categories=list('cab')) - }).set_index('B') + expected = DataFrame( + {'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab'))) + }).set_index('B') tm.assert_frame_equal(result, expected) # wrong catgories @@ -4324,7 +4324,7 @@ def test_merge(self): cright = right.copy() cright['d'] = cright['d'].astype('category') result = pd.merge(left, cright, how='left', left_on='b', right_on='c') - expected['d'] = expected['d'].astype('category', categories=['null']) + expected['d'] = expected['d'].astype(CategoricalDtype(['null'])) tm.assert_frame_equal(result, expected) # cat-object From f7974084de2359f4e626169cbbfc46ab396eb3bb Mon Sep 17 00:00:00 2001 From: reidy-p Date: Tue, 3 Oct 2017 02:21:40 +0100 Subject: [PATCH 06/76] DOC: Changing forking instructions to https (#16419) (#17751) --- doc/source/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index e172d0d2a71a29..d8d57a8bfffdd3 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -101,7 +101,7 @@ You will need your own fork to work on the code. Go to the `pandas project page `_ and hit the ``Fork`` button. You will want to clone your fork to your machine:: - git clone git@github.com:your-user-name/pandas.git pandas-yourname + git clone https://github.com/your-user-name/pandas.git pandas-yourname cd pandas-yourname git remote add upstream git://github.com/pandas-dev/pandas.git From 2e2093e1985b6ae36e2b00edf1bb14197134bc62 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 3 Oct 2017 04:07:06 -0400 Subject: [PATCH 07/76] BUG: GH17525 Function _get_standard_colors resets global random seed (#17730) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/plotting/_style.py | 10 ++++++---- pandas/tests/plotting/test_misc.py | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 74620accd68758..f4ec8a5f2ad24e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -728,6 +728,7 @@ Plotting - Line plots no longer assume monotonic x data when calculating xlims, they show the entire lines now even for unsorted x data. (:issue:`11310`, :issue:`11471`) - With matplotlib 2.0.0 and above, calculation of x limits for line plots is left to matplotlib, so that its new default settings are applied. 
(:issue:`15495`) - Bug in ``Series.plot.bar`` or ``DataFramee.plot.bar`` with ``y`` not respecting user-passed ``color`` (:issue:`16822`) +- Bug causing ``plotting.parallel_coordinates`` to reset the random seed when using random colors (:issue:`17525`) Groupby/Resample/Rolling diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py index f1d53da5f1396d..4c31ff0177488a 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_style.py @@ -9,7 +9,7 @@ import numpy as np from pandas.core.dtypes.common import is_list_like -from pandas.compat import range, lrange, lmap +from pandas.compat import lrange, lmap import pandas.compat as compat from pandas.plotting._compat import _mpl_ge_2_0_0 @@ -44,11 +44,13 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', if isinstance(colors, compat.string_types): colors = list(colors) elif color_type == 'random': - import random + from pandas.core.common import _random_state def random_color(column): - random.seed(column) - return [random.random() for _ in range(3)] + """ Returns a random color represented as a list of length 3""" + # GH17525 use common._random_state to avoid resetting the seed + rs = _random_state(column) + return rs.rand(3).tolist() colors = lmap(random_color, lrange(num_colors)) else: diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c4795ea1e1eca6..957369a20f16e9 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -284,3 +284,20 @@ def test_subplot_titles(self): title=title[:-1]) title_list = [ax.get_title() for sublist in plot for ax in sublist] assert title_list == title[:3] + [''] + + def test_get_standard_colors_random_seed(self): + # GH17525 + df = DataFrame(np.zeros((10, 10))) + + # Make sure that the random seed isn't reset by _get_standard_colors + plotting.parallel_coordinates(df, 0) + rand1 = random.random() + plotting.parallel_coordinates(df, 0) + rand2 = random.random() + assert rand1 != rand2 + + # Make sure it produces the same colors every time it's called + from pandas.plotting._style import _get_standard_colors + color1 = _get_standard_colors(1, color_type='random') + color2 = _get_standard_colors(1, color_type='random') + assert color1 == color2 From 6d30d5f425ddfaf143b8bd878f81395852b50cd9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 3 Oct 2017 06:48:57 -0400 Subject: [PATCH 08/76] CI: pin pytables to valid build (#17760) xref #17757 --- ci/requirements-3.6.run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 822144a80bc9a0..721d0c1ad81018 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -7,7 +7,7 @@ xlsxwriter xlrd xlwt numexpr -pytables +pytables=3.4.2=np113py36_1 matplotlib lxml html5lib From 170411ff666153fa1275c8cdba657729441d2b12 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 3 Oct 2017 06:54:31 -0400 Subject: [PATCH 09/76] TST: remove bunch of warnings for .astype(.....), xref #17636 (#17759) --- pandas/tests/indexing/test_categorical.py | 32 ++++++++++------------- pandas/tests/reshape/test_merge.py | 11 ++++---- pandas/tests/reshape/test_pivot.py | 9 ++++--- pandas/tests/reshape/test_tile.py | 29 ++++++++++---------- 4 files changed, 39 insertions(+), 42 deletions(-) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6874fedaa705f5..ab6e76c2211025 100644 --- a/pandas/tests/indexing/test_categorical.py +++ 
b/pandas/tests/indexing/test_categorical.py @@ -8,6 +8,7 @@ Categorical, CategoricalIndex) from pandas.util.testing import assert_series_equal, assert_frame_equal from pandas.util import testing as tm +from pandas.api.types import CategoricalDtype as CDT class TestCategoricalIndex(object): @@ -16,27 +17,24 @@ def setup_method(self, method): self.df = DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cab'))}).set_index('B') + CDT(list('cab')))}).set_index('B') self.df2 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')).astype( - 'category', categories=list( - 'cabe'))}).set_index('B') + CDT(list('cabe')))}).set_index('B') self.df3 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=True))}).set_index('B') + .astype(CDT([3, 2, 1], ordered=True))) + }).set_index('B') self.df4 = DataFrame({'A': np.arange(6, dtype='int64'), 'B': (Series([1, 1, 2, 1, 3, 2]) - .astype('category', categories=[3, 2, 1], - ordered=False))}).set_index('B') + .astype(CDT([3, 2, 1], ordered=False))) + }).set_index('B') def test_loc_scalar(self): result = self.df.loc['a'] expected = (DataFrame({'A': [0, 1, 5], 'B': (Series(list('aaa')) - .astype('category', - categories=list('cab')))}) + .astype(CDT(list('cab'))))}) .set_index('B')) assert_frame_equal(result, expected) @@ -44,8 +42,7 @@ def test_loc_scalar(self): df.loc['a'] = 20 expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20], 'B': (Series(list('aabbca')) - .astype('category', - categories=list('cab')))}) + .astype(CDT(list('cab'))))}) .set_index('B')) assert_frame_equal(df, expected) @@ -319,13 +316,13 @@ def test_reindexing(self): result = self.df2.reindex(Categorical(['a', 'd'], categories=cats)) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( - 'category', categories=cats)}).set_index('B') + CDT(cats))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(Categorical(['a'], categories=cats)) expected = DataFrame({'A': [0, 1, 5], 'B': Series(list('aaa')).astype( - 'category', categories=cats)}).set_index('B') + CDT(cats))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(['a', 'b', 'e']) @@ -348,16 +345,15 @@ def test_reindexing(self): ['a', 'd'], categories=cats, ordered=True)) expected = DataFrame( {'A': [0, 1, 5, np.nan], - 'B': Series(list('aaad')).astype('category', categories=cats, - ordered=True)}).set_index('B') + 'B': Series(list('aaad')).astype( + CDT(cats, ordered=True))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(Categorical( ['a', 'd'], categories=['a', 'd'])) expected = DataFrame({'A': [0, 1, 5, np.nan], 'B': Series(list('aaad')).astype( - 'category', categories=['a', 'd' - ])}).set_index('B') + CDT(['a', 'd']))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) # passed duplicate indexers are not allowed diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index df75983a29d80f..ed99814afd20a9 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -16,6 +16,7 @@ from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm +from pandas.api.types import CategoricalDtype as CDT N = 50 @@ 
-1414,7 +1415,7 @@ def left(): return DataFrame( {'X': Series(np.random.choice( ['foo', 'bar'], - size=(10,))).astype('category', categories=['foo', 'bar']), + size=(10,))).astype(CDT(['foo', 'bar'])), 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) @@ -1422,8 +1423,7 @@ def left(): def right(): np.random.seed(1234) return DataFrame( - {'X': Series(['foo', 'bar']).astype('category', - categories=['foo', 'bar']), + {'X': Series(['foo', 'bar']).astype(CDT(['foo', 'bar'])), 'Z': [1, 2]}) @@ -1468,9 +1468,8 @@ def test_other_columns(self, left, right): @pytest.mark.parametrize( 'change', [lambda x: x, - lambda x: x.astype('category', - categories=['foo', 'bar', 'bah']), - lambda x: x.astype('category', ordered=True)]) + lambda x: x.astype(CDT(['foo', 'bar', 'bah'])), + lambda x: x.astype(CDT(ordered=True))]) @pytest.mark.parametrize('how', ['inner', 'outer', 'left', 'right']) def test_dtype_on_merged_different(self, change, how, left, right): # our merging columns, X now has 2 different dtypes diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index bd8a999ce23304..07d3052c167564 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1,3 +1,4 @@ + from datetime import datetime, date, timedelta import pytest @@ -13,6 +14,7 @@ from pandas.compat import range, product import pandas.util.testing as tm from pandas.tseries.util import pivot_annual, isleapyear +from pandas.api.types import CategoricalDtype as CDT class TestPivotTable(object): @@ -98,13 +100,12 @@ def test_pivot_table_dropna_categoricals(self): 'B': [1, 2, 3, 1, 2, 3, 1, 2, 3], 'C': range(0, 9)}) - df['A'] = df['A'].astype('category', ordered=False, - categories=categories) + df['A'] = df['A'].astype(CDT(categories, ordered=False)) result_true = df.pivot_table(index='B', columns='A', values='C', dropna=True) expected_columns = Series(['a', 'b', 'c'], name='A') - expected_columns = expected_columns.astype('category', ordered=False, - categories=categories) + expected_columns = expected_columns.astype( + CDT(categories, ordered=False)) expected_index = Series([1, 2, 3], name='B') expected_true = DataFrame([[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 91000747b41bb6..4edce8af92f846 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -9,6 +9,7 @@ Interval, IntervalIndex, Categorical, cut, qcut, date_range) import pandas.util.testing as tm +from pandas.api.types import CategoricalDtype as CDT from pandas.core.algorithms import quantile import pandas.core.reshape.tile as tmod @@ -299,7 +300,7 @@ def test_cut_return_intervals(self): exp_bins = np.linspace(0, 8, num=4).round(3) exp_bins[0] -= 0.008 exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take( - [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype('category', ordered=True) + [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True)) tm.assert_series_equal(res, exp) def test_qcut_return_intervals(self): @@ -308,7 +309,7 @@ def test_qcut_return_intervals(self): exp_levels = np.array([Interval(-0.001, 2.664), Interval(2.664, 5.328), Interval(5.328, 8)]) exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype( - 'category', ordered=True) + CDT(ordered=True)) tm.assert_series_equal(res, exp) def test_series_retbins(self): @@ -316,14 +317,14 @@ def test_series_retbins(self): s = Series(np.arange(4)) result, bins = cut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( - 
[-0.003, 1.5, 3], closed='right').repeat(2)).astype('category', - ordered=True) + [-0.003, 1.5, 3], closed='right').repeat(2)).astype( + CDT(ordered=True)) tm.assert_series_equal(result, expected) result, bins = qcut(s, 2, retbins=True) expected = Series(IntervalIndex.from_breaks( - [-0.001, 1.5, 3], closed='right').repeat(2)).astype('category', - ordered=True) + [-0.001, 1.5, 3], closed='right').repeat(2)).astype( + CDT(ordered=True)) tm.assert_series_equal(result, expected) def test_qcut_duplicates_bin(self): @@ -351,7 +352,7 @@ def test_single_quantile(self): result = qcut(s, 1) intervals = IntervalIndex([Interval(8.999, 9.0), Interval(8.999, 9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9., -9.]) @@ -361,7 +362,7 @@ def test_single_quantile(self): result = qcut(s, 1) intervals = IntervalIndex([Interval(-9.001, -9.0), Interval(-9.001, -9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0., 0.]) @@ -371,7 +372,7 @@ def test_single_quantile(self): result = qcut(s, 1) intervals = IntervalIndex([Interval(-0.001, 0.0), Interval(-0.001, 0.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([9]) @@ -380,7 +381,7 @@ def test_single_quantile(self): tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9]) @@ -389,7 +390,7 @@ def test_single_quantile(self): tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0]) @@ -398,7 +399,7 @@ def test_single_quantile(self): tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right') - expected = Series(intervals).astype('category', ordered=True) + expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) def test_single_bin(self): @@ -450,7 +451,7 @@ def test_datetime_cut(self): Timestamp('2013-01-02 08:00:00')), Interval(Timestamp('2013-01-02 08:00:00'), Timestamp('2013-01-03 00:00:00'))])) - .astype('category', ordered=True)) + .astype(CDT(ordered=True))) tm.assert_series_equal(result, expected) @@ -479,7 +480,7 @@ def test_datetime_bin(self): Series(IntervalIndex.from_intervals([ Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))])) - .astype('category', ordered=True)) + .astype(CDT(ordered=True))) for conv in [Timestamp, Timestamp, np.datetime64]: bins = [conv(v) for v in bin_data] From 8e89cb3e135f6ef746437211857776136747388f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 3 Oct 2017 06:55:50 -0400 Subject: [PATCH 10/76] API: warning to raise KeyError in the future if not all elements of a list are selected via .loc (#17295) closes #15747 --- 
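A note on the idioms this change pushes users toward: ``.reindex()`` fills
missing labels with ``NaN`` (which casts integer data to float), while
intersecting the labels first keeps only those that exist and preserves the
dtype. A small sketch of the difference, assuming the 0.21 behavior this
patch documents:

.. code-block:: python

    import pandas as pd

    s = pd.Series([10, 20, 30])               # dtype: int64

    # deprecated: s.loc[[0, 1, 5]] warns, and will raise KeyError later

    filled = s.reindex([0, 1, 5])             # label 5 missing -> NaN
    assert filled.dtype == 'float64'

    valid = s.loc[s.index.intersection([0, 1, 5])]
    assert valid.dtype == 'int64'             # only existing labels returned
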
 doc/source/advanced.rst                   |   2 +-
 doc/source/indexing.rst                   | 112 +++++++++++++++++++++-
 doc/source/whatsnew/v0.15.0.txt           |  24 ++++-
 doc/source/whatsnew/v0.21.0.txt           |  59 ++++++++++++
 pandas/core/indexing.py                   |  32 +++++--
 pandas/core/series.py                     |   2 +-
 pandas/io/formats/excel.py                |  16 +++-
 pandas/tests/indexing/test_categorical.py |   3 +-
 pandas/tests/indexing/test_datetime.py    |   8 +-
 pandas/tests/indexing/test_iloc.py        |   3 +-
 pandas/tests/indexing/test_indexing.py    |  18 ++--
 pandas/tests/indexing/test_loc.py         |  43 ++++++++-
 pandas/tests/indexing/test_partial.py     | 106 ++++++++++++------
 pandas/tests/io/test_excel.py             |   6 +-
 pandas/tests/reshape/test_concat.py       |   2 +-
 pandas/tests/series/test_indexing.py      |   9 +-
 pandas/tests/sparse/test_indexing.py      |   8 +-
 17 files changed, 386 insertions(+), 67 deletions(-)

diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index cfdb53ec7e4b1a..44358593793bca 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -1009,7 +1009,7 @@ The different indexing operation can potentially change the dtype of a ``Series`
    series1 = pd.Series([1, 2, 3])
    series1.dtype
-   res = series1[[0,4]]
+   res = series1.reindex([0, 4])
    res.dtype
    res

diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index edbc4e6d7fd225..415f3fd702c43f 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -333,8 +333,15 @@ Selection By Label
    dfl.loc['20130102':'20130104']

+.. warning::
+
+   Starting in 0.21.0, pandas will show a ``FutureWarning`` if indexing with a list with missing labels. In the future
+   this will raise a ``KeyError``. See :ref:`list-like Using loc with missing keys in a list is Deprecated `
+
 pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol.
-**At least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, both the start bound **AND** the stop bound are *included*, if present in the index. Integers are valid labels, but they refer to the label **and not the position**.
+All of the labels for which you ask, must be in the index or a ``KeyError`` will be raised!
+When slicing, both the start bound **AND** the stop bound are *included*, if present in the index.
+Integers are valid labels, but they refer to the label **and not the position**.

 The ``.loc`` attribute is the primary access method. The following are valid inputs:

@@ -635,6 +642,107 @@ For getting *multiple* indexers, using ``.get_indexer``

    dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])]

+.. _indexing.deprecate_loc_reindex_listlike:
+
+Indexing with list with missing labels is Deprecated
+----------------------------------------------------
+
+.. warning::
+
+   Starting in 0.21.0, using ``.loc`` or ``[]`` with a list with one or more missing labels, is deprecated, in favor of ``.reindex``.
+
+In prior versions, using ``.loc[list-of-labels]`` would work as long as *at least 1* of the keys was found (otherwise it
+would raise a ``KeyError``). This behavior is deprecated and will show a warning message pointing to this section. The
+recommended alternative is to use ``.reindex()``.
+
+For example.
+
+.. ipython:: python
+
+   s = pd.Series([1, 2, 3])
+   s
+
+Selection with all keys found is unchanged.
+
+.. ipython:: python
+
+   s.loc[[1, 2]]
+
+Previous Behavior
+
+.. code-block:: ipython
+
+   In [4]: s.loc[[1, 2, 3]]
+   Out[4]:
+   1    2.0
+   2    3.0
+   3    NaN
+   dtype: float64
+
+
+Current Behavior
+
+.. code-block:: ipython
+
+   In [4]: s.loc[[1, 2, 3]]
+   Passing list-likes to .loc with any non-matching elements will raise
+   KeyError in the future, you can use .reindex() as an alternative.
+
+   See the documentation here:
+   http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
+
+   Out[4]:
+   1    2.0
+   2    3.0
+   3    NaN
+   dtype: float64
+
+
+Reindexing
+~~~~~~~~~~
+
+The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()``. See also the section on :ref:`reindexing `.
+
+.. ipython:: python
+
+   s.reindex([1, 2, 3])
+
+Alternatively, if you want to select only *valid* keys, the following is idiomatic and efficient; it is guaranteed to preserve the dtype of the selection.
+
+.. ipython:: python
+
+   labels = [1, 2, 3]
+   s.loc[s.index.intersection(labels)]
+
+Having a duplicated index will raise for a ``.reindex()``:
+
+.. ipython:: python
+
+   s = pd.Series(np.arange(4), index=['a', 'a', 'b', 'c'])
+   labels = ['c', 'd']
+
+.. code-block:: ipython
+
+   In [17]: s.reindex(labels)
+   ValueError: cannot reindex from a duplicate axis
+
+Generally, you can intersect the desired labels with the current
+axis, and then reindex.
+
+.. ipython:: python
+
+   s.loc[s.index.intersection(labels)].reindex(labels)
+
+However, this would *still* raise if your resulting index is duplicated.
+
+.. code-block:: ipython
+
+   In [41]: labels = ['a', 'd']
+
+   In [42]: s.loc[s.index.intersection(labels)].reindex(labels)
+   ValueError: cannot reindex from a duplicate axis
+
+
 .. _indexing.basics.partial_setting:

 Selecting Random Samples
@@ -852,7 +960,7 @@ when you don't know which of the sought labels are in fact present:

    s[s.index.isin([2, 4, 6])]

    # compare it to the following
-   s[[2, 4, 6]]
+   s.reindex([2, 4, 6])

 In addition to that, ``MultiIndex`` allows selecting a separate level to use
 in the membership check:
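A quick runnable illustration of the duplicate-index caveat documented in the
hunk above: ``reindex`` refuses to guess between duplicate labels, whereas a
boolean ``isin`` mask simply returns every matching row (a sketch with the
same semantics as the docs, not part of the patch itself):

.. code-block:: python

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(4), index=['a', 'a', 'b', 'c'])

    try:
        s.reindex(['a', 'd'])
    except ValueError as exc:
        print(exc)                    # cannot reindex from a duplicate axis

    s[s.index.isin(['a', 'd'])]       # both 'a' rows come back; 'd' ignored
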
diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.txt
index 6282f15b6faebc..e44bc6e9e91e04 100644
--- a/doc/source/whatsnew/v0.15.0.txt
+++ b/doc/source/whatsnew/v0.15.0.txt
@@ -676,10 +676,19 @@ Other notable API changes:

   Both will now return a frame reindex by [1,3]. E.g.

-  .. ipython:: python
+  .. code-block:: ipython

-     df.loc[[1,3]]
-     df.loc[[1,3],:]
+     In [3]: df.loc[[1,3]]
+     Out[3]:
+          0
+     1    a
+     3  NaN
+
+     In [4]: df.loc[[1,3],:]
+     Out[4]:
+          0
+     1    a
+     3  NaN

   This can also be seen in multi-axis indexing with a ``Panel``.

@@ -693,9 +702,14 @@ Other notable API changes:

   The following would raise ``KeyError`` prior to 0.15.0:

-  .. ipython:: python
+  .. code-block:: ipython

-     p.loc[['ItemA','ItemD'],:,'D']
+     In [5]:
+     Out[5]:
+        ItemA  ItemD
+     1      3    NaN
+     2      7    NaN
+     3     11    NaN

   Furthermore, ``.loc`` will raise If no values are found in a multi-index
   with a list-like indexer:

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index f4ec8a5f2ad24e..0d4eaa90d7ab36 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -300,6 +300,64 @@ If installed, we now require:
    | Bottleneck   | 1.0.0           |          |
    +--------------+-----------------+----------+

+.. _whatsnew_0210.api_breaking.loc:
+
+Indexing with a list with missing labels is Deprecated
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, selecting with a list of labels, where one or more labels were missing would always succeed, returning ``NaN`` for missing labels.
+This will now show a ``FutureWarning``, in the future this will raise a ``KeyError`` (:issue:`15747`).
+This warning will trigger on a ``DataFrame`` or a ``Series`` for using ``.loc[]`` or ``[[]]`` when passing a list-of-labels with at least 1 missing label.
+See the :ref:`deprecation docs `.
+
+
+.. ipython:: python
+
+   s = pd.Series([1, 2, 3])
+   s
+
+Previous Behavior
+
+.. code-block:: ipython
+
+   In [4]: s.loc[[1, 2, 3]]
+   Out[4]:
+   1    2.0
+   2    3.0
+   3    NaN
+   dtype: float64
+
+
+Current Behavior
+
+.. code-block:: ipython
+
+   In [4]: s.loc[[1, 2, 3]]
+   Passing list-likes to .loc or [] with any missing label will raise
+   KeyError in the future, you can use .reindex() as an alternative.
+
+   See the documentation here:
+   http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
+
+   Out[4]:
+   1    2.0
+   2    3.0
+   3    NaN
+   dtype: float64
+
+The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()``
+
+.. ipython:: python
+
+   s.reindex([1, 2, 3])
+
+Selection with all keys found is unchanged.
+
+.. ipython:: python
+
+   s.loc[[1, 2]]
+
+
 .. _whatsnew_0210.api_breaking.pandas_eval:

 Improved error handling during item assignment in pd.eval
@@ -607,6 +665,7 @@ Deprecations
 - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`)
 - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`)
 - passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`)
+- Passing a non-existant column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`)

 .. _whatsnew_0210.deprecations.argmin_min:
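The indexing.py hunk that follows hinges on one primitive:
``Index.get_indexer_for`` returns ``-1`` for labels it cannot locate, which
yields a cheap boolean "missing" mask for choosing between raising and
warning. A simplified standalone sketch of that check (the real method also
skips the warning for Categorical/Interval axes):

.. code-block:: python

    import warnings
    import numpy as np
    import pandas as pd

    ax = pd.Index([1, 2])
    key = [1, 2, 3]

    missing = ax.get_indexer_for(key) < 0    # [False, False, True]

    if np.all(missing):
        raise KeyError("None of {key} are in the index".format(key=key))
    elif np.any(missing):
        # partial match: warn now, raise in a future version
        warnings.warn("Passing list-likes to .loc with any missing label "
                      "will raise KeyError in the future", FutureWarning)
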
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 2ea1b8a2389134..e977e84702982b 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1419,13 +1419,33 @@ def _has_valid_type(self, key, axis):
             if isinstance(key, tuple) and isinstance(ax, MultiIndex):
                 return True

-            # TODO: don't check the entire key unless necessary
-            if (not is_iterator(key) and len(key) and
-                    np.all(ax.get_indexer_for(key) < 0)):
+            if not is_iterator(key) and len(key):

-                raise KeyError(u"None of [{key}] are in the [{axis}]"
-                               .format(key=key,
-                                       axis=self.obj._get_axis_name(axis)))
+                # True indicates missing values
+                missing = ax.get_indexer_for(key) < 0
+
+                if np.any(missing):
+                    if len(key) == 1 or np.all(missing):
+                        raise KeyError(
+                            u"None of [{key}] are in the [{axis}]".format(
+                                key=key, axis=self.obj._get_axis_name(axis)))
+                    else:
+
+                        # we skip the warning on Categorical/Interval
+                        # as this check is actually done (check for
+                        # non-missing values), but a bit later in the
+                        # code, so we want to avoid warning & then
+                        # just raising
+                        _missing_key_warning = textwrap.dedent("""
+                        Passing list-likes to .loc or [] with any missing label will raise
+                        KeyError in the future, you can use .reindex() as an alternative.
+
+                        See the documentation here:
+                        http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike""")  # noqa
+
+                        if not (ax.is_categorical() or ax.is_interval()):
+                            warnings.warn(_missing_key_warning,
+                                          FutureWarning, stacklevel=5)

             return True

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 97f39a680c8c97..58cac46f63d7ed 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -691,7 +691,7 @@ def _get_with(self, key):
         if key_type == 'integer':
             if self.index.is_integer() or self.index.is_floating():
-                return self.reindex(key)
+                return self.loc[key]
             else:
                 return self._get_values(key)
         elif key_type == 'boolean':

diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
index 51668bb6b08953..9e888c38edaa7d 100644
--- a/pandas/io/formats/excel.py
+++ b/pandas/io/formats/excel.py
@@ -356,7 +356,21 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
         self.styler = None
         self.df = df
         if cols is not None:
-            self.df = df.loc[:, cols]
+
+            # all missing, raise
+            if not len(Index(cols) & df.columns):
+                raise KeyError(
+                    "passed columns are not ALL present in dataframe")
+
+            # deprecated in gh-17295
+            # 1 missing is ok (for now)
+            if len(Index(cols) & df.columns) != len(cols):
+                warnings.warn(
+                    "Not all names specified in 'columns' are found; "
+                    "this will raise a KeyError in the future",
+                    FutureWarning)
+
+            self.df = df.reindex(columns=cols)
         self.columns = self.df.columns
         self.float_format = float_format
         self.index = index

diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index ab6e76c2211025..2c93d2afd17605 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -108,7 +108,8 @@ def test_loc_listlike(self):
         assert_frame_equal(result, expected, check_index_type=True)

         # not all labels in the categories
-        pytest.raises(KeyError, lambda: self.df2.loc[['a', 'd']])
+        with pytest.raises(KeyError):
+            self.df2.loc[['a', 'd']]

     def test_loc_listlike_dtypes(self):
         # GH 11586

diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py
index ddac80fbc46934..617757c888eb54 100644
--- a/pandas/tests/indexing/test_datetime.py
+++ b/pandas/tests/indexing/test_datetime.py
@@ -223,7 +223,9 @@ def test_series_partial_set_datetime(self):
                 Timestamp('2011-01-03')]
         exp = Series([np.nan, 0.2, np.nan],
                      index=pd.DatetimeIndex(keys, name='idx'), name='s')
-        tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True)

     def test_series_partial_set_period(self):
         # GH 11497
@@ -248,5 +250,7 @@ def test_series_partial_set_period(self):
                 pd.Period('2011-01-03', freq='D')]
         exp = Series([np.nan, 0.2, np.nan],
                      index=pd.PeriodIndex(keys, name='idx'), name='s')
-        result = ser.loc[keys]
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            result = ser.loc[keys]
         tm.assert_series_equal(result, exp)

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 39569f0b0cb383..c8e320f9d9c779 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -617,7 +617,8 @@ def test_iloc_non_unique_indexing(self):
         expected = DataFrame(new_list)
         expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()])
                               ])
-        result = df2.loc[idx]
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = df2.loc[idx] tm.assert_frame_equal(result, expected, check_index_type=False) def test_iloc_empty_list_indexer_is_ok(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index f1f51f26df55ca..d64ed98243d727 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -176,7 +176,8 @@ def test_dups_fancy_indexing(self): 'test1': [7., 6, np.nan], 'other': ['d', 'c', np.nan]}, index=rows) - result = df.loc[rows] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[rows] tm.assert_frame_equal(result, expected) # see GH5553, make sure we use the right indexer @@ -186,7 +187,8 @@ def test_dups_fancy_indexing(self): 'other': [np.nan, np.nan, np.nan, 'd', 'c', np.nan]}, index=rows) - result = df.loc[rows] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[rows] tm.assert_frame_equal(result, expected) # inconsistent returns for unique/duplicate indices when values are @@ -203,12 +205,14 @@ def test_dups_fancy_indexing(self): # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - result = df.loc[[0, 8, 0]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) - result = df.loc[[0, 8, 0]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) @@ -216,7 +220,8 @@ def test_dups_fancy_indexing(self): df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) expected = DataFrame( {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) - result = df.loc[['A', 'A', 'E']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) # GH 5835 @@ -227,7 +232,8 @@ def test_dups_fancy_indexing(self): expected = pd.concat( [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], index=df.index)], axis=1) - result = df.loc[:, ['A', 'B', 'C']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df.loc[:, ['A', 'B', 'C']] tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 95d6a24e68425c..c6f38aeba9e87c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -152,15 +152,29 @@ def test_loc_getitem_label_list(self): [Timestamp('20130102'), Timestamp('20130103')], typs=['ts'], axes=0) + def test_loc_getitem_label_list_with_missing(self): self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], typs=['empty'], fails=KeyError) - self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], - typs=['ints', 'uints'], axes=0, fails=KeyError) - self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], - typs=['ints', 'uints'], axes=1, fails=KeyError) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], + typs=['ints', 'uints'], axes=0, fails=KeyError) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.check_result('list lbl', 
'loc', [3, 6, 7], 'ix', [3, 6, 7], + typs=['ints', 'uints'], axes=1, fails=KeyError) self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], typs=['ints', 'uints'], axes=2, fails=KeyError) + def test_getitem_label_list_with_missing(self): + s = pd.Series(range(3), index=['a', 'b', 'c']) + + # consistency + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s[['a', 'd']] + + s = pd.Series(range(3)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s[[0, 3]] + def test_loc_getitem_label_list_fails(self): # fails self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], @@ -249,7 +263,9 @@ def test_loc_to_fail(self): pytest.raises(KeyError, lambda: s.loc[['4']]) s.loc[-1] = 3 - result = s.loc[[-1, -2]] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.loc[[-1, -2]] expected = Series([3, np.nan], index=[-1, -2]) tm.assert_series_equal(result, expected) @@ -277,6 +293,23 @@ def f(): pytest.raises(KeyError, f) + def test_loc_getitem_list_with_fail(self): + # 15747 + # should KeyError if *any* missing labels + + s = Series([1, 2, 3]) + + s.loc[[2]] + + with pytest.raises(KeyError): + s.loc[[3]] + + # a non-match and a match + with tm.assert_produces_warning(FutureWarning): + expected = s.loc[[2, 3]] + result = s.reindex([2, 3]) + tm.assert_series_equal(result, expected) + def test_loc_getitem_label_slice(self): # label slices (with ints) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 93a85e247a7870..41ddfe934a131f 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -222,13 +222,21 @@ def test_series_partial_set(self): # Regression from GH4825 ser = Series([0.1, 0.2], index=[1, 2]) - # loc + # loc equiv to .reindex expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) - result = ser.loc[[3, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([3, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, 'x']) - result = ser.loc[[3, 2, 3, 'x']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3, 'x']] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([3, 2, 3, 'x']) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) @@ -236,38 +244,71 @@ def test_series_partial_set(self): tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, 'x', 1]) - result = ser.loc[[2, 2, 'x', 1]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 'x', 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = ser.reindex([2, 2, 'x', 1]) tm.assert_series_equal(result, expected, check_index_type=True) # raises as nothing in in the index pytest.raises(KeyError, lambda: ser.loc[[3, 3, 3]]) expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) - result = ser.loc[[2, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 3]] tm.assert_series_equal(result, expected, check_index_type=True) + result = ser.reindex([2, 2, 3]) + tm.assert_series_equal(result, 
expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - result = Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[3, 4, 4]] tm.assert_series_equal(result, expected, check_index_type=True) + result = s.reindex([3, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 3, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[5, 3, 3]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([5, 3, 3]) tm.assert_series_equal(result, expected, check_index_type=True) + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[5, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[5, 4, 4]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([5, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) + s = Series([0.1, 0.2, 0.3, 0.4], + index=[4, 5, 6, 7]) expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[4, 5, 6, 7]).loc[[7, 2, 2]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[7, 2, 2]] tm.assert_series_equal(result, expected, check_index_type=True) + result = s.reindex([7, 2, 2]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - result = Series([0.1, 0.2, 0.3, 0.4], - index=[1, 2, 3, 4]).loc[[4, 5, 5]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = s.loc[[4, 5, 5]] + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.reindex([4, 5, 5]) tm.assert_series_equal(result, expected, check_index_type=True) # iloc @@ -284,13 +325,15 @@ def test_series_partial_set_with_name(self): # loc exp_idx = Index([3, 2, 3], dtype='int64', name='idx') expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[3, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name='s') - result = ser.loc[[3, 2, 3, 'x']] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[3, 2, 3, 'x']] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([2, 2, 1], dtype='int64', name='idx') @@ -300,7 +343,8 @@ def test_series_partial_set_with_name(self): exp_idx = Index([2, 2, 'x', 1], dtype='object', name='idx') expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name='s') - result = ser.loc[[2, 2, 'x', 1]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 'x', 1]] tm.assert_series_equal(result, expected, check_index_type=True) # raises as nothing in in the index @@ -308,41 +352,49 @@ 
def test_series_partial_set_with_name(self): exp_idx = Index([2, 2, 3], dtype='int64', name='idx') expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') - result = ser.loc[[2, 2, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = ser.loc[[2, 2, 3]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([3, 4, 4], dtype='int64', name='idx') expected = Series([0.3, np.nan, np.nan], index=exp_idx, name='s') idx = Index([1, 2, 3], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3], index=idx, name='s').loc[[3, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3], + index=idx, + name='s').loc[[3, 4, 4]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([5, 3, 3], dtype='int64', name='idx') expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 3, 3]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 3, 3]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([5, 4, 4], dtype='int64', name='idx') expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[5, 4, 4]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 4, 4]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([7, 2, 2], dtype='int64', name='idx') expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') idx = Index([4, 5, 6, 7], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[7, 2, 2]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[7, 2, 2]] tm.assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([4, 5, 5], dtype='int64', name='idx') expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, - name='s').loc[[4, 5, 5]] + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[4, 5, 5]] tm.assert_series_equal(result, expected, check_index_type=True) # iloc diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 6a399f41975e5b..4e25fe03717182 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1808,8 +1808,10 @@ def test_invalid_columns(self): write_frame = DataFrame({'A': [1, 1, 1], 'B': [2, 2, 2]}) - write_frame.to_excel(path, 'test1', columns=['B', 'C']) - expected = write_frame.loc[:, ['B', 'C']] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + write_frame.to_excel(path, 'test1', columns=['B', 'C']) + expected = write_frame.reindex(columns=['B', 'C']) read_frame = read_excel(path, 'test1') tm.assert_frame_equal(expected, read_frame) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 6e646f9b294429..65d58a196d1eb1 100644 --- a/pandas/tests/reshape/test_concat.py +++ 
b/pandas/tests/reshape/test_concat.py @@ -1222,7 +1222,7 @@ def test_handle_empty_objects(self): frames = [baz, empty, empty, df[5:]] concatted = concat(frames, axis=0) - expected = df.loc[:, ['a', 'b', 'c', 'd', 'foo']] + expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo']) expected['foo'] = expected['foo'].astype('O') expected.loc[0:4, 'foo'] = 'bar' diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 272e8c7de5e498..86211612a59559 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -590,8 +590,13 @@ def test_getitem_dups_with_missing(self): # breaks reindex, so need to use .loc internally # GH 4246 s = Series([1, 2, 3, 4], ['foo', 'bar', 'foo', 'bah']) - expected = s.loc[['foo', 'bar', 'bah', 'bam']] - result = s[['foo', 'bar', 'bah', 'bam']] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + expected = s.loc[['foo', 'bar', 'bah', 'bam']] + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s[['foo', 'bar', 'bah', 'bam']] assert_series_equal(result, expected) def test_getitem_dups(self): diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index 382cff4b9d0acb..edbac8f09241b1 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -121,8 +121,8 @@ def test_loc(self): tm.assert_sp_series_equal(result, exp) # exceeds the bounds - result = sparse.loc[[1, 3, 4, 5]] - exp = orig.loc[[1, 3, 4, 5]].to_sparse() + result = sparse.reindex([1, 3, 4, 5]) + exp = orig.reindex([1, 3, 4, 5]).to_sparse() tm.assert_sp_series_equal(result, exp) # padded with NaN assert np.isnan(result[-1]) @@ -677,8 +677,8 @@ def test_loc(self): tm.assert_sp_frame_equal(result, exp) # exceeds the bounds - result = sparse.loc[[1, 3, 4, 5]] - exp = orig.loc[[1, 3, 4, 5]].to_sparse() + result = sparse.reindex([1, 3, 4, 5]) + exp = orig.reindex([1, 3, 4, 5]).to_sparse() tm.assert_sp_frame_equal(result, exp) # dense array From 9e67f4370ebf4d63ae65878f5dde6e8371538134 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Oct 2017 04:25:45 -0700 Subject: [PATCH 11/76] Remove unused imports (#17745) --- pandas/_libs/hashtable.pyx | 6 ------ pandas/_libs/lib.pyx | 6 +----- pandas/_libs/period.pyx | 12 +++--------- pandas/_libs/tslib.pyx | 8 -------- 4 files changed, 4 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 9aeb700dd5923a..b6b81055f89b28 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -50,12 +50,6 @@ cnp.import_ufunc() cdef int64_t iNaT = util.get_nat() _SIZE_HINT_LIMIT = (1 << 20) + 7 -cdef extern from "datetime.h": - bint PyDateTime_Check(object o) - void PyDateTime_IMPORT() - -PyDateTime_IMPORT - cdef size_t _INIT_VEC_CAP = 128 diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 503badd0ca8bc3..e7e92b7ae987a2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -48,7 +48,6 @@ cdef double NAN = nan # this is our tseries.pxd from datetime cimport ( get_timedelta64_value, get_datetime64_value, - npy_timedelta, npy_datetime, PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, PyDateTime_IMPORT) @@ -62,11 +61,8 @@ from interval import Interval cdef int64_t NPY_NAT = util.get_nat() -ctypedef unsigned char UChar - cimport util -from util cimport (is_array, _checknull, _checknan, INT64_MAX, - INT64_MIN, UINT8_MAX) +from util cimport is_array, _checknull, _checknan cdef extern from 
"math.h": double sqrt(double x) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 725da22104efcc..7760df51441176 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -7,8 +7,7 @@ from cpython cimport ( PyObject_RichCompareBool, Py_EQ, Py_NE) -from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, - NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) +from numpy cimport int64_t, import_array, ndarray import numpy as np import_array() @@ -23,12 +22,11 @@ from datetime cimport ( pandas_datetimestruct, pandas_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, - PANDAS_FR_ns, - INT32_MIN) + PANDAS_FR_ns) cimport util -from util cimport is_period_object, is_string_object +from util cimport is_period_object, is_string_object, INT32_MIN from lib cimport is_null_datetimelike from pandas._libs import tslib @@ -90,12 +88,8 @@ cdef extern from "period_helper.h": int microseconds, int picoseconds, int freq) nogil except INT32_MIN - int64_t get_python_ordinal(int64_t period_ordinal, - int freq) except INT32_MIN - int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil except INT32_MIN - double getAbsTime(int, int64_t, int64_t) int pyear(int64_t ordinal, int freq) except INT32_MIN int pqyear(int64_t ordinal, int freq) except INT32_MIN diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ff20ea287bd9d1..745632cf3d7198 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -21,7 +21,6 @@ from cpython cimport ( cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) -# this is our datetime.pxd from libc.stdlib cimport free from util cimport (is_integer_object, is_float_object, is_datetime64_object, @@ -65,11 +64,8 @@ from .tslibs.parsing import parse_datetime_string cimport cython -import time - from pandas.compat import iteritems, callable -import operator import collections import warnings @@ -933,10 +929,6 @@ cdef int64_t _NS_UPPER_BOUND = INT64_MAX # use the smallest value with a 0 nanosecond unit (0s in last 3 digits) cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 -cdef pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS -pandas_datetime_to_datetimestruct(_NS_LOWER_BOUND, PANDAS_FR_ns, &_NS_MIN_DTS) -pandas_datetime_to_datetimestruct(_NS_UPPER_BOUND, PANDAS_FR_ns, &_NS_MAX_DTS) - # Resolution is in nanoseconds Timestamp.min = Timestamp(_NS_LOWER_BOUND) Timestamp.max = Timestamp(_NS_UPPER_BOUND) From 2ff1241fa794231b8317ebe96b66f71dce99e0c2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 3 Oct 2017 09:01:05 -0400 Subject: [PATCH 12/76] TST: remove warnings, xref #15747 (#17761) --- pandas/tests/series/test_indexing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 86211612a59559..09ba0e197438dd 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -854,11 +854,15 @@ def test_basic_getitem_with_labels(self): s = Series(np.random.randn(10), index=lrange(0, 20, 2)) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) - result = s[inds] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s[inds] expected = s.reindex(inds) assert_series_equal(result, expected) - result = s[arr_inds] + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s[arr_inds] expected = s.reindex(arr_inds) assert_series_equal(result, expected) From 50c1dda3f1e0c0a4e439c73ac12943536cf58806 Mon Sep 17 00:00:00 2001 
From: gfyoung Date: Tue, 3 Oct 2017 11:14:44 -0700 Subject: [PATCH 13/76] MAINT: DataFramee --> DataFrame in whatsnew --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0d4eaa90d7ab36..e47926d95d2fa2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -786,7 +786,7 @@ Plotting - Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`) - Line plots no longer assume monotonic x data when calculating xlims, they show the entire lines now even for unsorted x data. (:issue:`11310`, :issue:`11471`) - With matplotlib 2.0.0 and above, calculation of x limits for line plots is left to matplotlib, so that its new default settings are applied. (:issue:`15495`) -- Bug in ``Series.plot.bar`` or ``DataFramee.plot.bar`` with ``y`` not respecting user-passed ``color`` (:issue:`16822`) +- Bug in ``Series.plot.bar`` or ``DataFrame.plot.bar`` with ``y`` not respecting user-passed ``color`` (:issue:`16822`) - Bug causing ``plotting.parallel_coordinates`` to reset the random seed when using random colors (:issue:`17525`) From 9ac7c51faf15bfef0756f9ab50cef3177d7fe5a8 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 3 Oct 2017 12:49:07 -0700 Subject: [PATCH 14/76] COMPAT: Suppress .take() warning for numpy < 1.12 (#17764) Follow-up to gh-17352. --- pandas/tests/sparse/test_series.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 8c0ed322028e8c..13dab68b2e5b4f 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -9,7 +9,8 @@ import numpy as np import pandas as pd -from pandas import Series, DataFrame, bdate_range, isna, compat +from pandas import (Series, DataFrame, bdate_range, + isna, compat, _np_version_under1p12) from pandas.tseries.offsets import BDay import pandas.util.testing as tm from pandas.compat import range @@ -527,8 +528,13 @@ def test_numpy_take(self): sp = SparseSeries([1.0, 2.0, 3.0]) indices = [1, 2] - tm.assert_series_equal(np.take(sp, indices, axis=0).to_dense(), - np.take(sp.to_dense(), indices, axis=0)) + # gh-17352: older versions of numpy don't properly + # pass in arguments to downstream .take() implementations. 
+ warning = FutureWarning if _np_version_under1p12 else None + + with tm.assert_produces_warning(warning, check_stacklevel=False): + tm.assert_series_equal(np.take(sp, indices, axis=0).to_dense(), + np.take(sp.to_dense(), indices, axis=0)) msg = "the 'out' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.take, From 69024a0110fdc5d8e8a015ea2c5316826e2f80be Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Oct 2017 12:57:05 -0700 Subject: [PATCH 15/76] Explicitly define cmp_pandas_datetimestruct (#17750) --- pandas/_libs/src/datetime/np_datetime.c | 4 ++-- pandas/_libs/src/datetime/np_datetime.h | 8 ++++++++ setup.py | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c index ffb901981f939f..f8254ed9d84180 100644 --- a/pandas/_libs/src/datetime/np_datetime.c +++ b/pandas/_libs/src/datetime/np_datetime.c @@ -269,8 +269,8 @@ static void set_datetimestruct_days(npy_int64 days, /* * Compares two pandas_datetimestruct objects chronologically */ -int cmp_pandas_datetimestruct(pandas_datetimestruct *a, - pandas_datetimestruct *b) { +int cmp_pandas_datetimestruct(const pandas_datetimestruct *a, + const pandas_datetimestruct *b) { if (a->year > b->year) { return 1; } else if (a->year < b->year) { diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h index a20bff60126aac..af3d2e0f01c1b5 100644 --- a/pandas/_libs/src/datetime/np_datetime.h +++ b/pandas/_libs/src/datetime/np_datetime.h @@ -99,6 +99,14 @@ convert_datetimestruct_to_datetime(pandas_datetime_metadata *meta, npy_int64 get_datetimestruct_days(const pandas_datetimestruct *dts); + +/* + * Compares two pandas_datetimestruct objects chronologically + */ +int cmp_pandas_datetimestruct(const pandas_datetimestruct *a, + const pandas_datetimestruct *b); + + /* * Adjusts a datetimestruct based on a minutes offset. Assumes * the current values are valid. diff --git a/setup.py b/setup.py index 793aa089e708fa..80be007ba2115c 100755 --- a/setup.py +++ b/setup.py @@ -511,7 +511,7 @@ def pxd(name): 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, '_libs.reshape': {'pyxfile': '_libs/reshape', - 'depends': _pxi_dep['reshape'], 'include': []}, + 'depends': _pxi_dep['reshape']}, '_libs.interval': {'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, @@ -527,7 +527,7 @@ def pxd(name): 'pandas/_libs/src/parser/io.c']}, '_libs.sparse': {'pyxfile': '_libs/sparse', 'depends': (['pandas/_libs/sparse.pyx'] + - _pxi_dep['sparse']), 'include': []}, + _pxi_dep['sparse'])}, '_libs.testing': {'pyxfile': '_libs/testing', 'depends': ['pandas/_libs/testing.pyx']}, '_libs.hashing': {'pyxfile': '_libs/hashing', From 81694dce171ecd93a65e32ed455612ee967d3951 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 3 Oct 2017 13:42:52 -0700 Subject: [PATCH 16/76] BUG: Validate the justify parameter in to_html (#17766) BUG: Validate the justify parameter in to_html Closes gh-17527. 
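The intended behavior, sketched (any keyword in the new whitelist passes
through; anything else now raises instead of silently emitting bad HTML):

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2]})

    df.to_html(justify="center")        # valid, renders text-align: center

    try:
        df.to_html(justify="middle")    # "middle" is not a valid value
    except ValueError as exc:
        print(exc)                      # Invalid value for justify parameter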
--- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/core/frame.py | 4 ++ pandas/io/formats/format.py | 10 ++++- pandas/tests/io/formats/test_to_html.py | 50 +++++++------------------ 4 files changed, 28 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index e47926d95d2fa2..61c05d1b226e05 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -777,7 +777,8 @@ I/O - Bug in :func:`read_stata` where the index was not set (:issue:`16342`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) - Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) -- Bug in ``DataFrame.to_html()`` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) +- Bug in :meth:`DataFrame.to_html` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) +- Bug in :meth:`DataFrame.to_html` in which there was no validation of the ``justify`` parameter (:issue:`17527`) - Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`) Plotting diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 01e83821d45248..778a3dc9046a31 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1685,6 +1685,10 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, .. versionadded:: 0.19.0 """ + if (justify is not None and + justify not in fmt._VALID_JUSTIFY_PARAMETERS): + raise ValueError("Invalid value for justify parameter") + formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns, col_space=col_space, na_rep=na_rep, formatters=formatters, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 386d9c3ffe30df..e8ea0714b1dda1 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -79,9 +79,15 @@ line_width : int, optional Width to wrap a line in characters, default no wrap""" +_VALID_JUSTIFY_PARAMETERS = ("left", "right", "center", "justify", + "justify-all", "start", "end", "inherit", + "match-parent", "initial", "unset") + justify_docstring = """ - justify : {'left', 'right'}, default None - Left or right-justify the column labels. If None uses the option from + justify : {'left', 'right', 'center', 'justify', + 'justify-all', 'start', 'end', 'inherit', + 'match-parent', 'initial', 'unset'}, default None + How to justify the column labels. 
If None uses the option from
        the print configuration (controlled by set_option), 'right' out
        of the box."""

diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
index 194b5ba3e02765..0c8ea98a44d50a 100644
--- a/pandas/tests/io/formats/test_to_html.py
+++ b/pandas/tests/io/formats/test_to_html.py
@@ -1557,15 +1557,16 @@ def test_to_html_multiindex(self):
         assert result == expected

-    def test_to_html_justify(self):
+    @pytest.mark.parametrize("justify", fmt._VALID_JUSTIFY_PARAMETERS)
+    def test_to_html_justify(self, justify):
         df = DataFrame({'A': [6, 30000, 2],
                         'B': [1, 2, 70000],
                         'C': [223442, 0, 1]},
                        columns=['A', 'B', 'C'])
-        result = df.to_html(justify='left')
+        result = df.to_html(justify=justify)
         expected = ('<table border="1" class="dataframe">\n'
                     '  <thead>\n'
-                    '    <tr style="text-align: left;">\n'
+                    '    <tr style="text-align: {justify};">\n'
                     '      <th></th>\n'
                     '      <th>A</th>\n'
                     '      <th>B</th>\n'
                     '      <th>C</th>\n'
@@ -1592,41 +1593,18 @@ def test_to_html_justify(self):
                     '      <td>1</td>\n'
                     '    </tr>\n'
                     '  </tbody>\n'
-                    '</table>')
+                    '</table>'.format(justify=justify))
         assert result == expected

-        result = df.to_html(justify='right')
-        expected = ('<table border="1" class="dataframe">\n'
-                    '  <thead>\n'
-                    '    <tr style="text-align: right;">\n'
-                    '      <th></th>\n'
-                    '      <th>A</th>\n'
-                    '      <th>B</th>\n'
-                    '      <th>C</th>\n'
-                    '    </tr>\n'
-                    '  </thead>\n'
-                    '  <tbody>\n'
-                    '    <tr>\n'
-                    '      <th>0</th>\n'
-                    '      <td>6</td>\n'
-                    '      <td>1</td>\n'
-                    '      <td>223442</td>\n'
-                    '    </tr>\n'
-                    '    <tr>\n'
-                    '      <th>1</th>\n'
-                    '      <td>30000</td>\n'
-                    '      <td>2</td>\n'
-                    '      <td>0</td>\n'
-                    '    </tr>\n'
-                    '    <tr>\n'
-                    '      <th>2</th>\n'
-                    '      <td>2</td>\n'
-                    '      <td>70000</td>\n'
-                    '      <td>1</td>\n'
-                    '    </tr>\n'
-                    '  </tbody>\n'
-                    '</table>')
-        assert result == expected
+    @pytest.mark.parametrize("justify", ["super-right", "small-left",
+                                         "noinherit", "tiny", "pandas"])
+    def test_to_html_invalid_justify(self, justify):
+        # see gh-17527
+        df = DataFrame()
+        msg = "Invalid value for justify parameter"
+
+        with tm.assert_raises_regex(ValueError, msg):
+            df.to_html(justify=justify)

     def test_to_html_index(self):
         index = ['foo', 'bar', 'baz']
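The next patch retires ``Series.select``/``DataFrame.select``. The
replacement pattern — map a criterion over the axis labels, then index with
the resulting boolean mask — is the same one its rewritten ``filter`` uses
internally. A small sketch of the migration (mirroring the whatsnew example
below, with a different criterion):

.. code-block:: python

    import pandas as pd

    df = pd.DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz'])

    crit = lambda label: label.startswith('ba')

    # deprecated: df.select(crit) now emits a FutureWarning
    df.loc[df.index.map(crit)]          # rows 'bar' and 'baz'

    # df.filter(like='ba') builds the same kind of label mask internally
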
From 48d0460ab9acbee223bae1be699344f8fd232224 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Wed, 4 Oct 2017 07:07:36 -0400
Subject: [PATCH 17/76] DEPR: deprecate .select() in favor of .loc[] (#17633)

closes #12401
---
 doc/source/whatsnew/v0.21.0.txt                |  25 +++
 pandas/core/common.py                          |   8 ++
 pandas/core/generic.py                         |  30 +++--
 pandas/core/indexing.py                        | 105 +++++++++++++-----
 pandas/tests/frame/test_alter_axes.py          |   5 +-
 pandas/tests/frame/test_axis_select_reindex.py |  34 +++++-
 pandas/tests/frame/test_mutate_columns.py      |   1 +
 pandas/tests/groupby/test_groupby.py           |   3 +-
 pandas/tests/series/test_indexing.py           |  18 +--
 pandas/tests/test_multilevel.py                |   3 +-
 10 files changed, 179 insertions(+), 53 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 61c05d1b226e05..812bc2e031d78d 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -667,6 +667,31 @@ Deprecations
 - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`)
 - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`)
You can use " + ".loc[labels.map(crit)] as a replacement", + FutureWarning, stacklevel=2) + axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) axis_values = self._get_axis(axis) @@ -3101,7 +3108,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): See Also -------- - pandas.DataFrame.select + pandas.DataFrame.loc Notes ----- @@ -3120,20 +3127,23 @@ def filter(self, items=None, like=None, regex=None, axis=None): if axis is None: axis = self._info_axis_name - axis_name = self._get_axis_name(axis) - axis_values = self._get_axis(axis_name) + labels = self._get_axis(axis) if items is not None: - return self.reindex(**{axis_name: - [r for r in items if r in axis_values]}) + name = self._get_axis_name(axis) + return self.reindex( + **{name: [r for r in items if r in labels]}) elif like: - matchf = lambda x: (like in x if isinstance(x, string_types) else - like in str(x)) - return self.select(matchf, axis=axis_name) + def f(x): + if not isinstance(x, string_types): + x = str(x) + return like in x + values = labels.map(f) + return self.loc(axis=axis)[values] elif regex: matcher = re.compile(regex) - return self.select(lambda x: matcher.search(str(x)) is not None, - axis=axis_name) + values = labels.map(lambda x: matcher.search(str(x)) is not None) + return self.loc(axis=axis)[values] else: raise TypeError('Must pass either `items`, `like`, or `regex`') diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index e977e84702982b..199aa9cfca5067 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -99,6 +99,8 @@ def __call__(self, axis=None): # we need to return a copy of ourselves new_self = self.__class__(self.obj, self.name) + if axis is not None: + axis = self.obj._get_axis_number(axis) new_self.axis = axis return new_self @@ -107,7 +109,8 @@ def __iter__(self): def __getitem__(self, key): if type(key) is tuple: - key = tuple(com._apply_if_callable(x, self.obj) for x in key) + key = tuple(com._apply_if_callable(x, self.obj) + for x in key) try: values = self.obj.get_value(*key) if is_scalar(values): @@ -117,10 +120,16 @@ def __getitem__(self, key): return self._getitem_tuple(key) else: + # we by definition only have the 0th axis + axis = self.axis or 0 + key = com._apply_if_callable(key, self.obj) - return self._getitem_axis(key, axis=0) + return self._getitem_axis(key, axis=axis) + + def _get_label(self, label, axis=None): + if axis is None: + axis = self.axis or 0 - def _get_label(self, label, axis=0): if self.ndim == 1: # for perf reasons we want to try _xs first # as its basically direct indexing @@ -135,10 +144,14 @@ def _get_label(self, label, axis=0): return self.obj._xs(label, axis=axis) - def _get_loc(self, key, axis=0): + def _get_loc(self, key, axis=None): + if axis is None: + axis = self.axis return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=0, kind=None): + def _slice(self, obj, axis=None, kind=None): + if axis is None: + axis = self.axis return self.obj._slice(obj, axis=axis, kind=kind) def _get_setitem_indexer(self, key): @@ -173,7 +186,8 @@ def _get_setitem_indexer(self, key): def __setitem__(self, key, value): if isinstance(key, tuple): - key = tuple(com._apply_if_callable(x, self.obj) for x in key) + key = tuple(com._apply_if_callable(x, self.obj) + for x in key) else: key = com._apply_if_callable(key, self.obj) indexer = self._get_setitem_indexer(key) @@ -192,10 +206,12 @@ def _has_valid_tuple(self, key): "[{types}] types" .format(types=self._valid_types)) - def _should_validate_iterable(self, 
axis=0): + def _should_validate_iterable(self, axis=None): """ return a boolean whether this axes needs validation for a passed iterable """ + if axis is None: + axis = self.axis or 0 ax = self.obj._get_axis(axis) if isinstance(ax, MultiIndex): return False @@ -233,6 +249,8 @@ def _convert_range(self, key, is_setter=False): def _convert_scalar_indexer(self, key, axis): # if we are accessing via lowered dim, use the last dim + if axis is None: + axis = 0 ax = self.obj._get_axis(min(axis, self.ndim - 1)) # a scalar return ax._convert_scalar_indexer(key, kind=self.name) @@ -895,7 +913,9 @@ def _multi_take(self, tup): except(KeyError, IndexingError): raise self._exception - def _convert_for_reindex(self, key, axis=0): + def _convert_for_reindex(self, key, axis=None): + if axis is None: + axis = self.axis or 0 labels = self.obj._get_axis(axis) if is_bool_indexer(key): @@ -925,7 +945,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup): try: # fast path for series or for tup devoid of slices - return self._get_label(tup, axis=0) + return self._get_label(tup, axis=self.axis) except TypeError: # slices are unhashable pass @@ -1015,7 +1035,7 @@ def _getitem_nested_tuple(self, tup): # this is a series with a multi-index specified a tuple of # selectors - return self._getitem_axis(tup, axis=0) + return self._getitem_axis(tup, axis=self.axis) # handle the multi-axis by taking sections and reducing # this is iterative @@ -1049,7 +1069,10 @@ def _getitem_nested_tuple(self, tup): return obj - def _getitem_axis(self, key, axis=0): + def _getitem_axis(self, key, axis=None): + + if axis is None: + axis = self.axis or 0 if self._should_validate_iterable(axis): self._has_valid_type(key, axis) @@ -1084,7 +1107,10 @@ def _getitem_axis(self, key, axis=0): return self._get_label(key, axis=axis) - def _getitem_iterable(self, key, axis=0): + def _getitem_iterable(self, key, axis=None): + if axis is None: + axis = self.axis or 0 + if self._should_validate_iterable(axis): self._has_valid_type(key, axis) @@ -1138,7 +1164,7 @@ def _getitem_iterable(self, key, axis=0): return result - def _convert_to_indexer(self, obj, axis=0, is_setter=False): + def _convert_to_indexer(self, obj, axis=None, is_setter=False): """ Convert indexing key into something we can use to do actual fancy indexing on an ndarray @@ -1153,6 +1179,9 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): raise AmbiguousIndexError with integer labels? 
- No, prefer label-based indexing """ + if axis is None: + axis = self.axis or 0 + labels = self.obj._get_axis(axis) if isinstance(obj, slice): @@ -1255,9 +1284,12 @@ def _tuplify(self, loc): tup[0] = loc return tuple(tup) - def _get_slice_axis(self, slice_obj, axis=0): + def _get_slice_axis(self, slice_obj, axis=None): obj = self.obj + if axis is None: + axis = self.axis or 0 + if not need_slice(slice_obj): return obj.copy(deep=False) indexer = self._convert_slice_indexer(slice_obj, axis) @@ -1325,7 +1357,8 @@ class _LocationIndexer(_NDFrameIndexer): def __getitem__(self, key): if type(key) is tuple: - key = tuple(com._apply_if_callable(x, self.obj) for x in key) + key = tuple(com._apply_if_callable(x, self.obj) + for x in key) try: if self._is_scalar_access(key): return self._getitem_scalar(key) @@ -1333,8 +1366,11 @@ def __getitem__(self, key): pass return self._getitem_tuple(key) else: - key = com._apply_if_callable(key, self.obj) - return self._getitem_axis(key, axis=0) + # we by definition only have the 0th axis + axis = self.axis or 0 + + maybe_callable = com._apply_if_callable(key, self.obj) + return self._getitem_axis(maybe_callable, axis=axis) def _is_scalar_access(self, key): raise NotImplementedError() @@ -1342,10 +1378,12 @@ def _is_scalar_access(self, key): def _getitem_scalar(self, key): raise NotImplementedError() - def _getitem_axis(self, key, axis=0): + def _getitem_axis(self, key, axis=None): raise NotImplementedError() - def _getbool_axis(self, key, axis=0): + def _getbool_axis(self, key, axis=None): + if axis is None: + axis = self.axis or 0 labels = self.obj._get_axis(axis) key = check_bool_indexer(labels, key) inds, = key.nonzero() @@ -1354,8 +1392,11 @@ def _getbool_axis(self, key, axis=0): except Exception as detail: raise self._exception(detail) - def _get_slice_axis(self, slice_obj, axis=0): + def _get_slice_axis(self, slice_obj, axis=None): """ this is pretty simple as we just have to deal with labels """ + if axis is None: + axis = self.axis or 0 + obj = self.obj if not need_slice(slice_obj): return obj.copy(deep=False) @@ -1528,7 +1569,10 @@ def _get_partial_string_timestamp_match_key(self, key, labels): return key - def _getitem_axis(self, key, axis=0): + def _getitem_axis(self, key, axis=None): + if axis is None: + axis = self.axis or 0 + labels = self.obj._get_axis(axis) key = self._get_partial_string_timestamp_match_key(key, labels) @@ -1717,7 +1761,9 @@ def _getitem_tuple(self, tup): return retval - def _get_slice_axis(self, slice_obj, axis=0): + def _get_slice_axis(self, slice_obj, axis=None): + if axis is None: + axis = self.axis or 0 obj = self.obj if not need_slice(slice_obj): @@ -1729,7 +1775,7 @@ def _get_slice_axis(self, slice_obj, axis=0): else: return self.obj._take(slice_obj, axis=axis, convert=False) - def _get_list_axis(self, key, axis=0): + def _get_list_axis(self, key, axis=None): """ Return Series values by list or array of integers @@ -1742,13 +1788,17 @@ def _get_list_axis(self, key, axis=0): ------- Series object """ + if axis is None: + axis = self.axis or 0 try: return self.obj._take(key, axis=axis, convert=False) except IndexError: # re-raise with different error message raise IndexError("positional indexers are out-of-bounds") - def _getitem_axis(self, key, axis=0): + def _getitem_axis(self, key, axis=None): + if axis is None: + axis = self.axis or 0 if isinstance(key, slice): self._has_valid_type(key, axis) @@ -1781,8 +1831,10 @@ def _getitem_axis(self, key, axis=0): return self._get_loc(key, axis=axis) - def 
_convert_to_indexer(self, obj, axis=0, is_setter=False): + def _convert_to_indexer(self, obj, axis=None, is_setter=False): """ much simpler as we only have to deal with our valid types """ + if axis is None: + axis = self.axis or 0 # make need to convert a float key if isinstance(obj, slice): @@ -1818,7 +1870,8 @@ def __getitem__(self, key): def __setitem__(self, key, value): if isinstance(key, tuple): - key = tuple(com._apply_if_callable(x, self.obj) for x in key) + key = tuple(com._apply_if_callable(x, self.obj) + for x in key) else: # scalar callable may return tuple key = com._apply_if_callable(key, self.obj) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 8bcc19e6d8ba41..27906838abb2de 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -143,10 +143,11 @@ def test_set_index_nonuniq(self): def test_set_index_bug(self): # GH1590 df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']}) - df2 = df.select(lambda indx: indx >= 1) - rs = df2.set_index('key') xp = DataFrame({'val': [1, 2]}, Index(['b', 'c'], name='key')) + + df2 = df.loc[df.index.map(lambda indx: indx >= 1)] + rs = df2.set_index('key') assert_frame_equal(rs, xp) def test_set_index_pass_arrays(self): diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 219c1df301c4b6..f9a4275d14f55d 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -796,16 +796,38 @@ def test_filter_corner(self): assert_frame_equal(result, empty) def test_select(self): + + # deprecated: gh-12410 f = lambda x: x.weekday() == 2 - result = self.tsframe.select(f, axis=0) - expected = self.tsframe.reindex( - index=self.tsframe.index[[f(x) for x in self.tsframe.index]]) - assert_frame_equal(result, expected) + index = self.tsframe.index[[f(x) for x in self.tsframe.index]] + expected_weekdays = self.tsframe.reindex(index=index) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.tsframe.select(f, axis=0) + assert_frame_equal(result, expected_weekdays) + + result = self.frame.select(lambda x: x in ('B', 'D'), axis=1) + expected = self.frame.reindex(columns=['B', 'D']) + assert_frame_equal(result, expected, check_names=False) + + # replacement + f = lambda x: x.weekday == 2 + result = self.tsframe.loc(axis=0)[f(self.tsframe.index)] + assert_frame_equal(result, expected_weekdays) - result = self.frame.select(lambda x: x in ('B', 'D'), axis=1) + crit = lambda x: x in ['B', 'D'] + result = self.frame.loc(axis=1)[(self.frame.columns.map(crit))] expected = self.frame.reindex(columns=['B', 'D']) + assert_frame_equal(result, expected, check_names=False) + + # doc example + df = DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) - # TODO should reindex check_names? 
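# Hedged aside, not part of the patch: the replacement idiom these tests
# exercise, restated on a toy frame --
#   df = DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz'])
#   crit = lambda x: x in ['bar', 'baz']
#   df.loc[df.index.map(crit)]              # label-based stand-in for df.select(crit)
#   df.loc(axis=1)[df.columns.map(lambda c: c == 'A')]   # same pattern along columns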
+ crit = lambda x: x in ['bar', 'baz'] + with tm.assert_produces_warning(FutureWarning): + expected = df.select(crit) + result = df.loc[df.index.map(crit)] assert_frame_equal(result, expected, check_names=False) def test_take(self): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 0043475702f94b..26e2b801f64607 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -83,6 +83,7 @@ def test_assign_order(self): def test_assign_bad(self): df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + # non-keyword argument with pytest.raises(TypeError): df.assign(lambda x: x.A) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 47bf837fa62d95..657de9b589dc91 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3103,7 +3103,8 @@ def agg_before(hour, func, fix=False): """ def _func(data): - d = data.select(lambda x: x.hour < 11).dropna() + d = data.loc[data.index.map( + lambda x: x.hour < 11)].dropna() if fix: data[data.index[0]] if len(d) == 0: diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 09ba0e197438dd..93e7b81163b549 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -2225,14 +2225,18 @@ def test_rename(self): assert result.name == expected.name def test_select(self): - n = len(self.ts) - result = self.ts.select(lambda x: x >= self.ts.index[n // 2]) - expected = self.ts.reindex(self.ts.index[n // 2:]) - assert_series_equal(result, expected) - result = self.ts.select(lambda x: x.weekday() == 2) - expected = self.ts[self.ts.index.weekday == 2] - assert_series_equal(result, expected) + # deprecated: gh-12410 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + n = len(self.ts) + result = self.ts.select(lambda x: x >= self.ts.index[n // 2]) + expected = self.ts.reindex(self.ts.index[n // 2:]) + assert_series_equal(result, expected) + + result = self.ts.select(lambda x: x.weekday() == 2) + expected = self.ts[self.ts.index.weekday == 2] + assert_series_equal(result, expected) def test_cast_on_putmask(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 050335988ca417..94577db15f01a2 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1239,7 +1239,8 @@ def test_groupby_level_no_obs(self): 'f2', 's1'), ('f2', 's2'), ('f3', 's1'), ('f3', 's2')]) df = DataFrame( [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx) - df1 = df.select(lambda u: u[0] in ['f2', 'f3'], axis=1) + df1 = df.loc(axis=1)[df.columns.map( + lambda u: u[0] in ['f2', 'f3'])] grouped = df1.groupby(axis=1, level=0) result = grouped.sum() From a26afcaa99ef3b30334b78ced2ae11402167bbc1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Oct 2017 16:53:50 -0700 Subject: [PATCH 18/76] Move fields functions out of the way (#17770) --- pandas/_libs/tslib.pyx | 504 +------------------------------ pandas/_libs/tslibs/fields.pyx | 537 +++++++++++++++++++++++++++++++++ pandas/core/indexes/period.py | 3 +- setup.py | 5 + 4 files changed, 547 insertions(+), 502 deletions(-) create mode 100644 pandas/_libs/tslibs/fields.pyx diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 745632cf3d7198..a9881daaf8785c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -86,6 +86,9 @@ from tslibs.timezones cimport ( get_timezone, get_utcoffset, 
maybe_get_tz, get_dst_info ) +from tslibs.fields import ( + get_date_name_field, get_start_end_field, get_date_field, + build_field_sarray) cdef inline object create_timestamp_from_ts( @@ -3867,48 +3870,6 @@ cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): # Accessors #---------------------------------------------------------------------- -def build_field_sarray(ndarray[int64_t] dtindex): - """ - Datetime as int64 representation to a structured array of fields - """ - cdef: - Py_ssize_t i, count = 0 - pandas_datetimestruct dts - ndarray[int32_t] years, months, days, hours, minutes, seconds, mus - - count = len(dtindex) - - sa_dtype = [('Y', 'i4'), # year - ('M', 'i4'), # month - ('D', 'i4'), # day - ('h', 'i4'), # hour - ('m', 'i4'), # min - ('s', 'i4'), # second - ('u', 'i4')] # microsecond - - out = np.empty(count, dtype=sa_dtype) - - years = out['Y'] - months = out['M'] - days = out['D'] - hours = out['h'] - minutes = out['m'] - seconds = out['s'] - mus = out['u'] - - for i in range(count): - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - years[i] = dts.year - months[i] = dts.month - days[i] = dts.day - hours[i] = dts.hour - minutes[i] = dts.min - seconds[i] = dts.sec - mus[i] = dts.us - - return out - - def get_time_micros(ndarray[int64_t] dtindex): """ Datetime as int64 representation to a structured array of fields @@ -3928,453 +3889,6 @@ def get_time_micros(ndarray[int64_t] dtindex): return micros -@cython.wraparound(False) -@cython.boundscheck(False) -def get_date_field(ndarray[int64_t] dtindex, object field): - """ - Given a int64-based datetime index, extract the year, month, etc., - field and return an array of these values. - """ - cdef: - Py_ssize_t i, count = 0 - ndarray[int32_t] out - ndarray[int32_t, ndim=2] _month_offset - int isleap, isleap_prev - pandas_datetimestruct dts - int mo_off, doy, dow, woy - - _month_offset = np.array( - [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], - [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], - dtype=np.int32 ) - - count = len(dtindex) - out = np.empty(count, dtype='i4') - - if field == 'Y': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.year - return out - - elif field == 'M': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.month - return out - - elif field == 'D': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.day - return out - - elif field == 'h': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.hour - return out - - elif field == 'm': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.min - return out - - elif field == 's': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.sec - return out - - elif field == 'us': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue 
- - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.us - return out - - elif field == 'ns': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.ps / 1000 - return out - elif field == 'doy': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - out[i] = _month_offset[isleap, dts.month -1] + dts.day - return out - - elif field == 'dow': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dayofweek(dts.year, dts.month, dts.day) - return out - - elif field == 'woy': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - isleap_prev = is_leapyear(dts.year - 1) - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - #estimate - woy = (doy - 1) - dow + 3 - if woy >= 0: - woy = woy / 7 + 1 - - # verify - if woy < 0: - if (woy > -2) or (woy == -2 and isleap_prev): - woy = 53 - else: - woy = 52 - elif woy == 53: - if 31 - dts.day + dow < 3: - woy = 1 - - out[i] = woy - return out - - elif field == 'q': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = dts.month - out[i] = ((out[i] - 1) / 3) + 1 - return out - - elif field == 'dim': - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - out[i] = days_in_month(dts) - return out - elif field == 'is_leap_year': - return _isleapyear_arr(get_date_field(dtindex, 'Y')) - - raise ValueError("Field %s not supported" % field) - - -@cython.wraparound(False) -def get_start_end_field(ndarray[int64_t] dtindex, object field, - object freqstr=None, int month_kw=12): - """ - Given an int64-based datetime index return array of indicators - of whether timestamps are at the start/end of the month/quarter/year - (defined by frequency). - """ - cdef: - Py_ssize_t i - int count = 0 - bint is_business = 0 - int end_month = 12 - int start_month = 1 - ndarray[int8_t] out - ndarray[int32_t, ndim=2] _month_offset - bint isleap - pandas_datetimestruct dts - int mo_off, dom, doy, dow, ldom - - _month_offset = np.array( - [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], - [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], - dtype=np.int32 ) - - count = len(dtindex) - out = np.zeros(count, dtype='int8') - - if freqstr: - if freqstr == 'C': - raise ValueError( - "Custom business days is not supported by %s" % field) - is_business = freqstr[0] == 'B' - - # YearBegin(), BYearBegin() use month = starting month of year. - # QuarterBegin(), BQuarterBegin() use startingMonth = starting - # month of year. Other offests use month, startingMonth as ending - # month of year. 
- - if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( - freqstr[1:3] in ['MS', 'QS', 'AS']): - end_month = 12 if month_kw == 1 else month_kw - 1 - start_month = month_kw - else: - end_month = month_kw - start_month = (end_month % 12) + 1 - else: - end_month = 12 - start_month = 1 - - if field == 'is_month_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_month_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - dow = dayofweek(dts.year, dts.month, dts.day) - - if (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if ldom == doy: - out[i] = 1 - return out.view(bool) - - elif field == 'is_quarter_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - if ((dts.month - start_month) % 3 == 0) and ( - (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if ((dts.month - start_month) % 3 == 0) and dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_quarter_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - dow = dayofweek(dts.year, dts.month, dts.day) - - if ((dts.month - end_month) % 3 == 0) and ( - (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2))): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if ((dts.month - end_month) % 3 == 0) and (ldom == doy): - out[i] = 1 - return out.view(bool) - - elif field == 'is_year_start': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - 
pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - if (dts.month == start_month) and ( - (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - dom = dts.day - - if (dts.month == start_month) and dom == 1: - out[i] = 1 - return out.view(bool) - - elif field == 'is_year_end': - if is_business: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - dom = dts.day - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dom - dow = dayofweek(dts.year, dts.month, dts.day) - ldom = _month_offset[isleap, dts.month] - - if (dts.month == end_month) and ( - (ldom == doy and dow < 5) or ( - dow == 4 and (ldom - doy <= 2))): - out[i] = 1 - return out.view(bool) - else: - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue - - pandas_datetime_to_datetimestruct( - dtindex[i], PANDAS_FR_ns, &dts) - isleap = is_leapyear(dts.year) - mo_off = _month_offset[isleap, dts.month - 1] - dom = dts.day - doy = mo_off + dom - ldom = _month_offset[isleap, dts.month] - - if (dts.month == end_month) and (ldom == doy): - out[i] = 1 - return out.view(bool) - - raise ValueError("Field %s not supported" % field) - - -@cython.wraparound(False) -@cython.boundscheck(False) -def get_date_name_field(ndarray[int64_t] dtindex, object field): - """ - Given a int64-based datetime index, return array of strings of date - name based on requested field (e.g. weekday_name) - """ - cdef: - Py_ssize_t i, count = 0 - ndarray[object] out - pandas_datetimestruct dts - int dow - - _dayname = np.array( - ['Monday', 'Tuesday', 'Wednesday', 'Thursday', - 'Friday', 'Saturday', 'Sunday'], - dtype=np.object_ ) - - count = len(dtindex) - out = np.empty(count, dtype=object) - - if field == 'weekday_name': - for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = np.nan; continue - - pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - dow = dayofweek(dts.year, dts.month, dts.day) - out[i] = _dayname[dow] - return out - - raise ValueError("Field %s not supported" % field) - - cdef int64_t DAY_NS = 86400000000000LL @@ -4508,18 +4022,6 @@ def dates_normalized(ndarray[int64_t] stamps, tz=None): #---------------------------------------------------------------------- -cpdef _isleapyear_arr(ndarray years): - cdef: - ndarray[int8_t] out - - # to make NaT result as False - out = np.zeros(len(years), dtype='int8') - out[np.logical_or(years % 400 == 0, - np.logical_and(years % 4 == 0, - years % 100 > 0))] = 1 - return out.view(bool) - - def monthrange(int64_t year, int64_t month): cdef: int64_t days diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx new file mode 100644 index 00000000000000..3ea414b2d4a701 --- /dev/null +++ b/pandas/_libs/tslibs/fields.pyx @@ -0,0 +1,537 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +# cython: linetrace=False +# distutils: define_macros=CYTHON_TRACE=0 +# distutils: define_macros=CYTHON_TRACE_NOGIL=0 +""" +Functions for accessing attributes of Timestamp/datetime64/datetime-like +objects and arrays +""" + +cimport cython +from cython cimport Py_ssize_t + +import numpy as np +cimport numpy as np +from numpy cimport ndarray, int64_t, int32_t, int8_t 
+np.import_array() + + +from datetime cimport ( + pandas_datetimestruct, + pandas_datetime_to_datetimestruct, + PANDAS_FR_ns, + days_per_month_table, + is_leapyear, + dayofweek) + +cimport util + +cdef int64_t NPY_NAT = util.get_nat() + + +def build_field_sarray(ndarray[int64_t] dtindex): + """ + Datetime as int64 representation to a structured array of fields + """ + cdef: + Py_ssize_t i, count = 0 + pandas_datetimestruct dts + ndarray[int32_t] years, months, days, hours, minutes, seconds, mus + + count = len(dtindex) + + sa_dtype = [('Y', 'i4'), # year + ('M', 'i4'), # month + ('D', 'i4'), # day + ('h', 'i4'), # hour + ('m', 'i4'), # min + ('s', 'i4'), # second + ('u', 'i4')] # microsecond + + out = np.empty(count, dtype=sa_dtype) + + years = out['Y'] + months = out['M'] + days = out['D'] + hours = out['h'] + minutes = out['m'] + seconds = out['s'] + mus = out['u'] + + for i in range(count): + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + years[i] = dts.year + months[i] = dts.month + days[i] = dts.day + hours[i] = dts.hour + minutes[i] = dts.min + seconds[i] = dts.sec + mus[i] = dts.us + + return out + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_name_field(ndarray[int64_t] dtindex, object field): + """ + Given a int64-based datetime index, return array of strings of date + name based on requested field (e.g. weekday_name) + """ + cdef: + Py_ssize_t i, count = 0 + ndarray[object] out + pandas_datetimestruct dts + int dow + + _dayname = np.array( + ['Monday', 'Tuesday', 'Wednesday', 'Thursday', + 'Friday', 'Saturday', 'Sunday'], + dtype=np.object_) + + count = len(dtindex) + out = np.empty(count, dtype=object) + + if field == 'weekday_name': + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + dow = dayofweek(dts.year, dts.month, dts.day) + out[i] = _dayname[dow] + return out + + raise ValueError("Field %s not supported" % field) + + +@cython.wraparound(False) +def get_start_end_field(ndarray[int64_t] dtindex, object field, + object freqstr=None, int month_kw=12): + """ + Given an int64-based datetime index return array of indicators + of whether timestamps are at the start/end of the month/quarter/year + (defined by frequency). + """ + cdef: + Py_ssize_t i + int count = 0 + bint is_business = 0 + int end_month = 12 + int start_month = 1 + ndarray[int8_t] out + ndarray[int32_t, ndim=2] _month_offset + bint isleap + pandas_datetimestruct dts + int mo_off, dom, doy, dow, ldom + + _month_offset = np.array( + [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], + [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], + dtype=np.int32) + + count = len(dtindex) + out = np.zeros(count, dtype='int8') + + if freqstr: + if freqstr == 'C': + raise ValueError( + "Custom business days is not supported by %s" % field) + is_business = freqstr[0] == 'B' + + # YearBegin(), BYearBegin() use month = starting month of year. + # QuarterBegin(), BQuarterBegin() use startingMonth = starting + # month of year. Other offests use month, startingMonth as ending + # month of year. 
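# Worked example of the month_kw bookkeeping below (inputs assumed for
# illustration only):
#   freqstr='QS-APR', month_kw=4  -> start_month=4, end_month=3
#   freqstr='A-DEC',  month_kw=12 -> end_month=12, start_month=1
#   freqstr=None                  -> end_month=12, start_month=1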
+ + if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( + freqstr[1:3] in ['MS', 'QS', 'AS']): + end_month = 12 if month_kw == 1 else month_kw - 1 + start_month = month_kw + else: + end_month = month_kw + start_month = (end_month % 12) + 1 + else: + end_month = 12 + start_month = 1 + + if field == 'is_month_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_month_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = dayofweek(dts.year, dts.month, dts.day) + + if (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ldom == doy: + out[i] = 1 + return out.view(bool) + + elif field == 'is_quarter_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if ((dts.month - start_month) % 3 == 0) and ( + (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if ((dts.month - start_month) % 3 == 0) and dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_quarter_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = dayofweek(dts.year, dts.month, dts.day) + + if ((dts.month - end_month) % 3 == 0) and ( + (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ((dts.month - end_month) % 3 == 0) and (ldom == doy): + out[i] = 1 + return out.view(bool) + + elif field == 'is_year_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + 
pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if (dts.month == start_month) and ( + (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if (dts.month == start_month) and dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_year_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + dom = dts.day + mo_off = _month_offset[isleap, dts.month - 1] + doy = mo_off + dom + dow = dayofweek(dts.year, dts.month, dts.day) + ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and ( + (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = 0; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and (ldom == doy): + out[i] = 1 + return out.view(bool) + + raise ValueError("Field %s not supported" % field) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_field(ndarray[int64_t] dtindex, object field): + """ + Given a int64-based datetime index, extract the year, month, etc., + field and return an array of these values. + """ + cdef: + Py_ssize_t i, count = 0 + ndarray[int32_t] out + ndarray[int32_t, ndim=2] _month_offset + int isleap, isleap_prev + pandas_datetimestruct dts + int mo_off, doy, dow, woy + + _month_offset = np.array( + [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], + [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], + dtype=np.int32 ) + + count = len(dtindex) + out = np.empty(count, dtype='i4') + + if field == 'Y': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.year + return out + + elif field == 'M': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.month + return out + + elif field == 'D': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.day + return out + + elif field == 'h': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.hour + return out + + elif field == 'm': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.min + return out + + elif field == 's': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.sec + return out + + elif field == 'us': + with nogil: + for i in 
range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.us + return out + + elif field == 'ns': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.ps / 1000 + return out + elif field == 'doy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + out[i] = _month_offset[isleap, dts.month -1] + dts.day + return out + + elif field == 'dow': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dayofweek(dts.year, dts.month, dts.day) + return out + + elif field == 'woy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + isleap_prev = is_leapyear(dts.year - 1) + mo_off = _month_offset[isleap, dts.month - 1] + doy = mo_off + dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + #estimate + woy = (doy - 1) - dow + 3 + if woy >= 0: + woy = woy / 7 + 1 + + # verify + if woy < 0: + if (woy > -2) or (woy == -2 and isleap_prev): + woy = 53 + else: + woy = 52 + elif woy == 53: + if 31 - dts.day + dow < 3: + woy = 1 + + out[i] = woy + return out + + elif field == 'q': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = dts.month + out[i] = ((out[i] - 1) / 3) + 1 + return out + + elif field == 'dim': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct( + dtindex[i], PANDAS_FR_ns, &dts) + out[i] = days_in_month(dts) + return out + elif field == 'is_leap_year': + return isleapyear_arr(get_date_field(dtindex, 'Y')) + + raise ValueError("Field %s not supported" % field) + + +cdef inline int days_in_month(pandas_datetimestruct dts) nogil: + return days_per_month_table[is_leapyear(dts.year)][dts.month -1] + + +cpdef isleapyear_arr(ndarray years): + """vectorized version of isleapyear; NaT evaluates as False""" + cdef: + ndarray[int8_t] out + + out = np.zeros(len(years), dtype='int8') + out[np.logical_or(years % 400 == 0, + np.logical_and(years % 4 == 0, + years % 100 > 0))] = 1 + return out.view(bool) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fb47d1db48610b..e6fc47845012a4 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -35,6 +35,7 @@ from pandas._libs.period import (Period, IncompatibleFrequency, get_period_field_arr, _validate_end_alias, _quarter_to_myear) +from pandas._libs.tslibs.fields import isleapyear_arr from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, _ensure_index @@ -589,7 +590,7 @@ def to_datetime(self, dayfirst=False): @property def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ - return tslib._isleapyear_arr(np.asarray(self.year)) + return isleapyear_arr(np.asarray(self.year)) @property def start_time(self): diff --git a/setup.py b/setup.py index 80be007ba2115c..23457c6f4edc1f 100755 --- a/setup.py +++ b/setup.py @@ -343,6 
+343,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/parsers.pyx', 'pandas/_libs/tslibs/strptime.pyx', 'pandas/_libs/tslibs/timezones.pyx', + 'pandas/_libs/tslibs/fields.pyx', 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/_libs/tslibs/parsing.pyx', 'pandas/io/sas/sas.pyx'] @@ -486,6 +487,10 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, + '_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields', + 'depends': tseries_depends, + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.period': {'pyxfile': '_libs/period', 'depends': (tseries_depends + ['pandas/_libs/src/period_helper.h']), From d9a8014474b192356624eebba2ac1a8b53bfec7b Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 4 Oct 2017 19:55:17 -0400 Subject: [PATCH 19/76] DOC: Add column name metadata to spec (#17769) --- doc/source/developer.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/developer.rst b/doc/source/developer.rst index 78c12b7e23b371..a695366d9ada3f 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -45,6 +45,8 @@ So that a ``pandas.DataFrame`` can be faithfully reconstructed, we store a .. code-block:: text {'index_columns': ['__index_level_0__', '__index_level_1__', ...], + 'column_index_names': [, , ...], + 'column_index_dtypes': [, , ..., ] 'columns': [, , ...], 'pandas_version': $VERSION} @@ -106,6 +108,8 @@ As an example of fully-formed metadata: .. code-block:: text {'index_columns': ['__index_level_0__'], + 'column_index_names': [None], + 'column_index_dtypes': ['object'], 'columns': [ {'name': 'c0', 'pandas_type': 'int8', From 6b29fc0eef4012c7556b019ae2448003adae6eb1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 4 Oct 2017 16:56:20 -0700 Subject: [PATCH 20/76] DEPR: Deprecate parse_cols in read_excel (#17774) closes #4988 --- doc/source/io.rst | 10 +++---- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/excel.py | 37 +++++++++++++---------- pandas/tests/io/test_excel.py | 52 ++++++++++++++++++++++----------- 4 files changed, 62 insertions(+), 38 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 8fe5685b33aff6..0aa4ea72e3b139 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2800,21 +2800,21 @@ Parsing Specific Columns It is often the case that users will insert columns to do temporary computations in Excel and you may not want to read in those columns. `read_excel` takes -a `parse_cols` keyword to allow you to specify a subset of columns to parse. +a `usecols` keyword to allow you to specify a subset of columns to parse. -If `parse_cols` is an integer, then it is assumed to indicate the last column +If `usecols` is an integer, then it is assumed to indicate the last column to be parsed. .. code-block:: python - read_excel('path_to_file.xls', 'Sheet1', parse_cols=2) + read_excel('path_to_file.xls', 'Sheet1', usecols=2) -If `parse_cols` is a list of integers, then it is assumed to be the file column +If `usecols` is a list of integers, then it is assumed to be the file column indices to be parsed. .. 
code-block:: python - read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) + read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3]) Parsing Dates diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 812bc2e031d78d..d7789bfbfd04c6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -658,6 +658,7 @@ Deprecations ~~~~~~~~~~~~ - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). +- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) - The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 41e3b5283a532e..c8d0e42a022ba2 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -31,7 +31,7 @@ import pandas.compat.openpyxl_compat as openpyxl_compat from warnings import warn from distutils.version import LooseVersion -from pandas.util._decorators import Appender +from pandas.util._decorators import Appender, deprecate_kwarg from textwrap import fill __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] @@ -86,7 +86,7 @@ Column (0-indexed) to use as the row labels of the DataFrame. Pass None if there is no such column. If a list is passed, those columns will be combined into a ``MultiIndex``. If a - subset of data is selected with ``parse_cols``, index_col + subset of data is selected with ``usecols``, index_col is based on the subset. names : array-like, default None List of column names to use. If file contains no header row, @@ -115,6 +115,10 @@ .. versionadded:: 0.19.0 parse_cols : int or list, default None + .. deprecated:: 0.21.0 + Pass in `usecols` instead. 
+ +usecols : int or list, default None * If None then parse all columns, * If int then indicates last column to be parsed * If list of ints then indicates list of column numbers to be parsed @@ -205,8 +209,9 @@ def get_writer(engine_name): @Appender(_read_excel_doc) +@deprecate_kwarg("parse_cols", "usecols") def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, - index_col=None, names=None, parse_cols=None, parse_dates=False, + index_col=None, names=None, usecols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, converters=None, dtype=None, true_values=None, false_values=None, engine=None, @@ -226,7 +231,7 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, return io._parse_excel( sheetname=sheet_name, header=header, skiprows=skiprows, names=names, - index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates, + index_col=index_col, usecols=usecols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, convert_float=convert_float, skip_footer=skip_footer, converters=converters, dtype=dtype, true_values=true_values, @@ -295,7 +300,7 @@ def __fspath__(self): return self._io def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, - names=None, index_col=None, parse_cols=None, parse_dates=False, + names=None, index_col=None, usecols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, converters=None, true_values=None, false_values=None, squeeze=False, **kwds): @@ -309,7 +314,7 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, return self._parse_excel(sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, - parse_cols=parse_cols, + usecols=usecols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, @@ -321,7 +326,7 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, squeeze=squeeze, **kwds) - def _should_parse(self, i, parse_cols): + def _should_parse(self, i, usecols): def _range2cols(areas): """ @@ -347,15 +352,15 @@ def _excel2num(x): cols.append(_excel2num(rng)) return cols - if isinstance(parse_cols, int): - return i <= parse_cols - elif isinstance(parse_cols, compat.string_types): - return i in _range2cols(parse_cols) + if isinstance(usecols, int): + return i <= usecols + elif isinstance(usecols, compat.string_types): + return i in _range2cols(usecols) else: - return i in parse_cols + return i in usecols def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, - skip_footer=0, index_col=None, parse_cols=None, + skip_footer=0, index_col=None, usecols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, true_values=None, false_values=None, verbose=False, dtype=None, @@ -470,10 +475,10 @@ def _parse_cell(cell_contents, cell_typ): row = [] for j, (value, typ) in enumerate(zip(sheet.row_values(i), sheet.row_types(i))): - if parse_cols is not None and j not in should_parse: - should_parse[j] = self._should_parse(j, parse_cols) + if usecols is not None and j not in should_parse: + should_parse[j] = self._should_parse(j, usecols) - if parse_cols is None or should_parse[j]: + if usecols is None or should_parse[j]: row.append(_parse_cell(value, typ)) data.append(row) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4e25fe03717182..f21f638799e579 100644 --- a/pandas/tests/io/test_excel.py +++ 
b/pandas/tests/io/test_excel.py @@ -158,56 +158,74 @@ def setup_method(self, method): self.check_skip() super(ReadingTestsBase, self).setup_method(method) - def test_parse_cols_int(self): + def test_usecols_int(self): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3) + df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, usecols=3) df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols=3) + usecols=3) + + with tm.assert_produces_warning(FutureWarning): + df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + index_col=0, parse_cols=3) + # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df3, dfref, check_names=False) - def test_parse_cols_list(self): + def test_usecols_list(self): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['B', 'C']) df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols=[0, 2, 3]) + usecols=[0, 2, 3]) df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols=[0, 2, 3]) + usecols=[0, 2, 3]) + + with tm.assert_produces_warning(FutureWarning): + df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + index_col=0, parse_cols=[0, 2, 3]) + # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) + tm.assert_frame_equal(df3, dfref, check_names=False) - def test_parse_cols_str(self): + def test_usecols_str(self): dfref = self.get_csv_refdf('test1') df1 = dfref.reindex(columns=['A', 'B', 'C']) df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols='A:D') + usecols='A:D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A:D') + usecols='A:D') + + with tm.assert_produces_warning(FutureWarning): + df4 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], + index_col=0, parse_cols='A:D') + # TODO add index to xls, read xls ignores index name ? 
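# For reference, the usecols forms covered by these tests (values are
# illustrative):
#   usecols=3         -> parse columns 0 through 3 (int means last column index)
#   usecols=[0, 2, 3] -> parse exactly those column indices
#   usecols='A,C:D'   -> Excel-style letters/ranges, i.e. columns 0, 2, 3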
tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) + tm.assert_frame_equal(df4, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols='A,C,D') + usecols='A,C,D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A,C,D') + usecols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, - parse_cols='A,C:D') + usecols='A,C:D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A,C:D') + usecols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -457,14 +475,14 @@ def test_read_one_empty_col_no_header(self): actual_header_none = read_excel( path, 'no_header', - parse_cols=[0], + usecols=[0], header=None ) actual_header_zero = read_excel( path, 'no_header', - parse_cols=[0], + usecols=[0], header=0 ) expected = DataFrame() @@ -486,14 +504,14 @@ def test_read_one_empty_col_with_header(self): actual_header_none = read_excel( path, 'with_header', - parse_cols=[0], + usecols=[0], header=None ) actual_header_zero = read_excel( path, 'with_header', - parse_cols=[0], + usecols=[0], header=0 ) expected_header_none = DataFrame(pd.Series([0], dtype='int64')) From 37860a5fc7c72a3285f060b44482d6d454fa78ae Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Oct 2017 17:13:32 -0700 Subject: [PATCH 21/76] Implement NaT properties/methods directly (#17765) --- pandas/_libs/tslib.pyx | 206 ++++++++++++++++++-------------- pandas/tests/scalar/test_nat.py | 49 ++++++++ 2 files changed, 164 insertions(+), 91 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a9881daaf8785c..f58aaa0ce3234f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- # cython: profile=False +# cython: linetrace=False +# distutils: define_macros=CYTHON_TRACE=0 +# distutils: define_macros=CYTHON_TRACE_NOGIL=0 cimport numpy as np from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, @@ -79,7 +82,6 @@ PyDateTime_IMPORT cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT - from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, @@ -783,6 +785,32 @@ class Timestamp(_Timestamp): _nat_strings = set(['NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN']) +def _make_nat_func(func_name, cls): + def f(*args, **kwargs): + return NaT + f.__name__ = func_name + f.__doc__ = getattr(cls, func_name).__doc__ + return f + + +def _make_nan_func(func_name, cls): + def f(*args, **kwargs): + return np.nan + f.__name__ = func_name + f.__doc__ = getattr(cls, func_name).__doc__ + return f + + +def _make_error_func(func_name, cls): + def f(*args, **kwargs): + raise ValueError("NaTType does not support " + func_name) + + f.__name__ = func_name + if cls is not None: + f.__doc__ = getattr(cls, func_name).__doc__ + return f + + class NaTType(_NaT): """(N)ot-(A)-(T)ime, the time equivalent of NaN""" @@ -865,6 +893,90 @@ class NaTType(_NaT): return NaT return NotImplemented + # ---------------------------------------------------------------------- + # inject the Timestamp field properties + # these by definition return np.nan + + year = property(fget=lambda self: np.nan) + quarter = property(fget=lambda 
self: np.nan) + month = property(fget=lambda self: np.nan) + day = property(fget=lambda self: np.nan) + hour = property(fget=lambda self: np.nan) + minute = property(fget=lambda self: np.nan) + second = property(fget=lambda self: np.nan) + millisecond = property(fget=lambda self: np.nan) + microsecond = property(fget=lambda self: np.nan) + nanosecond = property(fget=lambda self: np.nan) + + week = property(fget=lambda self: np.nan) + dayofyear = property(fget=lambda self: np.nan) + weekofyear = property(fget=lambda self: np.nan) + days_in_month = property(fget=lambda self: np.nan) + daysinmonth = property(fget=lambda self: np.nan) + dayofweek = property(fget=lambda self: np.nan) + weekday_name = property(fget=lambda self: np.nan) + + # inject Timedelta properties + days = property(fget=lambda self: np.nan) + seconds = property(fget=lambda self: np.nan) + microseconds = property(fget=lambda self: np.nan) + nanoseconds = property(fget=lambda self: np.nan) + + # inject pd.Period properties + qyear = property(fget=lambda self: np.nan) + + # ---------------------------------------------------------------------- + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + # These are the ones that can get their docstrings from datetime. + + # nan methods + weekday = _make_nan_func('weekday', datetime) + isoweekday = _make_nan_func('isoweekday', datetime) + + # _nat_methods + date = _make_nat_func('date', datetime) + + utctimetuple = _make_error_func('utctimetuple', datetime) + timetz = _make_error_func('timetz', datetime) + timetuple = _make_error_func('timetuple', datetime) + strptime = _make_error_func('strptime', datetime) + strftime = _make_error_func('strftime', datetime) + isocalendar = _make_error_func('isocalendar', datetime) + dst = _make_error_func('dst', datetime) + ctime = _make_error_func('ctime', datetime) + time = _make_error_func('time', datetime) + toordinal = _make_error_func('toordinal', datetime) + tzname = _make_error_func('tzname', datetime) + utcoffset = _make_error_func('utcoffset', datetime) + + # Timestamp has empty docstring for some methods. 
+ utcfromtimestamp = _make_error_func('utcfromtimestamp', None) + fromtimestamp = _make_error_func('fromtimestamp', None) + combine = _make_error_func('combine', None) + utcnow = _make_error_func('utcnow', None) + + if PY3: + timestamp = _make_error_func('timestamp', datetime) + + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + astimezone = _make_error_func('astimezone', Timestamp) + fromordinal = _make_error_func('fromordinal', Timestamp) + + # _nat_methods + to_pydatetime = _make_nat_func('to_pydatetime', Timestamp) + + now = _make_nat_func('now', Timestamp) + today = _make_nat_func('today', Timestamp) + round = _make_nat_func('round', Timestamp) + floor = _make_nat_func('floor', Timestamp) + ceil = _make_nat_func('ceil', Timestamp) + + tz_convert = _make_nat_func('tz_convert', Timestamp) + tz_localize = _make_nat_func('tz_localize', Timestamp) + replace = _make_nat_func('replace', Timestamp) + def __nat_unpickle(*args): # return constant defined in the module @@ -1323,6 +1435,7 @@ cdef _nat_rdivide_op(self, other): return np.nan return NotImplemented + cdef class _NaT(_Timestamp): def __hash__(_NaT self): @@ -1540,7 +1653,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if is_timestamp(ts): obj.value += ts.nanosecond obj.dts.ps = ts.nanosecond * 1000 - + if nanos: obj.value += nanos obj.dts.ps = nanos * 1000 @@ -3258,95 +3371,6 @@ cpdef convert_to_timedelta64(object ts, object unit): return ts.astype('timedelta64[ns]') -#---------------------------------------------------------------------- -# NaT methods/property setups - - -# inject the Timestamp field properties -# these by definition return np.nan -fields = ['year', 'quarter', 'month', 'day', 'hour', - 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond', - 'week', 'dayofyear', 'weekofyear', 'days_in_month', 'daysinmonth', - 'dayofweek', 'weekday_name', 'days', 'seconds', 'microseconds', - 'nanoseconds', 'qyear'] -for field in fields: - prop = property(fget=lambda self: np.nan) - setattr(NaTType, field, prop) - - -# define how we are handling NaT methods & inject -# to the NaTType class; these can return NaT, np.nan -# or raise respectively -_nat_methods = ['date', 'now', 'replace', 'to_pydatetime', - 'today', 'round', 'floor', 'ceil', 'tz_convert', - 'tz_localize'] -_nan_methods = ['weekday', 'isoweekday'] -_implemented_methods = [ - 'to_datetime', 'to_datetime64', 'isoformat', 'total_seconds'] -_implemented_methods.extend(_nat_methods) -_implemented_methods.extend(_nan_methods) - - -def _get_docstring(_method_name): - # NaT serves double duty as Timestamp & Timedelta - # missing value, so need to acquire doc-strings for both - - try: - return getattr(Timestamp, _method_name).__doc__ - except AttributeError: - pass - - try: - return getattr(Timedelta, _method_name).__doc__ - except AttributeError: - pass - - return None - - -for _method_name in _nat_methods: - - def _make_nat_func(func_name): - def f(*args, **kwargs): - return NaT - f.__name__ = func_name - f.__doc__ = _get_docstring(func_name) - return f - - setattr(NaTType, _method_name, _make_nat_func(_method_name)) - - -for _method_name in _nan_methods: - - def _make_nan_func(func_name): - def f(*args, **kwargs): - return np.nan - f.__name__ = func_name - f.__doc__ = _get_docstring(func_name) - return f - - setattr(NaTType, _method_name, _make_nan_func(_method_name)) - - -# GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or -# return 
NaT create functions that raise, for binding to NaTType -for _maybe_method_name in dir(NaTType): - _maybe_method = getattr(NaTType, _maybe_method_name) - if (callable(_maybe_method) - and not _maybe_method_name.startswith("_") - and _maybe_method_name not in _implemented_methods): - - def _make_error_func(func_name): - def f(*args, **kwargs): - raise ValueError("NaTType does not support " + func_name) - f.__name__ = func_name - f.__doc__ = _get_docstring(func_name) - return f - - setattr(NaTType, _maybe_method_name, - _make_error_func(_maybe_method_name)) - - #---------------------------------------------------------------------- # Conversion routines diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 6f852f2b394e18..135e4c544de41a 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -10,6 +10,8 @@ from pandas.util import testing as tm from pandas._libs.tslib import iNaT +from pandas.compat import callable + @pytest.mark.parametrize('nat, idx', [(Timestamp('NaT'), DatetimeIndex), (Timedelta('NaT'), TimedeltaIndex), @@ -156,6 +158,53 @@ def test_NaT_methods(): assert NaT.isoformat() == 'NaT' +def test_NaT_docstrings(): + # GH#17327 + nat_names = dir(NaT) + + # NaT should have *most* of the Timestamp methods, with matching + # docstrings. The attributes that are not expected to be present in NaT + # are private methods plus `ts_expected` below. + ts_names = dir(Timestamp) + ts_missing = [x for x in ts_names if x not in nat_names and + not x.startswith('_')] + ts_missing.sort() + ts_expected = ['freqstr', 'normalize', 'offset', + 'to_julian_date', 'to_period', 'tz'] + assert ts_missing == ts_expected + + ts_overlap = [x for x in nat_names if x in ts_names and + not x.startswith('_') and + callable(getattr(Timestamp, x))] + for name in ts_overlap: + tsdoc = getattr(Timestamp, name).__doc__ + natdoc = getattr(NaT, name).__doc__ + assert tsdoc == natdoc + + # NaT should have *most* of the Timedelta methods, with matching + # docstrings. The attributes that are not expected to be present in NaT + # are private methods plus `td_expected` below. + # For methods that are both Timestamp and Timedelta methods, the + # Timestamp docstring takes priority. 
+ td_names = dir(Timedelta) + td_missing = [x for x in td_names if x not in nat_names and + not x.startswith('_')] + td_missing.sort() + td_expected = ['components', 'delta', 'is_populated', + 'to_pytimedelta', 'to_timedelta64', 'view'] + assert td_missing == td_expected + + td_overlap = [x for x in nat_names if x in td_names and + x not in ts_names and # Timestamp __doc__ takes priority + not x.startswith('_') and + callable(getattr(Timedelta, x))] + assert td_overlap == ['total_seconds'] + for name in td_overlap: + tddoc = getattr(Timedelta, name).__doc__ + natdoc = getattr(NaT, name).__doc__ + assert tddoc == natdoc + + @pytest.mark.parametrize('klass', [Timestamp, Timedelta]) def test_isoformat(klass): From d099f8806ed2788effd699a5b97755106beba79d Mon Sep 17 00:00:00 2001 From: reidy-p Date: Thu, 5 Oct 2017 11:27:05 +0100 Subject: [PATCH 22/76] DOC: Clarifying use of categorical data in describe docstring (#16722) (#17789) --- pandas/core/generic.py | 94 +++++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bc8f68eb763d2d..eecdd8a6109e9f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6362,20 +6362,22 @@ def describe(self, percentiles=None, include=None, exclude=None): - A list-like of dtypes : Limits the results to the provided data types. To limit the result to numeric types submit - ``numpy.number``. To limit it instead to categorical - objects submit the ``numpy.object`` data type. Strings + ``numpy.number``. To limit it instead to object columns submit + the ``numpy.object`` data type. Strings can also be used in the style of - ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To + select pandas categorical columns, use ``'category'`` - None (default) : The result will include all numeric columns. exclude : list-like of dtypes or None (default), optional, A black list of data types to omit from the result. Ignored for ``Series``. Here are the options: - A list-like of dtypes : Excludes the provided data types - from the result. To select numeric types submit - ``numpy.number``. To select categorical objects submit the data + from the result. To exclude numeric types submit + ``numpy.number``. To exclude object columns submit the data type ``numpy.object``. Strings can also be used in the style of - ``select_dtypes`` (e.g. ``df.describe(include=['O'])``) + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To + exclude pandas categorical columns, use ``'category'`` - None (default) : The result will exclude nothing. Returns @@ -6400,9 +6402,11 @@ def describe(self, percentiles=None, include=None, exclude=None): among those with the highest count. For mixed data types provided via a ``DataFrame``, the default is to - return only an analysis of numeric columns. If ``include='all'`` - is provided as an option, the result will include a union of - attributes of each type. + return only an analysis of numeric columns. If the dataframe consists + only of object and categorical data without any numeric columns, the + default is to return an analysis of both the object and categorical + columns. If ``include='all'`` is provided as an option, the result + will include a union of attributes of each type. The `include` and `exclude` parameters can be used to limit which columns in a ``DataFrame`` are analyzed for the output. 
@@ -6452,8 +6456,10 @@ def describe(self, percentiles=None, include=None, exclude=None): Describing a ``DataFrame``. By default only numeric fields are returned. - >>> df = pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c']], - ... columns=['numeric', 'object']) + >>> df = pd.DataFrame({ 'object': ['a', 'b', 'c'], + ... 'numeric': [1, 2, 3], + ... 'categorical': pd.Categorical(['d','e','f']) + ... }) >>> df.describe() numeric count 3.0 @@ -6468,18 +6474,18 @@ def describe(self, percentiles=None, include=None, exclude=None): Describing all columns of a ``DataFrame`` regardless of data type. >>> df.describe(include='all') - numeric object - count 3.0 3 - unique NaN 3 - top NaN b - freq NaN 1 - mean 2.0 NaN - std 1.0 NaN - min 1.0 NaN - 25% 1.5 NaN - 50% 2.0 NaN - 75% 2.5 NaN - max 3.0 NaN + categorical numeric object + count 3 3.0 3 + unique 3 NaN 3 + top f NaN c + freq 1 NaN 1 + mean NaN 2.0 NaN + std NaN 1.0 NaN + min NaN 1.0 NaN + 25% NaN 1.5 NaN + 50% NaN 2.0 NaN + 75% NaN 2.5 NaN + max NaN 3.0 NaN Describing a column from a ``DataFrame`` by accessing it as an attribute. @@ -6514,30 +6520,42 @@ def describe(self, percentiles=None, include=None, exclude=None): object count 3 unique 3 - top b + top c freq 1 + Including only categorical columns from a ``DataFrame`` description. + + >>> df.describe(include=['category']) + categorical + count 3 + unique 3 + top f + freq 1 + Excluding numeric columns from a ``DataFrame`` description. >>> df.describe(exclude=[np.number]) - object - count 3 - unique 3 - top b - freq 1 + categorical object + count 3 3 + unique 3 3 + top f c + freq 1 1 Excluding object columns from a ``DataFrame`` description. >>> df.describe(exclude=[np.object]) - numeric - count 3.0 - mean 2.0 - std 1.0 - min 1.0 - 25% 1.5 - 50% 2.0 - 75% 2.5 - max 3.0 + categorical numeric + count 3 3.0 + unique 3 NaN + top f NaN + freq 1 NaN + mean NaN 2.0 + std NaN 1.0 + min NaN 1.0 + 25% NaN 1.5 + 50% NaN 2.0 + 75% NaN 2.5 + max NaN 3.0 See Also -------- From 6773694eeaa68e31c62b4226dd36e204da0a1823 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Oct 2017 06:30:06 -0500 Subject: [PATCH 23/76] Use argument dtype to inform coercion (#17779) * Use argument dtype to inform coercion Master: ```python >>> import dask.dataframe as dd >>> s = dd.core.Scalar({('s', 0): 10}, 's', 'i8') >>> pdf = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7], ... 'b': [7, 6, 5, 4, 3, 2, 1]}) >>> (pdf + s).dtypes a object b object dtype: object Head: ``` >>> (pdf + s).dtypes a int64 b int64 dtype: object ``` This is more consistent with 0.20.3, while still most of the changes in https://github.com/pandas-dev/pandas/pull/16821 Closes https://github.com/pandas-dev/pandas/issues/17767 * Compat for older numpy where bool(dtype) is False * Added timedelta --- pandas/core/dtypes/cast.py | 33 +++++++++++++ pandas/core/internals.py | 54 ++++++++++----------- pandas/tests/internals/test_internals.py | 62 ++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 27 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c2cf6afc1a7b53..f3b11e52cdd7ad 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -483,6 +483,39 @@ def infer_dtype_from_array(arr, pandas_dtype=False): return arr.dtype, arr +def maybe_infer_dtype_type(element): + """Try to infer an object's dtype, for use in arithmetic ops + + Uses `element.dtype` if that's available. + Objects implementing the iterator protocol are cast to a NumPy array, + and from there the array's type is used. 
+ + Parameters + ---------- + element : object + Possibly has a `.dtype` attribute, and possibly the iterator + protocol. + + Returns + ------- + tipo : type + + Examples + -------- + >>> from collections import namedtuple + >>> Foo = namedtuple("Foo", "dtype") + >>> maybe_infer_dtype_type(Foo(np.dtype("i8"))) + numpy.int64 + """ + tipo = None + if hasattr(element, 'dtype'): + tipo = element.dtype + elif is_list_like(element): + element = np.asarray(element) + tipo = element.dtype + return tipo + + def maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): """ provide explict type promotion and coercion diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1fddf985f0cdbb..90de4ded18f8c7 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -44,7 +44,8 @@ soft_convert_objects, maybe_convert_objects, astype_nansafe, - find_common_type) + find_common_type, + maybe_infer_dtype_type) from pandas.core.dtypes.missing import ( isna, notna, array_equivalent, _isna_compat, @@ -629,10 +630,9 @@ def convert(self, copy=True, **kwargs): def _can_hold_element(self, element): """ require the same dtype as ourselves """ dtype = self.values.dtype.type - if is_list_like(element): - element = np.asarray(element) - tipo = element.dtype.type - return issubclass(tipo, dtype) + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, dtype) return isinstance(element, dtype) def _try_cast_result(self, result, dtype=None): @@ -1806,11 +1806,10 @@ class FloatBlock(FloatOrComplexBlock): _downcast_dtype = 'int64' def _can_hold_element(self, element): - if is_list_like(element): - element = np.asarray(element) - tipo = element.dtype.type - return (issubclass(tipo, (np.floating, np.integer)) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return (issubclass(tipo.type, (np.floating, np.integer)) and + not issubclass(tipo.type, (np.datetime64, np.timedelta64))) return (isinstance(element, (float, int, np.floating, np.int_)) and not isinstance(element, (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64))) @@ -1856,9 +1855,9 @@ class ComplexBlock(FloatOrComplexBlock): is_complex = True def _can_hold_element(self, element): - if is_list_like(element): - element = np.array(element) - return issubclass(element.dtype.type, + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, (np.floating, np.integer, np.complexfloating)) return (isinstance(element, (float, int, complex, np.float_, np.int_)) and @@ -1874,12 +1873,12 @@ class IntBlock(NumericBlock): _can_hold_na = False def _can_hold_element(self, element): - if is_list_like(element): - element = np.array(element) - tipo = element.dtype.type - return (issubclass(tipo, np.integer) and - not issubclass(tipo, (np.datetime64, np.timedelta64)) and - self.dtype.itemsize >= element.dtype.itemsize) + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return (issubclass(tipo.type, np.integer) and + not issubclass(tipo.type, (np.datetime64, + np.timedelta64)) and + self.dtype.itemsize >= tipo.itemsize) return is_integer(element) def should_store(self, value): @@ -1917,10 +1916,9 @@ def _box_func(self): return lambda x: tslib.Timedelta(x, unit='ns') def _can_hold_element(self, element): - if is_list_like(element): - element = np.array(element) - tipo = element.dtype.type - return issubclass(tipo, np.timedelta64) + tipo = maybe_infer_dtype_type(element) + if tipo 
is not None: + return issubclass(tipo.type, np.timedelta64) return isinstance(element, (timedelta, np.timedelta64)) def fillna(self, value, **kwargs): @@ -2018,9 +2016,9 @@ class BoolBlock(NumericBlock): _can_hold_na = False def _can_hold_element(self, element): - if is_list_like(element): - element = np.asarray(element) - return issubclass(element.dtype.type, np.bool_) + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, np.bool_) return isinstance(element, (bool, np.bool_)) def should_store(self, value): @@ -2450,7 +2448,9 @@ def _astype(self, dtype, mgr=None, **kwargs): return super(DatetimeBlock, self)._astype(dtype=dtype, **kwargs) def _can_hold_element(self, element): - if is_list_like(element): + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + # TODO: this still uses asarray, instead of dtype.type element = np.array(element) return element.dtype == _NS_DTYPE or element.dtype == np.int64 return (is_integer(element) or isinstance(element, datetime) or diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index f40fc151676da1..c182db35c0c893 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -2,6 +2,7 @@ # pylint: disable=W0102 from datetime import datetime, date +import operator import sys import pytest import numpy as np @@ -1213,3 +1214,64 @@ def assert_add_equals(val, inc, result): with pytest.raises(ValueError): BlockPlacement(slice(2, None, -1)).add(-1) + + +class DummyElement(object): + def __init__(self, value, dtype): + self.value = value + self.dtype = np.dtype(dtype) + + def __array__(self): + return np.array(self.value, dtype=self.dtype) + + def __str__(self): + return "DummyElement({}, {})".format(self.value, self.dtype) + + def __repr__(self): + return str(self) + + def astype(self, dtype, copy=False): + self.dtype = dtype + return self + + def view(self, dtype): + return type(self)(self.value.view(dtype), dtype) + + def any(self, axis=None): + return bool(self.value) + + +class TestCanHoldElement(object): + @pytest.mark.parametrize('value, dtype', [ + (1, 'i8'), + (1.0, 'f8'), + (1j, 'complex128'), + (True, 'bool'), + (np.timedelta64(20, 'ns'), ' Date: Thu, 5 Oct 2017 08:18:19 -0400 Subject: [PATCH 24/76] DEPR: deprecate .get_value and .set_value for Series, DataFrame, Panel, SparseSeries, SparseDataFrame (#17739) closes #15269 --- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/core/frame.py | 34 ++++++++++++++-- pandas/core/indexing.py | 10 ++--- pandas/core/panel.py | 30 ++++++++++++-- pandas/core/series.py | 30 ++++++++++++-- pandas/core/sparse/frame.py | 44 ++++++++++++++++++-- pandas/core/sparse/series.py | 29 +++++++++++++- pandas/tests/frame/test_api.py | 4 +- pandas/tests/frame/test_constructors.py | 16 ++++++-- pandas/tests/frame/test_indexing.py | 53 ++++++++++++++++++------- pandas/tests/series/test_indexing.py | 16 ++++++-- pandas/tests/sparse/test_frame.py | 20 +++++++--- pandas/tests/sparse/test_series.py | 13 ++++-- pandas/tests/test_panel.py | 25 +++++++----- 14 files changed, 264 insertions(+), 63 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d7789bfbfd04c6..66b44d4d391e1e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -666,7 +666,8 @@ Deprecations - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has 
gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`) - passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`) -- Passing a non-existant column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) +- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) +- ``.get_value`` and ``.set_value`` on ``Series``, ``DataFrame``, ``Panel``, ``SparseSeries``, and ``SparseDataFrame`` are deprecated in favor of using ``.iat[]`` or ``.at[]`` accessors (:issue:`15269`) .. _whatsnew_0210.deprecations.select: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 778a3dc9046a31..a77c002b625cb9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -297,7 +297,8 @@ def _constructor(self): return DataFrame _constructor_sliced = Series - _deprecations = NDFrame._deprecations | frozenset(['sortlevel']) + _deprecations = NDFrame._deprecations | frozenset( + ['sortlevel', 'get_value', 'set_value']) @property def _constructor_expanddim(self): @@ -1922,6 +1923,10 @@ def get_value(self, index, col, takeable=False): """ Quickly retrieve single value at passed column and index + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- index : row label @@ -1933,6 +1938,14 @@ def get_value(self, index, col, takeable=False): value : scalar value """ + warnings.warn("get_value is deprecated and will be removed " + "in a future release. Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._get_value(index, col, takeable=takeable) + + def _get_value(self, index, col, takeable=False): + if takeable: series = self._iget_item_cache(col) return _maybe_box_datetimelike(series._values[index]) @@ -1948,12 +1961,17 @@ def get_value(self, index, col, takeable=False): # use positional col = self.columns.get_loc(col) index = self.index.get_loc(index) - return self.get_value(index, col, takeable=True) + return self._get_value(index, col, takeable=True) + _get_value.__doc__ = get_value.__doc__ def set_value(self, index, col, value, takeable=False): """ Put single value at passed column and index + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- index : row label @@ -1967,10 +1985,17 @@ def set_value(self, index, col, value, takeable=False): If label pair is contained, will be reference to calling DataFrame, otherwise a new object """ + warnings.warn("set_value is deprecated and will be removed " + "in a future release. 
Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._set_value(index, col, value, takeable=takeable) + + def _set_value(self, index, col, value, takeable=False): try: if takeable is True: series = self._iget_item_cache(col) - return series.set_value(index, value, takeable=True) + return series._set_value(index, value, takeable=True) series = self._get_item_cache(col) engine = self.index._engine @@ -1983,6 +2008,7 @@ def set_value(self, index, col, value, takeable=False): self._item_cache.pop(col, None) return self + _set_value.__doc__ = set_value.__doc__ def _ixs(self, i, axis=0): """ @@ -2791,7 +2817,7 @@ def lookup(self, row_labels, col_labels): else: result = np.empty(n, dtype='O') for i, (r, c) in enumerate(zip(row_labels, col_labels)): - result[i] = self.get_value(r, c) + result[i] = self._get_value(r, c) if is_object_dtype(result): result = lib.maybe_convert_objects(result) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 199aa9cfca5067..f1a3fe81a45404 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -112,7 +112,7 @@ def __getitem__(self, key): key = tuple(com._apply_if_callable(x, self.obj) for x in key) try: - values = self.obj.get_value(*key) + values = self.obj._get_value(*key) if is_scalar(values): return values except Exception: @@ -1542,7 +1542,7 @@ def _is_scalar_access(self, key): def _getitem_scalar(self, key): # a fast-path to scalar access # if not, raise - values = self.obj.get_value(*key) + values = self.obj._get_value(*key) return values def _get_partial_string_timestamp_match_key(self, key, labels): @@ -1701,7 +1701,7 @@ def _is_scalar_access(self, key): def _getitem_scalar(self, key): # a fast-path to scalar access # if not, raise - values = self.obj.get_value(*key, takeable=True) + values = self.obj._get_value(*key, takeable=True) return values def _is_valid_integer(self, key, axis): @@ -1866,7 +1866,7 @@ def __getitem__(self, key): raise ValueError('Invalid call for scalar access (getting)!') key = self._convert_key(key) - return self.obj.get_value(*key, takeable=self._takeable) + return self.obj._get_value(*key, takeable=self._takeable) def __setitem__(self, key, value): if isinstance(key, tuple): @@ -1883,7 +1883,7 @@ def __setitem__(self, key, value): '(setting)!') key = list(self._convert_key(key, is_setter=True)) key.append(value) - self.obj.set_value(*key, takeable=self._takeable) + self.obj._set_value(*key, takeable=self._takeable) class _AtIndexer(_ScalarAccessIndexer): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 68733a3a8b94ed..fad6a39223b9e1 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -470,6 +470,10 @@ def get_value(self, *args, **kwargs): """ Quickly retrieve single value at (item, major, minor) location + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- item : item label (panel item) @@ -481,6 +485,13 @@ def get_value(self, *args, **kwargs): ------- value : scalar value """ + warnings.warn("get_value is deprecated and will be removed " + "in a future release. 
Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._get_value(*args, **kwargs) + + def _get_value(self, *args, **kwargs): nargs = len(args) nreq = self._AXIS_LEN @@ -500,12 +511,17 @@ def get_value(self, *args, **kwargs): else: lower = self._get_item_cache(args[0]) - return lower.get_value(*args[1:], takeable=takeable) + return lower._get_value(*args[1:], takeable=takeable) + _get_value.__doc__ = get_value.__doc__ def set_value(self, *args, **kwargs): """ Quickly set single value at (item, major, minor) location + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- item : item label (panel item) @@ -520,6 +536,13 @@ def set_value(self, *args, **kwargs): If label combo is contained, will be reference to calling Panel, otherwise a new object """ + warnings.warn("set_value is deprecated and will be removed " + "in a future release. Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._set_value(*args, **kwargs) + + def _set_value(self, *args, **kwargs): # require an arg for each axis and the value nargs = len(args) nreq = self._AXIS_LEN + 1 @@ -540,7 +563,7 @@ def set_value(self, *args, **kwargs): else: lower = self._get_item_cache(args[0]) - lower.set_value(*args[1:], takeable=takeable) + lower._set_value(*args[1:], takeable=takeable) return self except KeyError: axes = self._expand_axes(args) @@ -553,7 +576,8 @@ def set_value(self, *args, **kwargs): if made_bigger: maybe_cast_item(result, args[0], likely_dtype) - return result.set_value(*args) + return result._set_value(*args) + _set_value.__doc__ = set_value.__doc__ def _box_item_values(self, key, values): if self.ndim == values.ndim: diff --git a/pandas/core/series.py b/pandas/core/series.py index 58cac46f63d7ed..43b7f1d043e4d9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -147,7 +147,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _metadata = ['name'] _accessors = frozenset(['dt', 'cat', 'str']) _deprecations = generic.NDFrame._deprecations | frozenset( - ['sortlevel', 'reshape']) + ['sortlevel', 'reshape', 'get_value', 'set_value']) _allow_index_ops = True def __init__(self, data=None, index=None, dtype=None, name=None, @@ -902,6 +902,10 @@ def get_value(self, label, takeable=False): """ Quickly retrieve single value at passed index label + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- index : label @@ -911,9 +915,17 @@ def get_value(self, label, takeable=False): ------- value : scalar value """ + warnings.warn("get_value is deprecated and will be removed " + "in a future release. Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._get_value(label, takeable=takeable) + + def _get_value(self, label, takeable=False): if takeable is True: return _maybe_box_datetimelike(self._values[label]) return self.index.get_value(self._values, label) + _get_value.__doc__ = get_value.__doc__ def set_value(self, label, value, takeable=False): """ @@ -921,6 +933,10 @@ def set_value(self, label, value, takeable=False): new object is created with the label placed at the end of the result index + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. 
+ Parameters ---------- label : object @@ -935,17 +951,25 @@ def set_value(self, label, value, takeable=False): If label is contained, will be reference to calling Series, otherwise a new object """ + warnings.warn("set_value is deprecated and will be removed " + "in a future release. Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._set_value(label, value, takeable=takeable) + + def _set_value(self, label, value, takeable=False): try: if takeable: self._values[label] = value else: self.index._engine.set_value(self._values, label, value) - return self except KeyError: # set using a non-recursive method self.loc[label] = value - return self + + return self + _set_value.__doc__ = set_value.__doc__ def reset_index(self, level=None, drop=False, name=None, inplace=False): """ diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 7aa49efa82f610..a43aad9a0204eb 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -5,6 +5,7 @@ from __future__ import division # pylint: disable=E1101,E1103,W0231,E0202 +import warnings from pandas.compat import lmap from pandas import compat import numpy as np @@ -430,19 +431,47 @@ def __getitem__(self, key): else: return self._get_item_cache(key) - @Appender(DataFrame.get_value.__doc__, indents=0) def get_value(self, index, col, takeable=False): + """ + Quickly retrieve single value at passed column and index + + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + + Parameters + ---------- + index : row label + col : column label + takeable : interpret the index/col as indexers, default False + + Returns + ------- + value : scalar value + """ + warnings.warn("get_value is deprecated and will be removed " + "in a future release. Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._get_value(index, col, takeable=takeable) + + def _get_value(self, index, col, takeable=False): if takeable is True: series = self._iget_item_cache(col) else: series = self._get_item_cache(col) - return series.get_value(index, takeable=takeable) + return series._get_value(index, takeable=takeable) + _get_value.__doc__ = get_value.__doc__ def set_value(self, index, col, value, takeable=False): """ Put single value at passed column and index + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- index : row label @@ -460,9 +489,18 @@ def set_value(self, index, col, value, takeable=False): ------- frame : DataFrame """ - dense = self.to_dense().set_value(index, col, value, takeable=takeable) + warnings.warn("set_value is deprecated and will be removed " + "in a future release. 
Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._set_value(index, col, value, takeable=takeable) + + def _set_value(self, index, col, value, takeable=False): + dense = self.to_dense()._set_value( + index, col, value, takeable=takeable) return dense.to_sparse(kind=self._default_kind, fill_value=self._default_fill_value) + _set_value.__doc__ = set_value.__doc__ def _slice(self, slobj, axis=0, kind=None): if axis == 0: diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 5166dc927989e5..3255bd6bd17e8c 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -426,7 +426,7 @@ def _get_values(self, indexer): return self[indexer] def _set_with_engine(self, key, value): - return self.set_value(key, value) + return self._set_value(key, value) def abs(self): """ @@ -466,6 +466,10 @@ def get_value(self, label, takeable=False): """ Retrieve single value at passed index label + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- index : label @@ -475,8 +479,17 @@ def get_value(self, label, takeable=False): ------- value : scalar value """ + warnings.warn("get_value is deprecated and will be removed " + "in a future release. Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + + return self._get_value(label, takeable=takeable) + + def _get_value(self, label, takeable=False): loc = label if takeable is True else self.index.get_loc(label) return self._get_val_at(loc) + _get_value.__doc__ = get_value.__doc__ def set_value(self, label, value, takeable=False): """ @@ -484,6 +497,10 @@ def set_value(self, label, value, takeable=False): new object is created with the label placed at the end of the result index + .. deprecated:: 0.21.0 + + Please use .at[] or .iat[] accessors. + Parameters ---------- label : object @@ -501,11 +518,18 @@ def set_value(self, label, value, takeable=False): ------- series : SparseSeries """ + warnings.warn("set_value is deprecated and will be removed " + "in a future release. 
Please use " + ".at[] or .iat[] accessors instead", FutureWarning, + stacklevel=2) + return self._set_value(label, value, takeable=takeable) + + def _set_value(self, label, value, takeable=False): values = self.to_dense() # if the label doesn't exist, we will create a new object here # and possibily change the index - new_values = values.set_value(label, value, takeable=takeable) + new_values = values._set_value(label, value, takeable=takeable) if new_values is not None: values = new_values new_index = values.index @@ -513,6 +537,7 @@ def set_value(self, label, value, takeable=False): kind=self.kind) self._data = SingleBlockManager(values, new_index) self._index = new_index + _set_value.__doc__ = set_value.__doc__ def _set_values(self, key, value): diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 5ea8230ced41b9..be6d81c63ae1ef 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -69,7 +69,9 @@ def test_getitem_pop_assign_name(self): def test_get_value(self): for idx in self.frame.index: for col in self.frame.columns: - result = self.frame.get_value(idx, col) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.frame.get_value(idx, col) expected = self.frame[col][idx] tm.assert_almost_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d0cd1899a0a3c5..7f1cc12ec42774 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -258,8 +258,12 @@ def test_constructor_dict(self): # Dict with None value frame_none = DataFrame(dict(a=None), index=[0]) frame_none_list = DataFrame(dict(a=[None]), index=[0]) - assert frame_none.get_value(0, 'a') is None - assert frame_none_list.get_value(0, 'a') is None + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert frame_none.get_value(0, 'a') is None + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert frame_none_list.get_value(0, 'a') is None tm.assert_frame_equal(frame_none, frame_none_list) # GH10856 @@ -509,7 +513,9 @@ def test_nested_dict_frame_constructor(self): data = {} for col in df.columns: for row in df.index: - data.setdefault(col, {})[row] = df.get_value(row, col) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + data.setdefault(col, {})[row] = df.get_value(row, col) result = DataFrame(data, columns=rng) tm.assert_frame_equal(result, df) @@ -517,7 +523,9 @@ def test_nested_dict_frame_constructor(self): data = {} for col in df.columns: for row in df.index: - data.setdefault(row, {})[col] = df.get_value(row, col) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + data.setdefault(row, {})[col] = df.get_value(row, col) result = DataFrame(data, index=rng).T tm.assert_frame_equal(result, df) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index dd2759cd3ef8ed..d00f56830a6fa7 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1629,7 +1629,9 @@ def test_getitem_list_duplicates(self): def test_get_value(self): for idx in self.frame.index: for col in self.frame.columns: - result = self.frame.get_value(idx, col) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.frame.get_value(idx, col) expected = self.frame[col][idx] assert result == expected @@ -1637,7 +1639,9 @@ def test_lookup(self): def alt(df, 
rows, cols, dtype): result = [] for r, c in zip(rows, cols): - result.append(df.get_value(r, c)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result.append(df.get_value(r, c)) return np.array(result, dtype=dtype) def testit(df): @@ -1671,32 +1675,48 @@ def testit(df): def test_set_value(self): for idx in self.frame.index: for col in self.frame.columns: - self.frame.set_value(idx, col, 1) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.frame.set_value(idx, col, 1) assert self.frame[col][idx] == 1 def test_set_value_resize(self): - res = self.frame.set_value('foobar', 'B', 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = self.frame.set_value('foobar', 'B', 0) assert res is self.frame assert res.index[-1] == 'foobar' - assert res.get_value('foobar', 'B') == 0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert res.get_value('foobar', 'B') == 0 self.frame.loc['foobar', 'qux'] = 0 - assert self.frame.get_value('foobar', 'qux') == 0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert self.frame.get_value('foobar', 'qux') == 0 res = self.frame.copy() - res3 = res.set_value('foobar', 'baz', 'sam') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res3 = res.set_value('foobar', 'baz', 'sam') assert res3['baz'].dtype == np.object_ res = self.frame.copy() - res3 = res.set_value('foobar', 'baz', True) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res3 = res.set_value('foobar', 'baz', True) assert res3['baz'].dtype == np.object_ res = self.frame.copy() - res3 = res.set_value('foobar', 'baz', 5) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res3 = res.set_value('foobar', 'baz', 5) assert is_float_dtype(res3['baz']) assert isna(res3['baz'].drop(['foobar'])).all() - pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): df_orig = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC')) @@ -1704,7 +1724,9 @@ def test_set_value_with_index_dtype_change(self): # this is actually ambiguous as the 2 is interpreted as a positional # so column is not created df = df_orig.copy() - df.set_value('C', 2, 1.0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df.set_value('C', 2, 1.0) assert list(df.index) == list(df_orig.index) + ['C'] # assert list(df.columns) == list(df_orig.columns) + [2] @@ -1715,7 +1737,9 @@ def test_set_value_with_index_dtype_change(self): # create both new df = df_orig.copy() - df.set_value('C', 'D', 1.0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df.set_value('C', 'D', 1.0) assert list(df.index) == list(df_orig.index) + ['C'] assert list(df.columns) == list(df_orig.columns) + ['D'] @@ -1728,8 +1752,9 @@ def test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) df = DataFrame(index=index, columns=lrange(4)) - pytest.raises(KeyError, df.get_value, 0, 1) - # pytest.raises(KeyError, df.set_value, 0, 1, 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pytest.raises(KeyError, df.get_value, 0, 1) def test_single_element_ix_dont_upcast(self): 
self.frame['E'] = 1 diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 93e7b81163b549..0ca319565e24bd 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -800,13 +800,17 @@ def test_setitem_dtypes(self): def test_set_value(self): idx = self.ts.index[10] - res = self.ts.set_value(idx, 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = self.ts.set_value(idx, 0) assert res is self.ts assert self.ts[idx] == 0 # equiv s = self.series.copy() - res = s.set_value('foobar', 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = s.set_value('foobar', 0) assert res is s assert res.index[-1] == 'foobar' assert res['foobar'] == 0 @@ -2632,8 +2636,12 @@ def test_series_set_value(self): dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] index = DatetimeIndex(dates) - s = Series().set_value(dates[0], 1.) - s2 = s.set_value(dates[1], np.nan) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series().set_value(dates[0], 1.) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s2 = s.set_value(dates[1], np.nan) exp = Series([1., np.nan], index=index) diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index ed4a3a9e5f75f8..ef94e2f78278db 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -430,22 +430,32 @@ def test_set_value(self): # ok, as the index gets converted to object frame = self.frame.copy() - res = frame.set_value('foobar', 'B', 1.5) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = frame.set_value('foobar', 'B', 1.5) assert res.index.dtype == 'object' res = self.frame res.index = res.index.astype(object) - res = self.frame.set_value('foobar', 'B', 1.5) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res = self.frame.set_value('foobar', 'B', 1.5) assert res is not self.frame assert res.index[-1] == 'foobar' - assert res.get_value('foobar', 'B') == 1.5 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert res.get_value('foobar', 'B') == 1.5 - res2 = res.set_value('foobar', 'qux', 1.5) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + res2 = res.set_value('foobar', 'qux', 1.5) assert res2 is not res tm.assert_index_equal(res2.columns, pd.Index(list(self.frame.columns) + ['qux'])) - assert res2.get_value('foobar', 'qux') == 1.5 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + assert res2.get_value('foobar', 'qux') == 1.5 def test_fancy_index_misc(self): # axis = 0 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 13dab68b2e5b4f..7c7399317809f4 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -465,15 +465,22 @@ def test_get_get_value(self): expected = self.btseries.to_dense()[dt] tm.assert_almost_equal(result, expected) - tm.assert_almost_equal(self.bseries.get_value(10), self.bseries[10]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + tm.assert_almost_equal( + self.bseries.get_value(10), self.bseries[10]) def test_set_value(self): idx = self.btseries.index[7] - self.btseries.set_value(idx, 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.btseries.set_value(idx, 0) assert self.btseries[idx] == 0 - 
self.iseries.set_value('foobar', 0) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.iseries.set_value('foobar', 0) assert self.iseries.index[-1] == 'foobar' assert self.iseries['foobar'] == 0 diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index a6113f231f8f20..c8e056f156218f 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -405,7 +405,9 @@ def test_get_value(self): for item in self.panel.items: for mjr in self.panel.major_axis[::2]: for mnr in self.panel.minor_axis: - result = self.panel.get_value(item, mjr, mnr) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = self.panel.get_value(item, mjr, mnr) expected = self.panel[item][mnr][mjr] assert_almost_equal(result, expected) @@ -867,16 +869,17 @@ def test_comp(func): test_comp(operator.le) def test_get_value(self): - for item in self.panel.items: - for mjr in self.panel.major_axis[::2]: - for mnr in self.panel.minor_axis: - result = self.panel.get_value(item, mjr, mnr) - expected = self.panel[item][mnr][mjr] - assert_almost_equal(result, expected) - with tm.assert_raises_regex(TypeError, - "There must be an argument " - "for each axis"): - self.panel.get_value('a') + with catch_warnings(record=True): + for item in self.panel.items: + for mjr in self.panel.major_axis[::2]: + for mnr in self.panel.minor_axis: + result = self.panel.get_value(item, mjr, mnr) + expected = self.panel[item][mnr][mjr] + assert_almost_equal(result, expected) + with tm.assert_raises_regex(TypeError, + "There must be an argument " + "for each axis"): + self.panel.get_value('a') def test_set_value(self): with catch_warnings(record=True): From 3c1923287cf1365ef653efa4abb5c6a4c0a9bd1c Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 5 Oct 2017 13:28:21 -0400 Subject: [PATCH 25/76] DOC: Column indexes should use the same metadata as columns (#17795) * DOC: Column indexes should use the same metadata as columns * Fix metadata field name * Remove column_index_names --- doc/source/developer.rst | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/source/developer.rst b/doc/source/developer.rst index a695366d9ada3f..9c214020ab43dd 100644 --- a/doc/source/developer.rst +++ b/doc/source/developer.rst @@ -45,20 +45,19 @@ So that a ``pandas.DataFrame`` can be faithfully reconstructed, we store a .. code-block:: text {'index_columns': ['__index_level_0__', '__index_level_1__', ...], - 'column_index_names': [, , ...], - 'column_index_dtypes': [, , ..., ] + 'column_indexes': [, , ..., ], 'columns': [, , ...], 'pandas_version': $VERSION} -Here, ```` and so forth are dictionaries containing the metadata for each -column. This has JSON form: +Here, ````/```` and so forth are dictionaries containing the metadata +for each column. This has JSON form: .. code-block:: text {'name': column_name, 'pandas_type': pandas_type, 'numpy_type': numpy_type, - 'metadata': type_metadata} + 'metadata': metadata} ``pandas_type`` is the logical type of the column, and is one of: @@ -75,7 +74,7 @@ result of ``str(dtype)`` for the underlying NumPy array that holds the data. So for ``datetimetz`` this is ``datetime64[ns]`` and for categorical, it may be any of the supported integer categorical types. -The ``type_metadata`` is ``None`` except for: +The ``metadata`` field is ``None`` except for: * ``datetimetz``: ``{'timezone': zone, 'unit': 'ns'}``, e.g. ``{'timezone', 'America/New_York', 'unit': 'ns'}``. 
The ``'unit'`` is optional, and if @@ -108,8 +107,12 @@ As an example of fully-formed metadata: .. code-block:: text {'index_columns': ['__index_level_0__'], - 'column_index_names': [None], - 'column_index_dtypes': ['object'], + 'column_indexes': [ + {'name': None, + 'pandas_type': 'string', + 'numpy_type': 'object', + 'metadata': None} + ], 'columns': [ {'name': 'c0', 'pandas_type': 'int8', From e7bb63c4a4f4e5ca04481e4957f1c1394fdcd598 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 5 Oct 2017 14:35:19 -0400 Subject: [PATCH 26/76] Revert "CI: pin pytables to valid build (#17760)" (#17768) This reverts commit 6d30d5f425ddfaf143b8bd878f81395852b50cd9. --- ci/requirements-3.6.run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 721d0c1ad81018..822144a80bc9a0 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -7,7 +7,7 @@ xlsxwriter xlrd xlwt numexpr -pytables=3.4.2=np113py36_1 +pytables matplotlib lxml html5lib From 7740a6ea71c1e414728a5f81b3e49f066fd7d69e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Oct 2017 13:35:55 -0500 Subject: [PATCH 27/76] API: Change str for CategoricalDtype to category (#17783) --- doc/source/whatsnew/v0.21.0.txt | 9 ++++----- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 8 ++++++++ pandas/tests/series/test_analytics.py | 13 ++----------- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 66b44d4d391e1e..1fe3bed6df2182 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -157,11 +157,10 @@ The values have been correctly interpreted as integers. The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a ``Series`` with categorical type will now return an instance of -``CategoricalDtype``. For the most part, this is backwards compatible, though -the string repr has changed. If you were previously using ``str(s.dtype) == -'category'`` to detect categorical data, switch to -:func:`pandas.api.types.is_categorical_dtype`, which is compatible with the old -and new ``CategoricalDtype``. +``CategoricalDtype``. This change should be backwards compatible, though the +repr has changed. ``str(CategoricalDtype())`` is still the string +``'category'``, but the preferred way to detect categorical data is to use +:func:`pandas.api.types.is_categorical_dtype`. See the :ref:`CategoricalDtype docs ` for more. 
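A minimal sketch of the resulting behavior (the exact categories shown in the repr are inferred from the example data and are assumed here):

```python
import pandas as pd
from pandas.api.types import is_categorical_dtype

s = pd.Series(list('abca'), dtype='category')

# str() still compares equal to 'category', so existing string checks work
assert str(s.dtype) == 'category'

# repr() now shows the parametrized dtype, e.g.
# CategoricalDtype(categories=['a', 'b', 'c'], ordered=False)
print(repr(s.dtype))

# the preferred, forward-compatible check
assert is_categorical_dtype(s.dtype)
```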
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d2487905caced2..4d97b7d17a6dc2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -220,7 +220,7 @@ def __eq__(self, other): # both unordered; this could probably be optimized / cached return hash(self) == hash(other) - def __unicode__(self): + def __repr__(self): tpl = u'CategoricalDtype(categories={}ordered={})' if self.categories is None: data = u"None, " diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index be3e5fdc467d3e..0b9e2c9fe5ffc7 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import re import pytest from itertools import product @@ -649,3 +650,10 @@ def test_from_categorical_dtype_both(self): result = CategoricalDtype._from_categorical_dtype( c1, categories=[1, 2], ordered=False) assert result == CategoricalDtype([1, 2], ordered=False) + + def test_str_vs_repr(self): + c1 = CategoricalDtype(['a', 'b']) + assert str(c1) == 'category' + # Py2 will have unicode prefixes + pat = r"CategoricalDtype\(categories=\[.*\], ordered=False\)" + assert re.match(pat, repr(c1)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 9f5e4f2ac4b6e6..6495d748e38233 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1784,7 +1784,8 @@ class TestNLargestNSmallest(object): # not supported on some archs # Series([3., 2, 1, 2, 5], dtype='complex256'), Series([3., 2, 1, 2, 5], dtype='complex128'), - Series(list('abcde'))]) + Series(list('abcde')), + Series(list('abcde'), dtype='category')]) def test_error(self, r): dt = r.dtype msg = ("Cannot use method 'n(larg|small)est' with " @@ -1795,16 +1796,6 @@ def test_error(self, r): with tm.assert_raises_regex(TypeError, msg): method(arg) - def test_error_categorical_dtype(self): - # same as test_error, but regex hard to escape properly - msg = ("Cannot use method 'n(larg|small)est' with dtype " - "CategoricalDtype.+") - with tm.assert_raises_regex(TypeError, msg): - Series(list('ab'), dtype='category').nlargest(2) - - with tm.assert_raises_regex(TypeError, msg): - Series(list('ab'), dtype='category').nsmallest(2) - @pytest.mark.parametrize( "s", [v for k, v in s_main_dtypes().iteritems()]) From 7a57b83a8c1e362920e5e358c21d2a090c8706aa Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 5 Oct 2017 14:36:28 -0400 Subject: [PATCH 28/76] DEPR: deprecate raise_on_error in .where/.mask in favor of errors= (#17744) closes #14968 --- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/core/computation/expressions.py | 38 ++++++++-------------- pandas/core/frame.py | 10 +++--- pandas/core/generic.py | 45 ++++++++++++++++++++++---- pandas/core/internals.py | 43 +++++++++++++++--------- pandas/core/ops.py | 14 +++----- pandas/core/panel.py | 2 +- pandas/core/series.py | 2 +- pandas/core/sparse/frame.py | 2 +- pandas/tests/series/test_indexing.py | 11 +++++++ pandas/tests/series/test_missing.py | 6 ++-- pandas/tests/test_expressions.py | 1 + 12 files changed, 108 insertions(+), 69 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1fe3bed6df2182..7fbf2533428dc4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -665,8 +665,9 @@ Deprecations - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) - ``cdate_range`` has been deprecated in favor of 
:func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`) - passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`) -- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) - ``.get_value`` and ``.set_value`` on ``Series``, ``DataFrame``, ``Panel``, ``SparseSeries``, and ``SparseDataFrame`` are deprecated in favor of using ``.iat[]`` or ``.at[]`` accessors (:issue:`15269`) +- Passing a non-existant column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) +- ``raise_on_error`` parameter to :func:`Series.where`, :func:`Series.mask`, :func:`DataFrame.where`, :func:`DataFrame.mask` is deprecated, in favor of ``errors=`` (:issue:`14968`) .. _whatsnew_0210.deprecations.select: diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 2196fb5917a440..c74da6379e32f3 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -56,7 +56,7 @@ def set_numexpr_threads(n=None): ne.set_num_threads(n) -def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): +def _evaluate_standard(op, op_str, a, b, **eval_kwargs): """ standard evaluation """ if _TEST_MODE: _store_test_result(False) @@ -89,7 +89,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): return False -def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, +def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwargs): result = None @@ -111,25 +111,22 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, except ValueError as detail: if 'unknown type object' in str(detail): pass - except Exception as detail: - if raise_on_error: - raise if _TEST_MODE: _store_test_result(result is not None) if result is None: - result = _evaluate_standard(op, op_str, a, b, raise_on_error) + result = _evaluate_standard(op, op_str, a, b) return result -def _where_standard(cond, a, b, raise_on_error=True): +def _where_standard(cond, a, b): return np.where(_values_from_object(cond), _values_from_object(a), _values_from_object(b)) -def _where_numexpr(cond, a, b, raise_on_error=False): +def _where_numexpr(cond, a, b): result = None if _can_use_numexpr(None, 'where', a, b, 'where'): @@ -147,11 +144,10 @@ def _where_numexpr(cond, a, b, raise_on_error=False): if 'unknown type object' in str(detail): pass except Exception as detail: - if raise_on_error: - raise TypeError(str(detail)) + raise TypeError(str(detail)) if result is None: - result = _where_standard(cond, a, b, raise_on_error) + result = _where_standard(cond, a, b) return result @@ -189,7 +185,7 @@ def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')), return True -def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, +def evaluate(op, op_str, a, b, use_numexpr=True, **eval_kwargs): """ evaluate and return the expression of the op on a and b @@ -200,19 +196,16 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, op_str: the string version of the op a : left operand b : right operand - raise_on_error : pass the error to the higher level if indicated - (default is False), otherwise evaluate the op with and - return the results 
use_numexpr : whether to try to use numexpr (default True) """ + use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) if use_numexpr: - return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, - **eval_kwargs) - return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error) + return _evaluate(op, op_str, a, b, **eval_kwargs) + return _evaluate_standard(op, op_str, a, b) -def where(cond, a, b, raise_on_error=False, use_numexpr=True): +def where(cond, a, b, use_numexpr=True): """ evaluate the where condition cond on a and b Parameters @@ -221,15 +214,12 @@ def where(cond, a, b, raise_on_error=False, use_numexpr=True): cond : a boolean array a : return if cond is True b : return if cond is False - raise_on_error : pass the error to the higher level if indicated - (default is False), otherwise evaluate the op with and - return the results use_numexpr : whether to try to use numexpr (default True) """ if use_numexpr: - return _where(cond, a, b, raise_on_error=raise_on_error) - return _where_standard(cond, a, b, raise_on_error=raise_on_error) + return _where(cond, a, b) + return _where_standard(cond, a, b) def set_test_mode(v=True): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a77c002b625cb9..142ccf1f034bc6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3862,9 +3862,9 @@ def _combine_match_columns(self, other, func, level=None, try_cast=try_cast) return self._constructor(new_data) - def _combine_const(self, other, func, raise_on_error=True, try_cast=True): + def _combine_const(self, other, func, errors='raise', try_cast=True): new_data = self._data.eval(func=func, other=other, - raise_on_error=raise_on_error, + errors=errors, try_cast=try_cast) return self._constructor(new_data) @@ -4035,8 +4035,7 @@ def combiner(x, y, needs_i8_conversion=False): else: mask = isna(x_values) - return expressions.where(mask, y_values, x_values, - raise_on_error=True) + return expressions.where(mask, y_values, x_values) return self.combine(other, combiner, overwrite=False) @@ -4091,8 +4090,7 @@ def update(self, other, join='left', overwrite=True, filter_func=None, if mask.all(): continue - self[col] = expressions.where(mask, this, that, - raise_on_error=True) + self[col] = expressions.where(mask, this, that) # ---------------------------------------------------------------------- # Misc methods diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eecdd8a6109e9f..942a9ff2790927 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5758,7 +5758,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, return left.__finalize__(self), right.__finalize__(other) def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, - try_cast=False, raise_on_error=True): + errors='raise', try_cast=False): """ Equivalent to public method `where`, except that `other` is not applied as a function even if callable. Used in __setitem__. 
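In user-facing terms, the deprecation introduced by this patch plays out as follows (a minimal sketch against the new ``errors=`` keyword added above; the Series contents here are arbitrary):

import numpy as np
import pandas as pd

s = pd.Series(np.random.randn(5))
cond = s > 0

# New spelling: errors= controls error handling. 'raise' (the default)
# propagates exceptions; 'ignore' suppresses them and returns the
# original object on error.
s.where(cond, errors='raise')
s.mask(cond, errors='ignore')

# The old spelling still works, but now emits a FutureWarning.
s.where(cond, raise_on_error=True)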
@@ -5887,7 +5887,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, else: new_data = self._data.where(other=other, cond=cond, align=align, - raise_on_error=raise_on_error, + errors=errors, try_cast=try_cast, axis=block_axis, transpose=self._AXIS_REVERSED) @@ -5924,12 +5924,21 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, Whether to perform the operation in place on the data axis : alignment axis if needed, default None level : alignment level if needed, default None + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + Note that currently this parameter won't affect + the results and will always coerce to a suitable dtype. + try_cast : boolean, default False try to cast the result back to the input type (if possible), raise_on_error : boolean, default True Whether to raise on invalid data types (e.g. trying to where on strings) + .. deprecated:: 0.21.0 + Returns ------- wh : same type as caller @@ -6005,24 +6014,46 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, cond_rev="False", name='where', name_other='mask')) def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, - try_cast=False, raise_on_error=True): + errors='raise', try_cast=False, raise_on_error=None): + + if raise_on_error is not None: + warnings.warn( + "raise_on_error is deprecated in " + "favor of errors='raise|ignore'", + FutureWarning, stacklevel=2) + + if raise_on_error: + errors = 'raise' + else: + errors = 'ignore' other = com._apply_if_callable(other, self) - return self._where(cond, other, inplace, axis, level, try_cast, - raise_on_error) + return self._where(cond, other, inplace, axis, level, + errors=errors, try_cast=try_cast) @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="False", cond_rev="True", name='mask', name_other='where')) def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, - try_cast=False, raise_on_error=True): + errors='raise', try_cast=False, raise_on_error=None): + + if raise_on_error is not None: + warnings.warn( + "raise_on_error is deprecated in " + "favor of errors='raise|ignore'", + FutureWarning, stacklevel=2) + + if raise_on_error: + errors = 'raise' + else: + errors = 'ignore' inplace = validate_bool_kwarg(inplace, 'inplace') cond = com._apply_if_callable(cond, self) return self.where(~cond, other=other, inplace=inplace, axis=axis, level=level, try_cast=try_cast, - raise_on_error=raise_on_error) + errors=errors) _shared_docs['shift'] = (""" Shift index by desired number of periods with an optional time freq diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 90de4ded18f8c7..a8f1a0c78c2384 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -533,10 +533,16 @@ def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs): **kwargs) def _astype(self, dtype, copy=False, errors='raise', values=None, - klass=None, mgr=None, raise_on_error=False, **kwargs): + klass=None, mgr=None, **kwargs): """ - Coerce to the new type (if copy=True, return a new copy) - raise on an except if raise == True + Coerce to the new type + + dtype : str, dtype convertible + copy : boolean, default False + copy if indicated + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. 
On error return original object """ errors_legal_values = ('raise', 'ignore') @@ -1248,7 +1254,7 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block(new_values, fastpath=True)] - def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): + def eval(self, func, other, errors='raise', try_cast=False, mgr=None): """ evaluate the block; return result block from the result @@ -1256,8 +1262,10 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): ---------- func : how to combine self, other other : a ndarray/object - raise_on_error : if True, raise when I can't perform the function, - False by default (and just return the data that we had coming in) + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + try_cast : try casting the results to the input type Returns @@ -1295,7 +1303,7 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None): except TypeError: block = self.coerce_to_target_dtype(orig_other) return block.eval(func, orig_other, - raise_on_error=raise_on_error, + errors=errors, try_cast=try_cast, mgr=mgr) # get the result, may need to transpose the other @@ -1337,7 +1345,7 @@ def get_result(other): # error handler if we have an issue operating with the function def handle_error(): - if raise_on_error: + if errors == 'raise': # The 'detail' variable is defined in outer scope. raise TypeError('Could not operate %s with block values %s' % (repr(other), str(detail))) # noqa @@ -1383,7 +1391,7 @@ def handle_error(): result = _block_shape(result, ndim=self.ndim) return [self.make_block(result, fastpath=True, )] - def where(self, other, cond, align=True, raise_on_error=True, + def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): """ evaluate the block; return result block(s) from the result @@ -1393,8 +1401,10 @@ def where(self, other, cond, align=True, raise_on_error=True, other : a ndarray/object cond : the condition to respect align : boolean, perform alignment on other/cond - raise_on_error : if True, raise when I can't perform the function, - False by default (and just return the data that we had coming in) + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. 
On error return original object + axis : int transpose : boolean Set to True if self is stored with axes reversed @@ -1404,6 +1414,7 @@ def where(self, other, cond, align=True, raise_on_error=True, a new block(s), the result of the func """ import pandas.core.computation.expressions as expressions + assert errors in ['raise', 'ignore'] values = self.values orig_other = other @@ -1436,9 +1447,9 @@ def func(cond, values, other): try: return self._try_coerce_result(expressions.where( - cond, values, other, raise_on_error=True)) + cond, values, other)) except Exception as detail: - if raise_on_error: + if errors == 'raise': raise TypeError('Could not operate [%s] with block values ' '[%s]' % (repr(other), str(detail))) else: @@ -1454,10 +1465,10 @@ def func(cond, values, other): except TypeError: # we cannot coerce, return a compat dtype - # we are explicity ignoring raise_on_error here + # we are explicity ignoring errors block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond, align=align, - raise_on_error=raise_on_error, + errors=errors, try_cast=try_cast, axis=axis, transpose=transpose) return self._maybe_downcast(blocks, 'infer') @@ -2745,7 +2756,7 @@ def sp_index(self): def kind(self): return self.values.kind - def _astype(self, dtype, copy=False, raise_on_error=True, values=None, + def _astype(self, dtype, copy=False, errors='raise', values=None, klass=None, mgr=None, **kwargs): if values is None: values = self.values diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 506b9267f32b4f..f0bd2477eec07f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -671,8 +671,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, **eval_kwargs) + result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): dtype = find_common_type([x.dtype, y.dtype]) @@ -1196,8 +1195,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, **eval_kwargs) + result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: xrav = x.ravel() if isinstance(y, (np.ndarray, ABCSeries)): @@ -1329,7 +1327,7 @@ def f(self, other): # straight boolean comparisions we want to allow all columns # (regardless of dtype to pass thru) See #4537 for discussion. res = self._combine_const(other, func, - raise_on_error=False, + errors='ignore', try_cast=False) return res.fillna(True).astype(bool) @@ -1354,8 +1352,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, **eval_kwargs) + result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: # TODO: might need to find_common_type here? 
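The ``na_op`` hunks around this point all drop ``raise_on_error`` from the same pattern: try the numexpr-backed ``expressions.evaluate`` first, then fall back to the plain elementwise operation on ``TypeError``. A standalone sketch of that fallback idea (a simplification, not pandas' actual implementation; ``numexpr`` is an optional dependency that may be absent):

import numpy as np

def add_with_fallback(a, b):
    # Fast path: let numexpr evaluate the expression when it is available
    # and the operand types are supported; otherwise fall back to numpy.
    try:
        import numexpr as ne
        return ne.evaluate('a + b')
    except (ImportError, TypeError, ValueError):
        return a + b

x = np.arange(5, dtype='float64')
add_with_fallback(x, x)  # array([0., 2., 4., 6., 8.])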
@@ -1385,8 +1382,7 @@ def na_op(x, y): import pandas.core.computation.expressions as expressions try: - result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True) + result = expressions.evaluate(op, str_rep, x, y) except TypeError: xrav = x.ravel() result = np.empty(x.size, dtype=bool) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index fad6a39223b9e1..14fba9560cae25 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1527,7 +1527,7 @@ def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, - raise_on_error=True, + errors='raise', **eval_kwargs) except TypeError: result = op(x, y) diff --git a/pandas/core/series.py b/pandas/core/series.py index 43b7f1d043e4d9..49b6a6651367b8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -253,7 +253,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, # create/copy the manager if isinstance(data, SingleBlockManager): if dtype is not None: - data = data.astype(dtype=dtype, raise_on_error=False, + data = data.astype(dtype=dtype, errors='ignore', copy=copy) elif copy: data = data.copy() diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index a43aad9a0204eb..1b45b180b8dc12 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -638,7 +638,7 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None, new_data, index=self.index, columns=union, default_fill_value=self.default_fill_value).__finalize__(self) - def _combine_const(self, other, func, raise_on_error=True, try_cast=True): + def _combine_const(self, other, func, errors='raise', try_cast=True): return self._apply_columns(lambda x: func(x, other)) def _reindex_index(self, index, method, copy, level, fill_value=np.nan, diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 0ca319565e24bd..75ae47ed2fdc11 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1096,6 +1096,17 @@ def test_take(self): with tm.assert_produces_warning(FutureWarning): s.take([-1, 3, 4], convert=False) + def test_where_raise_on_error_deprecation(self): + + # gh-14968 + # deprecation of raise_on_error + s = Series(np.random.randn(5)) + cond = s > 0 + with tm.assert_produces_warning(FutureWarning): + s.where(cond, raise_on_error=True) + with tm.assert_produces_warning(FutureWarning): + s.mask(cond, raise_on_error=True) + def test_where(self): s = Series(np.random.randn(5)) cond = s > 0 diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 01bf7274fd3849..bd4e8b23f31b4b 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -292,15 +292,15 @@ def test_fillna_consistency(self): dtype='object') assert_series_equal(result, expected) - # where (we ignore the raise_on_error) + # where (we ignore the errors=) result = s.where([True, False], Timestamp('20130101', tz='US/Eastern'), - raise_on_error=False) + errors='ignore') assert_series_equal(result, expected) result = s.where([True, False], Timestamp('20130101', tz='US/Eastern'), - raise_on_error=True) + errors='ignore') assert_series_equal(result, expected) # with a non-datetime diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2b972477ae999d..6d2607962dfb0b 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -124,6 +124,7 @@ def run_binary(self, df, other, assert_func, test_flex=False, expr._MIN_ELEMENTS = 0 
expr.set_test_mode(True) operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne'] + for arith in operations: if test_flex: op = lambda x, y: getattr(df, arith)(y) From 22515f58c178cdb5cd38c4e56f26dc91c7053550 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Oct 2017 13:37:16 -0500 Subject: [PATCH 29/76] CI: Unpin Miniconda for CI (#17752) --- appveyor.yml | 2 +- ci/install.ps1 | 4 ++-- ci/install_circle.sh | 6 ++---- ci/install_travis.sh | 10 +++------- ci/requirements-2.7.run | 2 +- ci/requirements-2.7_LOCALE.run | 2 +- ci/requirements-2.7_WIN.run | 2 +- 7 files changed, 11 insertions(+), 17 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index f1259f271ee395..a1f8886f6d068f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -59,7 +59,7 @@ install: # install our build environment - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false - # - cmd: conda update -q conda + - cmd: conda update -q conda - cmd: conda config --set ssl_verify false # add the pandas channel *before* defaults to have defaults take priority diff --git a/ci/install.ps1 b/ci/install.ps1 index b784b4ebf5e6ac..64ec7f81884cd1 100644 --- a/ci/install.ps1 +++ b/ci/install.ps1 @@ -7,7 +7,7 @@ $MINICONDA_URL = "http://repo.continuum.io/miniconda/" function DownloadMiniconda ($python_version, $platform_suffix) { $webclient = New-Object System.Net.WebClient - $filename = "Miniconda3-4.3.21-Windows-" + $platform_suffix + ".exe" + $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe" $url = $MINICONDA_URL + $filename $basedir = $pwd.Path + "\" @@ -85,7 +85,7 @@ function UpdateConda ($python_home) { function main () { InstallMiniconda "3.5" $env:PYTHON_ARCH $env:CONDA_ROOT - # UpdateConda $env:CONDA_ROOT + UpdateConda $env:CONDA_ROOT InstallCondaPackages $env:CONDA_ROOT "conda-build jinja2 anaconda-client" } diff --git a/ci/install_circle.sh b/ci/install_circle.sh index eba98be561397d..fd79f907625e9d 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -10,9 +10,7 @@ echo "[Using clean Miniconda install]" rm -rf "$MINICONDA_DIR" # install miniconda -# wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 -# Pin miniconda -wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-Linux-x86_64.sh -q -O miniconda.sh || exit 1 +wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 export PATH="$MINICONDA_DIR/bin:$PATH" @@ -20,7 +18,7 @@ export PATH="$MINICONDA_DIR/bin:$PATH" echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 -# conda update -q conda +conda update -q conda # add the pandas channel to take priority # to add extra packages diff --git a/ci/install_travis.sh b/ci/install_travis.sh index faf404ddcd2931..b85263daa1eaca 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -34,13 +34,9 @@ fi # install miniconda if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - # temporarily pin miniconda - # time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 - time wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-MacOSX-x86_64.sh -O miniconda.sh || exit 1 + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 else - # temporarily pin miniconda - # time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 
-O miniconda.sh || exit 1 - time wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-Linux-x86_64.sh -O miniconda.sh || exit 1 + time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 fi time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 @@ -52,7 +48,7 @@ echo echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set quiet true --set always_yes true --set changeps1 false || exit 1 -# conda update -q conda +conda update -q conda echo echo "[add channels]" diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index 7152cb2c8b605f..a68e1d256058df 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -8,7 +8,7 @@ matplotlib openpyxl=1.6.2 xlrd=0.9.2 sqlalchemy=0.9.6 -lxml=3.2.1 +lxml scipy xlsxwriter=0.5.2 s3fs diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 00006106f7009c..978bbf6a051c51 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -8,5 +8,5 @@ xlrd=0.9.2 bottleneck=1.0.0 matplotlib=1.4.3 sqlalchemy=0.8.1 -lxml=3.2.1 +lxml scipy diff --git a/ci/requirements-2.7_WIN.run b/ci/requirements-2.7_WIN.run index a81542ee5006c7..c4ca7fc736bb17 100644 --- a/ci/requirements-2.7_WIN.run +++ b/ci/requirements-2.7_WIN.run @@ -8,7 +8,7 @@ matplotlib openpyxl xlrd sqlalchemy -lxml=3.2.1 +lxml scipy xlsxwriter s3fs From 556b79125c9821837a5f3b777ccfab69c2657941 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Oct 2017 18:11:15 -0500 Subject: [PATCH 30/76] BUG/API: Raise when extension class passed to astype (#17796) Closes https://github.com/pandas-dev/pandas/issues/17780 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/internals.py | 6 ++++++ pandas/tests/frame/test_dtypes.py | 14 ++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7fbf2533428dc4..5f0af8859a1334 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -768,6 +768,7 @@ Conversion - Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) - Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`) - Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`) +- Bug in :meth:`~DataFrame.astype` converting to object dtype when passeed extension type classes (`DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`). Indexing ^^^^^^^^ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index a8f1a0c78c2384..689f5521e1ccb5 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,6 +1,7 @@ import warnings import copy from warnings import catch_warnings +import inspect import itertools import re import operator @@ -552,6 +553,11 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, list(errors_legal_values), errors)) raise ValueError(invalid_arg) + if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): + msg = ("Expected an instance of {}, but got the class instead. 
" + "Try instantiating 'dtype'.".format(dtype.__name__)) + raise TypeError(msg) + # may need to convert to categorical # this is only called for non-categoricals if self.is_categorical_astype(dtype): diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 5941b2ab7c2cbf..abb528f0d21793 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -612,6 +612,20 @@ def test_astype_duplicate_col(self): expected = concat([a1_str, b, a2_str], axis=1) assert_frame_equal(result, expected) + @pytest.mark.parametrize("cls", [ + pd.api.types.CategoricalDtype, + pd.api.types.DatetimeTZDtype, + pd.api.types.IntervalDtype + ]) + def test_astype_categoricaldtype_class_raises(self, cls): + df = DataFrame({"A": ['a', 'a', 'b', 'c']}) + xpr = "Expected an instance of {}".format(cls.__name__) + with tm.assert_raises_regex(TypeError, xpr): + df.astype({"A": cls}) + + with tm.assert_raises_regex(TypeError, xpr): + df['A'].astype(cls) + def test_timedeltas(self): df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3, freq='D')), From 1bc238ab4244fb37a143da5d28527f4ad94f1af4 Mon Sep 17 00:00:00 2001 From: Juarez Bochi Date: Thu, 5 Oct 2017 21:47:12 -0400 Subject: [PATCH 31/76] Fix 17788: Fix documentation that mentioned the deprecated `how` parameter (#17801) --- doc/source/timeseries.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index d2d5ee344591ae..36ffe8806f3738 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1456,8 +1456,9 @@ The ``resample`` function is very flexible and allows you to specify many different parameters to control the frequency conversion and resampling operation. -The ``how`` parameter can be a function name or numpy array function that takes -an array and produces aggregated values: +Any function available via :ref:`dispatching ` is available as +a method of the returned object, including ``sum``, ``mean``, ``std``, ``sem``, +``max``, ``min``, ``median``, ``first``, ``last``, ``ohlc``: .. ipython:: python @@ -1467,9 +1468,6 @@ an array and produces aggregated values: ts.resample('5Min').max() -Any function available via :ref:`dispatching ` can be given to -the ``how`` parameter by name, including ``sum``, ``mean``, ``std``, ``sem``, -``max``, ``min``, ``median``, ``first``, ``last``, ``ohlc``. For downsampling, ``closed`` can be set to 'left' or 'right' to specify which end of the interval is closed: From a211b51dea605cf6c991ac42e8286ca1fee5be2c Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 6 Oct 2017 09:15:32 +0200 Subject: [PATCH 32/76] DOC: add references for different index types + examples for pd.Index (#17680) --- doc/source/api.rst | 49 ++++++++++++++++++++++++++++-- doc/sphinxext/numpydoc/numpydoc.py | 2 +- pandas/core/indexes/base.py | 17 +++++++++++ pandas/core/indexes/datetimes.py | 7 ++++- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 1 + pandas/core/indexes/numeric.py | 10 +++--- pandas/core/indexes/period.py | 7 +++++ pandas/core/indexes/range.py | 15 +++++++-- pandas/core/indexes/timedeltas.py | 7 +++++ 10 files changed, 105 insertions(+), 12 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 28d4567027572f..d98a18e6f7e363 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1626,6 +1626,52 @@ Conversion .. currentmodule:: pandas +PeriodIndex +-------------- + +.. 
autosummary:: + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + PeriodIndex + +Attributes +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + PeriodIndex.day + PeriodIndex.dayofweek + PeriodIndex.dayofyear + PeriodIndex.days_in_month + PeriodIndex.daysinmonth + PeriodIndex.end_time + PeriodIndex.freq + PeriodIndex.freqstr + PeriodIndex.hour + PeriodIndex.is_leap_year + PeriodIndex.minute + PeriodIndex.month + PeriodIndex.quarter + PeriodIndex.qyear + PeriodIndex.second + PeriodIndex.start_time + PeriodIndex.week + PeriodIndex.weekday + PeriodIndex.weekofyear + PeriodIndex.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + PeriodIndex.asfreq + PeriodIndex.strftime + PeriodIndex.to_timestamp + PeriodIndex.tz_convert + PeriodIndex.tz_localize + Scalars ------- @@ -1653,13 +1699,11 @@ Attributes Period.is_leap_year Period.minute Period.month - Period.now Period.ordinal Period.quarter Period.qyear Period.second Period.start_time - Period.strftime Period.week Period.weekday Period.weekofyear @@ -1671,6 +1715,7 @@ Methods :toctree: generated/ Period.asfreq + Period.now Period.strftime Period.to_timestamp diff --git a/doc/sphinxext/numpydoc/numpydoc.py b/doc/sphinxext/numpydoc/numpydoc.py index f06915997c6162..680983cdf6443e 100755 --- a/doc/sphinxext/numpydoc/numpydoc.py +++ b/doc/sphinxext/numpydoc/numpydoc.py @@ -45,7 +45,7 @@ def mangle_docstrings(app, what, name, obj, options, lines, # PANDAS HACK (to remove the list of methods/attributes for Categorical) no_autosummary = [".Categorical", "CategoricalIndex", "IntervalIndex", "RangeIndex", "Int64Index", "UInt64Index", - "Float64Index"] + "Float64Index", "PeriodIndex"] if what == "class" and any(name.endswith(n) for n in no_autosummary): cfg['class_members_list'] = False diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c4e1398d0178fe..cc917ea3503bc1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -123,6 +123,23 @@ class Index(IndexOpsMixin, PandasObject): Notes ----- An Index instance can **only** contain hashable objects + + Examples + -------- + >>> pd.Index([1, 2, 3]) + Int64Index([1, 2, 3], dtype='int64') + + >>> pd.Index(list('abc')) + Index(['a', 'b', 'c'], dtype='object') + + See Also + --------- + RangeIndex : Index implementing a monotonic integer range + CategoricalIndex : Index of :class:`Categorical` s. + MultiIndex : A multi-level, or hierarchical, Index + IntervalIndex : an Index of :class:`Interval` s. + DatetimeIndex, TimedeltaIndex, PeriodIndex + Int64Index, UInt64Index, Float64Index """ # To hand over control to subclasses _join_precedence = 1 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1419da3fa8861b..862bc51ada9d2a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -208,9 +208,14 @@ class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, Notes ----- - To learn more about the frequency strings, please see `this link `__. 
+ + See Also + --------- + Index : The base pandas Index type + TimedeltaIndex : Index of timedelta64 data + PeriodIndex : Index of Period data """ _typ = 'datetimeindex' diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 29699f664bbf34..7bf7cfce515a15 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -147,7 +147,7 @@ class IntervalIndex(IntervalMixin, Index): See Also -------- - Index + Index : The base pandas Index type Interval : A bounded slice-like interval interval_range : Function to create a fixed frequency IntervalIndex, IntervalIndex.from_arrays, IntervalIndex.from_breaks, diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9ffac0832062d0..d200642a9f28f6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -95,6 +95,7 @@ class MultiIndex(Index): MultiIndex.from_product : Create a MultiIndex from the cartesian product of iterables MultiIndex.from_tuples : Convert list of tuples to a MultiIndex + Index : The base pandas Index type """ # initialize to zero-length tuples to make everything work diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 142e0f36c66ec8..9fc47ad7b773cf 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -108,19 +108,21 @@ def is_all_dates(self): Make a copy of input ndarray name : object Name to be stored in the index + Notes ----- An Index instance can **only** contain hashable objects. + + See also + -------- + Index : The base pandas Index type """ _int64_descr_args = dict( klass='Int64Index', ltype='integer', dtype='int64', - extra="""This is the default index type used - by the DataFrame and Series ctors when no explicit - index is provided by the user. -""" + extra='' ) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e6fc47845012a4..b70b4c4e4067c3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -164,6 +164,13 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): >>> idx = PeriodIndex(year=year_arr, quarter=q_arr) >>> idx2 = PeriodIndex(start='2000', end='2010', freq='A') + + See Also + --------- + Index : The base pandas Index type + Period : Represents a period of time + DatetimeIndex : Index with datetime64 data + TimedeltaIndex : Index of timedelta64 data """ _box_scalars = True _typ = 'periodindex' diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a3b899d58255b1..9f7bac641ae08d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -23,9 +23,14 @@ class RangeIndex(Int64Index): """ - Immutable Index implementing a monotonic range. RangeIndex is a - memory-saving special case of Int64Index limited to representing - monotonic ranges. + Immutable Index implementing a monotonic integer range. + + RangeIndex is a memory-saving special case of Int64Index limited to + representing monotonic ranges. Using RangeIndex may in some instances + improve computing speed. + + This is the default index type used + by DataFrame and Series when no explicit index is provided by the user. Parameters ---------- @@ -38,6 +43,10 @@ class RangeIndex(Int64Index): copy : bool, default False Unused, accepted for homogeneity with other index types. 
+ See Also + -------- + Index : The base pandas Index type + Int64Index : Index of int64 data """ _typ = 'rangeindex' diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 12b7936503ad70..89757c2bf40da2 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -114,6 +114,13 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, TimelikeOps, Int64Index): To learn more about the frequency strings, please see `this link `__. + + See Also + --------- + Index : The base pandas Index type + Timedelta : Represents a duration between two dates or times. + DatetimeIndex : Index of datetime64 data + PeriodIndex : Index of Period data """ _typ = 'timedeltaindex' From 5653fe41b41fa1b60f3a7edf88a47f29ec7868ee Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 6 Oct 2017 10:03:30 +0200 Subject: [PATCH 33/76] DOC: some typos in whatsnew (#17802) --- doc/source/remote_data.rst | 18 +++++++++--------- doc/source/whatsnew/v0.21.0.txt | 12 ++++++------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst index 9af66058a7aaa3..aba70ccbcc9fbe 100644 --- a/doc/source/remote_data.rst +++ b/doc/source/remote_data.rst @@ -13,18 +13,18 @@ DataReader The sub-package ``pandas.io.data`` was deprecated in v.0.17 and removed in `v.0.19 `__. - Instead there has been created a separately installable `pandas-datareader package -`_. This will allow the data -modules to be independently updated on your pandas installation. +Instead there has been created a separately installable +`pandas-datareader package `__. +This will allow the data modules to be independently updated on your pandas installation. - For code older than < 0.19 you should replace the imports of the following: +For code older than < 0.19 you should replace the imports of the following: - .. code-block:: python +.. code-block:: python - from pandas.io import data, wb + from pandas.io import data, wb - With: +With: - .. code-block:: python +.. code-block:: python - from pandas_datareader import data, wb + from pandas_datareader import data, wb diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5f0af8859a1334..94e4700a59f248 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -344,7 +344,7 @@ Current Behavior 3 NaN dtype: float64 -The idiomatic way to achieve selecting potentially not-found elmenents is via ``.reindex()`` +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()`` .. ipython:: python @@ -453,7 +453,7 @@ New Behaviour: Dtype Conversions ^^^^^^^^^^^^^^^^^ -Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to same the type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). +Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to same the type (e.g. int / float), or raise for datetimelikes. These will now preserve the bools with ``object`` dtypes. (:issue:`16821`). .. ipython:: python @@ -638,7 +638,7 @@ Other API Changes - :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. 
(:issue:`16503`). -- Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) +- Compression defaults in HDF stores now follow pytables standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) - ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) - Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) - Moved definition of ``MergeError`` to the ``pandas.errors`` module. @@ -666,7 +666,7 @@ Deprecations - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`) - passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`) - ``.get_value`` and ``.set_value`` on ``Series``, ``DataFrame``, ``Panel``, ``SparseSeries``, and ``SparseDataFrame`` are deprecated in favor of using ``.iat[]`` or ``.at[]`` accessors (:issue:`15269`) -- Passing a non-existant column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) +- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) - ``raise_on_error`` parameter to :func:`Series.where`, :func:`Series.mask`, :func:`DataFrame.where`, :func:`DataFrame.mask` is deprecated, in favor of ``errors=`` (:issue:`14968`) .. _whatsnew_0210.deprecations.select: @@ -768,7 +768,7 @@ Conversion - Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) - Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`) - Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`) -- Bug in :meth:`~DataFrame.astype` converting to object dtype when passeed extension type classes (`DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`). +- Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (``DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`). 
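The ``astype`` bullet just above corresponds to behavior like the following (a short sketch mirroring the tests added in PATCH 30 earlier in this series; the printed message comes from that patch):

import pandas as pd

df = pd.DataFrame({"A": ['a', 'a', 'b', 'c']})

# Passing an *instance* (or the string alias) works as before.
df['A'].astype(pd.api.types.CategoricalDtype())
df['A'].astype('category')

# Passing the dtype *class* itself now raises.
try:
    df['A'].astype(pd.api.types.CategoricalDtype)
except TypeError as exc:
    print(exc)
# Expected an instance of CategoricalDtype, but got the class instead.
# Try instantiating 'dtype'.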
Indexing ^^^^^^^^ @@ -827,7 +827,7 @@ Groupby/Resample/Rolling - Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) -- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) +- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1`` (:issue:`15305`) - Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) - Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`) - Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`) From 4379d04cd933fdaa2b9cbc9be4169cab2c506a92 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 6 Oct 2017 13:46:31 +0200 Subject: [PATCH 34/76] DOC: Add examples to MultiIndex.slice_locs + note that index.slice requires (#17799) --- pandas/core/indexes/base.py | 13 +++++++++++++ pandas/core/indexes/multi.py | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cc917ea3503bc1..0a55559750d7c8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3589,6 +3589,19 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): ------- start, end : int + Notes + ----- + This method only works if the index is monotonic or unique. + + Examples + --------- + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_locs(start='b', end='c') + (1, 3) + + See Also + -------- + Index.get_loc : Get location for a single label """ inc = (step is None or step >= 0) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d200642a9f28f6..4b6e31133ba4b0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1925,7 +1925,9 @@ def get_slice_bound(self, label, side, kind): def slice_locs(self, start=None, end=None, step=None, kind=None): """ For an ordered MultiIndex, compute the slice locations for input - labels. They can be tuples representing partial levels, e.g. for a + labels. + + The input labels can be tuples representing partial levels, e.g. for a MultiIndex with 3 levels, you can pass a single value (corresponding to the first level), or a 1-, 2-, or 3-tuple. @@ -1945,7 +1947,32 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): Notes ----- - This function assumes that the data is sorted by the first level + This method only works if the MultiIndex is properly lex-sorted. So, + if only the first 2 levels of a 3-level MultiIndex are lexsorted, + you can only pass two levels to ``.slice_locs``. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')], + ... 
names=['A', 'B']) + + Get the slice locations from the beginning of 'b' in the first level until the end of the multiindex: + + >>> mi.slice_locs(start='b') + (1, 4) + + Like above, but stop at the end of 'b' in the first level and 'f' in the second level: + + >>> mi.slice_locs(start='b', end=('b', 'f')) + (1, 3) + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. """ # This function adds nothing to its parent implementation (the magic # happens in get_slice_bound method), but it adds meaningful doc. @@ -2016,6 +2043,8 @@ def get_loc(self, key, method=None): See also -------- Index.get_loc : get_loc method for (single-level) index. + MultiIndex.slice_locs : Get slice location given start label(s) and + end label(s). MultiIndex.get_locs : Get location for a label/slice/list/mask or a sequence of such. """ @@ -2369,6 +2398,8 @@ def get_locs(self, seq): See also -------- MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.slice_locs : Get slice location given start label(s) and + end label(s). """ # must be lexsorted to at least as many levels From 3b4121b5fca88a84bcc9dba56af568c8a2f901c0 Mon Sep 17 00:00:00 2001 From: Simon Gibbons Date: Fri, 6 Oct 2017 15:08:22 +0100 Subject: [PATCH 35/76] ENH: Add transparent compression to json reading/writing (#17798) * ENH: Add transparent compression to json reading/writing This works in the same way as the argument to ``read_csv`` and ``to_csv``. I've added tests confirming that it works with both file paths, as well as file URLs and S3 URLs. * Fix PEP8 violations * Add PR number to whatsnew entry * Remove problematic Windows test (The S3 test hits the same edge case) * Extract decompress file function so that pytest.parametrize can be used cleanly * Fix typo in whatsnew entry --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/generic.py | 10 +- pandas/io/json/json.py | 46 ++++-- .../tests/io/json/data/tsframe_v012.json.zip | Bin 0 -> 436 bytes pandas/tests/io/json/test_compression.py | 133 ++++++++++++++++++ pandas/tests/io/json/test_readlines.py | 2 +- 6 files changed, 174 insertions(+), 19 deletions(-) create mode 100644 pandas/tests/io/json/data/tsframe_v012.json.zip create mode 100644 pandas/tests/io/json/test_compression.py diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 94e4700a59f248..c881f53d84f7ba 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -195,7 +195,7 @@ Other Enhancements - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`) - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names - Improved the import time of pandas by about 2.25x (:issue:`16764`) - +- :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`) .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 942a9ff2790927..c7ae9bbee90130 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1258,7 +1258,7 @@ def _repr_latex_(self): def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', - default_handler=None, lines=False): + default_handler=None, lines=False, compression=None): """ Convert the object to a JSON string. @@ -1320,6 +1320,12 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, .. versionadded:: 0.19.0 + compression : {None, 'gzip', 'bz2', 'xz'} + A string representing the compression to use in the output file, + only used when the first argument is a filename + + .. versionadded:: 0.21.0 + Returns ------- same type as input object with filtered info axis @@ -1372,7 +1378,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=double_precision, force_ascii=force_ascii, date_unit=date_unit, default_handler=default_handler, - lines=lines) + lines=lines, compression=compression) def to_hdf(self, path_or_buf, key, **kwargs): """Write the contained data to an HDF5 file using HDFStore. diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index ab74b265b6a067..be39f4baba0fb4 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -9,7 +9,8 @@ from pandas import compat, isna from pandas import Series, DataFrame, to_datetime, MultiIndex from pandas.io.common import (get_filepath_or_buffer, _get_handle, - _stringify_path, BaseIterator) + _infer_compression, _stringify_path, + BaseIterator) from pandas.io.parsers import _validate_integer from pandas.core.common import AbstractMethodError from pandas.core.reshape.concat import concat @@ -27,7 +28,7 @@ # interface to/from def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision=10, force_ascii=True, date_unit='ms', - default_handler=None, lines=False): + default_handler=None, lines=False, compression=None): path_or_buf = _stringify_path(path_or_buf) if lines and orient != 'records': @@ -54,8 +55,11 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', s = _convert_to_line_delimits(s) if isinstance(path_or_buf, compat.string_types): - with open(path_or_buf, 'w') as fh: + fh, handles = _get_handle(path_or_buf, 'w', compression=compression) + try: fh.write(s) + finally: + fh.close() elif path_or_buf is None: return s else: @@ -178,7 +182,7 @@ def write(self): def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, convert_axes=True, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, - lines=False, chunksize=None): + lines=False, chunksize=None, compression='infer'): """ Convert a JSON string to pandas object @@ -277,6 +281,15 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, .. versionadded:: 0.21.0 + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, zip or xz if path_or_buf is a string ending in + '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression + otherwise. If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. + + .. versionadded:: 0.21.0 + Returns ------- result : Series or DataFrame, depending on the value of `typ`. 
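In practice the new keyword round-trips like this (a small usage sketch; ``frame.json.gz`` is a hypothetical file name chosen for illustration):

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

# Write gzip-compressed JSON; the path is made up for this example.
df.to_json('frame.json.gz', compression='gzip')

# read_json defaults to compression='infer' and keys off the '.gz' suffix.
roundtripped = pd.read_json('frame.json.gz')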
@@ -334,15 +347,17 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, {"index": "row 2", "col 1": "c", "col 2": "d"}]}' """ - filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf, - encoding=encoding) + compression = _infer_compression(path_or_buf, compression) + filepath_or_buffer, _, compression = get_filepath_or_buffer( + path_or_buf, encoding=encoding, compression=compression, + ) json_reader = JsonReader( filepath_or_buffer, orient=orient, typ=typ, dtype=dtype, convert_axes=convert_axes, convert_dates=convert_dates, keep_default_dates=keep_default_dates, numpy=numpy, precise_float=precise_float, date_unit=date_unit, encoding=encoding, - lines=lines, chunksize=chunksize + lines=lines, chunksize=chunksize, compression=compression, ) if chunksize: @@ -361,7 +376,7 @@ class JsonReader(BaseIterator): """ def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, - date_unit, encoding, lines, chunksize): + date_unit, encoding, lines, chunksize, compression): self.path_or_buf = filepath_or_buffer self.orient = orient @@ -374,6 +389,7 @@ def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes, self.precise_float = precise_float self.date_unit = date_unit self.encoding = encoding + self.compression = compression self.lines = lines self.chunksize = chunksize self.nrows_seen = 0 @@ -415,20 +431,20 @@ def _get_data_from_filepath(self, filepath_or_buffer): data = filepath_or_buffer + exists = False if isinstance(data, compat.string_types): try: exists = os.path.exists(filepath_or_buffer) - # gh-5874: if the filepath is too long will raise here except (TypeError, ValueError): pass - else: - if exists: - data, _ = _get_handle(filepath_or_buffer, 'r', - encoding=self.encoding) - self.should_close = True - self.open_stream = data + if exists or self.compression is not None: + data, _ = _get_handle(filepath_or_buffer, 'r', + encoding=self.encoding, + compression=self.compression) + self.should_close = True + self.open_stream = data return data diff --git a/pandas/tests/io/json/data/tsframe_v012.json.zip b/pandas/tests/io/json/data/tsframe_v012.json.zip new file mode 100644 index 0000000000000000000000000000000000000000..100ba0c87b2ba55c169081bb0ed60c5db7391bbb GIT binary patch literal 436 zcmWIWW@Zs#-~d8>PgidSBp}Ejz)(`0R+N~V8ee8$Xrz}_oSzpO!Nb60eJyg=i>r~} z7)2P4PTcFqY$(uj|LLnEw<6!?Th+y}ylfKDbYKphQr@pG)b!*{7t{95#=p{PX2~tP zo9VSN!2DO`Wj2tkn(477rQ0RX7Wsm1^R literal 0 HcmV?d00001 diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py new file mode 100644 index 00000000000000..e9976da6f67748 --- /dev/null +++ b/pandas/tests/io/json/test_compression.py @@ -0,0 +1,133 @@ +import pytest +import moto + +import pandas as pd +from pandas import compat +import pandas.util.testing as tm +from pandas.util.testing import assert_frame_equal, assert_raises_regex + + +COMPRESSION_TYPES = [None, 'bz2', 'gzip', 'xz'] + + +def decompress_file(path, compression): + if compression is None: + f = open(path, 'rb') + elif compression == 'gzip': + import gzip + f = gzip.GzipFile(path, 'rb') + elif compression == 'bz2': + import bz2 + f = bz2.BZ2File(path, 'rb') + elif compression == 'xz': + lzma = compat.import_lzma() + f = lzma.open(path, 'rb') + else: + msg = 'Unrecognized compression type: {}'.format(compression) + raise ValueError(msg) + + result = f.read().decode('utf8') + f.close() + return result + + +@pytest.mark.parametrize('compression', COMPRESSION_TYPES) +def 
test_compression_roundtrip(compression): + if compression == 'xz': + tm._skip_if_no_lzma() + + df = pd.DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with tm.ensure_clean() as path: + df.to_json(path, compression=compression) + assert_frame_equal(df, pd.read_json(path, compression=compression)) + + # explicitly ensure file was compressed. + uncompressed_content = decompress_file(path, compression) + assert_frame_equal(df, pd.read_json(uncompressed_content)) + + +def test_compress_zip_value_error(): + df = pd.DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with tm.ensure_clean() as path: + import zipfile + pytest.raises(zipfile.BadZipfile, df.to_json, path, compression="zip") + + +def test_read_zipped_json(): + uncompressed_path = tm.get_data_path("tsframe_v012.json") + uncompressed_df = pd.read_json(uncompressed_path) + + compressed_path = tm.get_data_path("tsframe_v012.json.zip") + compressed_df = pd.read_json(compressed_path, compression='zip') + + assert_frame_equal(uncompressed_df, compressed_df) + + +@pytest.mark.parametrize('compression', COMPRESSION_TYPES) +def test_with_s3_url(compression): + boto3 = pytest.importorskip('boto3') + pytest.importorskip('s3fs') + if compression == 'xz': + tm._skip_if_no_lzma() + + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with moto.mock_s3(): + conn = boto3.resource("s3", region_name="us-east-1") + bucket = conn.create_bucket(Bucket="pandas-test") + + with tm.ensure_clean() as path: + df.to_json(path, compression=compression) + with open(path, 'rb') as f: + bucket.put_object(Key='test-1', Body=f) + + roundtripped_df = pd.read_json('s3://pandas-test/test-1', + compression=compression) + assert_frame_equal(df, roundtripped_df) + + +@pytest.mark.parametrize('compression', COMPRESSION_TYPES) +def test_lines_with_compression(compression): + if compression == 'xz': + tm._skip_if_no_lzma() + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + df.to_json(path, orient='records', lines=True, compression=compression) + roundtripped_df = pd.read_json(path, lines=True, + compression=compression) + assert_frame_equal(df, roundtripped_df) + + +@pytest.mark.parametrize('compression', COMPRESSION_TYPES) +def test_chunksize_with_compression(compression): + if compression == 'xz': + tm._skip_if_no_lzma() + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') + df.to_json(path, orient='records', lines=True, compression=compression) + + roundtripped_df = pd.concat(pd.read_json(path, lines=True, chunksize=1, + compression=compression)) + assert_frame_equal(df, roundtripped_df) + + +def test_write_unsupported_compression_type(): + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + assert_raises_regex(ValueError, msg, df.to_json, + path, compression="unsupported") + + +def test_read_unsupported_compression_type(): + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + assert_raises_regex(ValueError, msg, pd.read_json, + path, compression="unsupported") diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index d14355b07cf204..95f23e82fced0f 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -128,7 +128,7 @@ 
def test_readjson_chunks_closes(chunksize): path, orient=None, typ="frame", dtype=True, convert_axes=True, convert_dates=True, keep_default_dates=True, numpy=False, precise_float=False, date_unit=None, encoding=None, - lines=True, chunksize=chunksize) + lines=True, chunksize=chunksize, compression=None) reader.read() assert reader.open_stream.closed, "didn't close stream with \ chunksize = %s" % chunksize From 5bb693a85ef061520936a4feeea30fd382918a76 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 6 Oct 2017 11:22:33 -0400 Subject: [PATCH 36/76] BLD: fix setup.py for xref #17798 (#17804) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 23457c6f4edc1f..365d387dc54d63 100755 --- a/setup.py +++ b/setup.py @@ -720,7 +720,7 @@ def pxd(name): 'sas/data/*.sas7bdat', 'data/*.html', 'data/html_encoding/*.html', - 'json/data/*.json'], + 'json/data/*.json*'], 'pandas.tests.io.formats': ['data/*.csv'], 'pandas.tests.io.msgpack': ['data/*.mp'], 'pandas.tests.reshape': ['data/*.csv'], From 1335090b5da9706419b60e610997c10d9a023fc8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 6 Oct 2017 11:37:24 -0400 Subject: [PATCH 37/76] DOC: sub-section on boolean Index array changes (#17803) closes #17740 --- doc/source/whatsnew/v0.21.0.txt | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c881f53d84f7ba..2b748d50c3fcaa 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -356,6 +356,61 @@ Selection with all keys found is unchanged. s.loc[[1, 2]] +.. _whatsnew_0210.api_breaking.loc_with_index: + +Indexing with a Boolean Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously when passing a boolean ``Index`` to ``.loc``, if the index of the ``Series/DataFrame`` had ``boolean`` labels, +you would get a label based selection, potentially duplicating result labels, rather than a boolean indexing selection +(where ``True`` selects elements); this was inconsistent with how a boolean numpy array indexes. The new behavior is to +act like a boolean numpy array indexer. (:issue:`17738`) + +Previous Behavior: + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=[False, True, False]) + s + +.. code-block:: ipython + + In [59]: s.loc[pd.Index([True, False, True])] + Out[59]: + True 2 + False 1 + False 3 + True 2 + dtype: int64 + +Current Behavior: + +.. ipython:: python + + s.loc[pd.Index([True, False, True])] + + +Furthermore, previously if you had an index that was non-numeric (e.g. strings), then a boolean Index would raise a ``KeyError``. +This will now be treated as a boolean indexer. + +Previous Behavior: + +.. ipython:: python + + s = pd.Series([1,2,3], index=['a', 'b', 'c']) + s + +.. code-block:: ipython + + In [39]: s.loc[pd.Index([True, False, True])] + KeyError: "None of [Index([True, False, True], dtype='object')] are in the [index]" + +Current Behavior: + +.. ipython:: python + + s.loc[pd.Index([True, False, True])] + .. 
 .. _whatsnew_0210.api_breaking.pandas_eval:

From e63c935d5a0705f83fc726932eb82bac4c272106 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 6 Oct 2017 08:38:03 -0700
Subject: [PATCH 38/76] Lock down kwargs in offsets signatures (#17458)

---
 doc/source/whatsnew/v0.21.0.txt               |   1 +
 .../0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle | Bin 0 -> 132762 bytes
 .../0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle | Bin 0 -> 132857 bytes
 .../tests/io/generate_legacy_storage_files.py |  18 ++-
 pandas/tseries/offsets.py                     | 141 ++++++++++--------
 5 files changed, 95 insertions(+), 65 deletions(-)
 create mode 100644 pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle
 create mode 100644 pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 2b748d50c3fcaa..b8b06ee0fe94ea 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -703,6 +703,7 @@ Other API Changes
 - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`)
 - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
 - Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
+- Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted (:issue:`17176`).
 - Pandas no longer registers matplotlib converters on import. The converters
   will be registered and used when the first plot is drawn (:issue:`17710`)
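For context, a minimal stand-alone sketch of the keyword lockdown described in the whatsnew entry above (not part of the diff; ``wekday`` is an illustrative invalid keyword, and the exact error message may differ):

# Stand-alone sketch of the DateOffset keyword restriction described above.
# Not part of the patch; `wekday` is an illustrative invalid keyword.
from pandas.tseries.offsets import Week

Week(weekday=0)     # valid keyword, accepted as before

try:
    Week(wekday=0)  # typo: previously accepted silently, now rejected
except TypeError as exc:
    print(exc)      # e.g. an unexpected-keyword-argument error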
diff --git a/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle b/pandas/tests/io/data/legacy_pickle/0.19.2/0.19.2_x86_64_darwin_2.7.14.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..555be58cc33acbb34a2669809cd7528ba83fb329
GIT binary patch
literal 132762
[132762 bytes of base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_2.7.14.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..963e533c4d2b4bc670b33e10422c5d9b2ac658d0
GIT binary patch
literal 132857
[132857 bytes of base85-encoded binary data omitted]
z2AXK0jSdQQ(L;$o2B=VD2qweim;zH`Dol-OFfFFT^q2uNVkXRtSuiVR!|a#?b7C&c zjd?IH=EMA001ILvER034C>F!wSOQC8DJ+d;uq>9t@>l^YVkNAMRj?{n!|GTAYho>| zjdidt*2DVP02^W>Y>Z8?DK^9A*aBN(D{PHzur0R3_SgYCVkhj3U9c;5!|vDvdtxu_ zjeW2$_QU=-00-hA9E?M7C=SC=9F8M!B#y$-I0nb!I2?}?a3W5^$v6e4;xwF&GjJx( z!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN z!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XEH}EFj z!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf_y*tNJA98H@FRZ0&-ewu;y3(`Kkz61 z!r%A@!$f8NV>k?t5ilY~!pIl}qhd6SjxjJM#=_Vb2jgNqjE@O0Atu7am;{refhJmL zqk{rn^iZOY0V>oOg2^yBrofb#3R7bmOpEC-J!Zg+mVx%J$As3*a0*UCc&g=potdR=%7Fs zJ(TEUfC@E+U@}aODKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5= zupkz~!dL{0Vlga^C9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$r zcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY z_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`zo6c* z(f{`z5Dvp*1dNE0FfvBLs2B~SV+@Rmu`o8q!MGR?<6{C$h>0*UCc&g=potdR=%7Fs zJ(TEUfC@E+U@}aODKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5= zupkz~!dL{0Vlga^C9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$r zcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY z_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`zkvR* zF_`}t9wT5xjD(Rf3P#0f7#(9^OpJxGF%HJXco-iOU_wlUi7^Q#MFUN=&_)LZy6B-q z9|KgVF$9xga!i3KF%_o9G?*6CVS3Df88H)P#w?f>vtf43fjKc3=Egjj7xQ6$EPw^E z5EjNFSQLw4aV&u)u@siZGFTSNVR@{86|oXl#wu79t6_Dlfi{ z5Fg=Ve1cE$89v7s_!3{?YkY%m@g2U$5BL#3;b;7UU-27$#~=6;f8lTZga0k&2^*97 zkKr)_M#M-M8KYoSjE2!M2FAo#7#rhYT#SeDF##sTM3@+pU{W;DLRk0dY#~N4@Yhi7ygLSbU*2f0e5F24* zY=TX(88*ij*b-Y|YixsUu^qO@4%iVpVQ1`uU9lT>#~#=ddtq43-9DyTo6pqF*I2Om@c$|O}aS~3(DL56U;dGpVGjSHq#yL0_=iz)@fD3UE zF2*Ie6qn(0T!AZb6|TlLxE9ypdfb2;aT9LFEw~l8;db1CJ8>88#yz+f_u+m#fCupq z9>ybh6p!I?Jb@?i6rRR2coxs$dAxuZ@e*FfD|i*J;dQ)$H}MwU#yfZy@8NxXfDiEz zKE@~b6rbU9e1R|V6~4wd_!i&cd;EYO@e_W=FZdO|;dlIjKk*m-#y=P)7XSZaI1G;w zFd|06$QT8qVl<47F)${^!q^xG<6=CFj|ng#Cc?y+1e2nHCR%8tg92UjP@<0kD%2Q) z$uK#lz?7H@Q)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm;O(V-YNh z#jrS*z>-)BOJf-us$}xhS&%jV-swO z&9FJPz?RqwTVoq+i|w#IcEFC<2|HsK?26s6JNCey*b94OAMA_$us;sKfj9^U;}9H* z!!Q(w;|Lsyqi{5i!Lc|F$KwQ?h?8(KPQj@-4X5J_oQbn=HqODhI1lIJ0$hlTa4{~y zrML{2;|g4ft8g{0!L_&!*W(7kM!LxV{&*KHWh?np(Ucsw)4X@)3yotB)Hr~Ozcn|O61AK^&@G(BY zr}zw?;|qL=ukba#!MFGh-{S}Th@bE?e!;K!4Zq_L{E5HtH~zu@7V(CS&F_B85)v!9&z?xVKYhxX(i}kQRHo%712peM)Y>LgW zIkv!-*a}-?8*Gd1uswFbj@Su1V;Ag--LO0Mz@FF(dt)E$i~X=a4#0sp2nXX39E!s* z6o=yo9EqcFG>*ZsI1b0-1e}PIa57H8sW=U%;|!dMvv4-f!MQjO=i>rgh>LJBF2SX^ z442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h;||=3yKpz|!M(T-_u~OPh==en9>Jq{ z43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s;|;utx9~RJ!Mk`5@8bh}h>!3wKEbE> z44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW;}86azwkHy!7y?7{g2@=JVwBX7zra| z6pV_|FgnJ-m>3IVV;qc&@i0Cnz=W6x6JrugiUyi!p^Xj-bkReJJ_e{zV+bb0SI818ZU}tc`WBF4n{P*Z>=1BW#RKuqigf=GX#T zVk>NoZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~ZzFARfZQcm$8)F+7eZ z@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO z@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n*2gAf={$n@{j}b5;M#9J#1*2j# zjE*rdCdR_p7zg8GJdBSCFd-(w#Fzw=qJbt_XrqGyUGz|*j{z#w7=p<#Ii|prm85)v!9&z?xVKYhxX(i}kQRHo%712peM)Y>LgWIkv!-*a}-? 
z8*Gd1uswFbj@Su1V;Ag--LO0Mz@FF(dt)E$i~X=a4#0sp2nXX39E!s*6o=yo9EqcF zG>*ZsI1b0-1e}PIa57H8sW=U%;|!dMvv4-f!MQjO=i>rgh>LJBF2SX^442~yT#2i2 zHLk(6xDMCj2Hc37a5HYft+)-h;||=3yKpz|!M(T-_u~OPh==en9>Jq{43FapJc+09 zG@ik;cn;6w1-yut@G@S(t9T8s;|;utx9~RJ!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{ zHNL^O_zvIW2mFYi@H2kFulNnW;}86azwkHy!7%Zd{}>L#V+4$dkuWkw!KfGwqhkz= ziLo#?#=*E4594D3Oo)jvF($#JXrPG}+UTG_7d@2dV}J@ZhF~&GjwvuDroz;i2Ge3X zOph5bBWA+Pm<6+9Hq4GWFem21+?WURVm{1|1+X9%!opYti()Y>jwP@pmcr6l2FqeO zERPkiB38o6SOu$MHLQ*`uqM{R+E@qcVm+*n4X`0L!p7JHn_@F;jxDeyw!+rf2HRpg zY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!ofHMhvF~{#o;&tN8%_Pjbm^u zj>GXd0Vm=loQzX&Do(@cI0I+mES!yVa4ycn`M3ZV;v!s(OK>SJ!{xXFSK=yMjcaf% zuEX`X0XO0%+>BdrD{jN>xC3|MF5HcKa4+t|{dfQm;vqbYNAM^f!{c}YPvR*&jc4#I zp2PEa0Wabuyo^`yDqh3ucmr?ZExe6)@GjoN`}hDK;v;;FPw*)|!{_({U*ao#jc@QR zzQgzU0YBm={ET1lD}KZ8_yd39FZ_*vFid>rKZe8b7y%<Gd= zlp^*4>|>*1cXxMpcXxMpcgMSP&IKxR-_P@WdC!Oa?at25?*317pF*d=b*bJMaH?}|@Y>B?;hyECVf!GRLV-U8% zU~G%+uswFbj@Su1qXk1S6vMC!cExVk9eZF;?1fhBjeW2$_QU=-00&|?4#L4W1S4=L z4#VL%0!QK~9F1deERMtRH~}Z(B%F*>a4Js2={N&t;w+qvb8s%s!}+)X7vdsZj7xAS zF2m)x0$1WHT#ajREw01$xB)lfCftl$a4T-Z?YIMX;x62cdvGuA!~J*w58@#_j7RV& z9>e2!0#D*8JdJ1YES|&jcmXfsCA^GR@G4%z>v#ii;w`+5cknLW!~6IEAL1i?j8E_> zKEvnu0$<`Qe2s7LExyC|_yIrSC;W_G@GE}9@Aw0M;x80Q8UInCfez@1P8bWFF*dqj z9E^+cFg_;0gqR2uV-ie?$uK#lz?7H@Q)3!Ti|H^uX26V?2{U6B%!=7CJLW)F%!#=$ zH|D{-m=E(~0W64xurL*1($R zjKQ~H3nfD492$D z4%=e~?1-JPGg>eNLop1yU{~yh-LVJu#9nB{-q;8GVn6JU18^XQ;~*T2Lofn|;xHVJ zBXA^+!qGSe$Kp5~j}verPQuAJ1*hUPoQ^YaCeFgyI0xtAJe-dUa3LSeNC+@=CxCi&*KHQH7@E{(-!*~Rb;xRmq zC-5Ym!qa#L&*C{ej~DPFUc$?G1+U^YypA{UCf>r^cn9y|J-m+(@F70J$M^)F;xl}X zFYqP4!q@l)-{L!bk00yhEV*~U=FKmd7urW5lrq~Rdqc^rdA8d)f=!gCofPvTwTVoKm!C-8Q?XW#| zz>e4nJEH|dFcibE3wFhB*d2RdPwa(O?2Ub}FZRR!H~)Jra4e3)@i+k|;v}4mQ*bIy!|6B!XW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$= zDqM|ga4oLG^|%2y;wIdTTW~9G!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2 zDLjp5@GPFg^LPO-;w8L{SMVxc!|QkhZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_!|(V5f8s9`$r=Aqp@9zQh)x&_oiR4LU>uB#@i0Cn zz=W6x6JrugipelJrofb#3R7bmOpEC-J!Zg+mLgWIeKFY^udj0T_s_ur&r@8w|#_*bduc2keNQ zurpdP1Vb?lyI@!BhTX9T_QYOj#opKl`(i)rj{|TZhT|X{j6*O2hvG0Cjw5g+j>6G6 z2FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#0 z2G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|3 z2G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh| z2H)a4e2*XSBYwiq_yxb>H~fx2@F)I4k%I9b6&mP(j_8E3&>3T+3&z2?7!TuP0!)aB zFfk^th4-L@#WJjj%B`!KT;@o1-_jKp$+0zUYVk7=VG;3R`0kw!vU*i|w#IcEFC<2|J?& zLogJ>unTs@ZrB}rU{CCYR_u*^urKz*{x|>!VmJ=M!8imXa3~JL;Wz?E;wT)AV{j~v z!|^x)C*mZWj8kwbPQ&Rq183qaoQ-pEF3!XGxBwU8B3z71a49as<+uV@;woH?Yj7>D z!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$rcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ z!}E9nFXAP8n18?Fjyp4D8F5biY_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U? z!}s_BKjJ6+j9>68e#7th1ApQ#6e$`1QK5kj=!i}j3!O1Gx?miPi}5f%CcuQ42oqxx zOp3`cIi|prmus$|G zPxQiu*a#bA6KsmjusM2T3-rO3=!<^nj{z8nt*|u)VH*s_w%88aV+ZVrov<@nFa$#} z47*@g?1tU32lm8XXvN;x2m4|_?2iL*Aco^09E?LS0*B%-9F8M!B#y$-I0nb!I2?}? 
za3W5^$v6e4;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBNxCYnaI$Vz% za3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4cm~hnIXsUS z@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf_y*tNJA98H z@FRZ0&-ewu;y3(`Kkz61LXnE`9~BztfR5;dvCtV~qYK8txEK%PV**Twi7+uH!K9cB zlVb`@iK#F(roptB4%1@>%!rvVGiJf8m<_XI4s^wwm;O(V-YNh z#jrS*z>-)BOJf-7)R4Xa}ftcmVe3u~hX*1@`159?zC^h7Ug zh>fr@Ho>OY44b1jwm=_jiN5HE{uqFP*a}-?5VpZ!Y>Vx%J$As3*a|i> z#ctRgdtguOg;wm1eXuX~!~Qq`2Vyu5!ofHMBXB4V!{ImrN8%_Pjbm^uj>GXd0Vm=l zoQzX&Do(@cI0I+mES!yVa4ycn`M3ZV;v!s(OK>SJ!{xXFSK=yMjcaf%uEX`X0XO0% z+>BdrD{jN>xC3|MF5HcKa4+t|{dfQm;vqbYNAM^f!{c}YPvR*&jc4#Ip2PEa0Wabu zyo^`yDqh3ucmr?ZExe6)@GjoN`}hDK;v;;FPw*)|!{_({U*ao#jc@QRzQgzU0YBm= z{ET1lD}KZ8_yd39FBGX6|52fV4(NzZ7z>>-Ho9OOjEnIwJ|@6~mJs)Gh-IairFwb=0I1>iMcR0=E1y}5A$OIEQp1$Fc!h0SPY9} z2`q`Fur!vzvRDqwqZ?MhidYFNV->85)v!9&z?$fewXimNU>&TB^{_rRKu`3-hS&%j zV-swO&9FIoV+-`bmgtLq=#K#yh^??S24Ncv#ZzFARfZQcm$8)F+7eZ@FbqX(|88Y;yFBz7w{rp!pnFC zui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~x zzv4Iijz91x{z8$4@gEf$=zxysgt5>WW1|bk!MGR?<6{C$h>0*UCc&hb43lFDOo^#5 zHKxI|m=4op2F!?=Ff(Sste6e6V-9r1oR|x9V;;17pF*d=b z*bJMaH?}|@Y>B?;hyECVf!GRLV-U8%U~G%+uswFbj@Su1qXk1S6vMC!cExVk9eZF; z?1fhBjeW2$_QU=-00&|?4#L4W1S4=L4#VL%0!QK~9F1deERMtRH~}Z(B%F*>a4Js2 z={N&t;w+qvb8s%s!}+)X7vdsZj7xASF2m)x0$1WHT#ajREw01$xB)lfCftl$a4T-Z z?YIMX;x62cdvGuA!~J*w58@#_j7RV&9>e2!0#D*8JdJ1YES|&jcmXfsCA^GR@G4%z z>v#ii;w`+5cknLW!~6IEAL1i?j8E_>KEvnu0$<`Qe2s7LExyC|_yIrSC;W_G@GE}9 z@Aw0M;x80w8UInCfez@1P8bWFF*dqj9E^+cFg_;0gqR2uV-ie?$uK#lz?7H@Q)3!T zi|H^uX26V?2{U6B%!=7CJLW)F%!#=$H|D{-m=E(~0W64xurL*1($RjKQ~H3nfD492$D4%=e~?1-JPGg>eNLop1yU{~yh-LVJu#9nB{ z-q;8GVn6JU18^XQ;~*T2Lofn|;xHVJBXA^+!qGSe$Kp5~j}verPQuAJ1*hUPoQ^Ya zCeFgyI0xtAJe-dUa3LSeN zC+@=CxCi&*KHQH7@E{(-!*~Rb;xRmqC-5Ym!qa#L&*C{ej~DPFUc$?G1+U^YypA{U zCf>r^cn9y|J-m+(@F70J$M^)F;xl}XFYqP4!q@l)-{L!bk00yhEV*~U=FKmd7urW5lrq~Rdqc^rd zA8d)f=!gCofPvTwTVoKm!C-8Q?XW#|z>e4nJEH|dFcibE3wFhB*d2RdPwa(O?2Ub} zFZRR!H~)Jra4e3)@i+k|;v}4mQ*bIy!|6B!XW}fJ zjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdTTW~9G!|k{Ocj7MG zjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{SMVxc!|QkhZ{jVy zjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_!|(V5f8s9` z=^6h~p@9zQh)x&_oiR4LU>uB#@i0Cnz=W6x6JrugipelJrofb#3R7bmOpEC-J!Zg+ zmLgWIeKFY^udj0T_s_ur&r@8w|#_*bduc2keNQurpdP1Vb?lyI@!BhTX9T_QYOj#opKl`(i)r zj{|TZhT|X{j6*O2hvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1 z;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$ z;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}> z;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)I4k%93a z6&mP(j_8E3&>3T+3&z2?7!TuP0!)aBFfk^th4-L@#WJjj%B`!KT;@o1-_jKp$+0zUYVk z7=VG;3R`0kw!vU*i|w#IcEFC<2|J?&LogJ>unTs@ZrB}rU{CCYR_u*^urKz*{x|>! 
zVmJ=M!8imXa3~JL;Wz?E;wT)AV{j~v!|^x)C*mZWj8kwbPQ&Rq183qaoQ-pEF3!XG zxBwU8B3z71a49as<+uV@;woH?Yj7>D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$r zcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY z_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#6d4)+QK5kj z=!i}j3!O1Gx?miPi}5f%CcuQ42oqxxOp3`cIi|prmus$|GPxQiu*a#bA6KsmjusM2T3-rO3=!<^nj{z8n zt*|u)VH*s_w%88aV+ZVrov<@nFa$#}47*@g?1tU32lm8XXvN;x2m4|_?2iL*Aco^0 z9E?LS0*B%-9F8M!B#y$-I0nb!I2?}?a3W5^$v6e4;xwF&GjJx(!r3?n=i)q^j|*@i zF2cpQ1efA6T#hSnC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD| z9>T+T1drk|JdP*uB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6 zKElWN1fSwFe2y>hCBDMf_y*tNJA98H@FRZ0&-ewu;y3(`Kkz61LXk<>J`xog=zxys zgt5>WW1|bk!MGR?<6{C$h>0*UCc&hb43lFDOo^#5HKxI|m=4op2F!?=Ff(Sste6e6 zV-9r1oR|x9V;;17pF*d=b*bJMaH?}|@Y>B?;hyECVf!GRL zV-U8%U~G%+uswFbj@Su1qXk1S6vMC!cExVk9eZF;?1fhBjeW2$_QU=-00&|?4#L4W z1S4=L4#VL%0!QK~9F1deERMtRH~}Z(B%F*>a4Js2={N&t;w+qvb8s%s!}+)X7vdsZ zj7xASF2m)x0$1WHT#ajREw01$xB)lfCftl$a4T-Z?YIMX;x62cdvGuA!~J*w58@#_ zj7RV&9>e2!0#D*8JdJ1YES|&jcmXfsCA^GR@G4%z>v#ii;w`+5cknLW!~6IEAL1i? zj8E_>KEvnu0$<`Qe2s7LExyC|_yIrSC;W_G@GE}9@Aw0M;x81L8UInCfez@1P8bWF zF*dqj9E^+cFg_;0gqR2uV-ie?$uK#lz?7H@Q)3!Ti|H^uX26V?2{U6B%!=7CJLW)F z%!#=$H|D{-m=E(~0W64xurL z*1($RjKQ~H3nfD z492$D4%=e~?1-JPGg>eNLop1yU{~yh-LVJu#9nB{-q;8GVn6JU18^XQ;~*T2Lofn| z;xHVJBXA^+!qGSe$Kp5~j}verPQuAJ1*hUPoQ^YaCeFgyI0xtAJe-dUa3LSeNC+@=CxCi&*KHQH7@E{(-!*~Rb z;xRmqC-5Ym!qa#L&*C{ej~DPFUc$?G1+U^YypA{UCf>r^cn9y|J-m+(@F70J$M^)F z;xl}XFYqP4!q@l)-{L!bk00yhEV*~U=FKmd7urW5lrq~Rdqc^rdA8d)f=!gCofPvTwTVoKm!C-8Q z?XW#|z>e4nJEH|dFcibE3wFhB*d2RdPwa(O?2Ub}FZRR!H~)Jra4e3)@i+k|;v}4mQ*bIy!|6B!XW}fJjdO4=&cpe*02ksST#QR_DK5k1 zxB^$=DqM|ga4oLG^|%2y;wIdTTW~9G!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1C?3P( zcmhx2DLjp5@GPFg^LPO-;w8L{SMVxc!|QkhZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_!|(V5f8s9`SsDLPp@9zQh)x&_oiR4LU>uB# z@i0Cnz=W6x6JrugipelJrofb#3R7bmOpEC-J!Zg+mLgWIeKFY^udj0T_s_ur&r@8w|#_*bduc z2keNQurpdP1Vb?lyI@!BhTX9T_QYOj#opKl`(i)rj{|TZhT|X{j6*O2hvG0Cjw5g+ zj>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1*m*O&9jw^5_ zuEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRW zp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>f zzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)I4k&W>m6&mP(j_8E3&>3T+3&z2?7!TuP z0!)aBFfk^th4-L@#WJjj%B`!KT;@o1-_jKp$+0zUYVk7=VG;3R`0kw!vU*i|w#IcEFC< z2|J?&LogJ>unTs@ZrB}rU{CCYR_u*^urKz*{x|>!VmJ=M!8imXa3~JL;Wz?E;wT)A zV{j~v!|^x)C*mZWj8kwbPQ&Rq183qaoQ-pEF3!XGxBwU8B3z71a49as<+uV@;woH? zYj7>D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$rcmNOLAv}yn@F*U`<9Gs3;we0h zXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY_y8Z`BYccc@F_mS=lB9&;wyZO zZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#6xkX7QK5kj=!i}j3!O1Gx?miPi}5f%CcuQ4 z2oqxxOp3`cIi|prm zus$|GPxQiu*a#bA6KsmjusM2T3-rO3=!<^nj{z8nt*|u)VH*s_w%88aV+ZVrov<@n zFa$#}47*@g?1tU32lm8XXvN;x2m4|_?2iL*Aco^09E?LS0*B%-9F8M!B#y$-I0nb! 
zI2?}?a3W5^$v6e4;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBNxCYna zI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4cm~hn zIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf_y*tN zJA98H@FRZ0&-ewu;y3(`Kkz61LXm^<9~BztfR5;dvCtV~qYK8txEK%PV*(8I3rg(~ zHmJU3gA^jn&&#n*htPt>EE~swNGH;Z3?ie*Br=OEBCE(IvYR1t6*)yNkz3>uc||^vUlb4pMIljG z6cI&5F;QHU5G6$^QCgG{WkoqrUbu-0qN1oIDvK(js;DNaiyES)a2K^iZQ&v6h`OSl zs4p4_PvIpRibkTbXd;@5W}><97A=I2XeoS!pYRs}B2csvtwoS%BZ5U+(N44%9Yjab zNpuz#5h6lGnCK$9if*F2=plNFUcxGRi$0>S=qLJ%0b-yC7lXuLF+@a&p<2p7m@KA?(@VwG4e)`+!Yomek6h>c>C*ete)tzw(lE_R5WVwc!0_K3Y=pV%)Bh=bygI4q8c zqvDu2E>4J(;*>Zo&WN+(oH#Eoh>PNqxGb)StKyotE^dgM;+D8A?ufhMp13a_h=<~l zcr2cXr{bA-E?$V2;+1$U-iWv2op>)kh>zlv_$81 zmDyx=nM1nDoHCcpE%V5{GM~&Z3&?`9kSr{V$fB~CEG|pPlCqR6Ez8KVvYae0-DCw> zQC5*ovZL%IJ4=fUk)bk7c9C6WH`!hGkUeEDX_dWY zAK6#-ll|oYIZ%enL2|GhA|vEbIZO_hBjiXqN{*IeiOVm=e zOf6R{)JnBVtyXK)TD4BCR~ytuwMlJOThvyyO>I{@)K0Za?N)o#UbRo{R|nKVbx0jn zN7PYuOdVGz)Jb(pomOYmS#?gGR~OVpbxB=TSJYK?OR5u>P4%qVV@FiIMwjM7FK zqpVTRC~vqK6^x2TC8M%Y#i(jjGpZXkjGBhKQOl@pco=nzx<);tzR|$&G`x(4MkAxK z(ZpzKG&7nT-bM?<$7pHz8h(bq5nu!wt&G-2kkQ5nHrg8PjP^zcqodKu=xkVw5F^wG zGrAaEjc!JFqleMc=w(=q-bNpzuhGxwZwxR78sWwuW3VyAh%km4!;Im^2xFu%${1~o zF~%C>jPb?BbCWrZLNyZOk#|8uN_##sXuZvB+3#EHRcE%Z%m5 z3S*_Q%2;izG1eOEjP=F_W23Rj*lcVuwi?@v?Zyscr?Jb}ZR|1j8vBg>#sTA?amYAq z95Id>$Bg5~3FD-3$~bMDG0qz2jPu3?&6Y^rg6)-ZQL>L8uyI* z#slM_@yH1E3tpoK_u#eWGAMme2CHSOx#k*_4oS025*&OpHk|DJH|@m;zH`Dol-OFfFFT^cZ>T z8QIQ+(SG)6zTwzE(WZkUUklAQANwaK=EB^V2lHY+%#Q`IAQr;HSOkk=F)WTHuq2kk z(pUz|VmU02Zdd^;VkNAMRj?{n!|GTAYoa^W!rJJ8b+9hh!}{0&J<$srVk2yfO|U68 z!{+FXEzk#BqA&WPKL%hRw!+pJgl#Yw+hRLxj~%chcEZkR!4M3^FzkX|u^V>B9@rCm zp%r^$AMA_$us;sKff$a1a4-(R2po#Ta5#>@kvIxR;}{%^<8VAqz==2sC*u^Hiqmj9 z&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18z>T;GH{%xEira8I z?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpUz>9bZFXI)wir4Tu z-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f-z>oL|KjRntir?@% z{=lF33(biD+ee~810B#2oiG+UV{CN6I2ae>VSG%02{92S#w3^&lVNg9fhjQ+rp7dw z7SmyR%zzm&6K2LNm=&{OcFcjUm=kkhZp?#uF(2l~0$30WVPPzSMX?wb#}Zf)OJQj& zgJrQCmPa?NfEBS4R>mq=6{}%&tbsMr9cy82^uRh;7wchtY=EBVg$=P0HpV8{6q{jl z^u`wGgDue){m>r+Fc4c|YYf6R7>sSP9k#~~*bzHnXS84lhGH0Y!LHa1yJHXRiM`N@ zy|EAW#eUcy2jD;q$3Zw4hhPK_#bG!cN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u; zOq_+YaSqPKc{m>z;6hx4i*X4q#bvl0SKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)k zPTYmNaS!greYhVF;6Xfuhw%s=#bbCJPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!x zO}vG-@eba_dw3ro;6r?bkMRjU#b@{&U*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=X zPyB`E*8ukM9~BztfR5;dvCtV~qYK8txEK%PV**Twi7+uH!K9cBlVb`@iK#F(roptB z4%1@>%!rvVGiJf8m<_XI4s^wwm;O(V-YNh#jrS*z>-)BOJf-< zi{-F9x?u&Zh?TH1R>7)R4Xa}ftcmVe3u~hX*1@`159?zC^h7Ugh>fr@Ho>OY44b1j zwm=_jiN5HE{uqFP*a}-?5VpZ!Y>Vx%J$As3*a|i>#ctRgdtguOg;wm1 zeXuX~!~Qq`2Vyu5!ofHMBXB4V!{ImrN8%_Pjbm^uj>GXd0Vm=loQzX&Do(@cI0I+m zES!yVa4ycn`M3ZV;v!s(OK>SJ!{xXFSK=yMjcaf%uEX`X0XO0%+>BdrD{jN>xC3|M zF5HcKa4+t|{dfQm;vqbYNAM^f!{c}YPvR*&jc4#Ip2PEa0Wabuyo^`yDqh3ucmr?Z zExe6)@GjoN`}hDK;v;;FPw*)|!{_({U*ao#jc@QRzQgzU0YBm={ET1lD}KZ8_yd39 zFEoDyU?2Zcp@9zQh)x&_oiR4LU>uB#@i0Cnz=W6x6JrugipelJrofb#3R7bmOpEC- zJ!Zg+mLgWIeKFY z^udj0T_s_ur&r@8w|#_*bduc2keNQurpdP1Vb?lyI@!BhTX9T_QYOj#opKl z`(i)rj{|TZhT|X{j6*O2hvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e z**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7< z-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E z+js}>;yt{N5AY#A!l>Wp>4_He_jwNH@AFDn^lVGy&-3(*i`Am%TVnWmo}PX&zma#3 z6&|c-!Xmt438@SRN2h4t=m%G|m7-^9qJOEcXKW~B^-G2Hysf|slP zg+&N6^{-F#?NC3nFg>4SKdYWmis`I+hRN4z(GyI;F6P2&(Q{0$!EtqhzUbh1=EB>e 
zXPm4SJ>TRSoKPnxG8gW`-O*i|zq8cyPr*s_LSJo|Z&-*~gUR)F3Ui@v&@Xtf-Xwl{ zp2}*`vsA9ZI%vVRnAroS=d8?t=_#wA^ua?-tQI|Q{Q>?P@GwCE-2DK%Y^VJj&m9nJi$ zs{Lc$m+fOKQD2}gQBS`G1}`+1wjawrHCKGSEqW5}ua=}|$)pJuhkYGCeIxFVi!V^fKMj>1BGJ z(rVGOlri-(J!@(2Wx6I~>}5eOBfFR|+n&`T&E>ycOzBi_i(z7aIymTr7@kRvX1dvu zoOH%Ne^(t#=eJkx|EVQ??qh37&bmC?!!WikD25ZenCWJ~;+PPyxH_l(v81n?fWx@=Q787bHtIlb!AsrrSD4V%zwPd&L`?rR2=!~`+(kG6whJsV-W>3|)+Dgl5D=qrK zkxQp~TXLJ&Bc4Ym1UY*U)x74W)soMa7%i&#^)b<+YOAyYQAKz}3v@xf#|*SfxDIq( zAr3yi>SP91B#|<%Z2pm zsU6{v^ysDE6;eoFn*AR^6gF4Qx+$Vl{_KrKe_x9EhG1Tb{bPpB_A&R`KL2SbabkZg zbjH|d>$1i4No^r1uFv*6swK=#tEHqZF0@GwYU!vVqDQri-t+$83!N6Nhx_QXzXI7(r!?06s)B7<(YCB)TUNHG z|Na}p*UW5&$JR9dh;++JxqW>i{RbJc2TXWRF0rM1@?ZIz~PH|9!nY~Q9^V1T8A&Pug)G}o<` zPUiC8YU`|1y)70Kd$SAC2{CkxP&3^Oe3;JoXUFKG^Z!+!U3E$|9&^I{YwSdgf`y6| zDqNsQfj>Sj9v)_A=w{{(o?+H~H8&2hcZ2TwG`4QgLzfoA_4G8;&4~8W1=zcRRp+!v zRNoE}?QO1FEq!eJ{*7o~ozWIieIGGLR0V`u`ssXxv%k4+wG1$q|Aup*PW85go7gXZ zkWPr<@&}vgW;loFjDLnRLg%-I)BdLMzZ?K<&yJzGFk8@u=~7}i>u@vO4B7}2f;Lj; zvu39alZTtQW+8CYD7BqdeF$OJov00~)0UK*8ZJe#N=nv-cI@Q}U!Nh*) z6LrELmu@hiOfu8WfK4`uoCTR;u3Ifr^#T9>BgJVt*$kxlZxlzH*An4nC}IBP`q#f{ z;4w8_XN}h1XZ${#{yx)oo@x5Nrt5`nwZETBv&_9=`T;)MT(RHD9DP6xcQV&ZH!ESD z36(Hk=d@RX4g{63z+AOj7TWgxSqX-1Xk4T-+A2YZA;wAw^Kxuu@ogVyS!^q5i7v@2 zp7~~Odqfm6U(iD=OU=Z;reNI;55#x0_<=Ix>VbFw%gBR{u$$CRI1`>*qU|G}s;A2g3Q<9$dM6V*}<+qV90DMvVw43B*KI%=la8;WmKamUQGsA7)C zP|S&cia8lsOvT^Docgnv(=ilt=AUBDMix`)cQNP8v_Pxnyb0a>f4U@7A>Q4w?8?qM`hIU z4-3@OismeHJb&L%A9Ie@r&YU-zM+;IIv0Karnzpl+%lJpc>k&E-_}{YEq63gHF4Lr z^>>Bd(`jbQQBf6oU#I-p^?!Hz2j(vOBlV%JsNe6zk%Pk{Gu>=Rk8QjE98RC;6!Sdh zI50?0^rqwypx+T~A9KI$V~z~V?2oUfW|GzNOdt5~sPdfh{uiUl3!OFEsPgjn;XLtP z+0OHyMwQp*UN0vfe_#KgK+79*!)ke}lN>q(v}td7XRf--kT5^<*58}!zeE1Pw)J<& zKk76cil~r(GE?lYuD@IDXEQCT=Dx&G%-4U4`4(ABh2O<|H`C0<|HFjF|5G1lZ~VG> z(D;ApRI5dpkK{k+<|5m)G}G;kUst5rrd70z=Kb3mzY&#DS6^hC4oVl)_I*Yd@mL@6=&bF<;!x>wrnP>mqf?ae<%q=*Mxhq(I6=Bv?Tyy=eSH5_W zCAmA=+UP%14K&fHHXD80ykpxZdYg{iw!Iw2*QbsA3nzg-Wwf@LF!C~@+D9Ut67vlu zHg}otFiA|P-=sRTy?%A@sNZDfs@0O*w(rkpoBffMLT9u++jI=nz@uG@7WJOl#ZvyVla_w7+UCy-tbwCNr43f*sB1nzfeET>q=qGDVhT z)>^RrPtK@{vscG|n)v_R=fCey=w{!~v~Fg7=E#c6qR$$w;<83wNmRvU(|#KECKGs@J1`e^pE;nngQC z{;}|%zUV6cZ$@SQ#|OW^=qh15)p+x?!4u3y20a3OZcl7RM9DB;LOlP_JXMIQvat`Zl3VZp}OiHMaB4?Y~*9k z%xC*VetBmG-<;XAU0P&-tJ}8z4seag@}mM$(@e3qp5FtPyO|ah*IF?YQ~RG{JR*zv z;~lb&nP!fEbxr7I_4IM}Zl;?$-K@S&wOSh3cK+GV?8BO;&S~psI?(3$=M^nu zNc;NlSGg8u2K%d=kFCJpuX2%J*tay(&HD7!8UJh^emcKCS!7Ea;?<3Y0RQQE8~Hp< z>SoWF64PQv%#2wvJGx>n%wzxJ)?Z&?WP=OPR~PNM92j}UQ4Ow@PWf-)X>DdOpUy!h zG@Ca15PP%H74rXjxz6A$iZJ}~5m5q`D6uPwI;f+OG)e#^SVoKml{o(S#$lZJ;|%$9 zC5p<~6{8Nu-Zl2#3-*Q``-~1&tXNP7yV#%S-EzBkmqfuaKl0u_`|f+W-DjWOd-uT5 zCcQEZ@ zl`!P~j99LxnkPr>VdpFjP1dm$B-SI-d$!Ku)8hP=Ouis4&zffg}fX zI-Lv|&cHPig!tVL8RpWZq|T&qkJ~M!kPK%L(}QBV^=u;kG)VN}98~H2&bc|E&NHEw zH@|Z}+JjFQNNCA;Tqu{l<##Sh?x&A^^Yb~$F|e+835ko*UT0P=G55*O;9Q!zPd}L( zo7r*qxeV>`T$jsDtR!6_rEn~I%#D(Xc_olE^{xWuhV<&3FxOybTqxx?KYF zxdz@rmy)`Z#&yMAQVM(YZen0}g_F8_i1_T5Yjd2HF#TS*Y4y8L!Y1lxSHgL~F{<}V zwZ9ZTK!gHfm%@Mi%@JQIf>rfFFI4WPt6+X0r@>Du&O?42nJa}QJRTT9=gUaPv^(czARHq*EO7-e-uhgKP z@Jfy9NtDq2=+XJON=Ef5DTSl@G%@zD8`Wokr02=Az#KWA%L(&*Cd>;qj5kjf_+ehm z3G-4W%*!?m^7QsUb9}`Q^J-3**D_&Vw_*IWB`~HfVa%+3gYERJ<$N%+_D!NlTS{ei z)-I%{%v!P)v-Yi&qn)*HJ4bR6j_;%#2ewp5isxPDNV384y_BQPZ@%vw$tJ4BzH~NNTf z&n0+P7`^}w8hYM#xV5{sw@5oI9a?2kC< zBhGd21Uj0*iGgg%3-#JWGRonN&N^_=YO$^a%&NtD&SU+Qhkec305~SYhII7h zWFz|X&%yWT#W^O)xir`Y6#l=LZhhaHN!pGww$|7>W9yA=Ft*Xy(Z-H3cC4}EjBPS@ zys;CEov2^-H)h}1o%L|`OX{U=6Wdo#^fx8a*U*iC1K#j9gE13pb2iZ{Ey-dg))u(L zx1Lhx&nx}1Ka!3zuQ*)x4KI=<&hT|h39yv}2>BX?MBSP$_|}ugJ?%D93W>TcF)$+$ 
diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
index 1cb20814093126..0b60d37d36c08c 100755
--- a/pandas/tests/io/generate_legacy_storage_files.py
+++ b/pandas/tests/io/generate_legacy_storage_files.py
@@ -45,7 +45,10 @@
 from pandas.tseries.offsets import (
     DateOffset, Hour, Minute, Day, MonthBegin, MonthEnd, YearBegin,
-    YearEnd, Week,
+    YearEnd, Week, WeekOfMonth, LastWeekOfMonth,
+    BusinessDay, BusinessHour, CustomBusinessDay, FY5253,
+    Easter,
+    SemiMonthEnd, SemiMonthBegin,
     QuarterBegin, QuarterEnd)
 from pandas.compat import u
 import os
@@ -53,7 +56,7 @@
 import numpy as np
 import pandas
 import platform as pl
-
+from datetime import timedelta

 _loose_version = LooseVersion(pandas.__version__)

@@ -201,6 +204,12 @@ def create_data():
                           freq='M')

     off = {'DateOffset': DateOffset(years=1),
+           'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
+           'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
+           'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
+           'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
+           'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
+           'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
            'MonthBegin': MonthBegin(1),
            'MonthEnd': MonthEnd(1),
            'QuarterBegin': QuarterBegin(1),
@@ -209,6 +218,11 @@ def create_data():
            'YearBegin': YearBegin(1),
            'YearEnd': YearEnd(1),
            'Week': Week(1),
+           'Week_Tues': Week(2, normalize=False, weekday=1),
+           'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
+           'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
+           'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
+           'Easter': Easter(),
            'Hour': Hour(1),
            'Minute': Minute(1)}
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 3a2a613986dcae..c65691618e6540 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -581,6 +581,7 @@ def __setstate__(self, state):
         if '_offset' in state:  # pragma: no cover
             raise ValueError('Unexpected key `_offset`')
         state['_offset'] = state.pop('offset')
+        state['kwds']['offset'] = state['_offset']
         self.__dict__ = state
         if 'weekmask' in state and 'holidays' in state:
             calendar, holidays = _get_calendar(weekmask=self.weekmask,
@@ -598,11 +599,11 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset):
     _prefix = 'B'
     _adjust_dst = True

-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, offset=timedelta(0)):
         self.n = int(n)
         self.normalize = normalize
-        self.kwds = kwds
-        self._offset = kwds.get('offset', timedelta(0))
+        self.kwds = {'offset': offset}
+        self._offset = offset

     def _offset_str(self):
         def get_str(td):
@@ -693,14 +694,13 @@ def onOffset(self, dt):

 class BusinessHourMixin(BusinessMixin):

-    def __init__(self, **kwds):
+    def __init__(self, start='09:00', end='17:00', offset=timedelta(0)):
         # must be validated here to equality check
-        kwds['start'] = self._validate_time(kwds.get('start', '09:00'))
-        kwds['end'] = self._validate_time(kwds.get('end', '17:00'))
+        kwds = {'offset': offset}
+        self.start = kwds['start'] = self._validate_time(start)
+        self.end = kwds['end'] = self._validate_time(end)
         self.kwds = kwds
-        self._offset = kwds.get('offset', timedelta(0))
-        self.start = kwds.get('start', '09:00')
-        self.end = kwds.get('end', '17:00')
+        self._offset = offset

     def _validate_time(self, t_input):
         from datetime import time as dt_time
@@ -923,10 +923,11 @@ class BusinessHour(BusinessHourMixin, SingleConstructorOffset):
     _prefix = 'BH'
     _anchor = 0

-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, start='09:00',
+                 end='17:00', offset=timedelta(0)):
         self.n = int(n)
         self.normalize = normalize
-        super(BusinessHour, self).__init__(**kwds)
+        super(BusinessHour, self).__init__(start=start, end=end, offset=offset)

     @cache_readonly
     def next_bday(self):
@@ -960,11 +961,11 @@ class CustomBusinessDay(BusinessDay):
     _prefix = 'C'

     def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
-                 holidays=None, calendar=None, **kwds):
+                 holidays=None, calendar=None, offset=timedelta(0)):
         self.n = int(n)
         self.normalize = normalize
-        self.kwds = kwds
-        self._offset = kwds.get('offset', timedelta(0))
+        self._offset = offset
+        self.kwds = {}

         calendar, holidays = _get_calendar(weekmask=weekmask,
                                            holidays=holidays,
@@ -976,6 +977,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
         self.kwds['weekmask'] = self.weekmask = weekmask
         self.kwds['holidays'] = self.holidays = holidays
         self.kwds['calendar'] = self.calendar = calendar
+        self.kwds['offset'] = offset

     @apply_wraps
     def apply(self, other):
@@ -1026,10 +1028,12 @@ class CustomBusinessHour(BusinessHourMixin, SingleConstructorOffset):
     _anchor = 0

     def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
-                 holidays=None, calendar=None, **kwds):
+                 holidays=None, calendar=None,
+                 start='09:00', end='17:00', offset=timedelta(0)):
         self.n = int(n)
         self.normalize = normalize
-        super(CustomBusinessHour, self).__init__(**kwds)
+        super(CustomBusinessHour, self).__init__(start=start,
+                                                 end=end, offset=offset)

         calendar, holidays = _get_calendar(weekmask=weekmask,
                                            holidays=holidays,
@@ -1121,7 +1125,7 @@ class SemiMonthOffset(DateOffset):
     _default_day_of_month = 15
     _min_day_of_month = 2

-    def __init__(self, n=1, day_of_month=None, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, day_of_month=None):
         if day_of_month is None:
             self.day_of_month = self._default_day_of_month
         else:
@@ -1132,8 +1136,7 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds):
                              day=self.day_of_month))
         self.n = int(n)
         self.normalize = normalize
-        self.kwds = kwds
-        self.kwds['day_of_month'] = self.day_of_month
+        self.kwds = {'day_of_month': self.day_of_month}

     @classmethod
     def _from_name(cls, suffix=None):
@@ -1408,11 +1411,11 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset):
     _prefix = 'CBM'

     def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
-                 holidays=None, calendar=None, **kwds):
+                 holidays=None, calendar=None, offset=timedelta(0)):
         self.n = int(n)
         self.normalize = normalize
-        self.kwds = kwds
-        self._offset = kwds.get('offset', timedelta(0))
+        self._offset = offset
+        self.kwds = {}

         calendar, holidays = _get_calendar(weekmask=weekmask,
                                            holidays=holidays,
@@ -1420,6 +1423,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
         self.kwds['weekmask'] = self.weekmask = weekmask
         self.kwds['holidays'] = self.holidays = holidays
         self.kwds['calendar'] = self.calendar = calendar
+        self.kwds['offset'] = offset

     @cache_readonly
     def cbday(self):
@@ -1430,7 +1434,7 @@ def cbday(self):
     def m_offset(self):
         kwds = self.kwds
         kwds = {key: kwds[key] for key in kwds
-                if key not in ['calendar', 'weekmask', 'holidays']}
+                if key not in ['calendar', 'weekmask', 'holidays', 'offset']}
         return MonthEnd(n=1, normalize=self.normalize, **kwds)

     @apply_wraps
@@ -1478,20 +1482,21 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset):
     _prefix = 'CBMS'

     def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
-                 holidays=None, calendar=None, **kwds):
+                 holidays=None, calendar=None, offset=timedelta(0)):
         self.n = int(n)
         self.normalize = normalize
-        self.kwds = kwds
-        self._offset = kwds.get('offset', timedelta(0))
+        self._offset = offset
+        self.kwds = {}

         # _get_calendar does validation and possible transformation
         # of calendar and holidays.
         calendar, holidays = _get_calendar(weekmask=weekmask,
                                            holidays=holidays,
                                            calendar=calendar)
-        kwds['calendar'] = self.calendar = calendar
-        kwds['weekmask'] = self.weekmask = weekmask
-        kwds['holidays'] = self.holidays = holidays
+        self.kwds['calendar'] = self.calendar = calendar
+        self.kwds['weekmask'] = self.weekmask = weekmask
+        self.kwds['holidays'] = self.holidays = holidays
+        self.kwds['offset'] = offset

     @cache_readonly
     def cbday(self):
@@ -1502,7 +1507,7 @@ def cbday(self):
     def m_offset(self):
         kwds = self.kwds
         kwds = {key: kwds[key] for key in kwds
-                if key not in ['calendar', 'weekmask', 'holidays']}
+                if key not in ['calendar', 'weekmask', 'holidays', 'offset']}
         return MonthBegin(n=1, normalize=self.normalize, **kwds)

     @apply_wraps
@@ -1540,17 +1545,17 @@ class Week(DateOffset):
     _adjust_dst = True
     _inc = timedelta(weeks=1)

-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, weekday=None):
         self.n = n
         self.normalize = normalize
-        self.weekday = kwds.get('weekday', None)
+        self.weekday = weekday

         if self.weekday is not None:
             if self.weekday < 0 or self.weekday > 6:
                 raise ValueError('Day must be 0<=day<=6, got {day}'
                                  .format(day=self.weekday))

-        self.kwds = kwds
+        self.kwds = {'weekday': weekday}

     def isAnchored(self):
         return (self.n == 1 and self.weekday is not None)
@@ -1642,9 +1647,9 @@ class WeekOfMonth(DateOffset):
     Parameters
     ----------
     n : int
-    week : {0, 1, 2, 3, ...}
+    week : {0, 1, 2, 3, ...}, default None
         0 is 1st week of month, 1 2nd week, etc.
-    weekday : {0, 1, ..., 6}
+    weekday : {0, 1, ..., 6}, default None
         0: Mondays
         1: Tuesdays
         2: Wednesdays
@@ -1656,11 +1661,11 @@ class WeekOfMonth(DateOffset):

     _adjust_dst = True

-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, week=None, weekday=None):
         self.n = n
         self.normalize = normalize
-        self.weekday = kwds['weekday']
-        self.week = kwds['week']
+        self.weekday = weekday
+        self.week = week

         if self.n == 0:
             raise ValueError('N cannot be 0')
@@ -1672,7 +1677,7 @@ def __init__(self, n=1, normalize=False, **kwds):
             raise ValueError('Week must be 0<=week<=3, got {week}'
                              .format(week=self.week))

-        self.kwds = kwds
+        self.kwds = {'weekday': weekday, 'week': week}

     @apply_wraps
     def apply(self, other):
@@ -1742,8 +1747,8 @@ class LastWeekOfMonth(DateOffset):

     Parameters
     ----------
-    n : int
-    weekday : {0, 1, ..., 6}
+    n : int, default 1
+    weekday : {0, 1, ..., 6}, default None
         0: Mondays
         1: Tuesdays
         2: Wednesdays
         3: Thursdays
         4: Fridays
         5: Saturdays
         6: Sundays
+
     """

-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, weekday=None):
         self.n = n
         self.normalize = normalize
-        self.weekday = kwds['weekday']
+        self.weekday = weekday

         if self.n == 0:
             raise ValueError('N cannot be 0')
@@ -1765,7 +1771,7 @@ def __init__(self, n=1, normalize=False, **kwds):
             raise ValueError('Day must be 0<=day<=6, got {day}'
                              .format(day=self.weekday))

-        self.kwds = kwds
+        self.kwds = {'weekday': weekday}

     @apply_wraps
     def apply(self, other):
@@ -1829,13 +1835,14 @@ class QuarterOffset(DateOffset):
     # TODO: Consider combining QuarterOffset and YearOffset __init__ at some
     #       point
-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, startingMonth=None):
         self.n = n
         self.normalize = normalize
-        self.startingMonth = kwds.get('startingMonth',
-                                      self._default_startingMonth)
+        if startingMonth is None:
+            startingMonth = self._default_startingMonth
+        self.startingMonth = startingMonth

-        self.kwds = kwds
+        self.kwds = {'startingMonth': startingMonth}

     def isAnchored(self):
         return (self.n == 1 and self.startingMonth is not None)
@@ -2017,13 +2024,14 @@ class YearOffset(DateOffset):
     """DateOffset that just needs a month"""
     _adjust_dst = True

-    def __init__(self, n=1, normalize=False, **kwds):
-        self.month = kwds.get('month', self._default_month)
+    def __init__(self, n=1, normalize=False, month=None):
+        month = month if month is not None else self._default_month
+        self.month = month

         if self.month < 1 or self.month > 12:
             raise ValueError('Month must go from 1 to 12')

-        DateOffset.__init__(self, n=n, normalize=normalize, **kwds)
+        DateOffset.__init__(self, n=n, normalize=normalize, month=month)

     @classmethod
     def _from_name(cls, suffix=None):
@@ -2262,15 +2270,17 @@ class FY5253(DateOffset):
     _suffix_prefix_nearest = 'N'
     _adjust_dst = True

-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1,
+                 variation="nearest"):
         self.n = n
         self.normalize = normalize
-        self.startingMonth = kwds['startingMonth']
-        self.weekday = kwds["weekday"]
+        self.startingMonth = startingMonth
+        self.weekday = weekday

-        self.variation = kwds["variation"]
+        self.variation = variation

-        self.kwds = kwds
+        self.kwds = {'weekday': weekday, 'startingMonth': startingMonth,
+                     'variation': variation}

         if self.n == 0:
             raise ValueError('N cannot be 0')
@@ -2510,24 +2520,29 @@ class FY5253Quarter(DateOffset):
     _prefix = 'REQ'
     _adjust_dst = True

-    def __init__(self, n=1, normalize=False, **kwds):
+    def __init__(self, n=1, normalize=False, weekday=0, startingMonth=1,
+                 qtr_with_extra_week=1, variation="nearest"):
         self.n = n
         self.normalize = normalize

-        self.qtr_with_extra_week = kwds["qtr_with_extra_week"]
+        self.weekday = weekday
+        self.startingMonth = startingMonth
+        self.qtr_with_extra_week = qtr_with_extra_week
+        self.variation = variation

-        self.kwds = kwds
+        self.kwds = {'weekday': weekday, 'startingMonth': startingMonth,
+                     'qtr_with_extra_week': qtr_with_extra_week,
+                     'variation': variation}

         if self.n == 0:
             raise ValueError('N cannot be 0')

     @cache_readonly
     def _offset(self):
-        kwds = self.kwds
         return FY5253(
-            startingMonth=kwds['startingMonth'],
-            weekday=kwds["weekday"],
-            variation=kwds["variation"])
+            startingMonth=self.startingMonth,
+            weekday=self.weekday,
+            variation=self.variation)

     def isAnchored(self):
         return self.n == 1 and self._offset.isAnchored()

From 7db7f82ed34dfe9b9768f2449b291d5abd6ef60a Mon Sep 17 00:00:00 2001
From: Hanmin Qin
Date: Sat, 7 Oct 2017 19:02:58 +0800
Subject: [PATCH 39/76] DOC: small typo (#17811)

---
 pandas/core/reshape/reshape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index d280c4f3f73d7b..4eb35daba2282a 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -1080,7 +1080,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
     prefix : string, list of strings, or dict of strings, default None
         String to append DataFrame column names
         Pass a list with length equal to the number of columns
-        when calling get_dummies on a DataFrame. Alternativly, `prefix`
+        when calling get_dummies on a DataFrame. Alternatively, `prefix`
         can be a dictionary mapping column names to prefixes.
     prefix_sep : string, default '_'
         If appending prefix, separator/delimiter to use. Or pass a
From e6aa14ec36c71649ef65dee424a9149bd49f3d3a Mon Sep 17 00:00:00 2001
From: Sudeep
Date: Sun, 8 Oct 2017 16:37:24 +0530
Subject: [PATCH 40/76] Typo in error message (#17817)

---
 pandas/core/window.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/window.py b/pandas/core/window.py
index 869296503225d0..e3a091573aa2f5 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -1138,8 +1138,8 @@ def _validate_freq(self):
         try:
             return to_offset(self.window)
         except (TypeError, ValueError):
-            raise ValueError("passed window {0} in not "
-                             "compat with a datetimelike "
+            raise ValueError("passed window {0} is not "
+                             "compatible with a datetimelike "
                              "index".format(self.window))

     _agg_doc = dedent("""

From 6e8122261c6c327b995cefacce759e70fec5ed1b Mon Sep 17 00:00:00 2001
From: Malgorzata Turzanska
Date: Sun, 8 Oct 2017 12:15:51 -0400
Subject: [PATCH 41/76] ERR: Clarify exceptions for invalid datetimelike operations (#17772)

---
 pandas/core/indexes/datetimelike.py        | 11 ++--
 pandas/core/indexes/datetimes.py           |  4 +-
 pandas/tests/indexes/datetimes/test_ops.py | 58 +++++++++++++++-------
 3 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index c3232627fce74c..d5b4525e8a1eb6 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -621,7 +621,9 @@ def _convert_scalar_indexer(self, key, kind=None):
                 ._convert_scalar_indexer(key, kind=kind))

     def _add_datelike(self, other):
-        raise AbstractMethodError(self)
+        raise TypeError("cannot add {0} and {1}"
+                        .format(type(self).__name__,
+                                type(other).__name__))

     def _sub_datelike(self, other):
         raise AbstractMethodError(self)
@@ -647,16 +649,13 @@ def __add__(self, other):
                     return other._add_delta(self)
                 raise TypeError("cannot add TimedeltaIndex and {typ}"
                                 .format(typ=type(other)))
-            elif isinstance(other, Index):
-                raise TypeError("cannot add {typ1} and {typ2}"
-                                .format(typ1=type(self).__name__,
-                                        typ2=type(other).__name__))
             elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
                                     Timedelta)):
                 return self._add_delta(other)
             elif is_integer(other):
                 return self.shift(other)
-            elif isinstance(other, (Timestamp, datetime)):
+            elif isinstance(other, (Index, Timestamp, datetime,
+                                    np.datetime64)):
                 return self._add_datelike(other)
             else:  # pragma: no cover
                 return NotImplemented
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 862bc51ada9d2a..25897bee298458 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -754,7 +754,9 @@ def _add_datelike(self, other):
         # adding a timedeltaindex to a datetimelike
         if other is libts.NaT:
             return self._nat_new(box=True)
-        raise TypeError("cannot add a datelike to a DatetimeIndex")
+        raise TypeError("cannot add {0} and {1}"
+                        .format(type(self).__name__,
+                                type(other).__name__))

     def _sub_datelike(self, other):
         # subtract a datetime from myself, yielding a TimedeltaIndex
diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 7cb051d351444c..424ef7fc3caf7b 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -433,31 +433,51 @@ def test_add_iadd(self):
             tm.assert_index_equal(rng, expected)

         idx = DatetimeIndex(['2011-01-01', '2011-01-02'])
-        msg = "cannot add a datelike to a DatetimeIndex"
+        msg = "cannot add DatetimeIndex and Timestamp"
         with tm.assert_raises_regex(TypeError, msg):
             idx + Timestamp('2011-01-01')
         with tm.assert_raises_regex(TypeError, msg):
             Timestamp('2011-01-01') + idx

-    def test_add_dti_dti(self):
-        # previously performed setop (deprecated in 0.16.0), now raises
-        # TypeError (GH14164)
-
-        dti = date_range('20130101', periods=3)
-        dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern')
-
-        with pytest.raises(TypeError):
-            dti + dti
-
-        with pytest.raises(TypeError):
-            dti_tz + dti_tz
-
-        with pytest.raises(TypeError):
-            dti_tz + dti
-
-        with pytest.raises(TypeError):
-            dti + dti_tz
+    @pytest.mark.parametrize('addend', [
+        datetime(2011, 1, 1),
+        DatetimeIndex(['2011-01-01', '2011-01-02']),
+        DatetimeIndex(['2011-01-01', '2011-01-02'])
+        .tz_localize('US/Eastern'),
+        np.datetime64('2011-01-01'),
+        Timestamp('2011-01-01'),
+    ])
+    def test_add_datetimelike_and_dti(self, addend):
+        # issue #9631
+
+        dti = DatetimeIndex(['2011-01-01', '2011-01-02'])
+        msg = 'cannot add DatetimeIndex and {0}'.format(
+            type(addend).__name__)
+        with tm.assert_raises_regex(TypeError, msg):
+            dti + addend
+        with tm.assert_raises_regex(TypeError, msg):
+            addend + dti
+
+    @pytest.mark.parametrize('addend', [
+        datetime(2011, 1, 1),
+        DatetimeIndex(['2011-01-01', '2011-01-02']),
+        DatetimeIndex(['2011-01-01', '2011-01-02'])
+        .tz_localize('US/Eastern'),
+        np.datetime64('2011-01-01'),
+        Timestamp('2011-01-01'),
+    ])
+    def test_add_datetimelike_and_dti_tz(self, addend):
+        # issue #9631
+
+        dti_tz = DatetimeIndex(['2011-01-01', '2011-01-02']) \
+            .tz_localize('US/Eastern')
+        msg = 'cannot add DatetimeIndex and {0}'.format(
+            type(addend).__name__)
+        with tm.assert_raises_regex(TypeError, msg):
+            dti_tz + addend
+        with tm.assert_raises_regex(TypeError, msg):
+            addend + dti_tz

     def test_difference(self):
         for tz in self.tz:

From f9ba6fed36d79485174fd12d07afdca9ed786471 Mon Sep 17 00:00:00 2001
From: Alan Velasco
Date: Sun, 8 Oct 2017 12:04:56 -0500
Subject: [PATCH 42/76] ERR: Raise ValueError when setting scalars in a
 dataframe with no index (#16823) (#16968)

---
 doc/source/whatsnew/v0.21.0.txt       |  2 ++
 pandas/core/frame.py                  | 12 ++++++++----
 pandas/core/reshape/pivot.py          |  3 +++
 pandas/tests/frame/test_indexing.py   |  5 +++++
 pandas/tests/indexing/test_loc.py     | 11 +++++------
 pandas/tests/indexing/test_partial.py | 20 ++++++--------------
 pandas/tests/reshape/test_pivot.py    |  3 ++-
 7 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index b8b06ee0fe94ea..1e9c402dac73e6 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -706,6 +706,8 @@ Other API Changes
 - Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`).
 - Pandas no longer registers matplotlib converters on import. The converters
   will be registered and used when the first plot is draw (:issue:`17710`)
+- Setting on a column with a scalar value and 0-len index now raises a ``ValueError`` (:issue:`16823`)
+

 .. _whatsnew_0210.deprecations:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 142ccf1f034bc6..d907492759dbd4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2531,13 +2531,17 @@ def _ensure_valid_index(self, value):
         passed value
         """
         # GH5632, make sure that we are a Series convertible
-        if not len(self.index) and is_list_like(value):
+        if not len(self.index):
+            if not is_list_like(value):
+                # GH16823, Raise an error due to loss of information
+                raise ValueError('If using all scalar values, you must pass'
+                                 ' an index')
             try:
                 value = Series(value)
             except:
-                raise ValueError('Cannot set a frame with no defined index '
-                                 'and a value that cannot be converted to a '
-                                 'Series')
+                raise ValueError('Cannot set a frame with no defined '
+                                 'index and a value that cannot be '
+                                 'converted to a Series')

             self._data = self._data.reindex_axis(value.index.copy(), axis=1,
                                                  fill_value=np.nan)
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index d19de6030d4736..38c28af4d6ecb1 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -454,6 +454,9 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
     from pandas import DataFrame
     df = DataFrame(data, index=common_idx)

+    if not len(df):
+        return DataFrame(index=common_idx)
+
     if values is None:
         df['__dummy__'] = 0
         kwargs = {'aggfunc': len, 'fill_value': 0}
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index d00f56830a6fa7..1a16e4ef48b647 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -721,6 +721,11 @@ def test_setitem_empty_frame_with_boolean(self):
             df[df > df2] = 47
             assert_frame_equal(df, df2)

+    def test_setitem_scalars_no_index(self):
+        # GH16823
+        df = DataFrame()
+        pytest.raises(ValueError, df.__setitem__, 'foo', 1)
+
     def test_getitem_empty_frame_with_boolean(self):
         # Test for issue #11859
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index c6f38aeba9e87c..bf3a840aced8c6 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -423,15 +423,14 @@ def test_loc_setitem_consistency(self):
     def test_loc_setitem_consistency_empty(self):
         # empty (essentially noops)
-        expected = DataFrame(columns=['x', 'y'])
-        expected['x'] = expected['x'].astype(np.int64)
+        # GH16823
         df = DataFrame(columns=['x', 'y'])
-        df.loc[:, 'x'] = 1
-        tm.assert_frame_equal(df, expected)
+        with tm.assert_raises_regex(ValueError, 'If using all scalar values'):
+            df.loc[:, 'x'] = 1

         df = DataFrame(columns=['x', 'y'])
-        df['x'] = 1
-        tm.assert_frame_equal(df, expected)
+        with tm.assert_raises_regex(ValueError, 'If using all scalar values'):
+            df['x'] = 1

     def test_loc_setitem_consistency_slice_column_len(self):
         # .loc[:,column] setting with slice == len of the column
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 41ddfe934a131f..16f325393649ff 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -575,24 +575,16 @@ def f():
     def test_partial_set_empty_frame_row(self):
         # GH5720, GH5744
         # don't create rows when empty
-        expected = DataFrame(columns=['A', 'B', 'New'],
-                             index=pd.Index([], dtype='int64'))
-        expected['A'] = expected['A'].astype('int64')
-        expected['B'] = expected['B'].astype('float64')
-        expected['New'] = expected['New'].astype('float64')
-
         df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
         y = df[df.A > 5]
-        y['New'] = np.nan
-        tm.assert_frame_equal(y, expected)
-        # tm.assert_frame_equal(y,expected)
+        # GH16823
+        # Setting a column with a scalar and no index should raise
+        with tm.assert_raises_regex(ValueError, 'If using all scalar values'):
+            y['New'] = np.nan

-        expected = DataFrame(columns=['a', 'b', 'c c', 'd'])
-        expected['d'] = expected['d'].astype('int64')
         df = DataFrame(columns=['a', 'b', 'c c'])
-        df['d'] = 3
-        tm.assert_frame_equal(df, expected)
-        tm.assert_series_equal(df['c c'], Series(name='c c', dtype=object))
+        with tm.assert_raises_regex(ValueError, 'If using all scalar values'):
+            df['d'] = 3

         # reindex columns is ok
         df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 07d3052c167564..4126bb1de84d7a 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1226,7 +1226,8 @@ def test_crosstab_no_overlap(self):
         s2 = pd.Series([4, 5, 6], index=[4, 5, 6])

         actual = crosstab(s1, s2)
-        expected = pd.DataFrame()
+        expected = pd.DataFrame(
+            index=pd.Index([], dtype='int64')).astype('int64')

         tm.assert_frame_equal(actual, expected)

From 9f0ee53dad3bd14a3f0475a29d326c89c6b6aced Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 8 Oct 2017 17:27:59 -0700
Subject: [PATCH 43/76] MAINT: Effeciently --> Efficiently

Docs typo in inference.pyx
---
 pandas/_libs/src/inference.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
index ed883bf5db5bcc..7990fd3b1b5c91 100644
--- a/pandas/_libs/src/inference.pyx
+++ b/pandas/_libs/src/inference.pyx
@@ -226,7 +226,7 @@ cdef _try_infer_map(v):

 def infer_dtype(object value, bint skipna=False):
     """
-    Effeciently infer the type of a passed val, or list-like
+    Efficiently infer the type of a passed val, or list-like
     array of values. Return a string describing the type.
     Parameters

From f57071aca164c8f37823b1d5d0058e7ad5f36105 Mon Sep 17 00:00:00 2001
From: Sam Foo
Date: Mon, 9 Oct 2017 08:09:01 -0400
Subject: [PATCH 44/76] Moved timezones and offset_types to conftest (#17825)

---
 pandas/tests/tseries/conftest.py     |  13 ++
 pandas/tests/tseries/test_offsets.py | 200 ++++++++++++---------------
 2 files changed, 105 insertions(+), 108 deletions(-)
 create mode 100644 pandas/tests/tseries/conftest.py

diff --git a/pandas/tests/tseries/conftest.py b/pandas/tests/tseries/conftest.py
new file mode 100644
index 00000000000000..25446c24b28c09
--- /dev/null
+++ b/pandas/tests/tseries/conftest.py
@@ -0,0 +1,13 @@
+import pytest
+import pandas.tseries.offsets as offsets
+
+
+@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__])
+def offset_types(request):
+    return request.param
+
+
+@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
+                        'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'])
+def tz(request):
+    return request.param
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py
index 543d21e162f048..c0e682c978610f 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/test_offsets.py
@@ -101,15 +101,9 @@ def test_to_m8():

 class Base(object):
     _offset = None
-    _offset_types = [getattr(offsets, o) for o in offsets.__all__]

     timezones = [None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
                  'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']

-    @property
-    def offset_types(self):
-        return self._offset_types
-
     def _get_offset(self, klass, value=1, normalize=False):
         # create instance from offset class
         if klass is FY5253:
@@ -134,7 +128,7 @@ def _get_offset(self, klass, value=1, normalize=False):
             klass = klass(normalize=normalize)
         return klass

-    def test_apply_out_of_range(self):
+    def test_apply_out_of_range(self, tz):
         if self._offset is None:
             return

@@ -153,11 +147,10 @@ def test_apply_out_of_range(self):
                 assert result.tzinfo is None

                 # Check tz is preserved
-                for tz in self.timezones:
-                    t = Timestamp('20080101', tz=tz)
-                    result = t + offset
-                    assert isinstance(result, datetime)
-                    assert t.tzinfo == result.tzinfo
+                t = Timestamp('20080101', tz=tz)
+                result = t + offset
+                assert isinstance(result, datetime)
+                assert t.tzinfo == result.tzinfo

         except tslib.OutOfBoundsDatetime:
             raise
@@ -214,42 +207,39 @@ def setup_method(self, method):
             'Nano': Timestamp(np_datetime64_compat(
                 '2011-01-01T09:00:00.000000001Z'))}

-    def test_return_type(self):
-        for offset in self.offset_types:
-            offset = self._get_offset(offset)
+    def test_return_type(self, offset_types):
+        offset = self._get_offset(offset_types)

-            # make sure that we are returning a Timestamp
-            result = Timestamp('20080101') + offset
-            assert isinstance(result, Timestamp)
+        # make sure that we are returning a Timestamp
+        result = Timestamp('20080101') + offset
+        assert isinstance(result, Timestamp)

-            # make sure that we are returning NaT
-            assert NaT + offset is NaT
-            assert offset + NaT is NaT
+        # make sure that we are returning NaT
+        assert NaT + offset is NaT
+        assert offset + NaT is NaT

-            assert NaT - offset is NaT
-            assert (-offset).apply(NaT) is NaT
+        assert NaT - offset is NaT
+        assert (-offset).apply(NaT) is NaT

-    def test_offset_n(self):
-        for offset_klass in self.offset_types:
-            offset = self._get_offset(offset_klass)
-            assert offset.n == 1
+    def test_offset_n(self, offset_types):
+        offset = self._get_offset(offset_types)
+        assert offset.n == 1

-            neg_offset = offset * -1
-            assert neg_offset.n == -1
+        neg_offset = offset * -1
+        assert neg_offset.n == -1

-            mul_offset = offset * 3
-            assert mul_offset.n == 3
+        mul_offset = offset * 3
+        assert mul_offset.n == 3

-    def test_offset_freqstr(self):
-        for offset_klass in self.offset_types:
-            offset = self._get_offset(offset_klass)
+    def test_offset_freqstr(self, offset_types):
+        offset = self._get_offset(offset_types)

-            freqstr = offset.freqstr
-            if freqstr not in ('',
-                               "",
-                               'LWOM-SAT', ):
-                code = get_offset(freqstr)
-                assert offset.rule_code == code
+        freqstr = offset.freqstr
+        if freqstr not in ('',
+                           "",
+                           'LWOM-SAT', ):
+            code = get_offset(freqstr)
+            assert offset.rule_code == code

     def _check_offsetfunc_works(self, offset, funcname, dt, expected,
                                 normalize=False):
@@ -319,20 +309,19 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected,
         else:
             assert result == expected_localize

-    def test_apply(self):
+    def test_apply(self, offset_types):
         sdt = datetime(2011, 1, 1, 9, 0)
         ndt = np_datetime64_compat('2011-01-01 09:00Z')

-        for offset in self.offset_types:
-            for dt in [sdt, ndt]:
-                expected = self.expecteds[offset.__name__]
-                self._check_offsetfunc_works(offset, 'apply', dt, expected)
+        for dt in [sdt, ndt]:
+            expected = self.expecteds[offset_types.__name__]
+            self._check_offsetfunc_works(offset_types, 'apply', dt, expected)

-                expected = Timestamp(expected.date())
-                self._check_offsetfunc_works(offset, 'apply', dt, expected,
-                                             normalize=True)
+            expected = Timestamp(expected.date())
+            self._check_offsetfunc_works(offset_types, 'apply', dt, expected,
+                                         normalize=True)

-    def test_rollforward(self):
+    def test_rollforward(self, offset_types):
         expecteds = self.expecteds.copy()

         # result will not be changed if the target is on the offset
@@ -366,16 +355,15 @@ def test_rollforward(self):
         sdt = datetime(2011, 1, 1, 9, 0)
         ndt = np_datetime64_compat('2011-01-01 09:00Z')

-        for offset in self.offset_types:
-            for dt in [sdt, ndt]:
-                expected = expecteds[offset.__name__]
-                self._check_offsetfunc_works(offset, 'rollforward', dt,
-                                             expected)
-                expected = norm_expected[offset.__name__]
-                self._check_offsetfunc_works(offset, 'rollforward', dt,
-                                             expected, normalize=True)
+        for dt in [sdt, ndt]:
+            expected = expecteds[offset_types.__name__]
+            self._check_offsetfunc_works(offset_types, 'rollforward', dt,
+                                         expected)
+            expected = norm_expected[offset_types.__name__]
+            self._check_offsetfunc_works(offset_types, 'rollforward', dt,
+                                         expected, normalize=True)

-    def test_rollback(self):
+    def test_rollback(self, offset_types):
         expecteds = {'BusinessDay': Timestamp('2010-12-31 09:00:00'),
                      'CustomBusinessDay': Timestamp('2010-12-31 09:00:00'),
                      'CustomBusinessMonthEnd':
@@ -428,66 +416,62 @@ def test_rollback(self):
         sdt = datetime(2011, 1, 1, 9, 0)
         ndt = np_datetime64_compat('2011-01-01 09:00Z')

-        for offset in self.offset_types:
-            for dt in [sdt, ndt]:
-                expected = expecteds[offset.__name__]
-                self._check_offsetfunc_works(offset, 'rollback', dt, expected)
+        for dt in [sdt, ndt]:
+            expected = expecteds[offset_types.__name__]
+            self._check_offsetfunc_works(offset_types, 'rollback', dt,
+                                         expected)

-                expected = norm_expected[offset.__name__]
-                self._check_offsetfunc_works(offset, 'rollback', dt, expected,
-                                             normalize=True)
+            expected = norm_expected[offset_types.__name__]
+            self._check_offsetfunc_works(offset_types, 'rollback', dt,
+                                         expected, normalize=True)

-    def test_onOffset(self):
-        for offset in self.offset_types:
-            dt = self.expecteds[offset.__name__]
-            offset_s = self._get_offset(offset)
-            assert offset_s.onOffset(dt)
-
-            # when normalize=True, onOffset checks time is 00:00:00
-            offset_n = self._get_offset(offset, normalize=True)
self._get_offset(offset, normalize=True) - assert not offset_n.onOffset(dt) - - if offset in (BusinessHour, CustomBusinessHour): - # In default BusinessHour (9:00-17:00), normalized time - # cannot be in business hour range - continue - date = datetime(dt.year, dt.month, dt.day) - assert offset_n.onOffset(date) + def test_onOffset(self, offset_types): + dt = self.expecteds[offset_types.__name__] + offset_s = self._get_offset(offset_types) + assert offset_s.onOffset(dt) + + # when normalize=True, onOffset checks time is 00:00:00 + offset_n = self._get_offset(offset_types, normalize=True) + assert not offset_n.onOffset(dt) - def test_add(self): + if offset_types in (BusinessHour, CustomBusinessHour): + # In default BusinessHour (9:00-17:00), normalized time + # cannot be in business hour range + return + date = datetime(dt.year, dt.month, dt.day) + assert offset_n.onOffset(date) + + def test_add(self, offset_types, tz): dt = datetime(2011, 1, 1, 9, 0) - for offset in self.offset_types: - offset_s = self._get_offset(offset) - expected = self.expecteds[offset.__name__] + offset_s = self._get_offset(offset_types) + expected = self.expecteds[offset_types.__name__] - result_dt = dt + offset_s - result_ts = Timestamp(dt) + offset_s - for result in [result_dt, result_ts]: - assert isinstance(result, Timestamp) - assert result == expected + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected - for tz in self.timezones: - expected_localize = expected.tz_localize(tz) - result = Timestamp(dt, tz=tz) + offset_s - assert isinstance(result, Timestamp) - assert result == expected_localize + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize - # normalize=True - offset_s = self._get_offset(offset, normalize=True) - expected = Timestamp(expected.date()) + # normalize=True + offset_s = self._get_offset(offset_types, normalize=True) + expected = Timestamp(expected.date()) - result_dt = dt + offset_s - result_ts = Timestamp(dt) + offset_s - for result in [result_dt, result_ts]: - assert isinstance(result, Timestamp) - assert result == expected + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected - for tz in self.timezones: - expected_localize = expected.tz_localize(tz) - result = Timestamp(dt, tz=tz) + offset_s - assert isinstance(result, Timestamp) - assert result == expected_localize + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize def test_pickle_v0_15_2(self): offsets = {'DateOffset': DateOffset(years=1), From 2ebe085e665d3e844d47a4489fdc454dfcdfe0e3 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 9 Oct 2017 05:17:04 -0700 Subject: [PATCH 45/76] ENH: Add Index.to_frame method (#17815) Closes gh-15230. 
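For reference, a minimal sketch of the new API, based on the docstring and tests added below (the example index and its ``letters`` name are illustrative, not from the patch):

    import pandas as pd

    idx = pd.Index(['a', 'b', 'c'], name='letters')

    # One column named after the index (or 0 if the index is unnamed),
    # with the original Index reused as the row index when index=True
    # (the default).
    df = idx.to_frame()

    # index=False gives a default integer index instead.
    df = idx.to_frame(index=False)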
--- doc/source/api.rst | 3 +++
 doc/source/whatsnew/v0.21.0.txt | 1 +
 pandas/core/indexes/base.py | 23 ++++++++++++++++++
 pandas/core/indexes/datetimes.py | 40 ++++++++++++++++++++++++++++++++
 pandas/core/indexes/multi.py | 6 ++---
 pandas/tests/indexes/common.py | 15 ++++++++++++
 6 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index d98a18e6f7e363..646a28686bb063 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -1376,6 +1376,7 @@ Conversion
    Index.tolist
    Index.to_datetime
    Index.to_series
+   Index.to_frame

 Sorting
 ~~~~~~~
@@ -1591,6 +1592,7 @@ Conversion
    DatetimeIndex.to_perioddelta
    DatetimeIndex.to_pydatetime
    DatetimeIndex.to_series
+   DatetimeIndex.to_frame

 TimedeltaIndex
 --------------
@@ -1623,6 +1625,7 @@ Conversion
    TimedeltaIndex.round
    TimedeltaIndex.floor
    TimedeltaIndex.ceil
+   TimedeltaIndex.to_frame

 .. currentmodule:: pandas

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1e9c402dac73e6..595fab9e18ea4f 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -31,6 +31,7 @@ New features
 - Added ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to support type inference in the presence of missing values (:issue:`17059`).
 - :class:`~pandas.Resampler.nearest` is added to support nearest-neighbor upsampling (:issue:`17496`).
+- :class:`~pandas.Index` has added support for a ``to_frame`` method (:issue:`15230`)

 .. _whatsnew_0210.enhancements.infer_objects:

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 0a55559750d7c8..df0e963e7628d6 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1005,6 +1005,29 @@ def to_series(self, **kwargs):
                       index=self._shallow_copy(), name=self.name)

+    def to_frame(self, index=True):
+        """
+        Create a DataFrame with a column containing the Index.
+
+        .. versionadded:: 0.21.0
+
+        Parameters
+        ----------
+        index : boolean, default True
+            Set the index of the returned DataFrame as the original Index.
+
+        Returns
+        -------
+        DataFrame : a DataFrame containing the original Index data.
+        """
+
+        from pandas import DataFrame
+        result = DataFrame(self._shallow_copy(), columns=[self.name or 0])
+
+        if index:
+            result.index = self
+        return result
+
     def _to_embed(self, keep_tz=False):
         """
         *this is an internal non-public method*
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 25897bee298458..dae62176722e1e 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -915,6 +915,46 @@ def to_series(self, keep_tz=False):
                       index=self._shallow_copy(), name=self.name)

+    def to_frame(self, index=True, keep_tz=False):
+        """
+        Create a DataFrame with a column containing the DatetimeIndex.
+
+        .. versionadded:: 0.21.0
+
+        Parameters
+        ----------
+        index : boolean, default True
+            Set the index of the returned DataFrame
+            as the original DatetimeIndex.
+
+        keep_tz : optional, default False
+            Return the data keeping the timezone.
+
+            If keep_tz is True:
+
+                If the timezone is not set, the resulting
+                DataFrame will have a datetime64[ns] dtype.
+
+                Otherwise the DataFrame will have a datetime64[ns, tz] dtype;
+                the tz will be preserved.
+
+            If keep_tz is False:
+
+                DataFrame will have a datetime64[ns] dtype. TZ aware
+                objects will have the tz removed.
+
+        Returns
+        -------
+        DataFrame : a DataFrame containing the original DatetimeIndex data.
+ """ + + from pandas import DataFrame + result = DataFrame(self._to_embed(keep_tz), columns=[self.name or 0]) + + if index: + result.index = self + return result + def _to_embed(self, keep_tz=False): """ return an array repr of this object, potentially casting to object diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4b6e31133ba4b0..06b208b4d174e5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1010,18 +1010,18 @@ def _to_safe_for_reshape(self): def to_frame(self, index=True): """ - Create a DataFrame with the columns the levels of the MultiIndex + Create a DataFrame with the levels of the MultiIndex as columns. .. versionadded:: 0.20.0 Parameters ---------- index : boolean, default True - return this MultiIndex as the index + Set the index of the returned DataFrame as the original MultiIndex. Returns ------- - DataFrame + DataFrame : a DataFrame containing the original MultiIndex data. """ from pandas import DataFrame diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 970dd7b63225ab..456e5a9bd6439d 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -51,6 +51,21 @@ def test_to_series(self): assert s.index is not idx assert s.name == idx.name + def test_to_frame(self): + # see gh-15230 + idx = self.create_index() + name = idx.name or 0 + + df = idx.to_frame() + + assert df.index is idx + assert len(df.columns) == 1 + assert df.columns[0] == name + assert df[name].values is not idx.values + + df = idx.to_frame(index=False) + assert df.index is not idx + def test_shift(self): # GH8083 test the base class for shift From 35590c67f11b8be4d06782c081943ca3b521108c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 9 Oct 2017 12:17:18 -0700 Subject: [PATCH 46/76] DEPR: Deprecate tupleize_cols in read_csv (#17820) xref gh-17060. --- doc/source/io.rst | 4 ++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 10 +++- pandas/tests/frame/test_to_csv.py | 15 ++++-- pandas/tests/io/parser/header.py | 19 +++----- pandas/tests/io/parser/python_parser_only.py | 4 +- pandas/tests/io/parser/test_unsupported.py | 51 +++++++++----------- 7 files changed, 56 insertions(+), 48 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 0aa4ea72e3b139..08d00138b7cd8e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -343,6 +343,10 @@ dialect : str or :class:`python:csv.Dialect` instance, default ``None`` override values, a ParserWarning will be issued. See :class:`python:csv.Dialect` documentation for more details. tupleize_cols : boolean, default ``False`` + .. deprecated:: 0.21.0 + + This argument will be removed and will always convert to MultiIndex + Leave a list of tuples on columns as is (default is to convert to a MultiIndex on the columns). diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 595fab9e18ea4f..f86847d8b82749 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -717,6 +717,7 @@ Deprecations - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) +- :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. 
Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`) - The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index c8b2987d591efb..4b6c358ea7dcd5 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -260,8 +260,11 @@ override values, a ParserWarning will be issued. See csv.Dialect documentation for more details. tupleize_cols : boolean, default False + .. deprecated:: 0.21.0 + This argument will be removed and will always convert to MultiIndex + Leave a list of tuples on columns as is (default is to convert to - a Multi Index on the columns) + a MultiIndex on the columns) error_bad_lines : boolean, default True Lines with too many fields (e.g. a csv line with too many commas) will by default cause an exception to be raised, and no DataFrame will be returned. @@ -510,6 +513,7 @@ def _read(filepath_or_buffer, kwds): 'buffer_lines': None, 'error_bad_lines': True, 'warn_bad_lines': True, + 'tupleize_cols': False, 'float_precision': None } @@ -529,6 +533,7 @@ def _read(filepath_or_buffer, kwds): 'buffer_lines', 'compact_ints', 'use_unsigned', + 'tupleize_cols', } @@ -962,6 +967,9 @@ def _clean_options(self, options, engine): if arg == 'as_recarray': msg += ' Please call pd.to_csv(...).to_records() instead.' + elif arg == 'tupleize_cols': + msg += (' Column tuples will then ' + 'always be converted to MultiIndex') if result.get(arg, parser_default) != parser_default: depr_warning += msg + '\n\n' diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 6a4b1686a31e25..a61a1571812530 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -555,8 +555,12 @@ def _make_frame(names=None): # tupleize_cols=True and index=False df = _make_frame(True) df.to_csv(path, tupleize_cols=True, index=False) - result = read_csv( - path, header=0, tupleize_cols=True, index_col=None) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = read_csv(path, header=0, + tupleize_cols=True, + index_col=None) result.columns = df.columns assert_frame_equal(df, result) @@ -576,8 +580,11 @@ def _make_frame(names=None): # column & index are multi-index (compatibility) df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) df.to_csv(path, tupleize_cols=True) - result = read_csv(path, header=0, index_col=[ - 0, 1], tupleize_cols=True) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = read_csv(path, header=0, index_col=[0, 1], + tupleize_cols=True) result.columns = df.columns assert_frame_equal(df, result) diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 50ae4dae541ace..ff3beb70b774f5 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -105,13 +105,13 @@ def test_header_multi_index(self): R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 """ - df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1]) tm.assert_frame_equal(df, expected) # skipping lines in the header - df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[ - 0, 1], 
tupleize_cols=False) + df = self.read_csv(StringIO(data), header=[0, 1, 2, 3], + index_col=[0, 1]) tm.assert_frame_equal(df, expected) # INVALID OPTIONS @@ -121,25 +121,22 @@ def test_header_multi_index(self): FutureWarning, check_stacklevel=False): pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], as_recarray=True, - tupleize_cols=False) + index_col=[0, 1], as_recarray=True) # names pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], names=['foo', 'bar'], - tupleize_cols=False) + index_col=[0, 1], names=['foo', 'bar']) # usecols pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=[0, 1], usecols=['foo', 'bar'], - tupleize_cols=False) + index_col=[0, 1], usecols=['foo', 'bar']) # non-numeric index_col pytest.raises(ValueError, self.read_csv, StringIO(data), header=[0, 1, 2, 3], - index_col=['foo', 'bar'], tupleize_cols=False) + index_col=['foo', 'bar']) def test_header_multiindex_common_format(self): diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index c3dc91b3f188c4..267b589ee91f47 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -232,9 +232,7 @@ def test_none_delimiter(self): result = self.read_csv(StringIO(data), header=0, sep=None, error_bad_lines=False, - warn_bad_lines=True, - engine='python', - tupleize_cols=True) + warn_bad_lines=True) tm.assert_frame_equal(result, expected) def test_skipfooter_bad_row(self): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 5d248f2fef59cc..2e73ce6aa19b0b 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -127,32 +127,25 @@ def read(self): class TestDeprecatedFeatures(object): - def test_deprecated_args(self): - data = '1,2,3' - - # deprecated arguments with non-default values - deprecated = { - 'as_recarray': True, - 'buffer_lines': True, - 'compact_ints': True, - 'use_unsigned': True, - 'skip_footer': 1, - } - - engines = 'c', 'python' - - for engine in engines: - for arg, non_default_val in deprecated.items(): - if engine == 'c' and arg == 'skip_footer': - # unsupported --> exception is raised - continue - - if engine == 'python' and arg == 'buffer_lines': - # unsupported --> exception is raised - continue - - with tm.assert_produces_warning( - FutureWarning, check_stacklevel=False): - kwargs = {arg: non_default_val} - read_csv(StringIO(data), engine=engine, - **kwargs) + @pytest.mark.parametrize("engine", ["c", "python"]) + @pytest.mark.parametrize("kwargs", [{"as_recarray": True}, + {"buffer_lines": True}, + {"compact_ints": True}, + {"use_unsigned": True}, + {"tupleize_cols": True}, + {"skip_footer": 1}]) + def test_deprecated_args(self, engine, kwargs): + data = "1,2,3" + arg, _ = list(kwargs.items())[0] + + if engine == "c" and arg == "skip_footer": + # unsupported --> exception is raised + return + + if engine == "python" and arg == "buffer_lines": + # unsupported --> exception is raised + return + + with tm.assert_produces_warning( + FutureWarning, check_stacklevel=False): + read_csv(StringIO(data), engine=engine, **kwargs) From 3ba2cffff4c851460d3a2dbe893955dc9d9d0640 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 9 Oct 2017 15:57:35 -0700 Subject: [PATCH 47/76] BUG: Coerce to numeric despite uint64 conflict (#17823) Closes gh-17007. Closes gh-17125. 
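For context, a minimal sketch of the behavior being fixed, mirroring the new test below (input values are taken from that test; previously this kind of uint64/NaN conflict raised ``ValueError`` even with ``errors='coerce'``):

    import pandas as pd

    # "12345678901234567890" exceeds int64 (uint64 range) and "ITEM" is
    # non-numeric: the result is now coerced to float64 with NaN
    # instead of raising.
    s = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
    result = pd.to_numeric(s, errors="coerce")
    # float64 values: 1.2345678901234567e+19, 1.23456789e+09, NaN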
--- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/src/inference.pyx | 27 +++++----- pandas/tests/dtypes/test_inference.py | 75 +++++++++++++-------------- pandas/tests/tools/test_numeric.py | 25 +++++++++ 4 files changed, 75 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f86847d8b82749..ed3be718522993 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -829,6 +829,7 @@ Conversion - Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`) - Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`) - Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (`DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`). +- Bug in :meth:`to_numeric` in which elements were not always being coerced to numeric when ``errors='coerce'`` (:issue:`17007`, :issue:`17125`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 7990fd3b1b5c91..b0a64e1ccc225e 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -165,20 +165,8 @@ cdef class Seen(object): two conflict cases was also detected. However, we are trying to force conversion to a numeric dtype. """ - if self.uint_ and (self.null_ or self.sint_): - if not self.coerce_numeric: - return True - - if self.null_: - msg = ("uint64 array detected, and such an " - "array cannot contain NaN.") - else: # self.sint_ = 1 - msg = ("uint64 and negative values detected. " - "Cannot safely return a numeric array " - "without truncating data.") - - raise ValueError(msg) - return False + return (self.uint_ and (self.null_ or self.sint_) + and not self.coerce_numeric) cdef inline saw_null(self): """ @@ -1103,10 +1091,17 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, seen.saw_int(val) if val >= 0: - uints[i] = val + if val <= oUINT64_MAX: + uints[i] = val + else: + seen.float_ = True if val <= oINT64_MAX: ints[i] = val + + if seen.sint_ and seen.uint_: + seen.float_ = True + elif util.is_bool_object(val): floats[i] = uints[i] = ints[i] = bools[i] = val seen.bool_ = True @@ -1154,6 +1149,8 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, uints[i] = as_int if as_int <= oINT64_MAX: ints[i] = as_int + + seen.float_ = seen.float_ or (seen.uint_ and seen.sint_) else: seen.float_ = True except (TypeError, ValueError) as e: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 857f7a283aa951..70273f9e999cf5 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -39,6 +39,11 @@ from pandas.util import testing as tm +@pytest.fixture(params=[True, False], ids=lambda val: str(val)) +def coerce(request): + return request.param + + def test_is_sequence(): is_seq = inference.is_sequence assert (is_seq((1, 2))) @@ -340,44 +345,38 @@ def test_convert_numeric_uint64(self): exp = np.array([2**63], dtype=np.uint64) tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) - def test_convert_numeric_uint64_nan(self): - msg = 'uint64 array detected' - cases = [(np.array([2**63, np.nan], dtype=object), set()), - (np.array([str(2**63), np.nan], dtype=object), set()), - (np.array([np.nan, 2**63], dtype=object), set()), - (np.array([np.nan, str(2**63)], dtype=object), 
set()), - (np.array([2**63, 2**63 + 1], dtype=object), set([2**63])), - (np.array([str(2**63), str(2**63 + 1)], - dtype=object), set([2**63]))] - - for coerce in (True, False): - for arr, na_values in cases: - if coerce: - with tm.assert_raises_regex(ValueError, msg): - lib.maybe_convert_numeric(arr, na_values, - coerce_numeric=coerce) - else: - tm.assert_numpy_array_equal(lib.maybe_convert_numeric( - arr, na_values), arr) - - def test_convert_numeric_int64_uint64(self): - msg = 'uint64 and negative values detected' - cases = [np.array([2**63, -1], dtype=object), - np.array([str(2**63), -1], dtype=object), - np.array([str(2**63), str(-1)], dtype=object), - np.array([-1, 2**63], dtype=object), - np.array([-1, str(2**63)], dtype=object), - np.array([str(-1), str(2**63)], dtype=object)] - - for coerce in (True, False): - for case in cases: - if coerce: - with tm.assert_raises_regex(ValueError, msg): - lib.maybe_convert_numeric(case, set(), - coerce_numeric=coerce) - else: - tm.assert_numpy_array_equal(lib.maybe_convert_numeric( - case, set()), case) + @pytest.mark.parametrize("arr", [ + np.array([2**63, np.nan], dtype=object), + np.array([str(2**63), np.nan], dtype=object), + np.array([np.nan, 2**63], dtype=object), + np.array([np.nan, str(2**63)], dtype=object)]) + def test_convert_numeric_uint64_nan(self, coerce, arr): + expected = arr.astype(float) if coerce else arr.copy() + result = lib.maybe_convert_numeric(arr, set(), + coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + def test_convert_numeric_uint64_nan_values(self, coerce): + arr = np.array([2**63, 2**63 + 1], dtype=object) + na_values = set([2**63]) + + expected = (np.array([np.nan, 2**63 + 1], dtype=float) + if coerce else arr.copy()) + result = lib.maybe_convert_numeric(arr, na_values, + coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("case", [ + np.array([2**63, -1], dtype=object), + np.array([str(2**63), -1], dtype=object), + np.array([str(2**63), str(-1)], dtype=object), + np.array([-1, 2**63], dtype=object), + np.array([-1, str(2**63)], dtype=object), + np.array([str(-1), str(2**63)], dtype=object)]) + def test_convert_numeric_int64_uint64(self, case, coerce): + expected = case.astype(float) if coerce else case.copy() + result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) def test_maybe_convert_objects_uint64(self): # see gh-4471 diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 1d13ba93ba7592..b306dba0be7f1a 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -381,3 +381,28 @@ def test_downcast_limits(self): for dtype, downcast, min_max in dtype_downcast_min_max: series = pd.to_numeric(pd.Series(min_max), downcast=downcast) assert series.dtype == dtype + + def test_coerce_uint64_conflict(self): + # see gh-17007 and gh-17125 + # + # Still returns float despite the uint64-nan conflict, + # which would normally force the casting to object. 
+ df = pd.DataFrame({"a": [200, 300, "", "NaN", 30000000000000000000]}) + expected = pd.Series([200, 300, np.nan, np.nan, + 30000000000000000000], dtype=float, name="a") + result = to_numeric(df["a"], errors="coerce") + tm.assert_series_equal(result, expected) + + s = pd.Series(["12345678901234567890", "1234567890", "ITEM"]) + expected = pd.Series([12345678901234567890, + 1234567890, np.nan], dtype=float) + result = to_numeric(s, errors="coerce") + tm.assert_series_equal(result, expected) + + # For completeness, check against "ignore" and "raise" + result = to_numeric(s, errors="ignore") + tm.assert_series_equal(result, s) + + msg = "Unable to parse string" + with tm.assert_raises_regex(ValueError, msg): + to_numeric(s, errors="raise") From 0f548d485b8c5baf93f479333607bda7155a6154 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 9 Oct 2017 21:49:20 -0700 Subject: [PATCH 48/76] DEPR: Deprecate from_csv in favor of read_csv (#17812) Closes gh-4191. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 11 +++- pandas/core/series.py | 7 ++- pandas/tests/frame/test_to_csv.py | 83 ++++++++++++++++++++----------- pandas/tests/series/test_io.py | 75 +++++++++++++++++++--------- 5 files changed, 120 insertions(+), 57 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ed3be718522993..2eefc7ec1b6368 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -715,6 +715,7 @@ Other API Changes Deprecations ~~~~~~~~~~~~ +- :meth:`DataFrame.from_csv` and :meth:`Series.from_csv` have been deprecated in favor of :func:`read_csv()` (:issue:`4191`) - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) - :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d907492759dbd4..c536cc9f2b82c5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -298,7 +298,7 @@ def _constructor(self): _constructor_sliced = Series _deprecations = NDFrame._deprecations | frozenset( - ['sortlevel', 'get_value', 'set_value']) + ['sortlevel', 'get_value', 'set_value', 'from_csv']) @property def _constructor_expanddim(self): @@ -1291,7 +1291,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=False, infer_datetime_format=False): """ - Read CSV file (DISCOURAGED, please use :func:`pandas.read_csv` + Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` instead). It is preferable to use the more powerful :func:`pandas.read_csv` @@ -1339,6 +1339,13 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, y : DataFrame """ + + warnings.warn("from_csv is deprecated. Please use read_csv(...) " + "instead. 
Note that some of the default arguments are " + "different, so please refer to the documentation " + "for from_csv when changing your function calls", + FutureWarning, stacklevel=2) + from pandas.io.parsers import read_table return read_table(path, header=header, sep=sep, parse_dates=parse_dates, index_col=index_col, diff --git a/pandas/core/series.py b/pandas/core/series.py index 49b6a6651367b8..be4066f0c39b93 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -147,7 +147,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _metadata = ['name'] _accessors = frozenset(['dt', 'cat', 'str']) _deprecations = generic.NDFrame._deprecations | frozenset( - ['sortlevel', 'reshape', 'get_value', 'set_value']) + ['sortlevel', 'reshape', 'get_value', 'set_value', 'from_csv']) _allow_index_ops = True def __init__(self, data=None, index=None, dtype=None, name=None, @@ -2688,7 +2688,7 @@ def between(self, left, right, inclusive=True): def from_csv(cls, path, sep=',', parse_dates=True, header=None, index_col=0, encoding=None, infer_datetime_format=False): """ - Read CSV file (DISCOURAGED, please use :func:`pandas.read_csv` + Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` instead). It is preferable to use the more powerful :func:`pandas.read_csv` @@ -2736,6 +2736,9 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, ------- y : Series """ + + # We're calling `DataFrame.from_csv` in the implementation, + # which will propagate a warning regarding `from_csv` deprecation. from pandas.core.frame import DataFrame df = DataFrame.from_csv(path, header=header, index_col=index_col, sep=sep, parse_dates=parse_dates, diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index a61a1571812530..ab34ce877a7266 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -31,6 +31,21 @@ class TestDataFrameToCSV(TestData): + def read_csv(self, path, **kwargs): + params = dict(index_col=0, parse_dates=True) + params.update(**kwargs) + + return pd.read_csv(path, **params) + + def test_from_csv_deprecation(self): + # see gh-17812 + with ensure_clean('__tmp_from_csv_deprecation__') as path: + self.tsframe.to_csv(path) + + with tm.assert_produces_warning(FutureWarning): + depr_recons = DataFrame.from_csv(path) + assert_frame_equal(self.tsframe, depr_recons) + def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: @@ -43,24 +58,25 @@ def test_to_csv_from_csv1(self): # test roundtrip self.tsframe.to_csv(path) - recons = DataFrame.from_csv(path) - + recons = self.read_csv(path) assert_frame_equal(self.tsframe, recons) self.tsframe.to_csv(path, index_label='index') - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) + assert(len(recons.columns) == len(self.tsframe.columns) + 1) # no index self.tsframe.to_csv(path, index=False) - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert_almost_equal(self.tsframe.values, recons.values) # corner case dm = DataFrame({'s1': Series(lrange(3), lrange(3)), 's2': Series(lrange(2), lrange(2))}) dm.to_csv(path) - recons = DataFrame.from_csv(path) + + recons = self.read_csv(path) assert_frame_equal(dm, recons) def test_to_csv_from_csv2(self): @@ -71,27 +87,26 @@ def test_to_csv_from_csv2(self): df = DataFrame(np.random.randn(3, 3), index=['a', 'a', 'b'], columns=['x', 'y', 'z']) df.to_csv(path) - result = DataFrame.from_csv(path) + result = self.read_csv(path) 
assert_frame_equal(result, df) midx = MultiIndex.from_tuples( [('A', 1, 2), ('A', 1, 2), ('B', 1, 2)]) df = DataFrame(np.random.randn(3, 3), index=midx, columns=['x', 'y', 'z']) + df.to_csv(path) - result = DataFrame.from_csv(path, index_col=[0, 1, 2], - parse_dates=False) - # TODO from_csv names index ['Unnamed: 1', 'Unnamed: 2'] should it - # ? + result = self.read_csv(path, index_col=[0, 1, 2], + parse_dates=False) assert_frame_equal(result, df, check_names=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_csv(path, header=col_aliases) - rs = DataFrame.from_csv(path) + + rs = self.read_csv(path) xp = self.frame2.copy() xp.columns = col_aliases - assert_frame_equal(xp, rs) pytest.raises(ValueError, self.frame2.to_csv, path, @@ -231,8 +246,9 @@ def make_dtnat_arr(n, nnat=None): with ensure_clean('1.csv') as pth: df = DataFrame(dict(a=s1, b=s2)) df.to_csv(pth, chunksize=chunksize) - recons = DataFrame.from_csv(pth)._convert(datetime=True, - coerce=True) + + recons = self.read_csv(pth)._convert(datetime=True, + coerce=True) assert_frame_equal(df, recons, check_names=False, check_less_precise=True) @@ -247,16 +263,17 @@ def _do_test(df, r_dtype=None, c_dtype=None, if rnlvl is not None: kwargs['index_col'] = lrange(rnlvl) kwargs['header'] = lrange(cnlvl) + with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', chunksize=chunksize, tupleize_cols=False) - recons = DataFrame.from_csv( - path, tupleize_cols=False, **kwargs) + recons = self.read_csv(path, tupleize_cols=False, **kwargs) else: kwargs['header'] = 0 + with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', chunksize=chunksize) - recons = DataFrame.from_csv(path, **kwargs) + recons = self.read_csv(path, **kwargs) def _to_uni(x): if not isinstance(x, compat.text_type): @@ -398,7 +415,7 @@ def test_to_csv_from_csv_w_some_infs(self): with ensure_clean() as path: self.frame.to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) # TODO to_csv drops column name assert_frame_equal(self.frame, recons, check_names=False) @@ -413,7 +430,7 @@ def test_to_csv_from_csv_w_all_infs(self): with ensure_clean() as path: self.frame.to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) # TODO to_csv drops column name assert_frame_equal(self.frame, recons, check_names=False) @@ -448,11 +465,13 @@ def test_to_csv_headers(self): to_df = DataFrame([[1, 2], [3, 4]], columns=['X', 'Y']) with ensure_clean('__tmp_to_csv_headers__') as path: from_df.to_csv(path, header=['X', 'Y']) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + assert_frame_equal(to_df, recons) from_df.to_csv(path, index=False, header=['X', 'Y']) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + recons.reset_index(inplace=True) assert_frame_equal(to_df, recons) @@ -471,13 +490,15 @@ def test_to_csv_multiindex(self): # round trip frame.to_csv(path) - df = DataFrame.from_csv(path, index_col=[0, 1], parse_dates=False) + + df = self.read_csv(path, index_col=[0, 1], + parse_dates=False) # TODO to_csv drops column name assert_frame_equal(frame, df, check_names=False) assert frame.index.names == df.index.names - # needed if setUP becomes a classmethod + # needed if setUp becomes a class method self.frame.index = old_index # try multiindex with dates @@ -487,21 +508,22 @@ def test_to_csv_multiindex(self): tsframe.index = MultiIndex.from_arrays(new_index) tsframe.to_csv(path, index_label=['time', 'foo']) - recons = 
DataFrame.from_csv(path, index_col=[0, 1]) + recons = self.read_csv(path, index_col=[0, 1]) + # TODO to_csv drops column name assert_frame_equal(tsframe, recons, check_names=False) # do not load index tsframe.to_csv(path) - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert len(recons.columns) == len(tsframe.columns) + 2 # no index tsframe.to_csv(path, index=False) - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert_almost_equal(recons.values, self.tsframe.values) - # needed if setUP becomes classmethod + # needed if setUp becomes class method self.tsframe.index = old_index with ensure_clean('__tmp_to_csv_multiindex__') as path: @@ -606,7 +628,8 @@ def _make_frame(names=None): with ensure_clean('__tmp_to_csv_multiindex__') as path: # empty tsframe[:0].to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + exp = tsframe[:0] exp.index = [] @@ -631,7 +654,7 @@ def test_to_csv_withcommas(self): with ensure_clean('__tmp_to_csv_withcommas__.csv') as path: df.to_csv(path) - df2 = DataFrame.from_csv(path) + df2 = self.read_csv(path) assert_frame_equal(df2, df) def test_to_csv_mixed(self): @@ -746,7 +769,7 @@ def test_to_csv_wide_frame_formatting(self): def test_to_csv_bug(self): f1 = StringIO('a,1.0\nb,2.0') - df = DataFrame.from_csv(f1, header=None) + df = self.read_csv(f1, header=None) newdf = DataFrame({'t': df[df.columns[0]]}) with ensure_clean() as path: diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 5b7fd1ec94a90b..ad51261a47c5c3 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -20,43 +20,73 @@ class TestSeriesToCSV(TestData): + def read_csv(self, path, **kwargs): + params = dict(squeeze=True, index_col=0, + header=None, parse_dates=True) + params.update(**kwargs) + + header = params.get("header") + out = pd.read_csv(path, **params) + + if header is None: + out.name = out.index.name = None + + return out + + def test_from_csv_deprecation(self): + # see gh-17812 + with ensure_clean() as path: + self.ts.to_csv(path) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = self.read_csv(path) + depr_ts = Series.from_csv(path) + assert_series_equal(depr_ts, ts) + def test_from_csv(self): with ensure_clean() as path: self.ts.to_csv(path) - ts = Series.from_csv(path) + ts = self.read_csv(path) assert_series_equal(self.ts, ts, check_names=False) + assert ts.name is None assert ts.index.name is None - # GH10483 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + depr_ts = Series.from_csv(path) + assert_series_equal(depr_ts, ts) + + # see gh-10483 self.ts.to_csv(path, header=True) - ts_h = Series.from_csv(path, header=0) - assert ts_h.name == 'ts' + ts_h = self.read_csv(path, header=0) + assert ts_h.name == "ts" self.series.to_csv(path) - series = Series.from_csv(path) - assert series.name is None - assert series.index.name is None + series = self.read_csv(path) assert_series_equal(self.series, series, check_names=False) + assert series.name is None assert series.index.name is None self.series.to_csv(path, header=True) - series_h = Series.from_csv(path, header=0) - assert series_h.name == 'series' + series_h = self.read_csv(path, header=0) + assert series_h.name == "series" - outfile = open(path, 'w') - outfile.write('1998-01-01|1.0\n1999-01-01|2.0') + outfile = open(path, "w") + outfile.write("1998-01-01|1.0\n1999-01-01|2.0") outfile.close() - 
series = Series.from_csv(path, sep='|')
-        checkseries = Series({datetime(1998, 1, 1): 1.0,
-                              datetime(1999, 1, 1): 2.0})
-        assert_series_equal(checkseries, series)

-        series = Series.from_csv(path, sep='|', parse_dates=False)
-        checkseries = Series({'1998-01-01': 1.0, '1999-01-01': 2.0})
-        assert_series_equal(checkseries, series)
+        series = self.read_csv(path, sep="|")
+        check_series = Series({datetime(1998, 1, 1): 1.0,
+                               datetime(1999, 1, 1): 2.0})
+        assert_series_equal(check_series, series)
+
+        series = self.read_csv(path, sep="|", parse_dates=False)
+        check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
+        assert_series_equal(check_series, series)

     def test_to_csv(self):
         import io
@@ -76,20 +106,19 @@ def test_to_csv_unicode_index(self):
         buf = StringIO()
         s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")])

-        s.to_csv(buf, encoding='UTF-8')
+        s.to_csv(buf, encoding="UTF-8")
         buf.seek(0)

-        s2 = Series.from_csv(buf, index_col=0, encoding='UTF-8')
-
+        s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
         assert_series_equal(s, s2)

     def test_to_csv_float_format(self):

         with ensure_clean() as filename:
             ser = Series([0.123456, 0.234567, 0.567567])
-            ser.to_csv(filename, float_format='%.2f')
+            ser.to_csv(filename, float_format="%.2f")

-            rs = Series.from_csv(filename)
+            rs = self.read_csv(filename)
             xp = Series([0.12, 0.23, 0.57])
             assert_series_equal(rs, xp)

From 390f36e0068aa3e77f20b7c7f37adc446ac408c2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 10 Oct 2017 09:34:36 +0200
Subject: [PATCH 49/76] Remove keep_tz kwarg from DatetimeIndex.to_frame
 (#17826)

---
 pandas/core/indexes/datetimes.py | 40 --------------------------------
 1 file changed, 40 deletions(-)

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index dae62176722e1e..25897bee298458 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -915,46 +915,6 @@ def to_series(self, keep_tz=False):
                       index=self._shallow_copy(), name=self.name)

-    def to_frame(self, index=True, keep_tz=False):
-        """
-        Create a DataFrame with a column containing the DatetimeIndex.
-
-        .. versionadded:: 0.21.0
-
-        Parameters
-        ----------
-        index : boolean, default True
-            Set the index of the returned DataFrame
-            as the original DatetimeIndex.
-
-        keep_tz : optional, default False
-            Return the data keeping the timezone.
-
-            If keep_tz is True:
-
-                If the timezone is not set, the resulting
-                DataFrame will have a datetime64[ns] dtype.
-
-                Otherwise the DataFrame will have a datetime64[ns, tz] dtype;
-                the tz will be preserved.
-
-            If keep_tz is False:
-
-                DataFrame will have a datetime64[ns] dtype. TZ aware
-                objects will have the tz removed.
-
-        Returns
-        -------
-        DataFrame : a DataFrame containing the original DatetimeIndex data.
- """ - - from pandas import DataFrame - result = DataFrame(self._to_embed(keep_tz), columns=[self.name or 0]) - - if index: - result.index = self - return result - def _to_embed(self, keep_tz=False): """ return an array repr of this object, potentially casting to object From 6ff84345e811bfd8e111d32e76ecbb95d7c553af Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 10 Oct 2017 09:14:53 -0400 Subject: [PATCH 50/76] TST: cleanup warnings on mpl 2.1 (#17835) --- pandas/plotting/_compat.py | 8 ++++++++ pandas/plotting/_core.py | 2 +- pandas/tests/plotting/test_boxplot_method.py | 20 ++++++++++++++------ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_compat.py index 7b04b9e1171ecc..d527bc08e2f080 100644 --- a/pandas/plotting/_compat.py +++ b/pandas/plotting/_compat.py @@ -65,3 +65,11 @@ def _mpl_ge_2_0_1(): return matplotlib.__version__ >= LooseVersion('2.0.1') except ImportError: return False + + +def _mpl_ge_2_1_0(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.1') + except ImportError: + return False diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 211d9777e7515d..c4cd562df7eb30 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2000,7 +2000,7 @@ def maybe_color_bp(bp): def plot_group(keys, values, ax): keys = [pprint_thing(x) for x in keys] - values = [remove_na_arraylike(v) for v in values] + values = [np.asarray(remove_na_arraylike(v)) for v in values] bp = ax.boxplot(values, **kwds) if fontsize is not None: ax.tick_params(axis='both', labelsize=fontsize) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 8fe119d28644c9..4b1cb2ccbd3dd3 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -11,7 +11,6 @@ import numpy as np from numpy import random -from numpy.random import randn import pandas.plotting as plotting @@ -35,8 +34,8 @@ def _skip_if_mpl_14_or_dev_boxplot(): class TestDataFramePlots(TestPlotBase): @pytest.mark.slow - def test_boxplot_legacy(self): - df = DataFrame(randn(6, 4), + def test_boxplot_legacy1(self): + df = DataFrame(np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) df['indic'] = ['foo', 'bar'] * 3 @@ -60,6 +59,8 @@ def test_boxplot_legacy(self): with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='indic', notch=1) + @pytest.mark.slow + def test_boxplot_legacy2(self): df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2']) df['X'] = Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B']) df['Y'] = Series(['A'] * 10) @@ -103,7 +104,7 @@ def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 import matplotlib as mpl # noqa - df = DataFrame(randn(6, 4), + df = DataFrame(np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) with pytest.raises(ValueError): @@ -176,7 +177,7 @@ def test_fontsize(self): class TestDataFrameGroupByPlots(TestPlotBase): @pytest.mark.slow - def test_boxplot_legacy(self): + def test_boxplot_legacy1(self): grouped = self.hist_df.groupby(by='gender') with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') @@ -184,10 +185,12 @@ def test_boxplot_legacy(self): axes = _check_plot_works(grouped.boxplot, subplots=False, return_type='axes') 
self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + @pytest.mark.slow + def test_boxplot_legacy2(self): tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) - grouped = df.groupby(level=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') @@ -197,6 +200,11 @@ def test_boxplot_legacy(self): return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + @pytest.mark.slow + def test_boxplot_legacy3(self): + tuples = lzip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), + index=MultiIndex.from_tuples(tuples)) grouped = df.unstack(level=1).groupby(level=0, axis=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') From d12a7a01888258a4c95fa7003916859f110075f1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 10 Oct 2017 09:17:01 -0400 Subject: [PATCH 51/76] COMPAT: sum/prod on all nan will remain nan regardless of bottleneck install (#17630) xref #15507 closes #9422 --- doc/source/missing_data.rst | 36 +++++ doc/source/whatsnew/v0.21.0.txt | 47 ++++++ pandas/core/generic.py | 2 +- pandas/core/nanops.py | 36 +++-- pandas/tests/frame/test_analytics.py | 73 +++++---- pandas/tests/groupby/test_aggregate.py | 2 +- pandas/tests/series/test_analytics.py | 201 +++++++++---------------- pandas/tests/test_panel.py | 2 +- pandas/tests/test_panel4d.py | 2 +- pandas/tests/test_window.py | 7 +- pandas/util/testing.py | 15 -- 11 files changed, 223 insertions(+), 200 deletions(-) diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 07740d66a21865..c0b3a2e0edb305 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -181,6 +181,42 @@ account for missing data. For example: df.mean(1) df.cumsum() + +.. _missing_data.numeric_sum: + +Sum/Prod of Empties/Nans +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + This behavior is now standard as of v0.21.0; previously sum/prod would give different + results if the ``bottleneck`` package was installed. See the :ref:`here `. + +With ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, the result will be all-``NaN``. + +.. ipython:: python + + s = Series([np.nan]) + + s.sum() + +Summing of an empty ``Series`` + +.. ipython:: python + + pd.Series([]).sum() + +.. warning:: + + These behaviors differ from the default in ``numpy`` where an empty sum returns zero. + + .. ipython:: python + + np.nansum(np.array([np.nan])) + np.nansum(np.array([])) + + + NA values in GroupBy ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2eefc7ec1b6368..1c4af579d16dcb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -12,6 +12,7 @@ Highlights include: - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. - New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying categoricals independent of the data, see :ref:`here `. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, see :ref:`here ` Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -412,6 +413,52 @@ Current Behavior s.loc[pd.Index([True, False, True])] +.. 
_whatsnew_0210.api_breaking.bottleneck:
+
+Sum/Prod of all-NaN Series/DataFrames is now consistently NaN
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on
+whether `bottleneck `__ is installed. (:issue:`9422`, :issue:`15507`).
+
+With ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, the result will be all-``NaN``. See the :ref:`docs `.
+
+.. ipython:: python
+
+   s = Series([np.nan])
+
+Previously, without ``bottleneck``:
+
+.. code-block:: ipython
+
+   In [2]: s.sum()
+   Out[2]: np.nan
+
+Previously, with ``bottleneck``:
+
+.. code-block:: ipython
+
+   In [2]: s.sum()
+   Out[2]: 0.0
+
+New behavior, regardless of the bottleneck installation:
+
+.. ipython:: python
+
+   s.sum()
+
+Note that this also changes the sum of an empty ``Series``.
+
+Previously, regardless of ``bottleneck``:
+
+.. code-block:: ipython
+
+   In [1]: pd.Series([]).sum()
+   Out[1]: 0
+
+.. ipython:: python
+
+   pd.Series([]).sum()

 .. _whatsnew_0210.api_breaking.pandas_eval:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c7ae9bbee90130..bc0f10a3f79abb 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6990,7 +6990,7 @@ def _doc_parms(cls):
     ----------
     axis : %(axis_descr)s
     skipna : boolean, default True
-        Exclude NA/null values. If an entire row/column is NA, the result
+        Exclude NA/null values. If an entire row/column is NA or empty, the result
         will be NA
     level : int or level name, default None
         If the axis is a MultiIndex (hierarchical), count along a
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 388b2ecdff445d..baeb869239c1e4 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -18,7 +18,7 @@
     is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype)
 from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
-from pandas.core.dtypes.missing import isna, notna
+from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
 from pandas.core.config import get_option
 from pandas.core.common import _values_from_object
@@ -89,8 +89,7 @@ def _f(*args, **kwargs):
 class bottleneck_switch(object):

-    def __init__(self, zero_value=None, **kwargs):
-        self.zero_value = zero_value
+    def __init__(self, **kwargs):
         self.kwargs = kwargs

     def __call__(self, alt):
@@ -108,18 +107,20 @@ def f(values, axis=None, skipna=True, **kwds):
                 if k not in kwds:
                     kwds[k] = v
             try:
-                if self.zero_value is not None and values.size == 0:
-                    if values.ndim == 1:
+                if values.size == 0:
+
+                    # we either return np.nan or pd.NaT
+                    if is_numeric_dtype(values):
+                        values = values.astype('float64')
+                    fill_value = na_value_for_dtype(values.dtype)

-                        # wrap the 0's if needed
-                        if is_timedelta64_dtype(values):
-                            return lib.Timedelta(0)
-                        return 0
+                    if values.ndim == 1:
+                        return fill_value
                     else:
                         result_shape = (values.shape[:axis] +
                                         values.shape[axis + 1:])
-                        result = np.empty(result_shape)
-                        result.fill(0)
+                        result = np.empty(result_shape, dtype=values.dtype)
+                        result.fill(fill_value)
                         return result

                 if (_USE_BOTTLENECK and skipna and
@@ -154,11 +155,16 @@ def _bn_ok_dtype(dt, name):
     # Bottleneck chokes on datetime64
     if (not is_object_dtype(dt) and
             not is_datetime_or_timedelta_dtype(dt)):
+
         # GH 15507
         # bottleneck does not properly upcast during the sum
         # so can overflow
-        if name == 'nansum':
-            if dt.itemsize < 8:
-                return False
+
+        # GH 9422
+        # further we also want to preserve NaN when all elements
+        # are NaN, unlike
bottleneck/numpy which consider this + # to be 0 + if name in ['nansum', 'nanprod']: + return False return True return False @@ -297,7 +303,7 @@ def nanall(values, axis=None, skipna=True): @disallow('M8') -@bottleneck_switch(zero_value=0) +@bottleneck_switch() def nansum(values, axis=None, skipna=True): values, mask, dtype, dtype_max = _get_values(values, skipna, 0) dtype_sum = dtype_max diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index dca905b47000e6..c36b5957a4283d 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -448,7 +448,11 @@ def test_sum(self): has_numeric_only=True, check_dtype=False, check_less_precise=True) - def test_stat_operators_attempt_obj_array(self): + @pytest.mark.parametrize( + "method", ['sum', 'mean', 'prod', 'var', + 'std', 'skew', 'min', 'max']) + def test_stat_operators_attempt_obj_array(self, method): + # GH #676 data = { 'a': [-0.00049987540199591344, -0.0016467257772919831, 0.00067695870775883013], @@ -458,20 +462,17 @@ def test_stat_operators_attempt_obj_array(self): } df1 = DataFrame(data, index=['foo', 'bar', 'baz'], dtype='O') - methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max'] - # GH #676 df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object) for df in [df1, df2]: - for meth in methods: - assert df.values.dtype == np.object_ - result = getattr(df, meth)(1) - expected = getattr(df.astype('f8'), meth)(1) + assert df.values.dtype == np.object_ + result = getattr(df, method)(1) + expected = getattr(df.astype('f8'), method)(1) - if not tm._incompat_bottleneck_version(meth): - tm.assert_series_equal(result, expected) + if method in ['sum', 'prod']: + tm.assert_series_equal(result, expected) def test_mean(self): self._check_stat_op('mean', np.mean, check_dates=True) @@ -563,15 +564,15 @@ def test_var_std(self): arr = np.repeat(np.random.random((1, 1000)), 1000, 0) result = nanops.nanvar(arr, axis=0) assert not (result < 0).any() - if nanops._USE_BOTTLENECK: - nanops._USE_BOTTLENECK = False + + with pd.option_context('use_bottleneck', False): result = nanops.nanvar(arr, axis=0) assert not (result < 0).any() - nanops._USE_BOTTLENECK = True - def test_numeric_only_flag(self): + @pytest.mark.parametrize( + "meth", ['sem', 'var', 'std']) + def test_numeric_only_flag(self, meth): # GH #9201 - methods = ['sem', 'var', 'std'] df1 = DataFrame(np.random.randn(5, 3), columns=['foo', 'bar', 'baz']) # set one entry to a number in str format df1.loc[0, 'foo'] = '100' @@ -580,20 +581,19 @@ def test_numeric_only_flag(self): # set one entry to a non-number str df2.loc[0, 'foo'] = 'a' - for meth in methods: - result = getattr(df1, meth)(axis=1, numeric_only=True) - expected = getattr(df1[['bar', 'baz']], meth)(axis=1) - tm.assert_series_equal(expected, result) + result = getattr(df1, meth)(axis=1, numeric_only=True) + expected = getattr(df1[['bar', 'baz']], meth)(axis=1) + tm.assert_series_equal(expected, result) - result = getattr(df2, meth)(axis=1, numeric_only=True) - expected = getattr(df2[['bar', 'baz']], meth)(axis=1) - tm.assert_series_equal(expected, result) + result = getattr(df2, meth)(axis=1, numeric_only=True) + expected = getattr(df2[['bar', 'baz']], meth)(axis=1) + tm.assert_series_equal(expected, result) - # df1 has all numbers, df2 has a letter inside - pytest.raises(TypeError, lambda: getattr(df1, meth)( - axis=1, numeric_only=False)) - pytest.raises(TypeError, lambda: getattr(df2, meth)( - axis=1, numeric_only=False)) + # df1 has 
all numbers, df2 has a letter inside + pytest.raises(TypeError, lambda: getattr(df1, meth)( + axis=1, numeric_only=False)) + pytest.raises(TypeError, lambda: getattr(df2, meth)( + axis=1, numeric_only=False)) def test_mixed_ops(self): # GH 16116 @@ -606,11 +606,9 @@ def test_mixed_ops(self): result = getattr(df, op)() assert len(result) == 2 - if nanops._USE_BOTTLENECK: - nanops._USE_BOTTLENECK = False + with pd.option_context('use_bottleneck', False): result = getattr(df, op)() assert len(result) == 2 - nanops._USE_BOTTLENECK = True def test_cumsum(self): self.tsframe.loc[5:10, 0] = nan @@ -676,11 +674,10 @@ def test_sem(self): arr = np.repeat(np.random.random((1, 1000)), 1000, 0) result = nanops.nansem(arr, axis=0) assert not (result < 0).any() - if nanops._USE_BOTTLENECK: - nanops._USE_BOTTLENECK = False + + with pd.option_context('use_bottleneck', False): result = nanops.nansem(arr, axis=0) assert not (result < 0).any() - nanops._USE_BOTTLENECK = True def test_skew(self): tm._skip_if_no_scipy() @@ -767,7 +764,7 @@ def wrapper(x): tm.assert_series_equal(result0, frame.apply(skipna_wrapper), check_dtype=check_dtype, check_less_precise=check_less_precise) - if not tm._incompat_bottleneck_version(name): + if name in ['sum', 'prod']: exp = frame.apply(skipna_wrapper, axis=1) tm.assert_series_equal(result1, exp, check_dtype=False, check_less_precise=check_less_precise) @@ -799,7 +796,7 @@ def wrapper(x): all_na = self.frame * np.NaN r0 = getattr(all_na, name)(axis=0) r1 = getattr(all_na, name)(axis=1) - if not tm._incompat_bottleneck_version(name): + if name in ['sum', 'prod']: assert np.isnan(r0).all() assert np.isnan(r1).all() @@ -1859,14 +1856,14 @@ def test_dataframe_clip(self): assert (clipped_df.values[ub_mask] == ub).all() assert (clipped_df.values[mask] == df.values[mask]).all() - @pytest.mark.xfail(reason=("clip on mixed integer or floats " - "with integer clippers coerces to float")) def test_clip_mixed_numeric(self): - + # TODO(jreback) + # clip on mixed integer or floats + # with integer clippers coerces to float df = DataFrame({'A': [1, 2, 3], 'B': [1., np.nan, 3.]}) result = df.clip(1, 2) - expected = DataFrame({'A': [1, 2, 2], + expected = DataFrame({'A': [1, 2, 2.], 'B': [1., np.nan, 2.]}) tm.assert_frame_equal(result, expected, check_like=True) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index efc833575843c1..913d3bcc098692 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -562,7 +562,7 @@ def _testit(name): exp.name = 'C' result = op(grouped)['C'] - if not tm._incompat_bottleneck_version(name): + if name in ['sum', 'prod']: assert_series_equal(result, exp) _testit('count') diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 6495d748e38233..8cc40bb5146c57 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -15,110 +15,103 @@ from pandas.core.index import MultiIndex from pandas.core.indexes.datetimes import Timestamp from pandas.core.indexes.timedeltas import Timedelta -import pandas.core.config as cf - import pandas.core.nanops as nanops -from pandas.compat import lrange, range, is_platform_windows +from pandas.compat import lrange, range from pandas import compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, assert_index_equal) import pandas.util.testing as tm - from .common import TestData -skip_if_bottleneck_on_windows = 
(is_platform_windows() and - nanops._USE_BOTTLENECK) +class TestSeriesAnalytics(TestData): + @pytest.mark.parametrize("use_bottleneck", [True, False]) + @pytest.mark.parametrize("method", ["sum", "prod"]) + def test_empty(self, method, use_bottleneck): -class TestSeriesAnalytics(TestData): + with pd.option_context("use_bottleneck", use_bottleneck): + # GH 9422 + # treat all missing as NaN + s = Series([]) + result = getattr(s, method)() + assert isna(result) - def test_sum_zero(self): - arr = np.array([]) - assert nanops.nansum(arr) == 0 + result = getattr(s, method)(skipna=True) + assert isna(result) - arr = np.empty((10, 0)) - assert (nanops.nansum(arr, axis=1) == 0).all() + s = Series([np.nan]) + result = getattr(s, method)() + assert isna(result) - # GH #844 - s = Series([], index=[]) - assert s.sum() == 0 + result = getattr(s, method)(skipna=True) + assert isna(result) - df = DataFrame(np.empty((10, 0))) - assert (df.sum(1) == 0).all() + s = Series([np.nan, 1]) + result = getattr(s, method)() + assert result == 1.0 + + s = Series([np.nan, 1]) + result = getattr(s, method)(skipna=True) + assert result == 1.0 + + # GH #844 (changed in 9422) + df = DataFrame(np.empty((10, 0))) + assert (df.sum(1).isnull()).all() + + @pytest.mark.parametrize( + "method", ['sum', 'mean', 'median', 'std', 'var']) + def test_ops_consistency_on_empty(self, method): + + # GH 7869 + # consistency on empty + + # float + result = getattr(Series(dtype=float), method)() + assert isna(result) + + # timedelta64[ns] + result = getattr(Series(dtype='m8[ns]'), method)() + assert result is pd.NaT def test_nansum_buglet(self): s = Series([1.0, np.nan], index=[0, 1]) result = np.nansum(s) assert_almost_equal(result, 1) - def test_overflow(self): - # GH 6915 - # overflowing on the smaller int dtypes - for dtype in ['int32', 'int64']: - v = np.arange(5000000, dtype=dtype) - s = Series(v) - - # no bottleneck - result = s.sum(skipna=False) - assert int(result) == v.sum(dtype='int64') - result = s.min(skipna=False) - assert int(result) == 0 - result = s.max(skipna=False) - assert int(result) == v[-1] - - for dtype in ['float32', 'float64']: - v = np.arange(5000000, dtype=dtype) - s = Series(v) - - # no bottleneck - result = s.sum(skipna=False) - assert result == v.sum(dtype=dtype) - result = s.min(skipna=False) - assert np.allclose(float(result), 0.0) - result = s.max(skipna=False) - assert np.allclose(float(result), v[-1]) - - @pytest.mark.xfail( - skip_if_bottleneck_on_windows, - reason="buggy bottleneck with sum overflow on windows") - def test_overflow_with_bottleneck(self): - # GH 6915 - # overflowing on the smaller int dtypes - for dtype in ['int32', 'int64']: - v = np.arange(5000000, dtype=dtype) - s = Series(v) - - # use bottleneck if available - result = s.sum() - assert int(result) == v.sum(dtype='int64') - result = s.min() - assert int(result) == 0 - result = s.max() - assert int(result) == v[-1] - - for dtype in ['float32', 'float64']: - v = np.arange(5000000, dtype=dtype) - s = Series(v) - - # use bottleneck if available - result = s.sum() - assert result == v.sum(dtype=dtype) - result = s.min() - assert np.allclose(float(result), 0.0) - result = s.max() - assert np.allclose(float(result), v[-1]) - - @pytest.mark.xfail( - skip_if_bottleneck_on_windows, - reason="buggy bottleneck with sum overflow on windows") + @pytest.mark.parametrize("use_bottleneck", [True, False]) + def test_sum_overflow(self, use_bottleneck): + + with pd.option_context('use_bottleneck', use_bottleneck): + # GH 6915 + # overflowing on the 
smaller int dtypes + for dtype in ['int32', 'int64']: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert int(result) == v.sum(dtype='int64') + result = s.min(skipna=False) + assert int(result) == 0 + result = s.max(skipna=False) + assert int(result) == v[-1] + + for dtype in ['float32', 'float64']: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert result == v.sum(dtype=dtype) + result = s.min(skipna=False) + assert np.allclose(float(result), 0.0) + result = s.max(skipna=False) + assert np.allclose(float(result), v[-1]) + def test_sum(self): self._check_stat_op('sum', np.sum, check_allna=True) def test_sum_inf(self): - import pandas.core.nanops as nanops - s = Series(np.random.randn(10)) s2 = s.copy() @@ -130,7 +123,7 @@ def test_sum_inf(self): arr = np.random.randn(100, 100).astype('f4') arr[:, 2] = np.inf - with cf.option_context("mode.use_inf_as_na", True): + with pd.option_context("mode.use_inf_as_na", True): assert_almost_equal(s.sum(), s2.sum()) res = nanops.nansum(arr, axis=1) @@ -510,9 +503,8 @@ def test_npdiff(self): def _check_stat_op(self, name, alternate, check_objects=False, check_allna=False): - import pandas.core.nanops as nanops - def testit(): + with pd.option_context('use_bottleneck', False): f = getattr(Series, name) # add some NaNs @@ -535,15 +527,7 @@ def testit(): allna = self.series * nan if check_allna: - # xref 9422 - # bottleneck >= 1.0 give 0.0 for an allna Series sum - try: - assert nanops._USE_BOTTLENECK - import bottleneck as bn # noqa - assert bn.__version__ >= LooseVersion('1.0') - assert f(allna) == 0.0 - except: - assert np.isnan(f(allna)) + assert np.isnan(f(allna)) # dtype=object with None, it works! s = Series([1, 2, 3, None, 5]) @@ -574,16 +558,6 @@ def testit(): tm.assert_raises_regex(NotImplementedError, name, f, self.series, numeric_only=True) - testit() - - try: - import bottleneck as bn # noqa - nanops._USE_BOTTLENECK = False - testit() - nanops._USE_BOTTLENECK = True - except ImportError: - pass - def _check_accum_op(self, name, check_dtype=True): func = getattr(np, name) tm.assert_numpy_array_equal(func(self.ts).values, @@ -733,31 +707,6 @@ def test_modulo(self): expected = Series([nan, 0.0]) assert_series_equal(result, expected) - def test_ops_consistency_on_empty(self): - - # GH 7869 - # consistency on empty - - # float - result = Series(dtype=float).sum() - assert result == 0 - - result = Series(dtype=float).mean() - assert isna(result) - - result = Series(dtype=float).median() - assert isna(result) - - # timedelta64[ns] - result = Series(dtype='m8[ns]').sum() - assert result == Timedelta(0) - - result = Series(dtype='m8[ns]').mean() - assert result is pd.NaT - - result = Series(dtype='m8[ns]').median() - assert result is pd.NaT - def test_corr(self): tm._skip_if_no_scipy() diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index c8e056f156218f..2769ec0d2dbed3 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -172,7 +172,7 @@ def wrapper(x): for i in range(obj.ndim): result = f(axis=i) - if not tm._incompat_bottleneck_version(name): + if name in ['sum', 'prod']: assert_frame_equal(result, obj.apply(skipna_wrapper, axis=i)) pytest.raises(Exception, f, axis=obj.ndim) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 863671feb4ed89..49859fd27d7bc2 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -138,7 +138,7 @@ def wrapper(x): with 
catch_warnings(record=True): for i in range(obj.ndim): result = f(axis=i) - if not tm._incompat_bottleneck_version(name): + if name in ['sum', 'prod']: expected = obj.apply(skipna_wrapper, axis=i) tm.assert_panel_equal(result, expected) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 0fe51121abef6c..432350b4849d80 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2355,7 +2355,8 @@ def test_expanding_consistency(self, min_periods): expanding_apply_f_result = x.expanding( min_periods=min_periods).apply(func=f) - if not tm._incompat_bottleneck_version(name): + # GH 9422 + if name in ['sum', 'prod']: assert_equal(expanding_f_result, expanding_apply_f_result) @@ -2453,7 +2454,9 @@ def test_rolling_consistency(self, window, min_periods, center): rolling_apply_f_result = x.rolling( window=window, min_periods=min_periods, center=center).apply(func=f) - if not tm._incompat_bottleneck_version(name): + + # GH 9422 + if name in ['sum', 'prod']: assert_equal(rolling_f_result, rolling_apply_f_result) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 202c9473eea12e..3c23462e10d355 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -401,21 +401,6 @@ def _skip_if_no_localpath(): pytest.skip("py.path not installed") -def _incompat_bottleneck_version(method): - """ skip if we have bottleneck installed - and its >= 1.0 - as we don't match the nansum/nanprod behavior for all-nan - ops, see GH9422 - """ - if method not in ['sum', 'prod']: - return False - try: - import bottleneck as bn - return bn.__version__ >= LooseVersion('1.0') - except ImportError: - return False - - def skip_if_no_ne(engine='numexpr'): from pandas.core.computation.expressions import ( _USE_NUMEXPR, From 727ea20bdb72ff4a5b8380d7f613d4aff058e28a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 10 Oct 2017 10:17:56 -0500 Subject: [PATCH 52/76] API: Added axis argument to rename, reindex (#17800) * API: Added axis argument to rename xref: https://github.com/pandas-dev/pandas/issues/12392 * API: Accept 'axis' keyword argument for reindex --- doc/source/basics.rst | 24 +++- doc/source/whatsnew/v0.21.0.txt | 34 +++++ pandas/core/frame.py | 136 +++++++++++++++++- pandas/core/generic.py | 78 +++++++++- pandas/core/panel.py | 3 +- pandas/core/series.py | 63 +++++++- pandas/core/sparse/series.py | 3 +- pandas/tests/frame/test_alter_axes.py | 108 ++++++++++++++ .../tests/frame/test_axis_select_reindex.py | 98 ++++++++++++- 9 files changed, 529 insertions(+), 18 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 0990d2bd15ee6f..be9d1a5d83b85a 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1217,6 +1217,15 @@ following can be done: This means that the reindexed Series's index is the same Python object as the DataFrame's index. +.. versionadded:: 0.21.0 + +:meth:`DataFrame.reindex` also supports an "axis-style" calling convention, +where you specify a single ``labels`` argument and the ``axis`` it applies to. + +.. ipython:: python + + df.reindex(['c', 'f', 'b'], axis='index') + df.reindex(['three', 'two', 'one'], axis='columns') .. seealso:: @@ -1413,12 +1422,23 @@ Series can also be used: .. ipython:: python - df.rename(columns={'one' : 'foo', 'two' : 'bar'}, - index={'a' : 'apple', 'b' : 'banana', 'd' : 'durian'}) + df.rename(columns={'one': 'foo', 'two': 'bar'}, + index={'a': 'apple', 'b': 'banana', 'd': 'durian'}) If the mapping doesn't include a column/index label, it isn't renamed. 
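As a quick standalone check of that behavior (an illustrative sketch, not part of the patch; the toy frame and the ``'zzz'`` key are made up for the example):

    # labels absent from the mapping are kept; extra keys are ignored
    import pandas as pd

    df = pd.DataFrame({'one': [1, 2]}, index=['a', 'b'])
    out = df.rename(index={'a': 'apple', 'zzz': 'unused'})
    assert list(out.index) == ['apple', 'b']   # 'b' kept, 'zzz' harmless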
Also extra labels in the mapping don't throw an error.

+.. versionadded:: 0.21.0
+
+:meth:`DataFrame.rename` also supports an "axis-style" calling convention, where
+you specify a single ``mapper`` and the ``axis`` to apply that mapping to.
+
+.. ipython:: python
+
+   df.rename({'one': 'foo', 'two': 'bar'}, axis='columns')
+   df.rename({'a': 'apple', 'b': 'banana', 'd': 'durian'}, axis='index')
+
 The :meth:`~DataFrame.rename` method also provides an ``inplace`` named
 parameter that is by default ``False`` and copies the underlying data. Pass
 ``inplace=True`` to rename the data in place.
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1c4af579d16dcb..f04410ef635318 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -111,6 +111,40 @@ For example:
   # the following is now equivalent
   df.drop(columns=['B', 'C'])

+.. _whatsnew_0210.enhancements.rename_reindex_axis:
+
+``rename``, ``reindex`` now also accept axis keyword
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The :meth:`DataFrame.rename` and :meth:`DataFrame.reindex` methods have gained
+the ``axis`` keyword to specify the axis to target with the operation
+(:issue:`12392`).
+
+Here's ``rename``:
+
+.. ipython:: python
+
+   df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
+   df.rename(str.lower, axis='columns')
+   df.rename(id, axis='index')
+
+And ``reindex``:
+
+.. ipython:: python
+
+   df.reindex(['A', 'B', 'C'], axis='columns')
+   df.reindex([0, 1, 3], axis='index')
+
+The "index, columns" style continues to work as before.
+
+.. ipython:: python
+
+   df.rename(index=id, columns=str.lower)
+   df.reindex(index=[0, 1, 3], columns=['A', 'B', 'C'])
+
+We *highly* encourage using named arguments to avoid confusion when using either
+style.
+
 .. _whatsnew_0210.enhancements.categorical_dtype:

 ``CategoricalDtype`` for specifying categoricals
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c536cc9f2b82c5..94ff70f287fbe7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -65,6 +65,7 @@
                                 _values_from_object,
                                 _maybe_box_datetimelike,
                                 _dict_compat,
+                                _all_not_none,
                                 standardize_mapping)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import (Index, MultiIndex, _ensure_index,
@@ -111,7 +112,13 @@
     optional_by="""
        by : str or list of str
            Name or list of names which refer to the axis items.""",
-    versionadded_to_excel='')
+    versionadded_to_excel='',
+    optional_labels="""labels : array-like, optional
+        New labels / index to conform the axis specified by 'axis' to.""",
+    optional_axis="""axis : int or str, optional
+        Axis to target. Can be either the axis name ('index', 'columns')
+        or number (0, 1).""",
+)

 _numeric_only_doc = """numeric_only : boolean, default None
     Include only float, int, boolean data. If None, will attempt to use
@@ -2776,6 +2783,47 @@ def reindexer(value):

         return np.atleast_2d(np.asarray(value))

+    def _validate_axis_style_args(self, arg, arg_name, index, columns,
+                                  axis, method_name):
+        if axis is not None:
+            # Using "axis" style, along with a positional arg
+            # Both index and columns should be None then
+            axis = self._get_axis_name(axis)
+            if index is not None or columns is not None:
+                msg = (
+                    "Can't specify both 'axis' and 'index' or 'columns'. 
" + "Specify either\n" + "\t.{method_name}.rename({arg_name}, axis=axis), or\n" + "\t.{method_name}.rename(index=index, columns=columns)" + ).format(arg_name=arg_name, method_name=method_name) + raise TypeError(msg) + if axis == 'index': + index = arg + elif axis == 'columns': + columns = arg + + elif _all_not_none(arg, index, columns): + msg = ( + "Cannot specify all of '{arg_name}', 'index', and 'columns'. " + "Specify either {arg_name} and 'axis', or 'index' and " + "'columns'." + ).format(arg_name=arg_name) + raise TypeError(msg) + + elif _all_not_none(arg, index): + # This is the "ambiguous" case, so emit a warning + msg = ( + "Interpreting call to '.{method_name}(a, b)' as " + "'.{method_name}(index=a, columns=b)'. " + "Use keyword arguments to remove any ambiguity." + ).format(method_name=method_name) + warnings.warn(msg, stacklevel=3) + index, columns = arg, index + elif index is None: + # This is for the default axis, like reindex([0, 1]) + index = arg + return index, columns + @property def _series(self): result = {} @@ -2902,7 +2950,11 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, broadcast_axis=broadcast_axis) @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) - def reindex(self, index=None, columns=None, **kwargs): + def reindex(self, labels=None, index=None, columns=None, axis=None, + **kwargs): + index, columns = self._validate_axis_style_args(labels, 'labels', + index, columns, + axis, 'reindex') return super(DataFrame, self).reindex(index=index, columns=columns, **kwargs) @@ -2914,8 +2966,84 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, method=method, level=level, copy=copy, limit=limit, fill_value=fill_value) - @Appender(_shared_docs['rename'] % _shared_doc_kwargs) - def rename(self, index=None, columns=None, **kwargs): + def rename(self, mapper=None, index=None, columns=None, axis=None, + **kwargs): + """Alter axes labels. + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. Extra labels listed don't throw an + error. + + See the :ref:`user guide ` for more. + + Parameters + ---------- + mapper, index, columns : dict-like or function, optional + dict-like or functions transformations to apply to + that axis' values. Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` and + ``columns``. + axis : int or str, optional + Axis to target with ``mapper``. Can be either the axis name + ('index', 'columns') or number (0, 1). The default is 'index'. + copy : boolean, default True + Also copy underlying data + inplace : boolean, default False + Whether to return a new %(klass)s. If True then value of copy is + ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. + + Returns + ------- + renamed : DataFrame + + See Also + -------- + pandas.DataFrame.rename_axis + + Examples + -------- + + ``DataFrame.rename`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...) + * ``(mapper, axis={'index', 'columns'}, ...) + + We *highly* recommend using keyword arguments to clarify your + intent. 
+ + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename(index=str, columns={"A": "a", "B": "c"}) + a c + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename(index=str, columns={"A": "a", "C": "c"}) + a B + 0 1 4 + 1 2 5 + 2 3 6 + + Using axis-style parameters + + >>> df.rename(str.lower, axis='columns') + a b + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename({1: 2, 2: 4}, axis='index') + A B + 0 1 4 + 2 2 5 + 4 3 6 + """ + index, columns = self._validate_axis_style_args(mapper, 'mapper', + index, columns, + axis, 'rename') return super(DataFrame, self).rename(index=index, columns=columns, **kwargs) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bc0f10a3f79abb..9d9d8334fcaf40 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -742,11 +742,13 @@ def swaplevel(self, i=-2, j=-1, axis=0): Parameters ---------- + %(optional_mapper)s %(axes)s : scalar, list-like, dict-like or function, optional Scalar or list-like will alter the ``Series.name`` attribute, and raise on DataFrame or Panel. dict-like or functions are transformations to apply to that axis' values + %(optional_axis)s copy : boolean, default True Also copy underlying data inplace : boolean, default False @@ -766,6 +768,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): Examples -------- + >>> s = pd.Series([1, 2, 3]) >>> s 0 1 @@ -787,27 +790,58 @@ def swaplevel(self, i=-2, j=-1, axis=0): 3 2 5 3 dtype: int64 + + Since ``DataFrame`` doesn't have a ``.name`` attribute, + only mapping-type arguments are allowed. + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) >>> df.rename(2) Traceback (most recent call last): ... TypeError: 'int' object is not callable + + ``DataFrame.rename`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...) + * ``(mapper, axis={'index', 'columns'}, ...) + + We *highly* recommend using keyword arguments to clarify your + intent. + >>> df.rename(index=str, columns={"A": "a", "B": "c"}) a c 0 1 4 1 2 5 2 3 6 + >>> df.rename(index=str, columns={"A": "a", "C": "c"}) a B 0 1 4 1 2 5 2 3 6 + + Using axis-style parameters + + >>> df.rename(str.lower, axis='columns') + a b + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename({1: 2, 2: 4}, axis='index') + A B + 0 1 4 + 2 2 5 + 4 3 6 + + See the :ref:`user guide ` for more. """ @Appender(_shared_docs['rename'] % dict(axes='axes keywords for this' - ' object', klass='NDFrame')) + ' object', klass='NDFrame', + optional_mapper='', + optional_axis='')) def rename(self, *args, **kwargs): - axes, kwargs = self._construct_axes_from_arguments(args, kwargs) copy = kwargs.pop('copy', True) inplace = kwargs.pop('inplace', False) @@ -886,6 +920,7 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): Examples -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) >>> df.rename_axis("foo") # scalar, alters df.index.name A B @@ -2746,10 +2781,11 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Parameters ---------- - %(axes)s : array-like, optional (can be specified in order, or as - keywords) + %(optional_labels)s + %(axes)s : array-like, optional (should be specified using keywords) New labels / index to conform to. Preferably an Index object to avoid duplicating data + %(optional_axis)s method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional method to use for filling holes in reindexed DataFrame. 
Please note: this is only applicable to DataFrames/Series with a @@ -2781,6 +2817,14 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Examples -------- + ``DataFrame.reindex`` supports two calling conventions + + * ``(index=index_labels, columns=column_labels, ...) + * ``(labels, axis={'index', 'columns'}, ...) + + We *highly* recommend using keyword arguments to clarify your + intent. + Create a dataframe with some fictional data. >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] @@ -2831,6 +2875,26 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, IE10 404 0.08 Chrome 200 0.02 + We can also reindex the columns. + + >>> df.reindex(columns=['http_status', 'user_agent']) + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + Or we can use "axis-style" keyword arguments + + >>> df.reindex(['http_status', 'user_agent'], axis="columns") + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + To further illustrate the filling functionality in ``reindex``, we will create a dataframe with a monotonically increasing index (for example, a sequence @@ -2893,6 +2957,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, desired indexes. If you do want to fill in the ``NaN`` values present in the original dataframe, use the ``fillna()`` method. + See the :ref:`user guide ` for more. + Returns ------- reindexed : %(klass)s @@ -2901,7 +2967,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, # TODO: Decide if we care about having different examples for different # kinds - @Appender(_shared_docs['reindex'] % dict(axes="axes", klass="NDFrame")) + @Appender(_shared_docs['reindex'] % dict(axes="axes", klass="NDFrame", + optional_labels="", + optional_axis="")) def reindex(self, *args, **kwargs): # construct the args diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 14fba9560cae25..b2f50eaf733d87 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -39,7 +39,8 @@ _shared_doc_kwargs = dict( axes='items, major_axis, minor_axis', klass="Panel", - axes_single_arg="{0, 1, 2, 'items', 'major_axis', 'minor_axis'}") + axes_single_arg="{0, 1, 2, 'items', 'major_axis', 'minor_axis'}", + optional_mapper='', optional_axis='', optional_labels='') _shared_doc_kwargs['args_transpose'] = ("three positional arguments: each one" "of\n%s" % _shared_doc_kwargs['axes_single_arg']) diff --git a/pandas/core/series.py b/pandas/core/series.py index be4066f0c39b93..93afdc5151b35e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -85,7 +85,7 @@ inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series', - optional_by='', + optional_by='', optional_mapper='', optional_labels='', optional_axis='', versionadded_to_excel='\n .. versionadded:: 0.20.0\n') @@ -2525,8 +2525,67 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, limit=limit, fill_axis=fill_axis, broadcast_axis=broadcast_axis) - @Appender(generic._shared_docs['rename'] % _shared_doc_kwargs) def rename(self, index=None, **kwargs): + """Alter Series index labels or name + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. Extra labels listed don't throw an + error. + + Alternatively, change ``Series.name`` with a scalar value. 
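To make the scalar-versus-mapping dichotomy concrete, this rough check mirrors the test pandas itself applies (a sketch only; ``sets_the_name`` is not a pandas function):

    from pandas.api.types import is_dict_like, is_list_like, is_scalar

    def sets_the_name(index):
        # True when the argument updates Series.name instead of the labels
        return is_scalar(index) or (is_list_like(index)
                                    and not is_dict_like(index))

    assert sets_the_name('my_name')       # scalar -> Series.name
    assert sets_the_name(('a', 'b'))      # hashable sequence -> Series.name
    assert not sets_the_name({1: 3})      # dict-like -> relabels the index
    assert not sets_the_name(str.upper)   # callable -> relabels the index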
+ + See the :ref:`user guide ` for more. + + Parameters + ---------- + index : scalar, hashable sequence, dict-like or function, optional + dict-like or functions are transformations to apply to + the index. + Scalar or hashable sequence-like will alter the ``Series.name`` + attribute. + copy : boolean, default True + Also copy underlying data + inplace : boolean, default False + Whether to return a new %(klass)s. If True then value of copy is + ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. + + Returns + ------- + renamed : Series (new object) + + See Also + -------- + pandas.Series.rename_axis + + Examples + -------- + + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> s.rename("my_name") # scalar, changes Series.name + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: int64 + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 + >>> s.rename({1: 3, 2: 5}) # mapping, changes labels + 0 1 + 3 2 + 5 3 + dtype: int64 + + """ kwargs['inplace'] = validate_bool_kwarg(kwargs.get('inplace', False), 'inplace') diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 3255bd6bd17e8c..5c76cca08f6094 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -35,7 +35,8 @@ _shared_doc_kwargs = dict(axes='index', klass='SparseSeries', - axes_single_arg="{0, 'index'}") + axes_single_arg="{0, 'index'}", + optional_labels='', optional_axis='') # ----------------------------------------------------------------------------- # Wrapper function for Series arithmetic methods diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 27906838abb2de..feb32324ff1b10 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -837,6 +837,106 @@ def test_rename_objects(self): assert 'FOO' in renamed assert 'foo' not in renamed + def test_rename_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y']) + expected = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + + result = df.rename(str.lower, axis=1) + assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis='columns') + assert_frame_equal(result, expected) + + result = df.rename({"A": 'a', 'B': 'b'}, axis=1) + assert_frame_equal(result, expected) + + result = df.rename({"A": 'a', 'B': 'b'}, axis='columns') + assert_frame_equal(result, expected) + + # Index + expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) + result = df.rename(str.lower, axis=0) + assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis='index') + assert_frame_equal(result, expected) + + result = df.rename({'X': 'x', 'Y': 'y'}, axis=0) + assert_frame_equal(result, expected) + + result = df.rename({'X': 'x', 'Y': 'y'}, axis='index') + assert_frame_equal(result, expected) + + def test_rename_mapper_multi(self): + df = pd.DataFrame({"A": ['a', 'b'], "B": ['c', 'd'], + 'C': [1, 2]}).set_index(["A", "B"]) + result = df.rename(str.upper) + expected = df.rename(index=str.upper) + assert_frame_equal(result, expected) + + def test_rename_positional_named(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + result = df.rename(str.lower, columns=str.upper) + expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) + 
assert_frame_equal(result, expected) + + def test_rename_axis_style_raises(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1']) + + # Named target and axis + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis=1) + + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis='columns') + + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis='columns') + + with tm.assert_raises_regex(TypeError, None): + df.rename(columns=str.lower, axis='columns') + + with tm.assert_raises_regex(TypeError, None): + df.rename(index=str.lower, axis=0) + + # Multiple targets and axis + with tm.assert_raises_regex(TypeError, None): + df.rename(str.lower, str.lower, axis='columns') + + # Too many targets + with tm.assert_raises_regex(TypeError, None): + df.rename(str.lower, str.lower, str.lower) + + def test_reindex_api_equivalence(self): + # equivalence of the labels/axis and index/columns API's + df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=['a', 'b', 'c'], + columns=['d', 'e', 'f']) + + res1 = df.reindex(['b', 'a']) + res2 = df.reindex(index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a']) + res4 = df.reindex(labels=['b', 'a'], axis=0) + res5 = df.reindex(['b', 'a'], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=['e', 'd']) + res2 = df.reindex(['e', 'd'], axis=1) + res3 = df.reindex(labels=['e', 'd'], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(index=['b', 'a'], columns=['e', 'd']) + res2 = df.reindex(columns=['e', 'd'], index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'], + axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + def test_assign_columns(self): self.frame['hi'] = 'there' @@ -860,6 +960,14 @@ def test_set_index_preserve_categorical_dtype(self): result = result.reindex(columns=df.columns) tm.assert_frame_equal(result, df) + def test_ambiguous_warns(self): + df = pd.DataFrame({"A": [1, 2]}) + with tm.assert_produces_warning(UserWarning): + df.rename(id, id) + + with tm.assert_produces_warning(UserWarning): + df.rename({0: 10}, {"A": "B"}) + class TestIntervalIndex(object): diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index f9a4275d14f55d..38ed8ee20bc501 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -447,6 +447,98 @@ def test_reindex_dups(self): # reindex fails pytest.raises(ValueError, df.reindex, index=list(range(len(df)))) + def test_reindex_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = pd.DataFrame({"A": [1, 2, np.nan], "B": [4, 5, np.nan]}, + index=[0, 1, 3]) + result = df.reindex([0, 1, 3]) + assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis=0) + assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis='index') + assert_frame_equal(result, expected) + + def test_reindex_positional_warns(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = pd.DataFrame({"A": [1., 2], 'B': [4., 5], + "C": [np.nan, np.nan]}) + with tm.assert_produces_warning(UserWarning): + result = df.reindex([0, 1], ['A', 'B', 'C']) + + 
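    # A standalone sketch of the behavior the warning above guards
    # (not part of the patch; assumes pandas with PR #17800 applied):
    # two bare positionals are read as (index, columns) plus a UserWarning,
    # and the keyword spelling below is the equivalent, silent form.
    import warnings

    import pandas as pd

    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        result = df.reindex([0, 1], ['A', 'B', 'C'])   # ambiguous call
    assert any('keyword arguments' in str(w.message) for w in caught)
    pd.testing.assert_frame_equal(
        result, df.reindex(index=[0, 1], columns=['A', 'B', 'C']))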
assert_frame_equal(result, expected) + + def test_reindex_axis_style_raises(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], 'B': [4, 5, 6]}) + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex([0, 1], ['A'], axis=1) + + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex([0, 1], ['A'], axis='index') + + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex(index=[0, 1], axis='index') + + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex(index=[0, 1], axis='columns') + + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex(columns=[0, 1], axis='columns') + + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex(index=[0, 1], columns=[0, 1], axis='columns') + + with tm.assert_raises_regex(TypeError, 'Cannot specify all'): + df.reindex([0, 1], [0], ['A']) + + # Mixing styles + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex(index=[0, 1], axis='index') + + with tm.assert_raises_regex(TypeError, 'reindex'): + df.reindex(index=[0, 1], axis='columns') + + def test_reindex_single_named_indexer(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) + result = df.reindex([0, 1], columns=['A']) + expected = pd.DataFrame({"A": [1, 2]}) + assert_frame_equal(result, expected) + + def test_reindex_api_equivalence(self): + # https://github.com/pandas-dev/pandas/issues/12392 + # equivalence of the labels/axis and index/columns API's + df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=['a', 'b', 'c'], + columns=['d', 'e', 'f']) + + res1 = df.reindex(['b', 'a']) + res2 = df.reindex(index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a']) + res4 = df.reindex(labels=['b', 'a'], axis=0) + res5 = df.reindex(['b', 'a'], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=['e', 'd']) + res2 = df.reindex(['e', 'd'], axis=1) + res3 = df.reindex(labels=['e', 'd'], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + with tm.assert_produces_warning(UserWarning) as m: + res1 = df.reindex(['b', 'a'], ['e', 'd']) + assert 'reindex' in str(m[0].message) + res2 = df.reindex(columns=['e', 'd'], index=['b', 'a']) + res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'], + axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + def test_align(self): af, bf = self.frame.align(self.frame) assert af._data is not self.frame._data @@ -974,21 +1066,21 @@ def test_reindex_with_nans(self): def test_reindex_multi(self): df = DataFrame(np.random.randn(3, 3)) - result = df.reindex(lrange(4), lrange(4)) + result = df.reindex(index=lrange(4), columns=lrange(4)) expected = df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(lrange(4), lrange(4)) + result = df.reindex(index=lrange(4), columns=lrange(4)) expected = df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(lrange(2), lrange(2)) + result = df.reindex(index=lrange(2), columns=lrange(2)) expected = df.reindex(lrange(2)).reindex(columns=lrange(2)) assert_frame_equal(result, expected) From 3544394e841efac6de7e19cd0484e71a93485804 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 11 Oct 2017 10:18:05 -0500 Subject: [PATCH 53/76] API: Deprecate renamae_axis and 
reindex_axis (#17842)

* API: Deprecate rename_axis and reindex_axis

Closes https://github.com/pandas-dev/pandas/issues/17833

* REF: Refactor axis style validator to generic

This sets us up to re-use it for Panel reindex

* fixup! API: Deprecate rename_axis and reindex_axis

* fixup! API: Deprecate rename_axis and reindex_axis

* fixup! API: Deprecate rename_axis and reindex_axis

* fixup! API: Deprecate rename_axis and reindex_axis

* Ugh

* fixup! API: Deprecate rename_axis and reindex_axis

* Fixup
---
 doc/source/basics.rst                       |  7 +-
 doc/source/whatsnew/v0.21.0.txt             |  3 +
 pandas/core/computation/align.py            |  8 +-
 pandas/core/frame.py                        | 64 +++----------
 pandas/core/generic.py                      | 92 +++++++++++++----
 pandas/core/groupby.py                      |  2 +-
 pandas/core/indexing.py                     |  4 +-
 pandas/core/internals.py                    |  4 +-
 pandas/core/panel.py                        | 14 ++-
 pandas/core/panel4d.py                      | 14 +++
 pandas/core/reshape/pivot.py                |  4 +-
 pandas/core/series.py                       |  4 +
 pandas/core/sparse/scipy_sparse.py          |  2 +-
 pandas/io/pytables.py                       |  6 +-
 pandas/plotting/_core.py                    |  2 +-
 pandas/tests/frame/test_alter_axes.py       | 19 ++++
 .../tests/frame/test_axis_select_reindex.py | 18 +++-
 pandas/tests/reshape/test_pivot.py          |  4 +-
 pandas/tests/reshape/test_reshape.py        |  6 +-
 pandas/tests/sparse/test_series.py          |  6 ++
 pandas/tests/test_multilevel.py             |  2 +-
 pandas/tests/test_panel.py                  | 16 ++++
 pandas/tests/test_resample.py               |  4 +-
 23 files changed, 199 insertions(+), 106 deletions(-)

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index be9d1a5d83b85a..3044a8886b9ae8 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1201,8 +1201,11 @@ With a DataFrame, you can simultaneously reindex the index and columns:
    df
    df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one'])

-For convenience, you may utilize the :meth:`~Series.reindex_axis` method, which
-takes the labels and a keyword ``axis`` parameter.
+You may also use ``reindex`` with an ``axis`` keyword:
+
+.. ipython:: python
+
+   df.reindex(['c', 'f', 'b'], axis='index')

 Note that the ``Index`` objects containing the actual axis labels can be
 **shared** between objects. So if we have a Series and a DataFrame, the
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index f04410ef635318..2bee7cf5cdddce 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -810,6 +810,8 @@ Deprecations
 - ``.get_value`` and ``.set_value`` on ``Series``, ``DataFrame``, ``Panel``, ``SparseSeries``, and ``SparseDataFrame`` are deprecated in favor of using ``.iat[]`` or ``.at[]`` accessors (:issue:`15269`)
 - Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`)
 - ``raise_on_error`` parameter to :func:`Series.where`, :func:`Series.mask`, :func:`DataFrame.where`, :func:`DataFrame.mask` is deprecated, in favor of ``errors=`` (:issue:`14968`)
+- Using :meth:`DataFrame.rename_axis` and :meth:`Series.rename_axis` to alter index or column *labels* is now deprecated in favor of using ``.rename``. ``rename_axis`` may still be used to alter the name of the index or columns (:issue:`17833`).
+- :meth:`~DataFrame.reindex_axis` has been deprecated in favor of :meth:`~DataFrame.reindex`. See :ref:`here` for more (:issue:`17833`).

.. 
_whatsnew_0210.deprecations.select: @@ -998,6 +1000,7 @@ Reshaping - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) - Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) - Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) +- Bug in :fun:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) Numeric ^^^^^^^ diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 691eaebfd5fc1e..0e7ae0cbe7c87a 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -89,7 +89,7 @@ def _align_core(terms): for axis, items in zip(range(ndim), axes): ti = terms[i].value - if hasattr(ti, 'reindex_axis'): + if hasattr(ti, 'reindex'): transpose = isinstance(ti, pd.Series) and naxes > 1 reindexer = axes[naxes - 1] if transpose else items @@ -104,11 +104,7 @@ def _align_core(terms): ).format(axis=axis, term=terms[i].name, ordm=ordm) warnings.warn(w, category=PerformanceWarning, stacklevel=6) - if transpose: - f = partial(ti.reindex, index=reindexer, copy=False) - else: - f = partial(ti.reindex_axis, reindexer, axis=axis, - copy=False) + f = partial(ti.reindex, reindexer, axis=axis, copy=False) terms[i].update(f()) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 94ff70f287fbe7..c7e8c0da75e2c9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -65,7 +65,6 @@ _values_from_object, _maybe_box_datetimelike, _dict_compat, - _all_not_none, standardize_mapping) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -2736,7 +2735,7 @@ def reindexer(value): if isinstance(loc, (slice, Series, np.ndarray, Index)): cols = maybe_droplevels(self.columns[loc], key) if len(cols) and not cols.equals(value.columns): - value = value.reindex_axis(cols, axis=1) + value = value.reindex(cols, axis=1) # now align rows value = reindexer(value).T @@ -2783,47 +2782,6 @@ def reindexer(value): return np.atleast_2d(np.asarray(value)) - def _validate_axis_style_args(self, arg, arg_name, index, columns, - axis, method_name): - if axis is not None: - # Using "axis" style, along with a positional arg - # Both index and columns should be None then - axis = self._get_axis_name(axis) - if index is not None or columns is not None: - msg = ( - "Can't specify both 'axis' and 'index' or 'columns'. " - "Specify either\n" - "\t.{method_name}.rename({arg_name}, axis=axis), or\n" - "\t.{method_name}.rename(index=index, columns=columns)" - ).format(arg_name=arg_name, method_name=method_name) - raise TypeError(msg) - if axis == 'index': - index = arg - elif axis == 'columns': - columns = arg - - elif _all_not_none(arg, index, columns): - msg = ( - "Cannot specify all of '{arg_name}', 'index', and 'columns'. " - "Specify either {arg_name} and 'axis', or 'index' and " - "'columns'." - ).format(arg_name=arg_name) - raise TypeError(msg) - - elif _all_not_none(arg, index): - # This is the "ambiguous" case, so emit a warning - msg = ( - "Interpreting call to '.{method_name}(a, b)' as " - "'.{method_name}(index=a, columns=b)'. " - "Use keyword arguments to remove any ambiguity." 
- ).format(method_name=method_name) - warnings.warn(msg, stacklevel=3) - index, columns = arg, index - elif index is None: - # This is for the default axis, like reindex([0, 1]) - index = arg - return index, columns - @property def _series(self): result = {} @@ -2952,11 +2910,11 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) def reindex(self, labels=None, index=None, columns=None, axis=None, **kwargs): - index, columns = self._validate_axis_style_args(labels, 'labels', - index, columns, - axis, 'reindex') - return super(DataFrame, self).reindex(index=index, columns=columns, - **kwargs) + axes = self._validate_axis_style_args(labels, 'labels', + axes=[index, columns], + axis=axis, method_name='reindex') + kwargs.update(axes) + return super(DataFrame, self).reindex(**kwargs) @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, @@ -3041,11 +2999,11 @@ def rename(self, mapper=None, index=None, columns=None, axis=None, 2 2 5 4 3 6 """ - index, columns = self._validate_axis_style_args(mapper, 'mapper', - index, columns, - axis, 'rename') - return super(DataFrame, self).rename(index=index, columns=columns, - **kwargs) + axes = self._validate_axis_style_args(mapper, 'mapper', + axes=[index, columns], + axis=axis, method_name='rename') + kwargs.update(axes) + return super(DataFrame, self).rename(**kwargs) @Appender(_shared_docs['fillna'] % _shared_doc_kwargs) def fillna(self, value=None, method=None, axis=None, inplace=False, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9d9d8334fcaf40..5fe5718d46bcb1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -29,7 +29,8 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame -from pandas.core.common import (_values_from_object, +from pandas.core.common import (_all_not_none, + _values_from_object, _maybe_box_datetimelike, SettingWithCopyError, SettingWithCopyWarning, AbstractMethodError) @@ -729,6 +730,51 @@ def swaplevel(self, i=-2, j=-1, axis=0): result._data.set_axis(axis, labels.swaplevel(i, j)) return result + def _validate_axis_style_args(self, arg, arg_name, axes, + axis, method_name): + out = {} + for i, value in enumerate(axes): + if value is not None: + out[self._AXIS_NAMES[i]] = value + + aliases = ', '.join(self._AXIS_NAMES.values()) + if axis is not None: + # Using "axis" style, along with a positional arg + # Both index and columns should be None then + axis = self._get_axis_name(axis) + if any(x is not None for x in axes): + msg = ( + "Can't specify both 'axis' and {aliases}. " + "Specify either\n" + "\t.{method_name}({arg_name}, axis=axis), or\n" + "\t.{method_name}(index=index, columns=columns)" + ).format(arg_name=arg_name, method_name=method_name, + aliases=aliases) + raise TypeError(msg) + out[axis] = arg + + elif _all_not_none(arg, *axes): + msg = ( + "Cannot specify all of '{arg_name}', {aliases}. " + "Specify either {arg_name} and 'axis', or {aliases}." + ).format(arg_name=arg_name, aliases=aliases) + raise TypeError(msg) + + elif _all_not_none(arg, axes[0]): + # This is the "ambiguous" case, so emit a warning + msg = ( + "Interpreting call to '.{method_name}(a, b)' as " + "'.{method_name}(index=a, columns=b)'. " # TODO + "Use keyword arguments to remove any ambiguity." 
+ ).format(method_name=method_name) + warnings.warn(msg, stacklevel=3) + out[self._AXIS_ORDERS[0]] = arg + out[self._AXIS_ORDERS[1]] = axes[0] + elif axes[0] is None: + # This is for the default axis, like reindex([0, 1]) + out[self._AXIS_ORDERS[0]] = arg + return out + # ---------------------------------------------------------------------- # Rename @@ -893,17 +939,12 @@ def f(x): rename.__doc__ = _shared_docs['rename'] def rename_axis(self, mapper, axis=0, copy=True, inplace=False): - """ - Alter index and / or columns using input function or functions. - A scalar or list-like for ``mapper`` will alter the ``Index.name`` - or ``MultiIndex.names`` attribute. - A function or dict for ``mapper`` will alter the labels. - Function / dict values must be unique (1-to-1). Labels not contained in - a dict / Series will be left as-is. + """Alter the name of the index or columns. Parameters ---------- - mapper : scalar, list-like, dict-like or function, optional + mapper : scalar, list-like, optional + Value to set the axis name attribute. axis : int or string, default 0 copy : boolean, default True Also copy underlying data @@ -913,31 +954,35 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): ------- renamed : type of caller or None if inplace=True + Notes + ----- + Prior to version 0.21.0, ``rename_axis`` could also be used to change + the axis *labels* by passing a mapping or scalar. This behavior is + deprecated and will be removed in a future version. Use ``rename`` + instead. + See Also -------- - pandas.NDFrame.rename + pandas.Series.rename, pandas.DataFrame.rename pandas.Index.rename Examples -------- >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - >>> df.rename_axis("foo") # scalar, alters df.index.name + >>> df.rename_axis("foo") A B foo 0 1 4 1 2 5 2 3 6 - >>> df.rename_axis(lambda x: 2 * x) # function: alters labels - A B - 0 1 4 - 2 2 5 - 4 3 6 - >>> df.rename_axis({"A": "ehh", "C": "see"}, axis="columns") # mapping - ehh B + + >>> df.rename_axis("bar", axis="columns") + bar A B 0 1 4 1 2 5 2 3 6 + """ inplace = validate_bool_kwarg(inplace, 'inplace') non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not @@ -945,6 +990,9 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): if non_mapper: return self._set_axis_name(mapper, axis=axis, inplace=inplace) else: + msg = ("Using 'rename_axis' to alter labels is deprecated. " + "Use '.rename' instead") + warnings.warn(msg, FutureWarning, stacklevel=2) axis = self._get_axis_name(axis) d = {'copy': copy, 'inplace': inplace} d[axis] = mapper @@ -2981,6 +3029,11 @@ def reindex(self, *args, **kwargs): tolerance = kwargs.pop('tolerance', None) fill_value = kwargs.pop('fill_value', np.nan) + # Series.reindex doesn't use / need the axis kwarg + # We pop and ignore it here, to make writing Series/Frame generic code + # easier + kwargs.pop("axis", None) + if kwargs: raise TypeError('reindex() got an unexpected keyword ' 'argument "{0}"'.format(list(kwargs.keys())[0])) @@ -3085,11 +3138,14 @@ def _reindex_multi(self, axes, copy, fill_value): @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, limit=None, fill_value=np.nan): + msg = ("'.reindex_axis' is deprecated and will be removed in a future " + "version. 
Use '.reindex' instead.") self._consolidate_inplace() axis_name = self._get_axis_name(axis) axis_values = self._get_axis(axis_name) method = missing.clean_reindex_fill_method(method) + warnings.warn(msg, FutureWarning, stacklevel=3) new_index, indexer = axis_values.reindex(labels, method, level, limit=limit) return self._reindex_with_indexers({axis: [new_index, indexer]}, diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9518f17e5f4f17..ccaf90b4482a7c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -901,7 +901,7 @@ def reset_identity(values): result.index.get_indexer_for(ax.values)) result = result.take(indexer, axis=self.axis) else: - result = result.reindex_axis(ax, axis=self.axis) + result = result.reindex(ax, axis=self.axis) elif self.group_keys: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f1a3fe81a45404..654c3510b7cf79 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -368,7 +368,7 @@ def _setitem_with_indexer(self, indexer, value): # so the object is the same index = self.obj._get_axis(i) labels = index.insert(len(index), key) - self.obj._data = self.obj.reindex_axis(labels, i)._data + self.obj._data = self.obj.reindex(labels, axis=i)._data self.obj._maybe_update_cacher(clear=True) self.obj.is_copy = None @@ -1132,7 +1132,7 @@ def _getitem_iterable(self, key, axis=None): if labels.is_unique and Index(keyarr).is_unique: try: - return self.obj.reindex_axis(keyarr, axis=axis) + return self.obj.reindex(keyarr, axis=axis) except AttributeError: # Series diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 689f5521e1ccb5..879859309c4f93 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3283,8 +3283,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, for k, obj in aligned_args.items(): axis = getattr(obj, '_info_axis_number', 0) - kwargs[k] = obj.reindex_axis(b_items, axis=axis, - copy=align_copy) + kwargs[k] = obj.reindex(b_items, axis=axis, + copy=align_copy) kwargs['mgr'] = self applied = getattr(b, f)(**kwargs) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b2f50eaf733d87..1f22cb49d01962 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1197,13 +1197,21 @@ def _wrap_result(self, result, axis): return self._construct_return_type(result, axes) @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) - def reindex(self, items=None, major_axis=None, minor_axis=None, **kwargs): + def reindex(self, labels=None, + items=None, major_axis=None, minor_axis=None, + axis=None, **kwargs): major_axis = (major_axis if major_axis is not None else kwargs.pop('major', None)) minor_axis = (minor_axis if minor_axis is not None else kwargs.pop('minor', None)) - return super(Panel, self).reindex(items=items, major_axis=major_axis, - minor_axis=minor_axis, **kwargs) + axes = self._validate_axis_style_args( + labels, 'labels', axes=[items, major_axis, minor_axis], + axis=axis, method_name='reindex') + if self.ndim >= 4: + # Hack for PanelND + axes = {} + kwargs.update(axes) + return super(Panel, self).reindex(**kwargs) @Appender(_shared_docs['rename'] % _shared_doc_kwargs) def rename(self, items=None, major_axis=None, minor_axis=None, **kwargs): diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py index 16e7d0dfcc3362..e6914fb268359b 100644 --- a/pandas/core/panel4d.py +++ b/pandas/core/panel4d.py @@ -57,4 +57,18 @@ def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, dtype=dtype) +def panel4d_reindex(self, 
labs=None, labels=None, items=None, major_axis=None, + minor_axis=None, axis=None, **kwargs): + # Hack for reindex_axis deprecation + # Ha, we used labels for two different things + # I think this will work still. + axes = self._validate_axis_style_args( + labs, 'labels', + axes=[labels, items, major_axis, minor_axis], + axis=axis, method_name='reindex') + kwargs.update(axes) + return super(Panel, self).reindex(**kwargs) + + Panel4D.__init__ = panel4d_init +Panel4D.reindex = panel4d_reindex diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 38c28af4d6ecb1..7ee021e5c62466 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -101,14 +101,14 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', try: m = MultiIndex.from_arrays(cartesian_product(table.index.levels), names=table.index.names) - table = table.reindex_axis(m, axis=0) + table = table.reindex(m, axis=0) except AttributeError: pass # it's a single level try: m = MultiIndex.from_arrays(cartesian_product(table.columns.levels), names=table.columns.names) - table = table.reindex_axis(m, axis=1) + table = table.reindex(m, axis=1) except AttributeError: pass # it's a single level or a series diff --git a/pandas/core/series.py b/pandas/core/series.py index 93afdc5151b35e..8499f8b55d2d0a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2615,6 +2615,10 @@ def reindex_axis(self, labels, axis=0, **kwargs): """ for compatibility with higher dims """ if axis != 0: raise ValueError("cannot reindex series on non-zero axis!") + msg = ("'.reindex_axis' is deprecated and will be removed in a future " + "version. Use '.reindex' instead.") + warnings.warn(msg, FutureWarning, stacklevel=2) + return self.reindex(index=labels, **kwargs) def memory_usage(self, index=True, deep=False): diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index d2b9583d8efe5c..748a52f4848931 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -134,5 +134,5 @@ def _coo_to_sparse_series(A, dense_index=False): i = range(A.shape[0]) j = range(A.shape[1]) ind = MultiIndex.from_product([i, j]) - s = s.reindex_axis(ind) + s = s.reindex(ind) return s diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ca1b4d031d3ced..39d088e00b2196 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1040,7 +1040,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None, dc = data_columns if k == selector else None # compute the val - val = value.reindex_axis(v, axis=axis) + val = value.reindex(v, axis=axis) self.append(k, val, data_columns=dc, **kwargs) @@ -3493,7 +3493,7 @@ def get_blk_items(mgr, blocks): data_columns = self.validate_data_columns( data_columns, min_itemsize) if len(data_columns): - mgr = block_obj.reindex_axis( + mgr = block_obj.reindex( Index(axis_labels).difference(Index(data_columns)), axis=axis )._data @@ -3501,7 +3501,7 @@ def get_blk_items(mgr, blocks): blocks = list(mgr.blocks) blk_items = get_blk_items(mgr, blocks) for c in data_columns: - mgr = block_obj.reindex_axis([c], axis=axis)._data + mgr = block_obj.reindex([c], axis=axis)._data blocks.extend(mgr.blocks) blk_items.extend(get_blk_items(mgr, mgr.blocks)) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index c4cd562df7eb30..0d77b5f41a08ee 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -697,7 +697,7 @@ def _parse_errorbars(self, label, err): from pandas import DataFrame, 
Series def match_labels(data, e): - e = e.reindex_axis(data.index) + e = e.reindex(data.index) return e # key-matched DataFrame diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index feb32324ff1b10..84f7dd108f2cb5 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -436,6 +436,25 @@ def test_rename_axis_inplace(self): assert no_return is None assert_frame_equal(result, expected) + def test_rename_axis_warns(self): + # https://github.com/pandas-dev/pandas/issues/17833 + df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) + with tm.assert_produces_warning(FutureWarning) as w: + df.rename_axis(id, axis=0) + assert 'rename' in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning) as w: + df.rename_axis({0: 10, 1: 20}, axis=0) + assert 'rename' in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning) as w: + df.rename_axis(id, axis=1) + assert 'rename' in str(w[0].message) + + with tm.assert_produces_warning(FutureWarning) as w: + df['A'].rename_axis(id) + assert 'rename' in str(w[0].message) + def test_rename_multiindex(self): tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')] diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 38ed8ee20bc501..fee0c8b213bd99 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -418,11 +418,13 @@ def test_reindex_fill_value(self): assert_frame_equal(result, expected) # reindex_axis - result = df.reindex_axis(lrange(15), fill_value=0., axis=0) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex_axis(lrange(15), fill_value=0., axis=0) expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) - result = df.reindex_axis(lrange(5), fill_value=0., axis=1) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex_axis(lrange(5), fill_value=0., axis=1) expected = df.reindex(columns=lrange(5)).fillna(0) assert_frame_equal(result, expected) @@ -1030,12 +1032,16 @@ def test_reindex_corner(self): def test_reindex_axis(self): cols = ['A', 'B', 'E'] - reindexed1 = self.intframe.reindex_axis(cols, axis=1) + with tm.assert_produces_warning(FutureWarning) as m: + reindexed1 = self.intframe.reindex_axis(cols, axis=1) + assert 'reindex' in str(m[0].message) reindexed2 = self.intframe.reindex(columns=cols) assert_frame_equal(reindexed1, reindexed2) rows = self.intframe.index[0:5] - reindexed1 = self.intframe.reindex_axis(rows, axis=0) + with tm.assert_produces_warning(FutureWarning) as m: + reindexed1 = self.intframe.reindex_axis(rows, axis=0) + assert 'reindex' in str(m[0].message) reindexed2 = self.intframe.reindex(index=rows) assert_frame_equal(reindexed1, reindexed2) @@ -1043,7 +1049,9 @@ def test_reindex_axis(self): # no-op case cols = self.frame.columns.copy() - newFrame = self.frame.reindex_axis(cols, axis=1) + with tm.assert_produces_warning(FutureWarning) as m: + newFrame = self.frame.reindex_axis(cols, axis=1) + assert 'reindex' in str(m[0].message) assert_frame_equal(newFrame, self.frame) def test_reindex_with_nans(self): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 4126bb1de84d7a..90afd2e2160452 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -116,7 +116,9 @@ def test_pivot_table_dropna_categoricals(self): result_false = df.pivot_table(index='B', columns='A', values='C', dropna=False) - 
expected_columns = Series(['a', 'b', 'c', 'd'], name='A') + expected_columns = ( + Series(['a', 'b', 'c', 'd'], name='A').astype('category') + ) expected_false = DataFrame([[0.0, 3.0, 6.0, np.NaN], [1.0, 4.0, 7.0, np.NaN], [2.0, 5.0, 8.0, np.NaN]], diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 632d3b4ad2e7ac..fc9f89934b4ea0 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -311,7 +311,7 @@ def test_include_na(self): 'a': {0: 1, 1: 0, 2: 0}, 'b': {0: 0, 1: 1, 2: 0}}, dtype=np.uint8) - exp_na = exp_na.reindex_axis(['a', 'b', nan], 1) + exp_na = exp_na.reindex(['a', 'b', nan], axis=1) # hack (NaN handling in assert_index_equal) exp_na.columns = res_na.columns assert_frame_equal(res_na, exp_na) @@ -542,8 +542,8 @@ def test_basic_drop_first_NA(self): 2: 0}, nan: {0: 0, 1: 0, - 2: 1}}, dtype=np.uint8).reindex_axis( - ['b', nan], 1) + 2: 1}}, dtype=np.uint8).reindex( + ['b', nan], axis=1) assert_frame_equal(res_na, exp_na) res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse, diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 7c7399317809f4..c218eee921bb1a 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -1414,6 +1414,12 @@ def test_deprecated_numpy_func_call(self): check_stacklevel=False): getattr(getattr(self, series), func)() + def test_deprecated_reindex_axis(self): + # https://github.com/pandas-dev/pandas/issues/17833 + with tm.assert_produces_warning(FutureWarning) as m: + self.bseries.reindex_axis([0, 1, 2]) + assert 'reindex' in str(m[0].message) + @pytest.mark.parametrize( 'datetime_type', (np.datetime64, diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 94577db15f01a2..785be71e236d7b 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1805,7 +1805,7 @@ def test_reindex_level_partial_selection(self): expected = self.frame.iloc[[0, 1, 2, 7, 8, 9]] tm.assert_frame_equal(result, expected) - result = self.frame.T.reindex_axis(['foo', 'qux'], axis=1, level=0) + result = self.frame.T.reindex(['foo', 'qux'], axis=1, level=0) tm.assert_frame_equal(result, expected.T) result = self.frame.loc[['foo', 'qux']] diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 2769ec0d2dbed3..da30c8c403d410 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1444,6 +1444,22 @@ def test_reindex(self): assert_panel_equal(result, self.panel) assert result is self.panel + def test_reindex_axis_style(self): + with catch_warnings(record=True): + panel = Panel(np.random.rand(5, 5, 5)) + expected0 = Panel(panel.values).iloc[[0, 1]] + expected1 = Panel(panel.values).iloc[:, [0, 1]] + expected2 = Panel(panel.values).iloc[:, :, [0, 1]] + + result = panel.reindex([0, 1], axis=0) + assert_panel_equal(result, expected0) + + result = panel.reindex([0, 1], axis=1) + assert_panel_equal(result, expected1) + + result = panel.reindex([0, 1], axis=2) + assert_panel_equal(result, expected2) + def test_reindex_multi(self): with catch_warnings(record=True): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index cd15203eccd826..4e26689badb3cd 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1422,7 +1422,7 @@ def test_resample_ohlc_dataframe(self): Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, Timestamp('2011-01-06 
12:54:09', tz=None): 100000000}}) - ).reindex_axis(['VOLUME', 'PRICE'], axis=1) + ).reindex(['VOLUME', 'PRICE'], axis=1) res = df.resample('H').ohlc() exp = pd.concat([df['VOLUME'].resample('H').ohlc(), df['PRICE'].resample('H').ohlc()], @@ -1652,7 +1652,7 @@ def test_resample_categorical_data_with_timedeltaindex(self): expected = DataFrame({'Group_obj': ['A', 'A'], 'Group': ['A', 'A']}, index=pd.to_timedelta([0, 10], unit='s')) - expected = expected.reindex_axis(['Group_obj', 'Group'], 1) + expected = expected.reindex(['Group_obj', 'Group'], axis=1) tm.assert_frame_equal(result, expected) def test_resample_daily_anchored(self): From 9772c2288331d5194e7438288247c5f436f0b4d3 Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Thu, 12 Oct 2017 00:19:56 +0900 Subject: [PATCH 54/76] BUG: Fix default encoding for CSVFormatter.save (#17821) * BUG: Fix default encoding for CSVFormatter.save * TST: Add to_csv default encoding test * DOC: Add comments on to_csv default encoding test * DOC: added release note * DOC: Add the fixing to_csv default encoding to whatsnew note * Revert "DOC: Add the fixing to_csv default encoding to whatsnew note" This reverts commit 039f2cf670c82b2a47e33c0b0387f3df359b4fd4. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/formats/format.py | 18 +++++++++++++----- pandas/tests/io/formats/test_to_csv.py | 17 +++++++++++++++++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2bee7cf5cdddce..d7a08b1985076c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -946,6 +946,7 @@ I/O - Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`).
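As a sketch of the user-visible effect of the to_csv encoding fix in this patch (illustrative, not part of the diff): on Python 3, ``DataFrame.to_csv`` now writes utf-8 when no encoding is given, so non-ASCII frames round-trip without an explicit ``encoding=`` argument:

.. code-block:: python

   import pandas as pd

   df = pd.DataFrame({'col': [u'AAA', u'ÄÄÄ', u'聞聞']})
   df.to_csv('tmp.csv')  # hypothetical path; defaults to utf-8 on Python 3
   roundtripped = pd.read_csv('tmp.csv', index_col=0)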
- Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) +- Bug in :meth:`DataFrame.to_csv` defaulting to 'ascii' encoding in Python 3, instead of 'utf-8' (:issue:`17097`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_stata` where the index was not set (:issue:`16342`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index e8ea0714b1dda1..4608c3fe0ceb80 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1612,12 +1612,20 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', def save(self): # create the writer & save + if self.encoding is None: + if compat.PY2: + encoding = 'ascii' + else: + encoding = 'utf-8' + else: + encoding = self.encoding + if hasattr(self.path_or_buf, 'write'): f = self.path_or_buf close = False else: f, handles = _get_handle(self.path_or_buf, self.mode, - encoding=self.encoding, + encoding=encoding, compression=self.compression) close = True @@ -1627,11 +1635,11 @@ def save(self): doublequote=self.doublequote, escapechar=self.escapechar, quotechar=self.quotechar) - if self.encoding is not None: - writer_kwargs['encoding'] = self.encoding - self.writer = UnicodeWriter(f, **writer_kwargs) - else: + if encoding == 'ascii': self.writer = csv.writer(f, **writer_kwargs) + else: + writer_kwargs['encoding'] = encoding + self.writer = UnicodeWriter(f, **writer_kwargs) self._save() diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 1073fbcef5aecb..b82d9895ddcf59 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from pandas import DataFrame import numpy as np import pandas as pd @@ -6,6 +8,21 @@ class TestToCSV(object): + def test_to_csv_default_encoding(self): + # GH17097 + df = DataFrame({'col': [u"AAAAA", u"ÄÄÄÄÄ", u"ßßßßß", u"聞聞聞聞聞"]}) + + with tm.ensure_clean('test.csv') as path: + # the default to_csv encoding is ascii in Python 2 and utf-8 + # in Python 3. + if pd.compat.PY2: + # without an explicit utf-8 encoding argument, non-ASCII + # data raises under the ascii default + with tm.assert_raises_regex(UnicodeEncodeError, 'ascii'): + df.to_csv(path) + else: + df.to_csv(path) + tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) + def test_to_csv_quotechar(self): df = DataFrame({'col': [1, 2]}) expected = """\ From 7e159aeec1d5d2e79311ca688c0e594f0fd5f709 Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Thu, 12 Oct 2017 00:21:26 +0900 Subject: [PATCH 55/76] TST: Add the default separator test for PythonParser (#17822) * TST: Add the default separator test for PythonParser * DOC: Add a comment on the Python CSV default separator test * DOC: Document how PythonParser sniffs the separator --- doc/source/io.rst | 3 ++- pandas/io/parsers.py | 3 ++- pandas/tests/io/parser/python_parser_only.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 08d00138b7cd8e..d05b5605cd4d32 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -84,7 +84,8 @@ filepath_or_buffer : various sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table` Delimiter to use.
If sep is ``None``, the C engine cannot automatically detect the separator, but the Python parsing engine can, meaning the latter will be - used automatically. In addition, separators longer than 1 character and + used, automatically detecting the separator with Python's builtin sniffer tool, + :class:`python:csv.Sniffer`. In addition, separators longer than 1 character and different from ``'\s+'`` will be interpreted as regular expressions and will also force the use of the Python parsing engine. Note that regex delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4b6c358ea7dcd5..3c94871003dd07 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -316,7 +316,8 @@ _sep_doc = r"""sep : str, default {default} Delimiter to use. If sep is None, the C engine cannot automatically detect the separator, but the Python parsing engine can, meaning the latter will - be used automatically. In addition, separators longer than 1 character and + be used, automatically detecting the separator with Python's builtin sniffer + tool, ``csv.Sniffer``. In addition, separators longer than 1 character and different from ``'\s+'`` will be interpreted as regular expressions and will also force the use of the Python parsing engine. Note that regex delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'`` diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index 267b589ee91f47..c0616ebbab4a5d 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -19,6 +19,16 @@ class PythonParserTests(object): + def test_default_separator(self): + # GH17333 + # csv.Sniffer in Python treats 'o' as a separator.
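# An illustrative aside (not part of the diff): with sep=None the Python
# engine infers the delimiter via csv.Sniffer, e.g.
#
#     from pandas.compat import StringIO
#     import pandas as pd
#     pd.read_csv(StringIO('a;b\n1;2\n3;4'), sep=None, engine='python')
#
# detects ';' and returns a two-column DataFrame, much as the test below
# relies on the sniffer picking 'o' out of 'aob'.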
+ text = 'aob\n1o2\n3o4' + expected = DataFrame({'a': [1, 3], 'b': [2, 4]}) + + result = self.read_csv(StringIO(text), sep=None) + + tm.assert_frame_equal(result, expected) + def test_invalid_skipfooter(self): text = "a\n1\n2" From eac4d3f70b9909252e7fa274f3de51f4bec7205b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 12 Oct 2017 23:02:13 +0200 Subject: [PATCH 56/76] REF/INT: concat blocks of same type with preserving block type (#17728) --- pandas/core/dtypes/concat.py | 10 +- pandas/core/internals.py | 108 +++++++++++++++++- pandas/core/reshape/concat.py | 16 +-- pandas/tests/internals/test_external_block.py | 41 ++++++- 4 files changed, 152 insertions(+), 23 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index f6f956832eebe8..93993fd0a0cab2 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -63,11 +63,12 @@ def get_dtype_kinds(l): return typs -def _get_series_result_type(result): +def _get_series_result_type(result, objs=None): """ return appropriate class of Series concat input is either dict or array-like """ + # concat Series with axis 1 if isinstance(result, dict): # concat Series with axis 1 if all(is_sparse(c) for c in compat.itervalues(result)): @@ -77,13 +78,12 @@ def _get_series_result_type(result): from pandas.core.frame import DataFrame return DataFrame - elif is_sparse(result): - # concat Series with axis 1 + # otherwise it is a SingleBlockManager (axis = 0) + if result._block.is_sparse: from pandas.core.sparse.api import SparseSeries return SparseSeries else: - from pandas.core.series import Series - return Series + return objs[0]._constructor def _get_frame_result_type(result, objs): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 879859309c4f93..a1e9b24afe5fc8 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -102,6 +102,7 @@ class Block(PandasObject): _validate_ndim = True _ftype = 'dense' _holder = None + _concatenator = staticmethod(np.concatenate) def __init__(self, values, placement, ndim=None, fastpath=False): if ndim is None: @@ -314,6 +315,15 @@ def ftype(self): def merge(self, other): return _merge_blocks([self, other]) + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. + """ + values = self._concatenator([blk.values for blk in to_concat], + axis=self.ndim - 1) + return self.make_block_same_class( + values, placement=placement or slice(0, len(values), 1)) + def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, limit=None, mask_info=None): """ @@ -2309,6 +2319,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): _verify_integrity = True _can_hold_na = True _holder = Categorical + _concatenator = staticmethod(_concat._concat_categorical) def __init__(self, values, placement, fastpath=False, **kwargs): @@ -2432,6 +2443,17 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): # we are expected to return a 2-d ndarray return values.reshape(1, len(values)) + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. 
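        A user-level sketch of what this hook enables (illustrative aside,
        not part of the diff): concatenation of same-typed blocks can
        preserve the extension dtype instead of upcasting to object, e.g.

        >>> import pandas as pd
        >>> s = pd.Series(pd.Categorical(['a', 'b']))
        >>> str(pd.concat([s, s]).dtype)
        'category'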
+ """ + values = self._concatenator([blk.values for blk in to_concat], + axis=self.ndim - 1) + # not using self.make_block_same_class as values can be object dtype + return make_block( + values, placement=placement or slice(0, len(values), 1), + ndim=self.ndim) + class DatetimeBlock(DatetimeLikeBlockMixin, Block): __slots__ = () @@ -2571,6 +2593,7 @@ class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): """ implement a datetime64 block with a tz attribute """ __slots__ = () _holder = DatetimeIndex + _concatenator = staticmethod(_concat._concat_datetime) is_datetimetz = True def __init__(self, values, placement, ndim=2, **kwargs): @@ -2711,6 +2734,16 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block_same_class(new_values, placement=self.mgr_locs)] + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. + """ + values = self._concatenator([blk.values for blk in to_concat], + axis=self.ndim - 1) + # not using self.make_block_same_class as values can be non-tz dtype + return make_block( + values, placement=placement or slice(0, len(values), 1)) + class SparseBlock(NonConsolidatableMixIn, Block): """ implement as a list of sparse arrays of the same dtype """ @@ -2721,6 +2754,7 @@ class SparseBlock(NonConsolidatableMixIn, Block): _can_hold_na = True _ftype = 'sparse' _holder = SparseArray + _concatenator = staticmethod(_concat._concat_sparse) @property def shape(self): @@ -4517,6 +4551,45 @@ def fast_xs(self, loc): """ return self._block.values[loc] + def concat(self, to_concat, new_axis): + """ + Concatenate a list of SingleBlockManagers into a single + SingleBlockManager. + + Used for pd.concat of Series objects with axis=0. + + Parameters + ---------- + to_concat : list of SingleBlockManagers + new_axis : Index of the result + + Returns + ------- + SingleBlockManager + + """ + non_empties = [x for x in to_concat if len(x) > 0] + + # check if all series are of the same block type: + if len(non_empties) > 0: + blocks = [obj.blocks[0] for obj in non_empties] + + if all([type(b) is type(blocks[0]) for b in blocks[1:]]): # noqa + new_block = blocks[0].concat_same_type(blocks) + else: + values = [x.values for x in blocks] + values = _concat._concat_compat(values) + new_block = make_block( + values, placement=slice(0, len(values), 1)) + else: + values = [x._block.values for x in to_concat] + values = _concat._concat_compat(values) + new_block = make_block( + values, placement=slice(0, len(values), 1)) + + mgr = SingleBlockManager(new_block, new_axis) + return mgr + def construction_error(tot_items, block_shape, axes, e=None): """ raise a helpful message about our construction """ @@ -5105,13 +5178,42 @@ def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): [get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers], concat_axis) - blocks = [make_block( - concatenate_join_units(join_units, concat_axis, copy=copy), - placement=placement) for placement, join_units in concat_plan] + blocks = [] + + for placement, join_units in concat_plan: + + if is_uniform_join_units(join_units): + b = join_units[0].block.concat_same_type( + [ju.block for ju in join_units], placement=placement) + else: + b = make_block( + concatenate_join_units(join_units, concat_axis, copy=copy), + placement=placement) + blocks.append(b) return BlockManager(blocks, axes) +def is_uniform_join_units(join_units): + """ + Check if the join units consist of blocks of uniform type that can + be concatenated using 
Block.concat_same_type instead of the generic + concatenate_join_units (which uses `_concat._concat_compat`). + + """ + return ( + # all blocks need to have the same type + all([type(ju.block) is type(join_units[0].block) for ju in join_units]) and # noqa + # no blocks that would get missing values (can lead to type upcasts) + all([not ju.is_na for ju in join_units]) and + # no blocks with indexers (as then the dimensions do not fit) + all([not ju.indexers for ju in join_units]) and + # disregard Panels + all([ju.block.ndim <= 2 for ju in join_units]) and + # only use this path when there is something to concatenate + len(join_units) > 1) + + def get_empty_dtype_and_na(join_units): """ Return dtype and N/A values to use when concatenating specified units. diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 4040c651366174..c54763f8ebde13 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -362,20 +362,12 @@ def get_result(self): # stack blocks if self.axis == 0: - # concat Series with length to keep dtype as much - non_empties = [x for x in self.objs if len(x) > 0] - if len(non_empties) > 0: - values = [x._values for x in non_empties] - else: - values = [x._values for x in self.objs] - new_data = _concat._concat_compat(values) - name = com._consensus_name_attr(self.objs) - cons = _concat._get_series_result_type(new_data) - return (cons(new_data, index=self.new_axes[0], - name=name, dtype=new_data.dtype) - .__finalize__(self, method='concat')) + mgr = self.objs[0]._data.concat([x._data for x in self.objs], + self.new_axes) + cons = _concat._get_series_result_type(mgr, self.objs) + return cons(mgr, name=name).__finalize__(self, method='concat') # combine as columns in a frame else: diff --git a/pandas/tests/internals/test_external_block.py b/pandas/tests/internals/test_external_block.py index cccde76c3e1d92..d98b293ed8daa3 100644 --- a/pandas/tests/internals/test_external_block.py +++ b/pandas/tests/internals/test_external_block.py @@ -4,14 +4,26 @@ import numpy as np import pandas as pd -from pandas.core.internals import Block, BlockManager, SingleBlockManager +from pandas.core.internals import ( + Block, BlockManager, SingleBlockManager, NonConsolidatableMixIn) -class CustomBlock(Block): +class CustomBlock(NonConsolidatableMixIn, Block): + + _holder = np.ndarray def formatting_values(self): return np.array(["Val: {}".format(i) for i in self.values]) + def concat_same_type(self, to_concat, placement=None): + """ + Always concatenate disregarding self.ndim as the values are + always 1D in this custom Block + """ + values = np.concatenate([blk.values for blk in to_concat]) + return self.make_block_same_class( + values, placement=placement or slice(0, len(values), 1)) + def test_custom_repr(): values = np.arange(3, dtype='int64') @@ -23,7 +35,30 @@ def test_custom_repr(): assert repr(s) == '0 Val: 0\n1 Val: 1\n2 Val: 2\ndtype: int64' # dataframe - block = CustomBlock(values.reshape(1, -1), placement=slice(0, 1)) + block = CustomBlock(values, placement=slice(0, 1)) blk_mgr = BlockManager([block], [['col'], range(3)]) df = pd.DataFrame(blk_mgr) assert repr(df) == ' col\n0 Val: 0\n1 Val: 1\n2 Val: 2' + + +def test_concat_series(): + # GH17728 + values = np.arange(3, dtype='int64') + block = CustomBlock(values, placement=slice(0, 3)) + s = pd.Series(block, pd.RangeIndex(3), fastpath=True) + + res = pd.concat([s, s]) + assert isinstance(res._data.blocks[0], CustomBlock) + + +def test_concat_dataframe(): + # GH17728 + df = pd.DataFrame({'a': 
[1, 2, 3]}) + blocks = df._data.blocks + values = np.arange(3, dtype='int64') + custom_block = CustomBlock(values, placement=slice(1, 2)) + blocks = blocks + (custom_block, ) + block_manager = BlockManager(blocks, [pd.Index(['a', 'b']), df.index]) + df = pd.DataFrame(block_manager) + res = pd.concat([df, df]) + assert isinstance(res._data.blocks[1], CustomBlock) From 92db5c9c461ca4c649cfddcc6b0fb810cec18595 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 12 Oct 2017 14:04:34 -0700 Subject: [PATCH 57/76] DEPR: Deprecate the convert parameter completely (#17831) Previously, we weren't issuing a warning if the user happened to pass in the original default of "True", which would cause downstream code to break. Closes gh-17828. --- pandas/core/generic.py | 10 +++++---- pandas/core/sparse/series.py | 11 +++++----- .../tests/frame/test_axis_select_reindex.py | 4 ++++ pandas/tests/sparse/test_series.py | 21 +++++++++---------- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5fe5718d46bcb1..acc1bf1241bffe 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2220,6 +2220,7 @@ def _take(self, indices, axis=0, convert=True, is_copy=True): selecting rows, "1" means that we are selecting columns, etc. convert : bool, default True .. deprecated:: 0.21.0 + In the future, negative indices will always be converted. Whether to convert negative indices into positive ones. For example, ``-1`` would map to the ``len(axis) - 1``. @@ -2282,14 +2283,15 @@ class max_speed """ @Appender(_shared_docs['take']) - def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): - nv.validate_take(tuple(), kwargs) - - if not convert: + def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs): + if convert is not None: msg = ("The 'convert' parameter is deprecated " "and will be removed in a future version.") warnings.warn(msg, FutureWarning, stacklevel=2) + else: + convert = True + convert = nv.validate_take(tuple(), kwargs) return self._take(indices, axis=axis, convert=convert, is_copy=is_copy) def xs(self, key, axis=0, level=None, drop_level=True): diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 5c76cca08f6094..17d0737ba7c634 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -387,7 +387,7 @@ def _ixs(self, i, axis=0): """ label = self.index[i] if isinstance(label, Index): - return self.take(i, axis=axis, convert=True) + return self.take(i, axis=axis) else: return self._get_val_at(i) @@ -629,14 +629,15 @@ def sparse_reindex(self, new_index): fill_value=self.fill_value).__finalize__(self) @Appender(generic._shared_docs['take']) - def take(self, indices, axis=0, convert=True, *args, **kwargs): - convert = nv.validate_take_with_convert(convert, args, kwargs) - - if not convert: + def take(self, indices, axis=0, convert=None, *args, **kwargs): + if convert is not None: msg = ("The 'convert' parameter is deprecated " "and will be removed in a future version.") warnings.warn(msg, FutureWarning, stacklevel=2) + else: + convert = True + nv.validate_take_with_convert(convert, args, kwargs) new_values = SparseArray.take(self.values, indices) new_index = self.index.take(indices) return self._constructor(new_values, diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index fee0c8b213bd99..bf96818edf04e0 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ 
b/pandas/tests/frame/test_axis_select_reindex.py @@ -946,6 +946,10 @@ def test_take(self): expected = df.reindex(df.index.take(order)) assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = df.take(order, convert=True, axis=0) + assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): result = df.take(order, convert=False, axis=0) assert_frame_equal(result, expected) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index c218eee921bb1a..df1badb860d6d0 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -528,6 +528,9 @@ def _compare(idx): exp = pd.Series(np.repeat(nan, 5)) tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp) + with tm.assert_produces_warning(FutureWarning): + sp.take([1, 5], convert=True) + with tm.assert_produces_warning(FutureWarning): sp.take([1, 5], convert=False) @@ -535,21 +538,17 @@ def test_numpy_take(self): sp = SparseSeries([1.0, 2.0, 3.0]) indices = [1, 2] - # gh-17352: older versions of numpy don't properly - # pass in arguments to downstream .take() implementations. - warning = FutureWarning if _np_version_under1p12 else None - - with tm.assert_produces_warning(warning, check_stacklevel=False): + if not _np_version_under1p12: tm.assert_series_equal(np.take(sp, indices, axis=0).to_dense(), np.take(sp.to_dense(), indices, axis=0)) - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.take, - sp, indices, out=np.empty(sp.shape)) + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.take, + sp, indices, out=np.empty(sp.shape)) - msg = "the 'mode' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.take, - sp, indices, mode='clip') + msg = "the 'mode' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.take, + sp, indices, out=None, mode='clip') def test_setitem(self): self.bseries[5] = 7. From 839a746f200b6b210029a7d9f219e06e741bc85a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 12 Oct 2017 20:32:07 -0500 Subject: [PATCH 58/76] Whatsnew cleanup (#17858) * start * borrow from numpy (cherry picked from commit 496a6763347346cb84e94ad3725123aea95b03dd) * More * pep8 * Fix README extension for MANIFEST --- MANIFEST.in | 2 +- doc/source/api.rst | 8 +- doc/source/release.rst | 258 ++++++++++++++++++++++++++------ doc/source/whatsnew/v0.20.0.txt | 5 + doc/source/whatsnew/v0.21.0.txt | 131 ++++++++-------- pandas/core/dtypes/dtypes.py | 3 +- scripts/announce.py | 124 +++++++++++++++ 7 files changed, 420 insertions(+), 111 deletions(-) create mode 100644 scripts/announce.py diff --git a/MANIFEST.in b/MANIFEST.in index 1a6b831c1b9752..9773019c6e6e08 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ include MANIFEST.in include LICENSE include RELEASE.md -include README.rst +include README.md include setup.py include pyproject.toml diff --git a/doc/source/api.rst b/doc/source/api.rst index 646a28686bb063..1e63a938ff3890 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -645,8 +645,12 @@ strings and apply several methods to it. These can be accessed like Categorical ~~~~~~~~~~~ -.. autoclass:: api.types.CategoricalDtype - :members: categories, ordered +The dtype of a ``Categorical`` can be described by a :class:`pandas.api.types.CategoricalDtype`. + +.. 
autosummary:: + :toctree: generated/ + + api.types.CategoricalDtype If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the diff --git a/doc/source/release.rst b/doc/source/release.rst index bf272e243e0dd5..eff3eea63e9f8b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -37,55 +37,229 @@ analysis / manipulation tool available in any language. * Binary installers on PyPI: http://pypi.python.org/pypi/pandas * Documentation: http://pandas.pydata.org -pandas 0.20.2 -------------- +pandas 0.21.0RC1 +---------------- -**Release date:** June 4, 2017 +**Release date:** October 13, 2017 -This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, -bug fixes and performance improvements. -We recommend that all users upgrade to this version. +This is a major release from 0.20.3 and includes a number of API changes, +deprecations, new features, enhancements, and performance improvements along +with a large number of bug fixes. We recommend that all users upgrade to this +version. + +Highlights include: + +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying + categoricals independent of the data, see :ref:`here `. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, see :ref:`here ` +- Compatibility fixes for pypy, see :ref:`here `. -See the :ref:`v0.20.2 Whatsnew ` overview for an extensive list -of all enhancements and bugs that have been fixed in 0.20.2. +See the :ref:`v0.21.0 Whatsnew ` overview for an extensive list +of all enhancements and bugs that have been fixed in 0.21.0 Thanks ~~~~~~ -- Aaron Barber -- Andrew 亮 -- Becky Sweger -- Christian Prinoth -- Christian Stade-Schuldt -- DSM -- Erik Fredriksen -- Hugues Valois -- Jeff Reback -- Jeff Tratner -- JimStearns206 -- John W. O'Brien -- Joris Van den Bossche -- JosephWagner -- Keith Webber -- Mehmet Ali "Mali" Akmanalp -- Pankaj Pandey -- Patrick Luo -- Patrick O'Melveny -- Pietro Battiston -- RobinFiveWords -- Ryan Hendrickson -- SimonBaron -- Tom Augspurger -- WBare -- bpraggastis -- chernrick -- chris-b1 -- economy -- gfyoung -- jaredsnyder -- keitakurita -- linebp -- lloydkirk +A total of 196 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. 
+ +* 3553x + +* Aaron Barber +* Adam Gleave + +* Adam Smith + +* Adrian Liaw + +* Alan Velasco + +* Alan Yee + +* Alex B + +* Alex Lubbock + +* Alex Marchenko + +* Alex Rychyk + +* Amol K + +* Andreas Winkler +* Andrew + +* Andrew 亮 +* André Jonasson + +* Becky Sweger +* Berkay + +* Bob Haffner + +* Bran Yang +* Brock Mendel + +* Carol Willing + +* Carter Green + +* Chankey Pathak + +* Chris +* Chris Billington +* Chris Filo Gorgolewski + +* Chris Kerr +* Chris M + +* Chris Mazzullo + +* Christian Prinoth +* Christian Stade-Schuldt +* Christoph Moehl + +* DSM +* Daniel Chen + +* Daniel Grady +* Daniel Himmelstein +* Dave Willmer +* David Cook +* David Gwynne +* David Read + +* Dillon Niederhut + +* Douglas Rudd +* Eric Stein + +* Eric Wieser + +* Erik Fredriksen +* Florian Wilhelm + +* Floris Kint + +* Forbidden Donut +* Gabe F + +* Giftlin + +* Giftlin Rajaiah + +* Giulio Pepe + +* Guilherme Beltramini +* Guillem Borrell + +* Hanmin Qin + +* Hendrik Makait + +* Hugues Valois +* Hussain Tamboli + +* Iva Miholic + +* Jan Novotný + +* Jean Helie + +* Jean-Baptiste Schiratti + +* Jean-Mathieu Deschenes +* Jeff Knupp + +* Jeff Reback +* Jeff Tratner +* JennaVergeynst +* JimStearns206 +* Joel Nothman +* John W. O'Brien +* Jon Crall + +* Joris Van den Bossche +* JosephWagner +* Juarez Bochi +* Julian Kuhlmann + +* Karel De Brabandere +* Kassandra Keeton + +* Keiron Pizzey + +* Keith Webber +* Kernc +* Kevin Sheppard +* Kirk Hansen + +* Licht Takeuchi + +* Lucas Kushner + +* Mahdi Ben Jelloul + +* Makarov Andrey + +* Malgorzata Turzanska + +* Marc Garcia + +* Margaret Sy + +* MarsGuy + +* Matt Bark + +* Matthew Roeschke +* Matti Picus +* Mehmet Ali "Mali" Akmanalp +* Michael Gasvoda + +* Michael Penkov + +* Milo + +* Morgan Stuart + +* Morgan243 + +* Nathan Ford + +* Nick Eubank +* Nick Garvey + +* Oleg Shteynbuk + +* P-Tillmann + +* Pankaj Pandey +* Patrick Luo +* Patrick O'Melveny +* Paula + +* Peter Quackenbush +* Peter Yanovich + +* Phillip Cloud +* Pierre Haessig +* Pietro Battiston +* Pradyumna Reddy Chinthala +* Prasanjit Prakash +* RobinFiveWords +* Ryan Hendrickson +* Sam Foo +* Sangwoong Yoon + +* Simon Gibbons + +* SimonBaron +* Steven Cutting + +* Sudeep + +* Sylvia + +* T N + +* Telt +* Thomas A Caswell +* Tim Swast + +* Tom Augspurger +* Tong SHEN +* Tuan + +* Utkarsh Upadhyay + +* Vincent La + +* Vivek + +* WANG Aiyong +* WBare +* Wes McKinney +* XF + +* Yi Liu + +* Yosuke Nakabayashi + +* abarber4gh + +* aernlund + +* agustín méndez + +* andymaheshw + +* ante328 + +* aviolov + +* bpraggastis +* cbertinato + +* cclauss + +* chernrick +* chris-b1 +* dkamm + +* dwkenefick +* economy +* faic + +* fding253 + +* gfyoung +* guygoldberg + +* hhuuggoo + +* huashuai + +* ian +* iulia + +* jaredsnyder +* jbrockmendel + +* jdeschenes +* jebob + +* jschendel + +* keitakurita +* kernc + +* kiwirob + +* kjford +* linebp +* lloydkirk +* louispotok + +* majiang + +* manikbhandari + +* mattip +* maxwasserman + +* mjlove12 + +* nmartensen + +* pandas-docs-bot + +* parchd-1 + +* philipphanemann + +* rdk1024 + +* reidy-p + +* ri938 +* ruiann + +* rvernica + +* s-weigand + +* skwbc + +* step4me + +* topper-123 + +* tsdlovell +* ysau + +* zzgao + pandas 0.20.0 / 0.20.1 ---------------------- diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fe24f8f4991727..1a7b75266bfdfe 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -43,6 +43,11 @@ Check the :ref:`API Changes ` and :ref:`deprecations New features ~~~~~~~~~~~~ +.. 
ipython:: python + :suppress: + + import pandas.util.testing as tm + .. _whatsnew_0200.enhancements.agg: ``agg`` API for DataFrame/Series diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d7a08b1985076c..c90b9939ce16d3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1,18 +1,19 @@ .. _whatsnew_0210: -v0.21.0 (???) -------------- +v0.21.0 RC1 (October 13, 2017) +------------------------------ -This is a major release from 0.20.x and includes a number of API changes, deprecations, new features, +This is a major release from 0.20.3 and includes a number of API changes, deprecations, new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. Highlights include: -- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `. - New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying categoricals independent of the data, see :ref:`here `. - The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, see :ref:`here ` +- Compatibility fixes for pypy, see :ref:`here `. Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -46,7 +47,7 @@ method. See the documentation :ref:`here ` for more details. (:issue:`11221`) This method only performs soft conversions on object columns, converting Python objects -to native types, but not any coercive conversions. For example: +to native types, but not any coercive conversions. For example: .. ipython:: python @@ -57,7 +58,7 @@ to native types, but not any coercive conversions. For example: df.infer_objects().dtypes Note that column ``'C'`` was not converted - only scalar numeric types -will be inferred to a new type. Other types of conversion should be accomplished +will be converted to a new type. Other types of conversion should be accomplished using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`). .. ipython:: python @@ -71,25 +72,26 @@ using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedel Improved warnings when attempting to create columns ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -New users are often flummoxed by the relationship between column operations and attribute -access on ``DataFrame`` instances (:issue:`7175`). One specific instance -of this confusion is attempting to create a new column by setting into an attribute: +New users are often flummoxed by the relationship between column operations and +attribute access on ``DataFrame`` instances (:issue:`7175`). One specific +instance of this confusion is attempting to create a new column by setting an +attribute on the ``DataFrame``: .. code-block:: ipython - In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) - In[2]: df.two = [4, 5, 6] + In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In[2]: df.two = [4, 5, 6] This does not raise any obvious exceptions, but also does not create a new column: .. 
code-block:: ipython - In[3]: df - Out[3]: - one - 0 1.0 - 1 2.0 - 2 3.0 + In[3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 Setting a list-like data structure into a new attribute now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. @@ -97,7 +99,7 @@ Setting a list-like data structure into a new attribute now raise a ``UserWarnin ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The :meth:`~DataFrame.drop` method has gained ``index``/``columns`` keywords as an -alternative to specify the ``axis`` and to make it similar in usage to ``reindex`` +alternative to specifying the ``axis``. This is similar to the behavior of ``reindex`` (:issue:`12392`). For example: @@ -153,8 +155,8 @@ style. :class:`pandas.api.types.CategoricalDtype` has been added to the public API and expanded to include the ``categories`` and ``ordered`` attributes. A ``CategoricalDtype`` can be used to specify the set of categories and -orderedness of an array, independent of the data themselves. This can be useful, -e.g., when converting string data to a ``Categorical`` (:issue:`14711`, +orderedness of an array, independent of the data. This can be useful for example, +when converting string data to a ``Categorical`` (:issue:`14711`, :issue:`15078`, :issue:`16015`, :issue:`17643`): .. ipython:: python @@ -193,10 +195,10 @@ The values have been correctly interpreted as integers. The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a ``Series`` with categorical type will now return an instance of -``CategoricalDtype``. This change should be backwards compatible, though the -repr has changed. ``str(CategoricalDtype())`` is still the string -``'category'``, but the preferred way to detect categorical data is to use -:func:`pandas.api.types.is_categorical_dtype`. +``CategoricalDtype``. While the repr has changed, ``str(CategoricalDtype())`` is +still the string ``'category'``. We'll take this moment to remind users that the +*preferred* way to detect categorical data is to use +:func:`pandas.api.types.is_categorical_dtype`, and not ``str(dtype) == 'category'``. See the :ref:`CategoricalDtype docs ` for more. @@ -205,8 +207,8 @@ See the :ref:`CategoricalDtype docs ` for more. Other Enhancements ^^^^^^^^^^^^^^^^^^ -- The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) -- Added support for `PEP 518 `_ to the build system (:issue:`16745`) +- The ``validate`` argument for :func:`merge` now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) +- Added support for `PEP 518 `_ (``pyproject.toml``) to the build system (:issue:`16745`) - :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) - :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) - :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. 
(:issue:`15704`) @@ -217,20 +219,20 @@ Other Enhancements - :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) - :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) -- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) -- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) +- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year. (:issue:`9313`) +- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year. (:issue:`9313`) - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. (:issue:`15838`, :issue:`17438`) - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) -- Read/write methods that infer compression (:func:`read_csv`, :func:`read_table`, :func:`read_pickle`, and :meth:`~DataFrame.to_pickle`) can now infer from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). -- :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). -- :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) -- :func:`Styler.where` has been implemented. It is as a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`). +- Read/write methods that infer compression (:func:`read_csv`, :func:`read_table`, :func:`read_pickle`, and :meth:`~DataFrame.to_pickle`) can now infer from path-like objects, such as ``pathlib.Path``. (:issue:`17206`) +- :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files. (:issue:`15871`) +- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and is lazy in all cases. (:issue:`13918`, :issue:`17213`) +- :func:`Styler.where` has been implemented as a convenience for :func:`Styler.applymap`. (:issue:`17474`) - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`) -- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names -- Improved the import time of pandas by about 2.25x (:issue:`16764`) +- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`) +- Improved the import time of pandas by about 2.25x. 
(:issue:`16764`) - :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`) .. _whatsnew_0210.api_breaking: @@ -238,6 +240,26 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0210.api_breaking.deps: + +Dependencies have increased minimum versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`). +If installed, we now require: + + +--------------+-----------------+----------+ + | Package | Minimum Version | Required | + +==============+=================+==========+ + | Numpy | 1.9.0 | X | + +--------------+-----------------+----------+ + | Matplotlib | 1.4.3 | | + +--------------+-----------------+----------+ + | Scipy | 0.14.0 | | + +--------------+-----------------+----------+ + | Bottleneck | 1.0.0 | | + +--------------+-----------------+----------+ + .. _whatsnew_0210.api_breaking.period_index_resampling: ``PeriodIndex`` resampling @@ -314,34 +336,13 @@ New Behavior: s.resample('M').ohlc() - -.. _whatsnew_0210.api_breaking.deps: - -Dependencies have increased minimum versions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`). -If installed, we now require: - - +--------------+-----------------+----------+ - | Package | Minimum Version | Required | - +==============+=================+==========+ - | Numpy | 1.9.0 | X | - +--------------+-----------------+----------+ - | Matplotlib | 1.4.3 | | - +--------------+-----------------+----------+ - | Scipy | 0.14.0 | | - +--------------+-----------------+----------+ - | Bottleneck | 1.0.0 | | - +--------------+-----------------+----------+ - .. _whatsnew_0210.api_breaking.loc: Indexing with a list with missing labels is Deprecated ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Previously, selecting with a list of labels, where one or more labels were missing would always succeed, returning ``NaN`` for missing labels. -This will now show a ``FutureWarning``, in the future this will raise a ``KeyError`` (:issue:`15747`). +This will now show a ``FutureWarning``. In the future this will raise a ``KeyError`` (:issue:`15747`). This warning will trigger on a ``DataFrame`` or a ``Series`` for using ``.loc[]`` or ``[[]]`` when passing a list-of-labels with at least 1 missing label. See the :ref:`deprecation docs `. @@ -452,10 +453,10 @@ Current Behavior Sum/Prod of all-NaN Series/DataFrames is now consistently NaN ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on +The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on whether `bottleneck `__ is installed. (:issue:`9422`, :issue:`15507`). -With ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, the result will be all-``NaN``. See the :ref:`docs `. +Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs `. .. ipython:: python @@ -463,19 +464,19 @@ With ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a Previously NO ``bottleneck`` -.. code_block:: ipython +.. 
code-block:: ipython In [2]: s.sum() Out[2]: np.nan Previously WITH ``bottleneck`` -.. code_block:: ipython +.. code-block:: ipython In [2]: s.sum() Out[2]: 0.0 -New Behavior, without regards to the bottleneck installation. +New Behavior, without regard to the bottleneck installation. .. ipython:: python @@ -485,7 +486,7 @@ Note that this also changes the sum of an empty ``Series`` Previously regardless of ``bottleneck`` -.. code_block:: ipython +.. code-block:: ipython In [1]: pd.Series([]).sum() Out[1]: 0 @@ -660,7 +661,7 @@ The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and MultiIndex Constructor with a Single Level ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all +The ``MultiIndex`` constructor no longer squeezes a MultiIndex with all length-one levels down to a regular ``Index``. This affects all the ``MultiIndex`` constructors. (:issue:`17178`) @@ -1001,7 +1002,7 @@ Reshaping - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) - Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) - Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) -- Bug in :fun:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) +- Bug in :func:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) Numeric ^^^^^^^ @@ -1015,6 +1016,8 @@ Categorical - Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) - Bug in categorical operations with :ref:`Series.cat ` not preserving the original Series' name (:issue:`17509`) +.. _whatsnew_0210.pypy: + PyPy ^^^^ diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 4d97b7d17a6dc2..2fdbad93fa63b0 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -109,7 +109,6 @@ class CategoricalDtypeType(type): class CategoricalDtype(ExtensionDtype): - """ Type for categorical data with the categories and orderedness @@ -140,7 +139,7 @@ class CategoricalDtype(ExtensionDtype): See Also -------- - Categorical + pandas.Categorical """ # TODO: Document public vs. private API name = 'category' diff --git a/scripts/announce.py b/scripts/announce.py new file mode 100644 index 00000000000000..63f86173eff587 --- /dev/null +++ b/scripts/announce.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# -*- encoding:utf-8 -*- +""" +Script to generate contributor and pull request lists + +This script generates contributor and pull request lists for release +announcements using the Github v3 protocol. Using it requires an authentication token in +order to have sufficient bandwidth; you can get one following the directions at +`_ +Don't add any scope, as the default is read access to public information. The +token may be stored in an environment variable as you only get one chance to +see it. + +Usage:: + + $ ./tools/announce.py + +The output is utf8 rst. + +Dependencies +------------ + +- gitpython +- pygithub + +Some code was copied from scipy `tools/gh_list.py` and `tools/authors.py`. + +Examples +-------- + +From the bash command line with $GITHUB token.
+ + $ ./tools/announce $GITHUB v1.11.0..v1.11.1 > announce.rst + +""" +from __future__ import print_function, division + +import os +import re +import codecs +from git import Repo + +UTF8Writer = codecs.getwriter('utf8') +this_repo = Repo(os.path.join(os.path.dirname(__file__), "..")) + +author_msg = """\ +A total of %d people contributed to this release. People with a "+" by their +names contributed a patch for the first time. +""" + +pull_request_msg = """\ +A total of %d pull requests were merged for this release. +""" + + +def get_authors(revision_range): + pat = u'^.*\\t(.*)$' + lst_release, cur_release = [r.strip() for r in revision_range.split('..')] + + # authors, in current release and previous to current release. + cur = set(re.findall(pat, this_repo.git.shortlog('-s', revision_range), + re.M)) + pre = set(re.findall(pat, this_repo.git.shortlog('-s', lst_release), + re.M)) + + # Homu is the author of auto merges, clean him out. + cur.discard('Homu') + pre.discard('Homu') + + # Append '+' to new authors. + authors = [s + u' +' for s in cur - pre] + [s for s in cur & pre] + authors.sort() + return authors + + +def get_pull_requests(repo, revision_range): + prnums = [] + + # From regular merges + merges = this_repo.git.log( + '--oneline', '--merges', revision_range) + issues = re.findall(u"Merge pull request \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From Homu merges (Auto merges) + issues = re. findall(u"Auto merge of \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From fast forward squash-merges + commits = this_repo.git.log( + '--oneline', '--no-merges', '--first-parent', revision_range) + issues = re.findall(u'^.*\\(\\#(\\d+)\\)$', commits, re.M) + prnums.extend(int(s) for s in issues) + + # get PR data from github repo + prnums.sort() + prs = [repo.get_pull(n) for n in prnums] + return prs + + +def main(revision_range, repo): + lst_release, cur_release = [r.strip() for r in revision_range.split('..')] + + # document authors + authors = get_authors(revision_range) + heading = u"Contributors" + print() + print(heading) + print(u"=" * len(heading)) + print(author_msg % len(authors)) + + for s in authors: + print(u'* ' + s) + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser(description="Generate author lists for release") + parser.add_argument('revision_range', help='..') + parser.add_argument('--repo', help="Github org/repository", + default="pandas-dev/pandas") + args = parser.parse_args() + main(args.revision_range, args.repo) From c277cd76416d4e930b1f05da873b9eaf101139da Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 12 Oct 2017 20:38:23 -0500 Subject: [PATCH 59/76] RLS: v0.21.0rc1 From 0926b2d31a5dfbc082ae5a0882b29cd4d3360b5e Mon Sep 17 00:00:00 2001 From: "Jonathan J. 
Helmus" Date: Fri, 13 Oct 2017 05:32:46 -0500 Subject: [PATCH 60/76] BUG: set tz on DTI from fixed format HDFStore (#17844) closes #17618 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/pytables.py | 7 +++++-- pandas/tests/io/test_pytables.py | 11 +++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c90b9939ce16d3..d6bdf153e03684 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -941,6 +941,7 @@ Indexing I/O ^^^ +- Bug in :func:`read_hdf` when reading a timezone aware index from ``fixed`` format HDFStore (:issue:`17618`) - Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) - Bug in :func:`read_csv` in which specified column names were not being thoroughly de-duplicated (:issue:`17095`) - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 39d088e00b2196..2af28161678295 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2391,8 +2391,11 @@ def _alias_to_class(self, alias): def _get_index_factory(self, klass): if klass == DatetimeIndex: def f(values, freq=None, tz=None): - return DatetimeIndex._simple_new(values, None, freq=freq, - tz=tz) + # data are already in UTC, localize and convert if tz present + result = DatetimeIndex._simple_new(values, None, freq=freq) + if tz is not None: + result = result.tz_localize('UTC').tz_convert(tz) + return result return f elif klass == PeriodIndex: def f(values, freq=None, tz=None): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2fe3cf1f34d44c..6e3e338ce3de39 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2272,6 +2272,17 @@ def test_calendar_roundtrip_issue(self): result = store.select('table') assert_series_equal(result, s) + def test_roundtrip_tz_aware_index(self): + # GH 17618 + time = pd.Timestamp('2000-01-01 01:00:00', tz='US/Eastern') + df = pd.DataFrame(data=[0], index=[time]) + + with ensure_clean_store(self.path) as store: + store.put('frame', df, format='fixed') + recons = store['frame'] + tm.assert_frame_equal(recons, df) + assert recons.index[0].value == 946706400000000000 + def test_append_with_timedelta(self): # GH 3577 # append timedelta From 3c964a47d626a06a3f9c2d0795ee7d744dc72363 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 13 Oct 2017 05:32:10 -0600 Subject: [PATCH 61/76] CLN: Use pandas.core.common for None checks (#17816) --- pandas/core/common.py | 27 ++++++++++++++++++++------- pandas/core/generic.py | 12 +++++------- pandas/core/groupby.py | 28 +++++++++++++--------------- pandas/core/indexes/api.py | 2 +- pandas/core/indexes/base.py | 13 ++++++------- pandas/core/indexes/multi.py | 5 +++-- pandas/core/indexes/range.py | 3 ++- pandas/core/panel.py | 16 +++++++++------- pandas/core/reshape/concat.py | 2 +- pandas/core/reshape/merge.py | 2 +- pandas/core/series.py | 5 +++-- pandas/core/window.py | 7 +++---- pandas/io/formats/excel.py | 4 ++-- pandas/io/formats/format.py | 8 ++++---- pandas/io/formats/style.py | 7 +++---- pandas/io/json/table_schema.py | 3 ++- pandas/io/pytables.py | 6 +++--- pandas/plotting/_core.py | 6 +++--- pandas/tests/frame/test_to_csv.py | 3 ++- pandas/tests/util/test_util.py | 3 ++- pandas/util/testing.py | 3 ++- 21 files changed, 90 insertions(+), 75 deletions(-) diff --git 
a/pandas/core/common.py b/pandas/core/common.py index e0dc420bc53f8c..7b96700313012e 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -223,17 +223,36 @@ def _mut_exclusive(**kwargs): def _not_none(*args): + """Returns a generator consisting of the arguments that are not None""" return (arg for arg in args if arg is not None) def _any_none(*args): + """Returns a boolean indicating if any argument is None""" for arg in args: if arg is None: return True return False +def _all_none(*args): + """Returns a boolean indicating if all arguments are None""" + for arg in args: + if arg is not None: + return False + return True + + +def _any_not_none(*args): + """Returns a boolean indicating if any argument is not None""" + for arg in args: + if arg is not None: + return True + return False + + def _all_not_none(*args): + """Returns a boolean indicating if all arguments are not None""" for arg in args: if arg is None: return False @@ -241,6 +260,7 @@ def _all_not_none(*args): def _count_not_none(*args): + """Returns the count of arguments that are not None""" return sum(x is not None for x in args) @@ -459,13 +479,6 @@ def _apply_if_callable(maybe_callable, obj, **kwargs): return maybe_callable -def _all_none(*args): - for arg in args: - if arg is not None: - return False - return True - - def _where_compat(mask, arr1, arr2): if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE: new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8')) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index acc1bf1241bffe..fc3982dba93ce3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -28,12 +28,10 @@ from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame - -from pandas.core.common import (_all_not_none, - _values_from_object, - _maybe_box_datetimelike, - SettingWithCopyError, SettingWithCopyWarning, - AbstractMethodError) +from pandas.core.common import (_all_not_none, _count_not_none, + _maybe_box_datetimelike, _values_from_object, + AbstractMethodError, SettingWithCopyError, + SettingWithCopyWarning) from pandas.core.base import PandasObject, SelectionMixin from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -3252,7 +3250,7 @@ def filter(self, items=None, like=None, regex=None, axis=None): """ import re - nkw = sum([x is not None for x in [items, like, regex]]) + nkw = _count_not_none(items, like, regex) if nkw > 1: raise TypeError('Keyword arguments `items`, `like`, or `regex` ' 'are mutually exclusive') diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ccaf90b4482a7c..3b7d3685db3b74 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -39,7 +39,8 @@ from pandas.core.dtypes.missing import isna, notna, _maybe_fill from pandas.core.common import (_values_from_object, AbstractMethodError, - _default_index) + _default_index, _not_none, _get_callable_name, + _asarray_tuplesafe) from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) @@ -60,7 +61,6 @@ from pandas.util._validators import validate_kwargs import pandas.core.algorithms as algorithms -import pandas.core.common as com from pandas.core.config import option_context from pandas.plotting._core import boxplot_frame_groupby @@ -877,10 +877,9 @@ def _concat_objects(self, keys, values, not_indexed_same=False): def reset_identity(values): # reset the identities of the components # of 
the values to prevent aliasing - for v in values: - if v is not None: - ax = v._get_axis(self.axis) - ax._reset_identity() + for v in _not_none(*values): + ax = v._get_axis(self.axis) + ax._reset_identity() return values if not not_indexed_same: @@ -1806,7 +1805,7 @@ def apply(self, f, data, axis=0): group_keys = self._get_group_keys() # oh boy - f_name = com._get_callable_name(f) + f_name = _get_callable_name(f) if (f_name not in _plotting_methods and hasattr(splitter, 'fast_apply') and axis == 0): try: @@ -2533,7 +2532,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, self.grouper = self.obj[self.name] elif isinstance(self.grouper, (list, tuple)): - self.grouper = com._asarray_tuplesafe(self.grouper) + self.grouper = _asarray_tuplesafe(self.grouper) # a passed Categorical elif is_categorical_dtype(self.grouper): @@ -2739,7 +2738,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: - keys = [com._asarray_tuplesafe(keys)] + keys = [_asarray_tuplesafe(keys)] if isinstance(level, (tuple, list)): if key is None: @@ -3028,7 +3027,7 @@ def _aggregate_multiple_funcs(self, arg, _level): columns.append(f) else: # protect against callables without names - columns.append(com._get_callable_name(f)) + columns.append(_get_callable_name(f)) arg = lzip(columns, arg) results = {} @@ -3686,14 +3685,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): key_names = self.grouper.names # GH12824. - def first_non_None_value(values): + def first_not_none(values): try: - v = next(v for v in values if v is not None) + return next(_not_none(*values)) except StopIteration: return None - return v - v = first_non_None_value(values) + v = first_not_none(values) if v is None: # GH9684. If all values are None, then this will throw an error. 
@@ -3726,7 +3724,7 @@ def first_non_None_value(values): key_index = None # make Nones an empty object - v = first_non_None_value(values) + v = first_not_none(values) if v is None: return DataFrame() elif isinstance(v, NDFrame): diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index d20a0b0a2c73df..08cda8a06ba64f 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -123,7 +123,7 @@ def _get_consensus_names(indexes): # find the non-none names, need to tupleify to make # the set hashable, then reverse on return consensus_names = set([tuple(i.names) for i in indexes - if any(n is not None for n in i.names)]) + if com._any_not_none(*i.names)]) if len(consensus_names) == 1: return list(list(consensus_names)[0]) return [None] * indexes[0].nlevels diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df0e963e7628d6..c3343f149005c3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -42,16 +42,15 @@ needs_i8_conversion, is_iterator, is_list_like, is_scalar) -from pandas.core.common import (is_bool_indexer, - _values_from_object, - _asarray_tuplesafe) +from pandas.core.common import (is_bool_indexer, _values_from_object, + _asarray_tuplesafe, _not_none, + _index_labels_to_array) from pandas.core.base import PandasObject, IndexOpsMixin import pandas.core.base as base from pandas.util._decorators import ( Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList -import pandas.core.common as com import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing import pandas.core.algorithms as algos @@ -3168,8 +3167,8 @@ def _join_multi(self, other, how, return_indexers=True): other_is_mi = isinstance(other, MultiIndex) # figure out join names - self_names = [n for n in self.names if n is not None] - other_names = [n for n in other.names if n is not None] + self_names = _not_none(*self.names) + other_names = _not_none(*other.names) overlap = list(set(self_names) & set(other_names)) # need at least 1 in common, but not more than 1 @@ -3714,7 +3713,7 @@ def drop(self, labels, errors='raise'): ------- dropped : Index """ - labels = com._index_labels_to_array(labels) + labels = _index_labels_to_array(labels) indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 06b208b4d174e5..4cc59f52970589 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -21,7 +21,8 @@ is_scalar) from pandas.core.dtypes.missing import isna, array_equivalent from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.core.common import (_values_from_object, +from pandas.core.common import (_any_not_none, + _values_from_object, is_bool_indexer, is_null_slice, is_true_slices) @@ -509,7 +510,7 @@ def _format_attrs(self): max_seq_items=False)), ('labels', ibase.default_pprint(self._labels, max_seq_items=False))] - if not all(name is None for name in self.names): + if _any_not_none(*self.names): attrs.append(('names', ibase.default_pprint(self.names))) if self.sortorder is not None: attrs.append(('sortorder', ibase.default_pprint(self.sortorder))) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9f7bac641ae08d..b2e55d4826670e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -12,6 +12,7 @@ from pandas import compat from pandas.compat import lrange, range from pandas.compat.numpy import 
function as nv +from pandas.core.common import _all_none from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat @@ -83,7 +84,7 @@ def _ensure_int(value, field): return new_value - if start is None and stop is None and step is None: + if _all_none(start, stop, step): msg = "RangeIndex(...) must be called with integers" raise TypeError(msg) elif start is None: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 1f22cb49d01962..997dd9c8e0f67a 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -15,13 +15,13 @@ is_string_like, is_scalar) from pandas.core.dtypes.missing import notna -import pandas.core.common as com import pandas.core.ops as ops import pandas.core.missing as missing from pandas import compat from pandas.compat import (map, zip, range, u, OrderedDict) from pandas.compat.numpy import function as nv -from pandas.core.common import _try_sort, _default_index +from pandas.core.common import (_try_sort, _default_index, _all_not_none, + _any_not_none, _apply_if_callable) from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -166,7 +166,7 @@ def _init_data(self, data, copy, dtype, **kwargs): axes = None if isinstance(data, BlockManager): - if any(x is not None for x in passed_axes): + if _any_not_none(*passed_axes): axes = [x if x is not None else y for x, y in zip(passed_axes, data.axes)] mgr = data @@ -178,7 +178,7 @@ def _init_data(self, data, copy, dtype, **kwargs): mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy) copy = False dtype = None - elif is_scalar(data) and all(x is not None for x in passed_axes): + elif is_scalar(data) and _all_not_none(*passed_axes): values = cast_scalar_to_array([len(x) for x in passed_axes], data, dtype=dtype) mgr = self._init_matrix(values, passed_axes, dtype=values.dtype, @@ -279,7 +279,7 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): return cls(**d) def __getitem__(self, key): - key = com._apply_if_callable(key, self) + key = _apply_if_callable(key, self) if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) @@ -594,7 +594,7 @@ def _box_item_values(self, key, values): return self._constructor_sliced(values, **d) def __setitem__(self, key, value): - key = com._apply_if_callable(key, self) + key = _apply_if_callable(key, self) shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): value = value.reindex( @@ -616,7 +616,9 @@ def __setitem__(self, key, value): def _unpickle_panel_compat(self, state): # pragma: no cover "Unpickle the panel" - _unpickle = com._unpickle_array + from pandas.io.pickle import _unpickle_array + + _unpickle = _unpickle_array vals, items, major, minor = state items = _unpickle(items) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c54763f8ebde13..e2c02bd0e71fb0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -241,7 +241,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, raise ValueError('No objects to concatenate') if keys is None: - objs = [obj for obj in objs if obj is not None] + objs = list(com._not_none(*objs)) else: # #1649 clean_keys = [] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6bb6988a7442a1..e409090e76944b 100644 --- a/pandas/core/reshape/merge.py +++ 
b/pandas/core/reshape/merge.py @@ -1550,4 +1550,4 @@ def _should_fill(lname, rname): def _any(x): - return x is not None and len(x) > 0 and any([y is not None for y in x]) + return x is not None and com._any_not_none(*x) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8499f8b55d2d0a..76baa89f165d43 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -45,7 +45,8 @@ SettingWithCopyError, _maybe_box_datetimelike, _dict_compat, - standardize_mapping) + standardize_mapping, + _any_none) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices @@ -713,7 +714,7 @@ def _get_with(self, key): def _get_values_tuple(self, key): # mpl hackaround - if any(k is None for k in key): + if _any_none(*key): return self._get_values(key) if not isinstance(self.index, MultiIndex): diff --git a/pandas/core/window.py b/pandas/core/window.py index e3a091573aa2f5..5143dddc5e866b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -32,7 +32,7 @@ from pandas.core.base import (PandasObject, SelectionMixin, GroupByMixin) -import pandas.core.common as com +from pandas.core.common import _asarray_tuplesafe, _count_not_none import pandas._libs.window as _window from pandas import compat @@ -535,7 +535,7 @@ def _prep_window(self, **kwargs): window = self._get_window() if isinstance(window, (list, tuple, np.ndarray)): - return com._asarray_tuplesafe(window).astype(float) + return _asarray_tuplesafe(window).astype(float) elif is_integer(window): import scipy.signal as sig @@ -1972,8 +1972,7 @@ def dataframe_from_int_dict(data, frame_template): def _get_center_of_mass(com, span, halflife, alpha): - valid_count = len([x for x in [com, span, halflife, alpha] - if x is not None]) + valid_count = _count_not_none(com, span, halflife, alpha) if valid_count > 1: raise ValueError("com, span, halflife, and alpha " "are mutually exclusive") diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 9e888c38edaa7d..af24537cabf90a 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -10,6 +10,7 @@ from pandas.compat import reduce from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.printing import pprint_thing +from pandas.core.common import _any_not_none from pandas.core.dtypes.common import is_float import pandas._libs.lib as lib from pandas import Index, MultiIndex, PeriodIndex @@ -548,8 +549,7 @@ def _format_hierarchical_rows(self): self.rowcounter += 1 # if index labels are not empty go ahead and dump - if (any(x is not None for x in index_labels) and - self.header is not False): + if _any_not_none(*index_labels) and self.header is not False: for cidx, name in enumerate(index_labels): yield ExcelCell(self.rowcounter - 1, cidx, name, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4608c3fe0ceb80..c5d4a0ecf44ab9 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -26,6 +26,7 @@ is_list_like) from pandas.core.dtypes.generic import ABCSparseArray from pandas.core.base import PandasObject +from pandas.core.common import _any_not_none, sentinel_factory from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat from pandas.compat import (StringIO, lzip, range, map, zip, u, @@ -36,7 +37,6 @@ _stringify_path) from pandas.io.formats.printing import adjoin, justify, pprint_thing from pandas.io.formats.common import get_level_lengths 
-import pandas.core.common as com import pandas._libs.lib as lib from pandas._libs.tslib import (iNaT, Timestamp, Timedelta, format_array_from_datetime) @@ -1257,7 +1257,7 @@ def _column_header(): if self.fmt.sparsify: # GH3547 - sentinel = com.sentinel_factory() + sentinel = sentinel_factory() else: sentinel = None levels = self.columns.format(sparsify=sentinel, adjoin=False, @@ -1426,7 +1426,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): if self.fmt.sparsify: # GH3547 - sentinel = com.sentinel_factory() + sentinel = sentinel_factory() levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False) @@ -2352,7 +2352,7 @@ def single_row_table(row): # pragma: no cover def _has_names(index): if isinstance(index, MultiIndex): - return any([x is not None for x in index.names]) + return _any_not_none(*index.names) else: return index.name is not None diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d7677e3642c26e..2e87b3b925eddf 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -27,7 +27,7 @@ from pandas.compat import range from pandas.core.config import get_option from pandas.core.generic import _shared_docs -import pandas.core.common as com +from pandas.core.common import _any_not_none, sentinel_factory from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice from pandas.util._decorators import Appender try: @@ -259,8 +259,7 @@ def format_attr(pair): row_es.append(es) head.append(row_es) - if self.data.index.names and not all(x is None - for x in self.data.index.names): + if self.data.index.names and _any_not_none(*self.data.index.names): index_header_row = [] for c, name in enumerate(self.data.index.names): @@ -1204,7 +1203,7 @@ def _get_level_lengths(index): Result is a dictionary of (level, initial_position): span """ - sentinel = com.sentinel_factory() + sentinel = sentinel_factory() levels = index.format(sparsify=sentinel, adjoin=False, names=False) if index.nlevels == 1: diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index c3865afa9c0c05..9cec5b3d6ba498 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -3,6 +3,7 @@ http://specs.frictionlessdata.io/json-table-schema/ """ +from pandas.core.common import _all_not_none from pandas.core.dtypes.common import ( is_integer_dtype, is_timedelta64_dtype, is_numeric_dtype, is_bool_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -61,7 +62,7 @@ def as_json_table_type(x): def set_default_names(data): """Sets index names to 'index' for regular, or 'level_x' for Multi""" - if all(name is not None for name in data.index.names): + if _all_not_none(*data.index.names): return data data = data.copy() diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2af28161678295..40955c50f6b5ff 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -34,7 +34,7 @@ from pandas.core.base import StringMixin from pandas.io.formats.printing import adjoin, pprint_thing from pandas.errors import PerformanceWarning -from pandas.core.common import _asarray_tuplesafe +from pandas.core.common import _asarray_tuplesafe, _all_none from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical, _factorize_from_iterables from pandas.core.internals import (BlockManager, make_block, @@ -905,7 +905,7 @@ def remove(self, key, where=None, start=None, stop=None): raise KeyError('No object named %s in the file' % key) # remove the node - if where is None and start is None and
stop is None: + if _all_none(where, start, stop): s.group._f_remove(recursive=True) # delete from the table @@ -2363,7 +2363,7 @@ def delete(self, where=None, start=None, stop=None, **kwargs): support fully deleting the node in its entirety (only) - where specification must be None """ - if where is None and start is None and stop is None: + if _all_none(where, start, stop): self._handle.remove_node(self.group, recursive=True) return None diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 0d77b5f41a08ee..ad3c4f0ecb05f0 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -20,7 +20,7 @@ is_iterator) from pandas.core.dtypes.generic import ABCSeries -from pandas.core.common import AbstractMethodError, _try_sort +from pandas.core.common import AbstractMethodError, _try_sort, _any_not_none from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex @@ -607,7 +607,7 @@ def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): def _get_index_name(self): if isinstance(self.data.index, MultiIndex): name = self.data.index.names - if any(x is not None for x in name): + if _any_not_none(*name): name = ','.join([pprint_thing(x) for x in name]) else: name = None @@ -955,7 +955,7 @@ def _make_plot(self): it = self._iter_data() stacking_id = self._get_stacking_id() - is_errorbar = any(e is not None for e in self.errors.values()) + is_errorbar = _any_not_none(*self.errors.values()) colors = self._get_colors() for i, (label, y) in enumerate(it): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index ab34ce877a7266..a8449d6f874dfa 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -9,6 +9,7 @@ import numpy as np from pandas.compat import (lmap, range, lrange, StringIO, u) +from pandas.core.common import _all_none from pandas.errors import ParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, date_range, read_csv, compat, to_datetime) @@ -570,7 +571,7 @@ def _make_frame(names=None): df = _make_frame(True) df.to_csv(path, tupleize_cols=False, index=False) result = read_csv(path, header=[0, 1], tupleize_cols=False) - assert all([x is None for x in result.columns.names]) + assert _all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index ffc9703abff41d..659ce36de6babf 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -8,6 +8,7 @@ import pytest from pandas.compat import intern +from pandas.core.common import _all_none from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf from pandas.util._decorators import deprecate_kwarg, make_signature from pandas.util._validators import (validate_args, validate_kwargs, @@ -437,7 +438,7 @@ def test_set_locale(self): pytest.skip("Only a single locale found, no point in " "trying to test setting another locale") - if all(x is None for x in self.current_locale): + if _all_none(*self.current_locale): # Not sure why, but on some travis runs with pytest, # getlocale() returned (None, None). 
pytest.skip("Current locale is not set.") diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3c23462e10d355..730d2782e85d2b 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -33,6 +33,7 @@ is_list_like) from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d +from pandas.core.common import _all_not_none import pandas.compat as compat from pandas.compat import ( @@ -579,7 +580,7 @@ def set_locale(new_locale, lc_var=locale.LC_ALL): except ValueError: yield new_locale else: - if all(lc is not None for lc in normalized_locale): + if _all_not_none(*normalized_locale): yield '.'.join(normalized_locale) else: yield new_locale From 446d5b4bc13636cb4d569aeb5669421e9ee862ec Mon Sep 17 00:00:00 2001 From: matthiashuschle Date: Sat, 14 Oct 2017 16:36:38 +0200 Subject: [PATCH 62/76] BUG: to_json - prevent various segfault conditions (GH14256) (#17857) --- doc/source/whatsnew/v0.21.0.txt | 2 + pandas/_libs/src/ujson/lib/ultrajson.h | 7 ++++ pandas/_libs/src/ujson/lib/ultrajsonenc.c | 7 +--- pandas/_libs/src/ujson/python/objToJSON.c | 7 +++- pandas/tests/io/json/test_pandas.py | 45 +++++++++++++++++++++++ 5 files changed, 62 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d6bdf153e03684..f4fbbd3596b57c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -956,6 +956,7 @@ I/O - Bug in :meth:`DataFrame.to_html` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) - Bug in :meth:`DataFrame.to_html` in which there was no validation of the ``justify`` parameter (:issue:`17527`) - Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`) +- Bug in :func:`to_json` where several conditions (including objects with unprintable symbols, objects with deep recursion, overlong labels) caused segfaults instead of raising the appropriate exception (:issue:`14256`) Plotting ^^^^^^^^ @@ -1033,3 +1034,4 @@ Other ^^^^^ - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) + diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 4f51fa8b3eb383..159645b4007e1c 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -307,4 +307,11 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer); EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t); +#define Buffer_Reserve(__enc, __len) \ + if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \ + Buffer_Realloc((__enc), (__len)); \ + } + +void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded); + #endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index 6bf22977490063..2d6c823a45515e 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -714,11 +714,6 @@ int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc, } } -#define Buffer_Reserve(__enc, __len) \ - if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \ - Buffer_Realloc((__enc), 
(__len)); \ - } - #define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr; FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin, @@ -976,6 +971,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, } enc->iterEnd(obj, &tc); + Buffer_Reserve(enc, 2); Buffer_AppendCharUnchecked(enc, ']'); break; } @@ -1003,6 +999,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, } enc->iterEnd(obj, &tc); + Buffer_Reserve(enc, 2); Buffer_AppendCharUnchecked(enc, '}'); break; } diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 1ee862b54cf0bc..ae7854dfc14278 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -783,6 +783,7 @@ static void NpyArr_getLabel(JSOBJ obj, JSONTypeContext *tc, size_t *outLen, JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder; PRINTMARK(); *outLen = strlen(labels[idx]); + Buffer_Reserve(enc, *outLen); memcpy(enc->offset, labels[idx], sizeof(char) * (*outLen)); enc->offset += *outLen; *outLen = 0; @@ -879,7 +880,7 @@ int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { NpyArrContext *npyarr; PRINTMARK(); - if (PyErr_Occurred()) { + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { return 0; } @@ -1224,6 +1225,10 @@ int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { PyObject *attrName; char *attrStr; + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + if (itemValue) { Py_DECREF(GET_TC(tc)->itemValue); GET_TC(tc)->itemValue = itemValue = NULL; diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index de4afec883efdb..6625446bea4693 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -511,6 +511,51 @@ def test_blocks_compat_GH9037(self): by_blocks=True, check_exact=True) + def test_frame_nonprintable_bytes(self): + # GH14256: failing column caused segfaults, if it is not the last one + + class BinaryThing(object): + + def __init__(self, hexed): + self.hexed = hexed + if compat.PY2: + self.binary = hexed.decode('hex') + else: + self.binary = bytes.fromhex(hexed) + + def __str__(self): + return self.hexed + + hexed = '574b4454ba8c5eb4f98a8f45' + binthing = BinaryThing(hexed) + + # verify the proper conversion of printable content + df_printable = DataFrame({'A': [binthing.hexed]}) + assert df_printable.to_json() == '{"A":{"0":"%s"}}' % hexed + + # check if non-printable content throws appropriate Exception + df_nonprintable = DataFrame({'A': [binthing]}) + with pytest.raises(OverflowError): + df_nonprintable.to_json() + + # the same with multiple columns threw segfaults + df_mixed = DataFrame({'A': [binthing], 'B': [1]}, + columns=['A', 'B']) + with pytest.raises(OverflowError): + df_mixed.to_json() + + # default_handler should resolve exceptions for non-string types + assert df_nonprintable.to_json(default_handler=str) == \ + '{"A":{"0":"%s"}}' % hexed + assert df_mixed.to_json(default_handler=str) == \ + '{"A":{"0":"%s"},"B":{"0":1}}' % hexed + + def test_label_overflow(self): + # GH14256: buffer length not checked when writing label + df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]}) + assert df.to_json() == \ + '{"%s":{"0":1},"foo":{"0":1337}}' % ('bar' * 100000) + def test_series_non_unique_index(self): s = Series(['a', 'b'], index=[1, 1]) From b59413792fca5854e6f3ea82db187a60e2b60171 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 14 Oct 2017 07:39:06 
-0700 Subject: [PATCH 63/76] DEPR: Deprecate read_csv arguments fully (#17865) Issue warnings on `read_csv` deprecated args in full, even if the normal defaults were passed in. Closes gh-17828. --- pandas/core/frame.py | 2 +- pandas/io/parsers.py | 29 +++++++++---- pandas/tests/frame/test_to_csv.py | 47 +++++++++++----------- pandas/tests/io/parser/test_unsupported.py | 5 +++ 4 files changed, 51 insertions(+), 32 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7e8c0da75e2c9..97943f153319bd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1294,7 +1294,7 @@ def _from_arrays(cls, arrays, columns, index, dtype=None): @classmethod def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, - encoding=None, tupleize_cols=False, + encoding=None, tupleize_cols=None, infer_datetime_format=False): """ Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3c94871003dd07..1b6414ea974fac 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -529,6 +529,14 @@ def _read(filepath_or_buffer, kwds): 'buffer_lines', 'float_precision', } + +_deprecated_defaults = { + 'as_recarray': None, + 'buffer_lines': None, + 'compact_ints': None, + 'use_unsigned': None, + 'tupleize_cols': None +} _deprecated_args = { 'as_recarray', 'buffer_lines', @@ -594,7 +602,7 @@ def parser_f(filepath_or_buffer, comment=None, encoding=None, dialect=None, - tupleize_cols=False, + tupleize_cols=None, # Error Handling error_bad_lines=True, @@ -606,9 +614,9 @@ def parser_f(filepath_or_buffer, # Internal doublequote=True, delim_whitespace=False, - as_recarray=False, - compact_ints=False, - use_unsigned=False, + as_recarray=None, + compact_ints=None, + use_unsigned=None, low_memory=_c_parser_defaults['low_memory'], buffer_lines=None, memory_map=False, @@ -831,12 +839,14 @@ def _get_options_with_defaults(self, engine): if ('python' in engine and argname not in _python_unsupported): pass + elif value == _deprecated_defaults.get(argname, default): + pass else: raise ValueError( 'The %r option is not supported with the' ' %r engine' % (argname, engine)) else: - value = default + value = _deprecated_defaults.get(argname, default) options[argname] = value if engine == 'python-fwf': @@ -962,6 +972,8 @@ def _clean_options(self, options, engine): for arg in _deprecated_args: parser_default = _c_parser_defaults[arg] + depr_default = _deprecated_defaults[arg] + msg = ("The '{arg}' argument has been deprecated " "and will be removed in a future version." .format(arg=arg)) @@ -970,10 +982,13 @@ def _clean_options(self, options, engine): msg += ' Please call pd.to_csv(...).to_records() instead.' 
elif arg == 'tupleize_cols': msg += (' Column tuples will then ' - 'always be converted to MultiIndex') + 'always be converted to MultiIndex.') - if result.get(arg, parser_default) != parser_default: + if result.get(arg, depr_default) != depr_default: + # raise Exception(result.get(arg, depr_default), depr_default) depr_warning += msg + '\n\n' + else: + result[arg] = parser_default if depr_warning != '': warnings.warn(depr_warning, FutureWarning, stacklevel=2) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index a8449d6f874dfa..b0cc414861818f 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -267,8 +267,8 @@ def _do_test(df, r_dtype=None, c_dtype=None, with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', - chunksize=chunksize, tupleize_cols=False) - recons = self.read_csv(path, tupleize_cols=False, **kwargs) + chunksize=chunksize) + recons = self.read_csv(path, **kwargs) else: kwargs['header'] = 0 @@ -542,35 +542,35 @@ def _make_frame(names=None): # column & index are multi-index df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[ - 0, 1], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], + index_col=[0, 1]) assert_frame_equal(df, result) # column is mi df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) + df.to_csv(path) result = read_csv( - path, header=[0, 1, 2, 3], index_col=0, tupleize_cols=False) + path, header=[0, 1, 2, 3], index_col=0) assert_frame_equal(df, result) # dup column names? df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[ - 0, 1, 2], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], + index_col=[0, 1, 2]) assert_frame_equal(df, result) # writing with no index df = _make_frame() - df.to_csv(path, tupleize_cols=False, index=False) - result = read_csv(path, header=[0, 1], tupleize_cols=False) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) assert_frame_equal(df, result) # we lose the names here df = _make_frame(True) - df.to_csv(path, tupleize_cols=False, index=False) - result = read_csv(path, header=[0, 1], tupleize_cols=False) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) assert _all_none(*result.columns.names) result.columns.names = df.columns.names assert_frame_equal(df, result) @@ -589,15 +589,15 @@ def _make_frame(names=None): # whatsnew example df = _make_frame() - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1], index_col=[ - 0], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1], + index_col=[0]) assert_frame_equal(df, result) df = _make_frame(True) - df.to_csv(path, tupleize_cols=False) - result = read_csv(path, header=[0, 1], index_col=[ - 0], tupleize_cols=False) + df.to_csv(path) + result = read_csv(path, header=[0, 1], + index_col=[0]) assert_frame_equal(df, result) # column & index are multi-index (compatibility) @@ -613,18 +613,17 @@ def _make_frame(names=None): # invalid options df = _make_frame(True) - df.to_csv(path, tupleize_cols=False) + df.to_csv(path) for i in [6, 7]: msg = 'len of {i}, but only 5 lines in file'.format(i=i) with tm.assert_raises_regex(ParserError, msg): - read_csv(path, tupleize_cols=False, - header=lrange(i), index_col=0) + 
read_csv(path, header=lrange(i), index_col=0) # write with cols with tm.assert_raises_regex(TypeError, 'cannot specify cols ' 'with a MultiIndex'): - df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar']) + df.to_csv(path, columns=['foo', 'bar']) with ensure_clean('__tmp_to_csv_multiindex__') as path: # empty diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 2e73ce6aa19b0b..189a113bb6abb0 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -129,10 +129,15 @@ class TestDeprecatedFeatures(object): @pytest.mark.parametrize("engine", ["c", "python"]) @pytest.mark.parametrize("kwargs", [{"as_recarray": True}, + {"as_recarray": False}, {"buffer_lines": True}, + {"buffer_lines": False}, {"compact_ints": True}, + {"compact_ints": False}, {"use_unsigned": True}, + {"use_unsigned": False}, {"tupleize_cols": True}, + {"tupleize_cols": False}, {"skip_footer": 1}]) def test_deprecated_args(self, engine, kwargs): data = "1,2,3" From 7a2891bf8444549a5b0e8af3986b18e32c0a4424 Mon Sep 17 00:00:00 2001 From: Jan Rudolph Date: Sat, 14 Oct 2017 16:48:51 +0200 Subject: [PATCH 64/76] BUG: merging with a boolean/int categorical column (#17841) * BUG: merging with a boolean/int categorical column pandas-dev/pandas#17187 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/internals.py | 2 +- pandas/tests/reshape/test_merge.py | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f4fbbd3596b57c..b06075f2dd13c1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1017,6 +1017,7 @@ Categorical - Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) - Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) - Bug in categorical operations with :ref:`Series.cat ` not preserving the original Series' name (:issue:`17509`) +- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`) .. _whatsnew_0210.pypy: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index a1e9b24afe5fc8..f6c5ecbca81ef3 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5596,7 +5596,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): # preserve these for validation in _concat_compat return self.block.values - if self.block.is_bool: + if self.block.is_bool and not self.block.is_categorical: # External code requested filling/upcasting, bool values must # be upcasted to object to avoid being upcasted to numeric. 
values = self.block.astype(np.object_).values diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index ed99814afd20a9..81956c0bd5b281 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -1546,6 +1546,30 @@ def test_dtype_on_categorical_dates(self): result_inner = pd.merge(df, df2, how='inner', on=['date']) assert_frame_equal(result_inner, expected_inner) + @pytest.mark.parametrize('category_column,categories,expected_categories', + [([False, True, True, False], [True, False], + [True, False]), + ([2, 1, 1, 2], [1, 2], [1, 2]), + (['False', 'True', 'True', 'False'], + ['True', 'False'], ['True', 'False'])]) + def test_merging_with_bool_or_int_cateorical_column(self, category_column, + categories, + expected_categories): + # GH 17187 + # merging with a boolean/int categorical column + df1 = pd.DataFrame({'id': [1, 2, 3, 4], + 'cat': category_column}) + df1['cat'] = df1['cat'].astype('category', + categories=categories, ordered=True) + df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]}) + result = df1.merge(df2) + expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories, + 'num': [1, 9]}) + expected['cat'] = expected['cat'].astype('category', + categories=categories, + ordered=True) + assert_frame_equal(expected, result) + @pytest.fixture def left_df(): From e001500cb934ffdbc2baa082a1f17be4d7aa953f Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Sat, 14 Oct 2017 10:54:13 -0400 Subject: [PATCH 65/76] Refactor index-as-string groupby tests and fix spurious warning (Bug 17383) (#17843) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/groupby.py | 2 +- pandas/tests/groupby/test_groupby.py | 152 ------------------- pandas/tests/groupby/test_index_as_string.py | 116 ++++++++++++++ 4 files changed, 118 insertions(+), 153 deletions(-) create mode 100644 pandas/tests/groupby/test_index_as_string.py diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b06075f2dd13c1..117e7c9d11259d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -980,6 +980,7 @@ Groupby/Resample/Rolling - Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) - Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`) - Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`) +- Bug in ``DataFrame.groupby`` where spurious warning is raised when ``Grouper`` object is used to override ambiguous column name (:issue:`17383`) - Bug in ``TimeGrouper`` differs when passes as a list and as a scalar (:issue:`17530`) Sparse diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 3b7d3685db3b74..f13804f347c9f8 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2703,7 +2703,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, # a passed-in Grouper, directly convert if isinstance(key, Grouper): - binner, grouper, obj = key._get_grouper(obj) + binner, grouper, obj = key._get_grouper(obj, validate=False) if key.key is None: return grouper, [], obj else: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 657de9b589dc91..740526e262d160 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -253,158 +253,6 @@ def 
test_grouper_column_and_index(self): expected = df_single.reset_index().groupby(['inner', 'B']).mean() assert_frame_equal(result, expected) - def test_grouper_index_level_as_string(self): - # GH 5677, allow strings passed as the `by` parameter to reference - # columns or index levels - - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), - ('b', 1), ('b', 2), ('b', 3)]) - idx.names = ['outer', 'inner'] - df_multi = pd.DataFrame({"A": np.arange(6), - 'B': ['one', 'one', 'two', - 'two', 'one', 'one']}, - index=idx) - - df_single = df_multi.reset_index('outer') - - # Column and Index on MultiIndex - result = df_multi.groupby(['B', 'inner']).mean() - expected = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() - assert_frame_equal(result, expected) - - # Index and Column on MultiIndex - result = df_multi.groupby(['inner', 'B']).mean() - expected = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert_frame_equal(result, expected) - - # Column and Index on single Index - result = df_single.groupby(['B', 'inner']).mean() - expected = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() - assert_frame_equal(result, expected) - - # Index and Column on single Index - result = df_single.groupby(['inner', 'B']).mean() - expected = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert_frame_equal(result, expected) - - # Single element list of Index on MultiIndex - result = df_multi.groupby(['inner']).mean() - expected = df_multi.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Single element list of Index on single Index - result = df_single.groupby(['inner']).mean() - expected = df_single.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Index on MultiIndex - result = df_multi.groupby('inner').mean() - expected = df_multi.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Index on single Index - result = df_single.groupby('inner').mean() - expected = df_single.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - def test_grouper_column_index_level_precedence(self): - # GH 5677, when a string passed as the `by` parameter - # matches a column and an index level the column takes - # precedence - - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), - ('b', 1), ('b', 2), ('b', 3)]) - idx.names = ['outer', 'inner'] - df_multi_both = pd.DataFrame({"A": np.arange(6), - 'B': ['one', 'one', 'two', - 'two', 'one', 'one'], - 'inner': [1, 1, 1, 1, 1, 1]}, - index=idx) - - df_single_both = df_multi_both.reset_index('outer') - - # Group MultiIndex by single key - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby('inner').mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by single key - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby('inner').mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group MultiIndex by single key list - with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): - result = df_multi_both.groupby(['inner']).mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by single key list - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['inner']).mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group MultiIndex by two keys (1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby(['B', 'inner']).mean() - - expected = df_multi_both.groupby(['B', - pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(['B', - pd.Grouper(level='inner') - ]).mean() - assert not result.index.equals(not_expected.index) - - # Group MultiIndex by two keys (2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby(['inner', 'B']).mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner'), - 'B']).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), - 'B']).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by two keys (1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['B', 'inner']).mean() - - expected = df_single_both.groupby(['B', - pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(['B', - pd.Grouper(level='inner') - ]).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by two keys (2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['inner', 'B']).mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner'), - 'B']).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby([pd.Grouper(level='inner'), - 'B']).mean() - assert not result.index.equals(not_expected.index) - def test_grouper_getting_correct_binner(self): # GH 10063 diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py new file mode 100644 index 00000000000000..3b6e15036cfe20 --- /dev/null +++ b/pandas/tests/groupby/test_index_as_string.py @@ -0,0 +1,116 @@ +import pytest +import pandas as pd +import numpy as np + +from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm + + +@pytest.fixture(params=[['inner'], ['inner', 'outer']]) +def frame(request): + levels = request.param + df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 3, 1, 2, 3], + 'A': np.arange(6), + 'B': ['one', 'one', 'two', 'two', 'one', 'one']}) + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture() +def series(): + df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 3, 1, 2, 3], + 'A': np.arange(6), + 'B': ['one', 'one', 'two', 'two', 'one', 'one']}) + s = df.set_index(['outer', 'inner', 'B'])['A'] + + return s + + +@pytest.mark.parametrize('key_strs,groupers', [ + ('inner', # 
Index name + pd.Grouper(level='inner') + ), + (['inner'], # List of index name + [pd.Grouper(level='inner')] + ), + (['B', 'inner'], # Column and index + ['B', pd.Grouper(level='inner')] + ), + (['inner', 'B'], # Index and column + [pd.Grouper(level='inner'), 'B'])]) +def test_grouper_index_level_as_string(frame, key_strs, groupers): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() + assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('levels', [ + 'inner', 'outer', 'B', + ['inner'], ['outer'], ['B'], + ['inner', 'outer'], ['outer', 'inner'], + ['inner', 'outer', 'B'], ['B', 'outer', 'inner'] +]) +def test_grouper_index_level_as_string_series(series, levels): + + # Compute expected result + if isinstance(levels, list): + groupers = [pd.Grouper(level=lv) for lv in levels] + else: + groupers = pd.Grouper(level=levels) + + expected = series.groupby(groupers).mean() + + # Compute and check result + result = series.groupby(levels).mean() + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('key_strs,key_groupers,level_groupers', [ + ('inner', # Index name + pd.Grouper(key='inner'), + pd.Grouper(level='inner'), + ), + (['inner'], # List of index name + [pd.Grouper(key='inner')], + [pd.Grouper(level='inner')] + ), + (['B', 'inner'], # Column and index + ['B', pd.Grouper(key='inner')], + ['B', pd.Grouper(level='inner')] + ), + (['inner', 'B'], # Index and column + [pd.Grouper(key='inner'), 'B'], + [pd.Grouper(level='inner'), 'B'])]) +def test_grouper_column_index_level_precedence(frame, + key_strs, + key_groupers, + level_groupers): + + # GH 5677, when a string passed as the `by` parameter + # matches a column and an index level the column takes + # precedence and a FutureWarning is raised + + # Add 'inner' column to frame + # (frame already has an 'inner' index) + frame['inner'] = [1, 1, 1, 1, 1, 1] + + # Performing a groupby with strings should produce warning + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = frame.groupby(key_strs).mean() + + # Grouping with key Grouper should produce the same result and no warning + with tm.assert_produces_warning(False): + expected = frame.groupby(key_groupers).mean() + + assert_frame_equal(result, expected) + + # Grouping with level Grouper should produce a difference result but + # still no warning + with tm.assert_produces_warning(False): + not_expected = frame.groupby(level_groupers).mean() + + assert not result.index.equals(not_expected.index) From 8d16271e53c25e742094de5558d2937e02549ee3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 14 Oct 2017 14:50:47 -0400 Subject: [PATCH 66/76] TST: remove some deprecation warnings (#17870) --- pandas/io/html.py | 5 ++++- pandas/tests/indexes/test_multi.py | 2 +- pandas/tests/plotting/test_frame.py | 2 +- pandas/tests/plotting/test_misc.py | 1 - pandas/tests/reshape/test_merge.py | 12 ++++++------ pandas/tests/test_categorical.py | 6 +++--- pandas/tests/test_panel.py | 12 ++++++------ pandas/tests/test_panel4d.py | 4 ++-- 8 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index b5aaffcf710c29..6f98683a1bff1a 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -759,7 +759,7 @@ def _parse(flavor, io, match, attrs, encoding, **kwargs): def read_html(io, match='.+', flavor=None, header=None, index_col=None, skiprows=None, attrs=None, parse_dates=False, - tupleize_cols=False, thousands=',', encoding=None, + tupleize_cols=None, thousands=',', 
encoding=None, decimal='.', converters=None, na_values=None, keep_default_na=True): r"""Read HTML tables into a ``list`` of ``DataFrame`` objects. @@ -828,6 +828,9 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, :class:`~pandas.MultiIndex`, otherwise return raw tuples. Defaults to ``False``. + .. deprecated:: 0.21.0 + This argument will be removed and will always convert to MultiIndex + thousands : str, optional Separator to use to parse thousands. Defaults to ``','``. diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index b1b5413b4d0819..18bfc3d0efbee6 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -458,7 +458,7 @@ def test_set_value_keeps_names(self): df = df.sort_index() assert df.is_copy is None assert df.index.names == ('Name', 'Number') - df = df.set_value(('grethe', '4'), 'one', 99.34) + df.at[('grethe', '4'), 'one'] = 99.34 assert df.is_copy is None assert df.index.names == ('Name', 'Number') diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index a428d73fce1e3e..11dca1abc5ec74 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -2835,7 +2835,7 @@ def test_plain_axes(self): Series(rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() - from mpl_toolkits.axes_grid.inset_locator import inset_axes + from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1., loc=3) Series(rand(10)).plot(ax=ax) Series(rand(10)).plot(ax=iax) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 957369a20f16e9..6f476553091d99 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -204,7 +204,6 @@ def test_parallel_coordinates(self): def test_parallel_coordinates_with_sorted_labels(self): """ For #15908 """ from pandas.plotting import parallel_coordinates - df = DataFrame({"feat": [i for i in range(30)], "class": [2 for _ in range(10)] + [3 for _ in range(10)] + diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 81956c0bd5b281..172667c9a0fb86 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -1546,6 +1546,7 @@ def test_dtype_on_categorical_dates(self): result_inner = pd.merge(df, df2, how='inner', on=['date']) assert_frame_equal(result_inner, expected_inner) + @pytest.mark.parametrize('ordered', [True, False]) @pytest.mark.parametrize('category_column,categories,expected_categories', [([False, True, True, False], [True, False], [True, False]), @@ -1554,20 +1555,19 @@ def test_dtype_on_categorical_dates(self): ['True', 'False'], ['True', 'False'])]) def test_merging_with_bool_or_int_cateorical_column(self, category_column, categories, - expected_categories): + expected_categories, + ordered): # GH 17187 # merging with a boolean/int categorical column df1 = pd.DataFrame({'id': [1, 2, 3, 4], 'cat': category_column}) - df1['cat'] = df1['cat'].astype('category', - categories=categories, ordered=True) + df1['cat'] = df1['cat'].astype(CDT(categories, ordered=ordered)) df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]}) result = df1.merge(df2) expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories, 'num': [1, 9]}) - expected['cat'] = expected['cat'].astype('category', - categories=categories, - ordered=True) + expected['cat'] = expected['cat'].astype( + CDT(categories, ordered=ordered)) assert_frame_equal(expected, result) 
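A minimal sketch of what the parametrized merge test above exercises, assuming pandas 0.21 with the public ``pandas.api.types.CategoricalDtype`` (the column names and data here are illustrative only, not taken from the patch): merging on a frame that carries a boolean categorical column completes and keeps the categorical dtype.

    import pandas as pd
    from pandas.api.types import CategoricalDtype

    # a boolean categorical column on the left frame (GH 17187)
    left = pd.DataFrame({'id': [1, 2, 3, 4],
                         'cat': [False, True, True, False]})
    left['cat'] = left['cat'].astype(CategoricalDtype([True, False],
                                                      ordered=True))
    right = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})

    # inner merge on the shared 'id' column; 'cat' stays categorical
    merged = left.merge(right)
    assert str(merged['cat'].dtype) == 'category'
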
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 64c89dbdd0aa48..e1d0b756fed1cd 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3592,7 +3592,7 @@ def test_slicing_and_getting_ops(self): tm.assert_frame_equal(res_fancy, exp_fancy) # get_value - res_val = df.get_value("j", "cats") + res_val = df.at["j", "cats"] assert res_val == exp_val # i : int, slice, or sequence of integers @@ -3956,12 +3956,12 @@ def f(): # set_value df = orig.copy() - df.set_value("j", "cats", "b") + df.at["j", "cats"] = "b" tm.assert_frame_equal(df, exp_single_cats_value) def f(): df = orig.copy() - df.set_value("j", "cats", "c") + df.at["j", "cats"] = "c" pytest.raises(ValueError, f) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index da30c8c403d410..e845fcac333237 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1451,14 +1451,14 @@ def test_reindex_axis_style(self): expected1 = Panel(panel.values).iloc[:, [0, 1]] expected2 = Panel(panel.values).iloc[:, :, [0, 1]] - result = panel.reindex([0, 1], axis=0) - assert_panel_equal(result, expected0) + result = panel.reindex([0, 1], axis=0) + assert_panel_equal(result, expected0) - result = panel.reindex([0, 1], axis=1) - assert_panel_equal(result, expected1) + result = panel.reindex([0, 1], axis=1) + assert_panel_equal(result, expected1) - result = panel.reindex([0, 1], axis=2) - assert_panel_equal(result, expected2) + result = panel.reindex([0, 1], axis=2) + assert_panel_equal(result, expected2) def test_reindex_multi(self): with catch_warnings(record=True): diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 49859fd27d7bc2..c0e8770dff8b8f 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -563,8 +563,8 @@ def test_get_value(self): for item in self.panel4d.items: for mjr in self.panel4d.major_axis[::2]: for mnr in self.panel4d.minor_axis: - result = self.panel4d.get_value( - label, item, mjr, mnr) + result = self.panel4d.loc[ + label, item, mjr, mnr] expected = self.panel4d[label][item][mnr][mjr] assert_almost_equal(result, expected) From 9cbf89576b9491a379f6d54f769d8c97d07d9fc9 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Sat, 14 Oct 2017 19:53:05 +0100 Subject: [PATCH 67/76] =?UTF-8?q?ERR:=20Raise=20ValueError=20when=20week?= =?UTF-8?q?=20is=20passed=20in=20to=5Fdatetime=20format=20witho=E2=80=A6?= =?UTF-8?q?=20(#17819)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslibs/strptime.pyx | 10 ++++++++++ pandas/tests/indexes/datetimes/test_tools.py | 14 ++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 117e7c9d11259d..ce15fff81224b5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -784,6 +784,7 @@ Other API Changes - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). 
- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) +- :func:`to_datetime` now raises a ``ValueError`` when format includes ``%W`` or ``%U`` without also including day of the week and calendar year (:issue:`16774`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) - Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`). diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 20b24d6be9a581..59a7376280da0d 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -83,6 +83,16 @@ def array_strptime(ndarray[object] values, object fmt, assert is_raise or is_ignore or is_coerce + if fmt is not None: + if '%W' in fmt or '%U' in fmt: + if '%Y' not in fmt and '%y' not in fmt: + raise ValueError("Cannot use '%W' or '%U' without " + "day and year") + if ('%A' not in fmt and '%a' not in fmt and '%w' not + in fmt): + raise ValueError("Cannot use '%W' or '%U' without " + "day and year") + global _TimeRE_cache, _regex_cache with _cache_lock: if _getlang() != _TimeRE_cache.locale_time.lang: diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b8ce1f0af6ea85..330ec9f357655e 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -372,6 +372,20 @@ def test_datetime_invalid_datatype(self): with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) + @pytest.mark.parametrize('date, format', + [('2017-20', '%Y-%W'), + ('20 Sunday', '%W %A'), + ('20 Sun', '%W %a'), + ('2017-21', '%Y-%U'), + ('20 Sunday', '%U %A'), + ('20 Sun', '%U %a')]) + def test_week_without_day_and_calendar_year(self, date, format): + # GH16774 + + msg = "Cannot use '%W' or '%U' without day and year" + with tm.assert_raises_regex(ValueError, msg): + pd.to_datetime(date, format=format) + class TestToDatetimeUnit(object): From 5c0b20a3a383e69278f6347a958ccb1388d0334e Mon Sep 17 00:00:00 2001 From: Brian Tu <2344324+buntwo@users.noreply.github.com> Date: Sat, 14 Oct 2017 17:06:29 -0400 Subject: [PATCH 68/76] ENH: tolerance now takes list-like argument for reindex and get_indexer. 
(#17367) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/generic.py | 17 +++- pandas/core/indexes/base.py | 23 +++++- pandas/core/indexes/datetimelike.py | 14 ++-- pandas/core/indexes/datetimes.py | 9 ++- pandas/core/indexes/numeric.py | 21 +++-- pandas/core/indexes/period.py | 24 ++++-- pandas/core/indexes/timedeltas.py | 2 +- pandas/core/tools/timedeltas.py | 3 + pandas/tests/frame/test_indexing.py | 9 +++ .../tests/indexes/datetimes/test_datetime.py | 22 +++++- pandas/tests/indexes/period/test_period.py | 24 +++++- pandas/tests/indexes/test_base.py | 78 ++++++++++++------- pandas/tests/indexes/test_numeric.py | 8 ++ .../indexes/timedeltas/test_timedelta.py | 10 ++- pandas/tests/series/test_indexing.py | 8 ++ pandas/tests/sparse/test_indexing.py | 5 ++ 17 files changed, 222 insertions(+), 56 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ce15fff81224b5..5577089c776ed6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -234,6 +234,7 @@ Other Enhancements - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`) - Improved the import time of pandas by about 2.25x. (:issue:`16764`) - :func:`read_json` and :func:`to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`) +- :func:`Series.reindex`, :func:`DataFrame.reindex`, :func:`Index.get_indexer` now support list-like argument for ``tolerance``. (:issue:`17367`) .. _whatsnew_0210.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc3982dba93ce3..5a311afc27c9a7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2470,9 +2470,10 @@ def reindex_like(self, other, method=None, copy=True, limit=None, Maximum number of consecutive labels to fill for inexact matches. tolerance : optional Maximum distance between labels of the other object and this - object for inexact matches. + object for inexact matches. Can be list-like. .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Notes ----- @@ -2860,7 +2861,14 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Examples -------- @@ -3120,7 +3128,14 @@ def _reindex_multi(self, axes, copy, fill_value): matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + .. versionadded:: 0.17.0 + ..
versionadded:: 0.21.0 (list-like tolerance) Examples -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c3343f149005c3..a995fc10a66749 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2484,7 +2484,14 @@ def _get_unique_index(self, dropna=False): the index at the matching location must satisfy the equation ``abs(index[loc] - key) <= tolerance``. + Tolerance may be a scalar + value, which applies the same tolerance to all values, or + list-like, which applies variable tolerance per element. List-like + includes list, tuple, array, Series, and must be the same size as + the index and its dtype must exactly match the index's type. + .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Returns ------- @@ -2627,7 +2634,14 @@ def _get_level_values(self, level): matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + .. versionadded:: 0.17.0 + .. versionadded:: 0.21.0 (list-like tolerance) Examples -------- @@ -2647,7 +2661,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = _ensure_index(target) if tolerance is not None: - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, target) # Treat boolean labels passed to a numeric index as not found. Without # this fix False and True would be treated as 0 and 1 respectively.
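Before the remaining hunks, a short usage sketch of the list-like ``tolerance`` this patch adds; the inputs and expected results mirror the tests added further below:

    import numpy as np
    import pandas as pd

    idx = pd.Index(np.arange(10))

    # a scalar tolerance applies uniformly to every target value
    idx.get_indexer([0.2, 1.8, 8.5], method='nearest', tolerance=0.2)
    # -> array([ 0,  2, -1])

    # a list-like tolerance is applied element-wise and must have the
    # same length as the target
    idx.get_indexer([0.2, 1.8, 8.5], method='nearest',
                    tolerance=[0.3, 0.3, 0.1])
    # -> array([ 0,  2, -1])

    # a mismatched length is rejected
    try:
        idx.get_indexer([1, 0], method='nearest', tolerance=[1, 2, 3])
    except ValueError as err:
        print(err)  # list-like tolerance size must match target index size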
@@ -2683,10 +2697,15 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): 'backfill or nearest reindexing') indexer = self._engine.get_indexer(target._values) + return _ensure_platform_int(indexer) - def _convert_tolerance(self, tolerance): + def _convert_tolerance(self, tolerance, target): # override this method on subclasses + tolerance = np.asarray(tolerance) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') return tolerance def _get_fill_indexer(self, target, method, limit=None, tolerance=None): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d5b4525e8a1eb6..5d40975586e734 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -7,6 +7,7 @@ from pandas import compat from pandas.compat.numpy import function as nv +from pandas.core.tools.timedeltas import to_timedelta import numpy as np from pandas.core.dtypes.common import ( @@ -431,13 +432,12 @@ def asobject(self): from pandas.core.index import Index return Index(self._box_values(self.asi8), name=self.name, dtype=object) - def _convert_tolerance(self, tolerance): - try: - return Timedelta(tolerance).to_timedelta64() - except ValueError: - raise ValueError('tolerance argument for %s must be convertible ' - 'to Timedelta: %r' - % (type(self).__name__, tolerance)) + def _convert_tolerance(self, tolerance, target): + tolerance = np.asarray(to_timedelta(tolerance, box=False)) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') + return tolerance def _maybe_mask_results(self, result, fill_value=None, convert=None): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 25897bee298458..d16251a7829b91 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1423,7 +1423,7 @@ def get_loc(self, key, method=None, tolerance=None): if tolerance is not None: # try converting tolerance now, so errors don't get swallowed by # the try/except clauses below - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, np.asarray(key)) if isinstance(key, datetime): # needed to localize naive datetimes @@ -1447,7 +1447,12 @@ def get_loc(self, key, method=None, tolerance=None): try: stamp = Timestamp(key, tz=self.tz) return Index.get_loc(self, stamp, method, tolerance) - except (KeyError, ValueError): + except KeyError: + raise KeyError(key) + except ValueError as e: + # list-like tolerance size must match target index size + if 'list-like' in str(e): + raise e raise KeyError(key) def _maybe_cast_slice_bound(self, label, side, kind): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 9fc47ad7b773cf..1f007b1961e062 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -71,12 +71,21 @@ def _convert_for_op(self, value): return value - def _convert_tolerance(self, tolerance): - try: - return float(tolerance) - except ValueError: - raise ValueError('tolerance argument for %s must be numeric: %r' % - (type(self).__name__, tolerance)) + def _convert_tolerance(self, tolerance, target): + tolerance = np.asarray(tolerance) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') + if not np.issubdtype(tolerance.dtype, np.number): + if 
tolerance.ndim > 0: + raise ValueError(('tolerance argument for %s must contain ' + 'numeric elements if it is list type') % + (type(self).__name__,)) + else: + raise ValueError(('tolerance argument for %s must be numeric ' + 'if it is a scalar: %r') % + (type(self).__name__, tolerance)) + return tolerance @classmethod def _assert_safe_casting(cls, data, subarr): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b70b4c4e4067c3..148ca2725fbdcd 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -641,12 +641,17 @@ def to_timestamp(self, freq=None, how='start'): return DatetimeIndex(new_data, freq='infer', name=self.name) def _maybe_convert_timedelta(self, other): - if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): + if isinstance( + other, (timedelta, np.timedelta64, offsets.Tick, np.ndarray)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): - nanos = tslib._delta_to_nanoseconds(other) + if isinstance(other, np.ndarray): + nanos = np.vectorize(tslib._delta_to_nanoseconds)(other) + else: + nanos = tslib._delta_to_nanoseconds(other) offset_nanos = tslib._delta_to_nanoseconds(offset) - if nanos % offset_nanos == 0: + check = np.all(nanos % offset_nanos == 0) + if check: return nanos // offset_nanos elif isinstance(other, offsets.DateOffset): freqstr = other.rule_code @@ -782,7 +787,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): target = target.asi8 if tolerance is not None: - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, target) return Index.get_indexer(self._int64index, target, method, limit, tolerance) @@ -825,7 +830,8 @@ def get_loc(self, key, method=None, tolerance=None): try: ordinal = tslib.iNaT if key is tslib.NaT else key.ordinal if tolerance is not None: - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, + np.asarray(key)) return self._int64index.get_loc(ordinal, method, tolerance) except KeyError: @@ -908,8 +914,12 @@ def _get_string_slice(self, key): return slice(self.searchsorted(t1.ordinal, side='left'), self.searchsorted(t2.ordinal, side='right')) - def _convert_tolerance(self, tolerance): - tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance) + def _convert_tolerance(self, tolerance, target): + tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance, + target) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError('list-like tolerance size must match ' + 'target index size') return self._maybe_convert_timedelta(tolerance) def insert(self, loc, item): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 89757c2bf40da2..6e08c32f30dcd8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -699,7 +699,7 @@ def get_loc(self, key, method=None, tolerance=None): if tolerance is not None: # try converting tolerance now, so errors don't get swallowed by # the try/except clauses below - tolerance = self._convert_tolerance(tolerance) + tolerance = self._convert_tolerance(tolerance, np.asarray(key)) if _is_convertible_to_td(key): key = Timedelta(key) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index d5132826bb93f8..f61d9f90d6ca26 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -83,6 +83,9 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): elif 
isinstance(arg, ABCIndexClass): return _convert_listlike(arg, unit=unit, box=box, errors=errors, name=arg.name) + elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 0: + # extract array scalar and process below + arg = arg.item() elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1: return _convert_listlike(arg, unit=unit, box=box, errors=errors) elif getattr(arg, 'ndim', 1) > 1: diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 1a16e4ef48b647..f850b8f2ee1780 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1935,9 +1935,13 @@ def test_reindex_methods(self): actual = df.reindex_like(df, method=method, tolerance=0) assert_frame_equal(df, actual) + actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) + assert_frame_equal(df, actual) actual = df.reindex(target, method=method, tolerance=1) assert_frame_equal(expected, actual) + actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) + assert_frame_equal(expected, actual) e2 = expected[::-1] actual = df.reindex(target[::-1], method=method) @@ -1958,6 +1962,11 @@ def test_reindex_methods(self): actual = df.reindex(target, method='nearest', tolerance=0.2) assert_frame_equal(expected, actual) + expected = pd.DataFrame({'x': [0, np.nan, 1, np.nan]}, index=target) + actual = df.reindex(target, method='nearest', + tolerance=[0.5, 0.01, 0.4, 0.1]) + assert_frame_equal(expected, actual) + def test_reindex_frame_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 47f53f53cfd021..8d9ac59cf9883d 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -41,10 +41,17 @@ def test_get_loc(self): tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-01T12', method='nearest', tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') with pytest.raises(KeyError): idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') + with pytest.raises( + ValueError, + match='tolerance size must match target index size'): + idx.get_loc('2000-01-01', method='nearest', + tolerance=[pd.Timedelta('1day').to_timedelta64(), + pd.Timedelta('1day').to_timedelta64()]) assert idx.get_loc('2000', method='nearest') == slice(0, 3) assert idx.get_loc('2000-01', method='nearest') == slice(0, 3) @@ -93,6 +100,19 @@ def test_get_indexer(self): idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 hour')), np.array([0, -1, 1], dtype=np.intp)) + tol_raw = [pd.Timedelta('1 hour'), + pd.Timedelta('1 hour'), + pd.Timedelta('1 hour').to_timedelta64(), ] + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=[np.timedelta64(x) for x in tol_raw]), + np.array([0, -1, 1], dtype=np.intp)) + tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), + pd.Timedelta('1 hour').to_timedelta64(), + 'foo', ] + with pytest.raises( + ValueError, match='abbreviation w/o a number'): + idx.get_indexer(target, 'nearest', tolerance=tol_bad) with pytest.raises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') diff --git a/pandas/tests/indexes/period/test_period.py 
b/pandas/tests/indexes/period/test_period.py index 51f7d13cb0638f..ae500e66359b49 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -9,6 +9,7 @@ from pandas import (PeriodIndex, period_range, notna, DatetimeIndex, NaT, Index, Period, Int64Index, Series, DataFrame, date_range, offsets, compat) +from pandas.core.indexes.period import IncompatibleFrequency from ..datetimelike import DatetimeLike @@ -83,7 +84,8 @@ def test_get_loc(self): tolerance=np.timedelta64(1, 'D')) == 1 assert idx.get_loc('2000-01-02T12', method='nearest', tolerance=timedelta(1)) == 1 - with tm.assert_raises_regex(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' @@ -91,6 +93,12 @@ def test_get_loc(self): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') with pytest.raises(KeyError): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + with pytest.raises( + ValueError, + match='list-like tolerance size must match target index size'): + idx.get_loc('2000-01-10', method='nearest', + tolerance=[pd.Timedelta('1 day').to_timedelta64(), + pd.Timedelta('1 day').to_timedelta64()]) def test_where(self): i = self.create_index() @@ -158,6 +166,20 @@ def test_get_indexer(self): tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest', tolerance='1 day'), np.array([0, 1, 1], dtype=np.intp)) + tol_raw = [pd.Timedelta('1 hour'), + pd.Timedelta('1 hour'), + np.timedelta64(1, 'D'), ] + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=[np.timedelta64(x) for x in tol_raw]), + np.array([0, -1, 1], dtype=np.intp)) + tol_bad = [pd.Timedelta('2 hour').to_timedelta64(), + pd.Timedelta('1 hour').to_timedelta64(), + np.timedelta64(1, 'M'), ] + with pytest.raises( + IncompatibleFrequency, + match='Input has different freq from'): + idx.get_indexer(target, 'nearest', tolerance=tol_bad) def test_repeat(self): # GH10183 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 81f113d58d680a..307cda7f2d1cb7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1075,40 +1075,58 @@ def test_get_indexer_invalid(self): with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], limit=1) - def test_get_indexer_nearest(self): + @pytest.mark.parametrize( + 'method, tolerance, indexer, expected', + [ + ('pad', None, [0, 5, 9], [0, 5, 9]), + ('backfill', None, [0, 5, 9], [0, 5, 9]), + ('nearest', None, [0, 5, 9], [0, 5, 9]), + ('pad', 0, [0, 5, 9], [0, 5, 9]), + ('backfill', 0, [0, 5, 9], [0, 5, 9]), + ('nearest', 0, [0, 5, 9], [0, 5, 9]), + + ('pad', None, [0.2, 1.8, 8.5], [0, 1, 8]), + ('backfill', None, [0.2, 1.8, 8.5], [1, 2, 9]), + ('nearest', None, [0.2, 1.8, 8.5], [0, 2, 9]), + ('pad', 1, [0.2, 1.8, 8.5], [0, 1, 8]), + ('backfill', 1, [0.2, 1.8, 8.5], [1, 2, 9]), + ('nearest', 1, [0.2, 1.8, 8.5], [0, 2, 9]), + + ('pad', 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), + ('backfill', 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), + ('nearest', 0.2, [0.2, 1.8, 8.5], [0, 2, -1])]) + def test_get_indexer_nearest(self, method, tolerance, indexer, expected): idx = Index(np.arange(10)) - all_methods = ['pad', 'backfill', 'nearest'] - for method in all_methods: - actual = idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], - dtype=np.intp)) - - 
actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0) - tm.assert_numpy_array_equal(actual, np.array([0, 5, 9], - dtype=np.intp)) - - for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], - [0, 2, 9]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) - - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=1) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) + actual = idx.get_indexer(indexer, method=method, tolerance=tolerance) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) + + @pytest.mark.parametrize('listtype', [list, tuple, Series, np.array]) + @pytest.mark.parametrize( + 'tolerance, expected', + list(zip([[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], + [0.1, 0.5, 0.5]], + [[0, 2, -1], [0, -1, -1], + [-1, 2, 9]]))) + def test_get_indexer_nearest_listlike_tolerance(self, tolerance, + expected, listtype): + idx = Index(np.arange(10)) - for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], - [0, 2, -1]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=0.2) - tm.assert_numpy_array_equal(actual, np.array(expected, - dtype=np.intp)) + actual = idx.get_indexer([0.2, 1.8, 8.5], method='nearest', + tolerance=listtype(tolerance)) + tm.assert_numpy_array_equal(actual, np.array(expected, + dtype=np.intp)) + def test_get_indexer_nearest_error(self): + idx = Index(np.arange(10)) with tm.assert_raises_regex(ValueError, 'limit argument'): idx.get_indexer([1, 0], method='nearest', limit=1) + with pytest.raises(ValueError, match='tolerance size must match'): + idx.get_indexer([1, 0], method='nearest', + tolerance=[1, 2, 3]) + def test_get_indexer_nearest_decreasing(self): idx = Index(np.arange(10))[::-1] @@ -1141,6 +1159,10 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + with pytest.raises(TypeError): + idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', + tolerance=[2, 2, 2, 2]) + def test_get_indexer_numeric_index_boolean_target(self): # GH 16877 numeric_idx = pd.Index(range(4)) @@ -1172,6 +1194,8 @@ def test_get_loc(self): idx.get_loc(1.1, 'nearest', tolerance='invalid') with tm.assert_raises_regex(ValueError, 'tolerance .* valid if'): idx.get_loc(1.1, tolerance=1) + with pytest.raises(ValueError, match='tolerance size must match'): + idx.get_loc(1.1, 'nearest', tolerance=[1, 1]) idx = pd.Index(['a', 'c']) with pytest.raises(TypeError): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index dc38b0a2b1fb7f..a96c677852339d 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -354,6 +354,14 @@ def test_get_loc(self): with tm.assert_raises_regex(ValueError, 'must be numeric'): idx.get_loc(1.4, method='nearest', tolerance='foo') + with pytest.raises(ValueError, match='must contain numeric elements'): + idx.get_loc(1.4, method='nearest', tolerance=np.array(['foo'])) + + with pytest.raises( + ValueError, + match='tolerance size must match target index size'): + idx.get_loc(1.4, method='nearest', tolerance=np.array([1, 2])) + def test_get_loc_na(self): idx = Float64Index([np.nan, 1, 2]) assert idx.get_loc(1) == 1 diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 0b3bd0b03bccfd..0a09199eca9d57 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ 
b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -60,9 +60,17 @@ def test_get_loc(self): assert idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)) == 1 - with tm.assert_raises_regex(ValueError, 'must be convertible'): + with tm.assert_raises_regex(ValueError, + 'unit abbreviation w/o a number'): idx.get_loc(idx[1], method='nearest', tolerance='foo') + with pytest.raises( + ValueError, + match='tolerance size must match'): + idx.get_loc(idx[1], method='nearest', + tolerance=[Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64()]) + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: assert idx.get_loc('1 day 1 hour', method) == loc diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 75ae47ed2fdc11..d141b378fe2146 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -2117,11 +2117,19 @@ def test_reindex_nearest(self): actual = s.reindex_like(actual, method='nearest', tolerance=1) assert_series_equal(expected, actual) + actual = s.reindex_like(actual, method='nearest', + tolerance=[1, 2, 3, 4]) + assert_series_equal(expected, actual) actual = s.reindex(target, method='nearest', tolerance=0.2) expected = Series([0, 1, np.nan, 2], target) assert_series_equal(expected, actual) + actual = s.reindex(target, method='nearest', + tolerance=[0.3, 0.01, 0.4, 3]) + expected = Series([0, np.nan, np.nan, 2], target) + assert_series_equal(expected, actual) + def test_reindex_backfill(self): pass diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py index edbac8f09241b1..37a287af71451c 100644 --- a/pandas/tests/sparse/test_indexing.py +++ b/pandas/tests/sparse/test_indexing.py @@ -414,6 +414,11 @@ def test_reindex_nearest(self): expected = pd.Series([0, 1, np.nan, 2], target).to_sparse() tm.assert_sp_series_equal(expected, actual) + actual = s.reindex(target, method='nearest', + tolerance=[0.3, 0.01, 0.4, 3]) + expected = pd.Series([0, np.nan, np.nan, 2], target).to_sparse() + tm.assert_sp_series_equal(expected, actual) + def tests_indexing_with_sparse(self): # GH 13985 From a85bfdc5db4b6ec03596eb0cb72e1c8fcb2a7260 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 14 Oct 2017 17:09:07 -0400 Subject: [PATCH 69/76] TST: remove moar warnings (#17872) --- pandas/core/dtypes/missing.py | 6 ++++ pandas/tests/io/test_gbq.py | 1 - pandas/tests/io/test_pytables.py | 53 ++++++-------------------------- pandas/tests/test_algos.py | 2 +- 4 files changed, 17 insertions(+), 45 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 49b7b1d1d3a9b4..d8973dd2eb27af 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -327,6 +327,12 @@ def array_equivalent(left, right, strict_nan=False): left = left.view('i8') right = right.view('i8') + # if we have structured dtypes, compare first + if (left.dtype.type is np.void or + right.dtype.type is np.void): + if left.dtype != right.dtype: + return False + return np.array_equal(left, right) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index 8f20fb2e75c8ad..58a84ad4d47f86 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -92,7 +92,6 @@ def make_mixed_dataframe_v2(test_size): index=range(test_size)) -@pytest.mark.xfail(reason="gbq having issues") @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(object): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 
6e3e338ce3de39..a97747b93369fc 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1,5 +1,4 @@ import pytest -import sys import os import tempfile from contextlib import contextmanager @@ -16,7 +15,8 @@ date_range, timedelta_range, Index, DatetimeIndex, isna) -from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type +from pandas.compat import (is_platform_windows, is_platform_little_endian, + PY3, PY35, PY36, BytesIO, text_type) from pandas.io.formats.printing import pprint_thing from pandas.core.dtypes.common import is_categorical_dtype @@ -1042,11 +1042,10 @@ def check(format, index): with catch_warnings(record=True): check('fixed', index) + @pytest.mark.skipif(not is_platform_little_endian(), + reason="platform is not little endian") def test_encoding(self): - if sys.byteorder != 'little': - pytest.skip('system byteorder is not little') - with ensure_clean_store(self.path) as store: df = DataFrame(dict(A='foo', B='bar'), index=range(5)) df.loc[2, 'A'] = np.nan @@ -2887,9 +2886,6 @@ def test_index_types(self): def test_timeseries_preepoch(self): - if sys.version_info[0] == 2 and sys.version_info[1] < 7: - pytest.skip("won't work on Python < 2.7") - dr = bdate_range('1/1/1940', '1/1/1960') ts = Series(np.random.randn(len(dr)), index=dr) try: @@ -4274,13 +4270,11 @@ def test_select_as_multiple(self): ['df1', 'df3'], where=['A>0', 'B>0'], selector='df1') + @pytest.mark.skipif( + LooseVersion(tables.__version__) < '3.1.0', + reason="tables version does not support fix for nan selection bug: GH 4858") def test_nan_selection_bug_4858(self): - # GH 4858; nan selection bug, only works for pytables >= 3.1 - if LooseVersion(tables.__version__) < '3.1.0': - pytest.skip('tables version does not support fix for nan ' - 'selection bug: GH 4858') - with ensure_clean_store(self.path) as store: df = DataFrame(dict(cols=range(6), values=range(6)), @@ -4598,11 +4592,9 @@ def test_pytables_native_read(self): d2 = store['detector/readout'] assert isinstance(d2, DataFrame) + @pytest.mark.skipif(PY35 and is_platform_windows(), + reason="native2 read fails oddly on windows / 3.5") def test_pytables_native2_read(self): - # fails on win/3.5 oddly - if PY35 and is_platform_windows(): - pytest.skip("native2 read fails oddly on windows / 3.5") - with ensure_clean_store( tm.get_data_path('legacy_hdf/pytables_native2.h5'), mode='r') as store: @@ -4690,31 +4682,6 @@ def do_copy(f, new_f=None, keys=None, finally: safe_remove(path) - def test_legacy_table_write(self): - pytest.skip("cannot write legacy tables") - - store = HDFStore(tm.get_data_path( 'legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a') - - df = tm.makeDataFrame() - with catch_warnings(record=True): - wp = tm.makePanel() - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['foo', 'bar']) - df = DataFrame(np.random.randn(10, 3), index=index, - columns=['A', 'B', 'C']) - store.append('mi', df) - - df = DataFrame(dict(A='foo', B='bar'), index=lrange(10)) - store.append('df', df, data_columns=['B'], min_itemsize={'A': 200}) - store.append('wp', wp) - - store.close() - def test_store_datetime_fractional_secs(self): with ensure_clean_store(self.path) as store: @@ -5260,7 +5227,7 @@ def test_read_hdf_series_mode_r(self, format): result = pd.read_hdf(path, key='data', mode='r') tm.assert_series_equal(result, series) - @pytest.mark.skipif(sys.version_info < (3, 6), reason="Need 
python 3.6") + @pytest.mark.skipif(not PY36, reason="Need python 3.6") def test_fspath(self): with tm.ensure_clean('foo.h5') as path: with pd.HDFStore(path) as store: diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3694bba594adb2..38625bfb299179 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -765,7 +765,7 @@ def test_duplicated_with_nas(self): 2, 4, 1, 5, 6]), np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]), - pytest.mark.xfail(resaon="Complex bug. GH 16399")( + pytest.mark.xfail(reason="Complex bug. GH 16399")( np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j, 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]) ), From 528fdaa80c847fd1c186dbdd3db061b993b557c0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 14 Oct 2017 16:44:51 -0700 Subject: [PATCH 70/76] flake8 cleanup (#17873) --- pandas/_libs/tslibs/fields.pyx | 104 ++++++++++++++++++++++++--------- 1 file changed, 77 insertions(+), 27 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3ea414b2d4a701..1427bf50239ba3 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -160,7 +160,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, if field == 'is_month_start': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -172,7 +174,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -185,7 +189,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_month_end': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -202,7 +208,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -219,7 +227,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_quarter_start': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -232,7 +242,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -245,7 +257,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_quarter_end': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -263,7 +277,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 
0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -280,7 +296,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_year_start': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -293,7 +311,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -306,7 +326,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, elif field == 'is_year_end': if is_business: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -324,7 +346,9 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, return out.view(bool) else: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = 0; continue + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -367,7 +391,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): if field == 'Y': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -377,7 +403,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'M': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -387,7 +415,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'D': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -397,7 +427,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'h': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -407,7 +439,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'm': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -417,7 +451,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 's': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -427,7 +463,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'us': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -437,7 +475,9 @@ def 
get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'ns': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -446,7 +486,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'doy': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -457,7 +499,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'dow': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -467,7 +511,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'woy': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -477,7 +523,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): doy = mo_off + dts.day dow = dayofweek(dts.year, dts.month, dts.day) - #estimate + # estimate woy = (doy - 1) - dow + 3 if woy >= 0: woy = woy / 7 + 1 @@ -498,7 +544,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'q': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -509,7 +557,9 @@ def get_date_field(ndarray[int64_t] dtindex, object field): elif field == 'dim': with nogil: for i in range(count): - if dtindex[i] == NPY_NAT: out[i] = -1; continue + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) @@ -522,7 +572,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): cdef inline int days_in_month(pandas_datetimestruct dts) nogil: - return days_per_month_table[is_leapyear(dts.year)][dts.month -1] + return days_per_month_table[is_leapyear(dts.year)][dts.month - 1] cpdef isleapyear_arr(ndarray years): From aed9b921394efb13ed1b7870508e8638db951101 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 14 Oct 2017 17:33:11 -0700 Subject: [PATCH 71/76] TST: Catch read_html slow test warning (#17874) Follow up to gh-17872. xref gh-17870, gh-17865. 
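The fix below wraps the deprecated call in ``tm.assert_produces_warning``. As a self-contained sketch of that testing pattern (the warning-emitting function here is a hypothetical stand-in):

    import warnings
    import pandas.util.testing as tm

    def legacy_call():
        # stand-in for an API that now emits a deprecation warning
        warnings.warn("tupleize_cols is deprecated", FutureWarning)

    # passes only if the block emits a FutureWarning; check_stacklevel=False
    # skips validating which stack frame the warning points at
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        legacy_call()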
--- pandas/tests/io/test_html.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 6fc080c8d9090e..8dfae2733ef207 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -335,8 +335,10 @@ def test_multiindex_header_index(self): @pytest.mark.slow def test_multiindex_header_skiprows_tuples(self): - df = self._bank_data(header=[0, 1], skiprows=1, tupleize_cols=True)[0] - assert isinstance(df.columns, Index) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df = self._bank_data(header=[0, 1], skiprows=1, + tupleize_cols=True)[0] + assert isinstance(df.columns, Index) @pytest.mark.slow def test_multiindex_header_skiprows(self): From c9876947831b8a2093d87613a83777a98dfbfcaa Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 15 Oct 2017 17:29:58 -0700 Subject: [PATCH 72/76] TST: Skip if no openpyxl in test_excel (#17883) "test_read_excel_parse_dates" was calling "to_excel" without checking if "openpyxl" was installed. --- pandas/tests/io/test_excel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index f21f638799e579..7af8bd12ca805d 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -975,6 +975,7 @@ def test_read_excel_chunksize(self): def test_read_excel_parse_dates(self): # GH 11544, 12051 + _skip_if_no_openpyxl() df = DataFrame( {'col': [1, 2, 3], From 00f23ca69e8999592f1bad02c6d763df48365a97 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Mon, 16 Oct 2017 07:59:05 +0100 Subject: [PATCH 73/76] DOC: Adding examples to update docstring (#16812) (#17859) * DOC: Adding examples to update docstring (#16812) * formatting issues * improving examples --- pandas/core/frame.py | 55 +++++++++++++++++++++++++++++++++++++++++++ pandas/core/series.py | 38 ++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97943f153319bd..c09995b45f0ce4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4154,6 +4154,61 @@ def update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict : boolean If True, will raise an error if the DataFrame and other both contain data in the same place. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], + ... 'B': [400, 500, 600]}) + >>> new_df = pd.DataFrame({'B': [4, 5, 6], + ... 'C': [7, 8, 9]}) + >>> df.update(new_df) + >>> df + A B + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']}) + >>> df.update(new_df) + >>> df + A B + 0 a d + 1 b e + 2 c f + + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2]) + >>> df.update(new_column) + >>> df + A B + 0 a d + 1 b y + 2 c e + >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], + ... 'B': ['x', 'y', 'z']}) + >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2]) + >>> df.update(new_df) + >>> df + A B + 0 a x + 1 b d + 2 c e + + If ``other`` contains NaNs the corresponding values are not updated + in the original dataframe. + + >>> df = pd.DataFrame({'A': [1, 2, 3], + ... 
'B': [400, 500, 600]}) + >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]}) + >>> df.update(new_df) + >>> df + A B + 0 1 4.0 + 1 2 500.0 + 2 3 6.0 """ import pandas.core.computation.expressions as expressions # TODO: Support other joins diff --git a/pandas/core/series.py b/pandas/core/series.py index 76baa89f165d43..dbd91309ed1853 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1781,6 +1781,44 @@ def update(self, other): Parameters ---------- other : Series + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + >>> s = pd.Series(['a', 'b', 'c']) + >>> s.update(pd.Series(['d', 'e'], index=[0, 2])) + >>> s + 0 d + 1 b + 2 e + dtype: object + + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6, 7, 8])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + If ``other`` contains NaNs the corresponding values are not updated + in the original Series. + + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, np.nan, 6])) + >>> s + 0 4 + 1 2 + 2 6 + dtype: int64 + """ other = other.reindex_like(self) mask = notna(other) From a2ff3f0a4d847b9798b76046db05f723c2723355 Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Mon, 16 Oct 2017 19:27:20 +0900 Subject: [PATCH 74/76] BUG: Fix wrong column selection in drop_duplicates when duplicate column names (#17879) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 3 ++- pandas/tests/frame/test_analytics.py | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5577089c776ed6..2216119e4d391e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1008,6 +1008,7 @@ Reshaping - Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) - Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) - Bug in :func:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) +- Bug in ``DataFrame.drop_duplicates`` where dropping with non-unique column names raised a ``ValueError`` (:issue:`17836`) Numeric ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c09995b45f0ce4..2580509c4e4f88 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3556,7 +3556,8 @@ def f(vals): isinstance(subset, tuple) and subset in self.columns): subset = subset, - vals = (self[col].values for col in subset) + vals = (col.values for name, col in self.iteritems() + if name in subset) labels, shape = map(list, zip(*map(f, vals))) ids = get_group_index(labels, shape, sort=False, xnull=False) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index c36b5957a4283d..1bac4037e99c93 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1394,6 +1394,21 @@ def test_drop_duplicates(self): for keep in ['first', 'last', False]: assert df.duplicated(keep=keep).sum() == 0 + def test_drop_duplicates_with_duplicate_column_names(self): + # GH17836 + df = DataFrame([ + [1, 2, 5], + [3, 4, 6], + [3, 4, 7] + ], columns=['a', 'a', 'b']) + + result0 = df.drop_duplicates() + tm.assert_frame_equal(result0, df) + + result1 = df.drop_duplicates('a') + expected1 = df[:2] + tm.assert_frame_equal(result1, expected1) + def 
test_drop_duplicates_for_take_all(self): df = DataFrame({'AAA': ['foo', 'bar', 'baz', 'bar', 'foo', 'bar', 'qux', 'foo'], From 34978a72107b3c77cf295fadbf1f5244ccb6afdf Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 16 Oct 2017 03:28:34 -0700 Subject: [PATCH 75/76] DEPR: Deprecate tupleize_cols in to_csv (#17877) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 19 ++++++++++++++++--- pandas/tests/frame/test_to_csv.py | 6 ++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2216119e4d391e..642ee3c8e54c7f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -803,6 +803,7 @@ Deprecations - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) - :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`) +- :meth:`DataFrame.to_csv` has deprecated the ``tupleize_cols`` argument. Multi-index columns will always be written as rows in the CSV file (:issue:`17060`) - The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2580509c4e4f88..3f0aa9d2a1dd23 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1432,7 +1432,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, - tupleize_cols=False, date_format=None, doublequote=True, + tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.'): r"""Write DataFrame to a comma-separated values (csv) file @@ -1485,8 +1485,13 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, chunksize : int or None rows to write at a time tupleize_cols : boolean, default False - write multi_index columns as a list of tuples (if True) - or new (expanded format) if False) + .. deprecated:: 0.21.0 + This argument will be removed; multi-index columns will always + be written as separate rows in the CSV file. + + Write MultiIndex columns as a list of tuples (if True) or in + the new, expanded format, where each level of the MultiIndex is + written as a row in the CSV (if False). date_format : string, default None Format string for datetime objects decimal: string, default '.' 
@@ -1494,6 +1499,14 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, European data """ + + if tupleize_cols is not None: + warnings.warn("The 'tupleize_cols' parameter is deprecated and " + "will be removed in a future version", + FutureWarning, stacklevel=2) + else: + tupleize_cols = False + formatter = fmt.CSVFormatter(self, path_or_buf, line_terminator=line_terminator, sep=sep, encoding=encoding, diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index b0cc414861818f..4162a586f8063d 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -577,7 +577,8 @@ def _make_frame(names=None): # tupleize_cols=True and index=False df = _make_frame(True) - df.to_csv(path, tupleize_cols=True, index=False) + with tm.assert_produces_warning(FutureWarning): + df.to_csv(path, tupleize_cols=True, index=False) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -602,7 +603,8 @@ def _make_frame(names=None): # column & index are multi-index (compatibility) df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) - df.to_csv(path, tupleize_cols=True) + with tm.assert_produces_warning(FutureWarning): + df.to_csv(path, tupleize_cols=True) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): From 9092445b1b8bdc6cb031a2319ac98204b13f908c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 16 Oct 2017 14:24:06 +0200 Subject: [PATCH 76/76] CategoricalDtype construction: actually use fastpath (#17891) --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2fdbad93fa63b0..b3498abb3b2c06 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -178,7 +178,7 @@ def _finalize(self, categories, ordered, fastpath=False): if categories is not None: categories = Index(categories, tupleize_cols=False) # validation - self._validate_categories(categories) + self._validate_categories(categories, fastpath=fastpath) self._validate_ordered(ordered) self._categories = categories self._ordered = ordered
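To close, a small illustration of the category validation that ``fastpath=True`` now genuinely bypasses for internal construction; through the public constructor it still runs (sketch only, output paraphrased):

    from pandas.api.types import CategoricalDtype

    CategoricalDtype(['a', 'b'], ordered=True)  # validates and succeeds
    try:
        CategoricalDtype(['a', 'a'])  # duplicate categories are rejected
    except ValueError as err:
        print(err)  # e.g. "Categorical categories must be unique"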