From 1952afac53886e80cfc3f310b9e5275235c11d4a Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 19 Nov 2018 18:03:04 -0700 Subject: [PATCH 01/12] TST: Mark test_pct_max_many_rows with pytest.mark.single (#23799) --- pandas/tests/frame/test_rank.py | 1 + pandas/tests/series/test_rank.py | 1 + pandas/tests/test_algos.py | 1 + 3 files changed, 3 insertions(+) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index eaba5f7ec7790..e7a876bcf52d1 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -310,6 +310,7 @@ def test_rank_pct_true(self, method, exp): expected = DataFrame(exp) tm.assert_frame_equal(result, expected) + @pytest.mark.single def test_pct_max_many_rows(self): # GH 18271 df = DataFrame({'A': np.arange(2**24 + 1), diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 5b0ea37a0bfcf..72d05cb4839ef 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -497,6 +497,7 @@ def test_rank_first_pct(dtype, ser, exp): assert_series_equal(result, expected) +@pytest.mark.single def test_pct_max_many_rows(): # GH 18271 s = Series(np.arange(2**24 + 1)) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index ff505f2986b1a..fa33a1ceae0b9 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1462,6 +1462,7 @@ def test_too_many_ndims(self): with pytest.raises(TypeError, match=msg): algos.rank(arr) + @pytest.mark.single @pytest.mark.parametrize('values', [ np.arange(2**24 + 1), np.arange(2**25 + 2).reshape(2**24 + 1, 2)], From 6fad5a0f815e0e441137908c0366403d2616549d Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Tue, 20 Nov 2018 01:04:23 +0000 Subject: [PATCH 02/12] CLN: Finish isort core (#23765) --- pandas/core/arrays/base.py | 14 +++--- pandas/core/arrays/categorical.py | 67 ++++++++++---------------- pandas/core/arrays/datetimelike.py | 41 ++++++---------- pandas/core/arrays/datetimes.py | 31 +++++------- pandas/core/arrays/integer.py | 26 +++++----- pandas/core/arrays/interval.py | 35 +++++++------- pandas/core/arrays/period.py | 45 +++++++---------- pandas/core/arrays/timedeltas.py | 21 +++----- pandas/core/internals/blocks.py | 77 +++++++++--------------------- pandas/core/internals/concat.py | 16 +++---- pandas/core/internals/managers.py | 36 ++++++-------- pandas/core/sparse/api.py | 2 +- pandas/core/sparse/frame.py | 36 +++++++------- pandas/core/sparse/scipy_sparse.py | 5 +- pandas/core/sparse/series.py | 38 +++++++-------- pandas/core/tools/datetimes.py | 34 +++++-------- pandas/core/tools/numeric.py | 19 ++++---- pandas/core/tools/timedeltas.py | 9 ++-- pandas/core/util/hashing.py | 15 +++--- setup.cfg | 19 -------- 20 files changed, 227 insertions(+), 359 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index f842d1237cb14..eb2fef482ff17 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -5,16 +5,18 @@ This is an experimental API and subject to breaking changes without warning. 
""" -import numpy as np - import operator -from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass -from pandas.errors import AbstractMethodError +import numpy as np + +from pandas.compat import PY3, set_function_name from pandas.compat.numpy import function as nv -from pandas.compat import set_function_name, PY3 -from pandas.core import ops +from pandas.errors import AbstractMethodError + from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries + +from pandas.core import ops _not_implemented_message = "{} does not implement {}." diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 276ef6426a51b..6dc3a960dc817 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,61 +1,46 @@ # pylint: disable=E1101,W0232 -import numpy as np -from warnings import warn import textwrap +from warnings import warn -from pandas import compat -from pandas.compat import u, lzip -from pandas._libs import lib, algos as libalgos +import numpy as np + +from pandas._libs import algos as libalgos, lib +import pandas.compat as compat +from pandas.compat import lzip, u +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + Appender, Substitution, cache_readonly, deprecate_kwarg) +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs -from pandas.core.dtypes.generic import ( - ABCSeries, ABCIndexClass, ABCCategoricalIndex) -from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.cast import ( - maybe_infer_to_datetimelike, - coerce_indexer_dtype) -from pandas.core.dtypes.dtypes import CategoricalDtype + coerce_indexer_dtype, maybe_infer_to_datetimelike) from pandas.core.dtypes.common import ( - ensure_int64, - ensure_object, - ensure_platform_int, - is_extension_array_dtype, - is_dtype_equal, - is_datetimelike, - is_datetime64_dtype, - is_timedelta64_dtype, - is_categorical, - is_categorical_dtype, - is_float_dtype, - is_integer_dtype, - is_object_dtype, - is_list_like, is_sequence, - is_scalar, is_iterator, - is_dict_like) - -from pandas.core.algorithms import factorize, take_1d, unique1d, take + ensure_int64, ensure_object, ensure_platform_int, is_categorical, + is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like, + is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype, + is_iterator, is_list_like, is_object_dtype, is_scalar, is_sequence, + is_timedelta64_dtype) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, ABCIndexClass, ABCSeries) +from pandas.core.dtypes.inference import is_hashable +from pandas.core.dtypes.missing import isna, notna + from pandas.core.accessor import PandasDelegate, delegate_names -from pandas.core.base import (PandasObject, - NoNewAttributesMixin, _shared_docs) +import pandas.core.algorithms as algorithms +from pandas.core.algorithms import factorize, take, take_1d, unique1d +from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs import pandas.core.common as com +from pandas.core.config import get_option from pandas.core.missing import interpolate_2d -from pandas.compat.numpy import function as nv -from pandas.util._decorators import ( - Appender, cache_readonly, deprecate_kwarg, Substitution) - -import pandas.core.algorithms as algorithms - from pandas.core.sorting import nargsort from 
pandas.io.formats import console from pandas.io.formats.terminal import get_terminal_size -from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs -from pandas.core.config import get_option from .base import ExtensionArray - _take_msg = textwrap.dedent("""\ Interpreting negative values in 'indexer' as missing values. In the future, this will change to meaning positional indices diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 631257b7a5264..4e784d9c89c5f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -5,44 +5,33 @@ import numpy as np -from pandas._libs import lib, iNaT, NaT +from pandas._libs import NaT, iNaT, lib from pandas._libs.tslibs import timezones -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta -from pandas._libs.tslibs.timestamps import maybe_integer_op_deprecated from pandas._libs.tslibs.period import ( - Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) - + DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period) +from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds +from pandas._libs.tslibs.timestamps import maybe_integer_op_deprecated +import pandas.compat as compat from pandas.errors import ( AbstractMethodError, NullFrequencyError, PerformanceWarning) -from pandas import compat - -from pandas.tseries import frequencies -from pandas.tseries.offsets import Tick, DateOffset +from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.common import ( - pandas_dtype, - needs_i8_conversion, - is_list_like, - is_offsetlike, - is_extension_array_dtype, - is_datetime64_dtype, - is_datetime64_any_dtype, - is_datetime64tz_dtype, - is_float_dtype, - is_integer_dtype, - is_bool_dtype, - is_period_dtype, - is_timedelta64_dtype, - is_object_dtype) -from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame, ABCIndexClass + is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype, + is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype, + is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike, + is_period_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna -import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr, take, unique1d +import pandas.core.common as com + +from pandas.tseries import frequencies +from pandas.tseries.offsets import DateOffset, Tick from .base import ExtensionOpsMixin -from pandas.util._decorators import deprecate_kwarg def _make_comparison_op(cls, op): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c56e994e0ca2f..2187ff28844a0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -6,35 +6,28 @@ from pytz import utc from pandas._libs import lib, tslib -from pandas._libs.tslib import Timestamp, NaT, iNaT +from pandas._libs.tslib import NaT, Timestamp, iNaT from pandas._libs.tslibs import ( - ccalendar, normalize_date, - conversion, fields, timezones, - resolution as libresolution) - -from pandas.util._decorators import cache_readonly, Appender + ccalendar, conversion, fields, normalize_date, resolution as libresolution, + timezones) +import pandas.compat as compat from pandas.errors import PerformanceWarning -from pandas import compat +from pandas.util._decorators import Appender, 
cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, - is_object_dtype, - is_int64_dtype, - is_datetime64tz_dtype, - is_datetime64_dtype) + _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_int64_dtype, + is_object_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries +from pandas.core.dtypes.missing import isna -import pandas.core.common as com -from pandas.core.algorithms import checked_add_with_arr from pandas.core import ops - -from pandas.tseries.frequencies import to_offset, get_period_alias -from pandas.tseries.offsets import Tick, generate_range - +from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl +import pandas.core.common as com +from pandas.tseries.frequencies import get_period_alias, to_offset +from pandas.tseries.offsets import Tick, generate_range _midnight = time(0, 0) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 9e045a7785660..e9d51aaea4218 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,31 +1,27 @@ +import copy import sys import warnings -import copy -import numpy as np +import numpy as np from pandas._libs import lib +from pandas.compat import range, set_function_name, string_types, u from pandas.util._decorators import cache_readonly -from pandas.compat import u, range, string_types -from pandas.compat import set_function_name -from pandas.core import nanops +from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import astype_nansafe -from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass from pandas.core.dtypes.common import ( - is_integer, is_scalar, is_float, - is_bool_dtype, - is_float_dtype, - is_integer_dtype, - is_object_dtype, - is_list_like) -from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin -from pandas.core.dtypes.base import ExtensionDtype + is_bool_dtype, is_float, is_float_dtype, is_integer, is_integer_dtype, + is_list_like, is_object_dtype, is_scalar) from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, notna +from pandas.core import nanops +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin + from pandas.io.formats.printing import ( - format_object_summary, format_object_attrs, default_pprint) + default_pprint, format_object_attrs, format_object_summary) class _IntegerDtype(ExtensionDtype): diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 90118cd300a22..70be850481d85 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1,31 +1,30 @@ +from operator import le, lt import textwrap -import numpy as np -from operator import le, lt +import numpy as np -from pandas._libs.interval import (Interval, IntervalMixin, - intervals_to_interval_bounds) +from pandas._libs.interval import ( + Interval, IntervalMixin, intervals_to_interval_bounds) from pandas.compat import add_metaclass from pandas.compat.numpy import function as nv -import pandas.core.common as com -from pandas.core.config import get_option +from pandas.util._decorators import Appender +from pandas.util._doctools import _WritableDoc + from pandas.core.dtypes.cast import maybe_convert_platform -from pandas.core.dtypes.common import (is_categorical_dtype, is_float_dtype, - is_integer_dtype, 
is_interval_dtype, - is_scalar, is_string_dtype, - is_datetime64_any_dtype, - is_timedelta64_dtype, is_interval, - pandas_dtype) +from pandas.core.dtypes.common import ( + is_categorical_dtype, is_datetime64_any_dtype, is_float_dtype, + is_integer_dtype, is_interval, is_interval_dtype, is_scalar, + is_string_dtype, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import IntervalDtype -from pandas.core.dtypes.generic import (ABCDatetimeIndex, ABCPeriodIndex, - ABCSeries, ABCIntervalIndex, - ABCInterval) +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, ABCInterval, ABCIntervalIndex, ABCPeriodIndex, ABCSeries) from pandas.core.dtypes.missing import isna, notna + +import pandas.core.common as com +from pandas.core.config import get_option from pandas.core.indexes.base import Index, ensure_index -from pandas.util._decorators import Appender -from pandas.util._doctools import _WritableDoc -from . import ExtensionArray, Categorical +from . import Categorical, ExtensionArray _VALID_CLOSED = {'left', 'right', 'both', 'neither'} _interval_shared_docs = {} diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d9989b1ac36c0..53629dca4d391 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -4,48 +4,35 @@ import numpy as np -from pandas import compat -from pandas.compat.numpy import function as nv from pandas._libs.tslib import NaT, iNaT -from pandas._libs.tslibs.period import ( - Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, - get_period_field_arr, period_asfreq_arr, -) from pandas._libs.tslibs import period as libperiod -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr -from pandas.util._decorators import cache_readonly, Appender +from pandas._libs.tslibs.period import ( + DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period, get_period_field_arr, + period_asfreq_arr) +from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds +import pandas.compat as compat +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, cache_readonly from pandas.util._validators import validate_fillna_kwargs -import pandas.core.algorithms as algos + from pandas.core.dtypes.common import ( - is_integer_dtype, is_float_dtype, is_period_dtype, - pandas_dtype, - is_datetime64_dtype, - is_categorical_dtype, - is_list_like, - is_array_like, - is_object_dtype, - is_string_dtype, - is_datetime_or_timedelta_dtype, - is_dtype_equal, - ensure_object, - _TD_DTYPE, -) + _TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype, + is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, + is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, + is_period_dtype, is_string_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import PeriodDtype -from pandas.core.dtypes.generic import ( - ABCSeries, ABCIndexClass, ABCPeriodIndex -) +from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries from pandas.core.dtypes.missing import isna, notna -from pandas.core.missing import pad_1d, backfill_1d +import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray, datetimelike as dtl import pandas.core.common as com +from pandas.core.missing import backfill_1d, pad_1d from pandas.tseries import frequencies from pandas.tseries.offsets import Tick -from pandas.core.arrays import ExtensionArray -from pandas.core.arrays import datetimelike as dtl - def 
_field_accessor(name, alias, docstring=None): def f(self): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index cb630c9b66c20..38e28e8b77359 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -5,32 +5,25 @@ import numpy as np from pandas._libs import tslibs -from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT +from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, parse_timedelta_unit) +import pandas.compat as compat from pandas.util._decorators import Appender -from pandas import compat - from pandas.core.dtypes.common import ( - _TD_DTYPE, - is_object_dtype, - is_string_dtype, - is_float_dtype, - is_integer_dtype, - is_timedelta64_dtype, - is_datetime64_dtype, - is_list_like, - ensure_int64) + _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, + is_integer_dtype, is_list_like, is_object_dtype, is_string_dtype, + is_timedelta64_dtype) from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex from pandas.core.dtypes.missing import isna -import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr +import pandas.core.common as com -from pandas.tseries.offsets import Tick from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import Tick from . import datetimelike as dtl diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1f2a1ee52159e..857bf18c5982b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,76 +1,45 @@ # -*- coding: utf-8 -*- +from datetime import date, datetime, timedelta import functools -import warnings import inspect import re -from datetime import datetime, timedelta, date +import warnings import numpy as np -from pandas._libs import lib, tslib, tslibs, internals as libinternals -from pandas._libs.tslibs import conversion, Timedelta - -from pandas import compat +from pandas._libs import internals as libinternals, lib, tslib, tslibs +from pandas._libs.tslibs import Timedelta, conversion +import pandas.compat as compat from pandas.compat import range, zip - from pandas.util._validators import validate_bool_kwarg -from pandas.core.dtypes.dtypes import ( - ExtensionDtype, DatetimeTZDtype, - PandasExtensionDtype, - CategoricalDtype) -from pandas.core.dtypes.common import ( - _TD_DTYPE, _NS_DTYPE, - ensure_platform_int, - is_integer, - is_dtype_equal, - is_timedelta64_dtype, - is_datetime64_dtype, is_datetimetz, - is_categorical, is_categorical_dtype, - is_integer_dtype, - is_datetime64tz_dtype, - is_bool_dtype, - is_object_dtype, - is_float_dtype, - is_numeric_v_string_like, is_extension_type, - is_extension_array_dtype, - is_list_like, - is_re, - is_re_compilable, - is_sparse, - pandas_dtype) from pandas.core.dtypes.cast import ( - maybe_downcast_to_dtype, - maybe_upcast, - maybe_promote, - infer_dtype_from, - infer_dtype_from_scalar, - soft_convert_objects, - maybe_convert_objects, - astype_nansafe, - find_common_type, - maybe_infer_dtype_type) -from pandas.core.dtypes.missing import ( - isna, notna, array_equivalent, - _isna_compat, - is_null_datelike_scalar) + astype_nansafe, find_common_type, infer_dtype_from, + infer_dtype_from_scalar, maybe_convert_objects, maybe_downcast_to_dtype, + maybe_infer_dtype_type, maybe_promote, maybe_upcast, soft_convert_objects) +from pandas.core.dtypes.common import ( + _NS_DTYPE, _TD_DTYPE, 
ensure_platform_int, is_bool_dtype, is_categorical, + is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_datetimetz, is_dtype_equal, is_extension_array_dtype, is_extension_type, + is_float_dtype, is_integer, is_integer_dtype, is_list_like, + is_numeric_v_string_like, is_object_dtype, is_re, is_re_compilable, + is_sparse, is_timedelta64_dtype, pandas_dtype) import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, DatetimeTZDtype, ExtensionDtype, PandasExtensionDtype) from pandas.core.dtypes.generic import ( - ABCSeries, - ABCDatetimeIndex, - ABCExtensionArray, - ABCIndexClass) + ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass, ABCSeries) +from pandas.core.dtypes.missing import ( + _isna_compat, array_equivalent, is_null_datelike_scalar, isna, notna) -import pandas.core.common as com import pandas.core.algorithms as algos -import pandas.core.missing as missing -from pandas.core.base import PandasObject - from pandas.core.arrays import Categorical - +from pandas.core.base import PandasObject +import pandas.core.common as com from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_setitem_lengths +import pandas.core.missing as missing from pandas.io.formats.printing import pprint_thing diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index dfb7408384038..2fb533478b2f3 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -1,23 +1,19 @@ # -*- coding: utf-8 -*- # TODO: Needs a better name; too many modules are already called "concat" -import copy from collections import defaultdict +import copy import numpy as np -from pandas._libs import tslibs, internals as libinternals +from pandas._libs import internals as libinternals, tslibs from pandas.util._decorators import cache_readonly -from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.common import ( - is_timedelta64_dtype, - is_datetime64_dtype, is_datetimetz, - is_categorical_dtype, - is_float_dtype, is_numeric_dtype, - is_sparse, - _get_dtype) from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + _get_dtype, is_categorical_dtype, is_datetime64_dtype, is_datetimetz, + is_float_dtype, is_numeric_dtype, is_sparse, is_timedelta64_dtype) import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0519c5e5abe33..c3762d9819153 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -7,42 +7,34 @@ import numpy as np -from pandas._libs import lib, internals as libinternals - +from pandas._libs import internals as libinternals, lib +from pandas.compat import map, range, zip from pandas.util._validators import validate_bool_kwarg -from pandas.compat import range, map, zip -from pandas.core.dtypes.common import ( - _NS_DTYPE, - is_datetimelike_v_numeric, - is_numeric_v_string_like, is_extension_type, - is_extension_array_dtype, - is_scalar) from pandas.core.dtypes.cast import ( - maybe_promote, - infer_dtype_from_scalar, - find_common_type, - maybe_convert_objects) -from pandas.core.dtypes.missing import isna + find_common_type, infer_dtype_from_scalar, maybe_convert_objects, + maybe_promote) +from pandas.core.dtypes.common import ( + _NS_DTYPE, is_datetimelike_v_numeric, 
is_extension_array_dtype, + is_extension_type, is_numeric_v_string_like, is_scalar) import pandas.core.dtypes.concat as _concat -from pandas.core.dtypes.generic import ABCSeries, ABCExtensionArray +from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries +from pandas.core.dtypes.missing import isna -from pandas.core.base import PandasObject import pandas.core.algorithms as algos from pandas.core.arrays.sparse import _maybe_to_sparse - +from pandas.core.base import PandasObject from pandas.core.index import Index, MultiIndex, ensure_index from pandas.core.indexing import maybe_convert_indices from pandas.io.formats.printing import pprint_thing from .blocks import ( - Block, DatetimeTZBlock, CategoricalBlock, ExtensionBlock, - _extend_blocks, _merge_blocks, _safe_reshape, - make_block, get_block_type) + Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock, _extend_blocks, + _merge_blocks, _safe_reshape, get_block_type, make_block) from .concat import ( # all for concatenate_block_managers - concatenate_join_units, is_uniform_join_units, - get_mgr_concatenation_plan, combine_concat_plans) + combine_concat_plans, concatenate_join_units, get_mgr_concatenation_plan, + is_uniform_join_units) # TODO: flexible with index=None and/or items=None diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py index e3be241bcdd70..33e8b921905ba 100644 --- a/pandas/core/sparse/api.py +++ b/pandas/core/sparse/api.py @@ -1,5 +1,5 @@ # pylint: disable=W0611 # flake8: noqa from pandas.core.arrays.sparse import SparseArray, SparseDtype -from pandas.core.sparse.series import SparseSeries from pandas.core.sparse.frame import SparseDataFrame +from pandas.core.sparse.series import SparseSeries diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index ee7de49bc1bce..a25ffa2744cb7 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -3,32 +3,36 @@ with float64 data """ from __future__ import division -# pylint: disable=E1101,E1103,W0231,E0202 import warnings -from pandas.compat import lmap -from pandas import compat + import numpy as np -from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.cast import maybe_upcast, find_common_type +from pandas._libs.sparse import BlockIndex, get_blocks +import pandas.compat as compat +from pandas.compat import lmap +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender + +from pandas.core.dtypes.cast import find_common_type, maybe_upcast from pandas.core.dtypes.common import ensure_platform_int, is_scipy_sparse +from pandas.core.dtypes.missing import isna, notna -from pandas.compat.numpy import function as nv -from pandas.core.index import Index, MultiIndex, ensure_index -from pandas.core.series import Series -from pandas.core.frame import DataFrame, extract_index, _prep_ndarray import pandas.core.algorithms as algos -from pandas.core.internals import (BlockManager, - create_block_manager_from_arrays) -import pandas.core.generic as generic from pandas.core.arrays.sparse import SparseArray, SparseDtype -from pandas.core.sparse.series import SparseSeries -from pandas._libs.sparse import BlockIndex, get_blocks -from pandas.util._decorators import Appender -import pandas.core.ops as ops import pandas.core.common as com +from pandas.core.frame import DataFrame, _prep_ndarray, extract_index +import pandas.core.generic as generic +from pandas.core.index import Index, MultiIndex, ensure_index import pandas.core.indexes.base as ibase +from pandas.core.internals 
import ( + BlockManager, create_block_manager_from_arrays) +import pandas.core.ops as ops +from pandas.core.series import Series +from pandas.core.sparse.series import SparseSeries + +# pylint: disable=E1101,E1103,W0231,E0202 + _shared_doc_kwargs = dict(klass='SparseDataFrame') diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index 748a52f484893..ab4fdeb05f8f1 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -3,10 +3,11 @@ Currently only includes SparseSeries.to_coo helpers. """ -from pandas.core.index import MultiIndex, Index -from pandas.core.series import Series from pandas.compat import OrderedDict, lmap +from pandas.core.index import Index, MultiIndex +from pandas.core.series import Series + def _check_is_partition(parts, whole): whole = set(whole) diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index ff32712f9056a..4ea4531c53c72 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -5,36 +5,30 @@ # pylint: disable=E1101,E1103,W0231 -import numpy as np import warnings -from pandas.core.dtypes.common import ( - is_scalar, -) -from pandas.core.dtypes.missing import isna, notna, is_integer +import numpy as np -from pandas import compat -from pandas.compat.numpy import function as nv -from pandas.core.index import Index -from pandas.core.series import Series -from pandas.core.dtypes.generic import ABCSeries, ABCSparseSeries -from pandas.core.internals import SingleBlockManager -from pandas.core import generic -import pandas.core.ops as ops import pandas._libs.index as libindex +import pandas._libs.sparse as splib +from pandas._libs.sparse import BlockIndex, IntIndex +import pandas.compat as compat +from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution -from pandas.core.arrays import ( - SparseArray, -) -from pandas.core.arrays.sparse import SparseAccessor -from pandas._libs.sparse import BlockIndex, IntIndex -import pandas._libs.sparse as splib +from pandas.core.dtypes.common import is_scalar +from pandas.core.dtypes.generic import ABCSeries, ABCSparseSeries +from pandas.core.dtypes.missing import is_integer, isna, notna +from pandas.core import generic +from pandas.core.arrays import SparseArray +from pandas.core.arrays.sparse import SparseAccessor +from pandas.core.index import Index +from pandas.core.internals import SingleBlockManager +import pandas.core.ops as ops +from pandas.core.series import Series from pandas.core.sparse.scipy_sparse import ( - _sparse_series_to_coo, - _coo_to_sparse_series) - + _coo_to_sparse_series, _sparse_series_to_coo) _shared_doc_kwargs = dict(axes='index', klass='SparseSeries', axes_single_arg="{0, 'index'}", diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0eb2ffeab28f1..2c6fdb3eaf03c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,36 +1,24 @@ -from functools import partial -from datetime import datetime, time from collections import MutableMapping +from datetime import datetime, time +from functools import partial import numpy as np from pandas._libs import tslib, tslibs -from pandas._libs.tslibs.strptime import array_strptime -from pandas._libs.tslibs import parsing, conversion, Timestamp +from pandas._libs.tslibs import Timestamp, conversion, parsing from pandas._libs.tslibs.parsing import ( # noqa - parse_time_string, - DateParseError, - _format_is_iso, - _guess_datetime_format) + DateParseError, 
_format_is_iso, _guess_datetime_format, parse_time_string) +from pandas._libs.tslibs.strptime import array_strptime +from pandas.compat import zip from pandas.core.dtypes.common import ( - ensure_object, - is_datetime64_ns_dtype, - is_datetime64_dtype, - is_datetime64tz_dtype, - is_integer_dtype, - is_integer, - is_float, - is_list_like, - is_scalar, - is_numeric_dtype, - is_object_dtype) -from pandas.core.dtypes.generic import ( - ABCIndexClass, ABCSeries, - ABCDataFrame) + ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype, + is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype, + is_list_like, is_numeric_dtype, is_object_dtype, is_scalar) +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import notna + from pandas.core import algorithms -from pandas.compat import zip def _guess_datetime_format_for_array(arr, **kwargs): diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 12699927141cb..1d4973de92b99 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -1,16 +1,15 @@ import numpy as np -import pandas as pd -from pandas.core.dtypes.common import ( - is_scalar, - is_numeric_dtype, - is_decimal, - is_datetime_or_timedelta_dtype, - is_number, - ensure_object) -from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass -from pandas.core.dtypes.cast import maybe_downcast_to_dtype + from pandas._libs import lib +from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.common import ( + ensure_object, is_datetime_or_timedelta_dtype, is_decimal, is_number, + is_numeric_dtype, is_scalar) +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries + +import pandas as pd + def to_numeric(arg, errors='raise', downcast=None): """ diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index db93820c6942f..2579efdbc65fe 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -3,14 +3,15 @@ """ import numpy as np -import pandas as pd + from pandas._libs import tslibs -from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64, - parse_timedelta_unit) +from pandas._libs.tslibs.timedeltas import ( + convert_to_timedelta64, parse_timedelta_unit) from pandas.core.dtypes.common import is_list_like -from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries +import pandas as pd from pandas.core.arrays.timedeltas import sequence_to_td64ns diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 6a2cfd4d4a7b3..0e58b69465d3c 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -2,18 +2,17 @@ data hash pandas / numpy objects """ import itertools + import numpy as np + from pandas._libs import hashing, tslibs -from pandas.core.dtypes.generic import ( - ABCMultiIndex, - ABCIndexClass, - ABCSeries, - ABCDataFrame) + +from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( - is_categorical_dtype, is_list_like, is_extension_array_dtype) + is_categorical_dtype, is_extension_array_dtype, is_list_like) +from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCSeries) from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.cast import infer_dtype_from_scalar - # 16 byte long hashing key _default_hash_key = '0123456789123456' diff --git a/setup.cfg b/setup.cfg index 5132e6c5f79cd..0214922585077 
100644 --- a/setup.cfg +++ b/setup.cfg @@ -138,25 +138,6 @@ skip= pandas/core/indexes/accessors.py, pandas/core/indexes/period.py, pandas/core/indexes/frozen.py, - pandas/core/arrays/categorical.py, - pandas/core/arrays/integer.py, - pandas/core/arrays/interval.py, - pandas/core/arrays/timedeltas.py, - pandas/core/arrays/datetimelike.py, - pandas/core/arrays/datetimes.py, - pandas/core/arrays/base.py, - pandas/core/arrays/period.py, - pandas/core/util/hashing.py, - pandas/core/tools/numeric.py, - pandas/core/tools/timedeltas.py, - pandas/core/tools/datetimes.py, - pandas/core/internals/concat.py, - pandas/core/internals/managers.py, - pandas/core/internals/blocks.py, - pandas/core/sparse/api.py, - pandas/core/sparse/series.py, - pandas/core/sparse/frame.py, - pandas/core/sparse/scipy_sparse.py, pandas/tests/test_errors.py, pandas/tests/test_base.py, pandas/tests/test_register_accessor.py, From b2fe1c88c657830cecc31605f715a4e90245f3e9 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 20 Nov 2018 02:07:23 +0100 Subject: [PATCH 03/12] API/DEPR: replace kwarg "pat" with "sep" in str.[r]partition (#23767) --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/strings.py | 17 +++++++++++------ pandas/tests/test_strings.py | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f94aa3d320b75..3f0c5dc2c2daf 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1035,6 +1035,7 @@ Deprecations - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`) - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) - The methods :meth:`DataFrame.update` and :meth:`Panel.update` have deprecated the ``raise_conflict=False|True`` keyword in favor of ``errors='ignore'|'raise'`` (:issue:`23585`) +- The methods :meth:`Series.str.partition` and :meth:`Series.str.rpartition` have deprecated the ``pat`` keyword in favor of ``sep`` (:issue:`22676`) - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) - :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 58ce562d03d1d..1c4317d56f82b 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -19,7 +19,7 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat from pandas.core.base import NoNewAttributesMixin -from pandas.util._decorators import Appender +from pandas.util._decorators import Appender, deprecate_kwarg import re import pandas._libs.lib as lib import pandas._libs.ops as libops @@ -2410,8 +2410,11 @@ def rsplit(self, pat=None, n=-1, expand=False): Parameters ---------- - pat : str, default whitespace + sep : str, default whitespace String to split on. + pat : str, default whitespace + .. deprecated:: 0.24.0 + Use ``sep`` instead expand : bool, default True If True, return DataFrame/MultiIndex expanding dimensionality. If False, return Series/Index. 
@@ -2485,8 +2488,9 @@ def rsplit(self, pat=None, n=-1, expand=False): 'empty strings', 'also': 'rpartition : Split the string at the last occurrence of `sep`' }) - def partition(self, pat=' ', expand=True): - f = lambda x: x.partition(pat) + @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + def partition(self, sep=' ', expand=True): + f = lambda x: x.partition(sep) result = _na_map(f, self._parent) return self._wrap_result(result, expand=expand) @@ -2496,8 +2500,9 @@ def partition(self, pat=' ', expand=True): 'string itself', 'also': 'partition : Split the string at the first occurrence of `sep`' }) - def rpartition(self, pat=' ', expand=True): - f = lambda x: x.rpartition(pat) + @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + def rpartition(self, sep=' ', expand=True): + f = lambda x: x.rpartition(sep) result = _na_map(f, self._parent) return self._wrap_result(result, expand=expand) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d128a66a182ba..7b4e330ca6e3d 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2609,6 +2609,24 @@ def test_partition_with_name(self): assert res.nlevels == 1 tm.assert_index_equal(res, exp) + def test_partition_deprecation(self): + # GH 22676; depr kwarg "pat" in favor of "sep" + values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h']) + + # str.partition + # using sep -> no warning + expected = values.str.partition(sep='_') + with tm.assert_produces_warning(FutureWarning): + result = values.str.partition(pat='_') + tm.assert_frame_equal(result, expected) + + # str.rpartition + # using sep -> no warning + expected = values.str.rpartition(sep='_') + with tm.assert_produces_warning(FutureWarning): + result = values.str.rpartition(pat='_') + tm.assert_frame_equal(result, expected) + def test_pipe_failures(self): # #2119 s = Series(['A|B|C']) From 71ba5bfbdadc11a4470abe4f2866fd007fbf8de9 Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Tue, 20 Nov 2018 01:10:51 +0000 Subject: [PATCH 04/12] BUG: Maintain column order with groupby.nth (#22811) --- doc/source/whatsnew/v0.24.0.rst | 2 + pandas/core/groupby/groupby.py | 3 +- pandas/core/indexes/base.py | 20 +++++--- pandas/core/indexes/interval.py | 8 +++- pandas/core/indexes/multi.py | 22 +++++++-- pandas/tests/groupby/test_nth.py | 24 ++++++++++ pandas/tests/indexes/common.py | 11 +++-- pandas/tests/indexes/datetimes/test_setops.py | 34 ++++++++----- .../tests/indexes/interval/test_interval.py | 17 +++++-- pandas/tests/indexes/multi/test_set_ops.py | 37 ++++++++------ pandas/tests/indexes/period/test_period.py | 7 +-- pandas/tests/indexes/period/test_setops.py | 42 ++++++++++------ pandas/tests/indexes/test_base.py | 48 ++++++++++++++----- .../indexes/timedeltas/test_timedelta.py | 34 +++++++++++-- 14 files changed, 225 insertions(+), 84 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3f0c5dc2c2daf..3ed5c91141b16 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -288,6 +288,7 @@ Other Enhancements - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) - :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the 
``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) +- :meth:`Index.difference` now has an optional ``sort`` parameter to specify whether the results should be sorted if possible (:issue:`17839`) - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) - :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. - :meth:`DataFrame.to_stata` and :class:` pandas.io.stata.StataWriter117` can write mixed sting columns to Stata strl format (:issue:`23633`) @@ -1417,6 +1418,7 @@ Groupby/Resample/Rolling - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`) - Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`) - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` which caused missing values when the input function can accept a :class:`DataFrame` but renames it (:issue:`23455`). +- Bug in :func:`pandas.core.groupby.GroupBy.nth` where column order was not always preserved (:issue:`20760`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 96aff09126772..d2dc5f16de7f8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -494,7 +494,8 @@ def _set_group_selection(self): if len(groupers): # GH12839 clear selected obj cache when group selection changes - self._group_selection = ax.difference(Index(groupers)).tolist() + self._group_selection = ax.difference(Index(groupers), + sort=False).tolist() self._reset_cache('_selected_obj') def _set_result_index_ordered(self, result): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0632198c77262..0fa6973b717e9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2944,17 +2944,20 @@ def intersection(self, other): taken.name = None return taken - def difference(self, other): + def difference(self, other, sort=True): """ Return a new Index with elements from the index that are not in `other`. This is the set difference of two Index objects. - It's sorted if sorting is possible. Parameters ---------- other : Index or array-like + sort : bool, default True + Sort the resulting index if possible + + .. 
versionadded:: 0.24.0 Returns ------- @@ -2963,10 +2966,12 @@ def difference(self, other): Examples -------- - >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx1 = pd.Index([2, 1, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.difference(idx2) Int64Index([1, 2], dtype='int64') + >>> idx1.difference(idx2, sort=False) + Int64Index([2, 1], dtype='int64') """ self._assert_can_do_setop(other) @@ -2985,10 +2990,11 @@ def difference(self, other): label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) the_diff = this.values.take(label_diff) - try: - the_diff = sorting.safe_sort(the_diff) - except TypeError: - pass + if sort: + try: + the_diff = sorting.safe_sort(the_diff) + except TypeError: + pass return this._shallow_copy(the_diff, name=result_name, freq=None) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2b157bf91c5a2..c64a179a299e9 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1037,7 +1037,7 @@ def overlaps(self, other): return self._data.overlaps(other) def _setop(op_name): - def func(self, other): + def func(self, other, sort=True): other = self._as_like_interval_index(other) # GH 19016: ensure set op will not return a prohibited dtype @@ -1048,7 +1048,11 @@ def func(self, other): 'objects that have compatible dtypes') raise TypeError(msg.format(op=op_name)) - result = getattr(self._multiindex, op_name)(other._multiindex) + if op_name == 'difference': + result = getattr(self._multiindex, op_name)(other._multiindex, + sort) + else: + result = getattr(self._multiindex, op_name)(other._multiindex) result_name = get_op_result_name(self, other) # GH 19101: ensure empty results have correct dtype diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dbb1b8e196bf7..619e1ae866a1b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2798,10 +2798,18 @@ def intersection(self, other): return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) - def difference(self, other): + def difference(self, other, sort=True): """ Compute sorted set difference of two MultiIndex objects + Parameters + ---------- + other : MultiIndex + sort : bool, default True + Sort the resulting MultiIndex if possible + + .. 
versionadded:: 0.24.0 + Returns ------- diff : MultiIndex @@ -2817,8 +2825,16 @@ def difference(self, other): labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) - difference = sorted(set(self._ndarray_values) - - set(other._ndarray_values)) + this = self._get_unique_index() + + indexer = this.get_indexer(other) + indexer = indexer.take((indexer != -1).nonzero()[0]) + + label_diff = np.setdiff1d(np.arange(this.size), indexer, + assume_unique=True) + difference = this.values.take(label_diff) + if sort: + difference = sorted(difference) if len(difference) == 0: return MultiIndex(levels=[[]] * self.nlevels, diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index a1b748cd50e8f..4ea4b580a2c3f 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -390,3 +390,27 @@ def test_nth_empty(): names=['a', 'b']), columns=['c']) assert_frame_equal(result, expected) + + +def test_nth_column_order(): + # GH 20760 + # Check that nth preserves column order + df = DataFrame([[1, 'b', 100], + [1, 'a', 50], + [1, 'a', np.nan], + [2, 'c', 200], + [2, 'd', 150]], + columns=['A', 'C', 'B']) + result = df.groupby('A').nth(0) + expected = DataFrame([['b', 100.0], + ['c', 200.0]], + columns=['C', 'B'], + index=Index([1, 2], name='A')) + assert_frame_equal(result, expected) + + result = df.groupby('A').nth(-1, dropna='any') + expected = DataFrame([['a', 50.0], + ['d', 150.0]], + columns=['C', 'B'], + index=Index([1, 2], name='A')) + assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 4b0daac34c2e3..7f1cf143a3a6e 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -666,12 +666,13 @@ def test_union_base(self): with pytest.raises(TypeError, match=msg): first.union([1, 2, 3]) - def test_difference_base(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_base(self, sort): for name, idx in compat.iteritems(self.indices): first = idx[2:] second = idx[:4] answer = idx[4:] - result = first.difference(second) + result = first.difference(second, sort) if isinstance(idx, CategoricalIndex): pass @@ -685,7 +686,7 @@ def test_difference_base(self): if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" with pytest.raises(ValueError, match=msg): - first.difference(case) + first.difference(case, sort) elif isinstance(idx, CategoricalIndex): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): @@ -693,13 +694,13 @@ def test_difference_base(self): tm.assert_numpy_array_equal(result.sort_values().asi8, answer.sort_values().asi8) else: - result = first.difference(case) + result = first.difference(case, sort) assert tm.equalContents(result, answer) if isinstance(idx, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" with pytest.raises(TypeError, match=msg): - first.difference([1, 2, 3]) + first.difference([1, 2, 3], sort) def test_symmetric_difference(self): for name, idx in compat.iteritems(self.indices): diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index d72bf275463ac..7c1f753dbeaaa 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -209,47 +209,55 @@ def test_intersection_bug_1708(self): assert len(result) == 0 @pytest.mark.parametrize("tz", tz) - def test_difference(self, tz): - rng1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + 
@pytest.mark.parametrize("sort", [True, False]) + def test_difference(self, tz, sort): + rng_dates = ['1/2/2000', '1/3/2000', '1/1/2000', '1/4/2000', + '1/5/2000'] + + rng1 = pd.DatetimeIndex(rng_dates, tz=tz) other1 = pd.date_range('1/6/2000', freq='D', periods=5, tz=tz) - expected1 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + expected1 = pd.DatetimeIndex(rng_dates, tz=tz) - rng2 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + rng2 = pd.DatetimeIndex(rng_dates, tz=tz) other2 = pd.date_range('1/4/2000', freq='D', periods=5, tz=tz) - expected2 = pd.date_range('1/1/2000', freq='D', periods=3, tz=tz) + expected2 = pd.DatetimeIndex(rng_dates[:3], tz=tz) - rng3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + rng3 = pd.DatetimeIndex(rng_dates, tz=tz) other3 = pd.DatetimeIndex([], tz=tz) - expected3 = pd.date_range('1/1/2000', freq='D', periods=5, tz=tz) + expected3 = pd.DatetimeIndex(rng_dates, tz=tz) for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), (rng3, other3, expected3)]: - result_diff = rng.difference(other) + result_diff = rng.difference(other, sort) + if sort: + expected = expected.sort_values() tm.assert_index_equal(result_diff, expected) - def test_difference_freq(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_freq(self, sort): # GH14323: difference of DatetimeIndex should not preserve frequency index = date_range("20160920", "20160925", freq="D") other = date_range("20160921", "20160924", freq="D") expected = DatetimeIndex(["20160920", "20160925"], freq=None) - idx_diff = index.difference(other) + idx_diff = index.difference(other, sort) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) other = date_range("20160922", "20160925", freq="D") - idx_diff = index.difference(other) + idx_diff = index.difference(other, sort) expected = DatetimeIndex(["20160920", "20160921"], freq=None) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) - def test_datetimeindex_diff(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_datetimeindex_diff(self, sort): dti1 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) dti2 = DatetimeIndex(freq='Q-JAN', start=datetime(1997, 12, 31), periods=98) - assert len(dti1.difference(dti2)) == 2 + assert len(dti1.difference(dti2, sort)) == 2 def test_datetimeindex_union_join_empty(self): dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D') diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index d5f62429ddb73..da3b3253ecbd1 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -801,19 +801,26 @@ def test_intersection(self, closed): result = index.intersection(other) tm.assert_index_equal(result, expected) - def test_difference(self, closed): - index = self.create_index(closed=closed) - tm.assert_index_equal(index.difference(index[:1]), index[1:]) + @pytest.mark.parametrize("sort", [True, False]) + def test_difference(self, closed, sort): + index = IntervalIndex.from_arrays([1, 0, 3, 2], + [1, 2, 3, 4], + closed=closed) + result = index.difference(index[:1], sort) + expected = index[1:] + if sort: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) # GH 19101: empty result, same dtype - result = index.difference(index) + result = index.difference(index, sort) expected = IntervalIndex(np.array([], 
dtype='int64'), closed=closed) tm.assert_index_equal(result, expected) # GH 19101: empty result, different dtypes other = IntervalIndex.from_arrays(index.left.astype('float64'), index.right, closed=closed) - result = index.difference(other) + result = index.difference(other, sort) tm.assert_index_equal(result, expected) def test_symmetric_difference(self, closed): diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 34da3df4fb16e..91edf11e77f10 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -56,11 +56,12 @@ def test_union_base(idx): first.union([1, 2, 3]) -def test_difference_base(idx): +@pytest.mark.parametrize("sort", [True, False]) +def test_difference_base(idx, sort): first = idx[2:] second = idx[:4] answer = idx[4:] - result = first.difference(second) + result = first.difference(second, sort) assert tm.equalContents(result, answer) @@ -68,12 +69,12 @@ def test_difference_base(idx): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - result = first.difference(case) + result = first.difference(case, sort) assert tm.equalContents(result, answer) msg = "other must be a MultiIndex or a list of tuples" with pytest.raises(TypeError, match=msg): - first.difference([1, 2, 3]) + first.difference([1, 2, 3], sort) def test_symmetric_difference(idx): @@ -101,11 +102,17 @@ def test_empty(idx): assert idx[:0].empty -def test_difference(idx): +@pytest.mark.parametrize("sort", [True, False]) +def test_difference(idx, sort): first = idx - result = first.difference(idx[-3:]) - expected = MultiIndex.from_tuples(sorted(idx[:-3].values), + result = first.difference(idx[-3:], sort) + vals = idx[:-3].values + + if sort: + vals = sorted(vals) + + expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names) @@ -114,19 +121,19 @@ def test_difference(idx): assert result.names == idx.names # empty difference: reflexive - result = idx.difference(idx) + result = idx.difference(idx, sort) expected = idx[:0] assert result.equals(expected) assert result.names == idx.names # empty difference: superset - result = idx[-3:].difference(idx) + result = idx[-3:].difference(idx, sort) expected = idx[:0] assert result.equals(expected) assert result.names == idx.names # empty difference: degenerate - result = idx[:0].difference(idx) + result = idx[:0].difference(idx, sort) expected = idx[:0] assert result.equals(expected) assert result.names == idx.names @@ -134,24 +141,24 @@ def test_difference(idx): # names not the same chunklet = idx[-3:] chunklet.names = ['foo', 'baz'] - result = first.difference(chunklet) + result = first.difference(chunklet, sort) assert result.names == (None, None) # empty, but non-equal - result = idx.difference(idx.sortlevel(1)[0]) + result = idx.difference(idx.sortlevel(1)[0], sort) assert len(result) == 0 # raise Exception called with non-MultiIndex - result = first.difference(first.values) + result = first.difference(first.values, sort) assert result.equals(first[:0]) # name from empty array - result = first.difference([]) + result = first.difference([], sort) assert first.equals(result) assert first.names == result.names # name from non-empty array - result = first.difference([('foo', 'one')]) + result = first.difference([('foo', 'one')], sort) expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) expected.names = first.names diff --git 
a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index ddb3fe686534a..5d78333016f74 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -72,7 +72,8 @@ def test_no_millisecond_field(self): with pytest.raises(AttributeError): DatetimeIndex([]).millisecond - def test_difference_freq(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_freq(self, sort): # GH14323: difference of Period MUST preserve frequency # but the ability to union results must be preserved @@ -80,12 +81,12 @@ def test_difference_freq(self): other = period_range("20160921", "20160924", freq="D") expected = PeriodIndex(["20160920", "20160925"], freq='D') - idx_diff = index.difference(other) + idx_diff = index.difference(other, sort) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) other = period_range("20160922", "20160925", freq="D") - idx_diff = index.difference(other) + idx_diff = index.difference(other, sort) expected = PeriodIndex(["20160920", "20160921"], freq='D') tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index c8b7d82855519..565e64607350f 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -203,37 +203,49 @@ def test_intersection_cases(self): result = rng.intersection(rng[0:0]) assert len(result) == 0 - def test_difference(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference(self, sort): # diff - rng1 = pd.period_range('1/1/2000', freq='D', periods=5) + period_rng = ['1/3/2000', '1/2/2000', '1/1/2000', '1/5/2000', + '1/4/2000'] + rng1 = pd.PeriodIndex(period_rng, freq='D') other1 = pd.period_range('1/6/2000', freq='D', periods=5) - expected1 = pd.period_range('1/1/2000', freq='D', periods=5) + expected1 = rng1 - rng2 = pd.period_range('1/1/2000', freq='D', periods=5) + rng2 = pd.PeriodIndex(period_rng, freq='D') other2 = pd.period_range('1/4/2000', freq='D', periods=5) - expected2 = pd.period_range('1/1/2000', freq='D', periods=3) + expected2 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000'], + freq='D') - rng3 = pd.period_range('1/1/2000', freq='D', periods=5) + rng3 = pd.PeriodIndex(period_rng, freq='D') other3 = pd.PeriodIndex([], freq='D') - expected3 = pd.period_range('1/1/2000', freq='D', periods=5) + expected3 = rng3 - rng4 = pd.period_range('2000-01-01 09:00', freq='H', periods=5) + period_rng = ['2000-01-01 10:00', '2000-01-01 09:00', + '2000-01-01 12:00', '2000-01-01 11:00', + '2000-01-01 13:00'] + rng4 = pd.PeriodIndex(period_rng, freq='H') other4 = pd.period_range('2000-01-02 09:00', freq='H', periods=5) expected4 = rng4 - rng5 = pd.PeriodIndex(['2000-01-01 09:01', '2000-01-01 09:03', + rng5 = pd.PeriodIndex(['2000-01-01 09:03', '2000-01-01 09:01', '2000-01-01 09:05'], freq='T') other5 = pd.PeriodIndex( ['2000-01-01 09:01', '2000-01-01 09:05'], freq='T') expected5 = pd.PeriodIndex(['2000-01-01 09:03'], freq='T') - rng6 = pd.period_range('2000-01-01', freq='M', periods=7) + period_rng = ['2000-02-01', '2000-01-01', '2000-06-01', + '2000-07-01', '2000-05-01', '2000-03-01', + '2000-04-01'] + rng6 = pd.PeriodIndex(period_rng, freq='M') other6 = pd.period_range('2000-04-01', freq='M', periods=7) - expected6 = pd.period_range('2000-01-01', freq='M', periods=3) + expected6 = pd.PeriodIndex(['2000-02-01', '2000-01-01', 
'2000-03-01'], + freq='M') - rng7 = pd.period_range('2003-01-01', freq='A', periods=5) + period_rng = ['2003', '2007', '2006', '2005', '2004'] + rng7 = pd.PeriodIndex(period_rng, freq='A') other7 = pd.period_range('1998-01-01', freq='A', periods=8) - expected7 = pd.period_range('2006-01-01', freq='A', periods=2) + expected7 = pd.PeriodIndex(['2007', '2006'], freq='A') for rng, other, expected in [(rng1, other1, expected1), (rng2, other2, expected2), @@ -242,5 +254,7 @@ def test_difference(self): (rng5, other5, expected5), (rng6, other6, expected6), (rng7, other7, expected7), ]: - result_union = rng.difference(other) + result_union = rng.difference(other, sort) + if sort: + expected = expected.sort_values() tm.assert_index_equal(result_union, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 424f6b1f9a77a..1b3b48075e292 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -29,6 +29,7 @@ from pandas.core.indexes.datetimes import _to_m8 from pandas.tests.indexes.common import Base from pandas.util.testing import assert_almost_equal +from pandas.core.sorting import safe_sort class TestIndex(Base): @@ -1119,7 +1120,8 @@ def test_iadd_string(self): @pytest.mark.parametrize("second_name,expected", [ (None, None), ('name', 'name')]) - def test_difference_name_preservation(self, second_name, expected): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_name_preservation(self, second_name, expected, sort): # TODO: replace with fixturesult first = self.strIndex[5:20] second = self.strIndex[:10] @@ -1127,7 +1129,7 @@ def test_difference_name_preservation(self, second_name, expected): first.name = 'name' second.name = second_name - result = first.difference(second) + result = first.difference(second, sort) assert tm.equalContents(result, answer) @@ -1136,22 +1138,37 @@ def test_difference_name_preservation(self, second_name, expected): else: assert result.name == expected - def test_difference_empty_arg(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_empty_arg(self, sort): first = self.strIndex[5:20] first.name == 'name' - result = first.difference([]) + result = first.difference([], sort) assert tm.equalContents(result, first) assert result.name == first.name - def test_difference_identity(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_identity(self, sort): first = self.strIndex[5:20] first.name == 'name' - result = first.difference(first) + result = first.difference(first, sort) assert len(result) == 0 assert result.name == first.name + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_sort(self, sort): + first = self.strIndex[5:20] + second = self.strIndex[:10] + + result = first.difference(second, sort) + expected = self.strIndex[10:20] + + if sort: + expected = expected.sort_values() + + tm.assert_index_equal(result, expected) + def test_symmetric_difference(self): # smoke index1 = Index([1, 2, 3, 4], name='index1') @@ -1196,17 +1213,19 @@ def test_symmetric_difference_non_index(self): assert tm.equalContents(result, expected) assert result.name == 'new_name' - def test_difference_type(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_type(self, sort): # GH 20040 # If taking difference of a set and itself, it # needs to preserve the type of the index skip_index_keys = ['repeats'] for key, index in self.generate_index_types(skip_index_keys): - result = index.difference(index) + 
result = index.difference(index, sort) expected = index.drop(index) tm.assert_index_equal(result, expected) - def test_intersection_difference(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_intersection_difference(self, sort): # GH 20040 # Test that the intersection of an index with an # empty index produces the same index as the difference @@ -1214,7 +1233,7 @@ def test_intersection_difference(self): skip_index_keys = ['repeats'] for key, index in self.generate_index_types(skip_index_keys): inter = index.intersection(index.drop(index)) - diff = index.difference(index) + diff = index.difference(index, sort) tm.assert_index_equal(inter, diff) @pytest.mark.parametrize("attr,expected", [ @@ -2424,14 +2443,17 @@ def test_intersection_different_type_base(self, klass): result = first.intersection(klass(second.values)) assert tm.equalContents(result, second) - def test_difference_base(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) index = self.create_index() first = index[:4] second = index[3:] - result = first.difference(second) - expected = Index([0, 1, 'a']) + result = first.difference(second, sort) + expected = Index([0, 'a', 1]) + if sort: + expected = Index(safe_sort(expected)) tm.assert_index_equal(result, expected) def test_symmetric_difference(self): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 1d068971fad2d..ee92782a87363 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -53,23 +53,51 @@ def test_fillna_timedelta(self): [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) tm.assert_index_equal(idx.fillna('x'), exp) - def test_difference_freq(self): + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_freq(self, sort): # GH14323: Difference of TimedeltaIndex should not preserve frequency index = timedelta_range("0 days", "5 days", freq="D") other = timedelta_range("1 days", "4 days", freq="D") expected = TimedeltaIndex(["0 days", "5 days"], freq=None) - idx_diff = index.difference(other) + idx_diff = index.difference(other, sort) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) other = timedelta_range("2 days", "5 days", freq="D") - idx_diff = index.difference(other) + idx_diff = index.difference(other, sort) expected = TimedeltaIndex(["0 days", "1 days"], freq=None) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) + @pytest.mark.parametrize("sort", [True, False]) + def test_difference_sort(self, sort): + + index = pd.TimedeltaIndex(["5 days", "3 days", "2 days", "4 days", + "1 days", "0 days"]) + + other = timedelta_range("1 days", "4 days", freq="D") + idx_diff = index.difference(other, sort) + + expected = TimedeltaIndex(["5 days", "0 days"], freq=None) + + if sort: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other, sort) + expected = TimedeltaIndex(["1 days", "0 days"], freq=None) + + if sort: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal('freq', idx_diff, expected) + def test_isin(self): index = tm.makeTimedeltaIndex(4) From 
2af56d4e23a1220e9f6bde2869621fe6fcd5d448 Mon Sep 17 00:00:00 2001 From: Pulkit Maloo Date: Mon, 19 Nov 2018 20:13:56 -0500 Subject: [PATCH 05/12] BUG: fixed .str.contains(..., na=False) for categorical series (#22170) --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/strings.py | 9 +++++---- pandas/tests/test_strings.py | 30 ++++++++++++++++++++++++------ 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3ed5c91141b16..7d8ee975ba02c 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1280,7 +1280,7 @@ Strings - Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`). - Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`). -- +- Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`) Interval ^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 1c4317d56f82b..6c21318c93597 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1857,7 +1857,7 @@ def __iter__(self): g = self.get(i) def _wrap_result(self, result, use_codes=True, - name=None, expand=None): + name=None, expand=None, fill_value=np.nan): from pandas.core.index import Index, MultiIndex @@ -1867,7 +1867,8 @@ def _wrap_result(self, result, use_codes=True, # so make it possible to skip this step as the method already did this # before the transformation... if use_codes and self._is_categorical: - result = take_1d(result, self._orig.cat.codes) + result = take_1d(result, self._orig.cat.codes, + fill_value=fill_value) if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'): return result @@ -2520,12 +2521,12 @@ def join(self, sep): def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): result = str_contains(self._parent, pat, case=case, flags=flags, na=na, regex=regex) - return self._wrap_result(result) + return self._wrap_result(result, fill_value=na) @copy(str_match) def match(self, pat, case=True, flags=0, na=np.nan): result = str_match(self._parent, pat, case=case, flags=flags, na=na) - return self._wrap_result(result) + return self._wrap_result(result, fill_value=na) @copy(str_replace) def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7b4e330ca6e3d..c0aab5d25e3fe 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -512,10 +512,28 @@ def test_contains(self): assert result.dtype == np.bool_ tm.assert_numpy_array_equal(result, expected) - # na - values = Series(['om', 'foo', np.nan]) - res = values.str.contains('foo', na="foo") - assert res.loc[2] == "foo" + def test_contains_for_object_category(self): + # gh 22158 + + # na for category + values = Series(["a", "b", "c", "a", np.nan], dtype="category") + result = values.str.contains('a', na=True) + expected = Series([True, False, False, True, True]) + tm.assert_series_equal(result, expected) + + result = values.str.contains('a', na=False) + expected = Series([True, False, False, True, False]) + tm.assert_series_equal(result, expected) + + # na for objects + values = Series(["a", "b", "c", "a", np.nan]) + result = values.str.contains('a', na=True) + expected = Series([True, False, False, True, True]) + tm.assert_series_equal(result, expected) + + result = values.str.contains('a', na=False) + expected = Series([True, False, False, True, False]) + tm.assert_series_equal(result, expected) def 
test_startswith(self): values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo']) @@ -2893,7 +2911,7 @@ def test_get_complex_nested(self, to_type): expected = Series([np.nan]) tm.assert_series_equal(result, expected) - def test_more_contains(self): + def test_contains_moar(self): # PR #1179 s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA', 'dog', 'cat']) @@ -2943,7 +2961,7 @@ def test_contains_nan(self): expected = Series([np.nan, np.nan, np.nan], dtype=np.object_) assert_series_equal(result, expected) - def test_more_replace(self): + def test_replace_moar(self): # PR #1179 s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA', 'dog', 'cat']) From b4c6632749cc3be35e6183551fa8059225c16254 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 19 Nov 2018 17:50:21 -0800 Subject: [PATCH 06/12] STY: isort tests/scalar, tests/tslibs, import libwindow instead of _window (#23787) --- pandas/core/window.py | 99 +++++++++---------- pandas/tests/scalar/interval/test_interval.py | 6 +- pandas/tests/scalar/period/test_asfreq.py | 12 +-- pandas/tests/scalar/period/test_period.py | 25 ++--- pandas/tests/scalar/test_nat.py | 15 +-- .../tests/scalar/timedelta/test_arithmetic.py | 68 ++++++------- .../scalar/timedelta/test_construction.py | 26 +++-- .../tests/scalar/timedelta/test_timedelta.py | 14 +-- .../tests/scalar/timestamp/test_arithmetic.py | 8 +- .../scalar/timestamp/test_comparisons.py | 5 +- .../tests/scalar/timestamp/test_rendering.py | 8 +- .../tests/scalar/timestamp/test_timestamp.py | 27 +++-- .../tests/scalar/timestamp/test_timezones.py | 14 +-- .../tests/scalar/timestamp/test_unary_ops.py | 13 +-- pandas/tests/tslibs/test_array_to_datetime.py | 5 +- pandas/tests/tslibs/test_conversion.py | 5 +- pandas/tests/tslibs/test_libfrequencies.py | 8 +- pandas/tests/tslibs/test_parsing.py | 10 +- pandas/tests/tslibs/test_period_asfreq.py | 2 +- pandas/tests/tslibs/test_timezones.py | 5 +- pandas/tests/tslibs/test_tslib.py | 2 +- pandas/tseries/converter.py | 14 +-- pandas/tseries/frequencies.py | 49 ++++----- pandas/tseries/holiday.py | 15 +-- pandas/tseries/offsets.py | 39 +++----- setup.cfg | 28 +----- 26 files changed, 238 insertions(+), 284 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index f7a60527602a1..494ab5d5963ee 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -7,41 +7,29 @@ """ from __future__ import division -import warnings -import numpy as np from collections import defaultdict from datetime import timedelta +from textwrap import dedent +import warnings + +import numpy as np + +import pandas._libs.window as libwindow +import pandas.compat as compat +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, Substitution, cache_readonly -from pandas.core.dtypes.generic import ( - ABCSeries, - ABCDataFrame, - ABCDatetimeIndex, - ABCTimedeltaIndex, - ABCPeriodIndex, - ABCDateOffset) from pandas.core.dtypes.common import ( - is_integer, - is_bool, - is_float_dtype, - is_integer_dtype, - needs_i8_conversion, - is_timedelta64_dtype, - is_list_like, - ensure_float64, - is_scalar) + ensure_float64, is_bool, is_float_dtype, is_integer, is_integer_dtype, + is_list_like, is_scalar, is_timedelta64_dtype, needs_i8_conversion) +from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCPeriodIndex, ABCSeries, + ABCTimedeltaIndex) from pandas.core.base import PandasObject, SelectionMixin -from pandas.core.groupby.base import GroupByMixin import pandas.core.common as 
com -import pandas._libs.window as _window - -from pandas import compat -from pandas.compat.numpy import function as nv -from pandas.util._decorators import (Substitution, Appender, - cache_readonly) from pandas.core.generic import _shared_docs -from textwrap import dedent - +from pandas.core.groupby.base import GroupByMixin _shared_docs = dict(**_shared_docs) _doc_template = """ @@ -688,10 +676,10 @@ def _apply_window(self, mean=True, **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, len(window)) - return _window.roll_window(np.concatenate((arg, - additional_nans)) - if center else arg, window, minp, - avg=mean) + return libwindow.roll_window(np.concatenate((arg, + additional_nans)) + if center else arg, window, minp, + avg=mean) result = np.apply_along_axis(f, self.axis, values) @@ -848,10 +836,10 @@ def _apply(self, func, name=None, window=None, center=None, # if we have a string function name, wrap it if isinstance(func, compat.string_types): - cfunc = getattr(_window, func, None) + cfunc = getattr(libwindow, func, None) if cfunc is None: raise ValueError("we do not support this function " - "in _window.{0}".format(func)) + "in libwindow.{func}".format(func=func)) def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, window) @@ -995,7 +983,7 @@ def f(arg, window, min_periods, closed): minp = _use_window(min_periods, window) if not raw: arg = Series(arg, index=self.obj.index) - return _window.roll_generic( + return libwindow.roll_generic( arg, window, minp, indexi, closed, offset, func, raw, args, kwargs) @@ -1160,8 +1148,8 @@ def std(self, ddof=1, *args, **kwargs): def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) - return _zsqrt(_window.roll_var(arg, window, minp, indexi, - self.closed, ddof)) + return _zsqrt(libwindow.roll_var(arg, window, minp, indexi, + self.closed, ddof)) return self._apply(f, 'std', check_minp=_require_min_periods(1), ddof=ddof, **kwargs) @@ -1331,15 +1319,15 @@ def quantile(self, quantile, interpolation='linear', **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) if quantile == 1.0: - return _window.roll_max(arg, window, minp, indexi, - self.closed) + return libwindow.roll_max(arg, window, minp, indexi, + self.closed) elif quantile == 0.0: - return _window.roll_min(arg, window, minp, indexi, - self.closed) + return libwindow.roll_min(arg, window, minp, indexi, + self.closed) else: - return _window.roll_quantile(arg, window, minp, indexi, - self.closed, quantile, - interpolation) + return libwindow.roll_quantile(arg, window, minp, indexi, + self.closed, quantile, + interpolation) return self._apply(f, 'quantile', quantile=quantile, **kwargs) @@ -2262,10 +2250,10 @@ def _apply(self, func, **kwargs): # if we have a string function name, wrap it if isinstance(func, compat.string_types): - cfunc = getattr(_window, func, None) + cfunc = getattr(libwindow, func, None) if cfunc is None: raise ValueError("we do not support this function " - "in _window.{0}".format(func)) + "in libwindow.{func}".format(func=func)) def func(arg): return cfunc(arg, self.com, int(self.adjust), @@ -2300,9 +2288,9 @@ def var(self, bias=False, *args, **kwargs): nv.validate_window_func('var', args, kwargs) def f(arg): - return _window.ewmcov(arg, arg, self.com, int(self.adjust), - int(self.ignore_na), int(self.min_periods), - int(bias)) + return libwindow.ewmcov(arg, arg, self.com, int(self.adjust), + int(self.ignore_na), int(self.min_periods), + int(bias)) return 
self._apply(f, **kwargs) @@ -2320,9 +2308,10 @@ def cov(self, other=None, pairwise=None, bias=False, **kwargs): def _get_cov(X, Y): X = self._shallow_copy(X) Y = self._shallow_copy(Y) - cov = _window.ewmcov(X._prep_values(), Y._prep_values(), self.com, - int(self.adjust), int(self.ignore_na), - int(self.min_periods), int(bias)) + cov = libwindow.ewmcov(X._prep_values(), Y._prep_values(), + self.com, int(self.adjust), + int(self.ignore_na), int(self.min_periods), + int(bias)) return X._wrap_result(cov) return _flex_binary_moment(self._selected_obj, other._selected_obj, @@ -2344,10 +2333,10 @@ def _get_corr(X, Y): Y = self._shallow_copy(Y) def _cov(x, y): - return _window.ewmcov(x, y, self.com, int(self.adjust), - int(self.ignore_na), - int(self.min_periods), - 1) + return libwindow.ewmcov(x, y, self.com, int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + 1) x_values = X._prep_values() y_values = Y._prep_values() diff --git a/pandas/tests/scalar/interval/test_interval.py b/pandas/tests/scalar/interval/test_interval.py index 7951fb7ddda0d..432f44725e2ba 100644 --- a/pandas/tests/scalar/interval/test_interval.py +++ b/pandas/tests/scalar/interval/test_interval.py @@ -1,11 +1,11 @@ from __future__ import division import numpy as np -from pandas import Interval, Timestamp, Timedelta -import pandas.core.common as com - import pytest +from pandas import Interval, Timedelta, Timestamp +import pandas.core.common as com + @pytest.fixture def interval(): diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 23762fda8c22a..064d1a96878c2 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -1,11 +1,11 @@ import pytest +from pandas._libs.tslibs.frequencies import ( + INVALID_FREQ_ERR_MSG, _period_code_map) from pandas.errors import OutOfBoundsDatetime -import pandas as pd from pandas import Period, offsets from pandas.util import testing as tm -from pandas._libs.tslibs.frequencies import _period_code_map class TestFreqConversion(object): @@ -328,13 +328,13 @@ def test_conv_weekly(self): assert ival_W.asfreq('W') == ival_W - msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + msg = INVALID_FREQ_ERR_MSG with pytest.raises(ValueError, match=msg): ival_W.asfreq('WK') def test_conv_weekly_legacy(self): # frequency conversion tests: from Weekly Frequency - msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + msg = INVALID_FREQ_ERR_MSG with pytest.raises(ValueError, match=msg): Period(freq='WK', year=2007, month=1, day=1) @@ -741,11 +741,11 @@ def test_asfreq_MS(self): assert initial.asfreq(freq="M", how="S") == Period('2013-01', 'M') - msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + msg = INVALID_FREQ_ERR_MSG with pytest.raises(ValueError, match=msg): initial.asfreq(freq="MS", how="S") with pytest.raises(ValueError, match=msg): - pd.Period('2013-01', 'MS') + Period('2013-01', 'MS') assert _period_code_map.get("MS") is None diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 14b26bce42d87..4d3aa1109c120 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1,20 +1,21 @@ -import pytest +from datetime import date, datetime, timedelta -import pytz import numpy as np -from datetime import datetime, date, timedelta - -import pandas as pd -from pandas import Timedelta, NaT, Period, Timestamp, offsets -import pandas.util.testing as tm -import pandas.core.indexes.period as 
period -from pandas.compat import text_type, iteritems -from pandas.compat.numpy import np_datetime64_compat +import pytest +import pytz from pandas._libs.tslibs import iNaT, period as libperiod from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG from pandas._libs.tslibs.parsing import DateParseError from pandas._libs.tslibs.timezones import dateutil_gettz, maybe_get_tz +from pandas.compat import iteritems, text_type +from pandas.compat.numpy import np_datetime64_compat + +import pandas as pd +from pandas import NaT, Period, Timedelta, Timestamp, offsets +import pandas.core.indexes.period as period +import pandas.util.testing as tm class TestPeriodConstruction(object): @@ -712,7 +713,7 @@ def test_period_deprecated_freq(self): "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"]} - msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + msg = INVALID_FREQ_ERR_MSG for exp, freqs in iteritems(cases): for freq in freqs: with pytest.raises(ValueError, match=msg): @@ -851,7 +852,7 @@ def test_properties_weekly_legacy(self): exp = Period(freq='W', year=2012, month=2, day=1) assert exp.days_in_month == 29 - msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + msg = INVALID_FREQ_ERR_MSG with pytest.raises(ValueError, match=msg): Period(freq='WK', year=2007, month=1, day=7) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index b49da5ed4eb24..ddf3984744114 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -1,15 +1,16 @@ -import pytest - from datetime import datetime, timedelta -import pytz import numpy as np -from pandas import (NaT, Index, Timestamp, Timedelta, Period, - DatetimeIndex, - TimedeltaIndex, Series, isna) +import pytest +import pytz + +from pandas._libs.tslib import iNaT + +from pandas import ( + DatetimeIndex, Index, NaT, Period, Series, Timedelta, TimedeltaIndex, + Timestamp, isna) from pandas.core.arrays import PeriodArray from pandas.util import testing as tm -from pandas._libs.tslib import iNaT @pytest.mark.parametrize('nat, idx', [(Timestamp('NaT'), DatetimeIndex), diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index b9af31db1d6e6..b6ad251d598ab 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -9,9 +9,9 @@ import pytest import pandas as pd -import pandas.util.testing as tm +from pandas import NaT, Timedelta, Timestamp from pandas.core import ops -from pandas import Timedelta, Timestamp, NaT +import pandas.util.testing as tm class TestTimedeltaAdditionSubtraction(object): @@ -189,54 +189,54 @@ def test_td_rsub_offset(self): assert result == Timedelta(-239, unit='h') def test_td_sub_timedeltalike_object_dtype_array(self): - # GH 21980 + # GH#21980 arr = np.array([Timestamp('20130101 9:01'), Timestamp('20121230 9:02')]) exp = np.array([Timestamp('20121231 9:01'), Timestamp('20121229 9:02')]) - res = arr - pd.Timedelta('1D') + res = arr - Timedelta('1D') tm.assert_numpy_array_equal(res, exp) def test_td_sub_mixed_most_timedeltalike_object_dtype_array(self): - # GH 21980 - now = pd.Timestamp.now() + # GH#21980 + now = Timestamp.now() arr = np.array([now, - pd.Timedelta('1D'), + Timedelta('1D'), np.timedelta64(2, 'h')]) - exp = np.array([now - pd.Timedelta('1D'), - pd.Timedelta('0D'), - np.timedelta64(2, 'h') - pd.Timedelta('1D')]) - res = arr - pd.Timedelta('1D') 
+ exp = np.array([now - Timedelta('1D'), + Timedelta('0D'), + np.timedelta64(2, 'h') - Timedelta('1D')]) + res = arr - Timedelta('1D') tm.assert_numpy_array_equal(res, exp) def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self): - # GH 21980 - now = pd.Timestamp.now() + # GH#21980 + now = Timestamp.now() arr = np.array([now, - pd.Timedelta('1D'), + Timedelta('1D'), np.timedelta64(2, 'h')]) with pytest.raises(TypeError): - pd.Timedelta('1D') - arr + Timedelta('1D') - arr @pytest.mark.parametrize('op', [operator.add, ops.radd]) def test_td_add_timedeltalike_object_dtype_array(self, op): - # GH 21980 + # GH#21980 arr = np.array([Timestamp('20130101 9:01'), Timestamp('20121230 9:02')]) exp = np.array([Timestamp('20130102 9:01'), Timestamp('20121231 9:02')]) - res = op(arr, pd.Timedelta('1D')) + res = op(arr, Timedelta('1D')) tm.assert_numpy_array_equal(res, exp) @pytest.mark.parametrize('op', [operator.add, ops.radd]) def test_td_add_mixed_timedeltalike_object_dtype_array(self, op): - # GH 21980 - now = pd.Timestamp.now() + # GH#21980 + now = Timestamp.now() arr = np.array([now, - pd.Timedelta('1D')]) - exp = np.array([now + pd.Timedelta('1D'), - pd.Timedelta('2D')]) - res = op(arr, pd.Timedelta('1D')) + Timedelta('1D')]) + exp = np.array([now + Timedelta('1D'), + Timedelta('2D')]) + res = op(arr, Timedelta('1D')) tm.assert_numpy_array_equal(res, exp) @@ -255,7 +255,7 @@ class TestTimedeltaMultiplicationDivision(object): # --------------------------------------------------------------- # Timedelta.__mul__, __rmul__ - @pytest.mark.parametrize('td_nat', [pd.NaT, + @pytest.mark.parametrize('td_nat', [NaT, np.timedelta64('NaT', 'ns'), np.timedelta64('NaT')]) @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) @@ -558,7 +558,7 @@ def test_mod_invalid(self): td = Timedelta(hours=37) with pytest.raises(TypeError): - td % pd.Timestamp('2018-01-22') + td % Timestamp('2018-01-22') with pytest.raises(TypeError): td % [] @@ -583,7 +583,7 @@ def test_rmod_invalid(self): td = Timedelta(minutes=3) with pytest.raises(TypeError): - pd.Timestamp('2018-01-22') % td + Timestamp('2018-01-22') % td with pytest.raises(TypeError): 15 % td @@ -608,8 +608,8 @@ def test_divmod_numeric(self): assert result result = divmod(td, np.nan) - assert result[0] is pd.NaT - assert result[1] is pd.NaT + assert result[0] is NaT + assert result[1] is NaT def test_divmod(self): # GH#19365 @@ -625,9 +625,9 @@ def test_divmod(self): assert isinstance(result[1], Timedelta) assert result[1] == Timedelta(0) - result = divmod(td, pd.NaT) + result = divmod(td, NaT) assert np.isnan(result[0]) - assert result[1] is pd.NaT + assert result[1] is NaT def test_divmod_offset(self): # GH#19365 @@ -643,7 +643,7 @@ def test_divmod_invalid(self): td = Timedelta(days=2, hours=6) with pytest.raises(TypeError): - divmod(td, pd.Timestamp('2018-01-22')) + divmod(td, Timestamp('2018-01-22')) def test_rdivmod_pytimedelta(self): # GH#19365 @@ -663,7 +663,7 @@ def test_rdivmod_invalid(self): td = Timedelta(minutes=3) with pytest.raises(TypeError): - divmod(pd.Timestamp('2018-01-22'), td) + divmod(Timestamp('2018-01-22'), td) with pytest.raises(TypeError): divmod(15, td) @@ -684,8 +684,8 @@ def test_rdivmod_invalid(self): ops.rsub]) @pytest.mark.parametrize('arr', [ np.array([Timestamp('20130101 9:01'), Timestamp('20121230 9:02')]), - np.array([pd.Timestamp.now(), pd.Timedelta('1D')]) + np.array([Timestamp.now(), Timedelta('1D')]) ]) def test_td_op_timedelta_timedeltalike_array(self, op, arr): with pytest.raises(TypeError): - op(arr, 
pd.Timedelta('1D')) + op(arr, Timedelta('1D')) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 4165b1aec705f..880eca914749b 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- from datetime import timedelta -import pytest import numpy as np +import pytest -import pandas as pd -from pandas import Timedelta +from pandas import Timedelta, offsets, to_timedelta def test_construction(): @@ -107,16 +106,15 @@ def test_construction(): assert Timedelta(10.5, unit='s').value == expected # offset - assert pd.to_timedelta(pd.offsets.Hour(2)) == Timedelta(hours=2) - assert Timedelta(pd.offsets.Hour(2)) == Timedelta(hours=2) - assert Timedelta(pd.offsets.Second(2)) == Timedelta(seconds=2) + assert to_timedelta(offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(offsets.Hour(2)) == Timedelta(hours=2) + assert Timedelta(offsets.Second(2)) == Timedelta(seconds=2) # GH#11995: unicode expected = Timedelta('1H') - result = pd.Timedelta(u'1H') + result = Timedelta(u'1H') assert result == expected - assert (pd.to_timedelta(pd.offsets.Hour(2)) == - Timedelta(u'0 days, 02:00:00')) + assert to_timedelta(offsets.Hour(2)) == Timedelta(u'0 days, 02:00:00') with pytest.raises(ValueError): Timedelta(u'foo bar') @@ -154,17 +152,17 @@ def test_td_from_repr_roundtrip(val): def test_overflow_on_construction(): - # xref https://github.com/statsmodels/statsmodels/issues/3374 - value = pd.Timedelta('1day').value * 20169940 + # GH#3374 + value = Timedelta('1day').value * 20169940 with pytest.raises(OverflowError): - pd.Timedelta(value) + Timedelta(value) # xref GH#17637 with pytest.raises(OverflowError): - pd.Timedelta(7 * 19999, unit='D') + Timedelta(7 * 19999, unit='D') with pytest.raises(OverflowError): - pd.Timedelta(timedelta(days=13 * 19999)) + Timedelta(timedelta(days=13 * 19999)) @pytest.mark.parametrize('fmt,exp', [ diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 58064213d9b3b..f7dac81a5b8d7 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -1,15 +1,17 @@ """ test the scalar Timedelta """ -import pytest +from datetime import timedelta import numpy as np -from datetime import timedelta +import pytest + +from pandas._libs.tslib import NaT, iNaT +import pandas.compat as compat import pandas as pd -import pandas.util.testing as tm +from pandas import ( + Series, Timedelta, TimedeltaIndex, timedelta_range, to_timedelta) from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type as ct -from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, - to_timedelta, compat) -from pandas._libs.tslib import iNaT, NaT +import pandas.util.testing as tm class TestTimedeltaArithmetic(object): diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 207bd103105ea..331d66589802d 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -1,14 +1,16 @@ # -*- coding: utf-8 -*- from datetime import datetime, timedelta -import pytest import numpy as np +import pytest -import pandas.util.testing as tm from pandas.compat import long + +from pandas import Timedelta, Timestamp +import pandas.util.testing as tm + from pandas.tseries import offsets from 
pandas.tseries.frequencies import to_offset -from pandas import Timestamp, Timedelta class TestTimestampArithmetic(object): diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index f293f8f161010..74dd52c48153f 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -2,10 +2,11 @@ from datetime import datetime import operator -import pytest import numpy as np +import pytest + +from pandas.compat import PY2, long -from pandas.compat import long, PY2 from pandas import Timestamp diff --git a/pandas/tests/scalar/timestamp/test_rendering.py b/pandas/tests/scalar/timestamp/test_rendering.py index c404b60567daf..29b65ee4df745 100644 --- a/pandas/tests/scalar/timestamp/test_rendering.py +++ b/pandas/tests/scalar/timestamp/test_rendering.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -import pytest +from distutils.version import LooseVersion +import pprint + import dateutil +import pytest import pytz # noqa # a test below uses pytz but only inside a `eval` call -import pprint -from distutils.version import LooseVersion - from pandas import Timestamp diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index c1f532d56304c..2d5c8f77dd338 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -1,29 +1,28 @@ """ test the scalar Timestamp """ -import pytz -import pytest -import dateutil import calendar +from datetime import datetime, timedelta import locale import unicodedata -import numpy as np +import dateutil from dateutil.tz import tzutc +import numpy as np +import pytest +import pytz from pytz import timezone, utc -from datetime import datetime, timedelta -import pandas.util.testing as tm +from pandas._libs.tslibs import conversion +from pandas._libs.tslibs.timezones import dateutil_gettz as gettz, get_timezone +from pandas.compat import PY2, PY3, long +from pandas.compat.numpy import np_datetime64_compat +from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td -from pandas.tseries import offsets - -from pandas._libs.tslibs import conversion -from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz +from pandas import NaT, Period, Timedelta, Timestamp +import pandas.util.testing as tm -from pandas.errors import OutOfBoundsDatetime -from pandas.compat import long, PY3, PY2 -from pandas.compat.numpy import np_datetime64_compat -from pandas import Timestamp, Period, Timedelta, NaT +from pandas.tseries import offsets class TestTimestampProperties(object): diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 6755d0bd4ae27..72e4fd42ae15a 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -2,20 +2,20 @@ """ Tests for Timestamp timezone-related methods """ -from datetime import datetime, date, timedelta - +from datetime import date, datetime, timedelta from distutils.version import LooseVersion + +import dateutil +from dateutil.tz import gettz, tzoffset import pytest import pytz from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError -import dateutil -from dateutil.tz import gettz, tzoffset -import pandas.util.testing as tm +from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td -from pandas import Timestamp, NaT -from pandas.errors 
import OutOfBoundsDatetime +from pandas import NaT, Timestamp +import pandas.util.testing as tm class TestTimestampTZOperations(object): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 21404bf7ef76f..d3ca85df3fd4f 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -1,18 +1,19 @@ # -*- coding: utf-8 -*- from datetime import datetime +from dateutil.tz import gettz import pytest import pytz from pytz import utc -from dateutil.tz import gettz - -import pandas.util.testing as tm -import pandas.util._test_decorators as td -from pandas.compat import PY3 from pandas._libs.tslibs import conversion from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG -from pandas import Timestamp, NaT +from pandas.compat import PY3 +import pandas.util._test_decorators as td + +from pandas import NaT, Timestamp +import pandas.util.testing as tm + from pandas.tseries.frequencies import to_offset diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index f2d9f35256a10..ff8880257b225 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- -from datetime import datetime, date +from datetime import date, datetime +from dateutil.tz.tz import tzoffset import numpy as np import pytest import pytz -from dateutil.tz.tz import tzoffset from pandas._libs import iNaT, tslib from pandas.compat.numpy import np_array_datetime64_compat + import pandas.util.testing as tm diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 76038136c26cb..de36c0bb2f789 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -3,11 +3,12 @@ import numpy as np import pytest -import pandas.util.testing as tm -from pandas import date_range from pandas._libs.tslib import iNaT from pandas._libs.tslibs import conversion, timezones +from pandas import date_range +import pandas.util.testing as tm + def compare_utc_to_local(tz_didx, utc_didx): f = lambda x: conversion.tz_convert_single(x, 'UTC', tz_didx.tz) diff --git a/pandas/tests/tslibs/test_libfrequencies.py b/pandas/tests/tslibs/test_libfrequencies.py index 18840fe1fd9b9..1bf6d0596e2fe 100644 --- a/pandas/tests/tslibs/test_libfrequencies.py +++ b/pandas/tests/tslibs/test_libfrequencies.py @@ -2,11 +2,11 @@ import pytest +from pandas._libs.tslibs.frequencies import ( + INVALID_FREQ_ERR_MSG, _period_str_to_code, get_rule_month, is_subperiod, + is_superperiod) + from pandas.tseries import offsets -from pandas._libs.tslibs.frequencies import (get_rule_month, - _period_str_to_code, - INVALID_FREQ_ERR_MSG, - is_superperiod, is_subperiod) def assert_aliases_deprecated(freq, expected, aliases): diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 2762fb9cbe000..f2b0ae98aff98 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -3,15 +3,17 @@ Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx """ from datetime import datetime + +from dateutil.parser import parse import numpy as np import pytest -from dateutil.parser import parse -import pandas.util._test_decorators as td -from pandas import compat -from pandas.util import testing as tm from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import parse_time_string 
+import pandas.compat as compat +import pandas.util._test_decorators as td + +from pandas.util import testing as tm class TestParseQuarters(object): diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 61737083e22ea..e5978a59bc2a1 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from pandas._libs.tslibs.frequencies import get_freq -from pandas._libs.tslibs.period import period_ordinal, period_asfreq +from pandas._libs.tslibs.period import period_asfreq, period_ordinal class TestPeriodFreqConversion(object): diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 12f04505d953d..68a6c1b09b992 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- from datetime import datetime +import dateutil.tz import pytest import pytz -import dateutil.tz -from pandas._libs.tslibs import timezones, conversion +from pandas._libs.tslibs import conversion, timezones + from pandas import Timestamp diff --git a/pandas/tests/tslibs/test_tslib.py b/pandas/tests/tslibs/test_tslib.py index 0df9328d0db16..17bd46cd235da 100644 --- a/pandas/tests/tslibs/test_tslib.py +++ b/pandas/tests/tslibs/test_tslib.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """Tests for functions from pandas._libs.tslibs""" -from datetime import datetime, date +from datetime import date, datetime from pandas._libs import tslibs diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 26d3f3cb85edc..05dd7cea1bd2f 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,15 +1,11 @@ # flake8: noqa import warnings -from pandas.plotting._converter import (time2num, - TimeConverter, TimeFormatter, - PeriodConverter, get_datevalue, - DatetimeConverter, - PandasAutoDateFormatter, - PandasAutoDateLocator, - MilliSecondLocator, get_finder, - TimeSeries_DateLocator, - TimeSeries_DateFormatter) +from pandas.plotting._converter import ( + DatetimeConverter, MilliSecondLocator, PandasAutoDateFormatter, + PandasAutoDateLocator, PeriodConverter, TimeConverter, TimeFormatter, + TimeSeries_DateFormatter, TimeSeries_DateLocator, get_datevalue, + get_finder, time2num) def register(): diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index ac9a87b258056..95904fab05322 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,40 +1,37 @@ # -*- coding: utf-8 -*- from datetime import timedelta -from pandas.compat import zip -from pandas import compat import re import numpy as np +from pytz import AmbiguousTimeError -from pandas.util._decorators import cache_readonly - -from pandas.core.dtypes.generic import ABCSeries -from pandas.core.dtypes.common import ( - is_period_arraylike, - is_timedelta64_dtype, - is_datetime64_dtype) - -from pandas.core.algorithms import unique - -from pandas.tseries.offsets import DateOffset - +from pandas._libs.algos import unique_deltas from pandas._libs.tslibs import Timedelta, Timestamp - +from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, int_to_weekday +from pandas._libs.tslibs.conversion import tz_convert +from pandas._libs.tslibs.fields import build_field_sarray import pandas._libs.tslibs.frequencies as libfreqs from pandas._libs.tslibs.frequencies import ( # noqa, semi-public API - get_freq, get_base_alias, get_to_timestamp_base, get_freq_code, - FreqGroup, + FreqGroup, 
get_base_alias, get_freq, get_freq_code, get_to_timestamp_base, is_subperiod, is_superperiod) -from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, int_to_weekday +from pandas._libs.tslibs.offsets import _offset_to_period_map # noqa:E402 import pandas._libs.tslibs.resolution as libresolution from pandas._libs.tslibs.resolution import Resolution -from pandas._libs.tslibs.fields import build_field_sarray -from pandas._libs.tslibs.conversion import tz_convert +import pandas.compat as compat +from pandas.compat import zip +from pandas.util._decorators import cache_readonly -from pandas._libs.algos import unique_deltas +from pandas.core.dtypes.common import ( + is_datetime64_dtype, is_period_arraylike, is_timedelta64_dtype) +from pandas.core.dtypes.generic import ABCSeries -from pytz import AmbiguousTimeError +from pandas.core.algorithms import unique +from pandas.tseries.offsets import ( # noqa + BDay, BMonthBegin, BMonthEnd, BQuarterBegin, BQuarterEnd, BYearBegin, + BYearEnd, CDay, DateOffset, Day, Hour, Micro, Milli, Minute, MonthBegin, + MonthEnd, Nano, QuarterBegin, QuarterEnd, Second, Week, YearBegin, YearEnd, + prefix_mapping) RESO_NS = 0 RESO_US = 1 @@ -54,14 +51,6 @@ # --------------------------------------------------------------------- # Offset names ("time rules") and related functions -from pandas._libs.tslibs.offsets import _offset_to_period_map # noqa:E402 -from pandas.tseries.offsets import (Nano, Micro, Milli, Second, # noqa - Minute, Hour, - Day, BDay, CDay, Week, MonthBegin, - MonthEnd, BMonthBegin, BMonthEnd, - QuarterBegin, QuarterEnd, BQuarterBegin, - BQuarterEnd, YearBegin, YearEnd, - BYearBegin, BYearEnd, prefix_mapping) try: cday = CDay() except NotImplementedError: diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 0497a827e2e1b..40e2b76672a4e 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -1,13 +1,16 @@ +from datetime import datetime, timedelta import warnings -from pandas import DateOffset, DatetimeIndex, Series, Timestamp -from pandas.errors import PerformanceWarning -from pandas.compat import add_metaclass -from datetime import datetime, timedelta -from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU # noqa -from pandas.tseries.offsets import Easter, Day +from dateutil.relativedelta import FR, MO, SA, SU, TH, TU, WE # noqa import numpy as np +from pandas.compat import add_metaclass +from pandas.errors import PerformanceWarning + +from pandas import DateOffset, DatetimeIndex, Series, Timestamp + +from pandas.tseries.offsets import Day, Easter + def next_monday(dt): """ diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 440a6a3558d9e..ca81b3bcfef2a 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -3,33 +3,24 @@ import functools import operator -from pandas.compat import range -from pandas import compat -import numpy as np - -from pandas.core.dtypes.generic import ABCPeriod -from pandas.core.tools.datetimes import to_datetime - -# import after tools, dateutil check from dateutil.easter import easter -from pandas._libs import tslibs, Timestamp, OutOfBoundsDatetime, Timedelta -from pandas.util._decorators import cache_readonly +import numpy as np from pandas._libs.tslibs import ( - ccalendar, conversion, - frequencies as libfrequencies) -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds -import pandas._libs.tslibs.offsets as liboffsets + NaT, OutOfBoundsDatetime, Timedelta, Timestamp, ccalendar, conversion, + delta_to_nanoseconds, frequencies as 
libfrequencies, normalize_date, + offsets as liboffsets) from pandas._libs.tslibs.offsets import ( - ApplyTypeError, - as_datetime, _is_normalized, - _get_calendar, _to_dt64, - apply_index_wraps, - roll_yearday, - shift_month, - BaseOffset) + ApplyTypeError, BaseOffset, _get_calendar, _is_normalized, _to_dt64, + apply_index_wraps, as_datetime, roll_yearday, shift_month) +import pandas.compat as compat +from pandas.compat import range from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly +from pandas.core.dtypes.generic import ABCPeriod + +from pandas.core.tools.datetimes import to_datetime __all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay', 'CBMonthEnd', 'CBMonthBegin', @@ -60,8 +51,8 @@ def as_timestamp(obj): def apply_wraps(func): @functools.wraps(func) def wrapper(self, other): - if other is tslibs.NaT: - return tslibs.NaT + if other is NaT: + return NaT elif isinstance(other, (timedelta, Tick, DateOffset)): # timedelta path return func(self, other) @@ -103,7 +94,7 @@ def wrapper(self, other): if self.normalize: # normalize_date returns normal datetime - result = tslibs.normalize_date(result) + result = normalize_date(result) if tz is not None and result.tzinfo is None: result = conversion.localize_pydatetime(result, tz) diff --git a/setup.cfg b/setup.cfg index 0214922585077..e8db1308741aa 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,7 +20,8 @@ ignore = E731, # do not assign a lambda expression, use a def C406, # Unnecessary list literal - rewrite as a dict literal. C408, # Unnecessary dict call - rewrite as a literal. - C409 # Unnecessary list passed to tuple() - rewrite as a tuple literal. + C409, # Unnecessary list passed to tuple() - rewrite as a tuple literal. + S001 # found modulo formatter (incorrect picks up mod operations) exclude = doc/sphinxext/*.py, doc/build/*.py, @@ -113,7 +114,6 @@ skip= pandas/core/ops.py, pandas/core/categorical.py, pandas/core/api.py, - pandas/core/window.py, pandas/core/indexing.py, pandas/core/apply.py, pandas/core/generic.py, @@ -258,26 +258,6 @@ skip= pandas/tests/arithmetic/test_datetime64.py, pandas/tests/arithmetic/conftest.py, pandas/tests/arithmetic/test_timedelta64.py, - pandas/tests/scalar/test_nat.py, - pandas/tests/scalar/timestamp/test_rendering.py, - pandas/tests/scalar/timestamp/test_timestamp.py, - pandas/tests/scalar/timestamp/test_timezones.py, - pandas/tests/scalar/timestamp/test_unary_ops.py, - pandas/tests/scalar/timestamp/test_arithmetic.py, - pandas/tests/scalar/timestamp/test_comparisons.py, - pandas/tests/scalar/period/test_asfreq.py, - pandas/tests/scalar/period/test_period.py, - pandas/tests/scalar/timedelta/test_construction.py, - pandas/tests/scalar/timedelta/test_timedelta.py, - pandas/tests/scalar/timedelta/test_arithmetic.py, - pandas/tests/scalar/interval/test_interval.py, - pandas/tests/tslibs/test_tslib.py, - pandas/tests/tslibs/test_period_asfreq.py, - pandas/tests/tslibs/test_timezones.py, - pandas/tests/tslibs/test_libfrequencies.py, - pandas/tests/tslibs/test_parsing.py, - pandas/tests/tslibs/test_array_to_datetime.py, - pandas/tests/tslibs/test_conversion.py, pandas/tests/internals/test_internals.py, pandas/tests/groupby/test_value_counts.py, pandas/tests/groupby/test_filters.py, @@ -387,10 +367,6 @@ skip= pandas/tests/sparse/frame/conftest.py, pandas/tests/computation/test_compat.py, pandas/tests/computation/test_eval.py, - pandas/tseries/holiday.py, - pandas/tseries/converter.py, - pandas/tseries/offsets.py, - pandas/tseries/frequencies.py, 
pandas/plotting/_core.py, pandas/plotting/_style.py, pandas/plotting/_timeseries.py, From c9c99129108cf16bc6c3684dc0df5a5fc60ffc8a Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 20 Nov 2018 02:54:32 +0100 Subject: [PATCH 07/12] TST: move .str-test to strings.py & parametrize it; precursor to #23582 (#23777) --- pandas/tests/series/test_api.py | 76 ---------------------- pandas/tests/test_strings.py | 112 ++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 76 deletions(-) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index f944d6f8c9d08..65f5c59deba36 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -602,82 +602,6 @@ def f(): ordered=True)) tm.assert_series_equal(result, expected) - def test_str_accessor_api_for_categorical(self): - # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.strings import StringMethods - s = Series(list('aabb')) - s = s + " " + s - c = s.astype('category') - assert isinstance(c.str, StringMethods) - - # str functions, which need special arguments - special_func_defs = [ - ('cat', (list("zyxw"),), {"sep": ","}), - ('center', (10,), {}), - ('contains', ("a",), {}), - ('count', ("a",), {}), - ('decode', ("UTF-8",), {}), - ('encode', ("UTF-8",), {}), - ('endswith', ("a",), {}), - ('extract', ("([a-z]*) ",), {"expand": False}), - ('extract', ("([a-z]*) ",), {"expand": True}), - ('extractall', ("([a-z]*) ",), {}), - ('find', ("a",), {}), - ('findall', ("a",), {}), - ('index', (" ",), {}), - ('ljust', (10,), {}), - ('match', ("a"), {}), # deprecated... - ('normalize', ("NFC",), {}), - ('pad', (10,), {}), - ('partition', (" ",), {"expand": False}), # not default - ('partition', (" ",), {"expand": True}), # default - ('repeat', (3,), {}), - ('replace', ("a", "z"), {}), - ('rfind', ("a",), {}), - ('rindex', (" ",), {}), - ('rjust', (10,), {}), - ('rpartition', (" ",), {"expand": False}), # not default - ('rpartition', (" ",), {"expand": True}), # default - ('slice', (0, 1), {}), - ('slice_replace', (0, 1, "z"), {}), - ('split', (" ",), {"expand": False}), # default - ('split', (" ",), {"expand": True}), # not default - ('startswith', ("a",), {}), - ('wrap', (2,), {}), - ('zfill', (10,), {}) - ] - _special_func_names = [f[0] for f in special_func_defs] - - # * get, join: they need a individual elements of type lists, but - # we can't make a categorical with lists as individual categories. - # -> `s.str.split(" ").astype("category")` will error! - # * `translate` has different interfaces for py2 vs. 
py3 - _ignore_names = ["get", "join", "translate"] - - str_func_names = [f for f in dir(s.str) if not ( - f.startswith("_") or - f in _special_func_names or - f in _ignore_names)] - - func_defs = [(f, (), {}) for f in str_func_names] - func_defs.extend(special_func_defs) - - for func, args, kwargs in func_defs: - res = getattr(c.str, func)(*args, **kwargs) - exp = getattr(s.str, func)(*args, **kwargs) - - if isinstance(res, DataFrame): - tm.assert_frame_equal(res, exp) - else: - tm.assert_series_equal(res, exp) - - invalid = Series([1, 2, 3]).astype('category') - msg = "Can only use .str accessor with string" - - with pytest.raises(AttributeError, match=msg): - invalid.str - assert not hasattr(invalid, 'str') - def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 from pandas.core.indexes.accessors import Properties diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index c0aab5d25e3fe..bfabaa7a1069a 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -26,6 +26,98 @@ def assert_series_or_index_equal(left, right): assert_index_equal(left, right) +_any_string_method = [ + ('cat', (), {'sep': ','}), # noqa: E241 + ('cat', (Series(list('zyx')),), {'sep': ',', # noqa: E241 + 'join': 'left'}), + ('center', (10,), {}), # noqa: E241 + ('contains', ('a',), {}), # noqa: E241 + ('count', ('a',), {}), # noqa: E241 + ('decode', ('UTF-8',), {}), # noqa: E241 + ('encode', ('UTF-8',), {}), # noqa: E241 + ('endswith', ('a',), {}), # noqa: E241 + ('extract', ('([a-z]*)',), {'expand': False}), # noqa: E241 + ('extract', ('([a-z]*)',), {'expand': True}), # noqa: E241 + ('extractall', ('([a-z]*)',), {}), # noqa: E241 + ('find', ('a',), {}), # noqa: E241 + ('findall', ('a',), {}), # noqa: E241 + ('get', (0,), {}), # noqa: E241 + # because "index" (and "rindex") fail intentionally + # if the string is not found, search only for empty string + ('index', ('',), {}), # noqa: E241 + ('join', (',',), {}), # noqa: E241 + ('ljust', (10,), {}), # noqa: E241 + ('match', ('a',), {}), # noqa: E241 + ('normalize', ('NFC',), {}), # noqa: E241 + ('pad', (10,), {}), # noqa: E241 + ('partition', (' ',), {'expand': False}), # noqa: E241 + ('partition', (' ',), {'expand': True}), # noqa: E241 + ('repeat', (3,), {}), # noqa: E241 + ('replace', ('a', 'z',), {}), # noqa: E241 + ('rfind', ('a',), {}), # noqa: E241 + ('rindex', ('',), {}), # noqa: E241 + ('rjust', (10,), {}), # noqa: E241 + ('rpartition', (' ',), {'expand': False}), # noqa: E241 + ('rpartition', (' ',), {'expand': True}), # noqa: E241 + ('slice', (0, 1,), {}), # noqa: E241 + ('slice_replace', (0, 1, 'z',), {}), # noqa: E241 + ('split', (' ',), {'expand': False}), # noqa: E241 + ('split', (' ',), {'expand': True}), # noqa: E241 + ('startswith', ('a',), {}), # noqa: E241 + # translating unicode points of "a" to "d" + ('translate', ({97: 100},), {}), # noqa: E241 + ('wrap', (2,), {}), # noqa: E241 + ('zfill', (10,), {}) # noqa: E241 +] + list(zip([ + # methods without positional arguments: zip with empty tuple and empty dict + 'capitalize', 'cat', 'get_dummies', + 'isalnum', 'isalpha', 'isdecimal', + 'isdigit', 'islower', 'isnumeric', + 'isspace', 'istitle', 'isupper', + 'len', 'lower', 'lstrip', 'partition', + 'rpartition', 'rsplit', 'rstrip', + 'slice', 'slice_replace', 'split', + 'strip', 'swapcase', 'title', 'upper' +], [()] * 100, [{}] * 100)) +ids, _, _ = zip(*_any_string_method) # use method name as fixture-id + + +# test that the above list captures all methods of 
StringMethods +missing_methods = {f for f in dir(strings.StringMethods) + if not f.startswith('_')} - set(ids) +assert not missing_methods + + +@pytest.fixture(params=_any_string_method, ids=ids) +def any_string_method(request): + """ + Fixture for all public methods of `StringMethods` + + This fixture returns a tuple of the method name and sample arguments + necessary to call the method. + + Returns + ------- + method_name : str + The name of the method in `StringMethods` + args : tuple + Sample values for the positional arguments + kwargs : dict + Sample values for the keyword arguments + + Examples + -------- + >>> def test_something(any_string_method): + ... s = pd.Series(['a', 'b', np.nan, 'd']) + ... + ... method_name, args, kwargs = any_string_method + ... method = getattr(s.str, method_name) + ... # will not raise + ... method(*args, **kwargs) + """ + return request.param + + class TestStringMethods(object): def test_api(self): @@ -40,6 +132,26 @@ def test_api(self): invalid.str assert not hasattr(invalid, 'str') + def test_api_for_categorical(self, any_string_method): + # https://github.com/pandas-dev/pandas/issues/10661 + s = Series(list('aabb')) + s = s + " " + s + c = s.astype('category') + assert isinstance(c.str, strings.StringMethods) + + method_name, args, kwargs = any_string_method + + result = getattr(c.str, method_name)(*args, **kwargs) + expected = getattr(s.str, method_name)(*args, **kwargs) + + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + elif isinstance(result, Series): + tm.assert_series_equal(result, expected) + else: + # str.cat(others=None) returns string, for example + assert result == expected + def test_iter(self): # GH3638 strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel' From 34e8d52295d4403670196dcdc8b6e0f29a30411b Mon Sep 17 00:00:00 2001 From: Erik Date: Mon, 19 Nov 2018 18:00:06 -0800 Subject: [PATCH 08/12] TST: For GH4861, Period and datetime in multiindex (#23776) --- pandas/tests/indexing/test_multiindex.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index ea17844a75033..f4caf17b60d65 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -1,3 +1,4 @@ +from datetime import datetime from warnings import catch_warnings import numpy as np @@ -7,7 +8,8 @@ import pandas as pd from pandas import ( - DataFrame, Index, MultiIndex, Panel, Series, Timestamp, date_range) + DataFrame, Index, MultiIndex, Panel, Period, Series, Timestamp, date_range, + period_range) from pandas.tests.indexing.common import _mklbl from pandas.util import testing as tm @@ -1340,3 +1342,20 @@ def test_panel_setitem_with_multiindex(self): p5.iloc[0, :, 0] = [1, 2] expected = Panel(arr, **axes) tm.assert_panel_equal(p5, expected) + + +def test_multiindex_period_datetime(): + # GH4861, using datetime in period of multiindex raises exception + + idx1 = Index(['a', 'a', 'a', 'b', 'b']) + idx2 = period_range('2012-01', periods=len(idx1), freq='M') + s = Series(np.random.randn(len(idx1)), [idx1, idx2]) + + # try Period as index + expected = s.iloc[0] + result = s.loc['a', Period('2012-01')] + assert result == expected + + # try datetime as index + result = s.loc['a', datetime(2012, 1, 1)] + assert result == expected From 295c2781049ee29952d4e2a13fb516b1d5cd9c2a Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 20 Nov 2018 03:17:27 
+0100 Subject: [PATCH 09/12] TST: add tests for keeping dtype in Series.update (#23604) --- pandas/tests/series/test_combine_concat.py | 40 +++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 3f137bf686715..e13cb9edffe2b 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -10,10 +10,10 @@ import pandas as pd from pandas import DataFrame, DatetimeIndex, Series, compat, date_range import pandas.util.testing as tm -from pandas.util.testing import assert_series_equal +from pandas.util.testing import assert_frame_equal, assert_series_equal -class TestSeriesCombine(): +class TestSeriesCombine(object): def test_append(self, datetime_series, string_series, object_series): appendedSeries = string_series.append(object_series) @@ -116,8 +116,40 @@ def test_update(self): df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) df['c'] = np.nan - # this will fail as long as series is a sub-class of ndarray - # df['c'].update(Series(['foo'],index=[0])) ##### + df['c'].update(Series(['foo'], index=[0])) + expected = DataFrame([[1, np.nan, 'foo'], [3, 2., np.nan]], + columns=['a', 'b', 'c']) + assert_frame_equal(df, expected) + + @pytest.mark.parametrize('other, dtype, expected', [ + # other is int + ([61, 63], 'int32', pd.Series([10, 61, 12], dtype='int32')), + ([61, 63], 'int64', pd.Series([10, 61, 12])), + ([61, 63], float, pd.Series([10., 61., 12.])), + ([61, 63], object, pd.Series([10, 61, 12], dtype=object)), + # other is float, but can be cast to int + ([61., 63.], 'int32', pd.Series([10, 61, 12], dtype='int32')), + ([61., 63.], 'int64', pd.Series([10, 61, 12])), + ([61., 63.], float, pd.Series([10., 61., 12.])), + ([61., 63.], object, pd.Series([10, 61., 12], dtype=object)), + # others is float, cannot be cast to int + ([61.1, 63.1], 'int32', pd.Series([10., 61.1, 12.])), + ([61.1, 63.1], 'int64', pd.Series([10., 61.1, 12.])), + ([61.1, 63.1], float, pd.Series([10., 61.1, 12.])), + ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)), + # other is object, cannot be cast + ([(61,), (63,)], 'int32', pd.Series([10, (61,), 12])), + ([(61,), (63,)], 'int64', pd.Series([10, (61,), 12])), + ([(61,), (63,)], float, pd.Series([10., (61,), 12.])), + ([(61,), (63,)], object, pd.Series([10, (61,), 12])) + ]) + def test_update_dtypes(self, other, dtype, expected): + + s = Series([10, 11, 12], dtype=dtype) + other = Series(other, index=[1, 3]) + s.update(other) + + assert_series_equal(s, expected) def test_concat_empty_series_dtypes_roundtrips(self): From 9f17a0744440d95f56a08f2469827d0fa14d2657 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Mon, 19 Nov 2018 21:18:36 -0500 Subject: [PATCH 10/12] DOC: Improve GL03 message re: blank lines at end of docstrings. (#23649) --- scripts/tests/test_validate_docstrings.py | 41 +++++++++++++++++++++-- scripts/validate_docstrings.py | 4 ++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index a996cb35fb84f..ca3efbfce20a7 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -288,6 +288,34 @@ def astype3(self, dtype): """ pass + def two_linebreaks_between_sections(self, foo): + """ + Test linebreaks message GL03. + + Note 2 blank lines before parameters section. + + + Parameters + ---------- + foo : str + Description of foo parameter. 
+ """ + pass + + def linebreak_at_end_of_docstring(self, foo): + """ + Test linebreaks message GL03. + + Note extra blank line at end of docstring. + + Parameters + ---------- + foo : str + Description of foo parameter. + + """ + pass + def plot(self, kind, **kwargs): """ Generate a plot. @@ -723,7 +751,8 @@ def test_bad_class(self): @capture_stderr @pytest.mark.parametrize("func", [ 'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method', - 'private_classes']) + 'private_classes', + ]) def test_bad_generic_functions(self, func): errors = validate_one(self._import_path( # noqa:F821 klass='BadGenericDocStrings', func=func))['errors'] @@ -811,8 +840,16 @@ def test_bad_generic_functions(self, func): 'E226 missing whitespace around arithmetic operator',)), ('BadExamples', 'missing_whitespace_after_comma', ("flake8 error: E231 missing whitespace after ',' (3 times)",)), + ('BadGenericDocStrings', 'two_linebreaks_between_sections', + ('Double line break found; please use only one blank line to ' + 'separate sections or paragraphs, and do not leave blank lines ' + 'at the end of docstrings',)), + ('BadGenericDocStrings', 'linebreak_at_end_of_docstring', + ('Double line break found; please use only one blank line to ' + 'separate sections or paragraphs, and do not leave blank lines ' + 'at the end of docstrings',)), ]) - def test_bad_examples(self, capsys, klass, func, msgs): + def test_bad_docstrings(self, capsys, klass, func, msgs): result = validate_one(self._import_path(klass=klass, func=func)) for msg in msgs: assert msg in ' '.join(err[1] for err in result['errors']) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 7704dd5d7d80f..2039fda90ef0f 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -67,7 +67,9 @@ 'in the docstring (do not close the quotes in the same line as ' 'the text, or leave a blank line between the last text and the ' 'quotes)', - 'GL03': 'Use only one blank line to separate sections or paragraphs', + 'GL03': 'Double line break found; please use only one blank line to ' + 'separate sections or paragraphs, and do not leave blank lines ' + 'at the end of docstrings', 'GL04': 'Private classes ({mentioned_private_classes}) should not be ' 'mentioned in public docstrings', 'GL05': 'Tabs found at the start of line "{line_with_tabs}", please use ' From 7a6ecf71e0846c0f39864da8fad90041559b06f0 Mon Sep 17 00:00:00 2001 From: Brian Choi Date: Mon, 19 Nov 2018 18:21:06 -0800 Subject: [PATCH 11/12] DOC: Fixed the doctsring for _set_axis_name (GH 22895) (#22969) --- pandas/core/generic.py | 204 ++++++++++++++++++++++------------------- 1 file changed, 110 insertions(+), 94 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dde671993a56b..97ea4fb96ce95 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1109,16 +1109,15 @@ def rename(self, *args, **kwargs): ('inplace', False)]) def rename_axis(self, mapper=None, **kwargs): """ - Alter the name of the index or name of Index object that is the - columns. + Set the name of the axis for the index or columns. Parameters ---------- mapper : scalar, list-like, optional Value to set the axis name attribute. index, columns : scalar, list-like, dict-like or function, optional - dict-like or functions transformations to apply to - that axis' values. + A scalar, list-like, dict-like or functions transformations to + apply to that axis' values. 
Use either ``mapper`` and ``axis`` to specify the axis to target with ``mapper``, or ``index`` @@ -1126,18 +1125,25 @@ def rename_axis(self, mapper=None, **kwargs): .. versionchanged:: 0.24.0 - axis : int or string, default 0 - copy : boolean, default True + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to rename. + copy : bool, default True Also copy underlying data. - inplace : boolean, default False + inplace : bool, default False Modifies the object directly, instead of creating a new Series or DataFrame. Returns ------- - renamed : Series, DataFrame, or None + Series, DataFrame, or None The same type as the caller or None if `inplace` is True. + See Also + -------- + Series.rename : Alter Series index labels or name. + DataFrame.rename : Alter DataFrame index labels or name. + Index.rename : Set new names on index. + Notes ----- Prior to version 0.21.0, ``rename_axis`` could also be used to change @@ -1162,75 +1168,73 @@ def rename_axis(self, mapper=None, **kwargs): We *highly* recommend using keyword arguments to clarify your intent. - See Also - -------- - Series.rename : Alter Series index labels or name. - DataFrame.rename : Alter DataFrame index labels or name. - Index.rename : Set new names on index. - Examples -------- **Series** - >>> s = pd.Series([1, 2, 3]) - >>> s.rename_axis("foo") - foo - 0 1 - 1 2 - 2 3 - dtype: int64 + >>> s = pd.Series(["dog", "cat", "monkey"]) + >>> s + 0 dog + 1 cat + 2 monkey + dtype: object + >>> s.rename_axis("animal") + animal + 0 dog + 1 cat + 2 monkey + dtype: object **DataFrame** - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - >>> df.rename_axis("foo") - A B - foo - 0 1 4 - 1 2 5 - 2 3 6 - - >>> df.rename_axis("bar", axis="columns") - bar A B - 0 1 4 - 1 2 5 - 2 3 6 - - >>> mi = pd.MultiIndex.from_product([['a', 'b', 'c'], [1, 2]], - ... names=['let','num']) - >>> df = pd.DataFrame({'x': [i for i in range(len(mi))], - ... 'y' : [i*10 for i in range(len(mi))]}, - ... index=mi) - >>> df.rename_axis(index={'num' : 'n'}) - x y - let n - a 1 0 0 - 2 1 10 - b 1 2 20 - 2 3 30 - c 1 4 40 - 2 5 50 - - >>> cdf = df.rename_axis(columns='col') - >>> cdf - col x y - let num - a 1 0 0 - 2 1 10 - b 1 2 20 - 2 3 30 - c 1 4 40 - 2 5 50 - - >>> cdf.rename_axis(columns=str.upper) - COL x y - let num - a 1 0 0 - 2 1 10 - b 1 2 20 - 2 3 30 - c 1 4 40 - 2 5 50 + >>> df = pd.DataFrame({"num_legs": [4, 4, 2], + ... "num_arms": [0, 0, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs num_arms + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("animal") + >>> df + num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("limbs", axis="columns") + >>> df + limbs num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + + **MultiIndex** + + >>> df.index = pd.MultiIndex.from_product([['mammal'], + ... ['dog', 'cat', 'monkey']], + ... names=['type', 'name']) + >>> df + limbs num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(index={'type': 'class'}) + limbs num_legs num_arms + class name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(columns=str.upper) + LIMBS num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 """ axes, kwargs = self._construct_axes_from_arguments((), kwargs) copy = kwargs.pop('copy', True) @@ -1285,45 +1289,57 @@ def rename_axis(self, mapper=None, **kwargs): def _set_axis_name(self, name, axis=0, inplace=False): """ - Alter the name or names of the axis. + Set the name(s) of the axis. 
Parameters ---------- name : str or list of str - Name for the Index, or list of names for the MultiIndex - axis : int or str - 0 or 'index' for the index; 1 or 'columns' for the columns - inplace : bool - whether to modify `self` directly or return a copy + Name(s) to set. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to set the label. The value 0 or 'index' specifies index, + and the value 1 or 'columns' specifies columns. + inplace : bool, default False + If `True`, do operation inplace and return None. .. versionadded:: 0.21.0 Returns ------- - renamed : same type as caller or None if inplace=True + Series, DataFrame, or None + The same type as the caller or `None` if `inplace` is `True`. See Also -------- - pandas.DataFrame.rename - pandas.Series.rename - pandas.Index.rename + DataFrame.rename : Alter the axis labels of :class:`DataFrame`. + Series.rename : Alter the index labels or set the index name + of :class:`Series`. + Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. Examples -------- - >>> df._set_axis_name("foo") - A - foo - 0 1 - 1 2 - 2 3 - >>> df.index = pd.MultiIndex.from_product([['A'], ['a', 'b', 'c']]) - >>> df._set_axis_name(["bar", "baz"]) - A - bar baz - A a 1 - b 2 - c 3 - """ + >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs + dog 4 + cat 4 + monkey 2 + >>> df._set_axis_name("animal") + num_legs + animal + dog 4 + cat 4 + monkey 2 + >>> df.index = pd.MultiIndex.from_product( + ... [["mammal"], ['dog', 'cat', 'monkey']]) + >>> df._set_axis_name(["type", "name"]) + legs + type name + mammal dog 4 + cat 4 + monkey 2 + """ + pd.MultiIndex.from_product([["mammal"], ['dog', 'cat', 'monkey']]) axis = self._get_axis_number(axis) idx = self._get_axis(axis).set_names(name) From df5eeece8ed28462832ee6e762192629c446670c Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 20 Nov 2018 03:27:34 +0100 Subject: [PATCH 12/12] DOC: more consistent flake8-commands in contributing.rst (#23724) --- Makefile | 2 +- doc/source/contributing.rst | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 4a4aca21e1b78..d2bd067950fd0 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ build: clean_pyc python setup.py build_ext --inplace lint-diff: - git diff master --name-only -- "*.py" | grep -E "pandas|scripts" | xargs flake8 + git diff upstream/master --name-only -- "*.py" | xargs flake8 develop: build -python setup.py develop diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index b44bd1cfd9007..6fdb5bdbb6b1d 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -575,7 +575,7 @@ the `flake8 `_ tool and report any stylistic errors in your code. Therefore, it is helpful before submitting code to run the check yourself on the diff:: - git diff master -u -- "*.py" | flake8 --diff + git diff upstream/master -u -- "*.py" | flake8 --diff This command will catch any stylistic errors in your changes specifically, but be beware it may not catch all of them. For example, if you delete the only @@ -584,21 +584,21 @@ unused function. However, style-checking the diff will not catch this because the actual import is not part of the diff. 
Thus, for completeness, you should run this command, though it will take longer:: - git diff master --name-only -- "*.py" | grep "pandas/" | xargs -r flake8 + git diff upstream/master --name-only -- "*.py" | xargs -r flake8 Note that on OSX, the ``-r`` flag is not available, so you have to omit it and run this slightly modified command:: - git diff master --name-only -- "*.py" | grep "pandas/" | xargs flake8 + git diff upstream/master --name-only -- "*.py" | xargs flake8 -Windows does not support the ``grep`` and ``xargs`` commands (unless installed -for example via the `MinGW `__ toolchain), but one can -imitate the behaviour as follows:: +Windows does not support the ``xargs`` command (unless installed for example +via the `MinGW `__ toolchain), but one can imitate the +behaviour as follows:: - for /f %i in ('git diff upstream/master --name-only ^| findstr pandas/') do flake8 %i + for /f %i in ('git diff upstream/master --name-only -- "*.py"') do flake8 %i -This will also get all the files being changed by the PR (and within the -``pandas/`` folder), and run ``flake8`` on them one after the other. +This will get all the files being changed by the PR (and ending with ``.py``), +and run ``flake8`` on them, one after the other. .. _contributing.import-formatting:
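
A companion illustration for the dtype-preservation cases added in PATCH 09/12: the short, hypothetical Python sketch below (not part of any patch in this series) simply replays two of the ``test_update_dtypes`` parametrizations interactively, so the ``Series.update`` behaviour being pinned down is visible without running pytest. The input values and expected dtypes are taken directly from that test; everything else (variable names, the ``print`` calls) is illustrative::

    # Illustration only -- mirrors the expectations of test_update_dtypes
    # (PATCH 09/12); not part of the patch series itself.
    import pandas as pd

    # Incoming integers can be cast back to the caller's dtype, so int32 is kept.
    s = pd.Series([10, 11, 12], dtype='int32')
    s.update(pd.Series([61, 63], index=[1, 3]))
    print(s.dtype, list(s))    # int32 [10, 61, 12]

    # Incoming floats that cannot be losslessly cast to int upcast to float64.
    s = pd.Series([10, 11, 12], dtype='int64')
    s.update(pd.Series([61.1, 63.1], index=[1, 3]))
    print(s.dtype, list(s))    # float64 [10.0, 61.1, 12.0]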