diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index eba96f0c6c2fc..fac5c211cdad8 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -9,16 +9,19 @@
 # In the future we may want to add the validation of docstrings and other checks here.
 #
 # Usage:
-#   $ ./ci/code_checks.sh            # run all checks
-#   $ ./ci/code_checks.sh lint       # run linting only
-#   $ ./ci/code_checks.sh patterns   # check for patterns that should not exist
-#   $ ./ci/code_checks.sh doctests   # run doctests
+#   $ ./ci/code_checks.sh               # run all checks
+#   $ ./ci/code_checks.sh lint          # run linting only
+#   $ ./ci/code_checks.sh patterns      # check for patterns that should not exist
+#   $ ./ci/code_checks.sh doctests      # run doctests
+#   $ ./ci/code_checks.sh dependencies  # check that dependencies are consistent
 
 echo "inside $0"
 [[ $LINT ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; }
-[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unknown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; }
+[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" || "$1" == "dependencies" ]] \
+    || { echo "Unknown command $1. Usage: $0 [lint|patterns|doctests|dependencies]"; exit 9999; }
 
 source activate pandas
+BASE_DIR="$(dirname $0)/.."
 
 RET=0
 CHECK=$1
@@ -172,4 +175,11 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
 
 fi
 
+### DEPENDENCIES ###
+if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then
+    MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo $MSG
+    $BASE_DIR/scripts/generate_pip_deps_from_conda.py --compare
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+fi
+
 exit $RET
diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml
deleted file mode 100644
index 2718c1cd582b6..0000000000000
--- a/ci/environment-dev.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-name: pandas-dev
-channels:
-  - defaults
-  - conda-forge
-dependencies:
-  - Cython>=0.28.2
-  - NumPy
-  - flake8
-  - flake8-comprehensions
-  - flake8-rst
-  - hypothesis>=3.58.0
-  - isort
-  - moto
-  - pytest>=3.6
-  - python-dateutil>=2.5.0
-  - python=3
-  - pytz
-  - setuptools>=24.2.0
-  - sphinx
-  - sphinxcontrib-spelling
diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt
deleted file mode 100644
index 8758c8154abca..0000000000000
--- a/ci/requirements-optional-conda.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-beautifulsoup4>=4.2.1
-blosc
-bottleneck>=1.2.0
-fastparquet>=0.1.2
-gcsfs
-html5lib
-ipython>=5.6.0
-ipykernel
-jinja2
-lxml
-matplotlib>=2.0.0
-nbsphinx
-numexpr>=2.6.1
-openpyxl
-pyarrow>=0.7.0
-pymysql
-pytables>=3.4.2
-pytest-cov
-pytest-xdist
-s3fs
-scipy>=0.18.1
-seaborn
-sqlalchemy
-statsmodels
-xarray
-xlrd
-xlsxwriter
-xlwt
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
deleted file mode 100644
index a1cb20c265974..0000000000000
--- a/ci/requirements_dev.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-# This file was autogenerated by scripts/convert_deps.py
-# Do not modify directly
-Cython>=0.28.2
-NumPy
-flake8
-flake8-comprehensions
-flake8-rst
-hypothesis>=3.58.0
-isort
-moto
-pytest>=3.6
-python-dateutil>=2.5.0
-pytz
-setuptools>=24.2.0
-sphinx
-sphinxcontrib-spelling
\ No newline at end of file
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 084f710091a1b..514a58456bcd9 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -170,7 +170,7 @@ We'll now kick off a three-step process:
 .. code-block:: none
 
    # Create and activate the build environment
-   conda env create -f ci/environment-dev.yaml
+   conda env create -f environment.yml
    conda activate pandas-dev
 
    # or with older versions of Anaconda:
@@ -180,9 +180,6 @@ We'll now kick off a three-step process:
    python setup.py build_ext --inplace -j 4
    python -m pip install -e .
 
-   # Install the rest of the optional dependencies
-   conda install -c defaults -c conda-forge --file=ci/requirements-optional-conda.txt
-
 At this point you should be able to import pandas from your locally built version::
 
    $ python  # start an interpreter
@@ -221,14 +218,12 @@ You'll need to have at least python3.5 installed on your system.
    . ~/virtualenvs/pandas-dev/bin/activate
 
    # Install the build dependencies
-   python -m pip install -r ci/requirements_dev.txt
+   python -m pip install -r requirements-dev.txt
+
    # Build and install pandas
    python setup.py build_ext --inplace -j 4
    python -m pip install -e .
 
-   # Install additional dependencies
-   python -m pip install -r ci/requirements-optional-pip.txt
-
 Creating a branch
 -----------------
 
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index ef1b397e372fb..8cd6331410d73 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -248,6 +248,7 @@ Backwards incompatible API changes
 
 - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`)
 - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
+- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
 
 .. _whatsnew_0240.api_breaking.deps:
 
@@ -971,6 +972,7 @@ Deprecations
 
 - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
 - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
+- Constructing a :class:`TimedeltaIndex` from ``datetime64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23539`)
 
 .. _whatsnew_0240.deprecations.datetimelike_int_ops:
 
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000000000..f66625e6a60c7
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,53 @@
+name: pandas-dev
+channels:
+  - defaults
+  - conda-forge
+dependencies:
+  # required
+  - NumPy
+  - python=3
+  - python-dateutil>=2.5.0
+  - pytz
+
+  # development
+  - Cython>=0.28.2
+  - flake8
+  - flake8-comprehensions
+  - flake8-rst
+  - hypothesis>=3.58.0
+  - isort
+  - moto
+  - pytest>=3.6
+  - setuptools>=24.2.0
+  - sphinx
+  - sphinxcontrib-spelling
+
+  # optional
+  - beautifulsoup4>=4.2.1
+  - blosc
+  - bottleneck>=1.2.0
+  - fastparquet>=0.1.2
+  - gcsfs
+  - html5lib
+  - ipython>=5.6.0
+  - ipykernel
+  - jinja2
+  - lxml
+  - matplotlib>=2.0.0
+  - nbsphinx
+  - numexpr>=2.6.1
+  - openpyxl
+  - pyarrow>=0.7.0
+  - pymysql
+  - pytables>=3.4.2
+  - pytest-cov
+  - pytest-xdist
+  - s3fs
+  - scipy>=0.18.1
+  - seaborn
+  - sqlalchemy
+  - statsmodels
+  - xarray
+  - xlrd
+  - xlsxwriter
+  - xlwt
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 08b83598bb6af..b0485cc82f07f 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -234,9 +234,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
         result = cls._simple_new(values, freq=freq, tz=tz)
 
         if freq_infer:
-            inferred = result.inferred_freq
-            if inferred:
-                result.freq = to_offset(inferred)
+            result.freq = to_offset(result.inferred_freq)
 
         # NB: Among other things not yet ported from the DatetimeIndex
         # constructor, this does not call _deepcopy_if_needed
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index cf3ba263d1f81..1f78e0c00bf00 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -1,18 +1,28 @@
 # -*- coding: utf-8 -*-
 from datetime import timedelta
+import warnings
 
 import numpy as np
 
 from pandas._libs import tslibs
-from pandas._libs.tslibs import Timedelta, Timestamp, NaT
+from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT
 from pandas._libs.tslibs.fields import get_timedelta_field
-from pandas._libs.tslibs.timedeltas import array_to_timedelta64
+from pandas._libs.tslibs.timedeltas import (
+    array_to_timedelta64, parse_timedelta_unit)
 
 from pandas import compat
 
 from pandas.core.dtypes.common import (
-    _TD_DTYPE, is_list_like)
-from pandas.core.dtypes.generic import ABCSeries
+    _TD_DTYPE,
+    is_object_dtype,
+    is_string_dtype,
+    is_float_dtype,
+    is_integer_dtype,
+    is_timedelta64_dtype,
+    is_datetime64_dtype,
+    is_list_like,
+    ensure_int64)
+from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
 from pandas.core.dtypes.missing import isna
 
 import pandas.core.common as com
@@ -139,9 +149,7 @@ def __new__(cls, values, freq=None):
         result = cls._simple_new(values, freq=freq)
 
         if freq_infer:
-            inferred = result.inferred_freq
-            if inferred:
-                result.freq = to_offset(inferred)
+            result.freq = to_offset(result.inferred_freq)
 
         return result
 
@@ -397,6 +405,163 @@ def f(x):
 # ---------------------------------------------------------------------
 # Constructor Helpers
 
+def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
+    """
+    Parameters
+    ----------
+    data : list-like
+    copy : bool, default False
+    unit : str, default "ns"
+    errors : {"raise", "coerce", "ignore"}, default "raise"
+
+    Returns
+    -------
+    ndarray[timedelta64[ns]]
+    inferred_freq : Tick or None
+
+    Raises
+    ------
+    ValueError : data cannot be converted to timedelta64[ns]
+
+    Notes
+    -----
+    Unlike `pandas.to_timedelta`, setting `errors=ignore` will not cause
+    errors to be ignored; they are caught and subsequently ignored at a
+    higher level.
+    """
+    inferred_freq = None
+    unit = parse_timedelta_unit(unit)
+
+    # Unwrap whatever we have into a np.ndarray
+    if not hasattr(data, 'dtype'):
+        # e.g. list, tuple
+        if np.ndim(data) == 0:
+            # i.e. generator
+            data = list(data)
+        data = np.array(data, copy=False)
+    elif isinstance(data, ABCSeries):
+        data = data._values
+    elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArrayMixin)):
+        inferred_freq = data.freq
+        data = data._data
+
+    # Convert whatever we have into timedelta64[ns] dtype
+    if is_object_dtype(data) or is_string_dtype(data):
+        # no need to make a copy, need to convert if string-dtyped
+        data = objects_to_td64ns(data, unit=unit, errors=errors)
+        copy = False
+
+    elif is_integer_dtype(data):
+        # treat as multiples of the given unit
+        data, copy_made = ints_to_td64ns(data, unit=unit)
+        copy = copy and not copy_made
+
+    elif is_float_dtype(data):
+        # treat as multiples of the given unit.  If after converting to nanos,
+        # there are fractional components left, these are truncated
+        # (i.e. NOT rounded)
+        mask = np.isnan(data)
+        coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
+        data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
+        data[mask] = iNaT
+        copy = False
+
+    elif is_timedelta64_dtype(data):
+        if data.dtype != _TD_DTYPE:
+            # non-nano unit
+            # TODO: watch out for overflows
+            data = data.astype(_TD_DTYPE)
+            copy = False
+
+    elif is_datetime64_dtype(data):
+        # GH#23539
+        warnings.warn("Passing datetime64-dtype data to TimedeltaIndex is "
+                      "deprecated, will raise a TypeError in a future "
+                      "version",
+                      FutureWarning, stacklevel=3)
+        data = ensure_int64(data).view(_TD_DTYPE)
+
+    else:
+        raise TypeError("dtype {dtype} cannot be converted to timedelta64[ns]"
+                        .format(dtype=data.dtype))
+
+    data = np.array(data, copy=copy)
+    assert data.dtype == 'm8[ns]', data
+    return data, inferred_freq
+
+
+def ints_to_td64ns(data, unit="ns"):
+    """
+    Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
+    the integers as multiples of the given timedelta unit.
+
+    Parameters
+    ----------
+    data : np.ndarray with integer-dtype
+    unit : str, default "ns"
+
+    Returns
+    -------
+    ndarray[timedelta64[ns]]
+    bool : whether a copy was made
+    """
+    copy_made = False
+    unit = unit if unit is not None else "ns"
+
+    if data.dtype != np.int64:
+        # converting to int64 makes a copy, so we can avoid
+        # re-copying later
+        data = data.astype(np.int64)
+        copy_made = True
+
+    if unit != "ns":
+        dtype_str = "timedelta64[{unit}]".format(unit=unit)
+        data = data.view(dtype_str)
+
+        # TODO: watch out for overflows when converting from lower-resolution
+        data = data.astype("timedelta64[ns]")
+        # the astype conversion makes a copy, so we can avoid re-copying later
+        copy_made = True
+
+    else:
+        data = data.view("timedelta64[ns]")
+
+    return data, copy_made
+
+
+def objects_to_td64ns(data, unit="ns", errors="raise"):
+    """
+    Convert an object-dtyped or string-dtyped array into a
+    timedelta64[ns]-dtyped array.
+
+    Parameters
+    ----------
+    data : ndarray or Index
+    unit : str, default "ns"
+    errors : {"raise", "coerce", "ignore"}, default "raise"
+
+    Returns
+    -------
+    ndarray[timedelta64[ns]]
+
+    Raises
+    ------
+    ValueError : data cannot be converted to timedelta64[ns]
+
+    Notes
+    -----
+    Unlike `pandas.to_timedelta`, setting `errors=ignore` will not cause
+    errors to be ignored; they are caught and subsequently ignored at a
+    higher level.
+    """
+    # coerce Index to np.ndarray, converting string-dtype if necessary
+    values = np.array(data, dtype=np.object_, copy=False)
+
+    result = array_to_timedelta64(values,
+                                  unit=unit, errors=errors)
+    return result.view('timedelta64[ns]')
+
+
 def _generate_regular_range(start, end, periods, offset):
     stride = offset.nanos
     if periods is None:
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 8da0672559006..c82cff19573e3 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -239,6 +239,21 @@ def __new__(cls, data=None,
                 dayfirst=False, yearfirst=False, dtype=None,
                 copy=False, name=None, verify_integrity=True):
 
+        if data is None:
+            # TODO: Remove this block and associated kwargs; GH#20535
+            result = cls._generate_range(start, end, periods,
+                                         freq=freq, tz=tz, normalize=normalize,
+                                         closed=closed, ambiguous=ambiguous)
+            result.name = name
+            return result
+
+        if is_scalar(data):
+            raise TypeError("{cls}() must be called with a "
+                            "collection of some kind, {data} was passed"
+                            .format(cls=cls.__name__, data=repr(data)))
+
+        # - Cases checked above all return/raise before reaching here - #
+
         # This allows to later ensure that the 'copy' parameter is honored:
         if isinstance(data, Index):
             ref_to_data = data._data
@@ -253,20 +268,8 @@ def __new__(cls, data=None,
         # if dtype has an embedded tz, capture it
         tz = dtl.validate_tz_from_dtype(dtype, tz)
 
-        if data is None:
-            # TODO: Remove this block and associated kwargs; GH#20535
-            result = cls._generate_range(start, end, periods,
-                                         freq=freq, tz=tz, normalize=normalize,
-                                         closed=closed, ambiguous=ambiguous)
-            result.name = name
-            return result
-
         if not isinstance(data, (np.ndarray, Index, ABCSeries,
                                  DatetimeArrayMixin)):
-            if is_scalar(data):
-                raise ValueError('DatetimeIndex() must be called with a '
-                                 'collection of some kind, %s was passed'
-                                 % repr(data))
             # other iterable of some kind
             if not isinstance(data, (list, tuple)):
                 data = list(data)
@@ -328,9 +331,7 @@ def __new__(cls, data=None,
             cls._validate_frequency(subarr, freq, ambiguous=ambiguous)
 
         if freq_infer:
-            inferred = subarr.inferred_freq
-            if inferred:
-                subarr.freq = to_offset(inferred)
+            subarr.freq = to_offset(subarr.inferred_freq)
 
         return subarr._deepcopy_if_needed(ref_to_data, copy)
 
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index 5b077a6984114..35e17c7400892 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -15,7 +15,8 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays.timedeltas import (
-    TimedeltaArrayMixin, _is_convertible_to_td, _to_m8)
+    TimedeltaArrayMixin, _is_convertible_to_td, _to_m8,
+    sequence_to_td64ns)
 from pandas.core.arrays import datetimelike as dtl
 
 from pandas.core.indexes.base import Index
@@ -33,10 +34,9 @@
     TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op,
     wrap_array_method, wrap_field_accessor)
 from pandas.core.tools.timedeltas import (
-    to_timedelta, _coerce_scalar_to_timedelta_type)
+    _coerce_scalar_to_timedelta_type)
 from pandas._libs import (lib, index as libindex,
                           join as libjoin, Timedelta, NaT)
-from pandas._libs.tslibs.timedeltas import array_to_timedelta64
 
 
 class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin,
@@ -139,12 +139,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
                 periods=None, closed=None, dtype=None, copy=False,
                 name=None, verify_integrity=True):
 
-        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
-            if copy:
-                return data.copy()
-            else:
-                return data._shallow_copy()
-
         freq, freq_infer = dtl.maybe_infer_freq(freq)
 
         if data is None:
@@ -154,32 +148,31 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
             result.name = name
             return result
 
-        if unit is not None:
-            data = to_timedelta(data, unit=unit, box=False)
-
         if is_scalar(data):
-            raise ValueError('TimedeltaIndex() must be called with a '
-                             'collection of some kind, {data} was passed'
-                             .format(data=repr(data)))
-
-        # convert if not already
-        if getattr(data, 'dtype', None) != _TD_DTYPE:
-            data = to_timedelta(data, unit=unit, box=False)
-        elif copy:
-            data = np.array(data, copy=True)
-
-        data = np.array(data, copy=False)
-        if data.dtype == np.object_:
-            data = array_to_timedelta64(data)
-        if data.dtype != _TD_DTYPE:
-            if is_timedelta64_dtype(data):
-                # non-nano unit
-                # TODO: watch out for overflows
-                data = data.astype(_TD_DTYPE)
+            raise TypeError('{cls}() must be called with a '
+                            'collection of some kind, {data} was passed'
+                            .format(cls=cls.__name__, data=repr(data)))
+
+        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
+            if copy:
+                return data.copy()
             else:
-                data = ensure_int64(data).view(_TD_DTYPE)
+                return data._shallow_copy()
 
-        assert data.dtype == 'm8[ns]', data.dtype
+        # - Cases checked above all return/raise before reaching here - #
+
+        data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
+        if inferred_freq is not None:
+            if freq is not None and freq != inferred_freq:
+                raise ValueError('Inferred frequency {inferred} from passed '
+                                 'values does not conform to passed frequency '
+                                 '{passed}'
+                                 .format(inferred=inferred_freq,
+                                         passed=freq.freqstr))
+            elif freq_infer:
+                freq = inferred_freq
+                freq_infer = False
+            verify_integrity = False
 
         subarr = cls._simple_new(data, name=name, freq=freq)
         # check that we are matching freqs
@@ -188,9 +181,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
             cls._validate_frequency(subarr, freq)
 
         if freq_infer:
-            inferred = subarr.inferred_freq
-            if inferred:
-                subarr.freq = to_offset(inferred)
+            subarr.freq = to_offset(subarr.inferred_freq)
 
         return subarr
 
diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py
index 220b14a9cb7c6..fad136b3b5a45 100644
--- a/pandas/core/tools/timedeltas.py
+++ b/pandas/core/tools/timedeltas.py
@@ -6,16 +6,13 @@
 import pandas as pd
 from pandas._libs import tslibs
 from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64,
-                                            array_to_timedelta64,
                                             parse_timedelta_unit)
 
-from pandas.core.dtypes.common import (
-    ensure_object,
-    is_integer_dtype,
-    is_timedelta64_dtype,
-    is_list_like)
+from pandas.core.dtypes.common import is_list_like
 from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
 
+from pandas.core.arrays.timedeltas import sequence_to_td64ns
+
 
 def to_timedelta(arg, unit='ns', box=True, errors='raise'):
     """
@@ -129,31 +126,27 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
     """Convert a list of objects to a timedelta index object."""
 
     if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
-        arg = np.array(list(arg), dtype='O')
-
-    # these are shortcut-able
-    if is_timedelta64_dtype(arg):
-        value = arg.astype('timedelta64[ns]')
-    elif is_integer_dtype(arg):
-        value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype(
-            'timedelta64[ns]', copy=False)
-    else:
-        try:
-            value = array_to_timedelta64(ensure_object(arg),
-                                         unit=unit, errors=errors)
-            value = value.astype('timedelta64[ns]', copy=False)
-        except ValueError:
-            if errors == 'ignore':
-                return arg
-            else:
-                # This else-block accounts for the cases when errors='raise'
-                # and errors='coerce'. If errors == 'raise', these errors
-                # should be raised. If errors == 'coerce', we shouldn't
-                # expect any errors to be raised, since all parsing errors
-                # cause coercion to pd.NaT. However, if an error / bug is
-                # introduced that causes an Exception to be raised, we would
-                # like to surface it.
-                raise
+        # This is needed only to ensure that in the case where we end up
+        # returning arg (errors == "ignore"), and where the input is a
+        # generator, we return a useful list-like instead of a
+        # used-up generator
+        arg = np.array(list(arg), dtype=object)
+
+    try:
+        value = sequence_to_td64ns(arg, unit=unit,
+                                   errors=errors, copy=False)[0]
+    except ValueError:
+        if errors == 'ignore':
+            return arg
+        else:
+            # This else-block accounts for the cases when errors='raise'
+            # and errors='coerce'. If errors == 'raise', these errors
+            # should be raised. If errors == 'coerce', we shouldn't
+            # expect any errors to be raised, since all parsing errors
+            # cause coercion to pd.NaT. However, if an error / bug is
+            # introduced that causes an Exception to be raised, we would
+            # like to surface it.
+            raise
 
     if box:
         from pandas import TimedeltaIndex
diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py
index f92a772f3eaad..50c0e9564e02d 100644
--- a/pandas/tests/arithmetic/test_timedelta64.py
+++ b/pandas/tests/arithmetic/test_timedelta64.py
@@ -1054,11 +1054,11 @@ def test_tdi_mul_float_series(self, box_df_fail):
         idx = tm.box_expected(idx, box)
 
         rng5f = np.arange(5, dtype='float64')
-        expected = TimedeltaIndex(rng5f * (rng5f + 0.1))
+        expected = TimedeltaIndex(rng5f * (rng5f + 1.0))
         box2 = pd.Series if box is pd.Index else box
         expected = tm.box_expected(expected, box2)
 
-        result = idx * Series(rng5f + 0.1)
+        result = idx * Series(rng5f + 1.0)
         tm.assert_equal(result, expected)
 
     # TODO: Put Series/DataFrame in others?
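
Taken together, the changes above route every list-like `TimedeltaIndex` input through `sequence_to_td64ns`. A minimal sketch of the resulting user-facing behavior, mirroring the new tests further down in this diff (assuming a pandas build with this patch applied):

```python
import numpy as np
import pandas as pd
import pandas.util.testing as tm

# Floats are treated as multiples of `unit` (nanoseconds by default);
# fractional nanoseconds are truncated, not rounded.
pd.TimedeltaIndex([2.3, 9.7])             # 2 and 9 nanoseconds
pd.TimedeltaIndex([1.5, 2.25], unit='D')  # 1 days 12:00:00, 2 days 06:00:00

# A TimedeltaIndex input hands over its freq, skipping the O(n) inference scan.
tdi = pd.timedelta_range('1 second', periods=100, freq='1s')
assert pd.TimedeltaIndex(tdi, freq='infer').freq == tdi.freq

# datetime64-dtype data still converts, but now with a FutureWarning;
# it will raise TypeError in a future version (GH#23539).
with tm.assert_produces_warning(FutureWarning):
    pd.TimedeltaIndex(np.asarray(pd.date_range('2016-01-01', periods=3)))

# Scalars now raise TypeError instead of ValueError.
try:
    pd.TimedeltaIndex('1 days')
except TypeError:
    pass
```

The freq hand-over is also why `verify_integrity` can be switched off when the input already carries a frequency: the constructor no longer needs to re-validate what it just inferred.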
diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py
index 04b2c4f280588..42a75f277faa6 100644
--- a/pandas/tests/indexes/datetimes/test_construction.py
+++ b/pandas/tests/indexes/datetimes/test_construction.py
@@ -318,7 +318,8 @@ def test_constructor_coverage(self):
         pytest.raises(ValueError, DatetimeIndex, start='1/1/2000',
                       end='1/10/2000')
 
-        pytest.raises(ValueError, DatetimeIndex, '1/1/2000')
+        with pytest.raises(TypeError):
+            DatetimeIndex('1/1/2000')
 
         # generator expression
         gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py
index a03698c9ea0de..82337ac37fbee 100644
--- a/pandas/tests/indexes/timedeltas/test_arithmetic.py
+++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py
@@ -453,10 +453,16 @@ def test_timedelta_ops_with_missing_values(self):
         # setup
         s1 = pd.to_timedelta(Series(['00:00:01']))
         s2 = pd.to_timedelta(Series(['00:00:02']))
-        sn = pd.to_timedelta(Series([pd.NaT]))
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # Passing datetime64-dtype data to TimedeltaIndex is deprecated
+            sn = pd.to_timedelta(Series([pd.NaT]))
+
         df1 = pd.DataFrame(['00:00:01']).apply(pd.to_timedelta)
         df2 = pd.DataFrame(['00:00:02']).apply(pd.to_timedelta)
-        dfn = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta)
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # Passing datetime64-dtype data to TimedeltaIndex is deprecated
+            dfn = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta)
+
         scalar1 = pd.to_timedelta('00:00:01')
         scalar2 = pd.to_timedelta('00:00:02')
         timedelta_NaT = pd.to_timedelta('NaT')
diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py
index 1abda624777c8..074c8904b55b1 100644
--- a/pandas/tests/indexes/timedeltas/test_construction.py
+++ b/pandas/tests/indexes/timedeltas/test_construction.py
@@ -5,11 +5,81 @@
 import pandas as pd
 import pandas.util.testing as tm
-from pandas import TimedeltaIndex, timedelta_range, to_timedelta
+from pandas import TimedeltaIndex, timedelta_range, to_timedelta, Timedelta
 
 
 class TestTimedeltaIndex(object):
 
+    def test_int64_nocopy(self):
+        # GH#23539 check that a copy isn't made when we pass int64 data
+        # and copy=False
+        arr = np.arange(10, dtype=np.int64)
+        tdi = TimedeltaIndex(arr, copy=False)
+        assert tdi._data.base is arr
+
+    def test_infer_from_tdi(self):
+        # GH#23539
+        # fast-path for inferring a frequency if the passed data already
+        # has one
+        tdi = pd.timedelta_range('1 second', periods=10**7, freq='1s')
+
+        result = pd.TimedeltaIndex(tdi, freq='infer')
+        assert result.freq == tdi.freq
+
+        # check that inferred_freq was not called by checking that the
+        # value has not been cached
+        assert "inferred_freq" not in getattr(result, "_cache", {})
+
+    def test_infer_from_tdi_mismatch(self):
+        # GH#23539
+        # fast-path for invalidating a frequency if the passed data already
+        # has one and it does not match the `freq` input
+        tdi = pd.timedelta_range('1 second', periods=100, freq='1s')
+
+        msg = ("Inferred frequency .* from passed values does "
+               "not conform to passed frequency")
+        with pytest.raises(ValueError, match=msg):
+            TimedeltaIndex(tdi, freq='D')
+
+    def test_dt64_data_invalid(self):
+        # GH#23539
+        # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64]
+        # does not yet, but will in the future
+        dti = pd.date_range('2016-01-01', periods=3)
+
+        msg = "cannot be converted to timedelta64"
+        with pytest.raises(TypeError, match=msg):
+            TimedeltaIndex(dti.tz_localize('Europe/Brussels'))
+
+        with tm.assert_produces_warning(FutureWarning):
+            TimedeltaIndex(dti)
+
+        with tm.assert_produces_warning(FutureWarning):
+            TimedeltaIndex(np.asarray(dti))
+
+    def test_float64_ns_rounded(self):
+        # GH#23539 without specifying a unit, floats are regarded as nanos,
+        # and fractional portions are truncated
+        tdi = TimedeltaIndex([2.3, 9.7])
+        expected = TimedeltaIndex([2, 9])
+        tm.assert_index_equal(tdi, expected)
+
+        # integral floats are non-lossy
+        tdi = TimedeltaIndex([2.0, 9.0])
+        expected = TimedeltaIndex([2, 9])
+        tm.assert_index_equal(tdi, expected)
+
+        # NaNs get converted to NaT
+        tdi = TimedeltaIndex([2.0, np.nan])
+        expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT])
+        tm.assert_index_equal(tdi, expected)
+
+    def test_float64_unit_conversion(self):
+        # GH#23539
+        tdi = TimedeltaIndex([1.5, 2.25], unit='D')
+        expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)])
+        tm.assert_index_equal(tdi, expected)
+
     def test_construction_base_constructor(self):
         arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')]
         tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
@@ -63,7 +133,8 @@ def test_constructor_coverage(self):
         pytest.raises(ValueError, TimedeltaIndex, start='1 days',
                       end='10 days')
 
-        pytest.raises(ValueError, TimedeltaIndex, '1 days')
+        with pytest.raises(TypeError):
+            TimedeltaIndex('1 days')
 
         # generator expression
         gen = (timedelta(i) for i in range(10))
diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py
index 2fc0a49d789fd..989955c0d7ee7 100644
--- a/pandas/tests/indexes/timedeltas/test_ops.py
+++ b/pandas/tests/indexes/timedeltas/test_ops.py
@@ -54,8 +54,7 @@ def test_minmax(self):
             assert pd.isna(getattr(obj, op)())
 
     def test_numpy_minmax(self):
-        dr = pd.date_range(start='2016-01-15', end='2016-01-20')
-        td = TimedeltaIndex(np.asarray(dr))
+        td = timedelta_range('16815 days', '16820 days', freq='D')
 
         assert np.min(td) == Timedelta('16815 days')
         assert np.max(td) == Timedelta('16820 days')
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index 65709b0eebaf7..79fa49b564ad6 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -506,6 +506,9 @@ def test_td_rfloordiv_numeric_series(self):
             # TODO: GH-19761. Change to TypeError.
             ser // td
 
+    # ----------------------------------------------------------------
+    # Timedelta.__mod__, __rmod__
+
     def test_mod_timedeltalike(self):
         # GH#19365
         td = Timedelta(hours=37)
@@ -545,9 +548,6 @@ def test_mod_offset(self):
         assert isinstance(result, Timedelta)
         assert result == Timedelta(hours=2)
 
-    # ----------------------------------------------------------------
-    # Timedelta.__mod__, __rmod__
-
     def test_mod_numeric(self):
         # GH#19365
         td = Timedelta(hours=37)
diff --git a/ci/requirements-optional-pip.txt b/requirements-dev.txt
similarity index 63%
rename from ci/requirements-optional-pip.txt
rename to requirements-dev.txt
index 62f1c555d8544..93145d948c218 100644
--- a/ci/requirements-optional-pip.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,17 @@
-# This file was autogenerated by scripts/convert_deps.py
-# Do not modify directly
+NumPy
+python-dateutil>=2.5.0
+pytz
+Cython>=0.28.2
+flake8
+flake8-comprehensions
+flake8-rst
+hypothesis>=3.58.0
+isort
+moto
+pytest>=3.6
+setuptools>=24.2.0
+sphinx
+sphinxcontrib-spelling
 beautifulsoup4>=4.2.1
 blosc
 bottleneck>=1.2.0
diff --git a/scripts/convert_deps.py b/scripts/convert_deps.py
deleted file mode 100755
index 3ff157e0a0d7b..0000000000000
--- a/scripts/convert_deps.py
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-Convert the conda environment.yaml to a pip requirements.txt
-"""
-import re
-import yaml
-
-exclude = {'python=3'}
-rename = {'pytables': 'tables'}
-
-with open("ci/environment-dev.yaml") as f:
-    dev = yaml.load(f)
-
-with open("ci/requirements-optional-conda.txt") as f:
-    optional = [x.strip() for x in f.readlines()]
-
-required = dev['dependencies']
-required = [rename.get(dep, dep) for dep in required if dep not in exclude]
-optional = [rename.get(dep, dep) for dep in optional if dep not in exclude]
-optional = [re.sub("(?<=[^<>])=", '==', dep) for dep in optional]
-
-
-with open("ci/requirements_dev.txt", 'wt') as f:
-    f.write("# This file was autogenerated by scripts/convert_deps.py\n")
-    f.write("# Do not modify directly\n")
-    f.write('\n'.join(required))
-
-
-with open("ci/requirements-optional-pip.txt", 'wt') as f:
-    f.write("# This file was autogenerated by scripts/convert_deps.py\n")
-    f.write("# Do not modify directly\n")
-    f.write("\n".join(optional))
diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py
new file mode 100755
index 0000000000000..2474214a4a53b
--- /dev/null
+++ b/scripts/generate_pip_deps_from_conda.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+"""
+Convert the conda environment.yml to the pip requirements-dev.txt,
+or check that they have the same packages (for the CI)
+
+Usage:
+
+    Generate `requirements-dev.txt`
+    $ ./scripts/generate_pip_deps_from_conda.py
+
+    Compare and fail (exit status != 0) if `requirements-dev.txt` has not been
+    generated with this script:
+    $ ./scripts/generate_pip_deps_from_conda.py --compare
+"""
+import argparse
+import os
+import re
+import sys
+import yaml
+
+
+EXCLUDE = {'python=3'}
+RENAME = {'pytables': 'tables'}
+
+
+def conda_package_to_pip(package):
+    """
+    Convert a conda package to its pip equivalent.
+
+    In most cases they are the same, these are the exceptions:
+    - Packages that should be excluded (in `EXCLUDE`)
+    - Packages that should be renamed (in `RENAME`)
+    - A package requiring a specific version, which conda pins with a single
+      equals sign (e.g. ``pandas=1.0``) and pip with two (e.g. ``pandas==1.0``)
+    """
+    if package in EXCLUDE:
+        return
+
+    if package in RENAME:
+        return RENAME[package]
+
+    return re.sub('(?<=[^<>])=', '==', package).strip()
+
+
+def main(conda_fname, pip_fname, compare=False):
+    """
+    Generate the pip dependencies file from the conda file, or compare that
+    they are synchronized (``compare=True``).
+
+    Parameters
+    ----------
+    conda_fname : str
+        Path to the conda file with dependencies (e.g. `environment.yml`).
+    pip_fname : str
+        Path to the pip file with dependencies (e.g. `requirements-dev.txt`).
+    compare : bool, default False
+        Whether to generate the pip file (``False``) or to compare if the
+        pip file has been generated with this script and the last version
+        of the conda file (``True``).
+
+    Returns
+    -------
+    bool
+        True if the comparison fails, False otherwise
+    """
+    with open(conda_fname) as conda_fd:
+        deps = yaml.safe_load(conda_fd)['dependencies']
+
+    pip_content = '\n'.join(filter(None, map(conda_package_to_pip, deps)))
+
+    if compare:
+        with open(pip_fname) as pip_fd:
+            return pip_content != pip_fd.read()
+    else:
+        with open(pip_fname, 'w') as pip_fd:
+            pip_fd.write(pip_content)
+        return False
+
+
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser(
+        description='convert (or compare) conda file to pip')
+    argparser.add_argument('--compare',
+                           action='store_true',
+                           help='compare whether the two files are equivalent')
+    args = argparser.parse_args()
+
+    repo_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+    res = main(os.path.join(repo_path, 'environment.yml'),
+               os.path.join(repo_path, 'requirements-dev.txt'),
+               compare=args.compare)
+    if res:
+        sys.stderr.write('`requirements-dev.txt` has to be generated with '
+                         '`{}` after `environment.yml` is modified.\n'.format(
+                             sys.argv[0]))
+    sys.exit(res)
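
For reference, a quick sanity check of the conversion rules the new script encodes. This is a sketch only: it assumes you run it from the repository root, where Python 3's namespace packages make `scripts/` importable without an `__init__.py`.

```python
from scripts.generate_pip_deps_from_conda import conda_package_to_pip

print(conda_package_to_pip('python=3'))        # None: listed in EXCLUDE, dropped
print(conda_package_to_pip('pytables'))        # 'tables': renamed via RENAME
print(conda_package_to_pip('pandas=1.0'))      # 'pandas==1.0': conda pin -> pip pin
print(conda_package_to_pip('Cython>=0.28.2'))  # unchanged: '>=' is left alone
```

In CI, `ci/code_checks.sh dependencies` invokes the same script with `--compare` and fails the build whenever `requirements-dev.txt` is out of sync with `environment.yml`.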