Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into depr_concat_join_…
Browse files Browse the repository at this point in the history
…axes
  • Loading branch information
h-vetinari committed Sep 5, 2018
2 parents d2aa496 + a5fe9cf commit 144ef86
Show file tree
Hide file tree
Showing 46 changed files with 733 additions and 499 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ build: clean_pyc
python setup.py build_ext --inplace

lint-diff:
git diff master --name-only -- "*.py" | grep "pandas" | xargs flake8
git diff master --name-only -- "*.py" | grep -E "pandas|scripts" | xargs flake8

develop: build
-python setup.py develop
Expand Down
7 changes: 6 additions & 1 deletion ci/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ if [ "$LINT" ]; then
if [ $? -ne "0" ]; then
RET=1
fi

flake8 scripts/tests --filename=*.py
if [ $? -ne "0" ]; then
RET=1
fi
echo "Linting *.py DONE"

echo "Linting setup.py"
Expand Down Expand Up @@ -175,7 +180,7 @@ if [ "$LINT" ]; then
RET=1
fi
echo "Check for old-style classes DONE"

echo "Check for backticks incorrectly rendering because of missing spaces"
grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/

Expand Down
2 changes: 2 additions & 0 deletions ci/script_single.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ elif [ "$COVERAGE" ]; then
echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas

echo pytest -s -r xXs --strict scripts
pytest -s -r xXs --strict scripts
else
echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas
pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest
Expand Down
2 changes: 0 additions & 2 deletions doc/source/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@ consider the following ``DataFrame``:

.. note::

.. versionadded:: 0.20

A string passed to ``groupby`` may refer to either a column or an index level.
If a string matches both a column name and an index level name, a
``ValueError`` will be raised.
Expand Down
2 changes: 1 addition & 1 deletion doc/source/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ The same alignment can be used when ``others`` is a ``DataFrame``:
Concatenating a Series and many objects into a Series
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

All one-dimensional list-likes can be arbitrarily combined in a list-like container (including iterators, ``dict``-views, etc.):
All one-dimensional list-likes can be combined in a list-like container (including iterators, ``dict``-views, etc.):

.. ipython:: python
Expand Down
2 changes: 1 addition & 1 deletion doc/source/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2228,7 +2228,7 @@ To remove timezone from tz-aware ``DatetimeIndex``, use ``tz_localize(None)`` or
didx.tz_convert(None)
# tz_convert(None) is identical with tz_convert('UTC').tz_localize(None)
didx.tz_convert('UCT').tz_localize(None)
didx.tz_convert('UTC').tz_localize(None)
.. _timeseries.timezone_ambiguous:

Expand Down
15 changes: 11 additions & 4 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -507,14 +507,16 @@ Other API Changes
Deprecations
~~~~~~~~~~~~

- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`).
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`)
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
- The signature of :meth:`Series.to_csv` has been uniformed to that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the ``header`` argument now defaults to ``True``. (:issue:`19715`)
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
- :meth:`pandas.concat` has deprecated the ``join_axes``-keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result (:issue:`21951`)
- :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)

.. _whatsnew_0240.prior_deprecations:

Expand All @@ -526,6 +528,7 @@ Removal of prior version deprecations/changes
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`)
-

.. _whatsnew_0240.performance:
Expand Down Expand Up @@ -583,6 +586,7 @@ Datetimelike
- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`)
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`)

Timedelta
^^^^^^^^^
Expand Down Expand Up @@ -664,6 +668,7 @@ Indexing
- Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`)
- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`)
- Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`)
- Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`)
- ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`)

Missing
Expand Down Expand Up @@ -704,6 +709,7 @@ Groupby/Resample/Rolling
- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
- Bug in :meth:`Resampler.apply` when passing positional arguments to applied func (:issue:`14615`).
- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).

Sparse
^^^^^^
Expand Down Expand Up @@ -736,9 +742,10 @@ Build Changes
Other
^^^^^

- :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`)
- :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`)
- Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`)
- :meth: `~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`)
- :meth:`~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`)
- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly.
-
-
-
5 changes: 3 additions & 2 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):

Better to do this with Cython because of the enormous speed boost.
"""
cdef Py_ssize_t i, length
cdef dict result = {}
cdef:
Py_ssize_t i, length
dict result = {}

length = len(index)

Expand Down
5 changes: 1 addition & 4 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,7 @@ cdef class IndexEngine:
loc = self.get_loc(key)
value = convert_scalar(arr, value)

if PySlice_Check(loc) or util.is_array(loc):
arr[loc] = value
else:
util.set_value_at(arr, loc, value)
arr[loc] = value

cpdef get_loc(self, object val):
if is_definitely_invalid_key(val):
Expand Down
12 changes: 3 additions & 9 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -492,9 +492,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
if is_datelike and checknull(v):
result[i] = NPY_NAT
else:
# we can use the unsafe version because we know `result` is mutable
# since it was created from `np.empty`
util.set_value_at_unsafe(result, i, v)
result[i] = v

return result

Expand All @@ -505,9 +503,7 @@ cpdef ndarray[object] astype_unicode(ndarray arr):
ndarray[object] result = np.empty(n, dtype=object)

for i in range(n):
# we can use the unsafe version because we know `result` is mutable
# since it was created from `np.empty`
util.set_value_at_unsafe(result, i, unicode(arr[i]))
result[i] = unicode(arr[i])

return result

Expand All @@ -518,9 +514,7 @@ cpdef ndarray[object] astype_str(ndarray arr):
ndarray[object] result = np.empty(n, dtype=object)

for i in range(n):
# we can use the unsafe version because we know `result` is mutable
# since it was created from `np.empty`
util.set_value_at_unsafe(result, i, str(arr[i]))
result[i] = str(arr[i])

return result

Expand Down
41 changes: 22 additions & 19 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ cdef extern from "Python.h":

import numpy as np
cimport numpy as cnp
from numpy cimport ndarray, uint8_t, uint64_t, int64_t
from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t
cnp.import_array()

from util cimport UINT64_MAX, INT64_MAX, INT64_MIN
Expand Down Expand Up @@ -694,7 +694,7 @@ cdef class TextReader:
if ptr == NULL:
if not os.path.exists(source):
raise compat.FileNotFoundError(
'File %s does not exist' % source)
'File {source} does not exist'.format(source=source))
raise IOError('Initializing from file failed')

self.parser.source = ptr
Expand Down Expand Up @@ -772,9 +772,10 @@ cdef class TextReader:

if name == '':
if self.has_mi_columns:
name = 'Unnamed: %d_level_%d' % (i, level)
name = ('Unnamed: {i}_level_{lvl}'
.format(i=i, lvl=level))
else:
name = 'Unnamed: %d' % i
name = 'Unnamed: {i}'.format(i=i)
unnamed_count += 1

count = counts.get(name, 0)
Expand Down Expand Up @@ -849,8 +850,8 @@ cdef class TextReader:
# 'data has %d fields'
# % (passed_count, field_count))

if self.has_usecols and self.allow_leading_cols and \
not callable(self.usecols):
if (self.has_usecols and self.allow_leading_cols and
not callable(self.usecols)):
nuse = len(self.usecols)
if nuse == passed_count:
self.leading_cols = 0
Expand Down Expand Up @@ -1027,17 +1028,19 @@ cdef class TextReader:

if self.table_width - self.leading_cols > num_cols:
raise ParserError(
"Too many columns specified: expected %s and found %s" %
(self.table_width - self.leading_cols, num_cols))
"Too many columns specified: expected {expected} and "
"found {found}"
.format(expected=self.table_width - self.leading_cols,
found=num_cols))

results = {}
nused = 0
for i in range(self.table_width):
if i < self.leading_cols:
# Pass through leading columns always
name = i
elif self.usecols and not callable(self.usecols) and \
nused == len(self.usecols):
elif (self.usecols and not callable(self.usecols) and
nused == len(self.usecols)):
# Once we've gathered all requested columns, stop. GH5766
break
else:
Expand Down Expand Up @@ -1103,7 +1106,7 @@ cdef class TextReader:
col_res = _maybe_upcast(col_res)

if col_res is None:
raise ParserError('Unable to parse column %d' % i)
raise ParserError('Unable to parse column {i}'.format(i=i))

results[i] = col_res

Expand Down Expand Up @@ -1222,8 +1225,8 @@ cdef class TextReader:
elif dtype.kind == 'U':
width = dtype.itemsize
if width > 0:
raise TypeError("the dtype %s is not "
"supported for parsing" % dtype)
raise TypeError("the dtype {dtype} is not "
"supported for parsing".format(dtype=dtype))

# unicode variable width
return self._string_convert(i, start, end, na_filter,
Expand All @@ -1241,12 +1244,12 @@ cdef class TextReader:
return self._string_convert(i, start, end, na_filter,
na_hashset)
elif is_datetime64_dtype(dtype):
raise TypeError("the dtype %s is not supported "
raise TypeError("the dtype {dtype} is not supported "
"for parsing, pass this column "
"using parse_dates instead" % dtype)
"using parse_dates instead".format(dtype=dtype))
else:
raise TypeError("the dtype %s is not "
"supported for parsing" % dtype)
raise TypeError("the dtype {dtype} is not "
"supported for parsing".format(dtype=dtype))

cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end,
bint na_filter, kh_str_t *na_hashset):
Expand Down Expand Up @@ -2058,7 +2061,7 @@ cdef kh_float64_t* kset_float64_from_list(values) except NULL:
khiter_t k
kh_float64_t *table
int ret = 0
cnp.float64_t val
float64_t val
object value

table = kh_init_float64()
Expand Down Expand Up @@ -2101,7 +2104,7 @@ cdef raise_parser_error(object base, parser_t *parser):
Py_XDECREF(type)
raise old_exc

message = '%s. C error: ' % base
message = '{base}. C error: '.format(base=base)
if parser.error_msg != NULL:
if PY3:
message += parser.error_msg.decode('utf-8')
Expand Down
7 changes: 2 additions & 5 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ cnp.import_array()
cimport util
from lib import maybe_convert_objects

is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'


cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):

Expand Down Expand Up @@ -282,8 +280,7 @@ cdef class SeriesBinGrouper:
result = _get_result_array(res,
self.ngroups,
len(self.dummy_arr))

util.assign_value_1d(result, i, res)
result[i] = res

islider.advance(group_size)
vslider.advance(group_size)
Expand Down Expand Up @@ -408,7 +405,7 @@ cdef class SeriesGrouper:
self.ngroups,
len(self.dummy_arr))

util.assign_value_1d(result, lab, res)
result[lab] = res
counts[lab] = group_size
islider.advance(group_size)
vslider.advance(group_size)
Expand Down
31 changes: 0 additions & 31 deletions pandas/_libs/src/numpy_helper.h

This file was deleted.

7 changes: 3 additions & 4 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ from tslibs.nattype cimport checknull_with_nat, NPY_NAT

from tslibs.offsets cimport to_offset

from tslibs.timestamps cimport (create_timestamp_from_ts,
_NS_UPPER_BOUND, _NS_LOWER_BOUND)
from tslibs.timestamps cimport create_timestamp_from_ts
from tslibs.timestamps import Timestamp


Expand Down Expand Up @@ -350,8 +349,8 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
# check the bounds
if not need_to_iterate:

if ((fvalues < _NS_LOWER_BOUND).any()
or (fvalues > _NS_UPPER_BOUND).any()):
if ((fvalues < Timestamp.min.value).any()
or (fvalues > Timestamp.max.value).any()):
raise OutOfBoundsDatetime("cannot convert input with unit "
"'{unit}'".format(unit=unit))
result = (iresult * m).astype('M8[ns]')
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/tslibs/timestamps.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,3 @@ from np_datetime cimport npy_datetimestruct
cdef object create_timestamp_from_ts(int64_t value,
npy_datetimestruct dts,
object tz, object freq)

cdef int64_t _NS_UPPER_BOUND, _NS_LOWER_BOUND
Loading

0 comments on commit 144ef86

Please sign in to comment.