Skip to content

Commit

Permalink
Merge tag 'v0.20.2' into releases
Browse files Browse the repository at this point in the history
Version 0.20.2

* tag 'v0.20.2': (68 commits)
  RLS: v0.20.2
  DOC: Update release.rst
  DOC: Whatsnew fixups (pandas-dev#16596)
  ERRR: Raise error in usecols when column doesn't exist but length matches (pandas-dev#16460)
  BUG: convert numpy strings in index names in HDF pandas-dev#13492 (pandas-dev#16444)
  PERF: vectorize _interp_limit (pandas-dev#16592)
  DOC: whatsnew 0.20.2 edits (pandas-dev#16587)
  API: Make is_strictly_monotonic_* private (pandas-dev#16576)
  BUG: reimplement MultiIndex.remove_unused_levels (pandas-dev#16565)
  Strictly monotonic (pandas-dev#16555)
  ENH: add .ngroup() method to groupby objects (pandas-dev#14026) (pandas-dev#14026)
  fix linting
  BUG: Incorrect handling of rolling.cov with offset window (pandas-dev#16244)
  BUG: select_as_multiple doesn't respect start/stop kwargs GH16209 (pandas-dev#16317)
  return empty MultiIndex for symmetrical difference on equal MultiIndexes (pandas-dev#16486)
  BUG: Bug in .resample() and .groupby() when aggregating on integers (pandas-dev#16549)
  BUG: Fixed tput output on windows (pandas-dev#16496)
  Strictly monotonic (pandas-dev#16555)
  BUG: fixed wrong order of ordered labels in pd.cut()
  BUG: Fixed to_html ignoring index_names parameter
  ...
  • Loading branch information
yarikoptic committed Jul 10, 2017
2 parents b3f6bc7 + 2814061 commit 483706d
Show file tree
Hide file tree
Showing 116 changed files with 2,456 additions and 574 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ after_success:

after_script:
- echo "after_script start"
- source activate pandas && python -c "import pandas; pandas.show_versions();"
- source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
- if [ -e /tmp/single.xml ]; then
ci/print_skipped.py /tmp/single.xml;
fi
Expand Down
9 changes: 9 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,11 @@ def setup(self):
self.dates = (np.datetime64('now') + self.offsets)
self.df = DataFrame({'key1': np.random.randint(0, 500, size=self.n), 'key2': np.random.randint(0, 100, size=self.n), 'value1': np.random.randn(self.n), 'value2': np.random.randn(self.n), 'value3': np.random.randn(self.n), 'dates': self.dates, })

N = 1000000
self.draws = pd.Series(np.random.randn(N))
labels = pd.Series(['foo', 'bar', 'baz', 'qux'] * (N // 4))
self.cats = labels.astype('category')

def time_groupby_multi_size(self):
self.df.groupby(['key1', 'key2']).size()

Expand All @@ -377,6 +382,10 @@ def time_groupby_dt_size(self):
def time_groupby_dt_timegrouper_size(self):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()

def time_groupby_size(self):
self.draws.groupby(self.cats).size()



#----------------------------------------------------------------------
# groupby with a variable value for ngroups
Expand Down
44 changes: 40 additions & 4 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ def time_getitem_list_like(self):
def time_getitem_array(self):
self.s[np.arange(10000)]

def time_getitem_lists(self):
self.s[np.arange(10000).tolist()]

def time_iloc_array(self):
self.s.iloc[np.arange(10000)]

Expand Down Expand Up @@ -190,10 +193,22 @@ def setup(self):
np.arange(1000)], names=['one', 'two'])

import string
self.mistring = MultiIndex.from_product(
[np.arange(1000),
np.arange(20), list(string.ascii_letters)],

self.mi_large = MultiIndex.from_product(
[np.arange(1000), np.arange(20), list(string.ascii_letters)],
names=['one', 'two', 'three'])
self.mi_med = MultiIndex.from_product(
[np.arange(1000), np.arange(10), list('A')],
names=['one', 'two', 'three'])
self.mi_small = MultiIndex.from_product(
[np.arange(100), list('A'), list('A')],
names=['one', 'two', 'three'])

rng = np.random.RandomState(4)
size = 1 << 16
self.mi_unused_levels = pd.MultiIndex.from_arrays([
rng.randint(0, 1 << 13, size),
rng.randint(0, 1 << 10, size)])[rng.rand(size) < 0.1]

def time_series_xs_mi_ix(self):
self.s.ix[999]
Expand All @@ -215,12 +230,33 @@ def time_multiindex_get_indexer(self):
(0, 16), (0, 17), (0, 18),
(0, 19)], dtype=object))

def time_multiindex_large_get_loc(self):
self.mi_large.get_loc((999, 19, 'Z'))

def time_multiindex_large_get_loc_warm(self):
for _ in range(1000):
self.mi_large.get_loc((999, 19, 'Z'))

def time_multiindex_med_get_loc(self):
self.mi_med.get_loc((999, 9, 'A'))

def time_multiindex_med_get_loc_warm(self):
for _ in range(1000):
self.mi_med.get_loc((999, 9, 'A'))

def time_multiindex_string_get_loc(self):
self.mistring.get_loc((999, 19, 'Z'))
self.mi_small.get_loc((99, 'A', 'A'))

def time_multiindex_small_get_loc_warm(self):
for _ in range(1000):
self.mi_small.get_loc((99, 'A', 'A'))

def time_is_monotonic(self):
self.miint.is_monotonic

def time_remove_unused_levels(self):
self.mi_unused_levels.remove_unused_levels()


class IntervalIndexing(object):
goal_time = 0.2
Expand Down
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def setup(self):
def time_series_dropna_int64(self):
self.s.dropna()


class series_dropna_datetime(object):
goal_time = 0.2

Expand All @@ -120,3 +121,13 @@ def setup(self):

def time_series_dropna_datetime(self):
self.s.dropna()


class series_clip(object):
    # ASV benchmark suite for Series.clip.
    goal_time = 0.2

    def setup(self):
        # Small (50-element) random Series; at this size the per-call
        # overhead of clip dominates the timing.
        self.s = pd.Series(np.random.randn(50))

    def time_series_clip(self):
        # BUG FIX: this method was named time_series_dropna_datetime, a
        # copy-paste from the series_dropna_datetime class above. That name
        # mislabeled the benchmark in ASV results -- it times clip, not
        # dropna.
        self.s.clip(0, 1)
27 changes: 16 additions & 11 deletions ci/install_travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,7 @@ if [ "$COVERAGE" ]; then
fi

echo
if [ "$BUILD_TEST" ]; then

# build & install testing
echo ["Starting installation test."]
bash ci/install_release_build.sh
conda uninstall -y cython
time pip install dist/*tar.gz || exit 1

else
if [ -z "$BUILD_TEST" ]; then

# build but don't install
echo "[build em]"
Expand Down Expand Up @@ -163,9 +155,22 @@ fi
# w/o removing anything else
echo
echo "[removing installed pandas]"
conda remove pandas --force
conda remove pandas -y --force

if [ -z "$BUILD_TEST" ]; then
if [ "$BUILD_TEST" ]; then

# remove any installation
pip uninstall -y pandas
conda list pandas
pip list --format columns |grep pandas

# build & install testing
echo ["building release"]
bash scripts/build_dist_for_release.sh
conda uninstall -y cython
time pip install dist/*tar.gz || exit 1

else

# install our pandas
echo
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.5_OSX.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ source activate pandas

echo "install 35_OSX"

conda install -n pandas -c conda-forge feather-format
conda install -n pandas -c conda-forge feather-format==0.3.1
20 changes: 13 additions & 7 deletions ci/script_multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,26 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496
echo PYTHONHASHSEED=$PYTHONHASHSEED

if [ "$BUILD_TEST" ]; then
echo "build-test"
echo "[build-test]"

echo "[env]"
pip list --format columns |grep pandas

echo "[running]"
cd /tmp
pwd
conda list pandas
echo "running"
python -c "import pandas; pandas.test(['-n 2'])"
unset PYTHONPATH
python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'

elif [ "$DOC" ]; then
echo "We are not running pytest as this is a doc-build"

elif [ "$COVERAGE" ]; then
echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas

else
echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
fi

RET="$?"
Expand Down
4 changes: 2 additions & 2 deletions ci/script_single.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ elif [ "$COVERAGE" ]; then
echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
else
echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
fi

RET="$?"
Expand Down
35 changes: 24 additions & 11 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,47 +34,60 @@
SPHINX_BUILD = 'sphinxbuild'


def upload_dev(user='pandas'):
def _process_user(user):
if user is None or user is False:
user = ''
else:
user = user + '@'
return user


def upload_dev(user=None):
    """rsync the built HTML docs to the pydata dev directory."""
    prefix = _process_user(user)
    cmd = ('cd build/html; rsync -avz . {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh').format(prefix)
    if os.system(cmd):
        raise SystemExit('Upload to Pydata Dev failed')


def upload_dev_pdf(user='pandas'):
def upload_dev_pdf(user=None):
    """scp the built PDF docs to the pydata dev directory."""
    prefix = _process_user(user)
    cmd = ('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/dev/').format(prefix)
    if os.system(cmd):
        raise SystemExit('PDF upload to Pydata Dev failed')


def upload_stable(user='pandas'):
def upload_stable(user=None):
    """rsync the built HTML docs to the pydata stable directory."""
    prefix = _process_user(user)
    cmd = ('cd build/html; rsync -avz . {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh').format(prefix)
    if os.system(cmd):
        raise SystemExit('Upload to stable failed')


def upload_stable_pdf(user='pandas'):
def upload_stable_pdf(user=None):
    """scp the built PDF docs to the pydata stable directory."""
    prefix = _process_user(user)
    cmd = ('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/stable/').format(prefix)
    if os.system(cmd):
        raise SystemExit('PDF upload to stable failed')


def upload_prev(ver, doc_root='./', user='pandas'):
def upload_prev(ver, doc_root='./', user=None):
    """Push an older release's HTML and PDF docs to its version directory."""
    user = _process_user(user)
    remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver

    # HTML tree first.
    local_dir = doc_root + 'build/html'
    cmd = ('cd %s; rsync -avz . %spandas.pydata.org:%s -essh'
           % (local_dir, user, remote_dir))
    print(cmd)
    if os.system(cmd):
        raise SystemExit(
            'Upload to %s from %s failed' % (remote_dir, local_dir))

    # Then the single PDF.
    local_dir = doc_root + 'build/latex'
    pdf_cmd = ('cd %s; scp pandas.pdf %spandas.pydata.org:%s'
               % (local_dir, user, remote_dir))
    if os.system(pdf_cmd):
        raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root))
Expand Down
10 changes: 10 additions & 0 deletions doc/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,16 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
In [11]: df.loc[2:3, :]
KeyError: 'Cannot get right slice bound for non-unique label: 3'
:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
:meth:`Index.is_unique`
.. ipython:: python
weakly_monotonic = pd.Index(['a', 'b', 'c', 'c'])
weakly_monotonic
weakly_monotonic.is_monotonic_increasing
weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique
Endpoints are inclusive
~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,7 @@ Serialization / IO / Conversion
Series.to_dense
Series.to_string
Series.to_clipboard
Series.to_latex

Sparse
~~~~~~
Expand Down Expand Up @@ -1704,6 +1705,7 @@ Computations / Descriptive Stats
GroupBy.mean
GroupBy.median
GroupBy.min
GroupBy.ngroup
GroupBy.nth
GroupBy.ohlc
GroupBy.prod
Expand Down
8 changes: 8 additions & 0 deletions doc/source/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,14 @@ the original values:
np.asarray(cat) > base
When you compare two unordered categoricals with the same categories, the order is not considered:

.. ipython:: python
c1 = pd.Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
c2 = pd.Categorical(['a', 'b'], categories=['b', 'a'], ordered=False)
c1 == c2
Operations
----------

Expand Down
Loading

0 comments on commit 483706d

Please sign in to comment.