Skip to content

Commit

Permalink
Merge tag 'v0.20.2' into releases
Browse files Browse the repository at this point in the history
Version 0.20.2

* tag 'v0.20.2': (68 commits)
  RLS: v0.20.2
  DOC: Update release.rst
  DOC: Whatsnew fixups (pandas-dev#16596)
  ERRR: Raise error in usecols when column doesn't exist but length matches (pandas-dev#16460)
  BUG: convert numpy strings in index names in HDF pandas-dev#13492 (pandas-dev#16444)
  PERF: vectorize _interp_limit (pandas-dev#16592)
  DOC: whatsnew 0.20.2 edits (pandas-dev#16587)
  API: Make is_strictly_monotonic_* private (pandas-dev#16576)
  BUG: reimplement MultiIndex.remove_unused_levels (pandas-dev#16565)
  Strictly monotonic (pandas-dev#16555)
  ENH: add .ngroup() method to groupby objects (pandas-dev#14026) (pandas-dev#14026)
  fix linting
  BUG: Incorrect handling of rolling.cov with offset window (pandas-dev#16244)
  BUG: select_as_multiple doesn't respect start/stop kwargs GH16209 (pandas-dev#16317)
  return empty MultiIndex for symmetrical difference on equal MultiIndexes (pandas-dev#16486)
  BUG: Bug in .resample() and .groupby() when aggregating on integers (pandas-dev#16549)
  BUG: Fixed tput output on windows (pandas-dev#16496)
  Strictly monotonic (pandas-dev#16555)
  BUG: fixed wrong order of ordered labels in pd.cut()
  BUG: Fixed to_html ignoring index_names parameter
  ...
  • Loading branch information
yarikoptic committed Jul 10, 2017
2 parents b3f6bc7 + 2814061 commit 483706d
Show file tree
Hide file tree
Showing 116 changed files with 2,456 additions and 574 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ after_success:

after_script:
- echo "after_script start"
- source activate pandas && python -c "import pandas; pandas.show_versions();"
- source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
- if [ -e /tmp/single.xml ]; then
ci/print_skipped.py /tmp/single.xml;
fi
Expand Down
9 changes: 9 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,11 @@ def setup(self):
self.dates = (np.datetime64('now') + self.offsets)
self.df = DataFrame({'key1': np.random.randint(0, 500, size=self.n), 'key2': np.random.randint(0, 100, size=self.n), 'value1': np.random.randn(self.n), 'value2': np.random.randn(self.n), 'value3': np.random.randn(self.n), 'dates': self.dates, })

N = 1000000
self.draws = pd.Series(np.random.randn(N))
labels = pd.Series(['foo', 'bar', 'baz', 'qux'] * (N // 4))
self.cats = labels.astype('category')

def time_groupby_multi_size(self):
self.df.groupby(['key1', 'key2']).size()

Expand All @@ -377,6 +382,10 @@ def time_groupby_dt_size(self):
def time_groupby_dt_timegrouper_size(self):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()

def time_groupby_size(self):
self.draws.groupby(self.cats).size()



#----------------------------------------------------------------------
# groupby with a variable value for ngroups
Expand Down
44 changes: 40 additions & 4 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ def time_getitem_list_like(self):
def time_getitem_array(self):
self.s[np.arange(10000)]

def time_getitem_lists(self):
self.s[np.arange(10000).tolist()]

def time_iloc_array(self):
self.s.iloc[np.arange(10000)]

Expand Down Expand Up @@ -190,10 +193,22 @@ def setup(self):
np.arange(1000)], names=['one', 'two'])

import string
self.mistring = MultiIndex.from_product(
[np.arange(1000),
np.arange(20), list(string.ascii_letters)],

self.mi_large = MultiIndex.from_product(
[np.arange(1000), np.arange(20), list(string.ascii_letters)],
names=['one', 'two', 'three'])
self.mi_med = MultiIndex.from_product(
[np.arange(1000), np.arange(10), list('A')],
names=['one', 'two', 'three'])
self.mi_small = MultiIndex.from_product(
[np.arange(100), list('A'), list('A')],
names=['one', 'two', 'three'])

rng = np.random.RandomState(4)
size = 1 << 16
self.mi_unused_levels = pd.MultiIndex.from_arrays([
rng.randint(0, 1 << 13, size),
rng.randint(0, 1 << 10, size)])[rng.rand(size) < 0.1]

def time_series_xs_mi_ix(self):
self.s.ix[999]
Expand All @@ -215,12 +230,33 @@ def time_multiindex_get_indexer(self):
(0, 16), (0, 17), (0, 18),
(0, 19)], dtype=object))

def time_multiindex_large_get_loc(self):
self.mi_large.get_loc((999, 19, 'Z'))

def time_multiindex_large_get_loc_warm(self):
for _ in range(1000):
self.mi_large.get_loc((999, 19, 'Z'))

def time_multiindex_med_get_loc(self):
self.mi_med.get_loc((999, 9, 'A'))

def time_multiindex_med_get_loc_warm(self):
for _ in range(1000):
self.mi_med.get_loc((999, 9, 'A'))

def time_multiindex_string_get_loc(self):
self.mistring.get_loc((999, 19, 'Z'))
self.mi_small.get_loc((99, 'A', 'A'))

def time_multiindex_small_get_loc_warm(self):
for _ in range(1000):
self.mi_small.get_loc((99, 'A', 'A'))

def time_is_monotonic(self):
self.miint.is_monotonic

def time_remove_unused_levels(self):
self.mi_unused_levels.remove_unused_levels()


class IntervalIndexing(object):
goal_time = 0.2
Expand Down
11 changes: 11 additions & 0 deletions asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def setup(self):
def time_series_dropna_int64(self):
self.s.dropna()


class series_dropna_datetime(object):
goal_time = 0.2

Expand All @@ -120,3 +121,13 @@ def setup(self):

def time_series_dropna_datetime(self):
self.s.dropna()


class series_clip(object):
    # ASV benchmark suite for Series.clip.
    goal_time = 0.2

    def setup(self):
        # Small (50-element) random Series; at this size the per-call
        # overhead of clip dominates the timing.
        self.s = pd.Series(np.random.randn(50))

    def time_series_clip(self):
        # BUG FIX: this method was named time_series_dropna_datetime, a
        # copy-paste from the series_dropna_datetime class above. That name
        # mislabeled the benchmark in ASV results -- it times clip, not
        # dropna.
        self.s.clip(0, 1)
27 changes: 16 additions & 11 deletions ci/install_travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,7 @@ if [ "$COVERAGE" ]; then
fi

echo
if [ "$BUILD_TEST" ]; then

# build & install testing
echo ["Starting installation test."]
bash ci/install_release_build.sh
conda uninstall -y cython
time pip install dist/*tar.gz || exit 1

else
if [ -z "$BUILD_TEST" ]; then

# build but don't install
echo "[build em]"
Expand Down Expand Up @@ -163,9 +155,22 @@ fi
# w/o removing anything else
echo
echo "[removing installed pandas]"
conda remove pandas --force
conda remove pandas -y --force

if [ -z "$BUILD_TEST" ]; then
if [ "$BUILD_TEST" ]; then

# remove any installation
pip uninstall -y pandas
conda list pandas
pip list --format columns |grep pandas

# build & install testing
echo ["building release"]
bash scripts/build_dist_for_release.sh
conda uninstall -y cython
time pip install dist/*tar.gz || exit 1

else

# install our pandas
echo
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.5_OSX.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ source activate pandas

echo "install 35_OSX"

conda install -n pandas -c conda-forge feather-format
conda install -n pandas -c conda-forge feather-format==0.3.1
20 changes: 13 additions & 7 deletions ci/script_multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,26 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496
echo PYTHONHASHSEED=$PYTHONHASHSEED

if [ "$BUILD_TEST" ]; then
echo "build-test"
echo "[build-test]"

echo "[env]"
pip list --format columns |grep pandas

echo "[running]"
cd /tmp
pwd
conda list pandas
echo "running"
python -c "import pandas; pandas.test(['-n 2'])"
unset PYTHONPATH
python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'

elif [ "$DOC" ]; then
echo "We are not running pytest as this is a doc-build"

elif [ "$COVERAGE" ]; then
echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas

else
echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
fi

RET="$?"
Expand Down
4 changes: 2 additions & 2 deletions ci/script_single.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ elif [ "$COVERAGE" ]; then
echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
else
echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas
pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
fi

RET="$?"
Expand Down
35 changes: 24 additions & 11 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,47 +34,60 @@
SPHINX_BUILD = 'sphinxbuild'


def upload_dev(user='pandas'):
def _process_user(user):
if user is None or user is False:
user = ''
else:
user = user + '@'
return user


def upload_dev(user=None):
    """rsync the built HTML docs to the pydata dev directory."""
    prefix = _process_user(user)
    cmd = ('cd build/html; rsync -avz . {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/dev/ -essh').format(prefix)
    if os.system(cmd):
        raise SystemExit('Upload to Pydata Dev failed')


def upload_dev_pdf(user='pandas'):
def upload_dev_pdf(user=None):
    """scp the built PDF docs to the pydata dev directory."""
    prefix = _process_user(user)
    cmd = ('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/dev/').format(prefix)
    if os.system(cmd):
        raise SystemExit('PDF upload to Pydata Dev failed')


def upload_stable(user='pandas'):
def upload_stable(user=None):
    """rsync the built HTML docs to the pydata stable directory."""
    prefix = _process_user(user)
    cmd = ('cd build/html; rsync -avz . {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/stable/ -essh').format(prefix)
    if os.system(cmd):
        raise SystemExit('Upload to stable failed')


def upload_stable_pdf(user='pandas'):
def upload_stable_pdf(user=None):
    """scp the built PDF docs to the pydata stable directory."""
    prefix = _process_user(user)
    cmd = ('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
           ':/usr/share/nginx/pandas/pandas-docs/stable/').format(prefix)
    if os.system(cmd):
        raise SystemExit('PDF upload to stable failed')


def upload_prev(ver, doc_root='./', user='pandas'):
def upload_prev(ver, doc_root='./', user=None):
    """Push an older release's HTML and PDF docs to its version directory."""
    user = _process_user(user)
    remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver

    # HTML tree first.
    local_dir = doc_root + 'build/html'
    cmd = ('cd %s; rsync -avz . %spandas.pydata.org:%s -essh'
           % (local_dir, user, remote_dir))
    print(cmd)
    if os.system(cmd):
        raise SystemExit(
            'Upload to %s from %s failed' % (remote_dir, local_dir))

    # Then the single PDF.
    local_dir = doc_root + 'build/latex'
    pdf_cmd = ('cd %s; scp pandas.pdf %spandas.pydata.org:%s'
               % (local_dir, user, remote_dir))
    if os.system(pdf_cmd):
        raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root))
Expand Down
10 changes: 10 additions & 0 deletions doc/source/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,16 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
In [11]: df.loc[2:3, :]
KeyError: 'Cannot get right slice bound for non-unique label: 3'
:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
:meth:`Index.is_unique`
.. ipython:: python
weakly_monotonic = pd.Index(['a', 'b', 'c', 'c'])
weakly_monotonic
weakly_monotonic.is_monotonic_increasing
weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique
Endpoints are inclusive
~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,7 @@ Serialization / IO / Conversion
Series.to_dense
Series.to_string
Series.to_clipboard
Series.to_latex

Sparse
~~~~~~
Expand Down Expand Up @@ -1704,6 +1705,7 @@ Computations / Descriptive Stats
GroupBy.mean
GroupBy.median
GroupBy.min
GroupBy.ngroup
GroupBy.nth
GroupBy.ohlc
GroupBy.prod
Expand Down
8 changes: 8 additions & 0 deletions doc/source/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,14 @@ the original values:
np.asarray(cat) > base
When you compare two unordered categoricals with the same categories, the order is not considered:

.. ipython:: python
c1 = pd.Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
c2 = pd.Categorical(['a', 'b'], categories=['b', 'a'], ordered=False)
c1 == c2
Operations
----------

Expand Down
Loading

0 comments on commit 483706d

Please sign in to comment.