Merge remote-tracking branch 'upstream/master' into ea-array-protocol

pandas-dev · Nov 16, 2018 · bade9e4
2 parents 9506fef + e98032d
commit bade9e4
Show file tree

Hide file tree

Showing 421 changed files with 8,574 additions and 5,550 deletions.
diff --git a/.gitignore b/.gitignore
@@ -109,6 +109,5 @@ doc/build/html/index.html
 # Windows specific leftover:
 doc/tmp.sv
 doc/source/styled.xlsx
-doc/source/templates/
 env/
 doc/source/savefig/
diff --git a/.travis.yml b/.travis.yml
@@ -23,7 +23,7 @@ env:
 
 git:
     # for cloning
-    depth: 1000
+    depth: 1500
 
 matrix:
     fast_finish: true

diff --git a/ci/build_docs.sh b/ci/build_docs.sh
@@ -5,7 +5,7 @@ if [ "${TRAVIS_OS_NAME}" != "linux" ]; then
    exit 0
 fi
 
-cd "$TRAVIS_BUILD_DIR"
+cd "$TRAVIS_BUILD_DIR"/doc
 echo "inside $0"
 
 if [ "$DOC" ]; then
@@ -14,10 +14,6 @@ if [ "$DOC" ]; then
 
     source activate pandas
 
-    mv "$TRAVIS_BUILD_DIR"/doc /tmp
-    mv "$TRAVIS_BUILD_DIR/LICENSE" /tmp  # included in the docs.
-    cd /tmp/doc
-
     echo ###############################
     echo # Log file for the doc build  #
     echo ###############################
@@ -29,7 +25,7 @@ if [ "$DOC" ]; then
     echo # Create and send docs #
     echo ########################
 
-    cd /tmp/doc/build/html
+    cd build/html
     git config --global user.email "pandas-docs-bot@localhost.foo"
     git config --global user.name "pandas-docs-bot"
 

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -9,16 +9,19 @@
 # In the future we may want to add the validation of docstrings and other checks here.
 #
 # Usage:
-#   $ ./ci/code_checks.sh             # run all checks
-#   $ ./ci/code_checks.sh lint        # run linting only
-#   $ ./ci/code_checks.sh patterns    # check for patterns that should not exist
-#   $ ./ci/code_checks.sh doctests    # run doctests
+#   $ ./ci/code_checks.sh               # run all checks
+#   $ ./ci/code_checks.sh lint          # run linting only
+#   $ ./ci/code_checks.sh patterns      # check for patterns that should not exist
+#   $ ./ci/code_checks.sh doctests      # run doctests
+#   $ ./ci/code_checks.sh dependencies  # check that dependencies are consistent
 
 echo "inside $0"
 [[ $LINT ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; }
-[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unknown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; }
+[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" || "$1" == "dependencies"  ]] \
+    || { echo "Unknown command $1. Usage: $0 [lint|patterns|doctests|dependencies]"; exit 9999; }
 
 source activate pandas
+BASE_DIR="$(dirname $0)/.."
 RET=0
 CHECK=$1
 
@@ -44,6 +47,13 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    echo "flake8-rst --version"
+    flake8-rst --version
+
+    MSG='Linting code-blocks in .rst documentation' ; echo $MSG
+    flake8-rst doc/source --filename=*.rst
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
     # it doesn't make a difference, but we want to be internally consistent.
     # Note: this grep pattern is (intended to be) equivalent to the python
@@ -64,6 +74,9 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    echo "isort --version-number"
+    isort --version-number
+
     # Imports - Check formatting using isort see setup.cfg for settings
     MSG='Check import format using isort ' ; echo $MSG
     isort --recursive --check-only pandas
@@ -109,6 +122,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     ! grep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG
+    ! grep -R --exclude=*.pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Check for modules that pandas should not import' ; echo $MSG
     python -c "
 import sys
@@ -141,7 +158,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
 
     MSG='Doctests generic.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs"
+        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests top-level reshaping functions' ; echo $MSG
@@ -162,4 +179,11 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
 
 fi
 
+### DEPENDENCIES ###
+if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then
+    MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo $MSG
+    $BASE_DIR/scripts/generate_pip_deps_from_conda.py --compare
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+fi
+
 exit $RET
diff --git a/ci/deps/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml
@@ -8,10 +8,11 @@ dependencies:
   - bottleneck
   - cython>=0.28.2
   - fastparquet
+  - gitpython
   - html5lib
   - hypothesis>=3.58.0
   - ipykernel
-  - ipython==6.5.0
+  - ipython
   - ipywidgets
   - lxml
   - matplotlib

diff --git a/ci/deps/travis-36.yaml b/ci/deps/travis-36.yaml
@@ -9,6 +9,7 @@ dependencies:
   - fastparquet
   - flake8>=3.5
   - flake8-comprehensions
+  - flake8-rst=0.4.2
   - gcsfs
   - geopandas
   - html5lib

diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml
diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
diff --git a/doc/make.py b/doc/make.py
@@ -126,7 +126,12 @@ def _process_single_doc(self, single_doc):
             self.single_doc = 'api'
         elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
             self.single_doc_type = 'rst'
-            self.single_doc = os.path.splitext(os.path.basename(single_doc))[0]
+
+            if 'whatsnew' in single_doc:
+                basename = single_doc
+            else:
+                basename = os.path.basename(single_doc)
+            self.single_doc = os.path.splitext(basename)[0]
         elif os.path.exists(
                 os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
             self.single_doc_type = 'rst'

diff --git a/doc/source/10min.rst b/doc/source/10min.rst
@@ -45,7 +45,7 @@ a default integer index:
 
 .. ipython:: python
 
-   s = pd.Series([1,3,5,np.nan,6,8])
+   s = pd.Series([1, 3, 5, np.nan, 6, 8])
    s
 
 Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index
@@ -62,12 +62,12 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
 
 .. ipython:: python
 
-   df2 = pd.DataFrame({ 'A' : 1.,
-                        'B' : pd.Timestamp('20130102'),
-                        'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
-                        'D' : np.array([3] * 4,dtype='int32'),
-                        'E' : pd.Categorical(["test","train","test","train"]),
-                        'F' : 'foo' })
+   df2 = pd.DataFrame({'A': 1.,
+                       'B': pd.Timestamp('20130102'),
+                       'C': pd.Series(1, index=list(range(4)), dtype='float32'),
+                       'D': np.array([3] * 4, dtype='int32'),
+                       'E': pd.Categorical(["test", "train", "test", "train"]),
+                       'F': 'foo'})
    df2
 
 The columns of the resulting ``DataFrame`` have different 
@@ -283,9 +283,9 @@ Using the :func:`~Series.isin` method for filtering:
 .. ipython:: python
 
    df2 = df.copy()
-   df2['E'] = ['one', 'one','two','three','four','three']
+   df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
    df2
-   df2[df2['E'].isin(['two','four'])]
+   df2[df2['E'].isin(['two', 'four'])]
 
 Setting
 ~~~~~~~
@@ -295,7 +295,7 @@ by the indexes.
 
 .. ipython:: python
 
-   s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
+   s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
    s1
    df['F'] = s1
 
@@ -394,7 +394,7 @@ In addition, pandas automatically broadcasts along the specified dimension.
 
 .. ipython:: python
 
-   s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
+   s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
    s
    df.sub(s, axis='index')
 
@@ -492,7 +492,7 @@ section.
 
 .. ipython:: python
 
-   df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
+   df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
    df
    s = df.iloc[3]
    df.append(s, ignore_index=True)
@@ -512,12 +512,12 @@ See the :ref:`Grouping section <groupby>`.
 
 .. ipython:: python
 
-   df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
-                             'foo', 'bar', 'foo', 'foo'],
-                      'B' : ['one', 'one', 'two', 'three',
-                             'two', 'two', 'one', 'three'],
-                      'C' : np.random.randn(8),
-                      'D' : np.random.randn(8)})
+   df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+                            'foo', 'bar', 'foo', 'foo'],
+                      'B': ['one', 'one', 'two', 'three',
+                            'two', 'two', 'one', 'three'],
+                      'C': np.random.randn(8),
+                      'D': np.random.randn(8)})
    df
 
 Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting 
@@ -532,7 +532,7 @@ apply the ``sum`` function.
 
 .. ipython:: python
 
-   df.groupby(['A','B']).sum()
+   df.groupby(['A', 'B']).sum()
 
 Reshaping
 ---------
@@ -578,11 +578,11 @@ See the section on :ref:`Pivot Tables <reshaping.pivot>`.
 
 .. ipython:: python
 
-   df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
-                      'B' : ['A', 'B', 'C'] * 4,
-                      'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
-                      'D' : np.random.randn(12),
-                      'E' : np.random.randn(12)})
+   df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
+                      'B': ['A', 'B', 'C'] * 4,
+                      'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
+                      'D': np.random.randn(12),
+                      'E': np.random.randn(12)})
    df
 
 We can produce pivot tables from this data very easily:
@@ -653,7 +653,7 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the
 
 .. ipython:: python
 
-    df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
+    df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
 
 Convert the raw grades to a categorical data type.
 
@@ -753,13 +753,13 @@ Writing to a HDF5 Store.
 
 .. ipython:: python
 
-   df.to_hdf('foo.h5','df')
+   df.to_hdf('foo.h5', 'df')
 
 Reading from a HDF5 Store.
 
 .. ipython:: python
 
-   pd.read_hdf('foo.h5','df')
+   pd.read_hdf('foo.h5', 'df')
 
 .. ipython:: python
    :suppress:
@@ -796,7 +796,7 @@ If you are attempting to perform an operation you might see an exception like:
 .. code-block:: python
 
     >>> if pd.Series([False, True, False]):
-        print("I was true")
+    ...     print("I was true")
     Traceback
         ...
     ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().