Merge remote-tracking branch 'upstream/master' into CLN-annonate-eq
MomIsBestFriend committed Nov 27, 2019
2 parents e69277e + f855025 commit 68115de
Showing 117 changed files with 2,920 additions and 1,477 deletions.
9 changes: 4 additions & 5 deletions README.md
@@ -164,12 +164,11 @@ pip install pandas
```

## Dependencies
- [NumPy](https://www.numpy.org): 1.13.3 or higher
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
- [pytz](https://pythonhosted.org/pytz): 2015.4 or higher
- [NumPy](https://www.numpy.org)
- [python-dateutil](https://labix.org/python-dateutil)
- [pytz](https://pythonhosted.org/pytz)

See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies)
for recommended and optional dependencies.
See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.

## Installation from sources
To install pandas from source you need Cython in addition to the normal
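An illustrative aside, not part of the diff: with the version pins moved out of the README and into the installation docs, one quick way to see which dependency versions are actually installed is `pd.show_versions()`. A minimal sketch:

```python
# Not part of the commit above -- a quick check of installed dependency versions.
import pandas as pd

# Prints the versions of pandas plus its required and optional dependencies
# (NumPy, python-dateutil, pytz, and others).
pd.show_versions()
```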
42 changes: 27 additions & 15 deletions asv_bench/benchmarks/categoricals.py
@@ -14,21 +14,6 @@
pass


class Concat:
def setup(self):
N = 10 ** 5
self.s = pd.Series(list("aabbcd") * N).astype("category")

self.a = pd.Categorical(list("aabbcd") * N)
self.b = pd.Categorical(list("bbcdjk") * N)

def time_concat(self):
pd.concat([self.s, self.s])

def time_union(self):
union_categoricals([self.a, self.b])


class Constructor:
def setup(self):
N = 10 ** 5
@@ -77,6 +62,33 @@ def time_existing_series(self):
pd.Categorical(self.series)


class CategoricalOps:
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
param_names = ["op"]

def setup(self, op):
N = 10 ** 5
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)

def time_categorical_op(self, op):
getattr(self.cat, op)("b")


class Concat:
def setup(self):
N = 10 ** 5
self.s = pd.Series(list("aabbcd") * N).astype("category")

self.a = pd.Categorical(list("aabbcd") * N)
self.b = pd.Categorical(list("bbcdjk") * N)

def time_concat(self):
pd.concat([self.s, self.s])

def time_union(self):
union_categoricals([self.a, self.b])


class ValueCounts:

params = [True, False]
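An illustrative aside, not part of the diff: the new `CategoricalOps` class uses asv's `params`/`param_names` mechanism, so each comparison dunder (`__lt__`, `__le__`, ...) is timed as a separate benchmark case. Roughly what one of those cases computes, assuming an ordered categorical like the one built in `setup`:

```python
# Not part of the commit above -- what a single CategoricalOps case exercises.
import pandas as pd

cat = pd.Categorical(list("aabbcd") * 10 ** 5, ordered=True)

# Equivalent to `cat <= "b"`; scalar comparisons are allowed because the
# categorical is ordered. asv would pass the method name in as `op`.
result = getattr(cat, "__le__")("b")
print(result[:6])  # [ True  True  True  True False False]
```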
89 changes: 0 additions & 89 deletions azure-pipelines.yml
@@ -16,95 +16,6 @@ jobs:
name: Windows
vmImage: vs2017-win2016

- job: 'Checks'
pool:
vmImage: ubuntu-16.04
timeoutInMinutes: 90
steps:
- script: |
echo '##vso[task.prependpath]$(HOME)/miniconda3/bin'
echo '##vso[task.setvariable variable=ENV_FILE]environment.yml'
echo '##vso[task.setvariable variable=AZURE]true'
displayName: 'Setting environment variables'
# Do not require a conda environment
- script: ci/code_checks.sh patterns
displayName: 'Looking for unwanted patterns'
condition: true

- script: |
sudo apt-get update
sudo apt-get install -y libc6-dev-i386
ci/setup_env.sh
displayName: 'Setup environment and build pandas'
condition: true
# Do not require pandas
- script: |
source activate pandas-dev
ci/code_checks.sh lint
displayName: 'Linting'
condition: true
- script: |
source activate pandas-dev
ci/code_checks.sh dependencies
displayName: 'Dependencies consistency'
condition: true
# Require pandas
- script: |
source activate pandas-dev
ci/code_checks.sh code
displayName: 'Checks on imported code'
condition: true
- script: |
source activate pandas-dev
ci/code_checks.sh doctests
displayName: 'Running doctests'
condition: true
- script: |
source activate pandas-dev
ci/code_checks.sh docstrings
displayName: 'Docstring validation'
condition: true
- script: |
source activate pandas-dev
ci/code_checks.sh typing
displayName: 'Typing validation'
condition: true
- script: |
source activate pandas-dev
pytest --capture=no --strict scripts
displayName: 'Testing docstring validation script'
condition: true
- script: |
source activate pandas-dev
cd asv_bench
asv check -E existing
git remote add upstream https://github.com/pandas-dev/pandas.git
git fetch upstream
if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
asv machine --yes
ASV_OUTPUT="$(asv dev)"
if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
echo "##vso[task.logissue type=error]Benchmarks run with errors"
echo "$ASV_OUTPUT"
exit 1
else
echo "Benchmarks run without errors"
fi
else
echo "Benchmarks did not run, no changes detected"
fi
displayName: 'Running benchmarks'
condition: true
- job: 'Web_and_Docs'
pool:
vmImage: ubuntu-16.04
17 changes: 7 additions & 10 deletions ci/azure/posix.yml
@@ -44,16 +44,13 @@ jobs:
PATTERN: "not slow and not network"
LOCALE_OVERRIDE: "zh_CN.UTF-8"

# https://github.com/pandas-dev/pandas/issues/29432
# py37_np_dev:
# ENV_FILE: ci/deps/azure-37-numpydev.yaml
# CONDA_PY: "37"
# PATTERN: "not slow and not network"
# TEST_ARGS: "-W error"
# PANDAS_TESTING_MODE: "deprecate"
# EXTRA_APT: "xsel"
# # TODO:
# continueOnError: true
py37_np_dev:
ENV_FILE: ci/deps/azure-37-numpydev.yaml
CONDA_PY: "37"
PATTERN: "not slow and not network"
TEST_ARGS: "-W error"
PANDAS_TESTING_MODE: "deprecate"
EXTRA_APT: "xsel"

steps:
- script: |
4 changes: 2 additions & 2 deletions ci/deps/azure-macos-36.yaml
@@ -20,9 +20,9 @@ dependencies:
- matplotlib=2.2.3
- nomkl
- numexpr
- numpy=1.13.3
- numpy=1.14
- openpyxl
- pyarrow
- pyarrow>=0.12.0
- pytables
- python-dateutil==2.6.1
- pytz
2 changes: 1 addition & 1 deletion ci/deps/azure-windows-36.yaml
@@ -20,7 +20,7 @@ dependencies:
- numexpr
- numpy=1.15.*
- openpyxl
- pyarrow
- pyarrow>=0.12.0
- pytables
- python-dateutil
- pytz
1 change: 0 additions & 1 deletion doc/redirects.csv
@@ -828,7 +828,6 @@ generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlev
generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel
generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index
generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame
generated/pandas.MultiIndex.to_hierarchical,../reference/api/pandas.MultiIndex.to_hierarchical
generated/pandas.notna,../reference/api/pandas.notna
generated/pandas.notnull,../reference/api/pandas.notnull
generated/pandas.option_context,../reference/api/pandas.option_context
1 change: 1 addition & 0 deletions doc/source/development/index.rst
@@ -19,3 +19,4 @@ Development
developer
policies
roadmap
meeting
32 changes: 32 additions & 0 deletions doc/source/development/meeting.rst
@@ -0,0 +1,32 @@
.. _meeting:

==================
Developer Meetings
==================

We hold regular developer meetings on the second Wednesday
of each month at 18:00 UTC. These meetings and their minutes are open to
the public. All are welcome to join.

Minutes
-------

The minutes of past meetings are available in `this Google Document <https://docs.google.com/document/d/1tGbTiYORHiSPgVMXawiweGJlBw5dOkVJLY-licoBmBU/edit?usp=sharing>`__.

Calendar
--------

This calendar shows all the developer meetings.

.. raw:: html

<iframe src="https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com" style="border: 0" width="800" height="600" frameborder="0" scrolling="no"></iframe>

You can subscribe to this calendar with the following links:

* `iCal <https://calendar.google.com/calendar/ical/pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com/public/basic.ics>`__
* `Google calendar <https://calendar.google.com/calendar/embed?src=pgbn14p6poja8a1cf2dv2jhrmg%40group.calendar.google.com>`__

Additionally, we'll sometimes have one-off meetings on specific topics.
These will be published on the same calendar.

1 change: 1 addition & 0 deletions doc/source/getting_started/basics.rst
@@ -1950,6 +1950,7 @@ sparse :class:`SparseDtype` (none) :class:`arrays.
intervals :class:`IntervalDtype` :class:`Interval` :class:`arrays.IntervalArray` :ref:`advanced.intervalindex`
nullable integer :class:`Int64Dtype`, ... (none) :class:`arrays.IntegerArray` :ref:`integer_na`
Strings :class:`StringDtype` :class:`str` :class:`arrays.StringArray` :ref:`text`
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :class:`arrays.BooleanArray` :ref:`api.arrays.bool`
=================== ========================= ================== ============================= =============================

Pandas has two ways to store strings.
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
@@ -258,7 +258,7 @@ matplotlib 2.2.2 Visualization
openpyxl 2.4.8 Reading / writing for xlsx files
pandas-gbq 0.8.0 Google Big Query access
psycopg2 PostgreSQL engine for sqlalchemy
pyarrow 0.9.0 Parquet and feather reading / writing
pyarrow 0.12.0 Parquet and feather reading / writing
pymysql 0.7.11 MySQL engine for sqlalchemy
pyreadstat SPSS files (.sav) reading
pytables 3.4.2 HDF5 reading / writing
23 changes: 23 additions & 0 deletions doc/source/reference/arrays.rst
@@ -25,6 +25,7 @@ Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.array
Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical`
Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse`
Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string`
Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool`
=================== ========================= ================== =============================

Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`).
@@ -485,6 +486,28 @@ The ``Series.str`` accessor is available for ``Series`` backed by a :class:`arra
See :ref:`api.series.str` for more.


.. _api.arrays.bool:

Boolean data with missing values
--------------------------------

The boolean dtype (with the alias ``"boolean"``) provides support for storing
boolean data (True, False values) with missing values, which is not possible
with a bool :class:`numpy.ndarray`.

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

arrays.BooleanArray

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

BooleanDtype


.. Dtype attributes which are manually listed in their docstrings: including
.. it here to make sure a docstring page is built for them
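An illustrative aside, not part of the diff: the new `api.arrays.bool` section above documents the nullable boolean dtype; a minimal sketch of what it allows, assuming a pandas version that ships `BooleanDtype`:

```python
# Not part of the commit above -- boolean data with a missing value,
# which a plain bool numpy.ndarray cannot hold.
import pandas as pd

arr = pd.array([True, False, None], dtype="boolean")
print(arr.dtype)  # boolean
print(arr)        # a BooleanArray whose last element is missing
```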
1 change: 0 additions & 1 deletion doc/source/reference/indexing.rst
@@ -305,7 +305,6 @@ MultiIndex components

MultiIndex.set_levels
MultiIndex.set_codes
MultiIndex.to_hierarchical
MultiIndex.to_flat_index
MultiIndex.to_frame
MultiIndex.is_lexsorted
1 change: 1 addition & 0 deletions doc/source/reference/style.rst
@@ -41,6 +41,7 @@ Style application
Styler.set_caption
Styler.set_properties
Styler.set_uuid
Styler.set_na_rep
Styler.clear
Styler.pipe

6 changes: 3 additions & 3 deletions doc/source/user_guide/scale.rst
@@ -93,9 +93,9 @@ Use efficient datatypes
-----------------------

The default pandas data types are not the most memory efficient. This is
especially true for high-cardinality text data (columns with relatively few
unique values). By using more efficient data types you can store larger datasets
in memory.
especially true for text data columns with relatively few unique values (commonly
referred to as "low-cardinality" data). By using more efficient data types, you
can store larger datasets in memory.

.. ipython:: python
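An illustrative aside, not part of the diff: the reworded paragraph above concerns conversions such as `astype("category")` for low-cardinality text columns; a minimal sketch of the memory effect:

```python
# Not part of the commit above -- memory footprint of a low-cardinality
# text column before and after converting to the category dtype.
import pandas as pd

s = pd.Series(["adam", "eve", "adam", "eve"] * 250_000)
print(s.memory_usage(deep=True))                     # object dtype: tens of MB
print(s.astype("category").memory_usage(deep=True))  # category: about 1 MB
```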