diff --git a/.gitignore b/.gitignore index a59f2843c365a..f912fedb199c0 100644 --- a/.gitignore +++ b/.gitignore @@ -109,6 +109,5 @@ doc/build/html/index.html # Windows specific leftover: doc/tmp.sv doc/source/styled.xlsx -doc/source/templates/ env/ doc/source/savefig/ diff --git a/.travis.yml b/.travis.yml index 9fac09e1fa788..6d31adcbf8a43 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,7 +23,7 @@ env: git: # for cloning - depth: 1000 + depth: 1500 matrix: fast_finish: true diff --git a/ci/build_docs.sh b/ci/build_docs.sh index f445447e3565c..33340a1c038dc 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -5,7 +5,7 @@ if [ "${TRAVIS_OS_NAME}" != "linux" ]; then exit 0 fi -cd "$TRAVIS_BUILD_DIR" +cd "$TRAVIS_BUILD_DIR"/doc echo "inside $0" if [ "$DOC" ]; then @@ -14,10 +14,6 @@ if [ "$DOC" ]; then source activate pandas - mv "$TRAVIS_BUILD_DIR"/doc /tmp - mv "$TRAVIS_BUILD_DIR/LICENSE" /tmp # included in the docs. - cd /tmp/doc - echo ############################### echo # Log file for the doc build # echo ############################### @@ -29,7 +25,7 @@ if [ "$DOC" ]; then echo # Create and send docs # echo ######################## - cd /tmp/doc/build/html + cd build/html git config --global user.email "pandas-docs-bot@localhost.foo" git config --global user.name "pandas-docs-bot" diff --git a/ci/deps/travis-36-doc.yaml b/ci/deps/travis-36-doc.yaml index ce095b887f189..f79fcb11c179f 100644 --- a/ci/deps/travis-36-doc.yaml +++ b/ci/deps/travis-36-doc.yaml @@ -8,10 +8,11 @@ dependencies: - bottleneck - cython>=0.28.2 - fastparquet + - gitpython - html5lib - hypothesis>=3.58.0 - ipykernel - - ipython==6.5.0 + - ipython - ipywidgets - lxml - matplotlib diff --git a/ci/deps/travis-36.yaml b/ci/deps/travis-36.yaml index 8aa551f6194d9..1880fa2501581 100644 --- a/ci/deps/travis-36.yaml +++ b/ci/deps/travis-36.yaml @@ -9,7 +9,7 @@ dependencies: - fastparquet - flake8>=3.5 - flake8-comprehensions - - flake8-rst + - flake8-rst=0.4.2 - gcsfs - geopandas - html5lib 
diff --git a/doc/make.py b/doc/make.py index cab5fa0ed4c52..0a3a7483fcc91 100755 --- a/doc/make.py +++ b/doc/make.py @@ -126,7 +126,12 @@ def _process_single_doc(self, single_doc): self.single_doc = 'api' elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)): self.single_doc_type = 'rst' - self.single_doc = os.path.splitext(os.path.basename(single_doc))[0] + + if 'whatsnew' in single_doc: + basename = single_doc + else: + basename = os.path.basename(single_doc) + self.single_doc = os.path.splitext(basename)[0] elif os.path.exists( os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))): self.single_doc_type = 'rst' diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 24c117a534209..563c869eff54d 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -702,7 +702,7 @@ Index Types We have discussed ``MultiIndex`` in the previous sections pretty extensively. Documentation about ``DatetimeIndex`` and ``PeriodIndex`` are shown :ref:`here `, -and documentation about ``TimedeltaIndex`` is found :ref:`here `. +and documentation about ``TimedeltaIndex`` is found :ref:`here `. In the following sub-sections we will highlight some other index types. diff --git a/doc/source/api.rst b/doc/source/api.rst index 665649aead33c..81bb420c47a99 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1724,6 +1724,7 @@ MultiIndex Components MultiIndex.set_levels MultiIndex.set_labels MultiIndex.to_hierarchical + MultiIndex.to_flat_index MultiIndex.to_frame MultiIndex.is_lexsorted MultiIndex.sortlevel diff --git a/doc/source/conf.py b/doc/source/conf.py index b0501eaf54dc2..47adc80204fcc 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -40,7 +40,6 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. 
# sys.path.append(os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../sphinxext')) - sys.path.extend([ # numpy standard doc extensions @@ -75,6 +74,7 @@ 'sphinx.ext.ifconfig', 'sphinx.ext.linkcode', 'nbsphinx', + 'contributors', # custom pandas extension ] try: @@ -120,7 +120,9 @@ templates_path = ['../_templates'] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = [ + '.rst', +] # The encoding of source files. source_encoding = 'utf-8' @@ -298,8 +300,26 @@ for page in moved_api_pages } + +common_imports = """\ +.. currentmodule:: pandas + +.. ipython:: python + :suppress: + + import numpy as np + from pandas import * + import pandas as pd + randn = np.random.randn + np.set_printoptions(precision=4, suppress=True) + options.display.max_rows = 15 + from pandas.compat import StringIO +""" + + html_context = { - 'redirects': {old: new for old, new in moved_api_pages} + 'redirects': {old: new for old, new in moved_api_pages}, + 'common_imports': common_imports, } # If false, no module index is generated. @@ -388,6 +408,7 @@ category=FutureWarning) +ipython_warning_is_error = False ipython_exec_lines = [ 'import numpy as np', 'import pandas as pd', @@ -653,7 +674,23 @@ def process_class_docstrings(app, what, name, obj, options, lines): ] +def rstjinja(app, docname, source): + """ + Render our pages as a jinja template for fancy templating goodness. 
+ """ + # http://ericholscher.com/blog/2016/jul/25/integrating-jinja-rst-sphinx/ + # Make sure we're outputting HTML + if app.builder.format != 'html': + return + src = source[0] + rendered = app.builder.templates.render_string( + src, app.config.html_context + ) + source[0] = rendered + + def setup(app): + app.connect("source-read", rstjinja) app.connect("autodoc-process-docstring", remove_flags_docstring) app.connect("autodoc-process-docstring", process_class_docstrings) app.add_autodocumenter(AccessorDocumenter) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 514a58456bcd9..7eb9a6cf815ba 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -1103,7 +1103,7 @@ Information on how to write a benchmark and how to use asv can be found in the Documenting your code --------------------- -Changes should be reflected in the release notes located in ``doc/source/whatsnew/vx.y.z.txt``. +Changes should be reflected in the release notes located in ``doc/source/whatsnew/vx.y.z.rst``. This file contains an ongoing change log for each release. Add an entry to this file to document your fix, enhancement or (unavoidable) breaking change. Make sure to include the GitHub issue number when adding your entry (using ``:issue:`1234``` where ``1234`` is the diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index edbd6629a617d..ad389bbe35b71 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying (Note: HTML tables may or may not be compatible with non-HTML Jupyter output formats.) -See :ref:`Options and Settings ` and :ref:`options.available ` +See :ref:`Options and Settings ` and :ref:`options.available` for pandas ``display.`` settings. 
`quantopian/qgrid `__ diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index d2b88e794e51e..38f73f8617ced 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -118,7 +118,7 @@ See the package overview for more detail about what's in the library. {{ single_doc }} {% endif -%} {% if not single_doc -%} - whatsnew + What's New install contributing overview @@ -159,5 +159,5 @@ See the package overview for more detail about what's in the library. developer internals extending - release + releases {% endif -%} diff --git a/doc/source/io.rst b/doc/source/io.rst index 34dc185c200e6..92fc28af0281a 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1580,12 +1580,19 @@ You can pass in a URL to a CSV file: df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item', sep='\t') -S3 URLs are handled as well: +S3 URLs are handled as well but require installing the `S3Fs +`_ library: .. code-block:: python df = pd.read_csv('s3://pandas-test/tips.csv') +If your S3 bucket requires credentials you will need to set them as environment +variables or in the ``~/.aws/credentials`` config file, refer to the `S3Fs +documentation on credentials +`_. + + Writing out Data '''''''''''''''' diff --git a/doc/source/releases.rst b/doc/source/releases.rst new file mode 100644 index 0000000000000..0167903cce8bc --- /dev/null +++ b/doc/source/releases.rst @@ -0,0 +1,203 @@ +.. _release: + +************* +Release Notes +************* + +This is the list of changes to pandas between each release. For full details, +see the commit logs at http://github.com/pandas-dev/pandas. For install and +upgrade instructions, see :ref:`install`. + +Version 0.24 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.24.0 + +Version 0.23 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.23.4 + whatsnew/v0.23.3 + whatsnew/v0.23.2 + whatsnew/v0.23.1 + whatsnew/v0.23.0 + +Version 0.22 +------------ + +.. 
toctree:: + :maxdepth: 2 + + whatsnew/v0.22.0 + +Version 0.21 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.21.0 + whatsnew/v0.21.1 + +Version 0.20 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.20.0 + whatsnew/v0.20.2 + whatsnew/v0.20.3 + +Version 0.19 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.19.0 + whatsnew/v0.19.1 + whatsnew/v0.19.2 + +Version 0.18 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.18.0 + whatsnew/v0.18.1 + +Version 0.17 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.17.0 + whatsnew/v0.17.1 + +Version 0.16 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.16.0 + whatsnew/v0.16.1 + whatsnew/v0.16.2 + +Version 0.15 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.15.0 + whatsnew/v0.15.1 + whatsnew/v0.15.2 + +Version 0.14 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.14.0 + whatsnew/v0.14.1 + +Version 0.13 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.13.0 + whatsnew/v0.13.1 + +Version 0.12 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.12.0 + +Version 0.11 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.11.0 + +Version 0.10 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.10.0 + whatsnew/v0.10.1 + +Version 0.9 +----------- + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.9.0 + whatsnew/v0.9.1 + +Version 0.8 +------------ + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.8.0 + whatsnew/v0.8.1 + +Version 0.7 +----------- + +.. toctree:: + :maxdepth: 2 + + whatsnew/v0.7.0 + whatsnew/v0.7.1 + whatsnew/v0.7.2 + whatsnew/v0.7.3 + +Version 0.6 +----------- + +.. toctree:: + :maxdepth: 2 + + + whatsnew/v0.6.0 + whatsnew/v0.6.1 + +Version 0.5 +----------- + +.. toctree:: + :maxdepth: 2 + + + whatsnew/v0.5.0 + +Version 0.4 +----------- + +.. 
toctree:: + :maxdepth: 2 + + whatsnew/v0.4.x diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index 6f66c1a9bf7f9..792fe5120f6e8 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -2,9 +2,7 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "collapsed": true - }, + "metadata": {}, "source": [ "# Styling\n", "\n", @@ -51,7 +49,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "nbsphinx": "hidden" }, "outputs": [], @@ -64,9 +61,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", @@ -132,9 +127,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def color_negative_red(val):\n", @@ -188,9 +181,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def highlight_max(s):\n", @@ -253,9 +244,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def highlight_max(data, color='yellow'):\n", @@ -908,9 +897,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.html import widgets\n", @@ -925,9 +912,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "def magnify():\n", @@ -946,9 +931,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "np.random.seed(25)\n", @@ -985,18 +968,16 @@ "- `vertical-align`\n", "- `white-space: nowrap`\n", "\n", - "Only CSS2 named colors and hex colors of the form `#rgb` or `#rrggbb` are currently supported.\n", "\n", - "The following pseudo CSS 
properties are also available to set excel specific style properties:\n", - "- `number-format`\n" + "- Only CSS2 named colors and hex colors of the form `#rgb` or `#rrggbb` are currently supported.\n", + "- The following pseudo CSS properties are also available to set excel specific style properties:\n", + " - `number-format`\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "df.style.\\\n", @@ -1037,9 +1018,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "from jinja2 import Environment, ChoiceLoader, FileSystemLoader\n", @@ -1047,39 +1026,21 @@ "from pandas.io.formats.style import Styler" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "%mkdir templates" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This next cell writes the custom template.\n", - "We extend the template `html.tpl`, which comes with pandas." + "We'll use the following template:" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ - "%%file templates/myhtml.tpl\n", - "{% extends \"html.tpl\" %}\n", - "{% block table %}\n", - "

{{ table_title|default(\"My Table\") }}

\n", - "{{ super() }}\n", - "{% endblock table %}" + "with open(\"templates/myhtml.tpl\") as f:\n", + " print(f.read())" ] }, { @@ -1093,9 +1054,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "class MyStyler(Styler):\n", @@ -1122,9 +1081,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "MyStyler(df)" @@ -1140,9 +1097,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "HTML(MyStyler(df).render(table_title=\"Extending Example\"))" @@ -1158,9 +1113,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "EasyStyler = Styler.from_custom_template(\"templates\", \"myhtml.tpl\")\n", @@ -1177,9 +1130,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "with open(\"template_structure.html\") as f:\n", @@ -1199,7 +1150,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "nbsphinx": "hidden" }, "outputs": [], @@ -1216,7 +1166,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -1230,14 +1180,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.3" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 1, - "version_minor": 0 - } + "version": "3.7.0" } }, "nbformat": 4, diff --git a/doc/source/templates/myhtml.tpl b/doc/source/templates/myhtml.tpl new file mode 100644 index 0000000000000..1170fd3def653 --- /dev/null +++ b/doc/source/templates/myhtml.tpl @@ -0,0 +1,5 @@ +{% extends "html.tpl" %} +{% block table %} +

{{ table_title|default("My Table") }}

+{{ super() }} +{% endblock table %} diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 42fd356bbe65a..cc377f45c4b8d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -2372,7 +2372,8 @@ can be controlled by the ``nonexistent`` argument. The following options are ava * ``shift``: Shifts nonexistent times forward to the closest real time .. ipython:: python - dti = date_range(start='2015-03-29 01:30:00', periods=3, freq='H') + + dti = pd.date_range(start='2015-03-29 01:30:00', periods=3, freq='H') # 2:30 is a nonexistent time Localization of nonexistent times will raise an error by default. @@ -2385,6 +2386,7 @@ Localization of nonexistent times will raise an error by default. Transform nonexistent times to ``NaT`` or the closest real time forward in time. .. ipython:: python + dti dti.tz_localize('Europe/Warsaw', nonexistent='shift') dti.tz_localize('Europe/Warsaw', nonexistent='NaT') diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 569a6fb7b7a0d..dd8ccfcfd28ac 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -1405,7 +1405,7 @@ Here is an example of one way to easily plot group means with standard deviation # Plot fig, ax = plt.subplots() @savefig errorbar_example.png - means.plot.bar(yerr=errors, ax=ax) + means.plot.bar(yerr=errors, ax=ax, capsize=4) .. ipython:: python :suppress: diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst deleted file mode 100644 index 8672685b3ebb4..0000000000000 --- a/doc/source/whatsnew.rst +++ /dev/null @@ -1,109 +0,0 @@ -.. _whatsnew: - -.. currentmodule:: pandas - -.. ipython:: python - :suppress: - - import numpy as np - from pandas import * - import pandas as pd - randn = np.random.randn - np.set_printoptions(precision=4, suppress=True) - options.display.max_rows = 15 - -********** -What's New -********** - -These are new features and improvements of note in each release. - -.. include:: whatsnew/v0.24.0.txt - -.. 
include:: whatsnew/v0.23.4.txt - -.. include:: whatsnew/v0.23.3.txt - -.. include:: whatsnew/v0.23.2.txt - -.. include:: whatsnew/v0.23.1.txt - -.. include:: whatsnew/v0.23.0.txt - -.. include:: whatsnew/v0.22.0.txt - -.. include:: whatsnew/v0.21.1.txt - -.. include:: whatsnew/v0.21.0.txt - -.. include:: whatsnew/v0.20.3.txt - -.. include:: whatsnew/v0.20.2.txt - -.. include:: whatsnew/v0.20.0.txt - -.. include:: whatsnew/v0.19.2.txt - -.. include:: whatsnew/v0.19.1.txt - -.. include:: whatsnew/v0.19.0.txt - -.. include:: whatsnew/v0.18.1.txt - -.. include:: whatsnew/v0.18.0.txt - -.. include:: whatsnew/v0.17.1.txt - -.. include:: whatsnew/v0.17.0.txt - -.. include:: whatsnew/v0.16.2.txt - -.. include:: whatsnew/v0.16.1.txt - -.. include:: whatsnew/v0.16.0.txt - -.. include:: whatsnew/v0.15.2.txt - -.. include:: whatsnew/v0.15.1.txt - -.. include:: whatsnew/v0.15.0.txt - -.. include:: whatsnew/v0.14.1.txt - -.. include:: whatsnew/v0.14.0.txt - -.. include:: whatsnew/v0.13.1.txt - -.. include:: whatsnew/v0.13.0.txt - -.. include:: whatsnew/v0.12.0.txt - -.. include:: whatsnew/v0.11.0.txt - -.. include:: whatsnew/v0.10.1.txt - -.. include:: whatsnew/v0.10.0.txt - -.. include:: whatsnew/v0.9.1.txt - -.. include:: whatsnew/v0.9.0.txt - -.. include:: whatsnew/v0.8.1.txt - -.. include:: whatsnew/v0.8.0.txt - -.. include:: whatsnew/v0.7.3.txt - -.. include:: whatsnew/v0.7.2.txt - -.. include:: whatsnew/v0.7.1.txt - -.. include:: whatsnew/v0.7.0.txt - -.. include:: whatsnew/v0.6.1.txt - -.. include:: whatsnew/v0.6.0.txt - -.. include:: whatsnew/v0.5.0.txt - -.. include:: whatsnew/v0.4.x.txt diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.rst similarity index 99% rename from doc/source/whatsnew/v0.10.0.txt rename to doc/source/whatsnew/v0.10.0.rst index 298088a4f96b3..27f20111dbf96 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.rst @@ -1,13 +1,10 @@ .. _whatsnew_0100: -.. 
ipython:: python - :suppress: - - from pandas.compat import StringIO - v0.10.0 (December 17, 2012) --------------------------- +{{ common_imports }} + This is a major release from 0.9.1 and includes many new features and enhancements along with a large number of bug fixes. There are also a number of important API changes that long-time pandas users should pay close attention @@ -431,3 +428,11 @@ Here is a taste of what to expect. See the :ref:`full release notes ` or issue tracker on GitHub for a complete list. + + +.. _whatsnew_0.10.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.9.0..v0.10.0 diff --git a/doc/source/whatsnew/v0.10.1.txt b/doc/source/whatsnew/v0.10.1.rst similarity index 98% rename from doc/source/whatsnew/v0.10.1.txt rename to doc/source/whatsnew/v0.10.1.rst index f1a32440c6950..5679babf07b73 100644 --- a/doc/source/whatsnew/v0.10.1.txt +++ b/doc/source/whatsnew/v0.10.1.rst @@ -3,6 +3,8 @@ v0.10.1 (January 22, 2013) --------------------------- +{{ common_imports }} + This is a minor release from 0.10.0 and includes new features, enhancements, and bug fixes. In particular, there is substantial new HDFStore functionality contributed by Jeff Reback. @@ -208,3 +210,11 @@ combined result, by using ``where`` on a selector table. See the :ref:`full release notes ` or issue tracker on GitHub for a complete list. + + +.. _whatsnew_0.10.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.10.0..v0.10.1 diff --git a/doc/source/whatsnew/v0.11.0.txt b/doc/source/whatsnew/v0.11.0.rst similarity index 98% rename from doc/source/whatsnew/v0.11.0.txt rename to doc/source/whatsnew/v0.11.0.rst index f39e6c9ff459b..051d735e539aa 100644 --- a/doc/source/whatsnew/v0.11.0.txt +++ b/doc/source/whatsnew/v0.11.0.rst @@ -3,6 +3,8 @@ v0.11.0 (April 22, 2013) ------------------------ +{{ common_imports }} + This is a major release from 0.10.1 and includes many new features and enhancements along with a large number of bug fixes. 
The methods of Selecting Data have had quite a number of additions, and Dtype support is now full-fledged. @@ -330,3 +332,11 @@ Enhancements See the :ref:`full release notes ` or issue tracker on GitHub for a complete list. + + +.. _whatsnew_0.11.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.10.1..v0.11.0 diff --git a/doc/source/whatsnew/v0.12.0.txt b/doc/source/whatsnew/v0.12.0.rst similarity index 99% rename from doc/source/whatsnew/v0.12.0.txt rename to doc/source/whatsnew/v0.12.0.rst index f66f6c0f72d5d..a462359b6e3c0 100644 --- a/doc/source/whatsnew/v0.12.0.txt +++ b/doc/source/whatsnew/v0.12.0.rst @@ -3,6 +3,8 @@ v0.12.0 (July 24, 2013) ------------------------ +{{ common_imports }} + This is a major release from 0.11.0 and includes several new features and enhancements along with a large number of bug fixes. @@ -504,3 +506,11 @@ Bug Fixes See the :ref:`full release notes ` or issue tracker on GitHub for a complete list. + + +.. _whatsnew_0.12.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.11.0..v0.12.0 diff --git a/doc/source/whatsnew/v0.13.0.txt b/doc/source/whatsnew/v0.13.0.rst similarity index 66% rename from doc/source/whatsnew/v0.13.0.txt rename to doc/source/whatsnew/v0.13.0.rst index 94cd451196ead..037347afb1d59 100644 --- a/doc/source/whatsnew/v0.13.0.txt +++ b/doc/source/whatsnew/v0.13.0.rst @@ -3,6 +3,8 @@ v0.13.0 (January 3, 2014) --------------------------- +{{ common_imports }} + This is a major release from 0.12.0 and includes a number of API changes, several new features and enhancements along with a large number of bug fixes. @@ -425,7 +427,7 @@ than switching to the short info view (:issue:`4886`, :issue:`5550`). This makes the representation more consistent as small DataFrames get larger. -.. image:: _static/df_repr_truncated.png +.. image:: ../_static/df_repr_truncated.png :alt: Truncated HTML representation of a DataFrame To get the info view, call :meth:`DataFrame.info`. 
If you prefer the @@ -976,11 +978,308 @@ to unify methods and behaviors. Series formerly subclassed directly from s.a = 5 s +.. _release.bug_fixes-0.13.0: + Bug Fixes ~~~~~~~~~ -See :ref:`V0.13.0 Bug Fixes` for an extensive list of bugs that have been fixed in 0.13.0. +- ``HDFStore`` + + - raising an invalid ``TypeError`` rather than ``ValueError`` when + appending with a different block ordering (:issue:`4096`) + - ``read_hdf`` was not respecting as passed ``mode`` (:issue:`4504`) + - appending a 0-len table will work correctly (:issue:`4273`) + - ``to_hdf`` was raising when passing both arguments ``append`` and + ``table`` (:issue:`4584`) + - reading from a store with duplicate columns across dtypes would raise + (:issue:`4767`) + - Fixed a bug where ``ValueError`` wasn't correctly raised when column + names weren't strings (:issue:`4956`) + - A zero length series written in Fixed format not deserializing properly. + (:issue:`4708`) + - Fixed decoding perf issue on pyt3 (:issue:`5441`) + - Validate levels in a MultiIndex before storing (:issue:`5527`) + - Correctly handle ``data_columns`` with a Panel (:issue:`5717`) +- Fixed bug in tslib.tz_convert(vals, tz1, tz2): it could raise IndexError + exception while trying to access trans[pos + 1] (:issue:`4496`) +- The ``by`` argument now works correctly with the ``layout`` argument + (:issue:`4102`, :issue:`4014`) in ``*.hist`` plotting methods +- Fixed bug in ``PeriodIndex.map`` where using ``str`` would return the str + representation of the index (:issue:`4136`) +- Fixed test failure ``test_time_series_plot_color_with_empty_kwargs`` when + using custom matplotlib default colors (:issue:`4345`) +- Fix running of stata IO tests. 
Now uses temporary files to write + (:issue:`4353`) +- Fixed an issue where ``DataFrame.sum`` was slower than ``DataFrame.mean`` + for integer valued frames (:issue:`4365`) +- ``read_html`` tests now work with Python 2.6 (:issue:`4351`) +- Fixed bug where ``network`` testing was throwing ``NameError`` because a + local variable was undefined (:issue:`4381`) +- In ``to_json``, raise if a passed ``orient`` would cause loss of data + because of a duplicate index (:issue:`4359`) +- In ``to_json``, fix date handling so milliseconds are the default timestamp + as the docstring says (:issue:`4362`). +- ``as_index`` is no longer ignored when doing groupby apply (:issue:`4648`, + :issue:`3417`) +- JSON NaT handling fixed, NaTs are now serialized to `null` (:issue:`4498`) +- Fixed JSON handling of escapable characters in JSON object keys + (:issue:`4593`) +- Fixed passing ``keep_default_na=False`` when ``na_values=None`` + (:issue:`4318`) +- Fixed bug with ``values`` raising an error on a DataFrame with duplicate + columns and mixed dtypes, surfaced in (:issue:`4377`) +- Fixed bug with duplicate columns and type conversion in ``read_json`` when + ``orient='split'`` (:issue:`4377`) +- Fixed JSON bug where locales with decimal separators other than '.' threw + exceptions when encoding / decoding certain values. 
(:issue:`4918`) +- Fix ``.iat`` indexing with a ``PeriodIndex`` (:issue:`4390`) +- Fixed an issue where ``PeriodIndex`` joining with self was returning a new + instance rather than the same instance (:issue:`4379`); also adds a test + for this for the other index types +- Fixed a bug with all the dtypes being converted to object when using the + CSV cparser with the usecols parameter (:issue:`3192`) +- Fix an issue in merging blocks where the resulting DataFrame had partially + set _ref_locs (:issue:`4403`) +- Fixed an issue where hist subplots were being overwritten when they were + called using the top level matplotlib API (:issue:`4408`) +- Fixed a bug where calling ``Series.astype(str)`` would truncate the string + (:issue:`4405`, :issue:`4437`) +- Fixed a py3 compat issue where bytes were being repr'd as tuples + (:issue:`4455`) +- Fixed Panel attribute naming conflict if item is named 'a' + (:issue:`3440`) +- Fixed an issue where duplicate indexes were raising when plotting + (:issue:`4486`) +- Fixed an issue where cumsum and cumprod didn't work with bool dtypes + (:issue:`4170`, :issue:`4440`) +- Fixed Panel slicing issue in ``xs`` that was returning an incorrect dimmed + object (:issue:`4016`) +- Fix resampling bug where custom reduce function not used if only one group + (:issue:`3849`, :issue:`4494`) +- Fixed Panel assignment with a transposed frame (:issue:`3830`) +- Raise on set indexing with a Panel and a Panel as a value which needs + alignment (:issue:`3777`) +- frozenset objects now raise in the ``Series`` constructor (:issue:`4482`, + :issue:`4480`) +- Fixed issue with sorting a duplicate MultiIndex that has multiple dtypes + (:issue:`4516`) +- Fixed bug in ``DataFrame.set_values`` which was causing name attributes to + be lost when expanding the index. 
(:issue:`3742`, :issue:`4039`) +- Fixed issue where individual ``names``, ``levels`` and ``labels`` could be + set on ``MultiIndex`` without validation (:issue:`3714`, :issue:`4039`) +- Fixed (:issue:`3334`) in pivot_table. Margins did not compute if values is + the index. +- Fix bug in having a rhs of ``np.timedelta64`` or ``np.offsets.DateOffset`` + when operating with datetimes (:issue:`4532`) +- Fix arithmetic with series/datetimeindex and ``np.timedelta64`` not working + the same (:issue:`4134`) and buggy timedelta in NumPy 1.6 (:issue:`4135`) +- Fix bug in ``pd.read_clipboard`` on windows with PY3 (:issue:`4561`); not + decoding properly +- ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise + if code argument out of range (:issue:`4519`, :issue:`4520`) +- Fix boolean indexing on an empty series loses index names (:issue:`4235`), + infer_dtype works with empty arrays. +- Fix reindexing with multiple axes; if an axes match was not replacing the + current axes, leading to a possible lazy frequency inference issue + (:issue:`3317`) +- Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly + (causing the original stack trace to be truncated). 
+- Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`) +- Fix assignment with iloc/loc involving a dtype change in an existing column + (:issue:`4312`, :issue:`5702`) have internal setitem_with_indexer in core/indexing + to use Block.setitem +- Fixed bug where thousands operator was not handled correctly for floating + point numbers in csv_import (:issue:`4322`) +- Fix an issue with CacheableOffset not properly being used by many + DateOffset; this prevented the DateOffset from being cached (:issue:`4609`) +- Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the + rhs (:issue:`4576`) +- Fix error/dtype conversion with setitem of ``None`` on ``Series/DataFrame`` + (:issue:`4667`) +- Fix decoding based on a passed in non-default encoding in ``pd.read_stata`` + (:issue:`4626`) +- Fix ``DataFrame.from_records`` with a plain-vanilla ``ndarray``. + (:issue:`4727`) +- Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename``, + etc. (:issue:`4718`, :issue:`4628`) +- Bug in using ``iloc/loc`` with a cross-sectional and duplicate indices + (:issue:`4726`) +- Bug with using ``QUOTE_NONE`` with ``to_csv`` causing ``Exception``. + (:issue:`4328`) +- Bug with Series indexing not raising an error when the right-hand-side has + an incorrect length (:issue:`2702`) +- Bug in MultiIndexing with a partial string selection as one part of a + MultiIndex (:issue:`4758`) +- Bug with reindexing on the index with a non-unique index will now raise + ``ValueError`` (:issue:`4746`) +- Bug in setting with ``loc/ix`` a single indexer with a MultiIndex axis and + a NumPy array, related to (:issue:`3777`) +- Bug in concatenation with duplicate columns across dtypes not merging with + axis=0 (:issue:`4771`, :issue:`4975`) +- Bug in ``iloc`` with a slice index failing (:issue:`4771`) +- Incorrect error message with no colspecs or width in ``read_fwf``. 
+ (:issue:`4774`) +- Fix bugs in indexing in a Series with a duplicate index (:issue:`4548`, + :issue:`4550`) +- Fixed bug with reading compressed files with ``read_fwf`` in Python 3. + (:issue:`3963`) +- Fixed an issue with a duplicate index and assignment with a dtype change + (:issue:`4686`) +- Fixed bug with reading compressed files in as ``bytes`` rather than ``str`` + in Python 3. Simplifies bytes-producing file-handling in Python 3 + (:issue:`3963`, :issue:`4785`). +- Fixed an issue related to ticklocs/ticklabels with log scale bar plots + across different versions of matplotlib (:issue:`4789`) +- Suppressed DeprecationWarning associated with internal calls issued by + repr() (:issue:`4391`) +- Fixed an issue with a duplicate index and duplicate selector with ``.loc`` + (:issue:`4825`) +- Fixed an issue with ``DataFrame.sort_index`` where, when sorting by a + single column and passing a list for ``ascending``, the argument for + ``ascending`` was being interpreted as ``True`` (:issue:`4839`, + :issue:`4846`) +- Fixed ``Panel.tshift`` not working. Added `freq` support to ``Panel.shift`` + (:issue:`4853`) +- Fix an issue in TextFileReader w/ Python engine (i.e. PythonParser) + with thousands != "," (:issue:`4596`) +- Bug in getitem with a duplicate index when using where (:issue:`4879`) +- Fix Type inference code coerces float column into datetime (:issue:`4601`) +- Fixed ``_ensure_numeric`` does not check for complex numbers + (:issue:`4902`) +- Fixed a bug in ``Series.hist`` where two figures were being created when + the ``by`` argument was passed (:issue:`4112`, :issue:`4113`). 
+- Fixed a bug in ``convert_objects`` for > 2 ndims (:issue:`4937`) +- Fixed a bug in DataFrame/Panel cache insertion and subsequent indexing + (:issue:`4939`, :issue:`5424`) +- Fixed string methods for ``FrozenNDArray`` and ``FrozenList`` + (:issue:`4929`) +- Fixed a bug with setting invalid or out-of-range values in indexing + enlargement scenarios (:issue:`4940`) +- Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr +- Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep + separate metadata. (:issue:`4202`, :issue:`4830`) +- Fixed skiprows option in Python parser for read_csv (:issue:`4382`) +- Fixed bug preventing ``cut`` from working with ``np.inf`` levels without + explicitly passing labels (:issue:`3415`) +- Fixed wrong check for overlapping in ``DatetimeIndex.union`` + (:issue:`4564`) +- Fixed conflict between thousands separator and date parser in csv_parser + (:issue:`4678`) +- Fix appending when dtypes are not the same (error showing mixing + float/np.datetime64) (:issue:`4993`) +- Fix repr for DateOffset. No longer show duplicate entries in kwds. + Removed unused offset fields. (:issue:`4638`) +- Fixed wrong index name during read_csv if using usecols. Applies to c + parser only. (:issue:`4201`) +- ``Timestamp`` objects can now appear in the left hand side of a comparison + operation with a ``Series`` or ``DataFrame`` object (:issue:`4982`). +- Fix a bug when indexing with ``np.nan`` via ``iloc/loc`` (:issue:`5016`) +- Fixed a bug where low memory c parser could create different types in + different chunks of the same file. Now coerces to numerical type or raises + warning. (:issue:`3866`) +- Fix a bug where reshaping a ``Series`` to its own shape raised + ``TypeError`` (:issue:`4554`) and other reshaping issues. 
+- Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`) +- Make sure series-series boolean comparisons are label based (:issue:`4947`) +- Bug in multi-level indexing with a Timestamp partial indexer + (:issue:`4294`) +- Tests/fix for MultiIndex construction of an all-nan frame (:issue:`4078`) +- Fixed a bug where :func:`~pandas.read_html` wasn't correctly inferring + values of tables with commas (:issue:`5029`) +- Fixed a bug where :func:`~pandas.read_html` wasn't providing a stable + ordering of returned tables (:issue:`4770`, :issue:`5029`). +- Fixed a bug where :func:`~pandas.read_html` was incorrectly parsing when + passed ``index_col=0`` (:issue:`5066`). +- Fixed a bug where :func:`~pandas.read_html` was incorrectly inferring the + type of headers (:issue:`5048`). +- Fixed a bug where ``DatetimeIndex`` joins with ``PeriodIndex`` caused a + stack overflow (:issue:`3899`). +- Fixed a bug where ``groupby`` objects didn't allow plots (:issue:`5102`). +- Fixed a bug where ``groupby`` objects weren't tab-completing column names + (:issue:`5102`). +- Fixed a bug where ``groupby.plot()`` and friends were duplicating figures + multiple times (:issue:`5102`). +- Provide automatic conversion of ``object`` dtypes on fillna, related + (:issue:`5103`) +- Fixed a bug where default options were being overwritten in the option + parser cleaning (:issue:`5121`). +- Treat a list/ndarray identically for ``iloc`` indexing with list-like + (:issue:`5006`) +- Fix ``MultiIndex.get_level_values()`` with missing values (:issue:`5074`) +- Fix bound checking for Timestamp() with datetime64 input (:issue:`4065`) +- Fix a bug where ``TestReadHtml`` wasn't calling the correct ``read_html()`` + function (:issue:`5150`). +- Fix a bug with ``NDFrame.replace()`` which made replacement appear as + though it was (incorrectly) using regular expressions (:issue:`5143`). 
+- Fix better error message for to_datetime (:issue:`4928`) +- Made sure different locales are tested on travis-ci (:issue:`4918`). Also + adds a couple of utilities for getting locales and setting locales with a + context manager. +- Fixed segfault on ``isnull(MultiIndex)`` (now raises an error instead) + (:issue:`5123`, :issue:`5125`) +- Allow duplicate indices when performing operations that align + (:issue:`5185`, :issue:`5639`) +- Compound dtypes in a constructor raise ``NotImplementedError`` + (:issue:`5191`) +- Bug in comparing duplicate frames (:issue:`4421`) related +- Bug in describe on duplicate frames +- Bug in ``to_datetime`` with a format and ``coerce=True`` not raising + (:issue:`5195`) +- Bug in ``loc`` setting with multiple indexers and a rhs of a Series that + needs broadcasting (:issue:`5206`) +- Fixed bug where inplace setting of levels or labels on ``MultiIndex`` would + not clear cached ``values`` property and therefore return wrong ``values``. + (:issue:`5215`) +- Fixed bug where filtering a grouped DataFrame or Series did not maintain + the original ordering (:issue:`4621`). +- Fixed ``Period`` with a business date freq to always roll-forward if on a + non-business date. (:issue:`5203`) +- Fixed bug in Excel writers where frames with duplicate column names weren't + written correctly. (:issue:`5235`) +- Fixed issue with ``drop`` and a non-unique index on Series (:issue:`5248`) +- Fixed segfault in C parser caused by passing more names than columns in + the file. 
(:issue:`5156`) +- Fix ``Series.isin`` with date/time-like dtypes (:issue:`5021`) +- C and Python Parser can now handle the more common MultiIndex column + format which doesn't have a row for index names (:issue:`4702`) +- Bug when trying to use an out-of-bounds date as an object dtype + (:issue:`5312`) +- Bug when trying to display an embedded PandasObject (:issue:`5324`) +- Allow operations on Timestamps to return a datetime if the result is out-of-bounds, + related to (:issue:`5312`) +- Fix return value/type signature of ``initObjToJSON()`` to be compatible + with numpy's ``import_array()`` (:issue:`5334`, :issue:`5326`) +- Bug when renaming then set_index on a DataFrame (:issue:`5344`) +- Test suite no longer leaves around temporary files when testing graphics. (:issue:`5347`) + (thanks for catching this @yarikoptic!) +- Fixed html tests on win32. (:issue:`4580`) +- Make sure that ``head/tail`` are ``iloc`` based, (:issue:`5370`) +- Fixed bug for ``PeriodIndex`` string representation if there are 1 or 2 + elements. (:issue:`5372`) +- The GroupBy methods ``transform`` and ``filter`` can be used on Series + and DataFrames that have repeated (non-unique) indices. (:issue:`4620`) +- Fix empty series not printing name in repr (:issue:`4651`) +- Make tests create temp files in temp directory by default. 
(:issue:`5419`) +- ``pd.to_timedelta`` of a scalar returns a scalar (:issue:`5410`) +- ``pd.to_timedelta`` accepts ``NaN`` and ``NaT``, returning ``NaT`` instead of raising (:issue:`5437`) +- Performance improvements in ``isnull`` on larger size pandas objects +- Fixed various setitem with 1d ndarray that does not have a matching + length to the indexer (:issue:`5508`) +- Bug in getitem with a MultiIndex and ``iloc`` (:issue:`5528`) +- Bug in delitem on a Series (:issue:`5542`) +- Bug fix in apply when using custom function and objects are not mutated (:issue:`5545`) +- Bug in selecting from a non-unique index with ``loc`` (:issue:`5553`) +- Bug in groupby returning non-consistent types when user function returns ``None`` (:issue:`5592`) +- Work around regression in numpy 1.7.0 which erroneously raises IndexError from ``ndarray.item`` (:issue:`5666`) +- Bug in repeated indexing of object with resultant non-unique index (:issue:`5678`) +- Bug in fillna with Series and a passed series/dict (:issue:`5703`) +- Bug in groupby transform with a datetime-like grouper (:issue:`5712`) +- Bug in MultiIndex selection in PY3 when using certain keys (:issue:`5725`) +- Row-wise concat of differing dtypes failing in certain cases (:issue:`5754`) + +.. _whatsnew_0.13.0.contributors: + +Contributors +~~~~~~~~~~~~ -See the :ref:`full release notes -` or issue tracker -on GitHub for a complete list of all API changes, Enhancements and Bug Fixes. +.. 
contributors:: v0.12.0..v0.13.0 diff --git a/doc/source/whatsnew/v0.13.1.txt b/doc/source/whatsnew/v0.13.1.rst similarity index 64% rename from doc/source/whatsnew/v0.13.1.txt rename to doc/source/whatsnew/v0.13.1.rst index a4807a6d61b76..6a1b578cc08fb 100644 --- a/doc/source/whatsnew/v0.13.1.txt +++ b/doc/source/whatsnew/v0.13.1.rst @@ -3,6 +3,8 @@ v0.13.1 (February 3, 2014) -------------------------- +{{ common_imports }} + This is a minor release from 0.13.0 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -126,10 +128,6 @@ API changes df.equals(df2) df.equals(df2.sort_index()) - import pandas.core.common as com - com.array_equivalent(np.array([0, np.nan]), np.array([0, np.nan])) - np.array_equal(np.array([0, np.nan]), np.array([0, np.nan])) - - ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is empty (:issue:`6007`). @@ -296,11 +294,86 @@ Experimental There are no experimental changes in 0.13.1 +.. _release.bug_fixes-0.13.1: + Bug Fixes ~~~~~~~~~ -See :ref:`V0.13.1 Bug Fixes` for an extensive list of bugs that have been fixed in 0.13.1. +- Bug in ``io.wb.get_countries`` not including all countries (:issue:`6008`) +- Bug in Series replace with timestamp dict (:issue:`5797`) +- read_csv/read_table now respects the `prefix` kwarg (:issue:`5732`). 
+- Bug in selection with missing values via ``.ix`` from a duplicate indexed DataFrame failing (:issue:`5835`) +- Fix issue of boolean comparison on empty DataFrames (:issue:`5808`) +- Bug in isnull handling ``NaT`` in an object array (:issue:`5443`) +- Bug in ``to_datetime`` when passed a ``np.nan`` or integer datelike and a format string (:issue:`5863`) +- Bug in groupby dtype conversion with datetimelike (:issue:`5869`) +- Regression in handling of empty Series as indexers to Series (:issue:`5877`) +- Bug in internal caching, related to (:issue:`5727`) +- Testing bug in reading JSON/msgpack from a non-filepath on windows under py3 (:issue:`5874`) +- Bug when assigning to .ix[tuple(...)] (:issue:`5896`) +- Bug in fully reindexing a Panel (:issue:`5905`) +- Bug in idxmin/max with object dtypes (:issue:`5914`) +- Bug in ``BusinessDay`` when adding n days to a date not on offset when n>5 and n%5==0 (:issue:`5890`) +- Bug in assigning to chained series with a series via ix (:issue:`5928`) +- Bug in creating an empty DataFrame, copying, then assigning (:issue:`5932`) +- Bug in DataFrame.tail with empty frame (:issue:`5846`) +- Bug in propagating metadata on ``resample`` (:issue:`5862`) +- Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`) +- Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`) +- ``pd.match`` not returning passed sentinel +- ``Panel.to_frame()`` no longer fails when ``major_axis`` is a + ``MultiIndex`` (:issue:`5402`). 
+- Bug in ``pd.read_msgpack`` with inferring a ``DateTimeIndex`` frequency + incorrectly (:issue:`5947`) +- Fixed ``to_datetime`` for array with both Tz-aware datetimes and ``NaT``'s (:issue:`5961`) +- Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`) +- Bug in scipy ``interpolate`` methods with a datetime index (:issue:`5975`) +- Bug in NaT comparison if a mixed datetime/np.datetime64 with NaT were passed (:issue:`5968`) +- Fixed bug with ``pd.concat`` losing dtype information if all inputs are empty (:issue:`5742`) +- Recent changes in IPython cause warnings to be emitted when using previous versions + of pandas in QTConsole, now fixed. If you're using an older version and + need to suppress the warnings, see (:issue:`5922`). +- Bug in merging ``timedelta`` dtypes (:issue:`5695`) +- Bug in plotting.scatter_matrix function. Wrong alignment among diagonal + and off-diagonal plots, see (:issue:`5497`). +- Regression in Series with a MultiIndex via ix (:issue:`6018`) +- Bug in Series.xs with a MultiIndex (:issue:`6018`) +- Bug in Series construction of mixed type with datelike and an integer (which should result in + object type and not automatic conversion) (:issue:`6028`) +- Possible segfault when chained indexing with an object array under NumPy 1.7.1 (:issue:`6026`, :issue:`6056`) +- Bug in setting using fancy indexing a single element with a non-scalar (e.g. a list), + (:issue:`6043`) +- ``to_sql`` did not respect ``if_exists`` (:issue:`4110` :issue:`4304`) +- Regression in ``.get(None)`` indexing from 0.12 (:issue:`5652`) +- Subtle ``iloc`` indexing bug, surfaced in (:issue:`6059`) +- Bug with insert of strings into DatetimeIndex (:issue:`5818`) +- Fixed unicode bug in to_html/HTML repr (:issue:`6098`) +- Fixed missing arg validation in get_options_data (:issue:`6105`) +- Bug in assignment with duplicate columns in a frame where the locations + are a slice (e.g. 
next to each other) (:issue:`6120`) +- Bug in propagating _ref_locs during construction of a DataFrame with dups + index/columns (:issue:`6121`) +- Bug in ``DataFrame.apply`` when using mixed datelike reductions (:issue:`6125`) +- Bug in ``DataFrame.append`` when appending a row with different columns (:issue:`6129`) +- Bug in DataFrame construction with recarray and non-ns datetime dtype (:issue:`6140`) +- Bug in ``.loc`` setitem indexing with a dataframe on rhs, multiple item setting, and + a datetimelike (:issue:`6152`) +- Fixed a bug in ``query``/``eval`` during lexicographic string comparisons (:issue:`6155`). +- Fixed a bug in ``query`` where the index of a single-element ``Series`` was + being thrown away (:issue:`6148`). +- Bug in ``HDFStore`` on appending a dataframe with MultiIndexed columns to + an existing table (:issue:`6167`) +- Consistency with dtypes in setting an empty DataFrame (:issue:`6171`) +- Bug in selecting on a MultiIndex ``HDFStore`` even in the presence of under + specified column spec (:issue:`6169`) +- Bug in ``nanops.var`` with ``ddof=1`` and 1 element would sometimes return ``inf`` + rather than ``nan`` on some platforms (:issue:`6136`) +- Bug in Series and DataFrame bar plots ignoring the ``use_index`` keyword (:issue:`6209`) +- Bug in groupby with mixed str/int under python3 fixed; ``argsort`` was failing (:issue:`6212`) + +.. _whatsnew_0.13.1.contributors: + +Contributors +~~~~~~~~~~~~ -See the :ref:`full release notes -` or issue tracker -on GitHub for a complete list of all API changes, Enhancements and Bug Fixes. +.. 
contributors:: v0.13.0..v0.13.1 diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.rst similarity index 99% rename from doc/source/whatsnew/v0.14.0.txt rename to doc/source/whatsnew/v0.14.0.rst index d4b7b09c054d6..9606bbac2a1b3 100644 --- a/doc/source/whatsnew/v0.14.0.txt +++ b/doc/source/whatsnew/v0.14.0.rst @@ -3,6 +3,8 @@ v0.14.0 (May 31 , 2014) ----------------------- +{{ common_imports }} + This is a major release from 0.13.1 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -249,13 +251,13 @@ Display Changes constraints were reached and an ellipse (...) signaled that part of the data was cut off. - .. image:: _static/trunc_before.png + .. image:: ../_static/trunc_before.png :alt: The previous look of truncate. In the current version, large DataFrames are centrally truncated, showing a preview of head and tail in both dimensions. - .. image:: _static/trunc_after.png + .. image:: ../_static/trunc_after.png :alt: The new look. - allow option ``'truncate'`` for ``display.show_dimensions`` to only show the dimensions if the @@ -1047,3 +1049,11 @@ Bug Fixes - Bug in expressions evaluation with reversed ops, showing in series-dataframe ops (:issue:`7198`, :issue:`7192`) - Bug in multi-axis indexing with > 2 ndim and a MultiIndex (:issue:`7199`) - Fix a bug where invalid eval/query operations would blow the stack (:issue:`5198`) + + +.. _whatsnew_0.14.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.13.1..v0.14.0 diff --git a/doc/source/whatsnew/v0.14.1.txt b/doc/source/whatsnew/v0.14.1.rst similarity index 99% rename from doc/source/whatsnew/v0.14.1.txt rename to doc/source/whatsnew/v0.14.1.rst index d019cf54086c6..3b0ff5650d90d 100644 --- a/doc/source/whatsnew/v0.14.1.txt +++ b/doc/source/whatsnew/v0.14.1.rst @@ -3,6 +3,8 @@ v0.14.1 (July 11, 2014) ----------------------- +{{ common_imports }} + This is a minor release from 0.14.0 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -269,3 +271,11 @@ Bug Fixes - Bug in grouped `hist` doesn't handle `rot` kw and `sharex` kw properly (:issue:`7234`) - Bug in ``.loc`` performing fallback integer indexing with ``object`` dtype indices (:issue:`7496`) - Bug (regression) in ``PeriodIndex`` constructor when passed ``Series`` objects (:issue:`7701`). + + +.. _whatsnew_0.14.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.14.0..v0.14.1 diff --git a/doc/source/whatsnew/v0.15.0.txt b/doc/source/whatsnew/v0.15.0.rst similarity index 99% rename from doc/source/whatsnew/v0.15.0.txt rename to doc/source/whatsnew/v0.15.0.rst index 4be6975958af5..00eda927a9c73 100644 --- a/doc/source/whatsnew/v0.15.0.txt +++ b/doc/source/whatsnew/v0.15.0.rst @@ -3,6 +3,8 @@ v0.15.0 (October 18, 2014) -------------------------- +{{ common_imports }} + This is a major release from 0.14.1 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -1216,3 +1218,11 @@ Bug Fixes - Suppress FutureWarning generated by NumPy when comparing object arrays containing NaN for equality (:issue:`7065`) - Bug in ``DataFrame.eval()`` where the dtype of the ``not`` operator (``~``) was not correctly inferred as ``bool``. + + +.. 
_whatsnew_0.15.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.14.1..v0.15.0 diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.rst similarity index 98% rename from doc/source/whatsnew/v0.15.1.txt rename to doc/source/whatsnew/v0.15.1.rst index 8cbf239ea20d0..88127d4e1b8d8 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.rst @@ -3,6 +3,8 @@ v0.15.1 (November 9, 2014) -------------------------- +{{ common_imports }} + This is a minor bug-fix release from 0.15.0 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -304,3 +306,11 @@ Bug Fixes - Bug in Setting by indexer to a scalar value with a mixed-dtype `Panel4d` was failing (:issue:`8702`) - Bug where ``DataReader``'s would fail if one of the symbols passed was invalid. Now returns data for valid symbols and np.nan for invalid (:issue:`8494`) - Bug in ``get_quote_yahoo`` that wouldn't allow non-float return values (:issue:`5229`). + + +.. _whatsnew_0.15.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.15.0..v0.15.1 diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.rst similarity index 99% rename from doc/source/whatsnew/v0.15.2.txt rename to doc/source/whatsnew/v0.15.2.rst index ee72fab7d23f2..dd988cde88145 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.rst @@ -3,6 +3,8 @@ v0.15.2 (December 12, 2014) --------------------------- +{{ common_imports }} + This is a minor release from 0.15.1 and includes a large number of bug fixes along with several new features, enhancements, and performance improvements. A small number of API changes were necessary to fix existing bugs. 
@@ -238,3 +240,11 @@ Bug Fixes - Bug in plotting if sharex was enabled and index was a timeseries, would show labels on multiple axes (:issue:`3964`). - Bug where passing a unit to the TimedeltaIndex constructor applied the to nano-second conversion twice. (:issue:`9011`). - Bug in plotting of a period-like array (:issue:`9012`) + + +.. _whatsnew_0.15.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.15.1..v0.15.2 diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.rst similarity index 99% rename from doc/source/whatsnew/v0.16.0.txt rename to doc/source/whatsnew/v0.16.0.rst index ce525bbb4c1d6..d394b43a7ec88 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.rst @@ -3,6 +3,8 @@ v0.16.0 (March 22, 2015) ------------------------ +{{ common_imports }} + This is a major release from 0.15.2 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -74,7 +76,7 @@ calculate the ratio, and plot PetalRatio = lambda x: x.PetalWidth / x.PetalLength) .plot(kind='scatter', x='SepalRatio', y='PetalRatio')) -.. image:: _static/whatsnew_assign.png +.. image:: ../_static/whatsnew_assign.png :scale: 50 % See the :ref:`documentation ` for more. (:issue:`9229`) @@ -675,3 +677,11 @@ Bug Fixes df1 = DataFrame({'x': Series(['a','b','c']), 'y': Series(['d','e','f'])}) df2 = df1[['x']] df2['y'] = ['g', 'h', 'i'] + + +.. _whatsnew_0.16.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.15.2..v0.16.0 diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.rst similarity index 99% rename from doc/source/whatsnew/v0.16.1.txt rename to doc/source/whatsnew/v0.16.1.rst index d3a8064a0e786..aae96a5d63c14 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.rst @@ -3,6 +3,8 @@ v0.16.1 (May 11, 2015) ---------------------- +{{ common_imports }} + This is a minor bug-fix release from 0.16.0 and includes a a large number of bug fixes along several new features, enhancements, and performance improvements. We recommend that all users upgrade to this version. @@ -465,3 +467,11 @@ Bug Fixes - Bug in subclassed ``DataFrame``. It may not return the correct class, when slicing or subsetting it. (:issue:`9632`) - Bug in ``.median()`` where non-float null values are not handled correctly (:issue:`10040`) - Bug in Series.fillna() where it raises if a numerically convertible string is given (:issue:`10092`) + + +.. _whatsnew_0.16.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.0..v0.16.1 diff --git a/doc/source/whatsnew/v0.16.2.txt b/doc/source/whatsnew/v0.16.2.rst similarity index 98% rename from doc/source/whatsnew/v0.16.2.txt rename to doc/source/whatsnew/v0.16.2.rst index 047da4c94093b..acae3a55d5f78 100644 --- a/doc/source/whatsnew/v0.16.2.txt +++ b/doc/source/whatsnew/v0.16.2.rst @@ -3,6 +3,8 @@ v0.16.2 (June 12, 2015) ----------------------- +{{ common_imports }} + This is a minor bug-fix release from 0.16.1 and includes a a large number of bug fixes along some new features (:meth:`~DataFrame.pipe` method), enhancements, and performance improvements. @@ -165,3 +167,11 @@ Bug Fixes - Bug in ``read_hdf`` where open stores could not be used (:issue:`10330`). - Bug in adding empty ``DataFrames``, now results in a ``DataFrame`` that ``.equals`` an empty ``DataFrame`` (:issue:`10181`). 
- Bug in ``to_hdf`` and ``HDFStore`` which did not check that complib choices were valid (:issue:`4582`, :issue:`8874`). + + +.. _whatsnew_0.16.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.1..v0.16.2 diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.rst similarity index 99% rename from doc/source/whatsnew/v0.17.0.txt rename to doc/source/whatsnew/v0.17.0.rst index 404f2bf06e861..abde8d953f4df 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.rst @@ -3,6 +3,8 @@ v0.17.0 (October 9, 2015) ------------------------- +{{ common_imports }} + This is a major release from 0.16.2 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -160,7 +162,7 @@ To alleviate this issue, we have added a new, optional plotting interface, which In [14]: df.plot.bar() -.. image:: _static/whatsnew_plot_submethods.png +.. image:: ../_static/whatsnew_plot_submethods.png As a result of this change, these methods are now all discoverable via tab-completion: @@ -313,11 +315,11 @@ has been changed to make this keyword unnecessary - the change is shown below. **Old** -.. image:: _static/old-excel-index.png +.. image:: ../_static/old-excel-index.png **New** -.. image:: _static/new-excel-index.png +.. image:: ../_static/new-excel-index.png .. warning:: @@ -354,14 +356,14 @@ Some East Asian countries use Unicode characters its width is corresponding to 2 df = pd.DataFrame({u'国籍': ['UK', u'日本'], u'名前': ['Alice', u'しのぶ']}) df; -.. image:: _static/option_unicode01.png +.. image:: ../_static/option_unicode01.png .. ipython:: python pd.set_option('display.unicode.east_asian_width', True) df; -.. image:: _static/option_unicode02.png +.. 
image:: ../_static/option_unicode02.png For further details, see :ref:`here ` @@ -1167,3 +1169,11 @@ Bug Fixes - Bug in ``.groupby`` when number of keys to group by is same as length of index (:issue:`11185`) - Bug in ``convert_objects`` where converted values might not be returned if all null and ``coerce`` (:issue:`9589`) - Bug in ``convert_objects`` where ``copy`` keyword was not respected (:issue:`9589`) + + +.. _whatsnew_0.17.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.2..v0.17.0 diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.rst similarity index 98% rename from doc/source/whatsnew/v0.17.1.txt rename to doc/source/whatsnew/v0.17.1.rst index 328a8193c8b13..44554a88fba04 100644 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.rst @@ -3,6 +3,8 @@ v0.17.1 (November 21, 2015) --------------------------- +{{ common_imports }} + .. note:: We are proud to announce that *pandas* has become a sponsored project of the (`NumFOCUS organization`_). This will help ensure the success of development of *pandas* as a world-class open-source project. @@ -202,3 +204,11 @@ Bug Fixes - Bug in ``DataFrame.to_sparse()`` loses column names for MultiIndexes (:issue:`11600`) - Bug in ``DataFrame.round()`` with non-unique column index producing a Fatal Python error (:issue:`11611`) - Bug in ``DataFrame.round()`` with ``decimals`` being a non-unique indexed Series producing extra columns (:issue:`11618`) + + +.. _whatsnew_0.17.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.17.0..v0.17.1 diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.rst similarity index 99% rename from doc/source/whatsnew/v0.18.0.txt rename to doc/source/whatsnew/v0.18.0.rst index e38ba54d4b058..5cd4163b1a7a5 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.rst @@ -3,6 +3,8 @@ v0.18.0 (March 13, 2016) ------------------------ +{{ common_imports }} + This is a major release from 0.17.1 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -1290,3 +1292,11 @@ Bug Fixes - Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`) - Bug when increasing the buffer size of CSV reader in ``read_csv`` (:issue:`12494`) - Bug when setting columns of a ``DataFrame`` with duplicate column names (:issue:`12344`) + + +.. _whatsnew_0.18.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.17.1..v0.18.0 diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.rst similarity index 99% rename from doc/source/whatsnew/v0.18.1.txt rename to doc/source/whatsnew/v0.18.1.rst index 34921505a46bf..1dc01d7f1f745 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.rst @@ -3,6 +3,8 @@ v0.18.1 (May 3, 2016) --------------------- +{{ common_imports }} + This is a minor bug-fix release from 0.18.0 and includes a large number of bug fixes along with several new features, enhancements, and performance improvements. We recommend that all users upgrade to this version. @@ -266,7 +268,7 @@ These changes conform sparse handling to return the correct types and work to ma ``SparseArray.take`` now returns a scalar for scalar input, ``SparseArray`` for others. Furthermore, it handles a negative indexer with the same rule as ``Index`` (:issue:`10560`, :issue:`12796`) -.. 
ipython:: python +.. code-block:: python s = pd.SparseArray([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) s.take(0) @@ -692,3 +694,11 @@ Bug Fixes - Bug in ``pd.to_numeric()`` with ``Index`` returns ``np.ndarray``, rather than ``Index`` (:issue:`12777`) - Bug in ``pd.to_numeric()`` with datetime-like may raise ``TypeError`` (:issue:`12777`) - Bug in ``pd.to_numeric()`` with scalar raises ``ValueError`` (:issue:`12777`) + + +.. _whatsnew_0.18.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.18.0..v0.18.1 diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.rst similarity index 99% rename from doc/source/whatsnew/v0.19.0.txt rename to doc/source/whatsnew/v0.19.0.rst index 73fb124afef87..467319a4527d1 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.rst @@ -3,6 +3,8 @@ v0.19.0 (October 2, 2016) ------------------------- +{{ common_imports }} + This is a major release from 0.18.1 and includes number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -1564,3 +1566,11 @@ Bug Fixes - ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) + + +.. _whatsnew_0.19.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.18.1..v0.19.0 diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.rst similarity index 97% rename from doc/source/whatsnew/v0.19.1.txt rename to doc/source/whatsnew/v0.19.1.rst index 1c577dddf1cd4..0c909fa4195d7 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.rst @@ -3,6 +3,8 @@ v0.19.1 (November 3, 2016) -------------------------- +{{ common_imports }} + This is a minor bug-fix release from 0.19.0 and includes some small regression fixes, bug fixes and performance improvements. We recommend that all users upgrade to this version. @@ -59,3 +61,11 @@ Bug Fixes - Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`) - Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` is not scalar and ``values`` is not specified (:issue:`14380`) + + +.. _whatsnew_0.19.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.0..v0.19.1 diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.rst similarity index 97% rename from doc/source/whatsnew/v0.19.2.txt rename to doc/source/whatsnew/v0.19.2.rst index 171d97b76de75..1cded6d2c94e2 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.rst @@ -3,6 +3,8 @@ v0.19.2 (December 24, 2016) --------------------------- +{{ common_imports }} + This is a minor bug-fix release in the 0.19.x series and includes some small regression fixes, bug fixes and performance improvements. We recommend that all users upgrade to this version. @@ -80,3 +82,11 @@ Bug Fixes - Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) - Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. 
(:issue:`14821`) - Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`) + + +.. _whatsnew_0.19.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.1..v0.19.2 diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.rst similarity index 99% rename from doc/source/whatsnew/v0.20.0.txt rename to doc/source/whatsnew/v0.20.0.rst index 9f5fbdc195f34..8456449ee4419 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.rst @@ -3,6 +3,8 @@ v0.20.1 (May 5, 2017) --------------------- +{{ common_imports }} + This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -381,7 +383,7 @@ For example, after running the following, ``styled.xlsx`` renders as below: highlight_max() styled.to_excel('styled.xlsx', engine='openpyxl') -.. image:: _static/style-excel.png +.. image:: ../_static/style-excel.png .. ipython:: python :suppress: @@ -1731,3 +1733,11 @@ Other - Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) - Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) - Avoid use of ``np.finfo()`` during ``import pandas`` removed to mitigate deadlock on Python GIL misuse (:issue:`14641`) + + +.. _whatsnew_0.20.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.19.2..v0.20.0 diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.rst similarity index 97% rename from doc/source/whatsnew/v0.20.2.txt rename to doc/source/whatsnew/v0.20.2.rst index 3de6fbc8afaf8..784cd09edff30 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.rst @@ -3,6 +3,8 @@ v0.20.2 (June 4, 2017) ---------------------- +{{ common_imports }} + This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, bug fixes and performance improvements. We recommend that all users upgrade to this version. @@ -125,3 +127,11 @@ Other ^^^^^ - Bug in ``DataFrame.drop()`` with an empty-list with non-unique indices (:issue:`16270`) + + +.. _whatsnew_0.20.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.0..v0.20.2 diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.rst similarity index 95% rename from doc/source/whatsnew/v0.20.3.txt rename to doc/source/whatsnew/v0.20.3.rst index 582f975f81a7a..47bfcc761b088 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.rst @@ -3,6 +3,8 @@ v0.20.3 (July 7, 2017) ----------------------- +{{ common_imports }} + This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version. @@ -58,3 +60,11 @@ Categorical ^^^^^^^^^^^ - Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`) + + +.. _whatsnew_0.20.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.20.2..v0.20.3 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.rst similarity index 99% rename from doc/source/whatsnew/v0.21.0.txt rename to doc/source/whatsnew/v0.21.0.rst index 77ae5b92d0e70..c9a90f3ada7e5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.rst @@ -3,6 +3,8 @@ v0.21.0 (October 27, 2017) -------------------------- +{{ common_imports }} + This is a major release from 0.20.3 and includes a number of API changes, deprecations, new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -1176,3 +1178,11 @@ Other - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) + + +.. _whatsnew_0.21.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.3..v0.21.0 diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.rst similarity index 98% rename from doc/source/whatsnew/v0.21.1.txt rename to doc/source/whatsnew/v0.21.1.rst index 49e59c9ddf5a7..bf13d5d67ed63 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.rst @@ -3,6 +3,8 @@ v0.21.1 (December 12, 2017) --------------------------- +{{ common_imports }} + This is a minor bug-fix release in the 0.21.x series and includes some small regression fixes, bug fixes and performance improvements. We recommend that all users upgrade to this version. @@ -169,3 +171,11 @@ String ^^^^^^ - :meth:`Series.str.split()` will now propagate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`) + + +.. _whatsnew_0.21.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.21.0..v0.21.1 diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.rst similarity index 98% rename from doc/source/whatsnew/v0.22.0.txt rename to doc/source/whatsnew/v0.22.0.rst index d165339cb0de9..f05b84a9d8902 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.rst @@ -3,6 +3,8 @@ v0.22.0 (December 29, 2017) --------------------------- +{{ common_imports }} + This is a major release from 0.21.1 and includes a single, API-breaking change. We recommend that all users upgrade to this version after carefully reading the release note (singular!). @@ -241,3 +243,11 @@ With conda, use Note that the inconsistency in the return value for all-*NA* series is still there for pandas 0.20.3 and earlier. Avoiding pandas 0.21 will only help with the empty case. + + +.. _whatsnew_0.22.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.21.1..v0.22.0 diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.rst similarity index 99% rename from doc/source/whatsnew/v0.23.0.txt rename to doc/source/whatsnew/v0.23.0.rst index 473a4bb72e6d9..f84517a3e3b9c 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.rst @@ -1,7 +1,9 @@ .. _whatsnew_0230: -v0.23.0 (May 15, 2018) ----------------------- +What's New in 0.23.0 (May 15, 2018) +----------------------------------- + +{{ common_imports }} This is a major release from 0.22.0 and includes a number of API changes, deprecations, new features, enhancements, and performance improvements along @@ -908,7 +910,7 @@ frames would not fit within the terminal width, and pandas would introduce line breaks to display these 20 columns. This resulted in an output that was relatively difficult to read: -.. image:: _static/print_df_old.png +..
image:: ../_static/print_df_old.png If Python runs in a terminal, the maximum number of columns is now determined automatically so that the printed data frame fits within the current terminal @@ -918,7 +920,7 @@ well as in many IDEs), this value cannot be inferred automatically and is thus set to `20` as in previous versions. In a terminal, this results in a much nicer output: -.. image:: _static/print_df_new.png +.. image:: ../_static/print_df_new.png Note that if you don't like the new default, you can always set this option yourself. To revert to the old setting, you can run this line: @@ -1412,3 +1414,10 @@ Other - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) - Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) - Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) + +.. _whatsnew_0.23.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.22.0..v0.23.0 diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.rst similarity index 97% rename from doc/source/whatsnew/v0.23.1.txt rename to doc/source/whatsnew/v0.23.1.rst index 1a514ba627fcb..e8e0060c48337 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.rst @@ -1,7 +1,9 @@ .. _whatsnew_0231: -v0.23.1 (June 12, 2018) ------------------------ +What's New in 0.23.1 (June 12, 2018) +------------------------------------ + +{{ common_imports }} This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version. 
@@ -138,3 +140,10 @@ Bug Fixes - Tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) - Bug preventing pandas being used on Windows without C++ redistributable installed (:issue:`21106`) + +.. _whatsnew_0.23.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.0..v0.23.1 diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.rst similarity index 81% rename from doc/source/whatsnew/v0.23.2.txt rename to doc/source/whatsnew/v0.23.2.rst index 7ec6e2632e717..573a30f17846b 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.rst @@ -1,7 +1,9 @@ .. _whatsnew_0232: -v0.23.2 (July 5, 2018) ----------------------- +What's New in 0.23.2 (July 5, 2018) +----------------------------------- + +{{ common_imports }} This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version. @@ -101,8 +103,20 @@ Bug Fixes **Timezones** - Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) -- Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) +- Bug in comparing :class:`DataFrame` with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) +- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError`` (:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware 
:class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) **Timedelta** - Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`) + +.. _whatsnew_0.23.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.1..v0.23.2 diff --git a/doc/source/whatsnew/v0.23.3.rst b/doc/source/whatsnew/v0.23.3.rst new file mode 100644 index 0000000000000..29758e54b437b --- /dev/null +++ b/doc/source/whatsnew/v0.23.3.rst @@ -0,0 +1,16 @@ +.. _whatsnew_0233: + +What's New in 0.23.3 (July 7, 2018) +----------------------------------- + +{{ common_imports }} + +This release fixes a build issue with the sdist for Python 3.7 (:issue:`21785`). +There are no other changes. + +.. _whatsnew_0.23.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.2..v0.23.3 diff --git a/doc/source/whatsnew/v0.23.3.txt b/doc/source/whatsnew/v0.23.3.txt deleted file mode 100644 index b8adce27d2523..0000000000000 --- a/doc/source/whatsnew/v0.23.3.txt +++ /dev/null @@ -1,7 +0,0 @@ -.. _whatsnew_0233: - -v0.23.3 (July 7, 2018) ----------------------- - -This release fixes a build issue with the sdist for Python 3.7 (:issue:`21785`) -There are no other changes. diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.rst similarity index 84% rename from doc/source/whatsnew/v0.23.4.txt rename to doc/source/whatsnew/v0.23.4.rst index 9a3ad3f61ee49..c8f08d0bb7091 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.rst @@ -1,7 +1,9 @@ .. _whatsnew_0234: -v0.23.4 (August 3, 2018) ------------------------- +What's New in 0.23.4 (August 3, 2018) +------------------------------------- + +{{ common_imports }} This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version.
@@ -35,3 +37,10 @@ Bug Fixes **Missing** - Bug in :func:`Series.clip` and :func:`DataFrame.clip` cannot accept list-like threshold containing ``NaN`` (:issue:`19992`) + +.. _whatsnew_0.23.4.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.3..v0.23.4 diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.rst similarity index 95% rename from doc/source/whatsnew/v0.24.0.txt rename to doc/source/whatsnew/v0.24.0.rst index e876bb37eb060..24f7bcc4aa805 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1,24 +1,28 @@ .. _whatsnew_0240: -v0.24.0 (Month XX, 2018) ------------------------- +What's New in 0.24.0 (Month XX, 2018) +------------------------------------- .. warning:: Starting January 1, 2019, pandas feature releases will support Python 3 only. See :ref:`install.dropping-27` for more. +{{ common_imports }} + +These are the changes in pandas 0.24.0. See :ref:`release` for a full changelog +including other versions of pandas. + .. _whatsnew_0240.enhancements: New features ~~~~~~~~~~~~ - :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) -- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups -<groupby.split>` for more information (:issue:`15475`, :issue:`15506`) +- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups <groupby.split>` for more information (:issue:`15475`, :issue:`15506`).
- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing -the user to override the engine's default behavior to include or omit the -dataframe's indexes from the resulting Parquet file. (:issue:`20768`) + the user to override the engine's default behavior to include or omit the + dataframe's indexes from the resulting Parquet file. (:issue:`20768`) - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) - :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`) @@ -207,6 +211,7 @@ See the :ref:`advanced docs on renaming` for more details. Other Enhancements ^^^^^^^^^^^^^^^^^^ + - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) - :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether ``NaN``/``NaT`` values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) @@ -228,7 +233,7 @@ Other Enhancements - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) -- :meth:`round`, :meth:`ceil`, and meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. 
See :ref:`timeseries.timezone_nonexsistent` (:issue:`22647`) +- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). - :meth:`pandas.core.dtypes.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, @@ -238,8 +243,10 @@ Other Enhancements - Compatibility with Matplotlib 3.0 (:issue:`22790`). - Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) -- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexsistent` (:issue:`8917`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) +- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. +- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`) ..
_whatsnew_0240.api_breaking: @@ -284,10 +291,10 @@ and replaced it with references to `pyarrow` (:issue:`21639` and :issue:`23053`) .. _whatsnew_0240.api_breaking.csv_line_terminator: `os.linesep` is used for ``line_terminator`` of ``DataFrame.to_csv`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :func:`DataFrame.to_csv` now uses :func:`os.linesep` rather than ``'\n'`` - for the default line terminator (:issue:`20353`). +for the default line terminator (:issue:`20353`). This change only affects when running on Windows, where ``'\r\n'`` was used for line terminator even when ``'\n'`` was passed in ``line_terminator``. @@ -295,26 +302,26 @@ Previous Behavior on Windows: .. code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: # When passing file PATH to to_csv, line_terminator does not work, and csv is saved with '\r\n'. - ...: # Also, this converts all '\n's in the data to '\r\n'. - ...: data.to_csv("test.csv", index=False, line_terminator='\n') + In [2]: # When passing file PATH to to_csv, line_terminator does not work, and csv is saved with '\r\n'. + ...: # Also, this converts all '\n's in the data to '\r\n'. + ...: data.to_csv("test.csv", index=False, line_terminator='\n') -In [3]: with open("test.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\r\n"a\r\nbc","a\r\r\nbc"\r\n' + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\r\n"a\r\nbc","a\r\r\nbc"\r\n' -In [4]: # When passing file OBJECT with newline option to to_csv, line_terminator works. 
- ...: with open("test2.csv", mode='w', newline='\n') as f: - ...: data.to_csv(f, index=False, line_terminator='\n') + In [4]: # When passing file OBJECT with newline option to to_csv, line_terminator works. + ...: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False, line_terminator='\n') -In [5]: with open("test2.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + In [5]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' New Behavior on Windows: @@ -323,54 +330,54 @@ New Behavior on Windows: - The value of ``line_terminator`` only affects the line terminator of CSV, so it does not change the value inside the data. -.. code-block:: ipython + .. code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: data.to_csv("test.csv", index=False, line_terminator='\n') + In [2]: data.to_csv("test.csv", index=False, line_terminator='\n') -In [3]: with open("test.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' - On Windows, the value of ``os.linesep`` is ``'\r\n'``, so if ``line_terminator`` is not set, ``'\r\n'`` is used for line terminator. - Again, it does not affect the value inside the data. -.. code-block:: ipython + .. 
code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: data.to_csv("test.csv", index=False) + In [2]: data.to_csv("test.csv", index=False) -In [3]: with open("test.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' - For files objects, specifying ``newline`` is not sufficient to set the line terminator. You must pass in the ``line_terminator`` explicitly, even in this case. -.. code-block:: ipython + .. code-block:: ipython -In [1]: data = pd.DataFrame({ - ...: "string_with_lf": ["a\nbc"], - ...: "string_with_crlf": ["a\r\nbc"] - ...: }) + In [1]: data = pd.DataFrame({ + ...: "string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"] + ...: }) -In [2]: with open("test2.csv", mode='w', newline='\n') as f: - ...: data.to_csv(f, index=False) + In [2]: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False) -In [3]: with open("test2.csv", mode='rb') as f: - ...: print(f.read()) -b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + In [3]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' .. _whatsnew_0240.api_breaking.interval_values: @@ -778,17 +785,20 @@ Previous Behavior: df = pd.DataFrame(arr) .. ipython:: python + # Comparison operations and arithmetic operations both broadcast. df == arr[[0], :] df + arr[[0], :] .. ipython:: python + # Comparison operations and arithmetic operations both broadcast. df == (1, 2) df + (1, 2) .. ipython:: python :okexcept: + # Comparison operations and arithmetic opeartions both raise ValueError. 
df == (1, 2, 3) df + (1, 2, 3) @@ -798,8 +808,9 @@ Previous Behavior: DataFrame Arithmetic Operations Broadcasting Changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + :class:`DataFrame` arithmetic operations when operating with 2-dimensional -``np.ndarray`` objects now broadcast in the same way as ``np.ndarray``s +``np.ndarray`` objects now broadcast in the same way as ``np.ndarray`` broadcast. (:issue:`23000`) Previous Behavior: @@ -818,11 +829,13 @@ Previous Behavior: *Current Behavior*: .. ipython:: python + arr = np.arange(6).reshape(3, 2) df = pd.DataFrame(arr) df .. ipython:: python + df + arr[[0], :] # 1 row, 2 columns df + arr[:, [1]] # 1 column, 3 rows @@ -889,7 +902,7 @@ Current Behavior: ... OverflowError: Trying to coerce negative values to unsigned integers -.. _whatsnew_0240.api.crosstab_dtypes +.. _whatsnew_0240.api.crosstab_dtypes: Crosstab Preserves Dtypes ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1010,6 +1023,7 @@ Current Behavior: .. ipython:: python :okwarning: + per = pd.Period('2016Q1') per + 3 @@ -1169,7 +1183,7 @@ Timezones - Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) - Bug in ``DataFrame`` assignment with a timezone-aware scalar (:issue:`19843`) - Bug in :func:`DataFrame.asof` that raised a ``TypeError`` when attempting to compare tz-naive and tz-aware timestamps (:issue:`21194`) -- Bug when constructing a :class:`DatetimeIndex` with :class:`Timestamp`s constructed with the ``replace`` method across DST (:issue:`18785`) +- Bug when constructing a :class:`DatetimeIndex` with :class:`Timestamp` constructed with the ``replace`` method across DST (:issue:`18785`) - Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing 
a :class:`Series` with a DST transition (:issue:`21846`) @@ -1200,6 +1214,7 @@ Numeric - Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype arithmetic operations with ``ndarray`` with integer dtype incorrectly treating the narray as ``timedelta64[ns]`` dtype (:issue:`23114`) - Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`). - :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`) Strings ^^^^^^^ @@ -1253,15 +1268,12 @@ MultiIndex ^^^^^^^^^^ - Removed compatibility for :class:`MultiIndex` pickles prior to version 0.8.0; compatibility with :class:`MultiIndex` pickles from version 0.13 forward is maintained (:issue:`21654`) -- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a :class:`MultiIndex`ed object) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked a label which is present in the ``levels`` but is unused (:issue:`22221`) +- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a ``Series`` or ``DataFrame`` with a :class:`MultiIndex` index) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked a label which is present in the ``levels`` but is unused (:issue:`22221`) - Fix ``TypeError`` in Python 3 when creating :class:`MultiIndex` in which some levels have mixed types, e.g. when some labels are tuples (:issue:`15457`) I/O ^^^ -- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) -- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) - ..
_whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine @@ -1297,6 +1309,9 @@ Current Behavior: Notice how we now instead output ``np.nan`` itself instead of a stringified form of it. +- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) +- Bug in :meth:`read_excel()` when ``parse_cols`` is specified with an empty dataset (:issue:`9208`) - :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`) - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) - :func:`read_csv()` and func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`) @@ -1356,9 +1371,9 @@ Reshaping - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) - Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) -- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue`22796`) +- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`) - Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with
missing values (:issue:`23189`) -- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have :class:`MultiIndex`ed columns (:issue:`23033`). +- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`). .. _whatsnew_0240.bug_fixes.sparse: @@ -1391,3 +1406,10 @@ Other - :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) - Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`) + +.. _whatsnew_0.24.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.4..HEAD diff --git a/doc/source/whatsnew/v0.4.x.txt b/doc/source/whatsnew/v0.4.x.rst similarity index 97% rename from doc/source/whatsnew/v0.4.x.txt rename to doc/source/whatsnew/v0.4.x.rst index ed9352059a6dc..e54614849c93b 100644 --- a/doc/source/whatsnew/v0.4.x.txt +++ b/doc/source/whatsnew/v0.4.x.rst @@ -3,6 +3,8 @@ v.0.4.3 through v0.4.1 (September 25 - October 9, 2011) ------------------------------------------------------- +{{ common_imports }} + New Features ~~~~~~~~~~~~ @@ -61,3 +63,7 @@ Performance Enhancements .. _ENHed: https://github.com/pandas-dev/pandas/commit/edd9f1945fc010a57fa0ae3b3444d1fffe592591 .. _ENH56: https://github.com/pandas-dev/pandas/commit/56e0c9ffafac79ce262b55a6a13e1b10a88fbe93 +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.4.1..v0.4.3 diff --git a/doc/source/whatsnew/v0.5.0.txt b/doc/source/whatsnew/v0.5.0.rst similarity index 96% rename from doc/source/whatsnew/v0.5.0.txt rename to doc/source/whatsnew/v0.5.0.rst index 6fe6a02b08f70..c6d17cb1e1290 100644 --- a/doc/source/whatsnew/v0.5.0.txt +++ b/doc/source/whatsnew/v0.5.0.rst @@ -4,6 +4,8 @@ v.0.5.0 (October 24, 2011) -------------------------- +{{ common_imports }} + New Features ~~~~~~~~~~~~ @@ -41,3 +43,11 @@ Performance Enhancements .. _ENH61: https://github.com/pandas-dev/pandas/commit/6141961 .. _ENH5c: https://github.com/pandas-dev/pandas/commit/5ca6ff5d822ee4ddef1ec0d87b6d83d8b4bbd3eb + + +.. _whatsnew_0.5.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.4.0..v0.5.0 diff --git a/doc/source/whatsnew/v0.6.0.txt b/doc/source/whatsnew/v0.6.0.rst similarity index 97% rename from doc/source/whatsnew/v0.6.0.txt rename to doc/source/whatsnew/v0.6.0.rst index bd01dd0a90a59..de45b3b383129 100644 --- a/doc/source/whatsnew/v0.6.0.txt +++ b/doc/source/whatsnew/v0.6.0.rst @@ -3,6 +3,8 @@ v.0.6.0 (November 25, 2011) --------------------------- +{{ common_imports }} + New Features ~~~~~~~~~~~~ - :ref:`Added ` ``melt`` function to ``pandas.core.reshape`` @@ -54,3 +56,11 @@ Performance Enhancements - VBENCH Significantly improved performance of ``Series.order``, which also makes np.unique called on a Series faster (:issue:`327`) - VBENCH Vastly improved performance of GroupBy on axes with a MultiIndex (:issue:`299`) + + +.. _whatsnew_0.6.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.5.0..v0.6.0 diff --git a/doc/source/whatsnew/v0.6.1.txt b/doc/source/whatsnew/v0.6.1.rst similarity index 96% rename from doc/source/whatsnew/v0.6.1.txt rename to doc/source/whatsnew/v0.6.1.rst index acd5b0774f2bb..d01757775d694 100644 --- a/doc/source/whatsnew/v0.6.1.txt +++ b/doc/source/whatsnew/v0.6.1.rst @@ -48,3 +48,11 @@ Performance improvements - Column deletion in DataFrame copies no data (computes views on blocks) (GH #158) + + +.. _whatsnew_0.6.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.6.0..v0.6.1 diff --git a/doc/source/whatsnew/v0.7.0.txt b/doc/source/whatsnew/v0.7.0.rst similarity index 98% rename from doc/source/whatsnew/v0.7.0.txt rename to doc/source/whatsnew/v0.7.0.rst index 21d91950e7b78..e278bc0738108 100644 --- a/doc/source/whatsnew/v0.7.0.txt +++ b/doc/source/whatsnew/v0.7.0.rst @@ -3,6 +3,8 @@ v.0.7.0 (February 9, 2012) -------------------------- +{{ common_imports }} + New features ~~~~~~~~~~~~ @@ -298,3 +300,11 @@ Performance improvements ``level`` parameter passed (:issue:`545`) - Ported skiplist data structure to C to speed up ``rolling_median`` by about 5-10x in most typical use cases (:issue:`374`) + + +.. _whatsnew_0.7.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.6.1..v0.7.0 diff --git a/doc/source/whatsnew/v0.7.1.txt b/doc/source/whatsnew/v0.7.1.rst similarity index 90% rename from doc/source/whatsnew/v0.7.1.txt rename to doc/source/whatsnew/v0.7.1.rst index bc12cb8d200cd..f1a133797fd59 100644 --- a/doc/source/whatsnew/v0.7.1.txt +++ b/doc/source/whatsnew/v0.7.1.rst @@ -3,6 +3,8 @@ v.0.7.1 (February 29, 2012) --------------------------- +{{ common_imports }} + This release includes a few new features and addresses over a dozen bugs in 0.7.0. @@ -28,3 +30,11 @@ Performance improvements - Improve performance and memory usage of fillna on DataFrame - Can concatenate a list of Series along axis=1 to obtain a DataFrame (:issue:`787`) + + +.. 
_whatsnew_0.7.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.0..v0.7.1 diff --git a/doc/source/whatsnew/v0.7.2.txt b/doc/source/whatsnew/v0.7.2.rst similarity index 89% rename from doc/source/whatsnew/v0.7.2.txt rename to doc/source/whatsnew/v0.7.2.rst index c711639354139..b870db956f4f1 100644 --- a/doc/source/whatsnew/v0.7.2.txt +++ b/doc/source/whatsnew/v0.7.2.rst @@ -3,6 +3,8 @@ v.0.7.2 (March 16, 2012) --------------------------- +{{ common_imports }} + This release targets bugs in 0.7.1, and adds a few minor features. New features @@ -25,3 +27,11 @@ Performance improvements - Use khash for Series.value_counts, add raw function to algorithms.py (:issue:`861`) - Intercept __builtin__.sum in groupby (:issue:`885`) + + +.. _whatsnew_0.7.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.1..v0.7.2 diff --git a/doc/source/whatsnew/v0.7.3.txt b/doc/source/whatsnew/v0.7.3.rst similarity index 92% rename from doc/source/whatsnew/v0.7.3.txt rename to doc/source/whatsnew/v0.7.3.rst index 77cc72d8707cf..30e22f105656c 100644 --- a/doc/source/whatsnew/v0.7.3.txt +++ b/doc/source/whatsnew/v0.7.3.rst @@ -3,6 +3,8 @@ v.0.7.3 (April 12, 2012) ------------------------ +{{ common_imports }} + This is a minor release from 0.7.2 and fixes many minor bugs and adds a number of nice new features. There are also a couple of API changes to note; these should not affect very many users, and we are inclined to call them "bug fixes" @@ -22,7 +24,7 @@ New features from pandas.tools.plotting import scatter_matrix scatter_matrix(df, alpha=0.2) -.. image:: savefig/scatter_matrix_kde.png +.. image:: ../savefig/scatter_matrix_kde.png :width: 5in - Add ``stacked`` argument to Series and DataFrame's ``plot`` method for @@ -32,14 +34,14 @@ New features df.plot(kind='bar', stacked=True) -.. image:: savefig/bar_plot_stacked_ex.png +.. image:: ../savefig/bar_plot_stacked_ex.png :width: 4in .. code-block:: python df.plot(kind='barh', stacked=True) -.. 
image:: savefig/barh_plot_stacked_ex.png +.. image:: ../savefig/barh_plot_stacked_ex.png :width: 4in - Add log x and y :ref:`scaling options ` to @@ -94,3 +96,11 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: grouped = df.groupby('A')['C'] grouped.describe() grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values + + +.. _whatsnew_0.7.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.2..v0.7.3 diff --git a/doc/source/whatsnew/v0.8.0.txt b/doc/source/whatsnew/v0.8.0.rst similarity index 99% rename from doc/source/whatsnew/v0.8.0.txt rename to doc/source/whatsnew/v0.8.0.rst index 28c043e772605..eedaaa3dfa8bd 100644 --- a/doc/source/whatsnew/v0.8.0.txt +++ b/doc/source/whatsnew/v0.8.0.rst @@ -3,6 +3,8 @@ v0.8.0 (June 29, 2012) ------------------------ +{{ common_imports }} + This is a major release from 0.7.3 and includes extensive work on the time series handling and processing infrastructure as well as a great deal of new functionality throughout the library. It includes over 700 commits from more @@ -269,3 +271,11 @@ unique. In many cases it will no longer fail (some method like ``append`` still check for uniqueness unless disabled). However, all is not lost: you can inspect ``index.is_unique`` and raise an exception explicitly if it is ``False`` or go to a different code branch. + + +.. _whatsnew_0.8.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.3..v0.8.0 diff --git a/doc/source/whatsnew/v0.8.1.txt b/doc/source/whatsnew/v0.8.1.rst similarity index 93% rename from doc/source/whatsnew/v0.8.1.txt rename to doc/source/whatsnew/v0.8.1.rst index add96bec9d1dd..468b99341163c 100644 --- a/doc/source/whatsnew/v0.8.1.txt +++ b/doc/source/whatsnew/v0.8.1.rst @@ -3,6 +3,8 @@ v0.8.1 (July 22, 2012) ---------------------- +{{ common_imports }} + This release includes a few new features, performance enhancements, and over 30 bug fixes from 0.8.0. 
New features include notably NA friendly string processing functionality and a series of new plot types and options. @@ -34,3 +36,11 @@ Performance improvements Categorical types - Significant datetime parsing performance improvements + + +.. _whatsnew_0.8.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.8.0..v0.8.1 diff --git a/doc/source/whatsnew/v0.9.0.txt b/doc/source/whatsnew/v0.9.0.rst similarity index 96% rename from doc/source/whatsnew/v0.9.0.txt rename to doc/source/whatsnew/v0.9.0.rst index b60fb9cc64f4a..ee4e8c338c984 100644 --- a/doc/source/whatsnew/v0.9.0.txt +++ b/doc/source/whatsnew/v0.9.0.rst @@ -1,9 +1,6 @@ .. _whatsnew_0900: -.. ipython:: python - :suppress: - - from pandas.compat import StringIO +{{ common_imports }} v0.9.0 (October 7, 2012) ------------------------ @@ -95,3 +92,11 @@ See the :ref:`full release notes ` or issue tracker on GitHub for a complete list. + + +.. _whatsnew_0.9.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.8.1..v0.9.0 diff --git a/doc/source/whatsnew/v0.9.1.txt b/doc/source/whatsnew/v0.9.1.rst similarity index 97% rename from doc/source/whatsnew/v0.9.1.txt rename to doc/source/whatsnew/v0.9.1.rst index 1f58170b30244..fe3de9be95a74 100644 --- a/doc/source/whatsnew/v0.9.1.txt +++ b/doc/source/whatsnew/v0.9.1.rst @@ -1,13 +1,10 @@ .. _whatsnew_0901: -.. ipython:: python - :suppress: - - from pandas.compat import StringIO - v0.9.1 (November 14, 2012) -------------------------- +{{ common_imports }} + This is a bug fix release from 0.9.0 and includes several new features and enhancements along with a large number of bug fixes. The new features include by-column sort order for DataFrame and Series, improved NA handling for the rank @@ -158,3 +155,11 @@ API changes See the :ref:`full release notes ` or issue tracker on GitHub for a complete list. + + +.. _whatsnew_0.9.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.9.0..v0.9.1 diff --git a/scripts/announce.py b/doc/sphinxext/announce.py similarity index 75% rename from scripts/announce.py rename to doc/sphinxext/announce.py index 7b7933eba54dd..6bc53d3e96d01 100755 --- a/scripts/announce.py +++ b/doc/sphinxext/announce.py @@ -33,19 +33,21 @@ $ ./scripts/announce.py $GITHUB v1.11.0..v1.11.1 > announce.rst """ -from __future__ import print_function, division +from __future__ import division, print_function +import codecs import os import re -import codecs +import textwrap + from git import Repo UTF8Writer = codecs.getwriter('utf8') -this_repo = Repo(os.path.join(os.path.dirname(__file__), "..")) +this_repo = Repo(os.path.join(os.path.dirname(__file__), "..", "..")) author_msg = """\ -A total of %d people contributed to this release. People with a "+" by their -names contributed a patch for the first time. +A total of %d people contributed patches to this release. People with a +"+" by their names contributed a patch for the first time. 
""" pull_request_msg = """\ @@ -98,19 +100,35 @@ def get_pull_requests(repo, revision_range): return prs -def main(revision_range, repo): +def build_components(revision_range, heading="Contributors"): lst_release, cur_release = [r.strip() for r in revision_range.split('..')] - - # document authors authors = get_authors(revision_range) - heading = u"Contributors" - print() - print(heading) - print(u"=" * len(heading)) - print(author_msg % len(authors)) - for s in authors: - print(u'* ' + s) + return { + 'heading': heading, + 'author_message': author_msg % len(authors), + 'authors': authors, + } + + +def build_string(revision_range, heading="Contributors"): + components = build_components(revision_range, heading=heading) + components['uline'] = '=' * len(components['heading']) + components['authors'] = "* " + "\n* ".join(components['authors']) + + tpl = textwrap.dedent("""\ + {heading} + {uline} + + {author_message} + {authors}""").format(**components) + return tpl + + +def main(revision_range): + # document authors + text = build_string(revision_range) + print(text) if __name__ == "__main__": @@ -118,7 +136,5 @@ def main(revision_range, repo): parser = ArgumentParser(description="Generate author lists for release") parser.add_argument('revision_range', help='<revision>..<revision>') - parser.add_argument('--repo', help="Github org/repository", - default="pandas-dev/pandas") args = parser.parse_args() - main(args.revision_range, args.repo) + main(args.revision_range) diff --git a/doc/sphinxext/contributors.py b/doc/sphinxext/contributors.py new file mode 100644 index 0000000000000..0f04d47435699 --- /dev/null +++ b/doc/sphinxext/contributors.py @@ -0,0 +1,40 @@ +"""Sphinx extension for listing code contributors to a release. + +Usage:: + + .. contributors:: v0.23.0..v0.23.1 + +This will be replaced with a message indicating the number of +code contributors, followed by a bullet list naming each contributor +individually. 
+""" +from docutils import nodes +from docutils.parsers.rst import Directive + +from announce import build_components + + +class ContributorsDirective(Directive): + required_arguments = 1 + name = 'contributors' + + def run(self): + components = build_components(self.arguments[0]) + + message = nodes.paragraph() + message += nodes.Text(components['author_message']) + + listnode = nodes.bullet_list() + + for author in components['authors']: + para = nodes.paragraph() + para += nodes.Text(author) + listnode += nodes.list_item('', para) + + return [message, listnode] + + +def setup(app): + app.add_directive('contributors', ContributorsDirective) + + return {'version': '0.1'} diff --git a/environment.yml b/environment.yml index f66625e6a60c7..fc35f1290f1b1 100644 --- a/environment.yml +++ b/environment.yml @@ -13,7 +13,8 @@ dependencies: - Cython>=0.28.2 - flake8 - flake8-comprehensions - - flake8-rst + - flake8-rst=0.4.2 + - gitpython - hypothesis>=3.58.0 - isort - moto diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 4d144dcf2808a..329c368e13d6d 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -53,7 +53,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', int tiebreak = 0 bint keep_na = 0 bint isnan - float count = 0.0 + float64_t count = 0.0 tiebreak = tiebreakers[ties_method] {{if dtype == 'float64'}} @@ -228,7 +228,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', float64_t sum_ranks = 0 int tiebreak = 0 bint keep_na = 0 - float count = 0.0 + float64_t count = 0.0 tiebreak = tiebreakers[ties_method] diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index aa53f5086b894..7be3bdbc1048a 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -116,7 +116,7 @@ cdef class IntervalTree(IntervalMixin): enclosing = self.get_loc(0.5 * (key_left + key_right)) combined = 
np.concatenate([left_overlap, right_overlap, enclosing]) uniques = pd.unique(combined) - return uniques + return uniques.astype('intp') def get_indexer(self, scalar_t[:] target): """Return the positions corresponding to unique intervals that overlap diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a284d8fb544e7..d651e75674239 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1739,7 +1739,7 @@ cdef class _Period(object): ------- Timestamp - See also + See Also -------- Period.end_time : Return the end Timestamp. Period.dayofyear : Return the day of year. diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c09a8e5b395ee..ca8491726a5f7 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1111,14 +1111,14 @@ class Timedelta(_Timedelta): Parameters ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', - 'hours', hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', - 'T', 'S', 'seconds', 'sec', 'second', 'ms', - 'milliseconds', 'millisecond', 'milli', 'millis', 'L', - 'us', 'microseconds', 'microsecond', 'micro', 'micros', - 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond' - 'N'}, optional + unit : str, optional Denote the unit of the input, if input is an integer. Default 'ns'. + Possible values: + {'Y', 'M', 'W', 'D', 'days', 'day', 'hours', 'hour', 'hr', 'h', + 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', 'sec', 'second', + 'ms', 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', 'U', + 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', 'N'} days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional Values for construction in compat with datetime.timedelta. 
diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 6694737737562..f2ae7f6b56551 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -11,7 +11,7 @@ class DirNamesMixin(object): - _accessors = frozenset([]) + _accessors = frozenset() _deprecations = frozenset( ['asobject', 'base', 'data', 'flags', 'itemsize', 'strides']) @@ -247,7 +247,7 @@ def plot(self): >>> ds.geo.plot() # plots data on a map -See also +See Also -------- %(others)s """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4363f3ccb14e2..333f4e5ea21b4 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -313,10 +313,10 @@ class Categorical(ExtensionArray, PandasObject): See the `user guide `_ for more. - See also + See Also -------- - pandas.api.types.CategoricalDtype : Type for categorical data - CategoricalIndex : An Index with an underlying ``Categorical`` + pandas.api.types.CategoricalDtype : Type for categorical data. + CategoricalIndex : An Index with an underlying ``Categorical``. """ # For comparisons, so that numpy uses our implementation if the compare @@ -457,7 +457,7 @@ def categories(self): If the new categories do not validate as categories or if the number of new categories is unequal the number of old categories - See also + See Also -------- rename_categories reorder_categories @@ -823,7 +823,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, ------- cat : Categorical with reordered categories or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -894,7 +894,7 @@ def rename_categories(self, new_categories, inplace=False): With ``inplace=False``, the new categorical is returned. With ``inplace=True``, there is no return value. 
- See also + See Also -------- reorder_categories add_categories @@ -971,7 +971,7 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False): ------- cat : Categorical with reordered categories or None if inplace. - See also + See Also -------- rename_categories add_categories @@ -1010,7 +1010,7 @@ def add_categories(self, new_categories, inplace=False): ------- cat : Categorical with new categories added or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -1058,7 +1058,7 @@ def remove_categories(self, removals, inplace=False): ------- cat : Categorical with removed categories or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -1100,7 +1100,7 @@ def remove_unused_categories(self, inplace=False): ------- cat : Categorical with unused categories dropped or None if inplace. - See also + See Also -------- rename_categories reorder_categories @@ -1364,11 +1364,11 @@ def isna(self): ------- a boolean array of whether my values are null - See also + See Also -------- - isna : top-level isna - isnull : alias of isna - Categorical.notna : boolean inverse of Categorical.isna + isna : Top-level isna. + isnull : Alias of isna. + Categorical.notna : Boolean inverse of Categorical.isna. """ @@ -1387,11 +1387,11 @@ def notna(self): ------- a boolean array of whether my values are not null - See also + See Also -------- - notna : top-level notna - notnull : alias of notna - Categorical.isna : boolean inverse of Categorical.notna + notna : Top-level notna. + notnull : Alias of notna. + Categorical.isna : Boolean inverse of Categorical.notna. """ return ~self.isna() @@ -1503,7 +1503,7 @@ def argsort(self, *args, **kwargs): ------- argsorted : numpy array - See also + See Also -------- numpy.ndarray.argsort @@ -2322,7 +2322,7 @@ def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a Categorical. 
- See also + See Also -------- numpy.ndarray.repeat @@ -2371,7 +2371,7 @@ def isin(self, values): See Also -------- - pandas.Series.isin : equivalent method on Series + pandas.Series.isin : Equivalent method on Series. Examples -------- diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index daf2dcccd284b..094c9c3df0bed 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -39,7 +39,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.common as com -from pandas.core.algorithms import checked_add_with_arr +from pandas.core.algorithms import checked_add_with_arr, take, unique1d from .base import ExtensionOpsMixin from pandas.util._decorators import deprecate_kwarg @@ -196,6 +196,67 @@ def astype(self, dtype, copy=True): return self._box_values(self.asi8) return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) + # ------------------------------------------------------------------ + # ExtensionArray Interface + # TODO: + # * _from_sequence + # * argsort / _values_for_argsort + # * _reduce + + def unique(self): + result = unique1d(self.asi8) + return type(self)(result, dtype=self.dtype) + + def _validate_fill_value(self, fill_value): + """ + If a fill_value is passed to `take` convert it to an i8 representation, + raising ValueError if this is not possible. 
+ + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : np.int64 + + Raises + ------ + ValueError + """ + raise AbstractMethodError(self) + + def take(self, indices, allow_fill=False, fill_value=None): + if allow_fill: + fill_value = self._validate_fill_value(fill_value) + + new_values = take(self.asi8, + indices, + allow_fill=allow_fill, + fill_value=fill_value) + + return type(self)(new_values, dtype=self.dtype) + + @classmethod + def _concat_same_type(cls, to_concat): + dtypes = {x.dtype for x in to_concat} + assert len(dtypes) == 1 + dtype = list(dtypes)[0] + + values = np.concatenate([x.asi8 for x in to_concat]) + return cls(values, dtype=dtype) + + def copy(self, deep=False): + values = self.asi8.copy() + return type(self)(values, dtype=self.dtype, freq=self.freq) + + def _values_for_factorize(self): + return self.asi8, iNaT + + @classmethod + def _from_factorized(cls, values, original): + return cls(values, dtype=original.dtype) + # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a6f688fb0cf7a..04d6b5f161956 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -12,7 +12,7 @@ conversion, fields, timezones, resolution as libresolution) -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import cache_readonly, Appender from pandas.errors import PerformanceWarning from pandas import compat @@ -21,8 +21,7 @@ is_object_dtype, is_int64_dtype, is_datetime64tz_dtype, - is_datetime64_dtype, - ensure_int64) + is_datetime64_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries @@ -294,7 +293,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if tz is not None and index.tz is None: arr = conversion.tz_localize_to_utc( - 
ensure_int64(index.values), + index.asi8, tz, ambiguous=ambiguous) index = cls(arr) @@ -317,7 +316,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if not right_closed and len(index) and index[-1] == end: index = index[:-1] - return cls._simple_new(index.values, freq=freq, tz=tz) + return cls._simple_new(index.asi8, freq=freq, tz=tz) # ----------------------------------------------------------------- # Descriptive Properties @@ -419,6 +418,25 @@ def __iter__(self): for v in converted: yield v + # ---------------------------------------------------------------- + # ExtensionArray Interface + + @property + def _ndarray_values(self): + return self._data + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (datetime, np.datetime64)): + self._assert_tzawareness_compat(fill_value) + fill_value = Timestamp(fill_value).value + else: + raise ValueError("'fill_value' should be a Timestamp. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + # ----------------------------------------------------------------- # Comparison Methods @@ -566,7 +584,7 @@ def tz_convert(self, tz): See Also -------- - DatetimeIndex.tz : A timezone that has a variable offset from UTC + DatetimeIndex.tz : A timezone that has a variable offset from UTC. DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a given time zone, or remove timezone from a tz-aware DatetimeIndex. @@ -874,10 +892,10 @@ def to_period(self, freq=None): PeriodIndex(['2017-01-01', '2017-01-02'], dtype='period[D]', freq='D') - See also + See Also -------- - pandas.PeriodIndex: Immutable ndarray holding ordinal values - pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object + pandas.PeriodIndex: Immutable ndarray holding ordinal values. + pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object. 
""" from pandas.core.arrays import PeriodArray diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 94be29893d2b9..90118cd300a22 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -94,11 +94,11 @@ See Also -------- -Index : The base pandas Index type -Interval : A bounded slice-like interval; the elements of an %(klass)s -interval_range : Function to create a fixed frequency IntervalIndex -cut : Bin values into discrete Intervals -qcut : Bin values into equal-sized Intervals based on rank or sample quantiles +Index : The base pandas Index type. +Interval : A bounded slice-like interval; the elements of an %(klass)s. +interval_range : Function to create a fixed frequency IntervalIndex. +cut : Bin values into discrete Intervals. +qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. """ @@ -244,9 +244,9 @@ def _from_factorized(cls, values, original): See Also -------- - interval_range : Function to create a fixed frequency IntervalIndex - %(klass)s.from_arrays : Construct from a left and right array - %(klass)s.from_tuples : Construct from a sequence of tuples + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct from a left and right array. + %(klass)s.from_tuples : Construct from a sequence of tuples. """ @classmethod @@ -354,13 +354,13 @@ def from_arrays(cls, left, right, closed='right', copy=False, dtype=None): See Also -------- - interval_range : Function to create a fixed frequency IntervalIndex + interval_range : Function to create a fixed frequency IntervalIndex. %(klass)s.from_arrays : Construct an %(klass)s from a left and - right array + right array. %(klass)s.from_breaks : Construct an %(klass)s from an array of - splits + splits. %(klass)s.from_tuples : Construct an %(klass)s from an - array-like of tuples + array-like of tuples. 
""" _interval_shared_docs['from_tuples'] = """ @@ -389,11 +389,11 @@ def from_arrays(cls, left, right, closed='right', copy=False, dtype=None): See Also -------- - interval_range : Function to create a fixed frequency IntervalIndex + interval_range : Function to create a fixed frequency IntervalIndex. %(klass)s.from_arrays : Construct an %(klass)s from a left and - right array + right array. %(klass)s.from_breaks : Construct an %(klass)s from an array of - splits + splits. """ @classmethod @@ -1027,9 +1027,9 @@ def repeat(self, repeats, **kwargs): See Also -------- - Index.repeat : Equivalent function for Index - Series.repeat : Equivalent function for Series - numpy.repeat : Underlying implementation + Index.repeat : Equivalent function for Index. + Series.repeat : Equivalent function for Series. + numpy.repeat : Underlying implementation. """ left_repeat = self.left.repeat(repeats, **kwargs) right_repeat = self.right.repeat(repeats, **kwargs) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b343d42ef3b7c..d9989b1ac36c0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -146,8 +146,8 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): See Also -------- - period_array : Create a new PeriodArray - pandas.PeriodIndex : Immutable Index for period data + period_array : Create a new PeriodArray. + pandas.PeriodIndex : Immutable Index for period data. 
""" # array priority higher than numpy scalars __array_priority__ = 1000 @@ -216,14 +216,6 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): ordinals = libperiod.extract_ordinals(periods, freq) return cls(ordinals, freq=freq) - def _values_for_factorize(self): - return self.asi8, iNaT - - @classmethod - def _from_factorized(cls, values, original): - # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray - return cls(values, freq=original.freq) - @classmethod def _from_datetime64(cls, data, freq, tz=None): """Construct a PeriodArray from a datetime64 array @@ -262,14 +254,6 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq - @classmethod - def _concat_same_type(cls, to_concat): - freq = {x.freq for x in to_concat} - assert len(freq) == 1 - freq = list(freq)[0] - values = np.concatenate([x._data for x in to_concat]) - return cls(values, freq=freq) - # -------------------------------------------------------------------- # Data / Attributes @@ -415,29 +399,20 @@ def __setitem__( raise TypeError(msg) self._data[key] = value - def take(self, indices, allow_fill=False, fill_value=None): - if allow_fill: - if isna(fill_value): - fill_value = iNaT - elif isinstance(fill_value, Period): - if self.freq != fill_value.freq: - msg = DIFFERENT_FREQ_INDEX.format( - self.freq.freqstr, - fill_value.freqstr - ) - raise IncompatibleFrequency(msg) - - fill_value = fill_value.ordinal - else: - msg = "'fill_value' should be a Period. Got '{}'." 
- raise ValueError(msg.format(fill_value)) - - new_values = algos.take(self._data, - indices, - allow_fill=allow_fill, - fill_value=fill_value) - - return type(self)(new_values, self.freq) + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, Period): + if fill_value.freq != self.freq: + msg = DIFFERENT_FREQ_INDEX.format(self.freq.freqstr, + fill_value.freqstr) + raise IncompatibleFrequency(msg) + fill_value = fill_value.ordinal + else: + raise ValueError("'fill_value' should be a Period. " + "Got '{got}'.".format(got=fill_value)) + return fill_value def fillna(self, value=None, method=None, limit=None): # TODO(#20300) @@ -474,9 +449,6 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def copy(self, deep=False): - return type(self)(self._data.copy(), freq=self.freq) - def value_counts(self, dropna=False): from pandas import Series, PeriodIndex @@ -636,7 +608,7 @@ def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a PeriodArray. 
- See also + See Also -------- numpy.ndarray.repeat """ diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9dbdd6ff8b562..ad564ca34930f 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -9,6 +9,7 @@ from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( array_to_timedelta64, parse_timedelta_unit) +from pandas.util._decorators import Appender from pandas import compat @@ -139,7 +140,7 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result._freq = freq return result - def __new__(cls, values, freq=None): + def __new__(cls, values, freq=None, dtype=_TD_DTYPE): freq, freq_infer = dtl.maybe_infer_freq(freq) @@ -193,6 +194,17 @@ def _generate_range(cls, start, end, periods, freq, closed=None): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (timedelta, np.timedelta64, Tick)): + fill_value = Timedelta(fill_value).value + else: + raise ValueError("'fill_value' should be a Timedelta. 
" + "Got '{got}'.".format(got=fill_value)) + return fill_value + # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/base.py b/pandas/core/base.py index de368f52b6f00..17108c16c07dc 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -820,7 +820,7 @@ def argmax(self, axis=None): """ return a ndarray of the maximum argument indexer - See also + See Also -------- numpy.ndarray.argmax """ @@ -863,7 +863,7 @@ def argmin(self, axis=None): """ return a ndarray of the minimum argument indexer - See also + See Also -------- numpy.ndarray.argmin """ @@ -1014,8 +1014,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, See Also -------- - Series.count: number of non-NA elements in a Series - DataFrame.count: number of non-NA elements in a DataFrame + Series.count: Number of non-NA elements in a Series. + DataFrame.count: Number of non-NA elements in a DataFrame. Examples -------- diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 94e9b72b001b1..a01266870b8fc 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -150,32 +150,59 @@ def is_object_dtype(arr_or_dtype): def is_sparse(arr): """ - Check whether an array-like is a pandas sparse array. + Check whether an array-like is a 1-D pandas sparse array. + + Check that the one-dimensional array-like is a pandas sparse array. + Returns True if it is a pandas sparse array, not another type of + sparse array. Parameters ---------- arr : array-like - The array-like to check. + Array-like to check. Returns ------- - boolean : Whether or not the array-like is a pandas sparse array. + bool + Whether or not the array-like is a pandas sparse array. + + See Also + -------- + DataFrame.to_sparse : Convert DataFrame to a SparseDataFrame. + Series.to_sparse : Convert Series to SparseSeries. + Series.to_dense : Return dense representation of a Series. 
Examples -------- - >>> is_sparse(np.array([1, 2, 3])) - False - >>> is_sparse(pd.SparseArray([1, 2, 3])) + Returns `True` if the parameter is a 1-D pandas sparse array. + + >>> is_sparse(pd.SparseArray([0, 0, 1, 0])) True - >>> is_sparse(pd.SparseSeries([1, 2, 3])) + >>> is_sparse(pd.SparseSeries([0, 0, 1, 0])) True - This function checks only for pandas sparse array instances, so - sparse arrays from other libraries will return False. + Returns `False` if the parameter is not sparse. + + >>> is_sparse(np.array([0, 0, 1, 0])) + False + >>> is_sparse(pd.Series([0, 1, 0, 0])) + False + + Returns `False` if the parameter is not a pandas sparse array. >>> from scipy.sparse import bsr_matrix - >>> is_sparse(bsr_matrix([1, 2, 3])) + >>> is_sparse(bsr_matrix([0, 1, 0, 0])) False + + Returns `False` if the parameter has more than one dimension. + + >>> df = pd.SparseDataFrame([389., 24., 80.5, np.nan], + columns=['max_speed'], + index=['falcon', 'parrot', 'lion', 'monkey']) + >>> is_sparse(df) + False + >>> is_sparse(df.max_speed) + True """ from pandas.core.arrays.sparse import SparseDtype diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index bb4ab823069ee..ebfb41825ae0a 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -476,13 +476,7 @@ def _concat_datetimetz(to_concat, name=None): all inputs must be DatetimeIndex it is used in DatetimeIndex.append also """ - # do not pass tz to set because tzlocal cannot be hashed - if len({str(x.dtype) for x in to_concat}) != 1: - raise ValueError('to_concat must have the same tz') - tz = to_concat[0].tz - # no need to localize because internal repr will not be changed - new_values = np.concatenate([x.asi8 for x in to_concat]) - return to_concat[0]._simple_new(new_values, tz=tz, name=name) + return to_concat[0]._concat_same_dtype(to_concat, name=name) def _concat_index_same_dtype(indexes, klass=None): diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py 
index 5f35a040d7d47..241a1b471f677 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -44,7 +44,7 @@ def is_number(obj): See Also -------- - pandas.api.types.is_integer: checks a subgroup of numbers + pandas.api.types.is_integer: Checks a subgroup of numbers. Examples -------- diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index a12985bb7d42c..fa60c326a19ea 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -43,7 +43,7 @@ def isna(obj): See Also -------- - notna : boolean inverse of pandas.isna. + notna : Boolean inverse of pandas.isna. Series.isna : Detect missing values in a Series. DataFrame.isna : Detect missing values in a DataFrame. Index.isna : Detect missing values in an Index. @@ -274,7 +274,7 @@ def notna(obj): See Also -------- - isna : boolean inverse of pandas.notna. + isna : Boolean inverse of pandas.notna. Series.notna : Detect valid values in a Series. DataFrame.notna : Detect valid values in a DataFrame. Index.notna : Detect valid values in an Index. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b56eaaaaebdcb..5f728012b2292 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -221,9 +221,9 @@ See Also -------- -merge_ordered : merge with optional filling/interpolation. -merge_asof : merge on nearest keys. -DataFrame.join : similar method using indices. +merge_ordered : Merge with optional filling/interpolation. +merge_asof : Merge on nearest keys. +DataFrame.join : Similar method using indices. Examples -------- @@ -346,12 +346,12 @@ class DataFrame(NDFrame): 1 4 5 6 2 7 8 9 - See also + See Also -------- - DataFrame.from_records : constructor from tuples, also record arrays - DataFrame.from_dict : from dicts of Series, arrays, or dicts - DataFrame.from_items : from sequence of (key, value) pairs - pandas.read_csv, pandas.read_table, pandas.read_clipboard + DataFrame.from_records : Constructor from tuples, also record arrays. 
+ DataFrame.from_dict : From dicts of Series, arrays, or dicts. + DataFrame.from_items : From sequence of (key, value) pairs. + pandas.read_csv, pandas.read_table, pandas.read_clipboard. """ @property @@ -1066,8 +1066,8 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None): See Also -------- DataFrame.from_records : DataFrame from ndarray (structured - dtype), list of tuples, dict, or DataFrame - DataFrame : DataFrame object creation using constructor + dtype), list of tuples, dict, or DataFrame. + DataFrame : DataFrame object creation using constructor. Examples -------- @@ -1484,9 +1484,9 @@ def to_records(self, index=True, convert_datetime64=None): See Also -------- - DataFrame.from_records: convert structured or record ndarray + DataFrame.from_records: Convert structured or record ndarray to DataFrame. - numpy.recarray: ndarray that allows field access using + numpy.recarray: An ndarray that allows field access using attributes, analogous to typed columns in a spreadsheet. @@ -1694,7 +1694,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, datetime format based on the first datetime string. If the format can be inferred, there often will be a large parsing speed-up.
- See also + See Also -------- pandas.read_csv @@ -1829,24 +1829,6 @@ def to_panel(self): return self._constructor_expanddim(new_mgr) - @Appender(_shared_docs['to_excel'] % _shared_doc_kwargs) - def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', - float_format=None, columns=None, header=True, index=True, - index_label=None, startrow=0, startcol=0, engine=None, - merge_cells=True, encoding=None, inf_rep='inf', verbose=True, - freeze_panes=None): - - from pandas.io.formats.excel import ExcelFormatter - formatter = ExcelFormatter(self, na_rep=na_rep, cols=columns, - header=header, - float_format=float_format, index=index, - index_label=index_label, - merge_cells=merge_cells, - inf_rep=inf_rep) - formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow, - startcol=startcol, freeze_panes=freeze_panes, - engine=engine) - @deprecate_kwarg(old_arg_name='encoding', new_arg_name=None) def to_stata(self, fname, convert_dates=None, write_index=True, encoding="latin-1", byteorder=None, time_stamp=None, @@ -1918,9 +1900,10 @@ def to_stata(self, fname, convert_dates=None, write_index=True, See Also -------- - pandas.read_stata : Import Stata data files - pandas.io.stata.StataWriter : low-level writer for Stata data files - pandas.io.stata.StataWriter117 : low-level writer for version 117 files + pandas.read_stata : Import Stata data files. + pandas.io.stata.StataWriter : Low-level writer for Stata data files. + pandas.io.stata.StataWriter117 : Low-level writer for version 117 + files. 
Examples -------- @@ -3399,6 +3382,7 @@ def assign(self, **kwargs): Berkeley 25.0 Where the value is a callable, evaluated on `df`: + >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) temp_c temp_f Portland 17.0 62.6 @@ -3406,6 +3390,7 @@ def assign(self, **kwargs): Alternatively, the same behavior can be achieved by directly referencing an existing Series or sequence: + >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) temp_c temp_f Portland 17.0 62.6 @@ -3414,6 +3399,7 @@ def assign(self, **kwargs): In Python 3.6+, you can create multiple columns within the same assign where one of the columns depends on another one defined within the same assign: + >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, ... temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) temp_c temp_f temp_k @@ -3734,9 +3720,9 @@ def drop(self, labels=None, axis=0, index=None, columns=None, -------- DataFrame.loc : Label-location based indexer for selection by label. DataFrame.dropna : Return DataFrame with labels on given axis omitted - where (all or any) data are missing + where (all or any) data are missing. DataFrame.drop_duplicates : Return DataFrame with duplicate rows - removed, optionally only considering certain columns + removed, optionally only considering certain columns. Series.drop : Return Series with specified index labels removed. Raises @@ -4688,7 +4674,7 @@ def nlargest(self, n, columns, keep='first'): -------- DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in ascending order. - DataFrame.sort_values : Sort DataFrame by the values + DataFrame.sort_values : Sort DataFrame by the values. DataFrame.head : Return the first `n` rows without re-ordering. Notes @@ -5075,7 +5061,7 @@ def combine(self, other, func, fill_value=None, overwrite=True): See Also -------- DataFrame.combine_first : Combine two DataFrame objects and default to - non-null values in frame calling the method + non-null values in frame calling the method. 
""" other_idxlen = len(other.index) # save for compare @@ -5181,7 +5167,7 @@ def combine_first(self, other): See Also -------- DataFrame.combine : Perform series-wise operation on two DataFrames - using a given function + using a given function. """ import pandas.core.computation.expressions as expressions @@ -5393,9 +5379,9 @@ def update(self, other, join='left', overwrite=True, filter_func=None, See Also -------- - DataFrame.pivot_table : generalization of pivot that can handle + DataFrame.pivot_table : Generalization of pivot that can handle duplicate values for one index/column pair. - DataFrame.unstack : pivot based on the index values instead of a + DataFrame.unstack : Pivot based on the index values instead of a column. Notes @@ -5579,10 +5565,10 @@ def pivot(self, index=None, columns=None, values=None): ------- table : DataFrame - See also + See Also -------- - DataFrame.pivot : pivot without aggregation that can handle - non-numeric data + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. """ @Substitution('') @@ -5784,7 +5770,7 @@ def unstack(self, level=-1, fill_value=None): .. versionadded:: 0.18.0 - See also + See Also -------- DataFrame.pivot : Pivot a table based on column values. DataFrame.stack : Pivot a level of the column labels (inverse operation @@ -5854,7 +5840,7 @@ def unstack(self, level=-1, fill_value=None): col_level : int or string, optional If columns are a MultiIndex then use this level to melt. - See also + See Also -------- %(other)s pivot_table @@ -6095,7 +6081,7 @@ def _gotitem(self, 3 NaN dtype: float64 - See also + See Also -------- DataFrame.apply : Perform any type of operations. DataFrame.transform : Perform transformation type operations. @@ -6229,11 +6215,11 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, side-effects, as they will take effect twice for the first column/row. 
- See also + See Also -------- - DataFrame.applymap: For elementwise operations - DataFrame.aggregate: only perform aggregating type operations - DataFrame.transform: only perform transforming type operations + DataFrame.applymap: For elementwise operations. + DataFrame.aggregate: Only perform aggregating type operations. + DataFrame.transform: Only perform transforming type operations. Examples -------- @@ -6338,9 +6324,9 @@ def applymap(self, func): DataFrame Transformed DataFrame. - See also + See Also -------- - DataFrame.apply : Apply a function along input axis of DataFrame + DataFrame.apply : Apply a function along input axis of DataFrame. Examples -------- @@ -6421,10 +6407,10 @@ def append(self, other, ignore_index=False, those rows to a list and then concatenate the list with the original DataFrame all at once. - See also + See Also -------- pandas.concat : General function to concatenate DataFrame, Series - or Panel objects + or Panel objects. Examples -------- @@ -6891,10 +6877,10 @@ def cov(self, min_periods=None): See Also -------- - pandas.Series.cov : compute covariance with another Series - pandas.core.window.EWM.cov: exponential weighted sample covariance - pandas.core.window.Expanding.cov : expanding sample covariance - pandas.core.window.Rolling.cov : rolling sample covariance + pandas.Series.cov : Compute covariance with another Series. + pandas.core.window.EWM.cov: Exponential weighted sample covariance. + pandas.core.window.Expanding.cov : Expanding sample covariance. + pandas.core.window.Rolling.cov : Rolling sample covariance. Notes ----- @@ -7051,11 +7037,11 @@ def count(self, axis=0, level=None, numeric_only=False): See Also -------- - Series.count: number of non-NA elements in a Series - DataFrame.shape: number of DataFrame rows and columns (including NA - elements) - DataFrame.isna: boolean same-sized DataFrame showing places of NA - elements + Series.count: Number of non-NA elements in a Series. 
+ DataFrame.shape: Number of DataFrame rows and columns (including NA + elements). + DataFrame.isna: Boolean same-sized DataFrame showing places of NA + elements. Examples -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 34f25c5634d5b..9f56433c6868e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11,6 +11,8 @@ import pandas as pd from pandas._libs import properties, Timestamp, iNaT +from pandas.errors import AbstractMethodError + from pandas.core.dtypes.common import ( ensure_int64, ensure_object, @@ -115,7 +117,7 @@ class NDFrame(PandasObject, SelectionMixin): '_default_fill_value', '_metadata', '__array_struct__', '__array_interface__'] _internal_names_set = set(_internal_names) - _accessors = frozenset([]) + _accessors = frozenset() _deprecations = frozenset(['as_blocks', 'blocks', 'convert_objects', 'is_copy']) _metadata = [] @@ -200,7 +202,7 @@ def _constructor(self): """Used when a manipulation result has the same dimensions as the original. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def __unicode__(self): # unicode representation based upon iterating over self @@ -221,7 +223,7 @@ def _constructor_sliced(self): """Used when a manipulation result has one lower dimension(s) as the original, such as DataFrame single columns slicing. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @property def _constructor_expanddim(self): @@ -852,8 +854,8 @@ def squeeze(self, axis=None): See Also -------- - Series.iloc : Integer-location based indexing for selecting scalars - DataFrame.iloc : Integer-location based indexing for selecting Series + Series.iloc : Integer-location based indexing for selecting scalars. + DataFrame.iloc : Integer-location based indexing for selecting Series. Series.to_frame : Inverse of DataFrame.squeeze for a single-column DataFrame. 
@@ -1164,9 +1166,9 @@ def rename_axis(self, mapper=None, **kwargs): See Also -------- - pandas.Series.rename : Alter Series index labels or name - pandas.DataFrame.rename : Alter DataFrame index labels or name - pandas.Index.rename : Set new names on index + pandas.Series.rename : Alter Series index labels or name. + pandas.DataFrame.rename : Alter DataFrame index labels or name. + pandas.Index.rename : Set new names on index. Examples -------- @@ -1848,7 +1850,7 @@ def empty(self): >>> df.dropna().empty True - See also + See Also -------- pandas.Series.dropna pandas.DataFrame.dropna @@ -1975,16 +1977,17 @@ def _repr_latex_(self): # I/O Methods _shared_docs['to_excel'] = """ - Write %(klass)s to an excel sheet. + Write %(klass)s to an Excel sheet. - To write a single %(klass)s to an excel .xlsx file it is only necessary to + To write a single %(klass)s to an Excel .xlsx file it is only necessary to specify a target file name. To write to multiple sheets it is necessary to create an `ExcelWriter` object with a target file name, and specify a sheet - in the file to write to. Multiple sheets may be written to by - specifying unique `sheet_name`. With all data written to the file it is - necessary to save the changes. Note that creating an ExcelWriter object - with a file name that already exists will result in the contents of the - existing file being erased. + in the file to write to. + + Multiple sheets may be written to by specifying unique `sheet_name`. + With all data written to the file it is necessary to save the changes. + Note that creating an `ExcelWriter` object with a file name that already + exists will result in the contents of the existing file being erased. Parameters ---------- @@ -2395,7 +2398,7 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, See Also -------- - pandas.read_sql : read a DataFrame from a table + pandas.read_sql : Read a DataFrame from a table. 
Notes ----- @@ -2884,7 +2887,7 @@ def _iget_item_cache(self, item): return lower def _box_item_values(self, key, values): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. @@ -4977,8 +4980,8 @@ def values(self): See Also -------- - pandas.DataFrame.index : Retrieve the index labels - pandas.DataFrame.columns : Retrieving the column names + pandas.DataFrame.index : Retrieve the index labels. + pandas.DataFrame.columns : Retrieving the column names. """ self._consolidate_inplace() return self._data.as_array(transpose=self._AXIS_REVERSED) @@ -5130,7 +5133,7 @@ def dtypes(self): See Also -------- - pandas.DataFrame.ftypes : dtype and sparsity information. + pandas.DataFrame.ftypes : Dtype and sparsity information. Examples -------- @@ -5307,7 +5310,7 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs): 1 2 dtype: int64 - See also + See Also -------- pandas.to_datetime : Convert argument to datetime. pandas.to_timedelta : Convert argument to timedelta. @@ -5891,8 +5894,8 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None): See Also -------- - %(klass)s.fillna : Fill NA values - %(klass)s.where : Replace values based on boolean condition + %(klass)s.fillna : Fill NA values. + %(klass)s.where : Replace values based on boolean condition. Series.str.replace : Simple string replacement. Returns @@ -6506,16 +6509,16 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, def asof(self, where, subset=None): """ - Return the last row(s) without any `NaN`s before `where`. + Return the last row(s) without any NaNs before `where`. The last row (for each element in `where`, if list) without any - `NaN` is taken. - In case of a :class:`~pandas.DataFrame`, the last row without `NaN` + NaN is taken. 
+ In case of a :class:`~pandas.DataFrame`, the last row without NaN considering only the subset of columns (if not `None`) .. versionadded:: 0.19.0 For DataFrame - If there is no good value, `NaN` is returned for a Series or + If there is no good value, NaN is returned for a Series or a Series of NaN values for a DataFrame Parameters @@ -6524,7 +6527,7 @@ def asof(self, where, subset=None): Date(s) before which the last row(s) are returned. subset : str or array-like of str, default `None` For DataFrame, if not `None`, only use these columns to - check for `NaN`s. + check for NaNs. Notes ----- @@ -6560,7 +6563,7 @@ def asof(self, where, subset=None): 2.0 For a sequence `where`, a Series is returned. The first value is - ``NaN``, because the first element of `where` is before the first + NaN, because the first element of `where` is before the first index value. >>> s.asof([5, 20]) @@ -6569,7 +6572,7 @@ def asof(self, where, subset=None): dtype: float64 Missing values are not considered. The following is ``2.0``, not - ``NaN``, even though ``NaN`` is at the index location for ``30``. + NaN, even though NaN is at the index location for ``30``. >>> s.asof(30) 2.0 @@ -6691,10 +6694,10 @@ def asof(self, where, subset=None): See Also -------- - %(klass)s.isnull : alias of isna - %(klass)s.notna : boolean inverse of isna - %(klass)s.dropna : omit axes labels with missing values - isna : top-level isna + %(klass)s.isnull : Alias of isna. + %(klass)s.notna : Boolean inverse of isna. + %(klass)s.dropna : Omit axes labels with missing values. + isna : Top-level isna. Examples -------- @@ -6759,10 +6762,10 @@ def isnull(self): See Also -------- - %(klass)s.notnull : alias of notna - %(klass)s.isna : boolean inverse of notna - %(klass)s.dropna : omit axes labels with missing values - notna : top-level notna + %(klass)s.notnull : Alias of notna. + %(klass)s.isna : Boolean inverse of notna. + %(klass)s.dropna : Omit axes labels with missing values. + notna : Top-level notna. 
Examples -------- @@ -7012,12 +7015,12 @@ def clip_upper(self, threshold, axis=None, inplace=False): See Also -------- DataFrame.clip : General purpose method to trim DataFrame values to - given threshold(s) + given threshold(s). DataFrame.clip_lower : Trim DataFrame values below given - threshold(s) + threshold(s). Series.clip : General purpose method to trim Series values to given - threshold(s) - Series.clip_lower : Trim Series values below given threshold(s) + threshold(s). + Series.clip_lower : Trim Series values below given threshold(s). Examples -------- @@ -7090,12 +7093,12 @@ def clip_lower(self, threshold, axis=None, inplace=False): See Also -------- DataFrame.clip : General purpose method to trim DataFrame values to - given threshold(s) + given threshold(s). DataFrame.clip_upper : Trim DataFrame values above given - threshold(s) + threshold(s). Series.clip : General purpose method to trim Series values to given - threshold(s) - Series.clip_upper : Trim Series values above given threshold(s) + threshold(s). + Series.clip_upper : Trim Series values above given threshold(s). Examples -------- @@ -7420,11 +7423,11 @@ def at_time(self, time, asof=False): See Also -------- - between_time : Select values between particular times of the day - first : Select initial periods of time series based on a date offset - last : Select final periods of time series based on a date offset + between_time : Select values between particular times of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. DatetimeIndex.indexer_at_time : Get just the index locations for - values at particular time of the day + values at particular time of the day. 
""" try: indexer = self.index.indexer_at_time(time, asof=asof) @@ -7482,11 +7485,11 @@ def between_time(self, start_time, end_time, include_start=True, See Also -------- - at_time : Select values at a particular time of the day - first : Select initial periods of time series based on a date offset - last : Select final periods of time series based on a date offset + at_time : Select values at a particular time of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. DatetimeIndex.indexer_between_time : Get just the index locations for - values between particular times of the day + values between particular times of the day. """ try: indexer = self.index.indexer_between_time( @@ -7837,9 +7840,9 @@ def first(self, offset): See Also -------- - last : Select final periods of time series based on a date offset - at_time : Select values at a particular time of the day - between_time : Select values between particular times of the day + last : Select final periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. """ if not isinstance(self.index, DatetimeIndex): raise TypeError("'first' only supports a DatetimeIndex index") @@ -7900,9 +7903,9 @@ def last(self, offset): See Also -------- - first : Select initial periods of time series based on a date offset - at_time : Select values at a particular time of the day - between_time : Select values between particular times of the day + first : Select initial periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. 
""" if not isinstance(self.index, DatetimeIndex): raise TypeError("'last' only supports a DatetimeIndex index") @@ -8386,7 +8389,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, See Also -------- :func:`DataFrame.%(name_other)s` : Return an object of same shape as - self + self. Examples -------- @@ -9058,7 +9061,7 @@ def abs(self): See Also -------- - numpy.absolute : calculate the absolute value element-wise. + numpy.absolute : Calculate the absolute value element-wise. """ return np.abs(self) @@ -9949,6 +9952,25 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, if path_or_buf is None: return formatter.path_or_buf.getvalue() + @Appender(_shared_docs["to_excel"] % dict(klass="object")) + def to_excel(self, excel_writer, sheet_name="Sheet1", na_rep="", + float_format=None, columns=None, header=True, index=True, + index_label=None, startrow=0, startcol=0, engine=None, + merge_cells=True, encoding=None, inf_rep="inf", verbose=True, + freeze_panes=None): + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + from pandas.io.formats.excel import ExcelFormatter + formatter = ExcelFormatter(df, na_rep=na_rep, cols=columns, + header=header, + float_format=float_format, index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep) + formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow, + startcol=startcol, freeze_panes=freeze_panes, + engine=engine) + def _doc_parms(cls): """Return a tuple of the doc parms.""" @@ -10088,10 +10110,10 @@ def _doc_parms(cls): """ _all_see_also = """\ -See also +See Also -------- -pandas.Series.all : Return True if all elements are True -pandas.DataFrame.any : Return True if one (or more) elements are True +pandas.Series.all : Return True if all elements are True. +pandas.DataFrame.any : Return True if one (or more) elements are True. 
""" _cnum_doc = """ @@ -10115,7 +10137,7 @@ def _doc_parms(cls): ------- %(outname)s : %(name1)s or %(name2)s\n %(examples)s -See also +See Also -------- pandas.core.window.Expanding.%(accum_func_name)s : Similar functionality but ignores ``NaN`` values. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 451f1199ac8e6..ee84f8cda07d0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -18,6 +18,7 @@ import pandas.compat as compat from pandas.compat import lzip, map from pandas.compat.numpy import _np_version_under1p13 +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -240,7 +241,7 @@ def _aggregate_generic(self, func, *args, **kwargs): return self._wrap_generic_output(result, obj) def _wrap_aggregated_output(self, output, names=None): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _aggregate_item_by_item(self, func, *args, **kwargs): # only for axis==0 @@ -733,7 +734,7 @@ def _selection_name(self): 1 1 2 2 3 4 - See also + See Also -------- pandas.Series.groupby.apply pandas.Series.groupby.transform @@ -1288,7 +1289,7 @@ class DataFrameGroupBy(NDFrameGroupBy): 1 1 2 0.590716 2 3 4 0.704907 - See also + See Also -------- pandas.DataFrame.groupby.apply pandas.DataFrame.groupby.transform @@ -1659,4 +1660,4 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): raise ValueError("axis value must be greater than 0") def _wrap_aggregated_output(self, output, names=None): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ea7507799fa9a..96aff09126772 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -20,6 +20,7 @@ class providing the base-class of operations. 
import pandas.compat as compat from pandas.compat import callable, range, set_function_name, zip from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.util._validators import validate_kwargs @@ -42,7 +43,7 @@ class providing the base-class of operations. _doc_template = """ - See also + See Also -------- pandas.Series.%(name)s pandas.DataFrame.%(name)s @@ -90,7 +91,7 @@ class providing the base-class of operations. -------- {examples} - See also + See Also -------- pipe : Apply function to the full GroupBy object instead of to each group. @@ -215,8 +216,8 @@ class providing the base-class of operations. See Also -------- -pandas.Series.pipe : Apply a function with arguments to a series -pandas.DataFrame.pipe: Apply a function with arguments to a dataframe +pandas.Series.pipe : Apply a function with arguments to a series. +pandas.DataFrame.pipe: Apply a function with arguments to a dataframe. apply : Apply function to each group instead of to the full %(klass)s object. """ @@ -252,7 +253,7 @@ class providing the base-class of operations. 
------- %(klass)s -See also +See Also -------- aggregate, transform @@ -323,7 +324,7 @@ def _group_selection_context(groupby): class _GroupBy(PandasObject, SelectionMixin): _group_selection = None - _apply_whitelist = frozenset([]) + _apply_whitelist = frozenset() def __init__(self, obj, keys=None, axis=0, level=None, grouper=None, exclusions=None, selection=None, as_index=True, @@ -706,7 +707,7 @@ def _iterate_slices(self): yield self._selection_name, self._selected_obj def transform(self, func, *args, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _cumcount_array(self, ascending=True): """ @@ -861,7 +862,7 @@ def _python_agg_general(self, func, *args, **kwargs): return self._wrap_aggregated_output(output) def _wrap_applied_output(self, *args, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _concat_objects(self, keys, values, not_indexed_same=False): from pandas.core.reshape.concat import concat @@ -1306,12 +1307,111 @@ def describe(self, **kwargs): return result.T return result.unstack() - @Substitution(name='groupby') - @Appender(_doc_template) def resample(self, rule, *args, **kwargs): """ - Provide resampling when using a TimeGrouper - Return a new grouper with our resampler appended + Provide resampling when using a TimeGrouper. + + Given a grouper, the function resamples it according to a string + "string" -> "frequency". + + See the :ref:`frequency aliases ` + documentation for more details. + + Parameters + ---------- + rule : str or DateOffset + The offset string or object representing target grouper conversion. + *args, **kwargs + Possible arguments are `how`, `fill_method`, `limit`, `kind` and + `on`, and other arguments of `TimeGrouper`. + + Returns + ------- + Grouper + Return a new grouper with our resampler appended. + + See Also + -------- + pandas.Grouper : Specify a frequency to resample with when + grouping by a key. 
+ DatetimeIndex.resample : Frequency conversion and resampling of + time series. + + Examples + -------- + >>> idx = pd.date_range('1/1/2000', periods=4, freq='T') + >>> df = pd.DataFrame(data=4 * [range(2)], + ... index=idx, + ... columns=['a', 'b']) + >>> df.iloc[2, 0] = 5 + >>> df + a b + 2000-01-01 00:00:00 0 1 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:02:00 5 1 + 2000-01-01 00:03:00 0 1 + + Downsample the DataFrame into 3 minute bins and sum the values of + the timestamps falling into a bin. + + >>> df.groupby('a').resample('3T').sum() + a b + a + 0 2000-01-01 00:00:00 0 2 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:00:00 5 1 + + Upsample the series into 30 second bins. + + >>> df.groupby('a').resample('30S').sum() + a b + a + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:00:30 0 0 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:01:30 0 0 + 2000-01-01 00:02:00 0 0 + 2000-01-01 00:02:30 0 0 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:02:00 5 1 + + Resample by month. Values are assigned to the month of the period. + + >>> df.groupby('a').resample('M').sum() + a b + a + 0 2000-01-31 0 3 + 5 2000-01-31 5 1 + + Downsample the series into 3 minute bins as above, but close the right + side of the bin interval. + + >>> df.groupby('a').resample('3T', closed='right').sum() + a b + a + 0 1999-12-31 23:57:00 0 1 + 2000-01-01 00:00:00 0 2 + 5 2000-01-01 00:00:00 5 1 + + Downsample the series into 3 minute bins and close the right side of + the bin interval, but label each bin using the right edge instead of + the left. + + >>> df.groupby('a').resample('3T', closed='right', label='right').sum() + a b + a + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:03:00 0 2 + 5 2000-01-01 00:03:00 5 1 + + Add an offset of twenty seconds. 
+ + >>> df.groupby('a').resample('3T', loffset='20s').sum() + a b + a + 0 2000-01-01 00:00:20 0 2 + 2000-01-01 00:03:20 0 1 + 5 2000-01-01 00:00:20 5 1 """ from pandas.core.resample import get_resampler_for_grouping return get_resampler_for_grouping(self, rule, *args, **kwargs) @@ -1623,7 +1723,7 @@ def ngroup(self, ascending=True): 5 0 dtype: int64 - See also + See Also -------- .cumcount : Number the rows in each group. """ @@ -1679,7 +1779,7 @@ def cumcount(self, ascending=True): 5 0 dtype: int64 - See also + See Also -------- .ngroup : Number the groups themselves. """ diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 390334a89cbfe..125bd9a5e855d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -13,6 +13,7 @@ from pandas._libs import NaT, groupby as libgroupby, iNaT, lib, reduction from pandas.compat import lzip, range, zip +from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -841,7 +842,7 @@ def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] def apply(self, f): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SeriesSplitter(DataSplitter): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 263de57d32f31..2f449b4b33d8d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -212,10 +212,10 @@ class Index(IndexOpsMixin, PandasObject): See Also --------- - RangeIndex : Index implementing a monotonic integer range + RangeIndex : Index implementing a monotonic integer range. CategoricalIndex : Index of :class:`Categorical` s. - MultiIndex : A multi-level, or hierarchical, Index - IntervalIndex : an Index of :class:`Interval` s. + MultiIndex : A multi-level, or hierarchical, Index. + IntervalIndex : An Index of :class:`Interval` s. 
DatetimeIndex, TimedeltaIndex, PeriodIndex Int64Index, UInt64Index, Float64Index """ @@ -820,8 +820,8 @@ def repeat(self, repeats, *args, **kwargs): See Also -------- - Series.repeat : Equivalent function for Series - numpy.repeat : Underlying implementation + Series.repeat : Equivalent function for Series. + numpy.repeat : Underlying implementation. Examples -------- @@ -876,7 +876,7 @@ def ravel(self, order='C'): """ return an ndarray of the flattened values of the underlying data - See also + See Also -------- numpy.ndarray.ravel """ @@ -1113,6 +1113,26 @@ def _format_attrs(self): """ return format_object_attrs(self) + def to_flat_index(self): + """ + Identity method. + + .. versionadded:: 0.24.0 + + This is implemented for compatability with subclass implementations + when chaining. + + Returns + ------- + pd.Index + Caller. + + See Also + -------- + MultiIndex.to_flat_index : Subclass implementation. + """ + return self + def to_series(self, index=None, name=None): """ Create a Series with both index and values equal to the index keys @@ -2160,7 +2180,7 @@ def _concat_same_dtype(self, to_concat, name): If allow_fill=True and fill_value is not None, indices specified by -1 is regarded as NA. If Index doesn't hold NA, raise ValueError - See also + See Also -------- numpy.ndarray.take """ @@ -2249,10 +2269,10 @@ def isna(self): See Also -------- - pandas.Index.notna : boolean inverse of isna. - pandas.Index.dropna : omit entries with missing values. - pandas.isna : top-level isna. - Series.isna : detect missing values in Series object. + pandas.Index.notna : Boolean inverse of isna. + pandas.Index.dropna : Omit entries with missing values. + pandas.isna : Top-level isna. + Series.isna : Detect missing values in Series object. Examples -------- @@ -2305,11 +2325,11 @@ def notna(self): numpy.ndarray Boolean array to indicate which entries are not NA. 
- See also + See Also -------- - Index.notnull : alias of notna - Index.isna: inverse of notna - pandas.notna : top-level notna + Index.notnull : Alias of notna. + Index.isna: Inverse of notna. + pandas.notna : Top-level notna. Examples -------- @@ -2338,7 +2358,7 @@ def putmask(self, mask, value): """ return a new Index of the values set with the mask - See also + See Also -------- numpy.ndarray.putmask """ @@ -2486,7 +2506,7 @@ def asof(self, label): passed index. merge_asof : Perform an asof merge (similar to left join but it matches on nearest key rather than equal key). - Index.get_loc : `asof` is a thin wrapper around `get_loc` + Index.get_loc : An `asof` is a thin wrapper around `get_loc` with method='pad'. Examples @@ -2695,7 +2715,7 @@ def argsort(self, *args, **kwargs): Integer indices that would sort the index if used as an indexer. - See also + See Also -------- numpy.argsort : Similar method for NumPy arrays. Index.sort_values : Return sorted copy of Index. @@ -3202,9 +3222,9 @@ def _get_level_values(self, level): values : Index Calling object, as there is only one level in the Index. - See also + See Also -------- - MultiIndex.get_level_values : get values for a level of a MultiIndex + MultiIndex.get_level_values : Get values for a level of a MultiIndex. Notes ----- @@ -3607,7 +3627,7 @@ def isin(self, values, level=None): is_contained : ndarray NumPy array of boolean values. - See also + See Also -------- Series.isin : Same for Series. DataFrame.isin : Same method for DataFrames. @@ -4409,7 +4429,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): See Also -------- - Index.get_loc : Get location for a single label + Index.get_loc : Get location for a single label. 
""" inc = (step is None or step >= 0) @@ -4561,9 +4581,9 @@ def drop_duplicates(self, keep='first'): See Also -------- - Series.drop_duplicates : equivalent method on Series - DataFrame.drop_duplicates : equivalent method on DataFrame - Index.duplicated : related method on Index, indicating duplicate + Series.drop_duplicates : Equivalent method on Series. + DataFrame.drop_duplicates : Equivalent method on DataFrame. + Index.duplicated : Related method on Index, indicating duplicate Index values. Examples @@ -4642,9 +4662,9 @@ def duplicated(self, keep='first'): See Also -------- - pandas.Series.duplicated : Equivalent method on pandas.Series - pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame - pandas.Index.drop_duplicates : Remove duplicate values from Index + pandas.Series.duplicated : Equivalent method on pandas.Series. + pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame. + pandas.Index.drop_duplicates : Remove duplicate values from Index. """ return super(Index, self).duplicated(keep=keep) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1d9d3b1d3bd16..7616bc4ea10ef 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -10,6 +10,7 @@ from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64 import pandas.compat as compat from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -17,11 +18,10 @@ is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, is_integer, is_integer_dtype, is_list_like, is_object_dtype, is_period_dtype, is_scalar, is_string_dtype) -import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna -from pandas.core import algorithms, common as com, ops +from 
pandas.core import algorithms, ops from pandas.core.arrays import PeriodArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin import pandas.core.indexes.base as ibase @@ -58,7 +58,7 @@ def strftime(self, date_format): See Also -------- - pandas.to_datetime : Convert the given argument to datetime + pandas.to_datetime : Convert the given argument to datetime. DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. DatetimeIndex.round : Round the DatetimeIndex to the specified freq. DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. @@ -90,6 +90,8 @@ class TimelikeOps(object): :ref:`frequency aliases ` for a list of possible `freq` values. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False designates @@ -98,7 +100,6 @@ class TimelikeOps(object): - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - Only relevant for DatetimeIndex .. versionadded:: 0.24.0 nonexistent : 'shift', 'NaT', default 'raise' @@ -213,6 +214,11 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'): class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): """ common ops mixin to support a unified interface datetimelike Index """ + # override DatetimeLikeArrayMixin method + copy = Index.copy + unique = Index.unique + take = Index.take + # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index # subclasses bc they are immutable @@ -434,7 +440,7 @@ def min(self, axis=None, *args, **kwargs): Return the minimum value of the Index or minimum along an axis. 
- See also + See Also -------- numpy.ndarray.min """ @@ -463,7 +469,7 @@ def argmin(self, axis=None, *args, **kwargs): See `numpy.ndarray.argmin` for more information on the `axis` parameter. - See also + See Also -------- numpy.ndarray.argmin """ @@ -484,7 +490,7 @@ def max(self, axis=None, *args, **kwargs): Return the maximum value of the Index or maximum along an axis. - See also + See Also -------- numpy.ndarray.max """ @@ -513,7 +519,7 @@ def argmax(self, axis=None, *args, **kwargs): See `numpy.ndarray.argmax` for more information on the `axis` parameter. - See also + See Also -------- numpy.ndarray.argmax """ @@ -531,7 +537,7 @@ def argmax(self, axis=None, *args, **kwargs): @property def _formatter_func(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _format_attrs(self): """ @@ -683,17 +689,21 @@ def _concat_same_dtype(self, to_concat, name): """ attribs = self._get_attributes_dict() attribs['name'] = name + # do not pass tz to set because tzlocal cannot be hashed + if len({str(x.dtype) for x in to_concat}) != 1: + raise ValueError('to_concat must have the same tz') if not is_period_dtype(self): # reset freq attribs['freq'] = None - - if getattr(self, 'tz', None) is not None: - return _concat._concat_datetimetz(to_concat, name) + # TODO(DatetimeArray) + # - remove the .asi8 here + # - remove the _maybe_box_as_values + # - combine with the `else` block + new_data = self._concat_same_type(to_concat).asi8 else: - new_data = np.concatenate([c.asi8 for c in to_concat]) + new_data = type(self._values)._concat_same_type(to_concat) - new_data = self._maybe_box_as_values(new_data, **attribs) return self._simple_new(new_data, **attribs) def _maybe_box_as_values(self, values, **attribs): @@ -702,7 +712,6 @@ def _maybe_box_as_values(self, values, **attribs): # but others are not. When everyone is an ExtensionArray, this can # be removed. 
Currently used in # - sort_values - # - _concat_same_dtype return values def astype(self, dtype, copy=True): @@ -759,7 +768,7 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): try: return np.array(other, copy=False).view('i8') except TypeError: - # period array cannot be coerces to int + # period array cannot be coerced to int other = Index(other) return other.asi8 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b754b2705d034..a1fed8b93fcbb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -164,10 +164,10 @@ class DatetimeIndex(DatetimeArray, DatelikeOps, TimelikeOps, See Also --------- - Index : The base pandas Index type - TimedeltaIndex : Index of timedelta64 data - PeriodIndex : Index of Period data - pandas.to_datetime : Convert argument to datetime + Index : The base pandas Index type. + TimedeltaIndex : Index of timedelta64 data. + PeriodIndex : Index of Period data. + pandas.to_datetime : Convert argument to datetime. """ _typ = 'datetimeindex' @@ -551,16 +551,13 @@ def snap(self, freq='S'): # TODO: what about self.name? if so, use shallow_copy? def unique(self, level=None): - # Override here since IndexOpsMixin.unique uses self._values.unique - # For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error - # So we extract the tz-naive DatetimeIndex, unique that, and wrap the - # result with out TZ. 
- if self.tz is not None: - naive = type(self)(self._ndarray_values, copy=False) - else: - naive = self - result = super(DatetimeIndex, naive).unique(level=level) - return self._shallow_copy(result.values) + if level is not None: + self._validate_index_level(level) + + # TODO(DatetimeArray): change dispatch once inheritance is removed + # call DatetimeArray method + result = DatetimeArray.unique(self) + return self._shallow_copy(result._data) def union(self, other): """ diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 3ac4a2bf31a7e..cb145dd9eed4d 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -164,7 +164,7 @@ def searchsorted(self, value, side="left", sorter=None): See Also -------- - numpy.searchsorted : equivalent function + numpy.searchsorted : Equivalent function. """ # We are much more performant if the searched diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index eb4284203d865..2b157bf91c5a2 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1185,7 +1185,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, See Also -------- - IntervalIndex : an Index of intervals that are all closed on the same side. + IntervalIndex : An Index of intervals that are all closed on the same side. """ start = com.maybe_box_datetimelike(start) end = com.maybe_box_datetimelike(end) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 01304cce507f0..dbb1b8e196bf7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -171,11 +171,11 @@ class MultiIndex(Index): See Also -------- - MultiIndex.from_arrays : Convert list of arrays to MultiIndex + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 
MultiIndex.from_product : Create a MultiIndex from the cartesian product - of iterables - MultiIndex.from_tuples : Convert list of tuples to a MultiIndex - Index : The base pandas Index type + of iterables. + MultiIndex.from_tuples : Convert list of tuples to a MultiIndex. + Index : The base pandas Index type. Attributes ---------- @@ -193,6 +193,7 @@ class MultiIndex(Index): set_levels set_labels to_frame + to_flat_index is_lexsorted sortlevel droplevel @@ -1176,7 +1177,7 @@ def to_frame(self, index=True, name=None): ------- DataFrame : a DataFrame containing the original MultiIndex data. - See also + See Also -------- DataFrame """ @@ -1246,6 +1247,34 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): FutureWarning, stacklevel=2) return MultiIndex(levels=levels, labels=labels, names=names) + def to_flat_index(self): + """ + Convert a MultiIndex to an Index of Tuples containing the level values. + + .. versionadded:: 0.24.0 + + Returns + ------- + pd.Index + Index with the MultiIndex data represented in Tuples. + + Notes + ----- + This method will simply return the caller if called by anything other + than a MultiIndex. + + Examples + -------- + >>> index = pd.MultiIndex.from_product( + ... [['foo', 'bar'], ['baz', 'qux']], + ... names=['a', 'b']) + >>> index.to_flat_index() + Index([('foo', 'baz'), ('foo', 'qux'), + ('bar', 'baz'), ('bar', 'qux')], + dtype='object') + """ + return Index(self.values, tupleize_cols=False) + @property def is_all_dates(self): return False @@ -1296,9 +1325,9 @@ def from_arrays(cls, arrays, sortorder=None, names=None): See Also -------- - MultiIndex.from_tuples : Convert list of tuples to MultiIndex + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. MultiIndex.from_product : Make a MultiIndex from cartesian product - of iterables + of iterables. 
""" if not is_list_like(arrays): raise TypeError("Input must be a list / sequence of array-likes.") @@ -1402,8 +1431,8 @@ def from_product(cls, iterables, sortorder=None, names=None): See Also -------- - MultiIndex.from_arrays : Convert list of arrays to MultiIndex - MultiIndex.from_tuples : Convert list of tuples to MultiIndex + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. """ from pandas.core.arrays.categorical import _factorize_from_iterables from pandas.core.reshape.util import cartesian_product @@ -1804,9 +1833,9 @@ def swaplevel(self, i=-2, j=-1): See Also -------- - Series.swaplevel : Swap levels i and j in a MultiIndex + Series.swaplevel : Swap levels i and j in a MultiIndex. Dataframe.swaplevel : Swap levels i and j in a MultiIndex on a - particular axis + particular axis. Examples -------- @@ -2204,9 +2233,9 @@ def get_loc(self, key, method=None): or a sequence of such. If you want to use those, use :meth:`MultiIndex.get_locs` instead. - See also + See Also -------- - Index.get_loc : get_loc method for (single-level) index. + Index.get_loc : The get_loc method for (single-level) index. MultiIndex.slice_locs : Get slice location given start label(s) and end label(s). MultiIndex.get_locs : Get location for a label/slice/list/mask or a @@ -2310,7 +2339,7 @@ def get_loc_level(self, key, level=0, drop_level=True): --------- MultiIndex.get_loc : Get location for a label or a tuple of labels. MultiIndex.get_locs : Get location for a label/slice/list/mask or a - sequence of such + sequence of such. """ def maybe_droplevels(indexer, levels, drop_level): @@ -2530,7 +2559,7 @@ def get_locs(self, seq): >>> mi.get_locs([[True, False, True], slice('e', 'f')]) array([2], dtype=int64) - See also + See Also -------- MultiIndex.get_loc : Get location for a label or a tuple of labels. 
MultiIndex.slice_locs : Get slice location given start label(s) and @@ -2657,7 +2686,7 @@ def equals(self, other): Determines if two MultiIndex objects have the same labeling information (the levels themselves do not necessarily have to be the same) - See also + See Also -------- equal_levels """ diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 795ffeefa1794..23b2dde2e6494 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -155,9 +155,9 @@ def insert(self, loc, item): ----- An Index instance can **only** contain hashable objects. - See also + See Also -------- - Index : The base pandas Index type + Index : The base pandas Index type. """ _int64_descr_args = dict( diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 7890f03a1eba7..f35e775fc0a28 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -165,10 +165,10 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, See Also --------- - Index : The base pandas Index type - Period : Represents a period of time - DatetimeIndex : Index with datetime64 data - TimedeltaIndex : Index of timedelta64 data + Index : The base pandas Index type. + Period : Represents a period of time. + DatetimeIndex : Index with datetime64 data. + TimedeltaIndex : Index of timedelta64 data. """ _typ = 'periodindex' _attributes = ['name', 'freq'] diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e4c177a08462e..0d4e7aaebeca5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -49,8 +49,8 @@ class RangeIndex(Int64Index): See Also -------- - Index : The base pandas Index type - Int64Index : Index of int64 data + Index : The base pandas Index type. + Int64Index : Index of int64 data. 
Attributes ---------- @@ -305,7 +305,7 @@ def argsort(self, *args, **kwargs): ------- argsorted : numpy array - See also + See Also -------- numpy.ndarray.argsort """ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index d9625d38b85de..9f9d8b1b67171 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -72,10 +72,10 @@ class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, See Also --------- - Index : The base pandas Index type + Index : The base pandas Index type. Timedelta : Represents a duration between two dates or times. - DatetimeIndex : Index of datetime64 data - PeriodIndex : Index of Period data + DatetimeIndex : Index of datetime64 data. + PeriodIndex : Index of Period data. Attributes ---------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0c6aaf4b46d6a..d376c95a7e1b3 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1569,11 +1569,11 @@ class _LocIndexer(_LocationIndexer): See Also -------- - DataFrame.at : Access a single value for a row/column label pair - DataFrame.iloc : Access group of rows and columns by integer position(s) + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.iloc : Access group of rows and columns by integer position(s). DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the Series/DataFrame. - Series.loc : Access group of values using labels + Series.loc : Access group of values using labels. Examples -------- @@ -2302,9 +2302,9 @@ class _AtIndexer(_ScalarAccessIndexer): See Also -------- DataFrame.iat : Access a single value for a row/column pair by integer - position - DataFrame.loc : Access a group of rows and columns by label(s) - Series.at : Access a single value using a label + position. + DataFrame.loc : Access a group of rows and columns by label(s). + Series.at : Access a single value using a label. 
Examples -------- @@ -2372,9 +2372,9 @@ class _iAtIndexer(_ScalarAccessIndexer): See Also -------- - DataFrame.at : Access a single value for a row/column label pair - DataFrame.loc : Access a group of rows and columns by label(s) - DataFrame.iloc : Access a group of rows and columns by integer position(s) + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.loc : Access a group of rows and columns by label(s). + DataFrame.iloc : Access a group of rows and columns by integer position(s). Examples -------- diff --git a/pandas/core/ops.py b/pandas/core/ops.py index fbfdfb9c01237..1f422a6098fa0 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -88,7 +88,7 @@ def _maybe_match_name(a, b): ------- name : str or None - See also + See Also -------- pandas.core.common.consensus_name_attr """ @@ -609,7 +609,7 @@ def _get_op_name(op, special): e NaN dtype: float64 -See also +See Also -------- Series.{reverse} """ @@ -673,7 +673,7 @@ def _get_op_name(op, special): -------- {df_examples} -See also +See Also -------- DataFrame.{reverse} """ @@ -692,7 +692,7 @@ def _get_op_name(op, special): ------- Panel -See also +See Also -------- Panel.{reverse} """ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 36476a8ecb657..0a275c7a3575b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -239,7 +239,7 @@ def pipe(self, func, *args, **kwargs): 2013-01-01 00:00:02 7 4.949747 2013-01-01 00:00:04 5 NaN - See also + See Also -------- pandas.DataFrame.groupby.aggregate pandas.DataFrame.resample.transform @@ -983,7 +983,7 @@ def _upsample(self, method, limit=None, fill_value=None): fill_value : scalar, default None Value to use for missing values - See also + See Also -------- .fillna @@ -1113,7 +1113,7 @@ def _upsample(self, method, limit=None, fill_value=None): fill_value : scalar, default None Value to use for missing values - See also + See Also -------- .fillna diff --git a/pandas/core/reshape/merge.py 
b/pandas/core/reshape/merge.py index e09cf0a527ff9..3d6f55c907269 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -199,7 +199,7 @@ def merge_ordered(left, right, on=None, The output type will the be same as 'left', if it is a subclass of DataFrame. - See also + See Also -------- merge merge_asof @@ -447,7 +447,7 @@ def merge_asof(left, right, on=None, 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN - See also + See Also -------- merge merge_ordered diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py index 07f7272398777..7f43a0e9719b8 100644 --- a/pandas/core/reshape/util.py +++ b/pandas/core/reshape/util.py @@ -26,7 +26,7 @@ def cartesian_product(X): [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'), array([1, 2, 1, 2, 1, 2])] - See also + See Also -------- itertools.product : Cartesian product of input iterables. Equivalent to nested for-loops. diff --git a/pandas/core/series.py b/pandas/core/series.py index 20e4720a3bde7..8fba3030be9d4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -314,7 +314,6 @@ def from_array(cls, arr, index=None, name=None, dtype=None, copy=False, .. deprecated :: 0.23.0 Use pd.Series(..) constructor instead. - """ warnings.warn("'from_array' is deprecated and will be removed in a " "future version. Please use the pd.Series(..) " @@ -437,7 +436,6 @@ def values(self): array(['2013-01-01T05:00:00.000000000', '2013-01-02T05:00:00.000000000', '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]') - """ return self._data.external_values() @@ -475,7 +473,7 @@ def ravel(self, order='C'): """ Return the flattened underlying data as an ndarray - See also + See Also -------- numpy.ndarray.ravel """ @@ -487,7 +485,7 @@ def compress(self, condition, *args, **kwargs): .. 
deprecated:: 0.24.0 - See also + See Also -------- numpy.ndarray.compress """ @@ -538,7 +536,7 @@ def put(self, *args, **kwargs): Applies the `put` method to its `values` attribute if it has one. - See also + See Also -------- numpy.ndarray.put """ @@ -992,7 +990,7 @@ def repeat(self, repeats, *args, **kwargs): Repeat elements of an Series. Refer to `numpy.ndarray.repeat` for more information about the `repeats` argument. - See also + See Also -------- numpy.ndarray.repeat """ @@ -1456,8 +1454,8 @@ def unique(self): See Also -------- - pandas.unique : top-level unique method for any 1-d array-like object. - Index.unique : return Index with unique values from an Index object. + pandas.unique : Top-level unique method for any 1-d array-like object. + Index.unique : Return Index with unique values from an Index object. Examples -------- @@ -1516,9 +1514,9 @@ def drop_duplicates(self, keep='first', inplace=False): See Also -------- - Index.drop_duplicates : equivalent method on Index - DataFrame.drop_duplicates : equivalent method on DataFrame - Series.duplicated : related method on Series, indicating duplicate + Index.drop_duplicates : Equivalent method on Index. + DataFrame.drop_duplicates : Equivalent method on DataFrame. + Series.duplicated : Related method on Series, indicating duplicate Series values. Examples @@ -1638,9 +1636,9 @@ def duplicated(self, keep='first'): See Also -------- - pandas.Index.duplicated : Equivalent method on pandas.Index - pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame - pandas.Series.drop_duplicates : Remove duplicate values from Series + pandas.Index.duplicated : Equivalent method on pandas.Index. + pandas.DataFrame.duplicated : Equivalent method on pandas.DataFrame. + pandas.Series.drop_duplicates : Remove duplicate values from Series. 
""" return super(Series, self).duplicated(keep=keep) @@ -1824,7 +1822,6 @@ def round(self, decimals=0, *args, **kwargs): -------- numpy.around DataFrame.round - """ nv.validate_round(args, kwargs) result = com.values_from_object(self).round(decimals) @@ -1906,7 +1903,6 @@ def corr(self, other, method='pearson', min_periods=None): min_periods : int, optional Minimum number of observations needed to have a valid result - Returns ------- correlation : float @@ -1976,7 +1972,7 @@ def diff(self, periods=1): Series.pct_change: Percent change over given number of periods. Series.shift: Shift index by desired number of periods with an optional time freq. - DataFrame.diff: First discrete difference of object + DataFrame.diff: First discrete difference of object. Examples -------- @@ -2181,10 +2177,10 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): list and then concatenate the list with the original Series all at once. - See also + See Also -------- pandas.concat : General function to concatenate DataFrame, Series - or Panel objects + or Panel objects. Returns ------- @@ -2230,8 +2226,6 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): Traceback (most recent call last): ... ValueError: Indexes have overlapping values: [0, 1, 2] - - """ from pandas.core.reshape.concat import concat @@ -2316,7 +2310,7 @@ def combine(self, other, func, fill_value=None): See Also -------- Series.combine_first : Combine Series values, choosing the calling - Series's values first + Series's values first. """ if fill_value is None: fill_value = na_value_for_dtype(self.dtype, compat=False) @@ -2381,7 +2375,7 @@ def combine_first(self, other): See Also -------- Series.combine : Perform elementwise operation on two Series - using a given function + using a given function. 
""" new_index = self.index.union(other.index) this = self.reindex(new_index, copy=False) @@ -2436,7 +2430,6 @@ def update(self, other): 1 2 2 6 dtype: int64 - """ other = other.reindex_like(self) mask = notna(other) @@ -2649,9 +2642,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, See Also -------- - DataFrame.sort_index: Sort DataFrame by the index - DataFrame.sort_values: Sort DataFrame by the value - Series.sort_values : Sort Series by the value + DataFrame.sort_index: Sort DataFrame by the index. + DataFrame.sort_values: Sort DataFrame by the value. + Series.sort_values : Sort Series by the value. Examples -------- @@ -2784,7 +2777,7 @@ def argsort(self, axis=0, kind='quicksort', order=None): ------- argsorted : Series, with -1 indicated where nan values are present - See also + See Also -------- numpy.ndarray.argsort """ @@ -3011,7 +3004,6 @@ def swaplevel(self, i=-2, j=-1, copy=True): The indexes ``i`` and ``j`` are now optional, and default to the two innermost levels of the index. - """ new_index = self.index.swaplevel(i, j) return self._constructor(self._values, index=new_index, @@ -3198,7 +3190,7 @@ def _gotitem(self, key, ndim, subset=None): max 4 dtype: int64 - See also + See Also -------- pandas.Series.apply : Invoke function on a Series. pandas.Series.transform : Transform function producing @@ -3263,11 +3255,11 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): ------- y : Series or DataFrame if func returns a Series - See also + See Also -------- - Series.map: For element-wise operations - Series.agg: only perform aggregating type operations - Series.transform: only perform transforming type operations + Series.map: For element-wise operations. + Series.agg: Only perform aggregating type operations. + Series.transform: Only perform transforming type operations. 
Examples -------- @@ -3336,8 +3328,6 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): New York 3.044522 Helsinki 2.484907 dtype: float64 - - """ if len(self) == 0: return self._constructor(dtype=self.dtype, @@ -3742,7 +3732,7 @@ def isin(self, values): See Also -------- - pandas.DataFrame.isin : equivalent method on DataFrame + pandas.DataFrame.isin : Equivalent method on DataFrame. Examples -------- @@ -3800,8 +3790,8 @@ def between(self, left, right, inclusive=True): See Also -------- - pandas.Series.gt : Greater than of series and other - pandas.Series.lt : Less than of series and other + pandas.Series.gt : Greater than of series and other. + pandas.Series.lt : Less than of series and other. Examples -------- @@ -3891,7 +3881,7 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, datetime format based on the first datetime string. If the format can be inferred, there often will be a large parsing speed-up. - See also + See Also -------- pandas.read_csv @@ -3970,19 +3960,6 @@ def to_csv(self, *args, **kwargs): kwargs["header"] = False # Backwards compatibility. 
return self.to_frame().to_csv(**kwargs) - @Appender(generic._shared_docs['to_excel'] % _shared_doc_kwargs) - def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', - float_format=None, columns=None, header=True, index=True, - index_label=None, startrow=0, startcol=0, engine=None, - merge_cells=True, encoding=None, inf_rep='inf', verbose=True): - df = self.to_frame() - df.to_excel(excel_writer=excel_writer, sheet_name=sheet_name, - na_rep=na_rep, float_format=float_format, columns=columns, - header=header, index=index, index_label=index_label, - startrow=startrow, startcol=startcol, engine=engine, - merge_cells=merge_cells, encoding=encoding, - inf_rep=inf_rep, verbose=verbose) - @Appender(generic._shared_docs['isna'] % _shared_doc_kwargs) def isna(self): return super(Series, self).isna() diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a12605aaed554..99aab607b2bf0 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -205,7 +205,7 @@ def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): See Also -------- - match : analogous, but stricter, relying on re.match instead of re.search + match : Analogous, but stricter, relying on re.match instead of re.search. Series.str.startswith : Test if the start of each string element matches a pattern. Series.str.endswith : Same as startswith, but tests the end of string. @@ -667,9 +667,9 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan): See Also -------- - contains : analogous, but less strict, relying on re.search instead of - re.match - extract : extract matched groups + contains : Analogous, but less strict, relying on re.search instead of + re.match. + extract : Extract matched groups. """ if not case: @@ -803,7 +803,7 @@ def str_extract(arr, pat, flags=0, expand=True): See Also -------- - extractall : returns all matches (not just the first match) + extractall : Returns all matches (not just the first match). 
Examples -------- @@ -890,7 +890,7 @@ def str_extractall(arr, pat, flags=0): See Also -------- - extract : returns first match only (not all matches) + extract : Returns first match only (not all matches). Examples -------- @@ -2104,8 +2104,8 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): See Also -------- - split : Split each string in the Series/Index - join : Join lists contained as elements in the Series/Index + split : Split each string in the Series/Index. + join : Join lists contained as elements in the Series/Index. Examples -------- @@ -2676,9 +2676,9 @@ def encode(self, encoding, errors="strict"): See Also -------- - Series.str.strip : Remove leading and trailing characters in Series/Index - Series.str.lstrip : Remove leading characters in Series/Index - Series.str.rstrip : Remove trailing characters in Series/Index + Series.str.strip : Remove leading and trailing characters in Series/Index. + Series.str.lstrip : Remove leading characters in Series/Index. + Series.str.rstrip : Remove trailing characters in Series/Index. Examples -------- diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index dcba51d26980f..0eb2ffeab28f1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -543,7 +543,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 1 1960-01-03 2 1960-01-04 - See also + See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_timedelta : Convert argument to timedelta. diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 4bb5c223d1bcc..12699927141cb 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -88,7 +88,7 @@ def to_numeric(arg, errors='raise', downcast=None): 3 -3.0 dtype: float64 - See also + See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. 
diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index fad136b3b5a45..db93820c6942f 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -21,14 +21,14 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): Parameters ---------- arg : string, timedelta, list, tuple, 1-d array, or Series - unit : string, {'Y', 'M', 'W', 'D', 'days', 'day', - 'hours', hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', - 'T', 'S', 'seconds', 'sec', 'second', 'ms', - 'milliseconds', 'millisecond', 'milli', 'millis', 'L', - 'us', 'microseconds', 'microsecond', 'micro', 'micros', - 'U', 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond' - 'N'}, optional + unit : str, optional Denote the unit of the input, if input is an integer. Default 'ns'. + Possible values: + {'Y', 'M', 'W', 'D', 'days', 'day', 'hours', 'hour', 'hr', 'h', + 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', 'sec', 'second', + 'ms', 'milliseconds', 'millisecond', 'milli', 'millis', 'L', + 'us', 'microseconds', 'microsecond', 'micro', 'micros', 'U', + 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', 'N'} box : boolean, default True - If True returns a Timedelta/TimedeltaIndex of the results - if False returns a np.timedelta64 or ndarray of values of dtype @@ -68,7 +68,7 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) - See also + See Also -------- pandas.DataFrame.astype : Cast argument to a specified dtype. pandas.to_datetime : Convert argument to datetime. 
diff --git a/pandas/core/window.py b/pandas/core/window.py index be28a3bcccec6..edd7191bcc458 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -50,7 +50,7 @@ ------- same type as input -See also +See Also -------- pandas.Series.%(name)s pandas.DataFrame.%(name)s @@ -416,10 +416,10 @@ def aggregate(self, arg, *args, **kwargs): See Also -------- - Series.%(name)s : Calling object with Series data - DataFrame.%(name)s : Calling object with DataFrames - Series.mean : Equivalent method for Series - DataFrame.mean : Equivalent method for DataFrame + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.mean : Equivalent method for Series. + DataFrame.mean : Equivalent method for DataFrame. Examples -------- @@ -592,7 +592,7 @@ class Window(_Window): See Also -------- expanding : Provides expanding transformations. - ewm : Provides exponential weighted functions + ewm : Provides exponential weighted functions. """ def validate(self): @@ -733,7 +733,7 @@ def f(arg, *args, **kwargs): 8 -0.096361 0.818139 0.472290 9 0.070889 0.134399 -0.031308 - See also + See Also -------- pandas.DataFrame.rolling.aggregate pandas.DataFrame.aggregate @@ -903,9 +903,9 @@ class _Rolling_and_Expanding(_Rolling): See Also -------- - pandas.Series.%(name)s : Calling object with Series data - pandas.DataFrame.%(name)s : Calling object with DataFrames - pandas.DataFrame.count : Count of the full DataFrame + pandas.Series.%(name)s : Calling object with Series data. + pandas.DataFrame.%(name)s : Calling object with DataFrames. + pandas.DataFrame.count : Count of the full DataFrame. Examples -------- @@ -1031,10 +1031,10 @@ def max(self, *args, **kwargs): See Also -------- - Series.%(name)s : Calling object with a Series - DataFrame.%(name)s : Calling object with a DataFrame - Series.min : Similar method for Series - DataFrame.min : Similar method for DataFrame + Series.%(name)s : Calling object with a Series. 
+ DataFrame.%(name)s : Calling object with a DataFrame. + Series.min : Similar method for Series. + DataFrame.min : Similar method for DataFrame. Examples -------- @@ -1074,10 +1074,10 @@ def mean(self, *args, **kwargs): See Also -------- - Series.%(name)s : Calling object with Series data - DataFrame.%(name)s : Calling object with DataFrames - Series.median : Equivalent method for Series - DataFrame.median : Equivalent method for DataFrame + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.median : Equivalent method for Series. + DataFrame.median : Equivalent method for DataFrame. Examples -------- @@ -1117,11 +1117,11 @@ def median(self, **kwargs): See Also -------- - Series.%(name)s : Calling object with Series data - DataFrame.%(name)s : Calling object with DataFrames - Series.std : Equivalent method for Series - DataFrame.std : Equivalent method for DataFrame - numpy.std : Equivalent method for Numpy array + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.std : Equivalent method for Series. + DataFrame.std : Equivalent method for DataFrame. + numpy.std : Equivalent method for Numpy array. Notes ----- @@ -1188,11 +1188,11 @@ def f(arg, *args, **kwargs): See Also -------- - Series.%(name)s : Calling object with Series data - DataFrame.%(name)s : Calling object with DataFrames - Series.var : Equivalent method for Series - DataFrame.var : Equivalent method for DataFrame - numpy.var : Equivalent method for Numpy array + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.var : Equivalent method for Series. + DataFrame.var : Equivalent method for DataFrame. + numpy.var : Equivalent method for Numpy array. 
Notes ----- @@ -1255,12 +1255,12 @@ def skew(self, **kwargs): See Also -------- - Series.%(name)s : Calling object with Series data - DataFrame.%(name)s : Calling object with DataFrames - Series.kurt : Equivalent method for Series - DataFrame.kurt : Equivalent method for DataFrame - scipy.stats.skew : Third moment of a probability density - scipy.stats.kurtosis : Reference SciPy method + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.kurt : Equivalent method for Series. + DataFrame.kurt : Equivalent method for DataFrame. + scipy.stats.skew : Third moment of a probability density. + scipy.stats.kurtosis : Reference SciPy method. Notes ----- @@ -1414,12 +1414,12 @@ def _get_cov(X, Y): See Also -------- - Series.%(name)s : Calling object with Series data - DataFrame.%(name)s : Calling object with DataFrames - Series.corr : Equivalent method for Series - DataFrame.corr : Equivalent method for DataFrame - %(name)s.cov : Similar method to calculate covariance - numpy.corrcoef : NumPy Pearson's correlation calculation + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.corr : Equivalent method for Series. + DataFrame.corr : Equivalent method for DataFrame. + %(name)s.cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. Notes ----- @@ -1640,7 +1640,7 @@ def _validate_freq(self): 8 -0.289082 -1.647453 9 0.212668 -1.647453 - See also + See Also -------- pandas.Series.rolling pandas.DataFrame.rolling @@ -1849,8 +1849,8 @@ class Expanding(_Rolling_and_Expanding): See Also -------- - rolling : Provides rolling window calculations - ewm : Provides exponential weighted functions + rolling : Provides rolling window calculations. + ewm : Provides exponential weighted functions. 
""" _attributes = ['min_periods', 'center', 'axis'] @@ -1916,7 +1916,7 @@ def _get_window(self, other=None): 8 0.067236 0.948257 0.163353 9 -0.286980 0.618493 -0.694496 - See also + See Also -------- pandas.DataFrame.expanding.aggregate pandas.DataFrame.rolling.aggregate @@ -2168,7 +2168,7 @@ class EWM(_Rolling): See Also -------- - rolling : Provides rolling window calculations + rolling : Provides rolling window calculations. expanding : Provides expanding transformations. """ _attributes = ['com', 'min_periods', 'adjust', 'ignore_na', 'axis'] @@ -2219,7 +2219,7 @@ def _constructor(self): 8 0.067236 0.948257 0.163353 9 -0.286980 0.618493 -0.694496 - See also + See Also -------- pandas.DataFrame.rolling.aggregate diff --git a/pandas/io/common.py b/pandas/io/common.py index 155cf566b4c40..3a67238a66450 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,12 +10,11 @@ import pandas.compat as compat from pandas.compat import BytesIO, StringIO, string_types, text_type from pandas.errors import ( # noqa - DtypeWarning, EmptyDataError, ParserError, ParserWarning) + AbstractMethodError, DtypeWarning, EmptyDataError, ParserError, + ParserWarning) from pandas.core.dtypes.common import is_file_like, is_number -import pandas.core.common as com - from pandas.io.formats.printing import pprint_thing # gh-12665: Alias for now and remove later. @@ -67,7 +66,7 @@ def __iter__(self): return self def __next__(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) if not compat.PY3: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index c25a7670cce44..141d2c79a1927 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -175,12 +175,16 @@ convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric data will be read in as floats: Excel stores all numbers as floats internally +mangle_dupe_cols : boolean, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. 
Passing in False will cause data to be overwritten if there + are duplicate names in the columns. Returns ------- parsed : DataFrame or Dict of DataFrames - DataFrame from the passed in Excel file. See notes in sheet_name - argument for more information on when a Dict of Dataframes is returned. + DataFrame from the passed in Excel file. See notes in sheet_name + argument for more information on when a dict of DataFrames is returned. Examples -------- @@ -314,6 +318,7 @@ def read_excel(io, comment=None, skipfooter=0, convert_float=True, + mangle_dupe_cols=True, **kwds): # Can't use _deprecate_kwarg since sheetname=None has a special meaning @@ -349,6 +354,7 @@ def read_excel(io, comment=comment, skipfooter=skipfooter, convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, **kwds) @@ -441,6 +447,7 @@ def parse(self, comment=None, skipfooter=0, convert_float=True, + mangle_dupe_cols=True, **kwds): """ Parse specified sheet(s) into a DataFrame @@ -476,6 +483,7 @@ def parse(self, comment=comment, skipfooter=skipfooter, convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, **kwds) def _parse_excel(self, @@ -498,6 +506,7 @@ def _parse_excel(self, comment=None, skipfooter=0, convert_float=True, + mangle_dupe_cols=True, **kwds): _validate_header_arg(header) @@ -634,20 +643,24 @@ def _parse_cell(cell_contents, cell_typ): else: offset = 1 + max(header) - for col in index_col: - last = data[offset][col] + # Check if we have an empty dataset + # before trying to collect data. 
+ if offset < len(data): + for col in index_col: + last = data[offset][col] - for row in range(offset + 1, len(data)): - if data[row][col] == '' or data[row][col] is None: - data[row][col] = last - else: - last = data[row][col] + for row in range(offset + 1, len(data)): + if data[row][col] == '' or data[row][col] is None: + data[row][col] = last + else: + last = data[row][col] has_index_names = is_list_like(header) and len(header) > 1 # GH 12292 : error when read one empty column from excel file try: parser = TextParser(data, + names=names, header=header, index_col=index_col, has_index_names=has_index_names, @@ -664,13 +677,11 @@ def _parse_cell(cell_contents, cell_typ): comment=comment, skipfooter=skipfooter, usecols=usecols, + mangle_dupe_cols=mangle_dupe_cols, **kwds) output[asheetname] = parser.read(nrows=nrows) - if names is not None: - output[asheetname].columns = names - if not squeeze or isinstance(output[asheetname], DataFrame): output[asheetname].columns = output[ asheetname].columns.set_names(header_names) diff --git a/pandas/io/html.py b/pandas/io/html.py index bcbb07c6dddfb..c967bdd29df1f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -12,12 +12,11 @@ from pandas.compat import ( binary_type, iteritems, lmap, lrange, raise_with_traceback, string_types, u) -from pandas.errors import EmptyDataError +from pandas.errors import AbstractMethodError, EmptyDataError from pandas.core.dtypes.common import is_list_like from pandas import Series -import pandas.core.common as com from pandas.io.common import _is_url, _validate_header_arg, urlopen from pandas.io.formats.printing import pprint_thing @@ -256,7 +255,7 @@ def _text_getter(self, obj): text : str or unicode The text from an individual DOM node. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_td(self, obj): """Return the td elements from a row element. 
@@ -271,7 +270,7 @@ def _parse_td(self, obj): list of node-like These are the elements of each row, i.e., the columns. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_thead_tr(self, table): """ @@ -286,7 +285,7 @@ def _parse_thead_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tbody_tr(self, table): """ @@ -305,7 +304,7 @@ def _parse_tbody_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tfoot_tr(self, table): """ @@ -320,7 +319,7 @@ def _parse_tfoot_tr(self, table): list of node-like These are the row elements of a table. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_tables(self, doc, match, attrs): """ @@ -346,7 +345,7 @@ def _parse_tables(self, doc, match, attrs): list of node-like HTML elements to be parsed into raw data. """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _equals_tag(self, obj, tag): """ @@ -365,7 +364,7 @@ def _equals_tag(self, obj, tag): boolean Whether `obj`'s tag name is `tag` """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _build_doc(self): """ @@ -376,7 +375,7 @@ def _build_doc(self): node-like The DOM from which to parse the table element. 
""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _parse_thead_tbody_tfoot(self, table_html): """ diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index af7b390de213d..4453416a97f89 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -7,11 +7,11 @@ import pandas._libs.json as json from pandas._libs.tslibs import iNaT from pandas.compat import StringIO, long, to_str, u +from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_period_dtype from pandas import DataFrame, MultiIndex, Series, compat, isna, to_datetime -import pandas.core.common as com from pandas.core.reshape.concat import concat from pandas.io.common import ( @@ -97,7 +97,7 @@ def __init__(self, obj, orient, date_format, double_precision, self._format_axes() def _format_axes(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def write(self): return self._write(self.obj, self.orient, self.double_precision, @@ -658,7 +658,7 @@ def _convert_axes(self): setattr(self.obj, axis, new_axis) def _try_convert_types(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): @@ -771,7 +771,7 @@ def _try_convert_to_date(self, data): return data, False def _try_convert_dates(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SeriesParser(Parser): diff --git a/pandas/io/json/table_schema.py b/pandas/io/json/table_schema.py index 3b4ebb638412e..2c2ecf75bbe7b 100644 --- a/pandas/io/json/table_schema.py +++ b/pandas/io/json/table_schema.py @@ -290,9 +290,9 @@ def parse_table_schema(json, precise_float): :class:`Index` name of 'index' and :class:`MultiIndex` names starting with 'level_' are not supported. - See also + See Also -------- - build_table_schema : inverse function + build_table_schema : Inverse function. 
pandas.read_json """ table = loads(json, precise_float=precise_float) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 3d72b1ec3a47f..aad59f9805a3b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -4,9 +4,9 @@ from warnings import catch_warnings from pandas.compat import string_types +from pandas.errors import AbstractMethodError from pandas import DataFrame, get_option -import pandas.core.common as com from pandas.io.common import get_filepath_or_buffer, is_s3_url @@ -67,10 +67,10 @@ def validate_dataframe(df): raise ValueError("Index level names must be strings") def write(self, df, path, compression, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def read(self, path, columns=None, **kwargs): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class PyArrowImpl(BaseImpl): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 12914c10e0655..9fd35effe1b07 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -20,7 +20,8 @@ import pandas.compat as compat from pandas.compat import ( PY3, StringIO, lrange, lzip, map, range, string_types, u, zip) -from pandas.errors import EmptyDataError, ParserError, ParserWarning +from pandas.errors import ( + AbstractMethodError, EmptyDataError, ParserError, ParserWarning) from pandas.util._decorators import Appender from pandas.core.dtypes.cast import astype_nansafe @@ -33,7 +34,6 @@ from pandas.core import algorithms from pandas.core.arrays import Categorical -import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.index import ( Index, MultiIndex, RangeIndex, ensure_index_from_sequences) @@ -1050,7 +1050,7 @@ def _make_engine(self, engine='c'): self._engine = klass(self.f, **self.options) def _failover_to_python(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def read(self, nrows=None): nrows = _validate_integer('nrows', nrows) diff --git a/pandas/io/pytables.py 
b/pandas/io/pytables.py index 4c28e0f88b1ae..8c574bcb70363 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -326,8 +326,8 @@ def read_hdf(path_or_buf, key=None, mode='r', **kwargs): See Also -------- - pandas.DataFrame.to_hdf : write a HDF file from a DataFrame - pandas.HDFStore : low-level access to HDF files + pandas.DataFrame.to_hdf : Write a HDF file from a DataFrame. + pandas.HDFStore : Low-level access to HDF files. Examples -------- diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2f411a956dfb8..a81bc11130153 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -222,7 +222,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None, ----- Any datetime values with time zone information will be converted to UTC. - See also + See Also -------- read_sql_query : Read SQL query into a DataFrame. read_sql @@ -302,7 +302,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, Any datetime values with time zone information parsed via the `parse_dates` parameter will be converted to UTC. - See also + See Also -------- read_sql_table : Read SQL database table into a DataFrame. read_sql @@ -366,7 +366,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, ------- DataFrame - See also + See Also -------- read_sql_table : Read SQL database table into a DataFrame. read_sql_query : Read SQL query into a DataFrame. @@ -1002,7 +1002,7 @@ def read_table(self, table_name, index_col=None, coerce_float=True, ------- DataFrame - See also + See Also -------- pandas.read_sql_table SQLDatabase.read_query @@ -1063,9 +1063,9 @@ def read_query(self, sql, index_col=None, coerce_float=True, ------- DataFrame - See also + See Also -------- - read_sql_table : Read SQL database table into a DataFrame + read_sql_table : Read SQL database table into a DataFrame. 
read_sql """ diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 66e996075f1ed..7e8ab002f7978 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -98,8 +98,8 @@ See Also -------- -pandas.io.stata.StataReader : low-level reader for Stata data files -pandas.DataFrame.to_stata: export Stata data files +pandas.io.stata.StataReader : Low-level reader for Stata data files. +pandas.DataFrame.to_stata: Export Stata data files. Examples -------- @@ -2558,6 +2558,8 @@ def generate_table(self): for o, (idx, row) in enumerate(selected.iterrows()): for j, (col, v) in enumerate(col_index): val = row[col] + # Allow columns with mixed str and None (GH 23633) + val = '' if val is None else val key = gso_table.get(val, None) if key is None: # Stata prefers human numbers diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 405c534e8528b..83ab3b6098956 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -12,6 +12,7 @@ from pandas.util._decorators import cache_readonly, Appender from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat +from pandas.errors import AbstractMethodError import pandas.core.common as com from pandas.core.base import PandasObject @@ -373,7 +374,7 @@ def _compute_plot_data(self): self.data = numeric_data def _make_plot(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _add_table(self): if self.table is False: @@ -3491,7 +3492,7 @@ def scatter(self, x, y, s=None, c=None, **kwds): See Also -------- - matplotlib.pyplot.scatter : scatter plot using multiple input data + matplotlib.pyplot.scatter : Scatter plot using multiple input data formats. Examples @@ -3567,7 +3568,7 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, See Also -------- DataFrame.plot : Make plots of a DataFrame. 
- matplotlib.pyplot.hexbin : hexagonal binning plot using matplotlib, + matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib, the matplotlib function that is used under the hood. Examples diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index f889e08b5d348..39765b27cf19c 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -185,7 +185,7 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): See Also -------- - pandas.plotting.andrews_curves : Plot clustering visualization + pandas.plotting.andrews_curves : Plot clustering visualization. Examples -------- diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index bb4022c9cac9a..a1242e2481fed 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -56,7 +56,68 @@ def timedelta_index(request): return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) -class TestDatetimeArray(object): +class SharedTests(object): + index_cls = None + + def test_take(self): + data = np.arange(100, dtype='i8') + np.random.shuffle(data) + + idx = self.index_cls._simple_new(data, freq='D') + arr = self.array_cls(idx) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_take_fill(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D') + arr = self.array_cls(idx) + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT) + assert result[0] is pd.NaT + + with pytest.raises(ValueError): + arr.take([0, 
1], allow_fill=True, fill_value=2) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2.0) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, + fill_value=pd.Timestamp.now().time) + + def test_concat_same_type(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT) + arr = self.array_cls(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + + tm.assert_index_equal(self.index_cls(result), expected) + + +class TestDatetimeArray(SharedTests): + index_cls = pd.DatetimeIndex + array_cls = DatetimeArray def test_array_object_dtype(self, tz_naive_fixture): # GH#23524 @@ -175,8 +236,60 @@ def test_int_properties(self, datetime_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, datetime_index, tz_naive_fixture): + dti = datetime_index.tz_localize(tz_naive_fixture) + arr = DatetimeArray(dti) + + now = pd.Timestamp.now().tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + with pytest.raises(ValueError): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=pd.Period('2014Q1')) + + tz = None if dti.tz is not None else 'US/Eastern' + now = pd.Timestamp.now().tz_localize(tz) + with pytest.raises(TypeError): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) + + def test_concat_same_type_invalid(self, datetime_index): + # different timezones + dti = datetime_index + arr = DatetimeArray(dti) + + if arr.tz is None: + other = arr.tz_localize('UTC') + else: + other = arr.tz_localize(None) + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + + def 
test_concat_same_type_different_freq(self): + # we *can* concatenate DTI with different freqs. + a = DatetimeArray(pd.date_range('2000', periods=2, freq='D', + tz='US/Central')) + b = DatetimeArray(pd.date_range('2000', periods=2, freq='H', + tz='US/Central')) + result = DatetimeArray._concat_same_type([a, b]) + expected = DatetimeArray(pd.to_datetime([ + '2000-01-01 00:00:00', '2000-01-02 00:00:00', + '2000-01-01 00:00:00', '2000-01-01 01:00:00', + ]).tz_localize("US/Central")) + + tm.assert_datetime_array_equal(result, expected) + + +class TestTimedeltaArray(SharedTests): + index_cls = pd.TimedeltaIndex + array_cls = TimedeltaArray -class TestTimedeltaArray(object): def test_from_tdi(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) arr = TimedeltaArray(tdi) @@ -223,8 +336,27 @@ def test_int_properties(self, timedelta_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_take_fill_valid(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + td1 = pd.Timedelta(days=1) + result = arr.take([-1, 1], allow_fill=True, fill_value=td1) + assert result[0] == td1 + + now = pd.Timestamp.now() + with pytest.raises(ValueError): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D')) + -class TestPeriodArray(object): +class TestPeriodArray(SharedTests): + index_cls = pd.PeriodIndex + array_cls = PeriodArray def test_from_pi(self, period_index): pi = period_index diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 1ff3005722341..f2552cffc6651 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -59,7 +59,7 @@ def coerce(request): ({'a', 1}, 'set', 'set'), # noqa: E241 (set(), 'set', 'set-empty'), # noqa: E241 (frozenset({'a', 1}), 'set', 'frozenset'), # noqa: E241 - (frozenset([]),
'set', 'frozenset-empty'), # noqa: E241 + (frozenset(), 'set', 'frozenset-empty'), # noqa: E241 (iter([1, 2]), True, 'iterator'), # noqa: E241 (iter([]), True, 'iterator-empty'), # noqa: E241 ((x for x in [1, 2]), True, 'generator'), # noqa: E241 diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index 078c48539de16..eaba5f7ec7790 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -309,3 +309,10 @@ def test_rank_pct_true(self, method, exp): expected = DataFrame(exp) tm.assert_frame_equal(result, expected) + + def test_pct_max_many_rows(self): + # GH 18271 + df = DataFrame({'A': np.arange(2**24 + 1), + 'B': np.arange(2**24 + 1, 0, -1)}) + result = df.rank(pct=True).max() + assert (result == 1).all() diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 79494a7c77cbd..fb734b016518e 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -170,3 +170,11 @@ def test_to_series_with_arguments(idx): assert s.values is not idx.values assert s.index is not idx assert s.name != idx.name + + +def test_to_flat_index(idx): + expected = pd.Index((('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')), + tupleize_cols=False) + result = idx.to_flat_index() + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4a3efe22926f7..619f60a42e0be 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2266,6 +2266,14 @@ def test_tab_complete_warning(self, ip): with provisionalcompleter('ignore'): list(ip.Completer.completions('idx.', 4)) + def test_to_flat_index(self, indices): + # 22866 + if isinstance(indices, MultiIndex): + pytest.skip("Separate expectation for MultiIndex") + + result = indices.to_flat_index() + tm.assert_index_equal(result, indices) + class 
TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ diff --git a/pandas/tests/io/data/test1.xls b/pandas/tests/io/data/test1.xls index db0f9dec7d5e4..a5940b2cfa6c2 100644 Binary files a/pandas/tests/io/data/test1.xls and b/pandas/tests/io/data/test1.xls differ diff --git a/pandas/tests/io/data/test1.xlsm b/pandas/tests/io/data/test1.xlsm index 4c873e55a5300..981c303b7bd30 100644 Binary files a/pandas/tests/io/data/test1.xlsm and b/pandas/tests/io/data/test1.xlsm differ diff --git a/pandas/tests/io/data/test1.xlsx b/pandas/tests/io/data/test1.xlsx index e6d3a0d503cf2..8f011d0687521 100644 Binary files a/pandas/tests/io/data/test1.xlsx and b/pandas/tests/io/data/test1.xlsx differ diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 50d927176a7b4..21286e9b82323 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -6,9 +6,9 @@ from pandas._libs.tslib import Timestamp from pandas.compat import StringIO +from pandas.errors import AbstractMethodError from pandas import DataFrame, read_csv, read_table -import pandas.core.common as com import pandas.util.testing as tm from .c_parser_only import CParserTests @@ -46,7 +46,7 @@ def read_table(self, *args, **kwargs): raise NotImplementedError def float_precision_choices(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @pytest.fixture(autouse=True) def setup_method(self, datapath): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 9b147d53c06c4..a097e0adbeb7a 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -235,6 +235,16 @@ def test_index_col_label_error(self, ext): self.get_exceldf("test1", ext, "Sheet1", index_col=["A"], usecols=["A", "C"]) + def test_index_col_empty(self, ext): + # see gh-9208 + result = self.get_exceldf("test1", ext, "Sheet3", + index_col=["A", "B", "C"]) + expected = 
DataFrame(columns=["D", "E", "F"], + index=MultiIndex(levels=[[]] * 3, + labels=[[]] * 3, + names=["A", "B", "C"])) + tm.assert_frame_equal(result, expected) + def test_usecols_pass_non_existent_column(self, ext): msg = ("Usecols do not match columns, " "columns expected but not found: " + r"\['E'\]") @@ -1836,33 +1846,41 @@ def roundtrip(data, header=True, parser_hdr=0, index=True): def test_duplicated_columns(self, *_): # see gh-5235 - write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]]) - col_names = ["A", "B", "B"] - - write_frame.columns = col_names - write_frame.to_excel(self.path, "test1") + df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], + columns=["A", "B", "B"]) + df.to_excel(self.path, "test1") + expected = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], + columns=["A", "B", "B.1"]) - read_frame = read_excel(self.path, "test1", index_col=0) - read_frame.columns = col_names + # By default, we mangle. + result = read_excel(self.path, "test1", index_col=0) + tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(write_frame, read_frame) + # Explicitly, we pass in the parameter. 
+ result = read_excel(self.path, "test1", index_col=0, + mangle_dupe_cols=True) + tm.assert_frame_equal(result, expected) # see gh-11007, gh-10970 - write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=["A", "B", "A", "B"]) - write_frame.to_excel(self.path, "test1") - - read_frame = read_excel(self.path, "test1", index_col=0) - read_frame.columns = ["A", "B", "A", "B"] + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=["A", "B", "A", "B"]) + df.to_excel(self.path, "test1") - tm.assert_frame_equal(write_frame, read_frame) + result = read_excel(self.path, "test1", index_col=0) + expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=["A", "B", "A.1", "B.1"]) + tm.assert_frame_equal(result, expected) # see gh-10982 - write_frame.to_excel(self.path, "test1", index=False, header=False) - read_frame = read_excel(self.path, "test1", header=None) + df.to_excel(self.path, "test1", index=False, header=False) + result = read_excel(self.path, "test1", header=None) - write_frame.columns = [0, 1, 2, 3] - tm.assert_frame_equal(write_frame, read_frame) + expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + tm.assert_frame_equal(result, expected) + + msg = "Setting mangle_dupe_cols=False is not supported yet" + with pytest.raises(ValueError, match=msg): + read_excel(self.path, "test1", header=None, mangle_dupe_cols=False) def test_swapped_columns(self, merge_cells, engine, ext): # Test for issue #5427. 
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c6b6f6cab9ddd..47293e8765d26 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1505,3 +1505,20 @@ def test_unicode_dta_118(self): expected = pd.DataFrame(values, columns=columns) tm.assert_frame_equal(unicode_df, expected) + + def test_mixed_string_strl(self): + # GH 23633 + output = [ + {'mixed': 'string' * 500, + 'number': 0}, + {'mixed': None, + 'number': 1} + ] + + output = pd.DataFrame(output) + with tm.ensure_clean() as path: + output.to_stata(path, write_index=False, version=117) + reread = read_stata(path) + expected = output.fillna('') + expected.number = expected.number.astype('int32') + tm.assert_frame_equal(reread, expected) diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 9772ceecfc7b1..5b0ea37a0bfcf 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -495,3 +495,10 @@ def test_rank_first_pct(dtype, ser, exp): result = s.rank(method='first', pct=True) expected = Series(exp).astype(result.dtype) assert_series_equal(result, expected) + + +def test_pct_max_many_rows(): + # GH 18271 + s = Series(np.arange(2**24 + 1)) + result = s.rank(pct=True).max() + assert result == 1 diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3642c4ee98a9e..ff505f2986b1a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1462,6 +1462,15 @@ def test_too_many_ndims(self): with pytest.raises(TypeError, match=msg): algos.rank(arr) + @pytest.mark.parametrize('values', [ + np.arange(2**24 + 1), + np.arange(2**25 + 2).reshape(2**24 + 1, 2)], + ids=['1d', '2d']) + def test_pct_max_many_rows(self, values): + # GH 18271 + result = algos.rank(values, pct=True).max() + assert result == 1 + def test_pad_backfill_object_segfault(): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 756385f0cfb56..7e0342e8b987a 100644 --- 
a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -24,12 +24,11 @@ notna, Timestamp, Timedelta) from pandas.compat import range, lrange, zip, OrderedDict -from pandas.errors import UnsupportedFunctionCall +from pandas.errors import AbstractMethodError, UnsupportedFunctionCall import pandas.tseries.offsets as offsets from pandas.tseries.offsets import Minute, BDay from pandas.core.groupby.groupby import DataError -import pandas.core.common as com from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import period_range, PeriodIndex, Period @@ -599,7 +598,7 @@ def index(self, _index_start, _index_end, _index_freq): @pytest.fixture def _series_name(self): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @pytest.fixture def _static_values(self, index): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 25c419e485db1..067a7d4622ca2 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -9,7 +9,6 @@ from pandas.core.dtypes.generic import ABCPeriod from pandas.core.tools.datetimes import to_datetime -import pandas.core.common as com # import after tools, dateutil check from dateutil.easter import easter @@ -29,6 +28,7 @@ roll_yearday, shift_month, BaseOffset) +from pandas.errors import AbstractMethodError __all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay', @@ -1097,7 +1097,7 @@ def apply(self, other): def _apply(self, n, other): """Handle specific apply logic for child classes""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) @apply_index_wraps def apply_index(self, i): @@ -1137,11 +1137,11 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): The roll array is based on the fact that i gets rolled back to the first day of the month. 
""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _apply_index_days(self, i, roll): """Apply the correct day for each date in i""" - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) class SemiMonthEnd(SemiMonthOffset): diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index e51e0c88e5b95..1171478de2eb4 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -196,8 +196,8 @@ def validate_args_and_kwargs(fname, args, kwargs, See Also -------- - validate_args : purely args validation - validate_kwargs : purely kwargs validation + validate_args : Purely args validation. + validate_kwargs : Purely kwargs validation. """ # Check that the total number of arguments passed in (i.e. diff --git a/requirements-dev.txt b/requirements-dev.txt index 93145d948c218..6678d205aca6c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,7 +4,8 @@ pytz Cython>=0.28.2 flake8 flake8-comprehensions -flake8-rst +flake8-rst==0.4.2 +gitpython hypothesis>=3.58.0 isort moto diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index ccd5f56141a6a..c1bdab73c2671 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -350,6 +350,35 @@ def private_classes(self): This mentions NDFrame, which is not correct. """ + def unknown_section(self): + """ + This section has an unknown section title. + + Unknown Section + --------------- + This should raise an error in the validation. + """ + + def sections_in_wrong_order(self): + """ + This docstring has the sections in the wrong order. + + Parameters + ---------- + name : str + This section is in the right position. 
+ + Examples + -------- + >>> print('So far Examples is good, as it goes before Parameters') + So far Examples is good, as it goes before Parameters + + See Also + -------- + function : This should generate an error, as See Also needs to go + before Examples. + """ + class BadSummaries(object): @@ -706,6 +735,11 @@ def test_bad_generic_functions(self, func): ('BadGenericDocStrings', 'private_classes', ("Private classes (NDFrame) should not be mentioned in public " 'docstrings',)), + ('BadGenericDocStrings', 'unknown_section', + ('Found unknown section "Unknown Section".',)), + ('BadGenericDocStrings', 'sections_in_wrong_order', + ('Wrong order of sections. "See Also" should be located before ' + '"Notes"',)), ('BadSeeAlso', 'desc_no_period', ('Missing period at end of description for See Also "Series.iloc"',)), ('BadSeeAlso', 'desc_first_letter_lowercase', diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index ed84e58049cae..7da77a1f60ad5 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -56,6 +56,9 @@ PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] DIRECTIVES = ['versionadded', 'versionchanged', 'deprecated'] +ALLOWED_SECTIONS = ['Parameters', 'Attributes', 'Methods', 'Returns', 'Yields', + 'Other Parameters', 'Raises', 'Warns', 'See Also', 'Notes', + 'References', 'Examples'] ERROR_MSGS = { 'GL01': 'Docstring text (summary) should start in the line immediately ' 'after the opening quotes (not in the same line, or leaving a ' @@ -69,6 +72,10 @@ 'mentioned in public docstrings', 'GL05': 'Tabs found at the start of line "{line_with_tabs}", please use ' 'whitespace only', + 'GL06': 'Found unknown section "{section}". Allowed sections are: ' + '{allowed_sections}', + 'GL07': 'Wrong order of sections. 
"{wrong_section}" should be located ' + 'before "{goes_before}", the right order is: {sorted_sections}', 'SS01': 'No summary found (a short summary in a single line should be ' 'present at the beginning of the docstring)', 'SS02': 'Summary does not start with a capital letter', @@ -353,6 +360,18 @@ def double_blank_lines(self): prev = row.strip() return False + @property + def section_titles(self): + sections = [] + self.doc._doc.reset() + while not self.doc._doc.eof(): + content = self.doc._read_to_next_section() + if (len(content) > 1 + and len(content[0]) == len(content[1]) + and set(content[1]) == {'-'}): + sections.append(content[0]) + return sections + @property def summary(self): return ' '.join(self.doc['Summary']) @@ -580,6 +599,25 @@ def validate_one(func_name): if re.match("^ *\t", line): errs.append(error('GL05', line_with_tabs=line.lstrip())) + unseen_sections = list(ALLOWED_SECTIONS) + for section in doc.section_titles: + if section not in ALLOWED_SECTIONS: + errs.append(error('GL06', + section=section, + allowed_sections=', '.join(ALLOWED_SECTIONS))) + else: + if section in unseen_sections: + section_idx = unseen_sections.index(section) + unseen_sections = unseen_sections[section_idx + 1:] + else: + section_idx = ALLOWED_SECTIONS.index(section) + goes_before = ALLOWED_SECTIONS[section_idx + 1] + errs.append(error('GL07', + sorted_sections=' > '.join(ALLOWED_SECTIONS), + wrong_section=section, + goes_before=goes_before)) + break + if not doc.summary: errs.append(error('SS01')) else: diff --git a/setup.cfg b/setup.cfg index 9f5384170a245..7212833435997 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,7 +34,22 @@ exclude = ignore = F821, # undefined name W391, # blank line at end of file [Seems to be a bug (v0.4.1)] - +exclude = + doc/source/whatsnew/v0.7.0.rst + doc/source/whatsnew/v0.10.1.rst + doc/source/whatsnew/v0.12.0.rst + doc/source/whatsnew/v0.13.0.rst + doc/source/whatsnew/v0.13.1.rst + doc/source/whatsnew/v0.14.0.rst + 
doc/source/whatsnew/v0.15.0.rst + doc/source/whatsnew/v0.16.0.rst + doc/source/whatsnew/v0.16.2.rst + doc/source/whatsnew/v0.17.0.rst + doc/source/whatsnew/v0.18.0.rst + doc/source/whatsnew/v0.18.1.rst + doc/source/whatsnew/v0.20.0.rst + doc/source/whatsnew/v0.21.0.rst + doc/source/whatsnew/v0.23.0.rst [yapf] based_on_style = pep8 @@ -405,3 +420,4 @@ skip= pandas/types/common.py, pandas/plotting/_compat.py, pandas/tests/extension/arrow/test_bool.py + doc/source/conf.py