Merge branch 'master' into fixturize_frame_tests_1
h-vetinari authored Oct 1, 2018
2 parents 733b889 + 2f1b842 commit 16a5297
Showing 63 changed files with 749 additions and 254 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -56,8 +56,8 @@
<tr>
<td></td>
<td>
<a href="https://ci.appveyor.com/project/pandas-dev/pandas">
<img src="https://ci.appveyor.com/api/projects/status/86vn83mxgnl4xf1s/branch/master?svg=true" alt="appveyor build status" />
<a href="https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master">
<img src="https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master" alt="Azure Pipelines build status" />
</a>
</td>
</tr>
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/algorithms.py
@@ -9,7 +9,7 @@
try:
hashing = import_module(imp)
break
except:
except (ImportError, TypeError, ValueError):
pass

from .pandas_vb_common import setup # noqa
13 changes: 10 additions & 3 deletions asv_bench/benchmarks/frame_methods.py
@@ -505,14 +505,21 @@ class NSort(object):
param_names = ['keep']

def setup(self, keep):
self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
self.df = DataFrame(np.random.randn(100000, 3),
columns=list('ABC'))

def time_nlargest(self, keep):
def time_nlargest_one_column(self, keep):
self.df.nlargest(100, 'A', keep=keep)

def time_nsmallest(self, keep):
def time_nlargest_two_columns(self, keep):
self.df.nlargest(100, ['A', 'B'], keep=keep)

def time_nsmallest_one_column(self, keep):
self.df.nsmallest(100, 'A', keep=keep)

def time_nsmallest_two_columns(self, keep):
self.df.nsmallest(100, ['A', 'B'], keep=keep)


class Describe(object):

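For reference, the split benchmarks above exercise both call forms of ``nlargest``/``nsmallest``. A minimal sketch of those calls outside the ASV harness (the frame size here is illustrative, not the benchmark's 100000 rows):

    import numpy as np
    from pandas import DataFrame

    # Small stand-in for the benchmark frame.
    df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))

    # One-column form: the 100 rows with the largest / smallest values in 'A'.
    df.nlargest(100, 'A', keep='first')
    df.nsmallest(100, 'A', keep='first')

    # Two-column form: ties in 'A' are broken by 'B', matching the new
    # *_two_columns benchmark cases.
    df.nlargest(100, ['A', 'B'], keep='first')
    df.nsmallest(100, ['A', 'B'], keep='first')
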
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/io/csv.py
@@ -1,11 +1,9 @@
import random
import timeit
import string

import numpy as np
import pandas.util.testing as tm
from pandas import DataFrame, Categorical, date_range, read_csv
from pandas.compat import PY2
from pandas.compat import cStringIO as StringIO

from ..pandas_vb_common import setup, BaseIO # noqa
@@ -181,8 +179,8 @@ def time_read_csv(self, sep, decimal, float_precision):
names=list('abc'), float_precision=float_precision)

def time_read_csv_python_engine(self, sep, decimal, float_precision):
read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
float_precision=None, names=list('abc'))
read_csv(self.data(self.StringIO_input), sep=sep, header=None,
engine='python', float_precision=None, names=list('abc'))


class ReadCSVCategorical(BaseIO):
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/join_merge.py
@@ -29,7 +29,7 @@ def setup(self):
try:
with warnings.catch_warnings(record=True):
self.mdf1.consolidate(inplace=True)
except:
except (AttributeError, TypeError):
pass
self.mdf2 = self.mdf1.copy()
self.mdf2.index = self.df2.index
5 changes: 2 additions & 3 deletions asv_bench/benchmarks/pandas_vb_common.py
@@ -2,14 +2,13 @@
from importlib import import_module

import numpy as np
from pandas import Panel

# Compatibility import for lib
for imp in ['pandas._libs.lib', 'pandas.lib']:
try:
lib = import_module(imp)
break
except:
except (ImportError, TypeError, ValueError):
pass

numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
@@ -34,7 +33,7 @@ def remove(self, f):
"""Remove created files"""
try:
os.remove(f)
except:
except OSError:
# On Windows, attempting to remove a file that is in use
# causes an exception to be raised
pass
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/stat_ops.py
@@ -18,7 +18,7 @@ def setup(self, op, dtype, axis, use_bottleneck):
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
try:
pd.options.compute.use_bottleneck = use_bottleneck
except:
except TypeError:
from pandas.core import nanops
nanops._USE_BOTTLENECK = use_bottleneck
self.df_func = getattr(df, op)
@@ -56,7 +56,7 @@ def setup(self, op, dtype, use_bottleneck):
s = pd.Series(np.random.randn(100000)).astype(dtype)
try:
pd.options.compute.use_bottleneck = use_bottleneck
except:
except TypeError:
from pandas.core import nanops
nanops._USE_BOTTLENECK = use_bottleneck
self.s_func = getattr(s, op)
1 change: 0 additions & 1 deletion asv_bench/benchmarks/timeseries.py
@@ -1,4 +1,3 @@
import warnings
from datetime import timedelta

import numpy as np
2 changes: 1 addition & 1 deletion ci/doctests.sh
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then

# DataFrame / Series docstrings
pytest --doctest-modules -v pandas/core/frame.py \
-k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
-k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"

if [ $? -ne "0" ]; then
RET=1
4 changes: 2 additions & 2 deletions ci/requirements-optional-pip.txt
@@ -14,7 +14,7 @@ lxml
matplotlib
nbsphinx
numexpr
openpyxl=2.5.5
openpyxl==2.5.5
pyarrow
pymysql
tables
@@ -28,4 +28,4 @@ statsmodels
xarray
xlrd
xlsxwriter
xlwt
xlwt
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf
Binary file not shown.
Binary file added doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx
Binary file not shown.
Binary file removed doc/cheatsheet/Pandas_Cheat_Sheet_JP.pdf
Binary file not shown.
Binary file removed doc/cheatsheet/Pandas_Cheat_Sheet_JP.pptx
Binary file not shown.
6 changes: 3 additions & 3 deletions doc/make.py
@@ -233,10 +233,10 @@ def _sphinx_build(self, kind):
'-b{}'.format(kind),
'-{}'.format(
'v' * self.verbosity) if self.verbosity else '',
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-Dexclude_patterns={}'.format(self.exclude_patterns),
SOURCE_PATH,
os.path.join(BUILD_PATH, kind))
'"{}"'.format(SOURCE_PATH),
'"{}"'.format(os.path.join(BUILD_PATH, kind)))

def _open_browser(self):
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')
9 changes: 9 additions & 0 deletions doc/source/api.rst
@@ -2603,3 +2603,12 @@ objects.
generated/pandas.Series.ix
generated/pandas.Series.imag
generated/pandas.Series.real


.. Can't convince sphinx to generate toctree for this class attribute.
.. So we do it manually to avoid a warning
.. toctree::
:hidden:

generated/pandas.api.extensions.ExtensionDtype.na_value
2 changes: 1 addition & 1 deletion doc/source/basics.rst
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
* :ref:`Categorical <categorical>`
* :ref:`Datetime with Timezone <timeseries.timezone_series>`
* :ref:`Period <timeseries.periods>`
* :ref:`Interval <advanced.indexing.intervallindex>`
* :ref:`Interval <indexing.intervallindex>`

Pandas uses the ``object`` dtype for storing strings.

15 changes: 15 additions & 0 deletions doc/source/computation.rst
@@ -153,6 +153,21 @@ Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword:
frame.corr(min_periods=12)
.. versionadded:: 0.24.0

The ``method`` argument can also be a callable for a generic correlation
calculation. In this case, it should be a single function
that produces a single value from two ndarray inputs. Suppose we wanted to
compute the correlation based on histogram intersection:

.. ipython:: python
# histogram intersection
histogram_intersection = lambda a, b: np.minimum(
np.true_divide(a, a.sum()), np.true_divide(b, b.sum())
).sum()
frame.corr(method=histogram_intersection)
A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to
compute the correlation between like-labeled Series contained in different
DataFrame objects.
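The new ``corr`` documentation above relies on ``method`` accepting a callable (added in 0.24.0, per the ``versionadded`` directive). A self-contained sketch of the same histogram-intersection computation, with ``frame`` replaced by an illustrative stand-in:

    import numpy as np
    import pandas as pd

    # Stand-in for the `frame` used in computation.rst.
    frame = pd.DataFrame(np.random.randn(1000, 5),
                         columns=['a', 'b', 'c', 'd', 'e'])

    def histogram_intersection(a, b):
        # Normalize each column to sum to 1, then sum the element-wise minima.
        return np.minimum(np.true_divide(a, a.sum()),
                          np.true_divide(b, b.sum())).sum()

    # The callable receives two ndarrays and must return a single value.
    frame.corr(method=histogram_intersection)
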
6 changes: 2 additions & 4 deletions doc/source/cookbook.rst
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
.. ipython:: python
df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
gb = df.groupby('A')
def replace(g):
mask = g < 0
g.loc[mask] = g[~mask].mean()
return g
mask = g < 0
return g.where(mask, g[~mask].mean())
gb.transform(replace)
8 changes: 4 additions & 4 deletions doc/source/ecosystem.rst
@@ -73,8 +73,8 @@ large data to thin clients.
`seaborn <https://seaborn.pydata.org>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Seaborn is a Python visualization library based on `matplotlib
<http://matplotlib.org>`__. It provides a high-level, dataset-oriented
Seaborn is a Python visualization library based on
`matplotlib <http://matplotlib.org>`__. It provides a high-level, dataset-oriented
interface for creating attractive statistical graphics. The plotting functions
in seaborn understand pandas objects and leverage pandas grouping operations
internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
(Note: HTML tables may or may not be
compatible with non-HTML Jupyter output formats.)

See :ref:`Options and Settings <options>` and :ref:`<options.available>`
See :ref:`Options and Settings <options>` and :ref:`options.available <available>`
for pandas ``display.`` settings.

`quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
Most pandas classes, methods and data attributes can be autocompleted in
Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
`IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
and Spyder's `Help pane<https://docs.spyder-ide.org/help.html>`__ can retrieve
and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
and render Numpydoc documentation on pandas objects in rich text with Sphinx
both automatically and on-demand.

29 changes: 13 additions & 16 deletions doc/source/io.rst
@@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
CSV & Text files
----------------

The two workhorse functions for reading text files (a.k.a. flat files) are
:func:`read_csv` and :func:`read_table`. They both use the same parsing code to
intelligently convert tabular data into a ``DataFrame`` object. See the
:ref:`cookbook<cookbook.csv>` for some advanced strategies.
The workhorse function for reading text files (a.k.a. flat files) is
:func:`read_csv`. See the :ref:`cookbook<cookbook.csv>` for some advanced strategies.

Parsing options
'''''''''''''''

The functions :func:`read_csv` and :func:`read_table` accept the following
common arguments:
:func:`read_csv` accepts the following common arguments:

Basic
+++++
@@ -780,8 +777,8 @@ Date Handling
Specifying Date Columns
+++++++++++++++++++++++

To better facilitate working with datetime data, :func:`read_csv` and
:func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser``
To better facilitate working with datetime data, :func:`read_csv`
uses the keyword arguments ``parse_dates`` and ``date_parser``
to allow users to specify a variety of columns and date/time formats to turn the
input text data into ``datetime`` objects.

@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns:
print(open('data/mindex_ex.csv').read())
The ``index_col`` argument to ``read_csv`` and ``read_table`` can take a list of
The ``index_col`` argument to ``read_csv`` can take a list of
column numbers to turn multiple columns into a ``MultiIndex`` for the index of the
returned object:

@@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``.
.. ipython:: python
print(open('tmp2.sv').read())
pd.read_csv('tmp2.sv', sep=None, engine='python')
print(open('tmp2.sv').read())
pd.read_csv('tmp2.sv', sep=None, engine='python')
.. _io.multiple_files:

@@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following:
.. ipython:: python
print(open('tmp.sv').read())
table = pd.read_table('tmp.sv', sep='|')
table = pd.read_csv('tmp.sv', sep='|')
table
By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return
By specifying a ``chunksize`` to ``read_csv``, the return
value will be an iterable object of type ``TextFileReader``:

.. ipython:: python
reader = pd.read_table('tmp.sv', sep='|', chunksize=4)
reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
reader
for chunk in reader:
@@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:

.. ipython:: python
reader = pd.read_table('tmp.sv', sep='|', iterator=True)
reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
reader.get_chunk(5)
.. ipython:: python
@@ -3067,7 +3064,7 @@ Clipboard

A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method,
which takes the contents of the clipboard buffer and passes them to the
``read_table`` method. For instance, you can copy the following text to the
``read_csv`` method. For instance, you can copy the following text to the
clipboard (CTRL-C on many operating systems):

.. code-block:: python
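The io.rst edits above consistently replace ``read_table`` with ``read_csv`` plus an explicit ``sep``. A minimal sketch of the chunked-reading pattern they describe (the file and its contents are made up for the example):

    import pandas as pd

    # Write a small '|'-separated file so the example is self-contained.
    with open('tmp.sv', 'w') as fh:
        fh.write('A|B|C\n1|2|3\n4|5|6\n7|8|9\n10|11|12\n')

    # read_csv with an explicit separator covers the old read_table use case.
    reader = pd.read_csv('tmp.sv', sep='|', chunksize=2)  # iterable TextFileReader
    for chunk in reader:
        print(chunk)

    # iterator=True also returns a TextFileReader; chunks are pulled on demand.
    reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
    print(reader.get_chunk(3))
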
5 changes: 3 additions & 2 deletions doc/source/text.rst
@@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi
s
u
s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-')
s.str.cat([u.values,
u.index.astype(str).values], na_rep='-')
All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:

.. ipython:: python
v
s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
s.str.cat([u, v], join='outer', na_rep='-')
If using ``join='right'`` on a list of ``others`` that contains different indexes,
the union of these indexes will be used as the basis for the final concatenation:
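The updated ``str.cat`` examples above lean on index alignment when ``join`` is given. A short sketch with made-up data (``s``, ``u`` and ``v`` here are stand-ins, not the Series defined in text.rst):

    import pandas as pd

    s = pd.Series(['a', 'b', 'c', 'd'])
    u = pd.Series(['x', 'y', 'z', 'w'], index=[3, 2, 1, 0])
    v = pd.Series(['1', '2', '3'], index=[0, 2, 4])

    # Equal-length array-likes without an index are concatenated positionally.
    s.str.cat([u.values, u.index.astype(str).values], na_rep='-')

    # With join='outer', the union of the indexes becomes the result index and
    # positions missing after alignment are filled with na_rep.
    s.str.cat([u, v], join='outer', na_rep='-')
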