diff --git a/.pep8speaks.yml b/.pep8speaks.yml
index fda26d87bf7f6..cd610907007eb 100644
--- a/.pep8speaks.yml
+++ b/.pep8speaks.yml
@@ -8,5 +8,4 @@ pycodestyle:
ignore: # Errors and warnings to ignore
- E402, # module level import not at top of file
- E731, # do not assign a lambda expression, use a def
- - E741, # do not use variables named 'l', 'O', or 'I'
- W503 # line break before binary operator
diff --git a/.travis.yml b/.travis.yml
index 32e6d2eae90a7..40baee2c03ea0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,12 +30,8 @@ matrix:
exclude:
# Exclude the default Python 3.5 build
- python: 3.5
- include:
- - os: osx
- language: generic
- env:
- - JOB="3.5, OSX" ENV_FILE="ci/travis-35-osx.yaml" TEST_ARGS="--skip-slow --skip-network"
+ include:
- dist: trusty
env:
- JOB="3.7" ENV_FILE="ci/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network"
@@ -64,7 +60,7 @@ matrix:
# In allow_failures
- dist: trusty
env:
- - JOB="3.6, NumPy dev" ENV_FILE="ci/travis-36-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
+ - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
addons:
apt:
packages:
@@ -79,7 +75,7 @@ matrix:
- JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
- dist: trusty
env:
- - JOB="3.6, NumPy dev" ENV_FILE="ci/travis-36-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
+ - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
addons:
apt:
packages:
diff --git a/README.md b/README.md
index 3dde5e5e2a76e..f26b9598bb5d3 100644
--- a/README.md
+++ b/README.md
@@ -56,8 +56,8 @@
-
-
+
+
diff --git a/appveyor.yml b/appveyor.yml
deleted file mode 100644
index c6199c1493f22..0000000000000
--- a/appveyor.yml
+++ /dev/null
@@ -1,91 +0,0 @@
-# With infos from
-# http://tjelvarolsson.com/blog/how-to-continuously-test-your-python-code-on-windows-using-appveyor/
-# https://packaging.python.org/en/latest/appveyor/
-# https://github.com/rmcgibbo/python-appveyor-conda-example
-
-# Backslashes in quotes need to be escaped: \ -> "\\"
-
-matrix:
- fast_finish: true # immediately finish build once one of the jobs fails.
-
-environment:
- global:
- # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
- # /E:ON and /V:ON options are not enabled in the batch script interpreter
- # See: http://stackoverflow.com/a/13751649/163740
- CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd"
- clone_folder: C:\projects\pandas
- PANDAS_TESTING_MODE: "deprecate"
-
- matrix:
-
- - CONDA_ROOT: "C:\\Miniconda3_64"
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
- PYTHON_VERSION: "3.6"
- PYTHON_ARCH: "64"
- CONDA_PY: "36"
- CONDA_NPY: "113"
-
- - CONDA_ROOT: "C:\\Miniconda3_64"
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
- PYTHON_VERSION: "2.7"
- PYTHON_ARCH: "64"
- CONDA_PY: "27"
- CONDA_NPY: "110"
-
-# We always use a 64-bit machine, but can build x86 distributions
-# with the PYTHON_ARCH variable (which is used by CMD_IN_ENV).
-platform:
- - x64
-
-# all our python builds have to happen in tests_script...
-build: false
-
-install:
- # cancel older builds for the same PR
- - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
- https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
- Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
- throw "There are newer queued builds for this pull request, failing early." }
-
- # this installs the appropriate Miniconda (Py2/Py3, 32/64 bit)
- # updates conda & installs: conda-build jinja2 anaconda-client
- - powershell .\ci\install.ps1
- - SET PATH=%CONDA_ROOT%;%CONDA_ROOT%\Scripts;%PATH%
- - echo "install"
- - cd
- - ls -ltr
- - git tag --sort v:refname
-
- # this can conflict with git
- - cmd: rmdir C:\cygwin /s /q
-
- # install our build environment
- - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false
- - cmd: conda update -q conda
- - cmd: conda config --set ssl_verify false
-
- # add the pandas channel *before* defaults to have defaults take priority
- - cmd: conda config --add channels conda-forge
- - cmd: conda config --add channels pandas
- - cmd: conda config --remove channels defaults
- - cmd: conda config --add channels defaults
-
- # this is now the downloaded conda...
- - cmd: conda info -a
-
- # create our env
- - cmd: conda env create -q -n pandas --file=ci\appveyor-%CONDA_PY%.yaml
- - cmd: activate pandas
- - cmd: conda list -n pandas
- # uninstall pandas if it's present
- - cmd: conda remove pandas -y --force & exit 0
- - cmd: pip uninstall -y pandas & exit 0
-
- # build em using the local source checkout in the correct windows env
- - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
-
-test_script:
- # tests
- - cmd: activate pandas
- - cmd: test.bat
diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index cccd38ef11251..fc34440ece2ed 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -9,7 +9,7 @@
try:
hashing = import_module(imp)
break
- except:
+ except (ImportError, TypeError, ValueError):
pass
from .pandas_vb_common import setup # noqa
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index 1819cfa2725db..f911d506b1f4f 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -505,14 +505,21 @@ class NSort(object):
param_names = ['keep']
def setup(self, keep):
- self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
+ self.df = DataFrame(np.random.randn(100000, 3),
+ columns=list('ABC'))
- def time_nlargest(self, keep):
+ def time_nlargest_one_column(self, keep):
self.df.nlargest(100, 'A', keep=keep)
- def time_nsmallest(self, keep):
+ def time_nlargest_two_columns(self, keep):
+ self.df.nlargest(100, ['A', 'B'], keep=keep)
+
+ def time_nsmallest_one_column(self, keep):
self.df.nsmallest(100, 'A', keep=keep)
+ def time_nsmallest_two_columns(self, keep):
+ self.df.nsmallest(100, ['A', 'B'], keep=keep)
+
class Describe(object):
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index 739ad6a3d278b..c5b147b152aa6 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -11,95 +11,110 @@
class NumericSeriesIndexing(object):
goal_time = 0.2
- params = [Int64Index, Float64Index]
- param = ['index']
+ params = [
+ (Int64Index, Float64Index),
+ ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+ ]
+ param_names = ['index_dtype', 'index_structure']
- def setup(self, index):
+ def setup(self, index, index_structure):
N = 10**6
- idx = index(range(N))
- self.data = Series(np.random.rand(N), index=idx)
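+        # 'nonunique_monotonic_inc' repeats a single label (54) so the index
+        # stays monotonically increasing but is no longer unique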
+ indices = {
+ 'unique_monotonic_inc': index(range(N)),
+ 'nonunique_monotonic_inc': index(
+ list(range(55)) + [54] + list(range(55, N - 1))),
+ }
+ self.data = Series(np.random.rand(N), index=indices[index_structure])
self.array = np.arange(10000)
self.array_list = self.array.tolist()
- def time_getitem_scalar(self, index):
+ def time_getitem_scalar(self, index, index_structure):
self.data[800000]
- def time_getitem_slice(self, index):
+ def time_getitem_slice(self, index, index_structure):
self.data[:800000]
- def time_getitem_list_like(self, index):
+ def time_getitem_list_like(self, index, index_structure):
self.data[[800000]]
- def time_getitem_array(self, index):
+ def time_getitem_array(self, index, index_structure):
self.data[self.array]
- def time_getitem_lists(self, index):
+ def time_getitem_lists(self, index, index_structure):
self.data[self.array_list]
- def time_iloc_array(self, index):
+ def time_iloc_array(self, index, index_structure):
self.data.iloc[self.array]
- def time_iloc_list_like(self, index):
+ def time_iloc_list_like(self, index, index_structure):
self.data.iloc[[800000]]
- def time_iloc_scalar(self, index):
+ def time_iloc_scalar(self, index, index_structure):
self.data.iloc[800000]
- def time_iloc_slice(self, index):
+ def time_iloc_slice(self, index, index_structure):
self.data.iloc[:800000]
- def time_ix_array(self, index):
+ def time_ix_array(self, index, index_structure):
self.data.ix[self.array]
- def time_ix_list_like(self, index):
+ def time_ix_list_like(self, index, index_structure):
self.data.ix[[800000]]
- def time_ix_scalar(self, index):
+ def time_ix_scalar(self, index, index_structure):
self.data.ix[800000]
- def time_ix_slice(self, index):
+ def time_ix_slice(self, index, index_structure):
self.data.ix[:800000]
- def time_loc_array(self, index):
+ def time_loc_array(self, index, index_structure):
self.data.loc[self.array]
- def time_loc_list_like(self, index):
+ def time_loc_list_like(self, index, index_structure):
self.data.loc[[800000]]
- def time_loc_scalar(self, index):
+ def time_loc_scalar(self, index, index_structure):
self.data.loc[800000]
- def time_loc_slice(self, index):
+ def time_loc_slice(self, index, index_structure):
self.data.loc[:800000]
class NonNumericSeriesIndexing(object):
goal_time = 0.2
- params = ['string', 'datetime']
- param_names = ['index']
+ params = [
+ ('string', 'datetime'),
+ ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+ ]
+ param_names = ['index_dtype', 'index_structure']
- def setup(self, index):
- N = 10**5
+ def setup(self, index, index_structure):
+ N = 10**6
indexes = {'string': tm.makeStringIndex(N),
'datetime': date_range('1900', periods=N, freq='s')}
index = indexes[index]
+ if index_structure == 'nonunique_monotonic_inc':
+ index = index.insert(item=index[2], loc=2)[:-1]
self.s = Series(np.random.rand(N), index=index)
self.lbl = index[80000]
- def time_getitem_label_slice(self, index):
+ def time_getitem_label_slice(self, index, index_structure):
self.s[:self.lbl]
- def time_getitem_pos_slice(self, index):
+ def time_getitem_pos_slice(self, index, index_structure):
self.s[:80000]
- def time_get_value(self, index):
+ def time_get_value(self, index, index_structure):
with warnings.catch_warnings(record=True):
self.s.get_value(self.lbl)
- def time_getitem_scalar(self, index):
+ def time_getitem_scalar(self, index, index_structure):
self.s[self.lbl]
+ def time_getitem_list_like(self, index, index_structure):
+ self.s[[self.lbl]]
+
class DataFrameStringIndexing(object):
diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index 2d4bdc7ae812a..12cb893462b87 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -1,11 +1,9 @@
import random
-import timeit
import string
import numpy as np
import pandas.util.testing as tm
from pandas import DataFrame, Categorical, date_range, read_csv
-from pandas.compat import PY2
from pandas.compat import cStringIO as StringIO
from ..pandas_vb_common import setup, BaseIO # noqa
@@ -181,8 +179,8 @@ def time_read_csv(self, sep, decimal, float_precision):
names=list('abc'), float_precision=float_precision)
def time_read_csv_python_engine(self, sep, decimal, float_precision):
- read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
- float_precision=None, names=list('abc'))
+ read_csv(self.data(self.StringIO_input), sep=sep, header=None,
+ engine='python', float_precision=None, names=list('abc'))
class ReadCSVCategorical(BaseIO):
diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py
index de0a3b33da147..7487a0d8489b7 100644
--- a/asv_bench/benchmarks/join_merge.py
+++ b/asv_bench/benchmarks/join_merge.py
@@ -29,7 +29,7 @@ def setup(self):
try:
with warnings.catch_warnings(record=True):
self.mdf1.consolidate(inplace=True)
- except:
+ except (AttributeError, TypeError):
pass
self.mdf2 = self.mdf1.copy()
self.mdf2.index = self.df2.index
diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py
index e255cd94f265b..e7b25d567e03b 100644
--- a/asv_bench/benchmarks/pandas_vb_common.py
+++ b/asv_bench/benchmarks/pandas_vb_common.py
@@ -2,14 +2,13 @@
from importlib import import_module
import numpy as np
-from pandas import Panel
# Compatibility import for lib
for imp in ['pandas._libs.lib', 'pandas.lib']:
try:
lib = import_module(imp)
break
- except:
+ except (ImportError, TypeError, ValueError):
pass
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
@@ -34,7 +33,7 @@ def remove(self, f):
"""Remove created files"""
try:
os.remove(f)
- except:
+ except OSError:
# On Windows, attempting to remove a file that is in use
# causes an exception to be raised
pass
diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index c447c78d0d070..ecfcb27806f54 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -18,7 +18,7 @@ def setup(self, op, dtype, axis, use_bottleneck):
df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
try:
pd.options.compute.use_bottleneck = use_bottleneck
- except:
+ except TypeError:
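+            # fall back for pandas versions that lack the
+            # compute.use_bottleneck option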
from pandas.core import nanops
nanops._USE_BOTTLENECK = use_bottleneck
self.df_func = getattr(df, op)
@@ -56,7 +56,7 @@ def setup(self, op, dtype, use_bottleneck):
s = pd.Series(np.random.randn(100000)).astype(dtype)
try:
pd.options.compute.use_bottleneck = use_bottleneck
- except:
+ except TypeError:
from pandas.core import nanops
nanops._USE_BOTTLENECK = use_bottleneck
self.s_func = getattr(s, op)
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index 2c98cc1659519..2557ba7672a0e 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -1,4 +1,3 @@
-import warnings
from datetime import timedelta
import numpy as np
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
new file mode 100644
index 0000000000000..5d473bfc5a38c
--- /dev/null
+++ b/azure-pipelines.yml
@@ -0,0 +1,25 @@
+# Adapted from https://github.com/numba/numba/blob/master/azure-pipelines.yml
+jobs:
+# Mac and Linux could potentially use the same template
+# except it isn't clear how to use a different build matrix
+# for each, so for now they are separate
+- template: ci/azure/macos.yml
+ parameters:
+ name: macOS
+ vmImage: xcode9-macos10.13
+# - template: ci/azure/linux.yml
+# parameters:
+# name: Linux
+# vmImage: ubuntu-16.04
+
+# Windows Python 2.7 needs VC 9.0 installed, and not sure
+# how to make that a conditional task, so for now these are
+# separate templates as well
+- template: ci/azure/windows.yml
+ parameters:
+ name: Windows
+ vmImage: vs2017-win2016
+- template: ci/azure/windows-py27.yml
+ parameters:
+ name: WindowsPy27
+ vmImage: vs2017-win2016
diff --git a/ci/travis-35-osx.yaml b/ci/azure-macos-35.yml
similarity index 100%
rename from ci/travis-35-osx.yaml
rename to ci/azure-macos-35.yml
diff --git a/ci/appveyor-27.yaml b/ci/azure-windows-27.yaml
similarity index 100%
rename from ci/appveyor-27.yaml
rename to ci/azure-windows-27.yaml
diff --git a/ci/appveyor-36.yaml b/ci/azure-windows-36.yaml
similarity index 100%
rename from ci/appveyor-36.yaml
rename to ci/azure-windows-36.yaml
diff --git a/ci/azure/macos.yml b/ci/azure/macos.yml
new file mode 100644
index 0000000000000..5bf8d18d6cbb9
--- /dev/null
+++ b/ci/azure/macos.yml
@@ -0,0 +1,43 @@
+parameters:
+ name: ''
+ vmImage: ''
+
+jobs:
+- job: ${{ parameters.name }}
+ pool:
+ vmImage: ${{ parameters.vmImage }}
+ strategy:
+ maxParallel: 11
+ matrix:
+ py35_np_110:
+ ENV_FILE: ci/azure-macos-35.yml
+ CONDA_PY: "35"
+ CONDA_ENV: pandas
+ TEST_ARGS: "--skip-slow --skip-network"
+
+ steps:
+ - script: |
+ if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386; fi
+ echo "Installing Miniconda"
+ ci/incremental/install_miniconda.sh
+ export PATH=$HOME/miniconda3/bin:$PATH
+ echo "Setting up Conda environment"
+ ci/incremental/setup_conda_environment.sh
+ displayName: 'Before Install'
+ - script: |
+ export PATH=$HOME/miniconda3/bin:$PATH
+ ci/incremental/build.sh
+ displayName: 'Build'
+ - script: |
+ export PATH=$HOME/miniconda3/bin:$PATH
+ ci/script_single.sh
+ ci/script_multi.sh
+ echo "[Test done]"
+ displayName: 'Test'
+ - script: |
+ export PATH=$HOME/miniconda3/bin:$PATH
+ source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
+ - task: PublishTestResults@2
+ inputs:
+ testResultsFiles: '/tmp/*.xml'
+ testRunTitle: 'MacOS-35'
diff --git a/ci/azure/windows-py27.yml b/ci/azure/windows-py27.yml
new file mode 100644
index 0000000000000..3e92c96263930
--- /dev/null
+++ b/ci/azure/windows-py27.yml
@@ -0,0 +1,45 @@
+parameters:
+ name: ''
+ vmImage: ''
+
+jobs:
+- job: ${{ parameters.name }}
+ pool:
+ vmImage: ${{ parameters.vmImage }}
+ strategy:
+ maxParallel: 11
+ matrix:
+    py27_np_110:
+    ENV_FILE: ci/azure-windows-27.yaml
+ CONDA_PY: "27"
+ CONDA_ENV: pandas
+
+ steps:
+ - task: CondaEnvironment@1
+ inputs:
+ updateConda: no
+ packageSpecs: ''
+
+ # Need to install VC 9.0 only for Python 2.7
+ # Once we understand how to do tasks conditional on build matrix variables
+ # we could merge this into azure-windows.yml
+ - powershell: |
+ $wc = New-Object net.webclient
+ $wc.Downloadfile("https://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi", "VCForPython27.msi")
+ Start-Process "VCForPython27.msi" /qn -Wait
+ displayName: 'Install VC 9.0'
+
+ - script: |
+ ci\\incremental\\setup_conda_environment.cmd
+ displayName: 'Before Install'
+ - script: |
+ ci\\incremental\\build.cmd
+ displayName: 'Build'
+ - script: |
+ call activate %CONDA_ENV%
+ pytest --junitxml=test-data.xml --skip-slow --skip-network pandas -n 2 -r sxX --strict %*
+ displayName: 'Test'
+ - task: PublishTestResults@2
+ inputs:
+ testResultsFiles: 'test-data.xml'
+ testRunTitle: 'Windows 27'
diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml
new file mode 100644
index 0000000000000..2ab8c6f320188
--- /dev/null
+++ b/ci/azure/windows.yml
@@ -0,0 +1,36 @@
+parameters:
+ name: ''
+ vmImage: ''
+
+jobs:
+- job: ${{ parameters.name }}
+ pool:
+ vmImage: ${{ parameters.vmImage }}
+ strategy:
+ maxParallel: 11
+ matrix:
+ py36_np14:
+    ENV_FILE: ci/azure-windows-36.yaml
+ CONDA_PY: "36"
+ CONDA_ENV: pandas
+
+ steps:
+ - task: CondaEnvironment@1
+ inputs:
+ updateConda: no
+ packageSpecs: ''
+
+ - script: |
+ ci\\incremental\\setup_conda_environment.cmd
+ displayName: 'Before Install'
+ - script: |
+ ci\\incremental\\build.cmd
+ displayName: 'Build'
+ - script: |
+ call activate %CONDA_ENV%
+ pytest --junitxml=test-data.xml --skip-slow --skip-network pandas -n 2 -r sxX --strict %*
+ displayName: 'Test'
+ - task: PublishTestResults@2
+ inputs:
+ testResultsFiles: 'test-data.xml'
+ testRunTitle: 'Windows 36'
diff --git a/ci/doctests.sh b/ci/doctests.sh
index 2af5dbd26aeb1..b3d7f6785815a 100755
--- a/ci/doctests.sh
+++ b/ci/doctests.sh
@@ -21,21 +21,21 @@ if [ "$DOCTEST" ]; then
# DataFrame / Series docstrings
pytest --doctest-modules -v pandas/core/frame.py \
- -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata -transform"
+ -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
if [ $? -ne "0" ]; then
RET=1
fi
pytest --doctest-modules -v pandas/core/series.py \
- -k"-nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict"
+ -k"-nonzero -reindex -searchsorted -to_dict"
if [ $? -ne "0" ]; then
RET=1
fi
pytest --doctest-modules -v pandas/core/generic.py \
- -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transform -transpose -values -xs"
+ -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs"
if [ $? -ne "0" ]; then
RET=1
diff --git a/ci/incremental/build.cmd b/ci/incremental/build.cmd
new file mode 100644
index 0000000000000..d2fd06d7d9e50
--- /dev/null
+++ b/ci/incremental/build.cmd
@@ -0,0 +1,10 @@
+@rem https://github.com/numba/numba/blob/master/buildscripts/incremental/build.cmd
+call activate %CONDA_ENV%
+
+@rem Build pandas extensions without silencing compile errors
+python setup.py build_ext -q --inplace
+
+@rem Install pandas locally
+python -m pip install -e .
+
+if %errorlevel% neq 0 exit /b %errorlevel%
diff --git a/ci/incremental/build.sh b/ci/incremental/build.sh
new file mode 100755
index 0000000000000..8f2301a3b7ef5
--- /dev/null
+++ b/ci/incremental/build.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+source activate $CONDA_ENV
+
+# Make sure any error below is reported as such
+set -v -e
+
+echo "[building extensions]"
+python setup.py build_ext -q --inplace
+python -m pip install -e .
+
+echo
+echo "[show environment]"
+conda list
+
+echo
+echo "[done]"
+exit 0
diff --git a/ci/incremental/install_miniconda.sh b/ci/incremental/install_miniconda.sh
new file mode 100755
index 0000000000000..a47dfdb324b34
--- /dev/null
+++ b/ci/incremental/install_miniconda.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+set -v -e
+
+# Install Miniconda
+unamestr=`uname`
+if [[ "$unamestr" == 'Linux' ]]; then
+ if [[ "$BITS32" == "yes" ]]; then
+ wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86.sh -O miniconda.sh
+ else
+ wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
+ fi
+elif [[ "$unamestr" == 'Darwin' ]]; then
+ wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh
+else
+ echo Error
+fi
+chmod +x miniconda.sh
+./miniconda.sh -b
diff --git a/ci/incremental/setup_conda_environment.cmd b/ci/incremental/setup_conda_environment.cmd
new file mode 100644
index 0000000000000..b4446c49fabd3
--- /dev/null
+++ b/ci/incremental/setup_conda_environment.cmd
@@ -0,0 +1,21 @@
+@rem https://github.com/numba/numba/blob/master/buildscripts/incremental/setup_conda_environment.cmd
+@rem The cmd /C hack circumvents a regression where conda installs a conda.bat
+@rem script in non-root environments.
+set CONDA_INSTALL=cmd /C conda install -q -y
+set PIP_INSTALL=pip install -q
+
+@echo on
+
+@rem Deactivate any environment
+call deactivate
+@rem Display root environment (for debugging)
+conda list
+@rem Clean up any left-over from a previous build
+conda remove --all -q -y -n %CONDA_ENV%
+@rem Create the pandas test environment from the Azure Windows environment file
+conda env create -n %CONDA_ENV% --file=ci\azure-windows-%CONDA_PY%.yaml
+
+call activate %CONDA_ENV%
+conda list
+
+if %errorlevel% neq 0 exit /b %errorlevel%
diff --git a/ci/incremental/setup_conda_environment.sh b/ci/incremental/setup_conda_environment.sh
new file mode 100755
index 0000000000000..c716a39138644
--- /dev/null
+++ b/ci/incremental/setup_conda_environment.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+set -v -e
+
+CONDA_INSTALL="conda install -q -y"
+PIP_INSTALL="pip install -q"
+
+# Deactivate any environment
+source deactivate
+# Display root environment (for debugging)
+conda list
+# Clean up any left-over from a previous build
+# (note workaround for https://github.com/conda/conda/issues/2679:
+# `conda env remove` issue)
+conda remove --all -q -y -n $CONDA_ENV
+
+echo
+echo "[create env]"
+time conda env create -q -n "${CONDA_ENV}" --file="${ENV_FILE}" || exit 1
+
+# Activate first
+set +v
+source activate $CONDA_ENV
+set -v
+
+# remove any installed pandas package
+# w/o removing anything else
+echo
+echo "[removing installed pandas]"
+conda remove pandas -y --force
+pip uninstall -y pandas
+
+echo
+echo "[no installed pandas]"
+conda list pandas
+
+# # Install the compiler toolchain
+# if [[ $(uname) == Linux ]]; then
+# if [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]] ; then
+# $CONDA_INSTALL gcc_linux-32 gxx_linux-32
+# else
+# $CONDA_INSTALL gcc_linux-64 gxx_linux-64
+# fi
+# elif [[ $(uname) == Darwin ]]; then
+# $CONDA_INSTALL clang_osx-64 clangxx_osx-64
+# # Install llvm-openmp and intel-openmp on OSX too
+# $CONDA_INSTALL llvm-openmp intel-openmp
+# fi
diff --git a/ci/install.ps1 b/ci/install.ps1
deleted file mode 100644
index 64ec7f81884cd..0000000000000
--- a/ci/install.ps1
+++ /dev/null
@@ -1,92 +0,0 @@
-# Sample script to install Miniconda under Windows
-# Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner, Robert McGibbon
-# License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
-
-$MINICONDA_URL = "http://repo.continuum.io/miniconda/"
-
-
-function DownloadMiniconda ($python_version, $platform_suffix) {
- $webclient = New-Object System.Net.WebClient
- $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe"
- $url = $MINICONDA_URL + $filename
-
- $basedir = $pwd.Path + "\"
- $filepath = $basedir + $filename
- if (Test-Path $filename) {
- Write-Host "Reusing" $filepath
- return $filepath
- }
-
- # Download and retry up to 3 times in case of network transient errors.
- Write-Host "Downloading" $filename "from" $url
- $retry_attempts = 2
- for($i=0; $i -lt $retry_attempts; $i++){
- try {
- $webclient.DownloadFile($url, $filepath)
- break
- }
- Catch [Exception]{
- Start-Sleep 1
- }
- }
- if (Test-Path $filepath) {
- Write-Host "File saved at" $filepath
- } else {
- # Retry once to get the error message if any at the last try
- $webclient.DownloadFile($url, $filepath)
- }
- return $filepath
-}
-
-
-function InstallMiniconda ($python_version, $architecture, $python_home) {
- Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home
- if (Test-Path $python_home) {
- Write-Host $python_home "already exists, skipping."
- return $false
- }
- if ($architecture -match "32") {
- $platform_suffix = "x86"
- } else {
- $platform_suffix = "x86_64"
- }
-
- $filepath = DownloadMiniconda $python_version $platform_suffix
- Write-Host "Installing" $filepath "to" $python_home
- $install_log = $python_home + ".log"
- $args = "/S /D=$python_home"
- Write-Host $filepath $args
- Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru
- if (Test-Path $python_home) {
- Write-Host "Python $python_version ($architecture) installation complete"
- } else {
- Write-Host "Failed to install Python in $python_home"
- Get-Content -Path $install_log
- Exit 1
- }
-}
-
-
-function InstallCondaPackages ($python_home, $spec) {
- $conda_path = $python_home + "\Scripts\conda.exe"
- $args = "install --yes " + $spec
- Write-Host ("conda " + $args)
- Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru
-}
-
-function UpdateConda ($python_home) {
- $conda_path = $python_home + "\Scripts\conda.exe"
- Write-Host "Updating conda..."
- $args = "update --yes conda"
- Write-Host $conda_path $args
- Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru
-}
-
-
-function main () {
- InstallMiniconda "3.5" $env:PYTHON_ARCH $env:CONDA_ROOT
- UpdateConda $env:CONDA_ROOT
- InstallCondaPackages $env:CONDA_ROOT "conda-build jinja2 anaconda-client"
-}
-
-main
diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt
index 2e1bf0ca22bcf..09ce8e59a3b46 100644
--- a/ci/requirements-optional-pip.txt
+++ b/ci/requirements-optional-pip.txt
@@ -14,7 +14,7 @@ lxml
matplotlib
nbsphinx
numexpr
-openpyxl=2.5.5
+openpyxl==2.5.5
pyarrow
pymysql
tables
@@ -28,4 +28,4 @@ statsmodels
xarray
xlrd
xlsxwriter
-xlwt
+xlwt
\ No newline at end of file
diff --git a/ci/travis-36-numpydev.yaml b/ci/travis-37-numpydev.yaml
similarity index 95%
rename from ci/travis-36-numpydev.yaml
rename to ci/travis-37-numpydev.yaml
index aba28634edd0d..82c75b7c91b1f 100644
--- a/ci/travis-36-numpydev.yaml
+++ b/ci/travis-37-numpydev.yaml
@@ -2,7 +2,7 @@ name: pandas
channels:
- defaults
dependencies:
- - python=3.6*
+ - python=3.7*
- pytz
- Cython>=0.28.2
# universal
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf b/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf
new file mode 100644
index 0000000000000..daa65a944e68a
Binary files /dev/null and b/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf differ
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx b/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx
new file mode 100644
index 0000000000000..6270a71e20ee8
Binary files /dev/null and b/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx differ
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet_JP.pdf b/doc/cheatsheet/Pandas_Cheat_Sheet_JP.pdf
deleted file mode 100644
index 746d1b6c980fe..0000000000000
Binary files a/doc/cheatsheet/Pandas_Cheat_Sheet_JP.pdf and /dev/null differ
diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet_JP.pptx b/doc/cheatsheet/Pandas_Cheat_Sheet_JP.pptx
deleted file mode 100644
index f8b98a6f1f8e4..0000000000000
Binary files a/doc/cheatsheet/Pandas_Cheat_Sheet_JP.pptx and /dev/null differ
diff --git a/doc/make.py b/doc/make.py
index d85747458148d..cab5fa0ed4c52 100755
--- a/doc/make.py
+++ b/doc/make.py
@@ -233,10 +233,10 @@ def _sphinx_build(self, kind):
'-b{}'.format(kind),
'-{}'.format(
'v' * self.verbosity) if self.verbosity else '',
- '-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
+ '-d"{}"'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-Dexclude_patterns={}'.format(self.exclude_patterns),
- SOURCE_PATH,
- os.path.join(BUILD_PATH, kind))
+ '"{}"'.format(SOURCE_PATH),
+ '"{}"'.format(os.path.join(BUILD_PATH, kind)))
def _open_browser(self):
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 611afb3670ebc..835c4cc9d4ab3 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -15,7 +15,8 @@
MultiIndex / Advanced Indexing
******************************
-This section covers indexing with a ``MultiIndex`` and :ref:`more advanced indexing features `.
+This section covers :ref:`indexing with a MultiIndex `
+and :ref:`other advanced indexing features `.
See the :ref:`Indexing and Selecting Data ` for general indexing documentation.
@@ -37,7 +38,7 @@ Hierarchical / Multi-level indexing is very exciting as it opens the door to som
quite sophisticated data analysis and manipulation, especially for working with
higher dimensional data. In essence, it enables you to store and manipulate
data with an arbitrary number of dimensions in lower dimensional data
-structures like Series (1d) and DataFrame (2d).
+structures like ``Series`` (1d) and ``DataFrame`` (2d).
In this section, we will show what exactly we mean by "hierarchical" indexing
and how it integrates with all of the pandas indexing functionality
@@ -83,8 +84,8 @@ to use the :meth:`MultiIndex.from_product` method:
iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
pd.MultiIndex.from_product(iterables, names=['first', 'second'])
-As a convenience, you can pass a list of arrays directly into Series or
-DataFrame to construct a ``MultiIndex`` automatically:
+As a convenience, you can pass a list of arrays directly into ``Series`` or
+``DataFrame`` to construct a ``MultiIndex`` automatically:
.. ipython:: python
@@ -213,8 +214,8 @@ tuples:
s + s[:-2]
s + s[::2]
-``reindex`` can be called with another ``MultiIndex``, or even a list or array
-of tuples:
+The :meth:`~DataFrame.reindex` method of ``Series``/``DataFrame`` objects can be
+called with another ``MultiIndex``, or even a list or array of tuples:
.. ipython:: python
@@ -413,7 +414,7 @@ selecting data at a particular level of a ``MultiIndex`` easier.
# using the slicers
df.loc[(slice(None),'one'),:]
-You can also select on the columns with :meth:`~pandas.MultiIndex.xs`, by
+You can also select on the columns with ``xs``, by
providing the axis argument.
.. ipython:: python
@@ -426,7 +427,7 @@ providing the axis argument.
# using the slicers
df.loc[:,(slice(None),'one')]
-:meth:`~pandas.MultiIndex.xs` also allows selection with multiple keys.
+``xs`` also allows selection with multiple keys.
.. ipython:: python
@@ -437,7 +438,7 @@ providing the axis argument.
# using the slicers
df.loc[:,('bar','one')]
-You can pass ``drop_level=False`` to :meth:`~pandas.MultiIndex.xs` to retain
+You can pass ``drop_level=False`` to ``xs`` to retain
the level that was selected.
.. ipython:: python
@@ -460,9 +461,9 @@ Compare the above with the result using ``drop_level=True`` (the default value).
Advanced reindexing and alignment
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The parameter ``level`` has been added to the ``reindex`` and ``align`` methods
-of pandas objects. This is useful to broadcast values across a level. For
-instance:
+Using the parameter ``level`` in the :meth:`~DataFrame.reindex` and
+:meth:`~DataFrame.align` methods of pandas objects is useful to broadcast
+values across a level. For instance:
.. ipython:: python
@@ -480,10 +481,10 @@ instance:
df2_aligned
-Swapping levels with :meth:`~pandas.MultiIndex.swaplevel`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Swapping levels with ``swaplevel``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The ``swaplevel`` function can switch the order of two levels:
+The :meth:`~MultiIndex.swaplevel` method can switch the order of two levels:
.. ipython:: python
@@ -492,21 +493,21 @@ The ``swaplevel`` function can switch the order of two levels:
.. _advanced.reorderlevels:
-Reordering levels with :meth:`~pandas.MultiIndex.reorder_levels`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Reordering levels with ``reorder_levels``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The ``reorder_levels`` function generalizes the ``swaplevel`` function,
-allowing you to permute the hierarchical index levels in one step:
+The :meth:`~MultiIndex.reorder_levels` method generalizes the ``swaplevel``
+method, allowing you to permute the hierarchical index levels in one step:
.. ipython:: python
df[:5].reorder_levels([1,0], axis=0)
-Sorting a :class:`~pandas.MultiIndex`
--------------------------------------
+Sorting a ``MultiIndex``
+------------------------
-For MultiIndex-ed objects to be indexed and sliced effectively, they need
-to be sorted. As with any index, you can use ``sort_index``.
+For :class:`MultiIndex`-ed objects to be indexed and sliced effectively,
+they need to be sorted. As with any index, you can use :meth:`~DataFrame.sort_index`.
.. ipython:: python
@@ -658,9 +659,9 @@ faster than fancy indexing.
Index Types
-----------
-We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex``
-are shown :ref:`here `, and information about
-``TimedeltaIndex`` is found :ref:`here `.
+We have discussed ``MultiIndex`` in the previous sections pretty extensively.
+Documentation about ``DatetimeIndex`` and ``PeriodIndex`` is shown :ref:`here `,
+and documentation about ``TimedeltaIndex`` is found :ref:`here `.
In the following sub-sections we will highlight some other index types.
@@ -1004,8 +1005,8 @@ Non-monotonic indexes require exact matches
If the index of a ``Series`` or ``DataFrame`` is monotonically increasing or decreasing, then the bounds
of a label-based slice can be outside the range of the index, much like slice indexing a
-normal Python ``list``. Monotonicity of an index can be tested with the ``is_monotonic_increasing`` and
-``is_monotonic_decreasing`` attributes.
+normal Python ``list``. Monotonicity of an index can be tested with the :meth:`~Index.is_monotonic_increasing` and
+:meth:`~Index.is_monotonic_decreasing` attributes.
.. ipython:: python
@@ -1039,9 +1040,9 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
In [11]: df.loc[2:3, :]
KeyError: 'Cannot get right slice bound for non-unique label: 3'
-:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
+``Index.is_monotonic_increasing`` and ``Index.is_monotonic_decreasing`` only check that
an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
-:meth:`Index.is_unique`
+the :meth:`~Index.is_unique` attribute.
.. ipython:: python
@@ -1057,7 +1058,7 @@ Compared with standard Python sequence slicing in which the slice endpoint is
not inclusive, label-based slicing in pandas **is inclusive**. The primary
reason for this is that it is often not possible to easily determine the
"successor" or next element after a particular label in an index. For example,
-consider the following Series:
+consider the following ``Series``:
.. ipython:: python
diff --git a/doc/source/api.rst b/doc/source/api.rst
index 9c3770a497cf8..073ed8a082a11 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -61,6 +61,12 @@ Excel
read_excel
ExcelFile.parse
+.. autosummary::
+ :toctree: generated/
+ :template: autosummary/class_without_autosummary.rst
+
+ ExcelWriter
+
JSON
~~~~
@@ -2597,3 +2603,12 @@ objects.
generated/pandas.Series.ix
generated/pandas.Series.imag
generated/pandas.Series.real
+
+
+.. Can't convince sphinx to generate toctree for this class attribute.
+.. So we do it manually to avoid a warning
+
+.. toctree::
+ :hidden:
+
+ generated/pandas.api.extensions.ExtensionDtype.na_value
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index c18b94fea9a28..6eeb97349100a 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
* :ref:`Categorical `
* :ref:`Datetime with Timezone `
* :ref:`Period `
-* :ref:`Interval `
+* :ref:`Interval `
Pandas uses the ``object`` dtype for storing strings.
diff --git a/doc/source/computation.rst b/doc/source/computation.rst
index 5e7b8be5f8af0..0d2021de8f88e 100644
--- a/doc/source/computation.rst
+++ b/doc/source/computation.rst
@@ -153,6 +153,21 @@ Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword:
frame.corr(min_periods=12)
+.. versionadded:: 0.24.0
+
+The ``method`` argument can also be a callable for a generic correlation
+calculation. In this case, it should be a single function
+that produces a single value from two ndarray inputs. Suppose we wanted to
+compute the correlation based on histogram intersection:
+
+.. ipython:: python
+
+ # histogram intersection
+ histogram_intersection = lambda a, b: np.minimum(
+ np.true_divide(a, a.sum()), np.true_divide(b, b.sum())
+ ).sum()
+ frame.corr(method=histogram_intersection)
+
A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to
compute the correlation between like-labeled Series contained in different
DataFrame objects.
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 60bfd07961b38..65e151feeba67 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -632,6 +632,14 @@ Otherwise, you need to do it manually:
warnings.warn('Use new_func instead.', FutureWarning, stacklevel=2)
new_func()
+You'll also need to
+
+1. Write a new test that asserts a warning is issued when calling with the deprecated argument
+2. Update all of pandas' existing tests and code to use the new argument
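+
+A minimal sketch of such a test (``df.some_operation`` and ``old_arg`` are
+hypothetical placeholders, not real pandas API):
+
+.. code-block:: python
+
+    import pandas as pd
+    import pandas.util.testing as tm
+
+    def test_old_arg_deprecated():
+        # calling with the deprecated argument should emit a FutureWarning
+        df = pd.DataFrame({'A': [1, 2]})
+        with tm.assert_produces_warning(FutureWarning):
+            df.some_operation(old_arg=True)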
+
+See :ref:`contributing.warnings` for more.
+
+
.. _contributing.ci:
Testing With Continuous Integration
@@ -859,6 +867,55 @@ preferred if the inputs or logic are simple, with Hypothesis tests reserved
for cases with complex logic or where there are too many combinations of
options or subtle interactions to test (or think of!) all of them.
+.. _contributing.warnings:
+
+Testing Warnings
+~~~~~~~~~~~~~~~~
+
+By default, one of pandas' CI workers will fail if any unhandled warnings are emitted.
+
+If your change involves checking that a warning is actually emitted, use
+``tm.assert_produces_warning(ExpectedWarning)``.
+
+
+.. code-block:: python
+
+    with tm.assert_produces_warning(FutureWarning):
+ df.some_operation()
+
+We prefer this to the ``pytest.warns`` context manager because ours checks that the warning's
+stacklevel is set correctly. The stacklevel is what ensures the *user's* file name and line number
+are printed in the warning, rather than something internal to pandas. It represents the number of
+function calls from user code (e.g. ``df.some_operation()``) to the function that actually emits
+the warning. Our linter will fail the build if you use ``pytest.warns`` in a test.
+
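+For example, with one function call between the user's code and
+``warnings.warn``, the warning needs ``stacklevel=2`` to point at the user's
+line (a minimal sketch; ``old_func`` is a hypothetical name):
+
+.. code-block:: python
+
+    import warnings
+
+    def old_func():
+        # one frame sits between the user's call and warnings.warn, so
+        # stacklevel=2 attributes the warning to the user's code
+        warnings.warn('Use new_func instead.', FutureWarning, stacklevel=2)
+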
+If you have a test that would emit a warning, but you aren't actually testing the
+warning itself (say because it's going to be removed in the future, or because we're
+matching a 3rd-party library's behavior), then use ``pytest.mark.filterwarnings`` to
+ignore the warning.
+
+.. code-block:: python
+
+ @pytest.mark.filterwarnings("ignore:msg:category")
+ def test_thing(self):
+ ...
+
+If the test generates a warning of class ``category`` whose message starts
+with ``msg``, the warning will be ignored and the test will pass.
+
+If you need finer-grained control, you can use Python's usual
+`warnings module `__
+to control whether a warning is ignored / raised at different places within
+a single test.
+
+.. code-block:: python
+
+    with warnings.catch_warnings():
+ warnings.simplefilter("ignore", FutureWarning)
+ # Or use warnings.filterwarnings(...)
+
+Alternatively, consider breaking up the unit test.
+
Running the test suite
----------------------
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index f6fa9e9f86143..a4dc99383a562 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
.. ipython:: python
df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
-
gb = df.groupby('A')
def replace(g):
- mask = g < 0
- g.loc[mask] = g[~mask].mean()
- return g
+ mask = g < 0
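+        # where(cond, other) keeps values where cond is True and fills the
+        # rest from other, so passing ~mask replaces only the negative entries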
+        return g.where(~mask, g[~mask].mean())
gb.transform(replace)
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index 1014982fea21a..7fffcadd8ee8c 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -73,8 +73,8 @@ large data to thin clients.
`seaborn `__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Seaborn is a Python visualization library based on `matplotlib
-`__. It provides a high-level, dataset-oriented
+Seaborn is a Python visualization library based on
+`matplotlib `__. It provides a high-level, dataset-oriented
interface for creating attractive statistical graphics. The plotting functions
in seaborn understand pandas objects and leverage pandas grouping operations
internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
(Note: HTML tables may or may not be
compatible with non-HTML Jupyter output formats.)
-See :ref:`Options and Settings ` and :ref:``
+See :ref:`Options and Settings ` and :ref:`options.available `
for pandas ``display.`` settings.
`quantopian/qgrid `__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
Most pandas classes, methods and data attributes can be autocompleted in
Spyder's `Editor `__ and
`IPython Console `__,
-and Spyder's `Help pane`__ can retrieve
+and Spyder's `Help pane `__ can retrieve
and render Numpydoc documentation on pandas objects in rich text with Sphinx
both automatically and on-demand.
diff --git a/doc/source/io.rst b/doc/source/io.rst
index c2c8c1c17700f..039cba2993381 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
CSV & Text files
----------------
-The two workhorse functions for reading text files (a.k.a. flat files) are
-:func:`read_csv` and :func:`read_table`. They both use the same parsing code to
-intelligently convert tabular data into a ``DataFrame`` object. See the
-:ref:`cookbook` for some advanced strategies.
+The workhorse function for reading text files (a.k.a. flat files) is
+:func:`read_csv`. See the :ref:`cookbook` for some advanced strategies.
Parsing options
'''''''''''''''
-The functions :func:`read_csv` and :func:`read_table` accept the following
-common arguments:
+:func:`read_csv` accepts the following common arguments:
Basic
+++++
@@ -780,8 +777,8 @@ Date Handling
Specifying Date Columns
+++++++++++++++++++++++
-To better facilitate working with datetime data, :func:`read_csv` and
-:func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser``
+To better facilitate working with datetime data, :func:`read_csv`
+uses the keyword arguments ``parse_dates`` and ``date_parser``
to allow users to specify a variety of columns and date/time formats to turn the
input text data into ``datetime`` objects.
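+
+For example, a minimal sketch (``'foo.csv'`` is a hypothetical file whose
+first column holds date strings):
+
+.. code-block:: python
+
+    # parse the first column as dates and use it as the index
+    df = pd.read_csv('foo.csv', index_col=0, parse_dates=True)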
@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns:
print(open('data/mindex_ex.csv').read())
-The ``index_col`` argument to ``read_csv`` and ``read_table`` can take a list of
+The ``index_col`` argument to ``read_csv`` can take a list of
column numbers to turn multiple columns into a ``MultiIndex`` for the index of the
returned object:
@@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``.
.. ipython:: python
- print(open('tmp2.sv').read())
- pd.read_csv('tmp2.sv', sep=None, engine='python')
+ print(open('tmp2.sv').read())
+ pd.read_csv('tmp2.sv', sep=None, engine='python')
.. _io.multiple_files:
@@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following:
.. ipython:: python
print(open('tmp.sv').read())
- table = pd.read_table('tmp.sv', sep='|')
+ table = pd.read_csv('tmp.sv', sep='|')
table
-By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return
+By specifying a ``chunksize`` to ``read_csv``, the return
value will be an iterable object of type ``TextFileReader``:
.. ipython:: python
- reader = pd.read_table('tmp.sv', sep='|', chunksize=4)
+ reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
reader
for chunk in reader:
@@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:
.. ipython:: python
- reader = pd.read_table('tmp.sv', sep='|', iterator=True)
+ reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
reader.get_chunk(5)
.. ipython:: python
@@ -3067,7 +3064,7 @@ Clipboard
A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method,
which takes the contents of the clipboard buffer and passes them to the
-``read_table`` method. For instance, you can copy the following text to the
+``read_csv`` method. For instance, you can copy the following text to the
clipboard (CTRL-C on many operating systems):
.. code-block:: python
@@ -4570,6 +4567,9 @@ dtypes, including extension dtypes such as datetime with tz.
Several caveats.
* Duplicate column names and non-string columns names are not supported.
+* The ``pyarrow`` engine always writes the index to the output, but ``fastparquet`` only writes non-default
+ indexes. This extra column can cause problems for non-Pandas consumers that are not expecting it. You can
+ force including or omitting indexes with the ``index`` argument, regardless of the underlying engine.
* Index level names, if specified, must be strings.
* Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
* Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message
@@ -4633,6 +4633,41 @@ Read only certain columns of a parquet file.
os.remove('example_pa.parquet')
os.remove('example_fp.parquet')
+
+Handling Indexes
+''''''''''''''''
+
+Serializing a ``DataFrame`` to parquet may include the implicit index as one or
+more columns in the output file. Thus, this code:
+
+.. ipython:: python
+
+ df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
+ df.to_parquet('test.parquet', engine='pyarrow')
+
+creates a parquet file with *three* columns if you use ``pyarrow`` for serialization:
+``a``, ``b``, and ``__index_level_0__``. If you're using ``fastparquet``, the
+index `may or may not `_
+be written to the file.
+
+This unexpected extra column causes some databases like Amazon Redshift to reject
+the file, because that column doesn't exist in the target table.
+
+If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
+:func:`~pandas.DataFrame.to_parquet`:
+
+.. ipython:: python
+
+ df.to_parquet('test.parquet', index=False)
+
+This creates a parquet file with just the two expected columns, ``a`` and ``b``.
+If your ``DataFrame`` has a custom index, you won't get it back when you load
+this file into a ``DataFrame``.
+
+Passing ``index=True`` will *always* write the index, even if that's not the
+underlying engine's default behavior.
+
+
.. _io.sql:
SQL Queries
diff --git a/doc/source/text.rst b/doc/source/text.rst
index 61583a179e572..d01c48695d0d6 100644
--- a/doc/source/text.rst
+++ b/doc/source/text.rst
@@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi
s
u
- s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-')
+ s.str.cat([u.values,
+ u.index.astype(str).values], na_rep='-')
All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None:
.. ipython:: python
v
- s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
+ s.str.cat([u, v], join='outer', na_rep='-')
If using ``join='right'`` on a list of ``others`` that contains different indexes,
the union of these indexes will be used as the basis for the final concatenation:
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index 5dfac98d069e7..85b0abe421eb2 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -21,51 +21,59 @@
Time Series / Date functionality
********************************
-pandas has proven very successful as a tool for working with time series data,
-especially in the financial data analysis space. Using the NumPy ``datetime64`` and ``timedelta64`` dtypes,
-we have consolidated a large number of features from other Python libraries like ``scikits.timeseries`` as well as created
+pandas contains extensive capabilities and features for working with time series data for all domains.
+Using the NumPy ``datetime64`` and ``timedelta64`` dtypes, pandas has consolidated a large number of
+features from other Python libraries like ``scikits.timeseries`` as well as created
a tremendous amount of new functionality for manipulating time series data.
-In working with time series data, we will frequently seek to:
+For example, pandas supports:
-* generate sequences of fixed-frequency dates and time spans
-* conform or convert time series to a particular frequency
-* compute "relative" dates based on various non-standard time increments
- (e.g. 5 business days before the last business day of the year), or "roll"
- dates forward or backward
+Parsing time series information from various sources and formats
-pandas provides a relatively compact and self-contained set of tools for
-performing the above tasks.
+.. ipython:: python
+
+ dti = pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01'), datetime(2018, 1, 1)])
+ dti
-Create a range of dates:
+Generate sequences of fixed-frequency dates and time spans
.. ipython:: python
- # 72 hours starting with midnight Jan 1st, 2011
- rng = pd.date_range('1/1/2011', periods=72, freq='H')
- rng[:5]
+ dti = pd.date_range('2018-01-01', periods=3, freq='H')
+ dti
-Index pandas objects with dates:
+Manipulating and converting date times with timezone information
.. ipython:: python
- ts = pd.Series(np.random.randn(len(rng)), index=rng)
- ts.head()
+ dti = dti.tz_localize('UTC')
+ dti
+ dti.tz_convert('US/Pacific')
-Change frequency and fill gaps:
+Resampling or converting a time series to a particular frequency
.. ipython:: python
- # to 45 minute frequency and forward fill
- converted = ts.asfreq('45Min', method='pad')
- converted.head()
+ idx = pd.date_range('2018-01-01', periods=5, freq='H')
+ ts = pd.Series(range(len(idx)), index=idx)
+ ts
+ ts.resample('2H').mean()
-Resample the series to a daily frequency:
+Performing date and time arithmetic with absolute or relative time increments
.. ipython:: python
- # Daily means
- ts.resample('D').mean()
+ friday = pd.Timestamp('2018-01-05')
+ friday.day_name()
+ # Add 1 day
+ saturday = friday + pd.Timedelta('1 day')
+ saturday.day_name()
+ # Add 1 business day (Friday --> Monday)
+ monday = friday + pd.tseries.offsets.BDay()
+ monday.day_name()
+
+pandas provides a relatively compact and self-contained set of tools for
+performing the above tasks and more.
.. _timeseries.overview:
@@ -73,17 +81,54 @@ Resample the series to a daily frequency:
Overview
--------
-The following table shows the type of time-related classes pandas can handle and
-how to create them.
+pandas captures four general time-related concepts:
+
+#. Date times: A specific date and time with timezone support. Similar to ``datetime.datetime`` from the standard library.
+#. Time deltas: An absolute time duration. Similar to ``datetime.timedelta`` from the standard library.
+#. Time spans: A span of time defined by a point in time and its associated frequency.
+#. Date offsets: A relative time duration that respects calendar arithmetic. Similar to ``dateutil.relativedelta.relativedelta`` from the ``dateutil`` package.
+
+===================== ================= =================== ============================================ ========================================
+Concept Scalar Class Array Class pandas Data Type Primary Creation Method
+===================== ================= =================== ============================================ ========================================
+Date times ``Timestamp`` ``DatetimeIndex`` ``datetime64[ns]`` or ``datetime64[ns, tz]`` ``to_datetime`` or ``date_range``
+Time deltas ``Timedelta`` ``TimedeltaIndex`` ``timedelta64[ns]`` ``to_timedelta`` or ``timedelta_range``
+Time spans ``Period`` ``PeriodIndex`` ``period[freq]`` ``Period`` or ``period_range``
+Date offsets ``DateOffset`` ``None`` ``None`` ``DateOffset``
+===================== ================= =================== ============================================ ========================================
+
+For time series data, it's conventional to represent the time component in the index of a :class:`Series` or :class:`DataFrame`
+so manipulations can be performed with respect to the time element.
+
+.. ipython:: python
+
+ pd.Series(range(3), index=pd.date_range('2000', freq='D', periods=3))
+
+However, :class:`Series` and :class:`DataFrame` can also directly support the time component as data itself.
+
+.. ipython:: python
+
+ pd.Series(pd.date_range('2000', freq='D', periods=3))
+
+:class:`Series` and :class:`DataFrame` have extended data type support and functionality for ``datetime`` and ``timedelta``
+data when the time component is used as the data itself. ``Period`` and ``DateOffset`` data will be stored as ``object`` dtype.
+
+.. ipython:: python
+
+ pd.Series(pd.period_range('1/1/2011', freq='M', periods=3))
+ pd.Series(pd.date_range('1/1/2011', freq='M', periods=3))
+
+Lastly, pandas represents null date times, time deltas, and time spans as ``NaT``, which
+is useful for representing missing or null date-like values and behaves similarly
+to how ``np.nan`` does for float data.
+
+.. ipython:: python
-================= =============================== ===================================================================
-Class Remarks How to create
-================= =============================== ===================================================================
-``Timestamp`` Represents a single timestamp ``to_datetime``, ``Timestamp``
-``DatetimeIndex`` Index of ``Timestamp`` ``to_datetime``, ``date_range``, ``bdate_range``, ``DatetimeIndex``
-``Period`` Represents a single time span ``Period``
-``PeriodIndex`` Index of ``Period`` ``period_range``, ``PeriodIndex``
-================= =============================== ===================================================================
+ pd.Timestamp(pd.NaT)
+ pd.Timedelta(pd.NaT)
+ pd.Period(pd.NaT)
+ # Equality acts as np.nan would
+ pd.NaT == pd.NaT
.. _timeseries.representation:
@@ -708,18 +753,28 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost:
Iterating through groups
------------------------
-With the :ref:`Resampler` object in hand, iterating through the grouped data is very
+With the ``Resampler`` object in hand, iterating through the grouped data is very
natural and functions similarly to :py:func:`itertools.groupby`:
.. ipython:: python
- resampled = df.resample('H')
+ small = pd.Series(
+ range(6),
+ index=pd.to_datetime(['2017-01-01T00:00:00',
+ '2017-01-01T00:30:00',
+ '2017-01-01T00:31:00',
+ '2017-01-01T01:00:00',
+ '2017-01-01T03:00:00',
+ '2017-01-01T03:05:00'])
+ )
+ resampled = small.resample('H')
for name, group in resampled:
- print(name)
- print(group)
+ print("Group: ", name)
+ print("-" * 27)
+ print(group, end="\n\n")
-See :ref:`groupby.iterating-label`.
+See :ref:`groupby.iterating-label` or :meth:`Resampler.__iter__` for more.
.. _timeseries.components:
@@ -865,26 +920,22 @@ It's definitely worth exploring the ``pandas.tseries.offsets`` module and the
various docstrings for the classes.
These operations (``apply``, ``rollforward`` and ``rollback``) preserve time
-(hour, minute, etc) information by default. To reset time, use ``normalize=True``
-when creating the offset instance. If ``normalize=True``, the result is
-normalized after the function is applied.
-
+(hour, minute, etc.) information by default. To reset time, use ``normalize``
+before or after applying the operation (depending on whether you want the
+time information included in the operation).
.. ipython:: python
+ ts = pd.Timestamp('2014-01-01 09:00')
day = Day()
- day.apply(pd.Timestamp('2014-01-01 09:00'))
-
- day = Day(normalize=True)
- day.apply(pd.Timestamp('2014-01-01 09:00'))
+ day.apply(ts)
+ day.apply(ts).normalize()
+ ts = pd.Timestamp('2014-01-01 22:00')
hour = Hour()
- hour.apply(pd.Timestamp('2014-01-01 22:00'))
-
- hour = Hour(normalize=True)
- hour.apply(pd.Timestamp('2014-01-01 22:00'))
- hour.apply(pd.Timestamp('2014-01-01 23:00'))
-
+ hour.apply(ts)
+ hour.apply(ts).normalize()
+ hour.apply(pd.Timestamp('2014-01-01 23:30')).normalize()
.. _timeseries.dayvscalendarday:
@@ -1444,6 +1495,7 @@ the pandas objects.
.. ipython:: python
+ ts = pd.Series(range(len(rng)), index=rng)
ts = ts[:5]
ts.shift(1)
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index a3213136d998a..e38ba54d4b058 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -373,7 +373,7 @@ New Behavior:
s = pd.Series([1,2,3], index=np.arange(3.))
s
s.index
- print(s.to_csv(path=None))
+ print(s.to_csv(path_or_buf=None, header=False))
Changes to dtype assignment behaviors
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 3c0818343208a..9f5fbdc195f34 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -186,7 +186,7 @@ Previously, only ``gzip`` compression was supported. By default, compression of
URLs and paths are now inferred using their file extensions. Additionally,
support for bz2 compression in the python 2 C-engine improved (:issue:`14874`).
-.. ipython:: python
+.. code-block:: python
url = 'https://github.com/{repo}/raw/{branch}/{path}'.format(
repo = 'pandas-dev/pandas',
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 649629714c3b1..9b71ab656920d 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -17,6 +17,12 @@ New features
- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
+- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing
+  the user to override the engine's default behavior to include or omit the
+  dataframe's indexes from the resulting Parquet file. (:issue:`20768`)
+- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
+
+
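+A minimal sketch of the two hooks above (``df`` stands for any ``DataFrame``;
+the callable shown is purely illustrative):
+
+.. code-block:: python
+
+ # keep the index out of the Parquet file explicitly
+ df.to_parquet('out.parquet', index=False)
+
+ # any callable taking two 1d arrays and returning a scalar works
+ def histogram_intersection(a, b):
+     return np.minimum(np.true_divide(a, a.sum()),
+                       np.true_divide(b, b.sum())).sum()
+
+ df.corr(method=histogram_intersection)
+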
.. _whatsnew_0240.enhancements.extension_array_operators:
``ExtensionArray`` operator support
@@ -170,9 +176,9 @@ Other Enhancements
- :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`)
- Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`)
- :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to
- reflect changes from the `Pandas-GBQ library version 0.5.0
- `__.
- (:issue:`21627`)
+ reflect changes from the `Pandas-GBQ library version 0.6.0
+ `__.
+ (:issue:`21627`, :issue:`22557`)
- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`)
- :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`)
- :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`)
@@ -182,9 +188,12 @@ Other Enhancements
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
+- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times; see the sketch below (:issue:`18946`)
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained :meth:`Resampler.quantile` (:issue:`15023`).
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
+- New attribute :attr:`__git_version__` will return the git commit SHA of the current build (:issue:`21295`).
+- Compatibility with Matplotlib 3.0 (:issue:`22790`).
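+
+A rough sketch of the new ``ambiguous`` argument on the rounding methods
+(timezone and date are for illustration only; any fall-back DST transition
+behaves the same way):
+
+.. code-block:: python
+
+ ts = pd.Timestamp('2018-11-04 00:59:00', tz='US/Eastern')
+ ts.ceil('H', ambiguous='NaT')  # the ceiling lands on the ambiguous 01:00 -> NaT
+ ts.ceil('H', ambiguous=True)   # resolve the ambiguity toward DST instead
+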
.. _whatsnew_0240.api_breaking:
@@ -246,7 +255,6 @@ UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`)
.. code-block:: ipython
-
In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30")
Out[2]: Timestamp('2015-11-18 10:00:00')
@@ -284,6 +292,7 @@ Passing ``utc=True`` will mimic the previous behavior but will correctly indicat
that the dates have been converted to UTC
.. ipython:: python
+
pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True)
.. _whatsnew_0240.api_breaking.calendarday:
@@ -450,7 +459,7 @@ Previous Behavior:
Out[3]: Int64Index([0, 1, 2], dtype='int64')
-.. _whatsnew_0240.api.timedelta64_subtract_nan
+.. _whatsnew_0240.api.timedelta64_subtract_nan:
Addition/Subtraction of ``NaN`` from :class:`DataFrame`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -461,9 +470,10 @@ all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and
``Series`` behavior (:issue:`22163`)
.. ipython:: python
+ :okexcept:
- df = pd.DataFrame([pd.Timedelta(days=1)])
- df - np.nan
+ df = pd.DataFrame([pd.Timedelta(days=1)])
+ df - np.nan
Previous Behavior:
@@ -485,6 +495,7 @@ ExtensionType Changes
- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
+- An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`)
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
- The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`)
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
@@ -492,6 +503,7 @@ ExtensionType Changes
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`).
+- Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
- Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)
.. _whatsnew_0240.api.incompatibilities:
@@ -561,6 +573,7 @@ Deprecations
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)
- :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`)
+- :func:`DatetimeIndex.shift` now accepts ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`)
.. _whatsnew_0240.prior_deprecations:
@@ -577,6 +590,7 @@ Removal of prior version deprecations/changes
- Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`)
- :meth:`Categorical.searchsorted` and :meth:`Series.searchsorted` have renamed the ``v`` argument to ``value`` (:issue:`14645`)
- :meth:`TimedeltaIndex.searchsorted`, :meth:`DatetimeIndex.searchsorted`, and :meth:`PeriodIndex.searchsorted` have renamed the ``key`` argument to ``value`` (:issue:`14645`)
+- Removal of the previously deprecated module ``pandas.json`` (:issue:`19944`)
.. _whatsnew_0240.performance:
@@ -596,6 +610,8 @@ Performance Improvements
:meth:`~HDFStore.keys`. (i.e. ``x in store`` checks are much faster)
(:issue:`21372`)
- Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`)
+- Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`)
+
.. _whatsnew_0240.docs:
@@ -615,6 +631,8 @@ Categorical
^^^^^^^^^^^
- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``.
+- Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`)
+- Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`).
Datetimelike
^^^^^^^^^^^^
@@ -635,7 +653,9 @@ Datetimelike
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`)
- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`)
- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
--
+- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`)
+- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)
+- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
Timedelta
^^^^^^^^^
@@ -645,7 +665,8 @@ Timedelta
- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
- Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`)
--
+- Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`)
+- Fixed bug in adding a :class:`DataFrame` with all-``timedelta64[ns]`` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`)
-
Timezones
@@ -669,6 +690,7 @@ Timezones
- Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`)
- Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`)
- Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`)
+- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone-aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`)
Offsets
^^^^^^^
@@ -720,13 +742,17 @@ Indexing
- Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`)
- ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`)
- Bug in :meth:`DataFrame.loc` when indexing with an :class:`IntervalIndex` (:issue:`19977`)
+- :class:`Index` no longer mangles ``None``, ``NaN`` and ``NaT``, i.e. they are treated as three different keys. However, for numeric Index all three are still coerced to a ``NaN`` (:issue:`22332`)
+- Bug in ``scalar in Index`` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`)
Missing
^^^^^^^
- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
-- :func:`Series.isin` now treats all nans as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
+- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
+- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity (:issue:`22295`). A quick sketch follows.
+
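+A quick sketch of the new NaN/``NaT`` handling (values are illustrative):
+
+.. code-block:: python
+
+ # NaN-floats compare equal for object dtype, consistent with float64
+ pd.Series([np.nan], dtype=object).isin([np.nan])
+
+ # NaT is kept distinct from the NaN-float instead of being coerced
+ pd.unique(np.array([pd.NaT, np.nan], dtype=object))
+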
MultiIndex
^^^^^^^^^^
@@ -740,8 +766,13 @@ I/O
- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
+- :func:`read_csv()` and :func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`)
- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
- :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`)
+- :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`)
+- :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`)
+- Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`)
+- Bug in :func:`to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`)
Plotting
^^^^^^^^
@@ -761,6 +792,8 @@ Groupby/Resample/Rolling
- Bug in :meth:`Resampler.apply` when passing positional arguments to applied func (:issue:`14615`).
- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).
- Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`).
+- Bug in :meth:`SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`)
+- :func:`RollingGroupby.agg` and :func:`ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`)
Sparse
^^^^^^
@@ -782,6 +815,8 @@ Reshaping
- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`)
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
- Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`)
+- Bug in :func:`pandas.wide_to_long` when a string is passed to the ``stubnames`` argument and a column name is a substring of that stubname (:issue:`22468`)
+- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)
Build Changes
^^^^^^^^^^^^^
@@ -796,7 +831,6 @@ Other
- :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`)
- Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`)
- :meth:`~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`)
-- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax``. ``NaN`` values are also handled properly. (:issue:`21548`, :issue:`21526`)
--
--
--
+- :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now return the correct ``n`` values when ``keep != 'all'``, also when tied on the first columns (:issue:`22752`)
+- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly.
+- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`)
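+
+For the logical-ops change, a minimal sketch (operands are illustrative):
+
+.. code-block:: python
+
+ # previously raised ValueError; now evaluates elementwise to a Series
+ pd.Series([True, False, True]) & pd.Index([True, True, False])
+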
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 97ae73174c09c..e446782d9665e 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -61,9 +61,6 @@
# extension module deprecations
from pandas.util._depr_module import _DeprecatedModule
-json = _DeprecatedModule(deprmod='pandas.json',
- moved={'dumps': 'pandas.io.json.dumps',
- 'loads': 'pandas.io.json.loads'})
parser = _DeprecatedModule(deprmod='pandas.parser',
removals=['na_values'],
moved={'CParserError': 'pandas.errors.ParserError'})
@@ -83,6 +80,7 @@
from ._version import get_versions
v = get_versions()
__version__ = v.get('closest-tag', v['version'])
+__git_version__ = v.get('full-revisionid')
del get_versions, v
# module level doc-string
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 415e7026e09c8..d2914dc8ac751 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -353,6 +353,523 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
return result
+# ----------------------------------------------------------------------
+
+ctypedef fused algos_t:
+ float64_t
+ float32_t
+ object
+ int32_t
+ int64_t
+ uint64_t
+ uint8_t
+
+
+# TODO: unused; needed?
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cpdef map_indices(ndarray[algos_t] index):
+ """
+ Produce a dict mapping the values of the input array to their respective
+ locations.
+
+ Example:
+ array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1}
+
+ Better to do this with Cython because of the enormous speed boost.
+ """
+ cdef:
+ Py_ssize_t i, length
+ dict result = {}
+
+ length = len(index)
+
+ for i in range(length):
+ result[index[i]] = i
+
+ return result
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
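+ # Both `old` and `new` are assumed sorted. For each element of `new`,
+ # record the position of the most recent element of `old` that is <= it
+ # (forward fill), filling at most `limit` consecutive positions after
+ # each element of `old`; positions with no candidate stay -1.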
+ cdef:
+ Py_ssize_t i, j, nleft, nright
+ ndarray[int64_t, ndim=1] indexer
+ algos_t cur, next
+ int lim, fill_count = 0
+
+ nleft = len(old)
+ nright = len(new)
+ indexer = np.empty(nright, dtype=np.int64)
+ indexer.fill(-1)
+
+ if limit is None:
+ lim = nright
+ else:
+ if not util.is_integer_object(limit):
+ raise ValueError('Limit must be an integer')
+ if limit < 1:
+ raise ValueError('Limit must be greater than 0')
+ lim = limit
+
+ if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
+ return indexer
+
+ i = j = 0
+
+ cur = old[0]
+
+ while j <= nright - 1 and new[j] < cur:
+ j += 1
+
+ while True:
+ if j == nright:
+ break
+
+ if i == nleft - 1:
+ while j < nright:
+ if new[j] == cur:
+ indexer[j] = i
+ elif new[j] > cur and fill_count < lim:
+ indexer[j] = i
+ fill_count += 1
+ j += 1
+ break
+
+ next = old[i + 1]
+
+ while j < nright and cur <= new[j] < next:
+ if new[j] == cur:
+ indexer[j] = i
+ elif fill_count < lim:
+ indexer[j] = i
+ fill_count += 1
+ j += 1
+
+ fill_count = 0
+ i += 1
+ cur = next
+
+ return indexer
+
+
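+# Dtype-specialized aliases keep the names that the deleted .pxi.in template
+# used to generate (e.g. pad_float64), so callers that look functions up by
+# dtype keep working.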
+pad_float64 = pad["float64_t"]
+pad_float32 = pad["float32_t"]
+pad_object = pad["object"]
+pad_int64 = pad["int64_t"]
+pad_int32 = pad["int32_t"]
+pad_uint64 = pad["uint64_t"]
+pad_bool = pad["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def pad_inplace(ndarray[algos_t] values,
+ ndarray[uint8_t, cast=True] mask,
+ limit=None):
+ cdef:
+ Py_ssize_t i, N
+ algos_t val
+ int lim, fill_count = 0
+
+ N = len(values)
+
+ # GH#2778
+ if N == 0:
+ return
+
+ if limit is None:
+ lim = N
+ else:
+ if not util.is_integer_object(limit):
+ raise ValueError('Limit must be an integer')
+ if limit < 1:
+ raise ValueError('Limit must be greater than 0')
+ lim = limit
+
+ val = values[0]
+ for i in range(N):
+ if mask[i]:
+ if fill_count >= lim:
+ continue
+ fill_count += 1
+ values[i] = val
+ else:
+ fill_count = 0
+ val = values[i]
+
+
+pad_inplace_float64 = pad_inplace["float64_t"]
+pad_inplace_float32 = pad_inplace["float32_t"]
+pad_inplace_object = pad_inplace["object"]
+pad_inplace_int64 = pad_inplace["int64_t"]
+pad_inplace_int32 = pad_inplace["int32_t"]
+pad_inplace_uint64 = pad_inplace["uint64_t"]
+pad_inplace_bool = pad_inplace["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
+ ndarray[uint8_t, ndim=2] mask,
+ limit=None):
+ cdef:
+ Py_ssize_t i, j, N, K
+ algos_t val
+ int lim, fill_count = 0
+
+ K, N = (<object>values).shape
+
+ # GH#2778
+ if N == 0:
+ return
+
+ if limit is None:
+ lim = N
+ else:
+ if not util.is_integer_object(limit):
+ raise ValueError('Limit must be an integer')
+ if limit < 1:
+ raise ValueError('Limit must be greater than 0')
+ lim = limit
+
+ for j in range(K):
+ fill_count = 0
+ val = values[j, 0]
+ for i in range(N):
+ if mask[j, i]:
+ if fill_count >= lim:
+ continue
+ fill_count += 1
+ values[j, i] = val
+ else:
+ fill_count = 0
+ val = values[j, i]
+
+
+pad_2d_inplace_float64 = pad_2d_inplace["float64_t"]
+pad_2d_inplace_float32 = pad_2d_inplace["float32_t"]
+pad_2d_inplace_object = pad_2d_inplace["object"]
+pad_2d_inplace_int64 = pad_2d_inplace["int64_t"]
+pad_2d_inplace_int32 = pad_2d_inplace["int32_t"]
+pad_2d_inplace_uint64 = pad_2d_inplace["uint64_t"]
+pad_2d_inplace_bool = pad_2d_inplace["uint8_t"]
+
+
+"""
+Backfilling logic for generating fill vector
+
+Diagram of what's going on
+
+Old New Fill vector Mask
+ . 0 1
+ . 0 1
+ . 0 1
+A A 0 1
+ . 1 1
+ . 1 1
+ . 1 1
+ . 1 1
+ . 1 1
+B B 1 1
+ . 2 1
+ . 2 1
+ . 2 1
+C C 2 1
+ . 0
+ . 0
+D
+"""
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
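+ # Mirror image of `pad`: both arrays are assumed sorted, and for each
+ # element of `new` we record the position of the nearest element of `old`
+ # that is >= it (backward fill), filling at most `limit` consecutive
+ # positions before each element of `old`; unmatched positions stay -1.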
+ cdef:
+ Py_ssize_t i, j, nleft, nright
+ ndarray[int64_t, ndim=1] indexer
+ algos_t cur, prev
+ int lim, fill_count = 0
+
+ nleft = len(old)
+ nright = len(new)
+ indexer = np.empty(nright, dtype=np.int64)
+ indexer.fill(-1)
+
+ if limit is None:
+ lim = nright
+ else:
+ if not util.is_integer_object(limit):
+ raise ValueError('Limit must be an integer')
+ if limit < 1:
+ raise ValueError('Limit must be greater than 0')
+ lim = limit
+
+ if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
+ return indexer
+
+ i = nleft - 1
+ j = nright - 1
+
+ cur = old[nleft - 1]
+
+ while j >= 0 and new[j] > cur:
+ j -= 1
+
+ while True:
+ if j < 0:
+ break
+
+ if i == 0:
+ while j >= 0:
+ if new[j] == cur:
+ indexer[j] = i
+ elif new[j] < cur and fill_count < lim:
+ indexer[j] = i
+ fill_count += 1
+ j -= 1
+ break
+
+ prev = old[i - 1]
+
+ while j >= 0 and prev < new[j] <= cur:
+ if new[j] == cur:
+ indexer[j] = i
+ elif new[j] < cur and fill_count < lim:
+ indexer[j] = i
+ fill_count += 1
+ j -= 1
+
+ fill_count = 0
+ i -= 1
+ cur = prev
+
+ return indexer
+
+
+backfill_float64 = backfill["float64_t"]
+backfill_float32 = backfill["float32_t"]
+backfill_object = backfill["object"]
+backfill_int64 = backfill["int64_t"]
+backfill_int32 = backfill["int32_t"]
+backfill_uint64 = backfill["uint64_t"]
+backfill_bool = backfill["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def backfill_inplace(ndarray[algos_t] values,
+ ndarray[uint8_t, cast=True] mask,
+ limit=None):
+ cdef:
+ Py_ssize_t i, N
+ algos_t val
+ int lim, fill_count = 0
+
+ N = len(values)
+
+ # GH#2778
+ if N == 0:
+ return
+
+ if limit is None:
+ lim = N
+ else:
+ if not util.is_integer_object(limit):
+ raise ValueError('Limit must be an integer')
+ if limit < 1:
+ raise ValueError('Limit must be greater than 0')
+ lim = limit
+
+ val = values[N - 1]
+ for i in range(N - 1, -1, -1):
+ if mask[i]:
+ if fill_count >= lim:
+ continue
+ fill_count += 1
+ values[i] = val
+ else:
+ fill_count = 0
+ val = values[i]
+
+
+backfill_inplace_float64 = backfill_inplace["float64_t"]
+backfill_inplace_float32 = backfill_inplace["float32_t"]
+backfill_inplace_object = backfill_inplace["object"]
+backfill_inplace_int64 = backfill_inplace["int64_t"]
+backfill_inplace_int32 = backfill_inplace["int32_t"]
+backfill_inplace_uint64 = backfill_inplace["uint64_t"]
+backfill_inplace_bool = backfill_inplace["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
+ ndarray[uint8_t, ndim=2] mask,
+ limit=None):
+ cdef:
+ Py_ssize_t i, j, N, K
+ algos_t val
+ int lim, fill_count = 0
+
+ K, N = (<object>values).shape
+
+ # GH#2778
+ if N == 0:
+ return
+
+ if limit is None:
+ lim = N
+ else:
+ if not util.is_integer_object(limit):
+ raise ValueError('Limit must be an integer')
+ if limit < 1:
+ raise ValueError('Limit must be greater than 0')
+ lim = limit
+
+ for j in range(K):
+ fill_count = 0
+ val = values[j, N - 1]
+ for i in range(N - 1, -1, -1):
+ if mask[j, i]:
+ if fill_count >= lim:
+ continue
+ fill_count += 1
+ values[j, i] = val
+ else:
+ fill_count = 0
+ val = values[j, i]
+
+
+backfill_2d_inplace_float64 = backfill_2d_inplace["float64_t"]
+backfill_2d_inplace_float32 = backfill_2d_inplace["float32_t"]
+backfill_2d_inplace_object = backfill_2d_inplace["object"]
+backfill_2d_inplace_int64 = backfill_2d_inplace["int64_t"]
+backfill_2d_inplace_int32 = backfill_2d_inplace["int32_t"]
+backfill_2d_inplace_uint64 = backfill_2d_inplace["uint64_t"]
+backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"]
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def arrmap(ndarray[algos_t] index, object func):
+ cdef:
+ Py_ssize_t length = index.shape[0]
+ Py_ssize_t i = 0
+ ndarray[object] result = np.empty(length, dtype=np.object_)
+
+ from pandas._libs.lib import maybe_convert_objects
+
+ for i in range(length):
+ result[i] = func(index[i])
+
+ return maybe_convert_objects(result)
+
+
+arrmap_float64 = arrmap["float64_t"]
+arrmap_float32 = arrmap["float32_t"]
+arrmap_object = arrmap["object"]
+arrmap_int64 = arrmap["int64_t"]
+arrmap_int32 = arrmap["int32_t"]
+arrmap_uint64 = arrmap["uint64_t"]
+arrmap_bool = arrmap["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def is_monotonic(ndarray[algos_t] arr, bint timelike):
+ """
+ Returns
+ -------
+ is_monotonic_inc, is_monotonic_dec, is_unique
+ """
+ cdef:
+ Py_ssize_t i, n
+ algos_t prev, cur
+ bint is_monotonic_inc = 1
+ bint is_monotonic_dec = 1
+ bint is_unique = 1
+ bint is_strict_monotonic = 1
+
+ n = len(arr)
+
+ if n == 1:
+ if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
+ # single value is NaN
+ return False, False, True
+ else:
+ return True, True, True
+ elif n < 2:
+ return True, True, True
+
+ if timelike and arr[0] == iNaT:
+ return False, False, True
+
+ if algos_t is not object:
+ with nogil:
+ prev = arr[0]
+ for i in range(1, n):
+ cur = arr[i]
+ if timelike and cur == iNaT:
+ is_monotonic_inc = 0
+ is_monotonic_dec = 0
+ break
+ if cur < prev:
+ is_monotonic_inc = 0
+ elif cur > prev:
+ is_monotonic_dec = 0
+ elif cur == prev:
+ is_unique = 0
+ else:
+ # cur or prev is NaN
+ is_monotonic_inc = 0
+ is_monotonic_dec = 0
+ break
+ if not is_monotonic_inc and not is_monotonic_dec:
+ is_monotonic_inc = 0
+ is_monotonic_dec = 0
+ break
+ prev = cur
+ else:
+ # object-dtype, identical to above except we cannot use `with nogil`
+ prev = arr[0]
+ for i in range(1, n):
+ cur = arr[i]
+ if timelike and cur == iNaT:
+ is_monotonic_inc = 0
+ is_monotonic_dec = 0
+ break
+ if cur < prev:
+ is_monotonic_inc = 0
+ elif cur > prev:
+ is_monotonic_dec = 0
+ elif cur == prev:
+ is_unique = 0
+ else:
+ # cur or prev is NaN
+ is_monotonic_inc = 0
+ is_monotonic_dec = 0
+ break
+ if not is_monotonic_inc and not is_monotonic_dec:
+ is_monotonic_inc = 0
+ is_monotonic_dec = 0
+ break
+ prev = cur
+
+ is_strict_monotonic = is_unique and (is_monotonic_inc or is_monotonic_dec)
+ return is_monotonic_inc, is_monotonic_dec, is_strict_monotonic
+
+
+is_monotonic_float64 = is_monotonic["float64_t"]
+is_monotonic_float32 = is_monotonic["float32_t"]
+is_monotonic_object = is_monotonic["object"]
+is_monotonic_int64 = is_monotonic["int64_t"]
+is_monotonic_int32 = is_monotonic["int32_t"]
+is_monotonic_uint64 = is_monotonic["uint64_t"]
+is_monotonic_bool = is_monotonic["uint8_t"]
+
+
# generated from template
include "algos_common_helper.pxi"
include "algos_rank_helper.pxi"
diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in
index ed4c0e4c59609..40b1b1a282670 100644
--- a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -15,443 +15,6 @@ Template for each `dtype` helper function using 1-d template
WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
"""
-#----------------------------------------------------------------------
-# 1-d template
-#----------------------------------------------------------------------
-
-{{py:
-
-# name, c_type, dtype, can_hold_na, nogil
-dtypes = [('float64', 'float64_t', 'np.float64', True, True),
- ('float32', 'float32_t', 'np.float32', True, True),
- ('object', 'object', 'object', True, False),
- ('int32', 'int32_t', 'np.int32', False, True),
- ('int64', 'int64_t', 'np.int64', False, True),
- ('uint64', 'uint64_t', 'np.uint64', False, True),
- ('bool', 'uint8_t', 'np.bool', False, True)]
-
-def get_dispatch(dtypes):
-
- for name, c_type, dtype, can_hold_na, nogil in dtypes:
-
- nogil_str = 'with nogil:' if nogil else ''
- tab = ' ' if nogil else ''
- yield name, c_type, dtype, can_hold_na, nogil_str, tab
-}}
-
-{{for name, c_type, dtype, can_hold_na, nogil_str, tab
- in get_dispatch(dtypes)}}
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def map_indices_{{name}}(ndarray[{{c_type}}] index):
- """
- Produce a dict mapping the values of the input array to their respective
- locations.
-
- Example:
- array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1}
-
- Better to do this with Cython because of the enormous speed boost.
- """
- cdef:
- Py_ssize_t i, length
- dict result = {}
-
- length = len(index)
-
- for i in range(length):
- result[index[i]] = i
-
- return result
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):
- cdef:
- Py_ssize_t i, j, nleft, nright
- ndarray[int64_t, ndim=1] indexer
- {{c_type}} cur, next
- int lim, fill_count = 0
-
- nleft = len(old)
- nright = len(new)
- indexer = np.empty(nright, dtype=np.int64)
- indexer.fill(-1)
-
- if limit is None:
- lim = nright
- else:
- if not util.is_integer_object(limit):
- raise ValueError('Limit must be an integer')
- if limit < 1:
- raise ValueError('Limit must be greater than 0')
- lim = limit
-
- if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
- return indexer
-
- i = j = 0
-
- cur = old[0]
-
- while j <= nright - 1 and new[j] < cur:
- j += 1
-
- while True:
- if j == nright:
- break
-
- if i == nleft - 1:
- while j < nright:
- if new[j] == cur:
- indexer[j] = i
- elif new[j] > cur and fill_count < lim:
- indexer[j] = i
- fill_count += 1
- j += 1
- break
-
- next = old[i + 1]
-
- while j < nright and cur <= new[j] < next:
- if new[j] == cur:
- indexer[j] = i
- elif fill_count < lim:
- indexer[j] = i
- fill_count += 1
- j += 1
-
- fill_count = 0
- i += 1
- cur = next
-
- return indexer
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
- ndarray[uint8_t, cast=True] mask,
- limit=None):
- cdef:
- Py_ssize_t i, N
- {{c_type}} val
- int lim, fill_count = 0
-
- N = len(values)
-
- # GH 2778
- if N == 0:
- return
-
- if limit is None:
- lim = N
- else:
- if not util.is_integer_object(limit):
- raise ValueError('Limit must be an integer')
- if limit < 1:
- raise ValueError('Limit must be greater than 0')
- lim = limit
-
- val = values[0]
- for i in range(N):
- if mask[i]:
- if fill_count >= lim:
- continue
- fill_count += 1
- values[i] = val
- else:
- fill_count = 0
- val = values[i]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
- ndarray[uint8_t, ndim=2] mask,
- limit=None):
- cdef:
- Py_ssize_t i, j, N, K
- {{c_type}} val
- int lim, fill_count = 0
-
- K, N = (<object>values).shape
-
- # GH 2778
- if N == 0:
- return
-
- if limit is None:
- lim = N
- else:
- if not util.is_integer_object(limit):
- raise ValueError('Limit must be an integer')
- if limit < 1:
- raise ValueError('Limit must be greater than 0')
- lim = limit
-
- for j in range(K):
- fill_count = 0
- val = values[j, 0]
- for i in range(N):
- if mask[j, i]:
- if fill_count >= lim:
- continue
- fill_count += 1
- values[j, i] = val
- else:
- fill_count = 0
- val = values[j, i]
-
-"""
-Backfilling logic for generating fill vector
-
-Diagram of what's going on
-
-Old New Fill vector Mask
- . 0 1
- . 0 1
- . 0 1
-A A 0 1
- . 1 1
- . 1 1
- . 1 1
- . 1 1
- . 1 1
-B B 1 1
- . 2 1
- . 2 1
- . 2 1
-C C 2 1
- . 0
- . 0
-D
-"""
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
- limit=None):
- cdef:
- Py_ssize_t i, j, nleft, nright
- ndarray[int64_t, ndim=1] indexer
- {{c_type}} cur, prev
- int lim, fill_count = 0
-
- nleft = len(old)
- nright = len(new)
- indexer = np.empty(nright, dtype=np.int64)
- indexer.fill(-1)
-
- if limit is None:
- lim = nright
- else:
- if not util.is_integer_object(limit):
- raise ValueError('Limit must be an integer')
- if limit < 1:
- raise ValueError('Limit must be greater than 0')
- lim = limit
-
- if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
- return indexer
-
- i = nleft - 1
- j = nright - 1
-
- cur = old[nleft - 1]
-
- while j >= 0 and new[j] > cur:
- j -= 1
-
- while True:
- if j < 0:
- break
-
- if i == 0:
- while j >= 0:
- if new[j] == cur:
- indexer[j] = i
- elif new[j] < cur and fill_count < lim:
- indexer[j] = i
- fill_count += 1
- j -= 1
- break
-
- prev = old[i - 1]
-
- while j >= 0 and prev < new[j] <= cur:
- if new[j] == cur:
- indexer[j] = i
- elif new[j] < cur and fill_count < lim:
- indexer[j] = i
- fill_count += 1
- j -= 1
-
- fill_count = 0
- i -= 1
- cur = prev
-
- return indexer
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
- ndarray[uint8_t, cast=True] mask,
- limit=None):
- cdef:
- Py_ssize_t i, N
- {{c_type}} val
- int lim, fill_count = 0
-
- N = len(values)
-
- # GH 2778
- if N == 0:
- return
-
- if limit is None:
- lim = N
- else:
- if not util.is_integer_object(limit):
- raise ValueError('Limit must be an integer')
- if limit < 1:
- raise ValueError('Limit must be greater than 0')
- lim = limit
-
- val = values[N - 1]
- for i in range(N - 1, -1, -1):
- if mask[i]:
- if fill_count >= lim:
- continue
- fill_count += 1
- values[i] = val
- else:
- fill_count = 0
- val = values[i]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
- ndarray[uint8_t, ndim=2] mask,
- limit=None):
- cdef:
- Py_ssize_t i, j, N, K
- {{c_type}} val
- int lim, fill_count = 0
-
- K, N = (<object>values).shape
-
- # GH 2778
- if N == 0:
- return
-
- if limit is None:
- lim = N
- else:
- if not util.is_integer_object(limit):
- raise ValueError('Limit must be an integer')
- if limit < 1:
- raise ValueError('Limit must be greater than 0')
- lim = limit
-
- for j in range(K):
- fill_count = 0
- val = values[j, N - 1]
- for i in range(N - 1, -1, -1):
- if mask[j, i]:
- if fill_count >= lim:
- continue
- fill_count += 1
- values[j, i] = val
- else:
- fill_count = 0
- val = values[j, i]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
- """
- Returns
- -------
- is_monotonic_inc, is_monotonic_dec, is_unique
- """
- cdef:
- Py_ssize_t i, n
- {{c_type}} prev, cur
- bint is_monotonic_inc = 1
- bint is_monotonic_dec = 1
- bint is_unique = 1
-
- n = len(arr)
-
- if n == 1:
- if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
- # single value is NaN
- return False, False, True
- else:
- return True, True, True
- elif n < 2:
- return True, True, True
-
- if timelike and arr[0] == iNaT:
- return False, False, True
-
- {{nogil_str}}
- {{tab}}prev = arr[0]
- {{tab}}for i in range(1, n):
- {{tab}} cur = arr[i]
- {{tab}} if timelike and cur == iNaT:
- {{tab}} is_monotonic_inc = 0
- {{tab}} is_monotonic_dec = 0
- {{tab}} break
- {{tab}} if cur < prev:
- {{tab}} is_monotonic_inc = 0
- {{tab}} elif cur > prev:
- {{tab}} is_monotonic_dec = 0
- {{tab}} elif cur == prev:
- {{tab}} is_unique = 0
- {{tab}} else:
- {{tab}} # cur or prev is NaN
- {{tab}} is_monotonic_inc = 0
- {{tab}} is_monotonic_dec = 0
- {{tab}} break
- {{tab}} if not is_monotonic_inc and not is_monotonic_dec:
- {{tab}} is_monotonic_inc = 0
- {{tab}} is_monotonic_dec = 0
- {{tab}} break
- {{tab}} prev = cur
- return is_monotonic_inc, is_monotonic_dec, \
- is_unique and (is_monotonic_inc or is_monotonic_dec)
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
- cdef:
- Py_ssize_t length = index.shape[0]
- Py_ssize_t i = 0
- ndarray[object] result = np.empty(length, dtype=np.object_)
-
- from pandas._libs.lib import maybe_convert_objects
-
- for i in range(length):
- result[i] = func(index[i])
-
- return maybe_convert_objects(result)
-
-{{endfor}}
-
-#----------------------------------------------------------------------
-# put template
-#----------------------------------------------------------------------
-
{{py:
# name, c_type, dest_type, dest_dtype
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 550cabd5e3192..f294fd141a9f1 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -470,7 +470,6 @@ cdef class {{name}}HashTable(HashTable):
int ret = 0
{{dtype}}_t val
khiter_t k
- bint seen_na = 0
{{name}}Vector uniques = {{name}}Vector()
{{name}}VectorData *ud
@@ -479,22 +478,6 @@ cdef class {{name}}HashTable(HashTable):
with nogil:
for i in range(n):
val = values[i]
- {{if float_group}}
- if val == val:
- k = kh_get_{{dtype}}(self.table, val)
- if k == self.table.n_buckets:
- kh_put_{{dtype}}(self.table, val, &ret)
- if needs_resize(ud):
- with gil:
- uniques.resize()
- append_data_{{dtype}}(ud, val)
- elif not seen_na:
- seen_na = 1
- if needs_resize(ud):
- with gil:
- uniques.resize()
- append_data_{{dtype}}(ud, NAN)
- {{else}}
k = kh_get_{{dtype}}(self.table, val)
if k == self.table.n_buckets:
kh_put_{{dtype}}(self.table, val, &ret)
@@ -502,7 +485,6 @@ cdef class {{name}}HashTable(HashTable):
with gil:
uniques.resize()
append_data_{{dtype}}(ud, val)
- {{endif}}
return uniques.to_array()
{{endfor}}
@@ -747,9 +729,6 @@ cdef class StringHashTable(HashTable):
return np.asarray(labels)
-na_sentinel = object
-
-
cdef class PyObjectHashTable(HashTable):
def __init__(self, size_hint=1):
@@ -767,8 +746,7 @@ cdef class PyObjectHashTable(HashTable):
def __contains__(self, object key):
cdef khiter_t k
hash(key)
- if key != key or key is None:
- key = na_sentinel
+
k = kh_get_pymap(self.table, key)
return k != self.table.n_buckets
@@ -780,8 +758,7 @@ cdef class PyObjectHashTable(HashTable):
cpdef get_item(self, object val):
cdef khiter_t k
- if val != val or val is None:
- val = na_sentinel
+
k = kh_get_pymap(self.table, val)
if k != self.table.n_buckets:
return self.table.vals[k]
@@ -795,8 +772,7 @@ cdef class PyObjectHashTable(HashTable):
char* buf
hash(key)
- if key != key or key is None:
- key = na_sentinel
+
k = kh_put_pymap(self.table, key, &ret)
# self.table.keys[k] = key
if kh_exist_pymap(self.table, k):
@@ -814,8 +790,6 @@ cdef class PyObjectHashTable(HashTable):
for i in range(n):
val = values[i]
hash(val)
- if val != val or val is None:
- val = na_sentinel
k = kh_put_pymap(self.table, val, &ret)
self.table.vals[k] = i
@@ -831,8 +805,6 @@ cdef class PyObjectHashTable(HashTable):
for i in range(n):
val = values[i]
hash(val)
- if val != val or val is None:
- val = na_sentinel
k = kh_get_pymap(self.table, val)
if k != self.table.n_buckets:
@@ -849,24 +821,14 @@ cdef class PyObjectHashTable(HashTable):
object val
khiter_t k
ObjectVector uniques = ObjectVector()
- bint seen_na = 0
for i in range(n):
val = values[i]
hash(val)
-
- # `val is None` below is exception to prevent mangling of None and
- # other NA values; note however that other NA values (ex: pd.NaT
- # and np.nan) will still get mangled, so many not be a permanent
- # solution; see GH 20866
- if not checknull(val) or val is None:
- k = kh_get_pymap(self.table, val)
- if k == self.table.n_buckets:
- kh_put_pymap(self.table, val, &ret)
- uniques.append(val)
- elif not seen_na:
- seen_na = 1
- uniques.append(nan)
+ k = kh_get_pymap(self.table, val)
+ if k == self.table.n_buckets:
+ kh_put_pymap(self.table, val, &ret)
+ uniques.append(val)
return uniques.to_array()
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 562c1ba218141..3f76915655f58 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -294,14 +294,23 @@ cdef class IndexEngine:
result = np.empty(n_alloc, dtype=np.int64)
missing = np.empty(n_t, dtype=np.int64)
- # form the set of the results (like ismember)
- members = np.empty(n, dtype=np.uint8)
- for i in range(n):
- val = values[i]
- if val in stargets:
- if val not in d:
- d[val] = []
- d[val].append(i)
+ # map each starget to its position in the index
+ if stargets and len(stargets) < 5 and self.is_monotonic_increasing:
+ # if there are few enough stargets and the index is monotonically
+ # increasing, then use binary search for each starget
+ for starget in stargets:
+ start = values.searchsorted(starget, side='left')
+ end = values.searchsorted(starget, side='right')
+ if start != end:
+ d[starget] = list(range(start, end))
+ else:
+ # otherwise, map by iterating through all items in the index
+ for i in range(n):
+ val = values[i]
+ if val in stargets:
+ if val not in d:
+ d[val] = []
+ d[val].append(i)
for i in range(n_t):
val = targets[i]
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 8d7e314517ed8..9f4e67ca4e256 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -1,15 +1,95 @@
# -*- coding: utf-8 -*-
-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t
-import numpy as np
-from numpy cimport (ndarray,
- int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
+from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, float32_t, float64_t)
-cdef double NaN = np.NaN
-cdef double nan = NaN
+ctypedef fused reshape_t:
+ uint8_t
+ uint16_t
+ uint32_t
+ uint64_t
+ int8_t
+ int16_t
+ int32_t
+ int64_t
+ float32_t
+ float64_t
+ object
-include "reshape_helper.pxi"
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def unstack(reshape_t[:, :] values, uint8_t[:] mask,
+ Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
+ reshape_t[:, :] new_values, uint8_t[:, :] new_mask):
+ """
+ transform long sorted_values to wide new_values
+
+ Parameters
+ ----------
+ values : typed ndarray
+ mask : boolean ndarray
+ stride : int
+ length : int
+ width : int
+ new_values : typed ndarray
+ result array
+ new_mask : boolean ndarray
+ result mask
+ """
+ cdef:
+ Py_ssize_t i, j, w, nulls, s, offset
+
+ if reshape_t is not object:
+ # evaluated at compile-time
+ with nogil:
+ for i in range(stride):
+
+ nulls = 0
+ for j in range(length):
+
+ for w in range(width):
+
+ offset = j * width + w
+
+ if mask[offset]:
+ s = i * width + w
+ new_values[j, s] = values[offset - nulls, i]
+ new_mask[j, s] = 1
+ else:
+ nulls += 1
+
+ else:
+ # object-dtype, identical to above but we cannot use nogil
+ for i in range(stride):
+
+ nulls = 0
+ for j in range(length):
+
+ for w in range(width):
+
+ offset = j * width + w
+
+ if mask[offset]:
+ s = i * width + w
+ new_values[j, s] = values[offset - nulls, i]
+ new_mask[j, s] = 1
+ else:
+ nulls += 1
+
+
+unstack_uint8 = unstack["uint8_t"]
+unstack_uint16 = unstack["uint16_t"]
+unstack_uint32 = unstack["uint32_t"]
+unstack_uint64 = unstack["uint64_t"]
+unstack_int8 = unstack["int8_t"]
+unstack_int16 = unstack["int16_t"]
+unstack_int32 = unstack["int32_t"]
+unstack_int64 = unstack["int64_t"]
+unstack_float32 = unstack["float32_t"]
+unstack_float64 = unstack["float64_t"]
+unstack_object = unstack["object"]
diff --git a/pandas/_libs/reshape_helper.pxi.in b/pandas/_libs/reshape_helper.pxi.in
deleted file mode 100644
index bb9a5977f8b45..0000000000000
--- a/pandas/_libs/reshape_helper.pxi.in
+++ /dev/null
@@ -1,81 +0,0 @@
-"""
-Template for each `dtype` helper function for take
-
-WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
-"""
-
-# ----------------------------------------------------------------------
-# reshape
-# ----------------------------------------------------------------------
-
-{{py:
-
-# name, c_type
-dtypes = [('uint8', 'uint8_t'),
- ('uint16', 'uint16_t'),
- ('uint32', 'uint32_t'),
- ('uint64', 'uint64_t'),
- ('int8', 'int8_t'),
- ('int16', 'int16_t'),
- ('int32', 'int32_t'),
- ('int64', 'int64_t'),
- ('float32', 'float32_t'),
- ('float64', 'float64_t'),
- ('object', 'object')]
-}}
-
-{{for dtype, c_type in dtypes}}
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values,
- ndarray[uint8_t, ndim=1] mask,
- Py_ssize_t stride,
- Py_ssize_t length,
- Py_ssize_t width,
- ndarray[{{c_type}}, ndim=2] new_values,
- ndarray[uint8_t, ndim=2] new_mask):
- """
- transform long sorted_values to wide new_values
-
- Parameters
- ----------
- values : typed ndarray
- mask : boolean ndarray
- stride : int
- length : int
- width : int
- new_values : typed ndarray
- result array
- new_mask : boolean ndarray
- result mask
-
- """
-
- cdef:
- Py_ssize_t i, j, w, nulls, s, offset
-
- {{if dtype == 'object'}}
- if True:
- {{else}}
- with nogil:
- {{endif}}
-
- for i in range(stride):
-
- nulls = 0
- for j in range(length):
-
- for w in range(width):
-
- offset = j * width + w
-
- if mask[offset]:
- s = i * width + w
- new_values[j, s] = values[offset - nulls, i]
- new_mask[j, s] = 1
- else:
- nulls += 1
-
-{{endfor}}
diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index 2993114a668bb..d852711d3b707 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -1,7 +1,4 @@
# -*- coding: utf-8 -*-
-import operator
-import sys
-
import cython
import numpy as np
diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c
index 8300e889d4157..19271c78501ba 100644
--- a/pandas/_libs/src/parser/io.c
+++ b/pandas/_libs/src/parser/io.c
@@ -150,7 +150,11 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
return NULL;
} else if (!PyBytes_Check(result)) {
tmp = PyUnicode_AsUTF8String(result);
- Py_XDECREF(result);
+ Py_DECREF(result);
+ if (tmp == NULL) {
+ PyGILState_Release(state);
+ return NULL;
+ }
result = tmp;
}
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index da0a9f7498aa8..2fce241027d56 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -1150,7 +1150,7 @@ static int parser_handle_eof(parser_t *self) {
case IN_QUOTED_FIELD:
self->error_msg = (char *)malloc(bufsize);
snprintf(self->error_msg, bufsize,
- "EOF inside string starting at line %lld",
+ "EOF inside string starting at row %lld",
(long long)self->file_lines);
return -1;
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 16fea0615f199..9012ebefe0975 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -15,8 +15,6 @@ import numpy as np
cnp.import_array()
import pytz
-from dateutil.tz import tzlocal, tzutc as dateutil_utc
-
from util cimport (is_integer_object, is_float_object, is_string_object,
is_datetime64_object)
diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
index fd8486f690745..ae4f9c821b5d1 100644
--- a/pandas/_libs/tslibs/nattype.pyx
+++ b/pandas/_libs/tslibs/nattype.pyx
@@ -477,6 +477,13 @@ class NaTType(_NaT):
Parameters
----------
freq : a freq string indicating the rounding resolution
+ ambiguous : bool, 'NaT', default 'raise'
+ - bool contains flags to determine if time is dst or not (note
+ that this flag is only applicable for ambiguous fall dst dates)
+ - 'NaT' will return NaT for an ambiguous time
+ - 'raise' will raise an AmbiguousTimeError for an ambiguous time
+
+ .. versionadded:: 0.24.0
Raises
------
@@ -489,6 +496,17 @@ class NaTType(_NaT):
Parameters
----------
freq : a freq string indicating the flooring resolution
+ ambiguous : bool, 'NaT', default 'raise'
+ - bool contains flags to determine if time is dst or not (note
+ that this flag is only applicable for ambiguous fall dst dates)
+ - 'NaT' will return NaT for an ambiguous time
+ - 'raise' will raise an AmbiguousTimeError for an ambiguous time
+
+ .. versionadded:: 0.24.0
+
+ Raises
+ ------
+ ValueError if the freq cannot be converted
""")
ceil = _make_nat_func('ceil', # noqa:E128
"""
@@ -497,6 +515,17 @@ class NaTType(_NaT):
Parameters
----------
freq : a freq string indicating the ceiling resolution
+ ambiguous : bool, 'NaT', default 'raise'
+ - bool contains flags to determine if time is dst or not (note
+ that this flag is only applicable for ambiguous fall dst dates)
+ - 'NaT' will return NaT for an ambiguous time
+ - 'raise' will raise an AmbiguousTimeError for an ambiguous time
+
+ .. versionadded:: 0.24.0
+
+ Raises
+ ------
+ ValueError if the freq cannot be converted
""")
tz_convert = _make_nat_func('tz_convert', # noqa:E128
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 9b13ef5982396..9c8be1901d1dc 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -541,10 +541,12 @@ def _binary_op_method_timedeltalike(op, name):
elif hasattr(other, 'dtype'):
# nd-array like
- if other.dtype.kind not in ['m', 'M']:
- # raise rathering than letting numpy return wrong answer
+ if other.dtype.kind in ['m', 'M']:
+ return op(self.to_timedelta64(), other)
+ elif other.dtype.kind == 'O':
+ return np.array([op(self, x) for x in other])
+ else:
return NotImplemented
- return op(self.to_timedelta64(), other)
elif not _validate_ops_compat(other):
return NotImplemented
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index 52343593d1cc1..0c2753dbc6f28 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -22,6 +22,7 @@ cimport ccalendar
from conversion import tz_localize_to_utc, normalize_i8_timestamps
from conversion cimport (tz_convert_single, _TSObject,
convert_to_tsobject, convert_datetime_to_tsobject)
+import enum
from fields import get_start_end_field, get_date_name_field
from nattype import NaT
from nattype cimport NPY_NAT
@@ -57,50 +58,114 @@ cdef inline object create_timestamp_from_ts(int64_t value,
return ts_base
-def round_ns(values, rounder, freq):
+@enum.unique
+class RoundTo(enum.Enum):
"""
- Applies rounding function at given frequency
+ enumeration defining the available rounding modes
+
+ Attributes
+ ----------
+ MINUS_INFTY
+ round towards -∞, or floor [2]_
+ PLUS_INFTY
+ round towards +∞, or ceil [3]_
+ NEAREST_HALF_EVEN
+ round to nearest, tie-break half to even [6]_
+ NEAREST_HALF_MINUS_INFTY
+ round to nearest, tie-break half to -∞ [5]_
+ NEAREST_HALF_PLUS_INFTY
+ round to nearest, tie-break half to +∞ [4]_
+
+
+ References
+ ----------
+ .. [1] "Rounding - Wikipedia"
+ https://en.wikipedia.org/wiki/Rounding
+ .. [2] "Rounding down"
+ https://en.wikipedia.org/wiki/Rounding#Rounding_down
+ .. [3] "Rounding up"
+ https://en.wikipedia.org/wiki/Rounding#Rounding_up
+ .. [4] "Round half up"
+ https://en.wikipedia.org/wiki/Rounding#Round_half_up
+ .. [5] "Round half down"
+ https://en.wikipedia.org/wiki/Rounding#Round_half_down
+ .. [6] "Round half to even"
+ https://en.wikipedia.org/wiki/Rounding#Round_half_to_even
+ """
+ MINUS_INFTY = 0
+ PLUS_INFTY = 1
+ NEAREST_HALF_EVEN = 2
+ NEAREST_HALF_PLUS_INFTY = 3
+ NEAREST_HALF_MINUS_INFTY = 4
+
+
+cdef inline _npdivmod(x1, x2):
+ """implement divmod for numpy < 1.13"""
+ return np.floor_divide(x1, x2), np.remainder(x1, x2)
+
+
+try:
+ from numpy import divmod as npdivmod
+except ImportError:
+ npdivmod = _npdivmod
+
+
+cdef inline _floor_int64(values, unit):
+ return values - np.remainder(values, unit)
+
+cdef inline _ceil_int64(values, unit):
+ return values + np.remainder(-values, unit)
+
+cdef inline _rounddown_int64(values, unit):
+ return _ceil_int64(values - unit//2, unit)
+
+cdef inline _roundup_int64(values, unit):
+ return _floor_int64(values + unit//2, unit)
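+
+# Worked example, with unit=10: _floor_int64(17) -> 10, _ceil_int64(17) -> 20,
+# _rounddown_int64(15) -> 10 (ties break toward -inf) and
+# _roundup_int64(15) -> 20 (ties break toward +inf).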
+
+
+def round_nsint64(values, mode, freq):
+ """
+ Applies rounding mode at given frequency
Parameters
----------
values : :obj:`ndarray`
- rounder : function, eg. 'ceil', 'floor', 'round'
+ mode : instance of `RoundTo` enumeration
freq : str, obj
Returns
-------
:obj:`ndarray`
"""
+
+ if not isinstance(mode, RoundTo):
+ raise ValueError('mode should be a RoundTo member')
+
unit = to_offset(freq).nanos
- # GH21262 If the Timestamp is multiple of the freq str
- # don't apply any rounding
- mask = values % unit == 0
- if mask.all():
- return values
- r = values.copy()
-
- if unit < 1000:
- # for nano rounding, work with the last 6 digits separately
- # due to float precision
- buff = 1000000
- r[~mask] = (buff * (values[~mask] // buff) +
- unit * (rounder((values[~mask] % buff) *
- (1 / float(unit)))).astype('i8'))
- else:
- if unit % 1000 != 0:
- msg = 'Precision will be lost using frequency: {}'
- warnings.warn(msg.format(freq))
- # GH19206
- # to deal with round-off when unit is large
- if unit >= 1e9:
- divisor = 10 ** int(np.log10(unit / 1e7))
- else:
- divisor = 10
- r[~mask] = (unit * rounder((values[~mask] *
- (divisor / float(unit))) / divisor)
- .astype('i8'))
- return r
+ if mode is RoundTo.MINUS_INFTY:
+ return _floor_int64(values, unit)
+ elif mode is RoundTo.PLUS_INFTY:
+ return _ceil_int64(values, unit)
+ elif mode is RoundTo.NEAREST_HALF_MINUS_INFTY:
+ return _rounddown_int64(values, unit)
+ elif mode is RoundTo.NEAREST_HALF_PLUS_INFTY:
+ return _roundup_int64(values, unit)
+ elif mode is RoundTo.NEAREST_HALF_EVEN:
+ # for an odd unit there is no need for a tie break
+ if unit % 2:
+ return _rounddown_int64(values, unit)
+ quotient, remainder = npdivmod(values, unit)
+ mask = np.logical_or(
+ remainder > (unit // 2),
+ np.logical_and(remainder == (unit // 2), quotient % 2)
+ )
+ quotient[mask] += 1
+ return quotient * unit
+
+ # if/elif above should catch all rounding modes defined in enum 'RoundTo':
+ # if flow of control arrives here, it is a bug
+ assert False, "round_nsint64 called with an unrecognized rounding mode"
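+
+# Worked example (illustrative): round_nsint64(np.array([15, 25]),
+# RoundTo.NEAREST_HALF_EVEN, '10N') gives array([20, 20]) -- both ties
+# resolve to the even multiple of the 10ns unit.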
# This is PITA. Because we inherit from datetime, which has very specific
@@ -656,7 +721,7 @@ class Timestamp(_Timestamp):
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
- def _round(self, freq, rounder):
+ def _round(self, freq, mode, ambiguous='raise'):
if self.tz is not None:
value = self.tz_localize(None).value
else:
@@ -665,13 +730,13 @@ class Timestamp(_Timestamp):
value = np.array([value], dtype=np.int64)
# Will only ever contain 1 element for timestamp
- r = round_ns(value, rounder, freq)[0]
+ r = round_nsint64(value, mode, freq)[0]
result = Timestamp(r, unit='ns')
if self.tz is not None:
- result = result.tz_localize(self.tz)
+ result = result.tz_localize(self.tz, ambiguous=ambiguous)
return result
- def round(self, freq):
+ def round(self, freq, ambiguous='raise'):
"""
Round the Timestamp to the specified resolution
@@ -682,32 +747,61 @@ class Timestamp(_Timestamp):
Parameters
----------
freq : a freq string indicating the rounding resolution
+ ambiguous : bool, 'NaT', default 'raise'
+ - bool contains flags to determine if time is dst or not (note
+ that this flag is only applicable for ambiguous fall dst dates)
+ - 'NaT' will return NaT for an ambiguous time
+ - 'raise' will raise an AmbiguousTimeError for an ambiguous time
+
+ .. versionadded:: 0.24.0
Raises
------
ValueError if the freq cannot be converted
"""
- return self._round(freq, np.round)
+ return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
- def floor(self, freq):
+ def floor(self, freq, ambiguous='raise'):
"""
return a new Timestamp floored to this resolution
Parameters
----------
freq : a freq string indicating the flooring resolution
+ ambiguous : bool, 'NaT', default 'raise'
+ - bool contains flags to determine if time is dst or not (note
+ that this flag is only applicable for ambiguous fall dst dates)
+ - 'NaT' will return NaT for an ambiguous time
+ - 'raise' will raise an AmbiguousTimeError for an ambiguous time
+
+ .. versionadded:: 0.24.0
+
+ Raises
+ ------
+ ValueError if the freq cannot be converted
"""
- return self._round(freq, np.floor)
+ return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
- def ceil(self, freq):
+ def ceil(self, freq, ambiguous='raise'):
"""
return a new Timestamp ceiled to this resolution
Parameters
----------
freq : a freq string indicating the ceiling resolution
+ ambiguous : bool, 'NaT', default 'raise'
+ - bool contains flags to determine if time is dst or not (note
+ that this flag is only applicable for ambiguous fall dst dates)
+ - 'NaT' will return NaT for an ambiguous time
+ - 'raise' will raise an AmbiguousTimeError for an ambiguous time
+
+ .. versionadded:: 0.24.0
+
+ Raises
+ ------
+ ValueError if the freq cannot be converted
"""
- return self._round(freq, np.ceil)
+ return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
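Taken together, ``round``/``floor``/``ceil`` now dispatch to the enum-based ``round_nsint64``; the user-visible behavior is, for example:

    >>> import pandas as pd
    >>> ts = pd.Timestamp('2018-01-01 11:59:00.345')
    >>> ts.round('min')    # RoundTo.NEAREST_HALF_EVEN
    Timestamp('2018-01-01 11:59:00')
    >>> ts.ceil('min')     # RoundTo.PLUS_INFTY
    Timestamp('2018-01-01 12:00:00')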
@property
def tz(self):
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 28a55133e68aa..1453725225e7d 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -38,6 +38,7 @@
import struct
import inspect
from collections import namedtuple
+import collections
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] >= 3
@@ -135,6 +136,11 @@ def lfilter(*args, **kwargs):
from importlib import reload
reload = reload
+ Hashable = collections.abc.Hashable
+ Iterable = collections.abc.Iterable
+ Mapping = collections.abc.Mapping
+ Sequence = collections.abc.Sequence
+ Sized = collections.abc.Sized
else:
# Python 2
@@ -190,6 +196,12 @@ def get_range_parameters(data):
reload = builtins.reload
+ Hashable = collections.Hashable
+ Iterable = collections.Iterable
+ Mapping = collections.Mapping
+ Sequence = collections.Sequence
+ Sized = collections.Sized
+
if PY2:
def iteritems(obj, **kw):
return obj.iteritems(**kw)
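With these aliases in place, callers can test against the collections ABCs without version checks, e.g.:

    >>> from pandas import compat
    >>> issubclass(dict, compat.Mapping)
    True
    >>> isinstance([], compat.Sequence)
    True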
diff --git a/pandas/compat/chainmap_impl.py b/pandas/compat/chainmap_impl.py
index c4aa8c8d6ab30..3ea5414cc41eb 100644
--- a/pandas/compat/chainmap_impl.py
+++ b/pandas/compat/chainmap_impl.py
@@ -1,4 +1,11 @@
-from collections import MutableMapping
+import sys
+
+PY3 = sys.version_info[0] >= 3
+
+if PY3:
+ from collections.abc import MutableMapping
+else:
+ from collections import MutableMapping
try:
from thread import get_ident
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index c1a9a9fc1ed13..713a5b1120beb 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -33,7 +33,7 @@ def load_reduce(self):
cls = args[0]
stack[-1] = object.__new__(cls)
return
- except:
+ except TypeError:
pass
# try to re-encode the arguments
@@ -44,7 +44,7 @@ def load_reduce(self):
try:
stack[-1] = func(*args)
return
- except:
+ except TypeError:
pass
# unknown exception, re-raise
@@ -182,7 +182,7 @@ def load_newobj_ex(self):
try:
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
-except:
+except (AttributeError, KeyError):
pass
@@ -210,5 +210,5 @@ def load(fh, encoding=None, compat=False, is_verbose=False):
up.is_verbose = is_verbose
return up.load()
- except:
+ except (ValueError, TypeError):
raise
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 28c24fc8c0640..621de3ffd4b12 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -286,6 +286,18 @@ def nulls_fixture(request):
nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture
+@pytest.fixture(params=[None, np.nan, pd.NaT])
+def unique_nulls_fixture(request):
+ """
+ Fixture for each null type in pandas, each null type exactly once
+ """
+ return request.param
+
+
+# Generate cartesian product of unique_nulls_fixture:
+unique_nulls_fixture2 = unique_nulls_fixture
+
+
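A hypothetical test consuming both fixtures runs once per ordered pair of null values (3 x 3 combinations), along the lines of:

    # hypothetical usage, not part of this change
    def test_nulls_compare_unequal(unique_nulls_fixture, unique_nulls_fixture2):
        if unique_nulls_fixture is not unique_nulls_fixture2:
            assert unique_nulls_fixture != unique_nulls_fixture2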
TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific',
'dateutil/Asia/Singapore']
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index e5b6c84d37541..e91cc8ec1e996 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -3,7 +3,7 @@
intended for public consumption
"""
from __future__ import division
-from warnings import warn, catch_warnings
+from warnings import warn, catch_warnings, simplefilter
from textwrap import dedent
import numpy as np
@@ -91,7 +91,8 @@ def _ensure_data(values, dtype=None):
# ignore the fact that we are casting to float
# which discards complex parts
- with catch_warnings(record=True):
+ with catch_warnings():
+ simplefilter("ignore", np.ComplexWarning)
values = ensure_float64(values)
return values, 'float64', 'float64'
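The narrower filter silences only the cast warning we expect, instead of swallowing everything; the pattern is:

    >>> import warnings
    >>> import numpy as np
    >>> with warnings.catch_warnings():
    ...     warnings.simplefilter("ignore", np.ComplexWarning)
    ...     np.array([1 + 2j]).astype('float64')
    array([1.])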
@@ -1213,41 +1214,56 @@ def get_indexer(current_indexer, other_indexer):
indexer = Int64Index([])
for i, column in enumerate(columns):
-
# For each column we apply method to cur_frame[column].
- # If it is the last column in columns, or if the values
- # returned are unique in frame[column] we save this index
- # and break
- # Otherwise we must save the index of the non duplicated values
- # and set the next cur_frame to cur_frame filtered on all
- # duplcicated values (#GH15297)
+            # If it's the last column, or if we already have the desired
+            # number of results, we are done.
+            # Otherwise there are duplicates of the largest/smallest
+            # value, and we need to look at the remaining columns to
+            # decide which of the tied rows to keep.
series = cur_frame[column]
- values = getattr(series, method)(cur_n, keep=self.keep)
is_last_column = len(columns) - 1 == i
- if is_last_column or values.nunique() == series.isin(values).sum():
+ values = getattr(series, method)(
+ cur_n,
+ keep=self.keep if is_last_column else 'all')
- # Last column in columns or values are unique in
- # series => values
- # is all that matters
+ if is_last_column or len(values) <= cur_n:
indexer = get_indexer(indexer, values.index)
break
- duplicated_filter = series.duplicated(keep=False)
- duplicated = values[duplicated_filter]
- non_duplicated = values[~duplicated_filter]
- indexer = get_indexer(indexer, non_duplicated.index)
+            # Now find all values equal to the boundary value, i.e. the
+            # largest (nsmallest) / smallest (nlargest) value in `values`.
+ border_value = values == values[values.index[-1]]
+
+ # Some of these values are among the top-n
+ # some aren't.
+ unsafe_values = values[border_value]
+
+ # These values are definitely among the top-n
+ safe_values = values[~border_value]
+ indexer = get_indexer(indexer, safe_values.index)
- # Must set cur frame to include all duplicated values
- # to consider for the next column, we also can reduce
- # cur_n by the current length of the indexer
- cur_frame = cur_frame[series.isin(duplicated)]
+ # Go on and separate the unsafe_values on the remaining
+ # columns.
+ cur_frame = cur_frame.loc[unsafe_values.index]
cur_n = n - len(indexer)
frame = frame.take(indexer)
# Restore the index on frame
frame.index = original_index.take(indexer)
- return frame
+
+ # If there is only one column, the frame is already sorted.
+ if len(columns) == 1:
+ return frame
+
+ ascending = method == 'nsmallest'
+
+ return frame.sort_values(
+ columns,
+ ascending=ascending,
+ kind='mergesort')
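On a frame with ties in the first column this produces, for instance:

    >>> import pandas as pd
    >>> df = pd.DataFrame({'a': [2, 2, 2, 1], 'b': [3, 1, 2, 9]})
    >>> df.nlargest(2, ['a', 'b'])
       a  b
    0  2  3
    2  2  2

All three top rows tie on ``a``, so ``b`` decides which two to keep, and the final mergesort keeps the result ordered and stable.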
# ------- ## ---- #
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 63a1dacb47abb..216bccf7d6309 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2439,9 +2439,13 @@ def _get_codes_for_values(values, categories):
"""
utility routine to turn values into codes given the specified categories
"""
-
from pandas.core.algorithms import _get_data_algo, _hashtables
- if not is_dtype_equal(values.dtype, categories.dtype):
+ if is_dtype_equal(values.dtype, categories.dtype):
+ # To prevent erroneous dtype coercion in _get_data_algo, retrieve
+ # the underlying numpy array. gh-22702
+ values = getattr(values, 'values', values)
+ categories = getattr(categories, 'values', categories)
+ else:
values = ensure_object(values)
categories = ensure_object(categories)
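The ``getattr(..., 'values', ...)`` step unwraps Series/Index inputs to their underlying ndarray while leaving plain arrays untouched; roughly:

    >>> import pandas as pd
    >>> s = pd.Series(pd.date_range('2000', periods=2))
    >>> type(getattr(s, 'values', s))
    <class 'numpy.ndarray'>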
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 12e1dd1052e0b..91c119808db52 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -38,6 +38,7 @@
from pandas.core.algorithms import checked_add_with_arr
from .base import ExtensionOpsMixin
+from pandas.util._decorators import deprecate_kwarg
def _make_comparison_op(op, cls):
@@ -59,6 +60,7 @@ def cmp_method(self, other):
# numpy will show a DeprecationWarning on invalid elementwise
# comparisons, this will raise in the future
with warnings.catch_warnings(record=True):
+ warnings.filterwarnings("ignore", "elementwise", FutureWarning)
with np.errstate(all='ignore'):
result = op(self.values, np.asarray(other))
@@ -521,40 +523,54 @@ def _addsub_offset_array(self, other, op):
kwargs['freq'] = 'infer'
return type(self)(res_values, **kwargs)
- def shift(self, n, freq=None):
+ @deprecate_kwarg(old_arg_name='n', new_arg_name='periods')
+ def shift(self, periods, freq=None):
"""
- Specialized shift which produces a Datetime/Timedelta Array/Index
+ Shift index by desired number of time frequency increments.
+
+ This method is for shifting the values of datetime-like indexes
+ by a specified time increment a given number of times.
Parameters
----------
- n : int
- Periods to shift by
- freq : DateOffset or timedelta-like, optional
+ periods : int
+ Number of periods (or increments) to shift by,
+ can be positive or negative.
+
+ .. versionchanged:: 0.24.0
+
+ freq : pandas.DateOffset, pandas.Timedelta or string, optional
+ Frequency increment to shift by.
+ If None, the index is shifted by its own `freq` attribute.
+ Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
Returns
-------
- shifted : same type as self
+ pandas.DatetimeIndex
+ Shifted index.
+
+ See Also
+ --------
+ Index.shift : Shift values of Index.
"""
if freq is not None and freq != self.freq:
if isinstance(freq, compat.string_types):
freq = frequencies.to_offset(freq)
- offset = n * freq
+ offset = periods * freq
result = self + offset
-
if hasattr(self, 'tz'):
result._tz = self.tz
-
return result
- if n == 0:
+ if periods == 0:
# immutable so OK
return self.copy()
if self.freq is None:
raise NullFrequencyError("Cannot shift with no freq")
- start = self[0] + n * self.freq
- end = self[-1] + n * self.freq
+ start = self[0] + periods * self.freq
+ end = self[-1] + periods * self.freq
attribs = self._get_attributes_dict()
return self._generate_range(start=start, end=end, periods=None,
**attribs)
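Only the keyword name changes (``n`` is deprecated via ``deprecate_kwarg``); the behavior is unchanged:

    >>> import pandas as pd
    >>> idx = pd.date_range('2018-01-01', periods=3, freq='D')
    >>> idx.shift(periods=2)
    DatetimeIndex(['2018-01-03', '2018-01-04', '2018-01-05'], dtype='datetime64[ns]', freq='D')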
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index aebc7a6a04ffc..e58109a25e1a5 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -5,7 +5,7 @@
from pandas._libs.lib import infer_dtype
from pandas.util._decorators import cache_readonly
-from pandas.compat import u, range
+from pandas.compat import u, range, string_types
from pandas.compat import set_function_name
from pandas.core.dtypes.cast import astype_nansafe
@@ -147,6 +147,11 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
dtype = values.dtype
if dtype is not None:
+ if (isinstance(dtype, string_types) and
+ (dtype.startswith("Int") or dtype.startswith("UInt"))):
+ # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
+ # https://github.com/numpy/numpy/pull/7476
+ dtype = dtype.lower()
if not issubclass(type(dtype), _IntegerDtype):
try:
dtype = _dtypes[str(np.dtype(dtype))]
@@ -507,7 +512,8 @@ def cmp_method(self, other):
# numpy will show a DeprecationWarning on invalid elementwise
# comparisons, this will raise in the future
- with warnings.catch_warnings(record=True):
+ with warnings.catch_warnings():
+ warnings.filterwarnings("ignore", "elementwise", FutureWarning)
with np.errstate(all='ignore'):
result = op(self._data, other)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index d831dc69338bd..00c049497c0d8 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -245,8 +245,8 @@ def _obj_with_exclusions(self):
def __getitem__(self, key):
if self._selection is not None:
- raise Exception('Column(s) {selection} already selected'
- .format(selection=self._selection))
+ raise IndexError('Column(s) {selection} already selected'
+ .format(selection=self._selection))
if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass,
np.ndarray)):
@@ -663,6 +663,21 @@ def transpose(self, *args, **kwargs):
T = property(transpose, doc="return the transpose, which is by "
"definition self")
+ @property
+ def _is_homogeneous_type(self):
+ """Whether the object has a single dtype.
+
+ By definition, Series and Index are always considered homogeneous.
+ A MultiIndex may or may not be homogeneous, depending on the
+ dtypes of the levels.
+
+ See Also
+ --------
+ DataFrame._is_homogeneous_type
+ MultiIndex._is_homogeneous_type
+ """
+ return True
+
@property
def shape(self):
""" return a tuple of the shape of the underlying data """
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 92e4e23ce958e..14e47936e1b50 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -15,7 +15,9 @@
from pandas import compat
from pandas.compat import iteritems, PY36, OrderedDict
from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass
-from pandas.core.dtypes.common import is_integer
+from pandas.core.dtypes.common import (
+ is_integer, is_bool_dtype, is_extension_array_dtype, is_array_like
+)
from pandas.core.dtypes.inference import _iterable_not_string
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
@@ -100,17 +102,45 @@ def maybe_box_datetimelike(value):
def is_bool_indexer(key):
- if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)):
+ # type: (Any) -> bool
+ """
+ Check whether `key` is a valid boolean indexer.
+
+ Parameters
+ ----------
+ key : Any
+ Only list-likes may be considered boolean indexers.
+ All other types are not considered a boolean indexer.
+ For array-like input, boolean ndarrays or ExtensionArrays
+ with ``_is_boolean`` set are considered boolean indexers.
+
+ Returns
+ -------
+ bool
+
+ Raises
+ ------
+ ValueError
+ When the array is an object-dtype ndarray or ExtensionArray
+ and contains missing values.
+ """
+ na_msg = 'cannot index with vector containing NA / NaN values'
+ if (isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or
+ (is_array_like(key) and is_extension_array_dtype(key.dtype))):
if key.dtype == np.object_:
key = np.asarray(values_from_object(key))
if not lib.is_bool_array(key):
if isna(key).any():
- raise ValueError('cannot index with vector containing '
- 'NA / NaN values')
+ raise ValueError(na_msg)
return False
return True
- elif key.dtype == np.bool_:
+ elif is_bool_dtype(key.dtype):
+ # an ndarray with bool-dtype by definition has no missing values.
+ # So we only need to check for NAs in ExtensionArrays
+ if is_extension_array_dtype(key.dtype):
+ if np.any(key.isna()):
+ raise ValueError(na_msg)
return True
elif isinstance(key, list):
try:
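In effect, the documented contract looks like this:

    >>> import numpy as np
    >>> import pandas as pd
    >>> from pandas.core.common import is_bool_indexer
    >>> is_bool_indexer(np.array([True, False]))
    True
    >>> is_bool_indexer(pd.Series([True, None]))  # object dtype with a missing value
    Traceback (most recent call last):
        ...
    ValueError: cannot index with vector containing NA / NaN values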
@@ -356,7 +386,7 @@ def standardize_mapping(into):
return partial(
collections.defaultdict, into.default_factory)
into = type(into)
- if not issubclass(into, collections.Mapping):
+ if not issubclass(into, compat.Mapping):
raise TypeError('unsupported type: {into}'.format(into=into))
elif into == collections.defaultdict:
raise TypeError(
diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
index 434d7f6ccfe13..7025f3000eb5f 100644
--- a/pandas/core/computation/eval.py
+++ b/pandas/core/computation/eval.py
@@ -323,6 +323,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
# to use a non-numeric indexer
try:
with warnings.catch_warnings(record=True):
+ # TODO: Filter the warnings we actually care about here.
target[assigner] = ret
except (TypeError, IndexError):
raise ValueError("Cannot assign expression output to target")
diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py
index 2bd1b0c5b3507..e08df3e340138 100644
--- a/pandas/core/computation/pytables.py
+++ b/pandas/core/computation/pytables.py
@@ -411,7 +411,7 @@ def visit_Subscript(self, node, **kwargs):
slobj = self.visit(node.slice)
try:
value = value.value
- except:
+ except AttributeError:
pass
try:
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 7dcdf878231f1..a552251ebbafa 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -106,6 +106,25 @@ def _is_numeric(self):
"""
return False
+ @property
+ def _is_boolean(self):
+ # type: () -> bool
+ """
+ Whether this dtype should be considered boolean.
+
+        By default, ExtensionDtypes are assumed to be non-boolean.
+ Setting this to True will affect the behavior of several places,
+ e.g.
+
+ * is_bool
+ * boolean indexing
+
+ Returns
+ -------
+ bool
+ """
+ return False
+
class ExtensionDtype(_DtypeOpsMixin):
"""A custom data type, to be paired with an ExtensionArray.
@@ -125,6 +144,7 @@ class ExtensionDtype(_DtypeOpsMixin):
pandas operations
* _is_numeric
+ * _is_boolean
Optionally one can override construct_array_type for construction
with the name of this dtype via the Registry. See
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index b8cbb41501dd1..5f0b71d4505c2 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -90,6 +90,33 @@ def ensure_categorical(arr):
return arr
+def ensure_int64_or_float64(arr, copy=False):
+ """
+    Ensure that an array of some integer dtype
+    has an int64 dtype if possible.
+    If that is not possible, e.g. because of potential overflow,
+    convert the array to float64 instead.
+
+ Parameters
+ ----------
+ arr : array-like
+ The array whose data type we want to enforce.
+    copy : bool
+ Whether to copy the original array or reuse
+ it in place, if possible.
+
+ Returns
+ -------
+    out_arr : ndarray
+        The input array cast to int64 if that is possible
+        without overflow, otherwise cast to float64.
+ """
+ try:
+ return arr.astype('int64', copy=copy, casting='safe')
+ except TypeError:
+ return arr.astype('float64', copy=copy)
+
+
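For instance:

    >>> import numpy as np
    >>> from pandas.core.dtypes.common import ensure_int64_or_float64
    >>> ensure_int64_or_float64(np.array([1, 2], dtype=np.int32)).dtype
    dtype('int64')
    >>> ensure_int64_or_float64(np.array([1, 2], dtype=np.uint64)).dtype
    dtype('float64')

uint64 values cannot be safely cast to int64, so the second call falls back to float64.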
def is_object_dtype(arr_or_dtype):
"""
Check whether an array-like or dtype is of the object dtype.
@@ -440,7 +467,7 @@ def is_timedelta64_dtype(arr_or_dtype):
return False
try:
tipo = _get_dtype_type(arr_or_dtype)
- except:
+ except (TypeError, ValueError, SyntaxError):
return False
return issubclass(tipo, np.timedelta64)
@@ -1592,6 +1619,11 @@ def is_bool_dtype(arr_or_dtype):
-------
boolean : Whether or not the array or dtype is of a boolean dtype.
+ Notes
+ -----
+ An ExtensionArray is considered boolean when the ``_is_boolean``
+ attribute is set to True.
+
Examples
--------
>>> is_bool_dtype(str)
@@ -1608,6 +1640,8 @@ def is_bool_dtype(arr_or_dtype):
False
>>> is_bool_dtype(np.array([True, False]))
True
+ >>> is_bool_dtype(pd.Categorical([True, False]))
+ True
"""
if arr_or_dtype is None:
@@ -1618,6 +1652,13 @@ def is_bool_dtype(arr_or_dtype):
# this isn't even a dtype
return False
+ if isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex)):
+ arr_or_dtype = arr_or_dtype.dtype
+
+ if isinstance(arr_or_dtype, CategoricalDtype):
+ arr_or_dtype = arr_or_dtype.categories
+ # now we use the special definition for Index
+
if isinstance(arr_or_dtype, ABCIndexClass):
# TODO(jreback)
@@ -1626,6 +1667,9 @@ def is_bool_dtype(arr_or_dtype):
# guess this
return (arr_or_dtype.is_object and
arr_or_dtype.inferred_type == 'boolean')
+ elif is_extension_array_dtype(arr_or_dtype):
+ dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype)
+ return dtype._is_boolean
return issubclass(tipo, np.bool_)
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 4fd77e41a1c67..fe5cc9389a8ba 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -358,11 +358,11 @@ def construct_from_string(cls, string):
try:
if string == 'category':
return cls()
- except:
+ else:
+ raise TypeError("cannot construct a CategoricalDtype")
+ except AttributeError:
pass
- raise TypeError("cannot construct a CategoricalDtype")
-
@staticmethod
def validate_ordered(ordered):
"""
@@ -462,6 +462,12 @@ def ordered(self):
"""Whether the categories have an ordered relationship"""
return self._ordered
+ @property
+ def _is_boolean(self):
+ from pandas.core.dtypes.common import is_bool_dtype
+
+ return is_bool_dtype(self.categories)
+
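So a boolean-valued categorical now reports a boolean dtype:

    >>> import pandas as pd
    >>> pd.Categorical([True, False]).dtype._is_boolean
    True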
class DatetimeTZDtypeType(type):
"""
@@ -513,7 +519,7 @@ def __new__(cls, unit=None, tz=None):
if m is not None:
unit = m.groupdict()['unit']
tz = m.groupdict()['tz']
- except:
+ except TypeError:
raise ValueError("could not construct DatetimeTZDtype")
elif isinstance(unit, compat.string_types):
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index ed416c3ef857d..67f391615eedb 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -1,10 +1,9 @@
""" basic inference routines """
-import collections
import re
import numpy as np
-from collections import Iterable
from numbers import Number
+from pandas import compat
from pandas.compat import (PY2, string_types, text_type,
string_and_binary_types, re_type)
from pandas._libs import lib
@@ -112,7 +111,7 @@ def _iterable_not_string(obj):
False
"""
- return (isinstance(obj, collections.Iterable) and
+ return (isinstance(obj, compat.Iterable) and
not isinstance(obj, string_types))
@@ -284,7 +283,7 @@ def is_list_like(obj):
False
"""
- return (isinstance(obj, Iterable) and
+ return (isinstance(obj, compat.Iterable) and
# we do not count strings/unicode/bytes as list-like
not isinstance(obj, string_and_binary_types) and
# exclude zero-dimensional numpy arrays, effectively scalars
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 251bc6587872d..138d1017aa43d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -109,10 +109,9 @@
_shared_doc_kwargs = dict(
axes='index, columns', klass='DataFrame',
axes_single_arg="{0 or 'index', 1 or 'columns'}",
- axis="""
- axis : {0 or 'index', 1 or 'columns'}, default 0
- - 0 or 'index': apply function to each column.
- - 1 or 'columns': apply function to each row.""",
+ axis="""axis : {0 or 'index', 1 or 'columns'}, default 0
+ If 0 or 'index': apply function to each column.
+ If 1 or 'columns': apply function to each row.""",
optional_by="""
by : str or list of str
Name or list of names to sort by.
@@ -418,9 +417,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
copy=copy)
# For data is list-like, or Iterable (will consume into list)
- elif (isinstance(data, collections.Iterable)
+ elif (isinstance(data, compat.Iterable)
and not isinstance(data, string_and_binary_types)):
- if not isinstance(data, collections.Sequence):
+ if not isinstance(data, compat.Sequence):
data = list(data)
if len(data) > 0:
if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1:
@@ -614,6 +613,35 @@ def shape(self):
"""
return len(self.index), len(self.columns)
+ @property
+ def _is_homogeneous_type(self):
+ """
+ Whether all the columns in a DataFrame have the same type.
+
+ Returns
+ -------
+ bool
+
+ Examples
+ --------
+ >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous_type
+ True
+ >>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous_type
+ False
+
+ Items with the same type but different sizes are considered
+ different types.
+
+ >>> DataFrame({
+ ... "A": np.array([1, 2], dtype=np.int32),
+ ... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type
+ False
+ """
+ if self._data.any_extension_types:
+ return len({block.dtype for block in self._data.blocks}) == 1
+ else:
+ return not self._data.is_mixed_type
+
def _repr_fits_vertical_(self):
"""
Check length against max_rows.
@@ -751,14 +779,52 @@ def style(self):
return Styler(self)
def iteritems(self):
- """
+ r"""
Iterator over (column name, Series) pairs.
- See also
+ Iterates over the DataFrame columns, returning a tuple with
+ the column name and the content as a Series.
+
+ Yields
+ ------
+ label : object
+ The column names for the DataFrame being iterated over.
+ content : Series
+ The column entries belonging to each label, as a Series.
+
+ See Also
--------
- iterrows : Iterate over DataFrame rows as (index, Series) pairs.
- itertuples : Iterate over DataFrame rows as namedtuples of the values.
+ DataFrame.iterrows : Iterate over DataFrame rows as
+ (index, Series) pairs.
+ DataFrame.itertuples : Iterate over DataFrame rows as namedtuples
+ of the values.
+
+        Examples
+        --------
+ >>> df = pd.DataFrame({'species': ['bear', 'bear', 'marsupial'],
+ ... 'population': [1864, 22000, 80000]},
+ ... index=['panda', 'polar', 'koala'])
+ >>> df
+ species population
+ panda bear 1864
+ polar bear 22000
+ koala marsupial 80000
+ >>> for label, content in df.iteritems():
+ ... print('label:', label)
+ ... print('content:', content, sep='\n')
+ ...
+ label: species
+ content:
+ panda bear
+ polar bear
+ koala marsupial
+ Name: species, dtype: object
+ label: population
+ content:
+ panda 1864
+ polar 22000
+ koala 80000
+ Name: population, dtype: int64
"""
if self.columns.is_unique and hasattr(self, '_item_cache'):
for k in self.columns:
@@ -1875,7 +1941,7 @@ def to_feather(self, fname):
to_feather(self, fname)
def to_parquet(self, fname, engine='auto', compression='snappy',
- **kwargs):
+ index=None, **kwargs):
"""
Write a DataFrame to the binary parquet format.
@@ -1897,6 +1963,13 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
'pyarrow' is unavailable.
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
Name of the compression to use. Use ``None`` for no compression.
+ index : bool, default None
+ If ``True``, include the dataframe's index(es) in the file output.
+ If ``False``, they will not be written to the file. If ``None``,
+ the behavior depends on the chosen engine.
+
+ .. versionadded:: 0.24.0
+
**kwargs
Additional arguments passed to the parquet library. See
:ref:`pandas io ` for more details.
@@ -1925,7 +1998,7 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
"""
from pandas.io.parquet import to_parquet
to_parquet(self, fname, engine,
- compression=compression, **kwargs)
+ compression=compression, index=index, **kwargs)
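A typical call with the new keyword (the filename is illustrative):

    >>> import pandas as pd
    >>> df = pd.DataFrame({'a': [1, 2]})
    >>> df.to_parquet('output.parquet', index=False)  # doctest: +SKIP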
@Substitution(header='Write out the column names. If a list of strings '
'is given, it is assumed to be aliases for the '
@@ -3187,7 +3260,7 @@ def _ensure_valid_index(self, value):
if not len(self.index) and is_list_like(value):
try:
value = Series(value)
- except:
+ except (ValueError, NotImplementedError, TypeError):
raise ValueError('Cannot set a frame with no defined index '
'and a value that cannot be converted to a '
'Series')
@@ -3246,7 +3319,7 @@ def assign(self, **kwargs):
Parameters
----------
- kwargs : keyword, value pairs
+ **kwargs : dict of {str: callable or Series}
The column names are keywords. If the values are
callable, they are computed on the DataFrame and
assigned to the new columns. The callable must not
@@ -3256,7 +3329,7 @@ def assign(self, **kwargs):
Returns
-------
- df : DataFrame
+ DataFrame
A new DataFrame with the new columns in addition to
all the existing columns.
@@ -3276,48 +3349,34 @@ def assign(self, **kwargs):
Examples
--------
- >>> df = pd.DataFrame({'A': range(1, 11), 'B': np.random.randn(10)})
+ >>> df = pd.DataFrame({'temp_c': [17.0, 25.0]},
+ ... index=['Portland', 'Berkeley'])
+ >>> df
+ temp_c
+ Portland 17.0
+ Berkeley 25.0
Where the value is a callable, evaluated on `df`:
-
- >>> df.assign(ln_A = lambda x: np.log(x.A))
- A B ln_A
- 0 1 0.426905 0.000000
- 1 2 -0.780949 0.693147
- 2 3 -0.418711 1.098612
- 3 4 -0.269708 1.386294
- 4 5 -0.274002 1.609438
- 5 6 -0.500792 1.791759
- 6 7 1.649697 1.945910
- 7 8 -1.495604 2.079442
- 8 9 0.549296 2.197225
- 9 10 -0.758542 2.302585
-
- Where the value already exists and is inserted:
-
- >>> newcol = np.log(df['A'])
- >>> df.assign(ln_A=newcol)
- A B ln_A
- 0 1 0.426905 0.000000
- 1 2 -0.780949 0.693147
- 2 3 -0.418711 1.098612
- 3 4 -0.269708 1.386294
- 4 5 -0.274002 1.609438
- 5 6 -0.500792 1.791759
- 6 7 1.649697 1.945910
- 7 8 -1.495604 2.079442
- 8 9 0.549296 2.197225
- 9 10 -0.758542 2.302585
-
- Where the keyword arguments depend on each other
-
- >>> df = pd.DataFrame({'A': [1, 2, 3]})
-
- >>> df.assign(B=df.A, C=lambda x:x['A']+ x['B'])
- A B C
- 0 1 1 2
- 1 2 2 4
- 2 3 3 6
+ >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32)
+ temp_c temp_f
+ Portland 17.0 62.6
+ Berkeley 25.0 77.0
+
+ Alternatively, the same behavior can be achieved by directly
+ referencing an existing Series or sequence:
+
+        >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32)
+ temp_c temp_f
+ Portland 17.0 62.6
+ Berkeley 25.0 77.0
+
+ In Python 3.6+, you can create multiple columns within the same assign
+ where one of the columns depends on another one defined within the same
+ assign:
+
+        >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
+ ... temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9)
+ temp_c temp_f temp_k
+ Portland 17.0 62.6 290.15
+ Berkeley 25.0 77.0 298.15
"""
data = self.copy()
@@ -3570,7 +3629,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
fill_axis=fill_axis,
broadcast_axis=broadcast_axis)
- @Appender(_shared_docs['reindex'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(NDFrame.reindex.__doc__)
@rewrite_axis_style_signature('labels', [('method', None),
('copy', True),
('level', None),
@@ -4420,7 +4480,8 @@ def f(vals):
# ----------------------------------------------------------------------
# Sorting
- @Appender(_shared_docs['sort_values'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(NDFrame.sort_values.__doc__)
def sort_values(self, by, axis=0, ascending=True, inplace=False,
kind='quicksort', na_position='last'):
inplace = validate_bool_kwarg(inplace, 'inplace')
@@ -4462,7 +4523,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
else:
return self._constructor(new_data).__finalize__(self)
- @Appender(_shared_docs['sort_index'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(NDFrame.sort_index.__doc__)
def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
kind='quicksort', na_position='last', sort_remaining=True,
by=None):
@@ -4827,7 +4889,7 @@ def _arith_op(left, right):
left, right = ops.fill_binop(left, right, fill_value)
return func(left, right)
- if this._is_mixed_type or other._is_mixed_type:
+ if ops.should_series_dispatch(this, other, func):
# iterate over columns
return ops.dispatch_to_series(this, other, _arith_op)
else:
@@ -6652,10 +6714,14 @@ def corr(self, method='pearson', min_periods=1):
Parameters
----------
- method : {'pearson', 'kendall', 'spearman'}
+ method : {'pearson', 'kendall', 'spearman'} or callable
* pearson : standard correlation coefficient
* kendall : Kendall Tau correlation coefficient
* spearman : Spearman rank correlation
+        * callable: callable with input two 1d ndarrays
+            and returning a float
+
+            .. versionadded:: 0.24.0
+
min_periods : int, optional
Minimum number of observations required per pair of columns
to have a valid result. Currently only available for pearson
@@ -6664,6 +6730,18 @@ def corr(self, method='pearson', min_periods=1):
Returns
-------
y : DataFrame
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> histogram_intersection = lambda a, b: np.minimum(a, b
+ ... ).sum().round(decimals=1)
+ >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)],
+ ... columns=['dogs', 'cats'])
+ >>> df.corr(method=histogram_intersection)
+ dogs cats
+ dogs 1.0 0.3
+ cats 0.3 1.0
"""
numeric_df = self._get_numeric_data()
cols = numeric_df.columns
@@ -6675,7 +6753,7 @@ def corr(self, method='pearson', min_periods=1):
elif method == 'spearman':
correl = libalgos.nancorr_spearman(ensure_float64(mat),
minp=min_periods)
- elif method == 'kendall':
+ elif method == 'kendall' or callable(method):
if min_periods is None:
min_periods = 1
mat = ensure_float64(mat).T
@@ -7228,38 +7306,82 @@ def _get_agg_axis(self, axis_num):
def mode(self, axis=0, numeric_only=False, dropna=True):
"""
- Gets the mode(s) of each element along the axis selected. Adds a row
- for each mode per label, fills in gaps with nan.
+ Get the mode(s) of each element along the selected axis.
- Note that there could be multiple values returned for the selected
- axis (when more than one item share the maximum frequency), which is
- the reason why a dataframe is returned. If you want to impute missing
- values with the mode in a dataframe ``df``, you can just do this:
- ``df.fillna(df.mode().iloc[0])``
+ The mode of a set of values is the value that appears most often.
+ It can be multiple values.
Parameters
----------
axis : {0 or 'index', 1 or 'columns'}, default 0
+ The axis to iterate over while searching for the mode:
+
* 0 or 'index' : get mode of each column
* 1 or 'columns' : get mode of each row
- numeric_only : boolean, default False
- if True, only apply to numeric columns
- dropna : boolean, default True
+ numeric_only : bool, default False
+ If True, only apply to numeric columns.
+ dropna : bool, default True
Don't consider counts of NaN/NaT.
.. versionadded:: 0.24.0
Returns
-------
- modes : DataFrame (sorted)
+ DataFrame
+ The modes of each column or row.
+
+ See Also
+ --------
+ Series.mode : Return the highest frequency value in a Series.
+ Series.value_counts : Return the counts of values in a Series.
Examples
--------
- >>> df = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 3]})
+ >>> df = pd.DataFrame([('bird', 2, 2),
+ ... ('mammal', 4, np.nan),
+ ... ('arthropod', 8, 0),
+ ... ('bird', 2, np.nan)],
+ ... index=('falcon', 'horse', 'spider', 'ostrich'),
+ ... columns=('species', 'legs', 'wings'))
+ >>> df
+ species legs wings
+ falcon bird 2 2.0
+ horse mammal 4 NaN
+ spider arthropod 8 0.0
+ ostrich bird 2 NaN
+
+        By default, missing values are not considered, and the modes of
+        ``wings`` are 0 and 2. The second row of ``species`` and ``legs``
+        contains ``NaN``, because each has only one mode, but the DataFrame
+        has two rows.
+
>>> df.mode()
- A
- 0 1
- 1 2
+ species legs wings
+ 0 bird 2.0 0.0
+ 1 NaN NaN 2.0
+
+        With ``dropna=False``, ``NaN`` values are considered, and they can be
+        the mode (as for ``wings``).
+
+ >>> df.mode(dropna=False)
+ species legs wings
+        0    bird     2    NaN
+
+ Setting ``numeric_only=True``, only the mode of numeric columns is
+ computed, and columns of other types are ignored.
+
+ >>> df.mode(numeric_only=True)
+ legs wings
+ 0 2.0 0.0
+ 1 NaN 2.0
+
+ To compute the mode over columns and not rows, use the axis parameter:
+
+ >>> df.mode(axis='columns', numeric_only=True)
+ 0 1
+ falcon 2.0 NaN
+ horse 4.0 NaN
+ spider 0.0 8.0
+ ostrich 2.0 NaN
"""
data = self if not numeric_only else self._get_numeric_data()
@@ -7431,52 +7553,66 @@ def to_period(self, freq=None, axis=0, copy=True):
def isin(self, values):
"""
- Return boolean DataFrame showing whether each element in the
- DataFrame is contained in values.
+ Whether each element in the DataFrame is contained in values.
Parameters
----------
- values : iterable, Series, DataFrame or dictionary
+ values : iterable, Series, DataFrame or dict
The result will only be true at a location if all the
labels match. If `values` is a Series, that's the index. If
- `values` is a dictionary, the keys must be the column names,
+ `values` is a dict, the keys must be the column names,
which must match. If `values` is a DataFrame,
then both the index and column labels must match.
Returns
-------
+ DataFrame
+ DataFrame of booleans showing whether each element in the DataFrame
+ is contained in values.
- DataFrame of booleans
+ See Also
+ --------
+ DataFrame.eq: Equality test for DataFrame.
+ Series.isin: Equivalent method on Series.
+ Series.str.contains: Test if pattern or regex is contained within a
+ string of a Series or Index.
Examples
--------
- When ``values`` is a list:
-
- >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
- >>> df.isin([1, 3, 12, 'a'])
- A B
- 0 True True
- 1 False False
- 2 True False
-
- When ``values`` is a dict:
-
- >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 4, 7]})
- >>> df.isin({'A': [1, 3], 'B': [4, 7, 12]})
- A B
- 0 True False # Note that B didn't match the 1 here.
- 1 False True
- 2 True True
-
- When ``values`` is a Series or DataFrame:
-
- >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
- >>> df2 = pd.DataFrame({'A': [1, 3, 3, 2], 'B': ['e', 'f', 'f', 'e']})
- >>> df.isin(df2)
- A B
- 0 True False
- 1 False False # Column A in `df2` has a 3, but not at index 1.
- 2 True True
+
+ >>> df = pd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]},
+ ... index=['falcon', 'dog'])
+ >>> df
+ num_legs num_wings
+ falcon 2 2
+ dog 4 0
+
+ When ``values`` is a list check whether every value in the DataFrame
+ is present in the list (which animals have 0 or 2 legs or wings)
+
+ >>> df.isin([0, 2])
+ num_legs num_wings
+ falcon True True
+ dog False True
+
+ When ``values`` is a dict, we can pass values to check for each
+ column separately:
+
+ >>> df.isin({'num_wings': [0, 3]})
+ num_legs num_wings
+ falcon False False
+ dog False True
+
+ When ``values`` is a Series or DataFrame the index and column must
+ match. Note that 'falcon' does not match based on the number of legs
+ in df2.
+
+        >>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]},
+ ... index=['spider', 'falcon'])
+ >>> df.isin(other)
+ num_legs num_wings
+ falcon True True
+ dog False False
"""
if isinstance(values, dict):
from pandas.core.reshape.concat import concat
@@ -7614,7 +7750,7 @@ def convert(v):
values = np.array([convert(v) for v in values])
else:
values = convert(values)
- except:
+ except (ValueError, TypeError):
values = convert(values)
else:
@@ -7655,7 +7791,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None):
if isinstance(data[0], (list, tuple)):
return _list_to_arrays(data, columns, coerce_float=coerce_float,
dtype=dtype)
- elif isinstance(data[0], collections.Mapping):
+ elif isinstance(data[0], compat.Mapping):
return _list_of_dict_to_arrays(data, columns,
coerce_float=coerce_float, dtype=dtype)
elif isinstance(data[0], Series):
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index cdc5b4310bce2..cc157cc7228a8 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -643,7 +643,8 @@ def _set_axis(self, axis, labels):
self._data.set_axis(axis, labels)
self._clear_item_cache()
- _shared_docs['transpose'] = """
+ def transpose(self, *args, **kwargs):
+ """
Permute the dimensions of the %(klass)s
Parameters
@@ -663,9 +664,6 @@ def _set_axis(self, axis, labels):
y : same as input
"""
- @Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
- def transpose(self, *args, **kwargs):
-
# construct the args
axes, kwargs = self._construct_axes_from_arguments(args, kwargs,
require_all=True)
@@ -965,9 +963,8 @@ def swaplevel(self, i=-2, j=-1, axis=0):
# ----------------------------------------------------------------------
# Rename
- # TODO: define separate funcs for DataFrame, Series and Panel so you can
- # get completion on keyword arguments.
- _shared_docs['rename'] = """
+ def rename(self, *args, **kwargs):
+ """
Alter axes input function or functions. Function / dict values must be
unique (1-to-1). Labels not contained in a dict / Series will be left
as-is. Extra labels listed don't throw an error. Alternatively, change
@@ -975,13 +972,11 @@ def swaplevel(self, i=-2, j=-1, axis=0):
Parameters
----------
- %(optional_mapper)s
%(axes)s : scalar, list-like, dict-like or function, optional
Scalar or list-like will alter the ``Series.name`` attribute,
and raise on DataFrame or Panel.
dict-like or functions are transformations to apply to
that axis' values
- %(optional_axis)s
copy : boolean, default True
Also copy underlying data
inplace : boolean, default False
@@ -1069,12 +1064,6 @@ def swaplevel(self, i=-2, j=-1, axis=0):
See the :ref:`user guide <basics.rename>` for more.
"""
-
- @Appender(_shared_docs['rename'] % dict(axes='axes keywords for this'
- ' object', klass='NDFrame',
- optional_mapper='',
- optional_axis=''))
- def rename(self, *args, **kwargs):
axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
copy = kwargs.pop('copy', True)
inplace = kwargs.pop('inplace', False)
@@ -1127,8 +1116,6 @@ def f(x):
else:
return result.__finalize__(self)
- rename.__doc__ = _shared_docs['rename']
-
def rename_axis(self, mapper, axis=0, copy=True, inplace=False):
"""
Alter the name of the index or columns.
@@ -1979,11 +1966,17 @@ def _repr_latex_(self):
If you wish to write to more than one sheet in the workbook, it is
necessary to specify an ExcelWriter object:
- >>> writer = pd.ExcelWriter('output2.xlsx', engine='xlsxwriter')
- >>> df1.to_excel(writer, sheet_name='Sheet1')
>>> df2 = df1.copy()
- >>> df2.to_excel(writer, sheet_name='Sheet2')
- >>> writer.save()
+ >>> with pd.ExcelWriter('output.xlsx') as writer:
+ ... df1.to_excel(writer, sheet_name='Sheet_name_1')
+ ... df2.to_excel(writer, sheet_name='Sheet_name_2')
+
+ To set the library that is used to write the Excel file,
+ you can pass the `engine` keyword (the default engine is
+ automatically chosen depending on the file extension):
+
+ >>> df1.to_excel('output1.xlsx', engine='xlsxwriter')
+
"""
def to_json(self, path_or_buf=None, orient=None, date_format=None,
@@ -2054,10 +2047,12 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
like.
.. versionadded:: 0.19.0
- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
- default 'infer'
+
+ compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
+
A string representing the compression to use in the output file,
- only used when the first argument is a filename.
+ only used when the first argument is a filename. By default, the
+ compression is inferred from the filename.
.. versionadded:: 0.21.0
.. versionchanged:: 0.24.0
@@ -2494,80 +2489,70 @@ def to_xarray(self):
Returns
-------
- a DataArray for a Series
- a Dataset for a DataFrame
- a DataArray for higher dims
+ xarray.DataArray or xarray.Dataset
+ Data in the pandas structure converted to Dataset if the object is
+ a DataFrame, or a DataArray if the object is a Series.
+
+ See Also
+ --------
+ DataFrame.to_hdf : Write DataFrame to an HDF5 file.
+ DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
Examples
--------
- >>> df = pd.DataFrame({'A' : [1, 1, 2],
- 'B' : ['foo', 'bar', 'foo'],
- 'C' : np.arange(4.,7)})
+ >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2),
+ ... ('parrot', 'bird', 24.0, 2),
+ ... ('lion', 'mammal', 80.5, 4),
+ ... ('monkey', 'mammal', np.nan, 4)],
+ ... columns=['name', 'class', 'max_speed',
+ ... 'num_legs'])
>>> df
- A B C
- 0 1 foo 4.0
- 1 1 bar 5.0
- 2 2 foo 6.0
+ name class max_speed num_legs
+ 0 falcon bird 389.0 2
+ 1 parrot bird 24.0 2
+ 2 lion mammal 80.5 4
+ 3 monkey mammal NaN 4
>>> df.to_xarray()
<xarray.Dataset>
-    Dimensions:    (index: 3)
+ Dimensions: (index: 4)
Coordinates:
- * index (index) int64 0 1 2
+ * index (index) int64 0 1 2 3
Data variables:
- A (index) int64 1 1 2
- B (index) object 'foo' 'bar' 'foo'
- C (index) float64 4.0 5.0 6.0
-
- >>> df = pd.DataFrame({'A' : [1, 1, 2],
- 'B' : ['foo', 'bar', 'foo'],
- 'C' : np.arange(4.,7)}
- ).set_index(['B','A'])
- >>> df
- C
- B A
- foo 1 4.0
- bar 1 5.0
- foo 2 6.0
-
- >>> df.to_xarray()
+ name (index) object 'falcon' 'parrot' 'lion' 'monkey'
+ class (index) object 'bird' 'bird' 'mammal' 'mammal'
+ max_speed (index) float64 389.0 24.0 80.5 nan
+ num_legs (index) int64 2 2 4 4
+
+ >>> df['max_speed'].to_xarray()
+    <xarray.DataArray 'max_speed' (index: 4)>
+ array([389. , 24. , 80.5, nan])
+ Coordinates:
+ * index (index) int64 0 1 2 3
+
+ >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01',
+ ... '2018-01-02', '2018-01-02'])
+ >>> df_multiindex = pd.DataFrame({'date': dates,
+ ... 'animal': ['falcon', 'parrot', 'falcon',
+ ... 'parrot'],
+ ... 'speed': [350, 18, 361, 15]}).set_index(['date',
+ ... 'animal'])
+ >>> df_multiindex
+ speed
+ date animal
+ 2018-01-01 falcon 350
+ parrot 18
+ 2018-01-02 falcon 361
+ parrot 15
+
+ >>> df_multiindex.to_xarray()
+    <xarray.Dataset>
-    Dimensions:    (A: 2, B: 2)
+ Dimensions: (animal: 2, date: 2)
Coordinates:
- * B (B) object 'bar' 'foo'
- * A (A) int64 1 2
+ * date (date) datetime64[ns] 2018-01-01 2018-01-02
+ * animal (animal) object 'falcon' 'parrot'
Data variables:
- C (B, A) float64 5.0 nan 4.0 6.0
-
- >>> p = pd.Panel(np.arange(24).reshape(4,3,2),
- items=list('ABCD'),
- major_axis=pd.date_range('20130101', periods=3),
- minor_axis=['first', 'second'])
- >>> p
-    <class 'pandas.core.panel.Panel'>
- Dimensions: 4 (items) x 3 (major_axis) x 2 (minor_axis)
- Items axis: A to D
- Major_axis axis: 2013-01-01 00:00:00 to 2013-01-03 00:00:00
- Minor_axis axis: first to second
-
- >>> p.to_xarray()
-    <xarray.DataArray (items: 4, major_axis: 3, minor_axis: 2)>
- array([[[ 0, 1],
- [ 2, 3],
- [ 4, 5]],
- [[ 6, 7],
- [ 8, 9],
- [10, 11]],
- [[12, 13],
- [14, 15],
- [16, 17]],
- [[18, 19],
- [20, 21],
- [22, 23]]])
- Coordinates:
- * items (items) object 'A' 'B' 'C' 'D'
- * major_axis (major_axis) datetime64[ns] 2013-01-01 2013-01-02 2013-01-03 # noqa
- * minor_axis (minor_axis) object 'first' 'second'
+ speed (date, animal) int64 350 18 361 15
Notes
-----
@@ -3026,7 +3011,8 @@ def __delitem__(self, key):
except KeyError:
pass
- _shared_docs['_take'] = """
+ def _take(self, indices, axis=0, is_copy=True):
+ """
Return the elements in the given *positional* indices along an axis.
This means that we are not indexing according to actual values in
@@ -3057,9 +3043,6 @@ def __delitem__(self, key):
numpy.ndarray.take
numpy.take
"""
-
- @Appender(_shared_docs['_take'])
- def _take(self, indices, axis=0, is_copy=True):
self._consolidate_inplace()
new_data = self._data.take(indices,
@@ -3074,7 +3057,8 @@ def _take(self, indices, axis=0, is_copy=True):
return result
- _shared_docs['take'] = """
+ def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs):
+ """
Return the elements in the given *positional* indices along an axis.
This means that we are not indexing according to actual values in
@@ -3157,9 +3141,6 @@ class max_speed
1 monkey mammal NaN
3 lion mammal 80.5
"""
-
- @Appender(_shared_docs['take'])
- def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs):
if convert is not None:
msg = ("The 'convert' parameter is deprecated "
"and will be removed in a future version.")
@@ -3582,7 +3563,9 @@ def add_suffix(self, suffix):
mapper = {self._info_axis_name: f}
return self.rename(**mapper)
- _shared_docs['sort_values'] = """
+ def sort_values(self, by=None, axis=0, ascending=True, inplace=False,
+ kind='quicksort', na_position='last'):
+ """
Sort by the values along either axis
Parameters
@@ -3667,17 +3650,12 @@ def add_suffix(self, suffix):
0 A 2 0
1 A 1 1
"""
-
- def sort_values(self, by=None, axis=0, ascending=True, inplace=False,
- kind='quicksort', na_position='last'):
- """
- NOT IMPLEMENTED: do not call this method, as sorting values is not
- supported for Panel objects and will raise an error.
- """
raise NotImplementedError("sort_values has not been implemented "
"on Panel or Panel4D objects.")
- _shared_docs['sort_index'] = """
+ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
+ kind='quicksort', na_position='last', sort_remaining=True):
+ """
Sort object by labels (along an axis)
Parameters
@@ -3705,10 +3683,6 @@ def sort_values(self, by=None, axis=0, ascending=True, inplace=False,
-------
sorted_obj : %(klass)s
"""
-
- @Appender(_shared_docs['sort_index'] % dict(axes="axes", klass="NDFrame"))
- def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
- kind='quicksort', na_position='last', sort_remaining=True):
inplace = validate_bool_kwarg(inplace, 'inplace')
axis = self._get_axis_number(axis)
axis_name = self._get_axis_name(axis)
@@ -3726,7 +3700,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
new_axis = labels.take(sort_index)
return self.reindex(**{axis_name: new_axis})
- _shared_docs['reindex'] = """
+ def reindex(self, *args, **kwargs):
+ """
Conform %(klass)s to new index with optional filling logic, placing
NA/NaN in locations having no value in the previous index. A new object
is produced unless the new index is equivalent to the current one and
@@ -3922,14 +3897,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
-------
reindexed : %(klass)s
"""
-
- # TODO: Decide if we care about having different examples for different
- # kinds
-
- @Appender(_shared_docs['reindex'] % dict(axes="axes", klass="NDFrame",
- optional_labels="",
- optional_axis=""))
- def reindex(self, *args, **kwargs):
+ # TODO: Decide if we care about having different examples for different
+ # kinds
# construct the args
axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
@@ -4545,17 +4514,16 @@ def pipe(self, func, *args, **kwargs):
Parameters
----------
- func : function, string, dictionary, or list of string/functions
+ func : function, str, list or dict
Function to use for aggregating the data. If a function, must either
- work when passed a %(klass)s or when passed to %(klass)s.apply. For
- a DataFrame, can pass a dict, if the keys are DataFrame column names.
+ work when passed a %(klass)s or when passed to %(klass)s.apply.
Accepted combinations are:
- - string function name.
- - function.
- - list of functions.
- - dict of column names -> functions (or list of functions).
+ - function
+ - string function name
+ - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
+ - dict of axis labels -> functions, function names or list of such.
%(axis)s
*args
Positional arguments to pass to `func`.
@@ -4564,7 +4532,11 @@ def pipe(self, func, *args, **kwargs):
Returns
-------
- aggregated : %(klass)s
+ DataFrame, Series or scalar
+ if DataFrame.agg is called with a single function, returns a Series
+ if DataFrame.agg is called with several functions, returns a DataFrame
+ if Series.agg is called with single function, returns a scalar
+ if Series.agg is called with several functions, returns a Series
Notes
-----
@@ -4574,50 +4546,71 @@ def pipe(self, func, *args, **kwargs):
""")
_shared_docs['transform'] = ("""
- Call function producing a like-indexed %(klass)s
- and return a %(klass)s with the transformed values
+ Call ``func`` on self producing a %(klass)s with transformed values
+ and that has the same axis length as self.
.. versionadded:: 0.20.0
Parameters
----------
- func : callable, string, dictionary, or list of string/callables
- To apply to column
+ func : function, str, list or dict
+ Function to use for transforming the data. If a function, must either
+ work when passed a %(klass)s or when passed to %(klass)s.apply.
- Accepted Combinations are:
+ Accepted combinations are:
- - string function name
- function
- - list of functions
- - dict of column names -> functions (or list of functions)
+ - string function name
+    - list of functions and/or function names, e.g. ``[np.exp, 'sqrt']``
+ - dict of axis labels -> functions, function names or list of such.
+ %(axis)s
+ *args
+ Positional arguments to pass to `func`.
+ **kwargs
+ Keyword arguments to pass to `func`.
Returns
-------
- transformed : %(klass)s
+ %(klass)s
+ A %(klass)s that must have the same length as self.
- Examples
+ Raises
+ ------
+ ValueError : If the returned %(klass)s has a different length than self.
+
+ See Also
--------
- >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
- ... index=pd.date_range('1/1/2000', periods=10))
- df.iloc[3:7] = np.nan
-
- >>> df.transform(lambda x: (x - x.mean()) / x.std())
- A B C
- 2000-01-01 0.579457 1.236184 0.123424
- 2000-01-02 0.370357 -0.605875 -1.231325
- 2000-01-03 1.455756 -0.277446 0.288967
- 2000-01-04 NaN NaN NaN
- 2000-01-05 NaN NaN NaN
- 2000-01-06 NaN NaN NaN
- 2000-01-07 NaN NaN NaN
- 2000-01-08 -0.498658 1.274522 1.642524
- 2000-01-09 -0.540524 -1.012676 -0.828968
- 2000-01-10 -1.366388 -0.614710 0.005378
-
- See also
+ %(klass)s.agg : Only perform aggregating type operations.
+ %(klass)s.apply : Invoke function on a %(klass)s.
+
+ Examples
--------
- pandas.%(klass)s.aggregate
- pandas.%(klass)s.apply
+ >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)})
+ >>> df
+ A B
+ 0 0 1
+ 1 1 2
+ 2 2 3
+ >>> df.transform(lambda x: x + 1)
+ A B
+ 0 1 2
+ 1 2 3
+ 2 3 4
+
+ Even though the resulting %(klass)s must have the same length as the
+ input %(klass)s, it is possible to provide several input functions:
+
+ >>> s = pd.Series(range(3))
+ >>> s
+ 0 0
+ 1 1
+ 2 2
+ dtype: int64
+ >>> s.transform([np.sqrt, np.exp])
+ sqrt exp
+ 0 0.000000 1.000000
+ 1 1.000000 2.718282
+ 2 1.414214 7.389056
""")
# ----------------------------------------------------------------------
@@ -7041,8 +7034,12 @@ def clip_lower(self, threshold, axis=None, inplace=False):
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
group_keys=True, squeeze=False, observed=False, **kwargs):
"""
- Group series using mapper (dict or key function, apply given function
- to group, return result as series) or by a series of columns.
+ Group DataFrame or Series using a mapper or by a Series of columns.
+
+ A groupby operation involves some combination of splitting the
+ object, applying a function, and combining the results. This can be
+ used to group large amounts of data and compute operations on these
+ groups.
Parameters
----------
@@ -7055,54 +7052,95 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
values are used as-is determine the groups. A label or list of
labels may be passed to group by the columns in ``self``. Notice
that a tuple is interpreted as a (single) key.
- axis : int, default 0
+ axis : {0 or 'index', 1 or 'columns'}, default 0
+ Split along rows (0) or columns (1).
level : int, level name, or sequence of such, default None
If the axis is a MultiIndex (hierarchical), group by a particular
- level or levels
- as_index : boolean, default True
+ level or levels.
+ as_index : bool, default True
For aggregated output, return object with group labels as the
index. Only relevant for DataFrame input. as_index=False is
- effectively "SQL-style" grouped output
- sort : boolean, default True
+ effectively "SQL-style" grouped output.
+ sort : bool, default True
Sort group keys. Get better performance by turning this off.
Note this does not influence the order of observations within each
- group. groupby preserves the order of rows within each group.
- group_keys : boolean, default True
- When calling apply, add group keys to index to identify pieces
- squeeze : boolean, default False
- reduce the dimensionality of the return type if possible,
- otherwise return a consistent type
- observed : boolean, default False
- This only applies if any of the groupers are Categoricals
+ group. Groupby preserves the order of rows within each group.
+ group_keys : bool, default True
+ When calling apply, add group keys to index to identify pieces.
+ squeeze : bool, default False
+ Reduce the dimensionality of the return type if possible,
+ otherwise return a consistent type.
+ observed : bool, default False
+ This only applies if any of the groupers are Categoricals.
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.
.. versionadded:: 0.23.0
+ **kwargs
+ Optional, only accepts keyword argument 'mutated' and is passed
+ to groupby.
+
Returns
-------
- GroupBy object
+ DataFrameGroupBy or SeriesGroupBy
+ Depends on the calling object and returns a groupby object that
+ contains information about the groups.
- Examples
+ See Also
--------
- DataFrame results
-
- >>> data.groupby(func, axis=0).mean()
- >>> data.groupby(['col1', 'col2'])['col3'].mean()
-
- DataFrame with hierarchical index
-
- >>> data.groupby(['col1', 'col2']).mean()
+ resample : Convenience method for frequency conversion and resampling
+ of time series.
Notes
-----
See the `user guide
<http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
- See also
+ Examples
--------
- resample : Convenience method for frequency conversion and resampling
- of time series.
+ >>> df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon',
+ ... 'Parrot', 'Parrot'],
+ ... 'Max Speed' : [380., 370., 24., 26.]})
+ >>> df
+ Animal Max Speed
+ 0 Falcon 380.0
+ 1 Falcon 370.0
+ 2 Parrot 24.0
+ 3 Parrot 26.0
+ >>> df.groupby(['Animal']).mean()
+ Max Speed
+ Animal
+ Falcon 375.0
+ Parrot 25.0
+
+ **Hierarchical Indexes**
+
+ We can group by different levels of a hierarchical index
+ using the `level` parameter:
+
+ >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+ ... ['Captive', 'Wild', 'Captive', 'Wild']]
+ >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
+ >>> df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]},
+ ... index=index)
+ >>> df
+ Max Speed
+ Animal Type
+ Falcon Captive 390.0
+ Wild 350.0
+ Parrot Captive 30.0
+ Wild 20.0
+ >>> df.groupby(level=0).mean()
+ Max Speed
+ Animal
+ Falcon 370.0
+ Parrot 25.0
+ >>> df.groupby(level=1).mean()
+ Max Speed
+ Type
+ Captive 210.0
+ Wild 185.0
"""
from pandas.core.groupby.groupby import groupby
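For illustration of the `as_index` behavior documented above, a minimal sketch (not part of the patch):

    import pandas as pd

    df = pd.DataFrame({'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
                       'Max Speed': [380., 370., 24., 26.]})
    df.groupby('Animal').mean()                  # group labels become the index
    df.groupby('Animal', as_index=False).mean()  # "SQL-style": labels stay a column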
@@ -8268,6 +8306,11 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None,
See Notes.
axis : %(axes_single_arg)s
+ See Also
+ --------
+ Index.shift : Shift values of Index.
+ DatetimeIndex.shift : Shift values of DatetimeIndex.
+
Notes
-----
If freq is specified then the index values are shifted but the data
@@ -9401,7 +9444,7 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None,
cls.ewm = ewm
- @Appender(_shared_docs['transform'] % _shared_doc_kwargs)
+ @Appender(_shared_docs['transform'] % dict(axis="", **_shared_doc_kwargs))
def transform(self, func, *args, **kwargs):
result = self.agg(func, *args, **kwargs)
if is_scalar(result) or len(result) != len(self):
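The length check above is what distinguishes `transform` from `agg`: any function that collapses the input to a scalar is rejected. A minimal sketch of the behavior:

    import pandas as pd

    s = pd.Series([1, 2, 3])
    s.transform(lambda x: x + 1)   # fine: result has the same length
    try:
        s.transform('sum')         # aggregates to a scalar
    except ValueError:
        pass                       # rejected by the length check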
@@ -9476,80 +9519,110 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=None, date_format=None, doublequote=True,
escapechar=None, decimal='.'):
- r"""Write object to a comma-separated values (csv) file
+ r"""
+ Write object to a comma-separated values (csv) file.
+
+ .. versionchanged:: 0.24.0
+ The order of arguments for Series was changed.
Parameters
----------
- path_or_buf : string or file handle, default None
+ path_or_buf : str or file handle, default None
File path or object, if None is provided the result is returned as
a string.
+
.. versionchanged:: 0.24.0
- Was previously named "path" for Series.
- sep : character, default ','
- Field delimiter for the output file.
- na_rep : string, default ''
- Missing data representation
- float_format : string, default None
- Format string for floating point numbers
+
+ Was previously named "path" for Series.
+
+ sep : str, default ','
+ String of length 1. Field delimiter for the output file.
+ na_rep : str, default ''
+ Missing data representation.
+ float_format : str, default None
+ Format string for floating point numbers.
columns : sequence, optional
- Columns to write
- header : boolean or list of string, default True
+ Columns to write.
+ header : bool or list of str, default True
Write out the column names. If a list of strings is given it is
- assumed to be aliases for the column names
+ assumed to be aliases for the column names.
+
.. versionchanged:: 0.24.0
- Previously defaulted to False for Series.
- index : boolean, default True
- Write row names (index)
- index_label : string or sequence, or False, default None
+
+ Previously defaulted to False for Series.
+
+ index : bool, default True
+ Write row names (index).
+ index_label : str or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
- sequence should be given if the object uses MultiIndex. If
+ sequence should be given if the object uses MultiIndex. If
False do not print fields for index names. Use index_label=False
- for easier importing in R
+ for easier importing in R.
mode : str
- Python write mode, default 'w'
- encoding : string, optional
+ Python write mode, default 'w'.
+ encoding : str, optional
A string representing the encoding to use in the output file,
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
- compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
- default 'infer'
- If 'infer' and `path_or_buf` is path-like, then detect compression
- from the following extensions: '.gz', '.bz2', '.zip' or '.xz'
- (otherwise no compression).
+ compression : str, default 'infer'
+ Compression mode among the following possible values: {'infer',
+ 'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf`
+ is path-like, then detect compression from the following
+ extensions: '.gz', '.bz2', '.zip' or '.xz' (otherwise no
+ compression).
.. versionchanged:: 0.24.0
- 'infer' option added and set to default
- line_terminator : string, default ``'\n'``
- The newline character or character sequence to use in the output
- file
+
+ 'infer' option added and set to default.
+
quoting : optional constant from csv module
- defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
+ Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
- will treat them as non-numeric
- quotechar : string (length 1), default '\"'
- character used to quote fields
- doublequote : boolean, default True
- Control quoting of `quotechar` inside a field
- escapechar : string (length 1), default None
- character used to escape `sep` and `quotechar` when appropriate
+ will treat them as non-numeric.
+ quotechar : str, default '\"'
+ String of length 1. Character used to quote fields.
+ line_terminator : str, default ``'\n'``
+ The newline character or character sequence to use in the output
+ file.
chunksize : int or None
- rows to write at a time
- tupleize_cols : boolean, default False
- .. deprecated:: 0.21.0
- This argument will be removed and will always write each row
- of the multi-index as a separate row in the CSV file.
-
+ Rows to write at a time.
+ tupleize_cols : bool, default False
Write MultiIndex columns as a list of tuples (if True) or in
the new, expanded format, where each MultiIndex column is a row
in the CSV (if False).
- date_format : string, default None
- Format string for datetime objects
- decimal: string, default '.'
+
+ .. deprecated:: 0.21.0
+ This argument will be removed and will always write each row
+ of the multi-index as a separate row in the CSV file.
+ date_format : str, default None
+ Format string for datetime objects.
+ doublequote : bool, default True
+ Control quoting of `quotechar` inside a field.
+ escapechar : str, default None
+ String of length 1. Character used to escape `sep` and `quotechar`
+ when appropriate.
+ decimal : str, default '.'
Character recognized as decimal separator. E.g. use ',' for
- European data
+ European data.
- .. versionchanged:: 0.24.0
- The order of arguments for Series was changed.
+ Returns
+ -------
+ None or str
+ If path_or_buf is None, returns the resulting csv format as a
+ string. Otherwise returns None.
+
+ See Also
+ --------
+ pandas.read_csv : Load a CSV file into a DataFrame.
+ pandas.DataFrame.to_excel : Write DataFrame to an Excel file.
+
+ Examples
+ --------
+ >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
+ ... 'mask': ['red', 'purple'],
+ ... 'weapon': ['sai', 'bo staff']})
+ >>> df.to_csv(index=False)
+ 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
"""
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
@@ -9651,15 +9724,15 @@ def _doc_parms(cls):
original index.
* None : reduce all axes, return a scalar.
+bool_only : boolean, default None
+ Include only boolean columns. If None, will attempt to use everything,
+ then use only boolean data. Not implemented for Series.
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
level : int or level name, default None
If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a %(name1)s.
-bool_only : boolean, default None
- Include only boolean columns. If None, will attempt to use everything,
- then use only boolean data. Not implemented for Series.
**kwargs : any, default None
Additional keywords have no effect but might be accepted for
compatibility with NumPy.
diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 96c74f7fd4d75..ac84971de08d8 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -44,8 +44,15 @@ def _gotitem(self, key, ndim, subset=None):
# we need to make a shallow copy of ourselves
# with the same groupby
kwargs = {attr: getattr(self, attr) for attr in self._attributes}
+
+ # Try to select from a DataFrame, falling back to a Series
+ try:
+ groupby = self._groupby[key]
+ except IndexError:
+ groupby = self._groupby
+
self = self.__class__(subset,
- groupby=self._groupby[key],
+ groupby=groupby,
parent=self,
**kwargs)
self._reset_cache()
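A hedged sketch of the selection-with-fallback pattern introduced above; `_select` is a hypothetical name used for illustration only:

    import pandas as pd

    def _select(grouped, key):
        # Prefer column selection (DataFrameGroupBy -> SeriesGroupBy),
        # fall back to the groupby object itself when that fails.
        try:
            return grouped[key]
        except IndexError:
            return grouped

    df = pd.DataFrame({'A': [1, 1, 2], 'B': [4., 5., 6.]})
    _select(df.groupby('A'), 'B').mean()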
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 685635fb6854d..f15b1203a334e 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -758,7 +758,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
if isinstance(func_or_funcs, compat.string_types):
return getattr(self, func_or_funcs)(*args, **kwargs)
- if isinstance(func_or_funcs, collections.Iterable):
+ if isinstance(func_or_funcs, compat.Iterable):
# Catch instances of lists / tuples
# but not the class list / tuple itself.
ret = self._aggregate_multiple_funcs(func_or_funcs,
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ba04ff3a3d3ee..d9f7b4d9c31c3 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -23,6 +23,7 @@
ensure_float64,
ensure_platform_int,
ensure_int64,
+ ensure_int64_or_float64,
ensure_object,
needs_i8_conversion,
is_integer_dtype,
@@ -471,7 +472,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1,
if (values == iNaT).any():
values = ensure_float64(values)
else:
- values = values.astype('int64', copy=False)
+ values = ensure_int64_or_float64(values)
elif is_numeric and not is_complex_dtype(values):
values = ensure_float64(values)
else:
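The switch to `ensure_int64_or_float64` matters for uint64 input, where a blind `astype('int64')` silently wraps around; the helper presumably falls back to float64 when a safe int64 cast is impossible. The underlying hazard:

    import numpy as np

    values = np.array([2 ** 63], dtype=np.uint64)  # does not fit in int64
    values.astype('int64')      # wraps to -9223372036854775808
    values.astype('float64')    # keeps the magnitude (as a float)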
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ca381160de0df..af04a846ed787 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -98,6 +98,7 @@ def cmp_method(self, other):
# numpy will show a DeprecationWarning on invalid elementwise
# comparisons, this will raise in the future
with warnings.catch_warnings(record=True):
+ warnings.filterwarnings("ignore", "elementwise", FutureWarning)
with np.errstate(all='ignore'):
result = op(self.values, np.asarray(other))
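The added filter narrows the suppression to NumPy's "elementwise comparison failed" FutureWarning rather than swallowing every warning recorded. The pattern, sketched (whether the comparison warns depends on the NumPy version):

    import warnings
    import numpy as np

    with warnings.catch_warnings(record=True):
        # Only the targeted warning is ignored; others are still recorded.
        warnings.filterwarnings("ignore", "elementwise", FutureWarning)
        result = np.arange(3) == 'a'   # may emit the warning on older NumPy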
@@ -1113,7 +1114,7 @@ def to_series(self, index=None, name=None):
if name is None:
name = self.name
- return Series(self._to_embed(), index=index, name=name)
+ return Series(self.values.copy(), index=index, name=name)
def to_frame(self, index=True, name=None):
"""
@@ -1176,18 +1177,6 @@ def to_frame(self, index=True, name=None):
result.index = self
return result
- def _to_embed(self, keep_tz=False, dtype=None):
- """
- *this is an internal non-public method*
-
- return an array repr of this object, potentially casting to object
-
- """
- if dtype is not None:
- return self.astype(dtype)._to_embed(keep_tz=keep_tz)
-
- return self.values.copy()
-
_index_shared_docs['astype'] = """
Create an Index with values cast to dtypes. The class of a new Index
is determined by dtype. When conversion is impossible, a ValueError
@@ -3109,7 +3098,6 @@ def get_loc(self, key, method=None, tolerance=None):
return self._engine.get_loc(key)
except KeyError:
return self._engine.get_loc(self._maybe_cast_indexer(key))
-
indexer = self.get_indexer([key], method=method, tolerance=tolerance)
if indexer.ndim > 1 or indexer.size > 1:
raise TypeError('get_loc requires scalar valued input')
@@ -4475,10 +4463,6 @@ def insert(self, loc, item):
-------
new_index : Index
"""
- if is_scalar(item) and isna(item):
- # GH 18295
- item = self._na_value
-
_self = np.asarray(self)
item = self._coerce_scalar_to_index(item)._ndarray_values
idx = np.concatenate((_self[:loc], item, _self[loc:]))
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 3f8c07fe7cd21..37a12a588db03 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -11,7 +11,7 @@
import numpy as np
from pandas._libs import lib, iNaT, NaT
-from pandas._libs.tslibs.timestamps import round_ns
+from pandas._libs.tslibs.timestamps import round_nsint64, RoundTo
from pandas.core.dtypes.common import (
ensure_int64,
@@ -99,6 +99,18 @@ class TimelikeOps(object):
frequency like 'S' (second) not 'ME' (month end). See
:ref:`frequency aliases <timeseries.offset_aliases>` for
a list of possible `freq` values.
+ ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
+ - 'infer' will attempt to infer fall dst-transition hours based on
+ order
+ - bool-ndarray where True signifies a DST time, False designates
+ a non-DST time (note that this flag is only applicable for
+ ambiguous times)
+ - 'NaT' will return NaT where there are ambiguous times
+ - 'raise' will raise an AmbiguousTimeError if there are ambiguous
+ times
+ Only relevant for DatetimeIndex.
+
+ .. versionadded:: 0.24.0
Returns
-------
@@ -168,10 +180,10 @@ class TimelikeOps(object):
"""
)
- def _round(self, freq, rounder):
+ def _round(self, freq, mode, ambiguous):
# round the local times
values = _ensure_datetimelike_to_i8(self)
- result = round_ns(values, rounder, freq)
+ result = round_nsint64(values, mode, freq)
result = self._maybe_mask_results(result, fill_value=NaT)
attribs = self._get_attributes_dict()
@@ -180,19 +192,20 @@ def _round(self, freq, rounder):
if 'tz' in attribs:
attribs['tz'] = None
return self._ensure_localized(
- self._shallow_copy(result, **attribs))
+ self._shallow_copy(result, **attribs), ambiguous
+ )
@Appender((_round_doc + _round_example).format(op="round"))
- def round(self, freq, *args, **kwargs):
- return self._round(freq, np.round)
+ def round(self, freq, ambiguous='raise'):
+ return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
@Appender((_round_doc + _floor_example).format(op="floor"))
- def floor(self, freq):
- return self._round(freq, np.floor)
+ def floor(self, freq, ambiguous='raise'):
+ return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
@Appender((_round_doc + _ceil_example).format(op="ceil"))
- def ceil(self, freq):
- return self._round(freq, np.ceil)
+ def ceil(self, freq, ambiguous='raise'):
+ return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
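The three methods now map onto explicit rounding modes: half-to-even for `round`, toward minus infinity for `floor`, toward plus infinity for `ceil`. A quick illustration:

    import pandas as pd

    rng = pd.date_range('2018-01-01 11:59:00', periods=3, freq='min')
    rng.round('H')   # RoundTo.NEAREST_HALF_EVEN
    rng.floor('H')   # RoundTo.MINUS_INFTY
    rng.ceil('H')    # RoundTo.PLUS_INFTY
    # For tz-aware data, the new `ambiguous` argument (e.g. 'NaT') controls
    # how results falling into a DST transition are relocalized.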
@@ -264,7 +277,7 @@ def _evaluate_compare(self, other, op):
except TypeError:
return result
- def _ensure_localized(self, result):
+ def _ensure_localized(self, arg, ambiguous='raise', from_utc=False):
"""
ensure that we are re-localized
@@ -273,7 +286,11 @@ def _ensure_localized(self, result):
Parameters
----------
- result : DatetimeIndex / i8 ndarray
+ arg : DatetimeIndex / i8 ndarray
+ ambiguous : str, bool, or bool-ndarray, default 'raise'
+ from_utc : bool, default False
+ If True, localize the i8 ndarray to UTC first before converting to
+ the appropriate tz. If False, localize directly to the tz.
Returns
-------
@@ -282,10 +299,13 @@ def _ensure_localized(self, result):
# reconvert to local tz
if getattr(self, 'tz', None) is not None:
- if not isinstance(result, ABCIndexClass):
- result = self._simple_new(result)
- result = result.tz_localize(self.tz)
- return result
+ if not isinstance(arg, ABCIndexClass):
+ arg = self._simple_new(arg)
+ if from_utc:
+ arg = arg.tz_localize('UTC').tz_convert(self.tz)
+ else:
+ arg = arg.tz_localize(self.tz, ambiguous=ambiguous)
+ return arg
def _box_values_as_index(self):
"""
@@ -607,11 +627,11 @@ def repeat(self, repeats, *args, **kwargs):
@Appender(_index_shared_docs['where'] % _index_doc_kwargs)
def where(self, cond, other=None):
- other = _ensure_datetimelike_to_i8(other)
- values = _ensure_datetimelike_to_i8(self)
+ other = _ensure_datetimelike_to_i8(other, to_utc=True)
+ values = _ensure_datetimelike_to_i8(self, to_utc=True)
result = np.where(cond, values, other).astype('i8')
- result = self._ensure_localized(result)
+ result = self._ensure_localized(result, from_utc=True)
return self._shallow_copy(result,
**self._get_attributes_dict())
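Round-tripping through UTC i8 values means the result is relocalized from UTC instead of from naive wall-clock times, which previously could land on an ambiguous DST hour. A hedged sketch:

    import numpy as np
    import pandas as pd

    # Spans the Europe/Berlin fall-back hour without ambiguity errors.
    idx = (pd.date_range('2018-10-27 23:30', periods=3, freq='H', tz='UTC')
             .tz_convert('Europe/Berlin'))
    idx.where(np.array([True, False, True]))   # masked slot becomes NaT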
@@ -680,23 +700,37 @@ def astype(self, dtype, copy=True):
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
-def _ensure_datetimelike_to_i8(other):
- """ helper for coercing an input scalar or array to i8 """
+def _ensure_datetimelike_to_i8(other, to_utc=False):
+ """
+ helper for coercing an input scalar or array to i8
+
+ Parameters
+ ----------
+ other : 1d array
+ to_utc : bool, default False
+ If True, convert the values to UTC before extracting the i8 values
+ If False, extract the i8 values directly.
+
+ Returns
+ -------
+ i8 1d array
+ """
if is_scalar(other) and isna(other):
- other = iNaT
+ return iNaT
elif isinstance(other, ABCIndexClass):
# convert tz if needed
if getattr(other, 'tz', None) is not None:
- other = other.tz_localize(None).asi8
- else:
- other = other.asi8
+ if to_utc:
+ other = other.tz_convert('UTC')
+ else:
+ other = other.tz_localize(None)
else:
try:
- other = np.array(other, copy=False).view('i8')
+ return np.array(other, copy=False).view('i8')
except TypeError:
# period array cannot be coerced to int
- other = Index(other).asi8
- return other
+ other = Index(other)
+ return other.asi8
def wrap_arithmetic_op(self, other, result):
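For reference, the i8 values extracted here are nanoseconds since the epoch, and for tz-aware indexes they are stored in UTC regardless of the display timezone:

    import pandas as pd

    idx = pd.date_range('2000-01-01', periods=2, tz='US/Eastern')
    idx.asi8                     # epoch nanoseconds (UTC-based)
    idx.tz_convert('UTC').asi8   # identical values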
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 46741ab15aa31..a6cdaa0c2163a 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -665,23 +665,13 @@ def to_series(self, keep_tz=False, index=None, name=None):
if name is None:
name = self.name
- return Series(self._to_embed(keep_tz), index=index, name=name)
-
- def _to_embed(self, keep_tz=False, dtype=None):
- """
- return an array repr of this object, potentially casting to object
-
- This is for internal compat
- """
- if dtype is not None:
- return self.astype(dtype)._to_embed(keep_tz=keep_tz)
-
if keep_tz and self.tz is not None:
-
# preserve the tz & copy
- return self.copy(deep=True)
+ values = self.copy(deep=True)
+ else:
+ values = self.values.copy()
- return self.values.copy()
+ return Series(values, index=index, name=name)
def to_period(self, freq=None):
"""
@@ -860,8 +850,6 @@ def union_many(self, others):
if isinstance(this, DatetimeIndex):
this._tz = timezones.tz_standardize(tz)
- if this.freq is None:
- this.freq = to_offset(this.inferred_freq)
return this
def join(self, other, how='left', level=None, return_indexers=False,
diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py
index 5a37e03b700f9..289970aaf3a82 100644
--- a/pandas/core/indexes/frozen.py
+++ b/pandas/core/indexes/frozen.py
@@ -139,7 +139,7 @@ def searchsorted(self, value, side="left", sorter=None):
# xref: https://github.com/numpy/numpy/issues/5370
try:
value = self.dtype.type(value)
- except:
+ except ValueError:
pass
return super(FrozenNDArray, self).searchsorted(
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index a7932f667f6de..119a607fc0e68 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -6,6 +6,7 @@
import numpy as np
from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp
+from pandas._libs import tslibs
from pandas.compat import range, zip, lrange, lzip, map
from pandas.compat.numpy import function as nv
@@ -288,6 +289,28 @@ def _verify_integrity(self, labels=None, levels=None):
def levels(self):
return self._levels
+ @property
+ def _is_homogeneous_type(self):
+ """Whether the levels of a MultiIndex all have the same dtype.
+
+ This looks at the dtypes of the levels.
+
+ See Also
+ --------
+ Index._is_homogeneous_type
+ DataFrame._is_homogeneous_type
+
+ Examples
+ --------
+ >>> MultiIndex.from_tuples([
+ ... ('a', 'b'), ('a', 'c')])._is_homogeneous_type
+ True
+ >>> MultiIndex.from_tuples([
+ ... ('a', 1), ('a', 2)])._is_homogeneous_type
+ False
+ """
+ return len({x.dtype for x in self.levels}) <= 1
+
def _set_levels(self, levels, level=None, copy=False, validate=True,
verify_integrity=False):
# This is NOT part of the levels property because it should be
@@ -980,12 +1003,13 @@ def _try_mi(k):
return _try_mi(key)
except (KeyError):
raise
- except:
+ except (IndexError, ValueError, TypeError):
pass
try:
return _try_mi(Timestamp(key))
- except:
+ except (KeyError, TypeError,
+ IndexError, ValueError, tslibs.OutOfBoundsDatetime):
pass
raise InvalidIndexError(key)
@@ -1664,7 +1688,7 @@ def append(self, other):
# if all(isinstance(x, MultiIndex) for x in other):
try:
return MultiIndex.from_tuples(new_tuples, names=self.names)
- except:
+ except (TypeError, IndexError):
return Index(new_tuples)
def argsort(self, *args, **kwargs):
@@ -2293,7 +2317,7 @@ def maybe_droplevels(indexer, levels, drop_level):
for i in sorted(levels, reverse=True):
try:
new_index = new_index.droplevel(i)
- except:
+ except ValueError:
# no dropping here
return orig_index
@@ -2796,7 +2820,7 @@ def _convert_can_do_setop(self, other):
msg = 'other must be a MultiIndex or a list of tuples'
try:
other = MultiIndex.from_tuples(other)
- except:
+ except TypeError:
raise TypeError(msg)
else:
result_names = self.names if self.names == other.names else None
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index e0627432cbc2e..7f64fb744c682 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -6,9 +6,11 @@
pandas_dtype,
needs_i8_conversion,
is_integer_dtype,
+ is_float,
is_bool,
is_bool_dtype,
is_scalar)
+from pandas.core.dtypes.missing import isna
from pandas import compat
from pandas.core import algorithms
@@ -114,6 +116,13 @@ def is_all_dates(self):
"""
return False
+ @Appender(Index.insert.__doc__)
+ def insert(self, loc, item):
+ # treat NA values as nans:
+ if is_scalar(item) and isna(item):
+ item = self._na_value
+ return super(NumericIndex, self).insert(loc, item)
+
_num_index_shared_docs['class_descr'] = """
Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -154,7 +163,25 @@ def is_all_dates(self):
)
-class Int64Index(NumericIndex):
+class IntegerIndex(NumericIndex):
+ """
+ This is an abstract class for Int64Index, UInt64Index.
+ """
+
+ def __contains__(self, key):
+ """
+ Check if key is a float with a decimal part; if so, return False.
+ """
+ hash(key)
+ try:
+ if is_float(key) and int(key) != key:
+ return False
+ return key in self._engine
+ except (OverflowError, TypeError, ValueError):
+ return False
+
+
+class Int64Index(IntegerIndex):
__doc__ = _num_index_shared_docs['class_descr'] % _int64_descr_args
_typ = 'int64index'
@@ -212,7 +239,7 @@ def _assert_safe_casting(cls, data, subarr):
)
-class UInt64Index(NumericIndex):
+class UInt64Index(IntegerIndex):
__doc__ = _num_index_shared_docs['class_descr'] % _uint64_descr_args
_typ = 'uint64index'
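The effect of the new `__contains__` on integer indexes, sketched with the public constructor:

    import pandas as pd

    idx = pd.Index([1, 2, 3])   # an Int64Index
    2.0 in idx    # True: an integral float matches the integer 2
    2.5 in idx    # False: a float with a decimal part never matches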
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 0f86e18103e3c..969391569ce50 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -365,16 +365,6 @@ def __array_wrap__(self, result, context=None):
# cannot pass _simple_new as it is
return self._shallow_copy(result, freq=self.freq, name=self.name)
- def _to_embed(self, keep_tz=False, dtype=None):
- """
- return an array repr of this object, potentially casting to object
- """
-
- if dtype is not None:
- return self.astype(dtype)._to_embed(keep_tz=keep_tz)
-
- return self.astype(object).values
-
@property
def size(self):
# Avoid materializing self._values
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index b63f874abff85..150518aadcfd9 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -2146,7 +2146,7 @@ def _getitem_tuple(self, tup):
self._has_valid_tuple(tup)
try:
return self._getitem_lowerdim(tup)
- except:
+ except IndexingError:
pass
retval = self.obj
@@ -2705,13 +2705,13 @@ def maybe_droplevels(index, key):
for _ in key:
try:
index = index.droplevel(0)
- except:
+ except ValueError:
# we have dropped too much, so back out
return original_index
else:
try:
index = index.droplevel(0)
- except:
+ except ValueError:
pass
return index
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index e735b35653cd4..0e57dd33b1c4e 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -666,7 +666,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
newb = make_block(values, placement=self.mgr_locs,
klass=klass, ndim=self.ndim)
- except:
+ except Exception:
if errors == 'raise':
raise
newb = self.copy() if copy else self
@@ -1142,7 +1142,7 @@ def check_int_bool(self, inplace):
# a fill na type method
try:
m = missing.clean_fill_method(method)
- except:
+ except ValueError:
m = None
if m is not None:
@@ -1157,7 +1157,7 @@ def check_int_bool(self, inplace):
# try an interp method
try:
m = missing.clean_interp_method(method, **kwargs)
- except:
+ except ValueError:
m = None
if m is not None:
@@ -2438,7 +2438,7 @@ def set(self, locs, values, check=False):
try:
if (self.values[locs] == values).all():
return
- except:
+ except (IndexError, ValueError):
pass
try:
self.values[locs] = values
@@ -3172,7 +3172,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
def __len__(self):
try:
return self.sp_index.length
- except:
+ except AttributeError:
return 0
def copy(self, deep=True, mgr=None):
@@ -3490,6 +3490,7 @@ def _putmask_smart(v, m, n):
# we ignore ComplexWarning here
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", np.ComplexWarning)
nn_at = nn.astype(v.dtype)
# avoid invalid dtype comparisons
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 63738594799f5..2f29f1ae2509f 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -12,9 +12,6 @@
from pandas.util._validators import validate_bool_kwarg
from pandas.compat import range, map, zip
-from pandas.core.dtypes.dtypes import (
- ExtensionDtype,
- PandasExtensionDtype)
from pandas.core.dtypes.common import (
_NS_DTYPE,
is_datetimelike_v_numeric,
@@ -791,6 +788,11 @@ def _interleave(self):
"""
dtype = _interleaved_dtype(self.blocks)
+ if is_extension_array_dtype(dtype):
+ # TODO: https://github.com/pandas-dev/pandas/issues/22791
+ # Give EAs some input on what happens here. Sparse needs this.
+ dtype = 'object'
+
result = np.empty(self.shape, dtype=dtype)
if result.shape[0] == 0:
@@ -906,14 +908,25 @@ def fast_xs(self, loc):
# unique
dtype = _interleaved_dtype(self.blocks)
+
n = len(items)
- result = np.empty(n, dtype=dtype)
+ if is_extension_array_dtype(dtype):
+ # we'll eventually construct an ExtensionArray.
+ result = np.empty(n, dtype=object)
+ else:
+ result = np.empty(n, dtype=dtype)
+
for blk in self.blocks:
# Such assignment may incorrectly coerce NaT to None
# result[blk.mgr_locs] = blk._slice((slice(None), loc))
for i, rl in enumerate(blk.mgr_locs):
result[rl] = blk._try_coerce_result(blk.iget((i, loc)))
+ if is_extension_array_dtype(dtype):
+ result = dtype.construct_array_type()._from_sequence(
+ result, dtype=dtype
+ )
+
return result
def consolidate(self):
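A hedged sketch of the reconstruction step above, using Categorical as the extension dtype; `_from_sequence` is internal API, shown only to mirror the call:

    import pandas as pd

    dtype = pd.CategoricalDtype(['a', 'b'])
    values = ['a', 'b', 'a']   # first materialized into an object array
    arr = dtype.construct_array_type()._from_sequence(values, dtype=dtype)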
@@ -1855,16 +1868,22 @@ def _shape_compat(x):
def _interleaved_dtype(blocks):
- if not len(blocks):
- return None
+ # type: (List[Block]) -> Optional[Union[np.dtype, ExtensionDtype]]
+ """Find the common dtype for `blocks`.
- dtype = find_common_type([b.dtype for b in blocks])
+ Parameters
+ ----------
+ blocks : List[Block]
- # only numpy compat
- if isinstance(dtype, (PandasExtensionDtype, ExtensionDtype)):
- dtype = np.object
+ Returns
+ -------
+ dtype : Optional[Union[np.dtype, ExtensionDtype]]
+ None is returned when `blocks` is empty.
+ """
+ if not len(blocks):
+ return None
- return dtype
+ return find_common_type([b.dtype for b in blocks])
def _consolidate(blocks):
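`find_common_type` follows NumPy-style promotion, so mixed numeric blocks interleave to the widest numeric dtype and incompatible mixes fall back to object:

    import numpy as np
    from pandas.core.dtypes.cast import find_common_type

    find_common_type([np.dtype('int64'), np.dtype('float32')])  # float64
    find_common_type([np.dtype('int64'), np.dtype('object')])   # object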
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index f44fb4f6e9e14..232d030da7f1e 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -503,7 +503,8 @@ def reduction(values, axis=None, skipna=True):
try:
result = getattr(values, meth)(axis, dtype=dtype_max)
result.fill(np.nan)
- except:
+ except (AttributeError, TypeError,
+ ValueError, np.core._internal.AxisError):
result = np.nan
else:
result = getattr(values, meth)(axis)
@@ -766,6 +767,8 @@ def nancorr(a, b, method='pearson', min_periods=None):
def get_corr_func(method):
if method in ['kendall', 'spearman']:
from scipy.stats import kendalltau, spearmanr
+ elif callable(method):
+ return method
def _pearson(a, b):
return np.corrcoef(a, b)[0, 1]
@@ -813,7 +816,7 @@ def _ensure_numeric(x):
elif is_object_dtype(x):
try:
x = x.astype(np.complex128)
- except:
+ except (TypeError, ValueError):
x = x.astype(np.float64)
else:
if not np.any(x.imag):
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index ca9c2528f0aef..8171840c96b6e 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -900,6 +900,42 @@ def invalid_comparison(left, right, op):
return res_values
+# -----------------------------------------------------------------------------
+# Dispatch logic
+
+def should_series_dispatch(left, right, op):
+ """
+ Identify cases where a DataFrame operation should dispatch to its
+ Series counterpart.
+
+ Parameters
+ ----------
+ left : DataFrame
+ right : DataFrame
+ op : binary operator
+
+ Returns
+ -------
+ override : bool
+ """
+ if left._is_mixed_type or right._is_mixed_type:
+ return True
+
+ if not len(left.columns) or not len(right.columns):
+ # ensure obj.dtypes[0] exists for each obj
+ return False
+
+ ldtype = left.dtypes.iloc[0]
+ rdtype = right.dtypes.iloc[0]
+
+ if ((is_timedelta64_dtype(ldtype) and is_integer_dtype(rdtype)) or
+ (is_timedelta64_dtype(rdtype) and is_integer_dtype(ldtype))):
+ # numpy treats integer dtypes as timedelta64 dtypes in this scenario
+ return True
+
+ return False
+
+
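The timedelta/integer special case exists because NumPy reinterprets integer arrays as timedelta64 counts in mixed arithmetic, so block-wise ops must defer to the Series implementation. The underlying NumPy behavior:

    import numpy as np

    td = np.array([1, 2], dtype='timedelta64[ns]')
    ints = np.array([10, 20])
    td + ints   # the integers are treated as timedelta64[ns] counts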
# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
# methods
@@ -1525,28 +1561,28 @@ def _bool_method_SERIES(cls, op, special):
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
+ op_name = _get_op_name(op, special)
def na_op(x, y):
try:
result = op(x, y)
except TypeError:
- if isinstance(y, list):
- y = construct_1d_object_array_from_listlike(y)
-
- if isinstance(y, (np.ndarray, ABCSeries)):
- if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)):
- result = op(x, y) # when would this be hit?
- else:
- x = ensure_object(x)
- y = ensure_object(y)
- result = libops.vec_binop(x, y, op)
+ assert not isinstance(y, (list, ABCSeries, ABCIndexClass))
+ if isinstance(y, np.ndarray):
+ # bool-bool dtype operations should be OK, should not get here
+ assert not (is_bool_dtype(x) and is_bool_dtype(y))
+ x = ensure_object(x)
+ y = ensure_object(y)
+ result = libops.vec_binop(x, y, op)
else:
# let null fall thru
+ assert lib.is_scalar(y)
if not isna(y):
y = bool(y)
try:
result = libops.scalar_binop(x, y, op)
- except:
+ except (TypeError, ValueError, AttributeError,
+ OverflowError, NotImplementedError):
raise TypeError("cannot compare a dtyped [{dtype}] array "
"with a scalar of type [{typ}]"
.format(dtype=x.dtype,
@@ -1561,33 +1597,42 @@ def wrapper(self, other):
is_self_int_dtype = is_integer_dtype(self.dtype)
self, other = _align_method_SERIES(self, other, align_asobject=True)
+ res_name = get_op_result_name(self, other)
if isinstance(other, ABCDataFrame):
# Defer to DataFrame implementation; fail early
return NotImplemented
- elif isinstance(other, ABCSeries):
- name = get_op_result_name(self, other)
+ elif isinstance(other, (ABCSeries, ABCIndexClass)):
is_other_int_dtype = is_integer_dtype(other.dtype)
other = fill_int(other) if is_other_int_dtype else fill_bool(other)
- filler = (fill_int if is_self_int_dtype and is_other_int_dtype
- else fill_bool)
-
- res_values = na_op(self.values, other.values)
- unfilled = self._constructor(res_values,
- index=self.index, name=name)
- return filler(unfilled)
+ ovalues = other.values
+ finalizer = lambda x: x
else:
# scalars, list, tuple, np.array
- filler = (fill_int if is_self_int_dtype and
- is_integer_dtype(np.asarray(other)) else fill_bool)
-
- res_values = na_op(self.values, other)
- unfilled = self._constructor(res_values, index=self.index)
- return filler(unfilled).__finalize__(self)
+ is_other_int_dtype = is_integer_dtype(np.asarray(other))
+ if is_list_like(other) and not isinstance(other, np.ndarray):
+ # TODO: Can we do this before the is_integer_dtype check?
+ # could the is_integer_dtype check be checking the wrong
+ # thing? e.g. other = [[0, 1], [2, 3], [4, 5]]?
+ other = construct_1d_object_array_from_listlike(other)
+
+ ovalues = other
+ finalizer = lambda x: x.__finalize__(self)
+
+ # For int vs int `^`, `|`, `&` are bitwise operators and return
+ # integer dtypes. Otherwise these are boolean ops
+ filler = (fill_int if is_self_int_dtype and is_other_int_dtype
+ else fill_bool)
+ res_values = na_op(self.values, ovalues)
+ unfilled = self._constructor(res_values,
+ index=self.index, name=res_name)
+ filled = filler(unfilled)
+ return finalizer(filled)
+ wrapper.__name__ = op_name
return wrapper
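The `filler` choice above keeps the long-standing semantics: `&`, `|` and `^` between two integer Series are bitwise and stay integer, while anything involving bools yields a boolean result:

    import pandas as pd

    pd.Series([1, 2, 3]) & pd.Series([1, 0, 1])           # bitwise, int dtype
    pd.Series([True, False]) | pd.Series([False, True])   # bool dtype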
@@ -1794,8 +1839,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
other = _align_method_FRAME(self, other, axis)
- if isinstance(other, ABCDataFrame): # Another DataFrame
- return self._combine_frame(other, na_op, fill_value, level)
+ if isinstance(other, ABCDataFrame):
+ # Another DataFrame
+ pass_op = op if should_series_dispatch(self, other, op) else na_op
+ return self._combine_frame(other, pass_op, fill_value, level)
elif isinstance(other, ABCSeries):
return _combine_series_frame(self, other, na_op,
fill_value=fill_value, axis=axis,
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index 81d1e83ee6870..1e2d4000413bb 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -1215,7 +1215,8 @@ def _wrap_result(self, result, axis):
return self._construct_return_type(result, axes)
- @Appender(_shared_docs['reindex'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(NDFrame.reindex.__doc__)
def reindex(self, *args, **kwargs):
major = kwargs.pop("major", None)
minor = kwargs.pop('minor', None)
@@ -1236,7 +1237,8 @@ def reindex(self, *args, **kwargs):
kwargs.pop('labels', None)
return super(Panel, self).reindex(**kwargs)
- @Appender(_shared_docs['rename'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(NDFrame.rename.__doc__)
def rename(self, items=None, major_axis=None, minor_axis=None, **kwargs):
major_axis = (major_axis if major_axis is not None else
kwargs.pop('major', None))
@@ -1253,7 +1255,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
copy=copy, limit=limit,
fill_value=fill_value)
- @Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(NDFrame.transpose.__doc__)
def transpose(self, *args, **kwargs):
# check if a list of axes was passed in instead as a
# single *args element
@@ -1536,6 +1539,13 @@ def _extract_axis(self, data, axis=0, intersect=False):
return ensure_index(index)
+ def sort_values(self, *args, **kwargs):
+ """
+ NOT IMPLEMENTED: do not call this method, as sorting values is not
+ supported for Panel objects and will raise an error.
+ """
+ super(Panel, self).sort_values(*args, **kwargs)
+
Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0,
stat_axis=1, aliases={'major': 'major_axis',
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 1ef8a0854887b..878ac957a8557 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -1328,8 +1328,7 @@ def _get_time_bins(self, ax):
data=[], freq=self.freq, name=ax.name)
return binner, [], labels
- first, last = ax.min(), ax.max()
- first, last = _get_range_edges(first, last, self.freq,
+ first, last = _get_range_edges(ax.min(), ax.max(), self.freq,
closed=self.closed,
base=self.base)
tz = ax.tz
@@ -1519,9 +1518,6 @@ def _take_new_index(obj, indexer, new_index, axis=0):
def _get_range_edges(first, last, offset, closed='left', base=0):
- if isinstance(offset, compat.string_types):
- offset = to_offset(offset)
-
if isinstance(offset, Tick):
is_day = isinstance(offset, Day)
day_nanos = delta_to_nanoseconds(timedelta(1))
@@ -1531,8 +1527,7 @@ def _get_range_edges(first, last, offset, closed='left', base=0):
return _adjust_dates_anchored(first, last, offset,
closed=closed, base=base)
- if not isinstance(offset, Tick): # and first.time() != last.time():
- # hack!
+ else:
first = first.normalize()
last = last.normalize()
@@ -1553,19 +1548,16 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
#
# See https://github.com/pandas-dev/pandas/issues/8683
- # 14682 - Since we need to drop the TZ information to perform
- # the adjustment in the presence of a DST change,
- # save TZ Info and the DST state of the first and last parameters
- # so that we can accurately rebuild them at the end.
+ # GH 10117 & GH 19375. If first and last contain timezone information,
+ # perform the calculation in UTC in order to avoid localizing on an
+ # ambiguous or nonexistent time.
first_tzinfo = first.tzinfo
last_tzinfo = last.tzinfo
- first_dst = bool(first.dst())
- last_dst = bool(last.dst())
-
- first = first.tz_localize(None)
- last = last.tz_localize(None)
-
start_day_nanos = first.normalize().value
+ if first_tzinfo is not None:
+ first = first.tz_convert('UTC')
+ if last_tzinfo is not None:
+ last = last.tz_convert('UTC')
base_nanos = (base % offset.n) * offset.nanos // offset.n
start_day_nanos += base_nanos
@@ -1598,9 +1590,13 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
lresult = last.value + (offset.nanos - loffset)
else:
lresult = last.value + offset.nanos
-
- return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst),
- Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
+ fresult = Timestamp(fresult)
+ lresult = Timestamp(lresult)
+ if first_tzinfo is not None:
+ fresult = fresult.tz_localize('UTC').tz_convert(first_tzinfo)
+ if last_tzinfo is not None:
+ lresult = lresult.tz_localize('UTC').tz_convert(last_tzinfo)
+ return fresult, lresult
def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
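With the edges computed in UTC, resampling tz-aware data across a DST transition no longer trips over ambiguous or nonexistent local times. A hedged sketch:

    import pandas as pd

    # Spans the America/Chicago fall-back on 2018-11-04.
    idx = pd.date_range('2018-11-03 12:00', periods=48, freq='H', tz='UTC')
    s = pd.Series(range(48), index=idx.tz_convert('America/Chicago'))
    s.resample('D').sum()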
diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
index f4b96c8f1ca49..26221143c0cdf 100644
--- a/pandas/core/reshape/melt.py
+++ b/pandas/core/reshape/melt.py
@@ -409,14 +409,14 @@ def melt_stub(df, stub, i, j, value_vars, sep):
return newdf.set_index(i + [j])
- if any(col in stubnames for col in df.columns):
- raise ValueError("stubname can't be identical to a column name")
-
if not is_list_like(stubnames):
stubnames = [stubnames]
else:
stubnames = list(stubnames)
+ if any(col in stubnames for col in df.columns):
+ raise ValueError("stubname can't be identical to a column name")
+
if not is_list_like(i):
i = [i]
else:
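The reordering ensures the collision check also sees a scalar `stubnames` after it has been wrapped in a list. Typical usage for context:

    import pandas as pd

    df = pd.DataFrame({'id': [1, 2], 'x2000': [1., 2.], 'x2001': [3., 4.]})
    pd.wide_to_long(df, stubnames='x', i='id', j='year')
    # A column literally named 'x' would now raise ValueError.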
diff --git a/pandas/core/series.py b/pandas/core/series.py
index a4d403e4bcd94..82198c2b3edd5 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -6,7 +6,6 @@
# pylint: disable=E1101,E1103
# pylint: disable=W0703,W0622,W0613,W0201
-import collections
import warnings
from textwrap import dedent
@@ -89,10 +88,8 @@
_shared_doc_kwargs = dict(
axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
- axis="""
- axis : {0 or 'index'}
- Parameter needed for compatibility with DataFrame.
- """,
+ axis="""axis : {0 or 'index'}
+ Parameter needed for compatibility with DataFrame.""",
inplace="""inplace : boolean, default False
If True, performs operation inplace and returns None.""",
unique='np.ndarray', duplicated='Series',
@@ -242,8 +239,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
raise TypeError("{0!r} type is unordered"
"".format(data.__class__.__name__))
# If data is Iterable but not list-like, consume into list.
- elif (isinstance(data, collections.Iterable)
- and not isinstance(data, collections.Sized)):
+ elif (isinstance(data, compat.Iterable)
+ and not isinstance(data, compat.Sized)):
data = list(data)
else:
@@ -1913,10 +1910,14 @@ def corr(self, other, method='pearson', min_periods=None):
Parameters
----------
other : Series
- method : {'pearson', 'kendall', 'spearman'}
+ method : {'pearson', 'kendall', 'spearman'} or callable
* pearson : standard correlation coefficient
* kendall : Kendall Tau correlation coefficient
* spearman : Spearman rank correlation
+ * callable: callable taking two 1d ndarrays
+ and returning a float
+ .. versionadded:: 0.24.0
+
min_periods : int, optional
Minimum number of observations needed to have a valid result
@@ -1924,12 +1925,22 @@ def corr(self, other, method='pearson', min_periods=None):
Returns
-------
correlation : float
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> histogram_intersection = lambda a, b: np.minimum(a, b
+ ... ).sum().round(decimals=1)
+ >>> s1 = pd.Series([.2, .0, .6, .2])
+ >>> s2 = pd.Series([.3, .6, .0, .1])
+ >>> s1.corr(s2, method=histogram_intersection)
+ 0.3
"""
this, other = self.align(other, join='inner', copy=False)
if len(this) == 0:
return np.nan
- if method in ['pearson', 'spearman', 'kendall']:
+ if method in ['pearson', 'spearman', 'kendall'] or callable(method):
return nanops.nancorr(this.values, other.values, method=method,
min_periods=min_periods)
@@ -2024,7 +2035,10 @@ def diff(self, periods=1):
def autocorr(self, lag=1):
"""
- Lag-N autocorrelation
+ Compute the lag-N autocorrelation.
+
+ This method computes the Pearson correlation between
+ the Series and its shifted self.
Parameters
----------
@@ -2033,7 +2047,34 @@ def autocorr(self, lag=1):
Returns
-------
- autocorr : float
+ float
+ The Pearson correlation between self and self.shift(lag).
+
+ See Also
+ --------
+ Series.corr : Compute the correlation between two Series.
+ Series.shift : Shift index by desired number of periods.
+ DataFrame.corr : Compute pairwise correlation of columns.
+ DataFrame.corrwith : Compute pairwise correlation between rows or
+ columns of two DataFrame objects.
+
+ Notes
+ -----
+ If the Pearson correlation is not well defined, 'NaN' is returned.
+
+ Examples
+ --------
+ >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
+ >>> s.autocorr() # doctest: +ELLIPSIS
+ 0.10355...
+ >>> s.autocorr(lag=2) # doctest: +ELLIPSIS
+ -0.99999...
+
+ If the Pearson correlation is not well defined, then 'NaN' is returned.
+
+ >>> s = pd.Series([1, 0, 0, 0])
+ >>> s.autocorr()
+ nan
"""
return self.corr(self.shift(lag))
@@ -2743,17 +2784,21 @@ def nlargest(self, n=5, keep='first'):
Parameters
----------
- n : int
- Return this many descending sorted values
- keep : {'first', 'last'}, default 'first'
- Where there are duplicate values:
- - ``first`` : take the first occurrence.
- - ``last`` : take the last occurrence.
+ n : int, default 5
+ Return this many descending sorted values.
+ keep : {'first', 'last', 'all'}, default 'first'
+ When there are duplicate values that cannot all fit in a
+ Series of `n` elements:
+
+ - ``first`` : take the first occurrences based on the index order
+ - ``last`` : take the last occurrences based on the index order
+ - ``all`` : keep all occurrences. This can result in a Series of
+ size larger than `n`.
Returns
-------
- top_n : Series
- The n largest values in the Series, in sorted order
+ Series
+ The `n` largest values in the Series, sorted in decreasing order.
Notes
-----
@@ -2762,23 +2807,70 @@ def nlargest(self, n=5, keep='first'):
See Also
--------
- Series.nsmallest
+ Series.nsmallest: Get the `n` smallest elements.
+ Series.sort_values: Sort Series by values.
+ Series.head: Return the first `n` rows.
Examples
--------
- >>> s = pd.Series(np.random.randn(10**6))
- >>> s.nlargest(10) # only sorts up to the N requested
- 219921 4.644710
- 82124 4.608745
- 421689 4.564644
- 425277 4.447014
- 718691 4.414137
- 43154 4.403520
- 283187 4.313922
- 595519 4.273635
- 503969 4.250236
- 121637 4.240952
- dtype: float64
+ >>> countries_population = {"Italy": 59000000, "France": 65000000,
+ ... "Malta": 434000, "Maldives": 434000,
+ ... "Brunei": 434000, "Iceland": 337000,
+ ... "Nauru": 11300, "Tuvalu": 11300,
+ ... "Anguilla": 11300, "Monserat": 5200}
+ >>> s = pd.Series(countries_population)
+ >>> s
+ Italy 59000000
+ France 65000000
+ Malta 434000
+ Maldives 434000
+ Brunei 434000
+ Iceland 337000
+ Nauru 11300
+ Tuvalu 11300
+ Anguilla 11300
+ Montserrat 5200
+ dtype: int64
+
+ The `n` largest elements where ``n=5`` by default.
+
+ >>> s.nlargest()
+ France 65000000
+ Italy 59000000
+ Malta 434000
+ Maldives 434000
+ Brunei 434000
+ dtype: int64
+
+ The `n` largest elements where ``n=3``. Default `keep` value is 'first'
+ so Malta will be kept.
+
+ >>> s.nlargest(3)
+ France 65000000
+ Italy 59000000
+ Malta 434000
+ dtype: int64
+
+ The `n` largest elements where ``n=3`` and keeping the last duplicates.
+ Brunei will be kept since it is the last with value 434000 based on
+ the index order.
+
+ >>> s.nlargest(3, keep='last')
+ France 65000000
+ Italy 59000000
+ Brunei 434000
+ dtype: int64
+
+ The `n` largest elements where ``n=3`` with all duplicates kept. Note
+ that the returned Series has five elements due to the three duplicates.
+
+ >>> s.nlargest(3, keep='all')
+ France 65000000
+ Italy 59000000
+ Malta 434000
+ Maldives 434000
+ Brunei 434000
+ dtype: int64
"""
return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
@@ -2788,17 +2880,21 @@ def nsmallest(self, n=5, keep='first'):
Parameters
----------
- n : int
- Return this many ascending sorted values
- keep : {'first', 'last'}, default 'first'
- Where there are duplicate values:
- - ``first`` : take the first occurrence.
- - ``last`` : take the last occurrence.
+ n : int, default 5
+ Return this many ascending sorted values.
+ keep : {'first', 'last', 'all'}, default 'first'
+ When there are duplicate values that cannot all fit in a
+ Series of `n` elements:
+
+ - ``first`` : take the first occurrences based on the index order
+ - ``last`` : take the last occurrences based on the index order
+ - ``all`` : keep all occurrences. This can result in a Series of
+ size larger than `n`.
Returns
-------
- bottom_n : Series
- The n smallest values in the Series, in sorted order
+ Series
+ The `n` smallest values in the Series, sorted in increasing order.
Notes
-----
@@ -2807,23 +2903,69 @@ def nsmallest(self, n=5, keep='first'):
See Also
--------
- Series.nlargest
+ Series.nlargest: Get the `n` largest elements.
+ Series.sort_values: Sort Series by values.
+ Series.head: Return the first `n` rows.
Examples
--------
- >>> s = pd.Series(np.random.randn(10**6))
- >>> s.nsmallest(10) # only sorts up to the N requested
- 288532 -4.954580
- 732345 -4.835960
- 64803 -4.812550
- 446457 -4.609998
- 501225 -4.483945
- 669476 -4.472935
- 973615 -4.401699
- 621279 -4.355126
- 773916 -4.347355
- 359919 -4.331927
- dtype: float64
+ >>> countries_population = {"Italy": 59000000, "France": 65000000,
+ ... "Brunei": 434000, "Malta": 434000,
+ ... "Maldives": 434000, "Iceland": 337000,
+ ... "Nauru": 11300, "Tuvalu": 11300,
+ ... "Anguilla": 11300, "Monserat": 5200}
+ >>> s = pd.Series(countries_population)
+ >>> s
+ Italy 59000000
+ France 65000000
+ Brunei 434000
+ Malta 434000
+ Maldives 434000
+ Iceland 337000
+ Nauru 11300
+ Tuvalu 11300
+ Anguilla 11300
+ Montserrat 5200
+ dtype: int64
+
+ The `n` smallest elements where ``n=5`` by default.
+
+ >>> s.nsmallest()
+ Montserrat 5200
+ Nauru 11300
+ Tuvalu 11300
+ Anguilla 11300
+ Iceland 337000
+ dtype: int64
+
+ The `n` smallest elements where ``n=3``. Default `keep` value is
+ 'first' so Nauru and Tuvalu will be kept.
+
+ >>> s.nsmallest(3)
+ Montserrat 5200
+ Nauru 11300
+ Tuvalu 11300
+ dtype: int64
+
+ The `n` smallest elements where ``n=3`` and keeping the last
+ duplicates. Anguilla and Tuvalu will be kept since they are the last
+ with value 11300 based on the index order.
+
+ >>> s.nsmallest(3, keep='last')
+ Montserrat 5200
+ Anguilla 11300
+ Tuvalu 11300
+ dtype: int64
+
+ The `n` smallest elements where ``n=3`` with all duplicates kept. Note
+ that the returned Series has four elements due to the three duplicates.
+
+ >>> s.nsmallest(3, keep='all')
+ Montserrat 5200
+ Nauru 11300
+ Tuvalu 11300
+ Anguilla 11300
+ dtype: int64
"""
return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
@@ -2886,7 +3028,6 @@ def reorder_levels(self, order):
----------
order : list of int representing new level order.
(reference level by number or key)
- axis : where to reorder levels
Returns
-------
@@ -3098,6 +3239,12 @@ def aggregate(self, func, axis=0, *args, **kwargs):
agg = aggregate
+ @Appender(generic._shared_docs['transform'] % _shared_doc_kwargs)
+ def transform(self, func, axis=0, *args, **kwargs):
+ # Validate the axis parameter
+ self._get_axis_number(axis)
+ return super(Series, self).transform(func, *args, **kwargs)
+
def apply(self, func, convert_dtype=True, args=(), **kwds):
"""
Invoke function on values of Series. Can be ufunc (a NumPy function
@@ -3349,7 +3496,8 @@ def rename(self, index=None, **kwargs):
return self._set_name(index, inplace=kwargs.get('inplace'))
return super(Series, self).rename(index=index, **kwargs)
- @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(generic.NDFrame.reindex.__doc__)
def reindex(self, index=None, **kwargs):
return super(Series, self).reindex(index=index, **kwargs)
@@ -3533,7 +3681,7 @@ def memory_usage(self, index=True, deep=False):
v += self.index.memory_usage(deep=deep)
return v
- @Appender(generic._shared_docs['_take'])
+ @Appender(generic.NDFrame._take.__doc__)
def _take(self, indices, axis=0, is_copy=False):
indices = ensure_platform_int(indices)
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index eb07e5ef6c85f..186a2490a5f2e 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -306,7 +306,7 @@ def __setstate__(self, state):
def __len__(self):
try:
return self.sp_index.length
- except:
+ except AttributeError:
return 0
def __unicode__(self):
diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py
index 8ac5d81f23bb2..97cd3a0a1fb6a 100644
--- a/pandas/core/sparse/series.py
+++ b/pandas/core/sparse/series.py
@@ -19,7 +19,7 @@
import pandas.core.indexes.base as ibase
import pandas.core.ops as ops
import pandas._libs.index as libindex
-from pandas.util._decorators import Appender
+from pandas.util._decorators import Appender, Substitution
from pandas.core.sparse.array import (
make_sparse, SparseArray,
@@ -563,7 +563,8 @@ def copy(self, deep=True):
return self._constructor(new_data, sparse_index=self.sp_index,
fill_value=self.fill_value).__finalize__(self)
- @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
+ @Substitution(**_shared_doc_kwargs)
+ @Appender(generic.NDFrame.reindex.__doc__)
def reindex(self, index=None, method=None, copy=True, limit=None,
**kwargs):
@@ -592,7 +593,7 @@ def sparse_reindex(self, new_index):
sparse_index=new_index,
fill_value=self.fill_value).__finalize__(self)
- @Appender(generic._shared_docs['take'])
+ @Appender(generic.NDFrame.take.__doc__)
def take(self, indices, axis=0, convert=None, *args, **kwargs):
if convert is not None:
msg = ("The 'convert' parameter is deprecated "
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 08709d15c48bf..861739f6c694c 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -678,20 +678,42 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
def str_repeat(arr, repeats):
"""
- Duplicate each string in the Series/Index by indicated number
- of times.
+ Duplicate each string in the Series or Index.
Parameters
----------
- repeats : int or array
- Same value for all (int) or different value per (array)
+ repeats : int or sequence of int
+ Same value for all (int) or different value per (sequence).
Returns
-------
- repeated : Series/Index of objects
+ Series or Index of object
+ Series or Index of repeated string objects specified by
+ input parameter repeats.
+
+ Examples
+ --------
+ >>> s = pd.Series(['a', 'b', 'c'])
+ >>> s
+ 0 a
+ 1 b
+ 2 c
+ dtype: object
+
+ A single int repeats each string in the Series
+
+ >>> s.str.repeat(repeats=2)
+ 0 aa
+ 1 bb
+ 2 cc
+ dtype: object
+
+ A sequence of int repeats the corresponding string in the Series
+
+ >>> s.str.repeat(repeats=[1, 2, 3])
+ 0 a
+ 1 bb
+ 2 ccc
+ dtype: object
"""
if is_scalar(repeats):
-
def rep(x):
try:
return compat.binary_type.__mul__(x, repeats)
@@ -935,19 +957,23 @@ def str_extractall(arr, pat, flags=0):
Parameters
----------
- pat : string
- Regular expression pattern with capturing groups
+ pat : str
+ Regular expression pattern with capturing groups.
flags : int, default 0 (no flags)
- re module flags, e.g. re.IGNORECASE
+ A ``re`` module flag, for example ``re.IGNORECASE``. These allow
+ to modify regular expression matching for things like case, spaces,
+ etc. Multiple flags can be combined with the bitwise OR operator,
+ for example ``re.IGNORECASE | re.MULTILINE``.
Returns
-------
- A DataFrame with one row for each match, and one column for each
- group. Its rows have a MultiIndex with first levels that come from
- the subject Series. The last level is named 'match' and indicates
- the order in the subject. Any capture group names in regular
- expression pat will be used for column names; otherwise capture
- group numbers will be used.
+ DataFrame
+ A ``DataFrame`` with one row for each match, and one column for each
+ group. Its rows have a ``MultiIndex`` with first levels that come from
+ the subject ``Series``. The last level is named 'match' and indexes the
+ matches in each item of the ``Series``. Any capture group names in
+ regular expression pat will be used for column names; otherwise capture
+ group numbers will be used.
See Also
--------
@@ -993,7 +1019,6 @@ def str_extractall(arr, pat, flags=0):
1 a 2
B 0 b 1
C 0 NaN 1
-
"""
regex = re.compile(pat, flags=flags)
@@ -1307,23 +1332,57 @@ def str_index(arr, sub, start=0, end=None, side='left'):
def str_pad(arr, width, side='left', fillchar=' '):
"""
- Pad strings in the Series/Index with an additional character to
- specified side.
+ Pad strings in the Series/Index up to width.
Parameters
----------
width : int
Minimum width of resulting string; additional characters will be filled
- with spaces
+ with character defined in `fillchar`.
side : {'left', 'right', 'both'}, default 'left'
- fillchar : str
- Additional character for filling, default is whitespace
+ Side from which to fill resulting string.
+ fillchar : str, default ' '
+ Additional character for filling, default is whitespace.
Returns
-------
- padded : Series/Index of objects
- """
+ Series or Index of object
+ Series or Index with strings padded to at least `width`
+ characters.
+
+ See Also
+ --------
+ Series.str.rjust: Fills the left side of strings with an arbitrary
+ character. Equivalent to ``Series.str.pad(side='left')``.
+ Series.str.ljust: Fills the right side of strings with an arbitrary
+ character. Equivalent to ``Series.str.pad(side='right')``.
+ Series.str.center: Fills both sides of strings with an arbitrary
+ character. Equivalent to ``Series.str.pad(side='both')``.
+ Series.str.zfill: Pad strings in the Series/Index by prepending '0'
+ character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``.
+
+ Examples
+ --------
+ >>> s = pd.Series(["caribou", "tiger"])
+ >>> s
+ 0    caribou
+ 1      tiger
+ dtype: object
+
+ >>> s.str.pad(width=10)
+ 0       caribou
+ 1         tiger
+ dtype: object
+
+ >>> s.str.pad(width=10, side='right', fillchar='-')
+ 0    caribou---
+ 1    tiger-----
+ dtype: object
+
+ >>> s.str.pad(width=10, side='both', fillchar='-')
+ 0    -caribou--
+ 1    --tiger---
+ dtype: object
+ """
if not isinstance(fillchar, compat.string_types):
msg = 'fillchar must be a character, not {0}'
raise TypeError(msg.format(type(fillchar).__name__))
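
Since the new See Also entries assert equivalences between ``pad`` and its
convenience wrappers, a short check of those claims may help. A sketch,
assuming a plain object-dtype Series:

```python
import pandas as pd

s = pd.Series(["7", "42"])

# zfill is pad(side='left', fillchar='0') ...
assert s.str.zfill(5).equals(s.str.pad(5, side='left', fillchar='0'))

# ... and ljust is pad(side='right').
assert s.str.ljust(5, fillchar='.').equals(
    s.str.pad(5, side='right', fillchar='.'))
```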
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 57387b9ea870a..eb8d2b0b6c809 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -99,13 +99,13 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
result = Series(arg).map(cache_array)
if box:
if errors == 'ignore':
- return Index(result)
+ return Index(result, name=name)
else:
return DatetimeIndex(result, name=name)
return result.values
-def _return_parsed_timezone_results(result, timezones, box, tz):
+def _return_parsed_timezone_results(result, timezones, box, tz, name):
"""
Return results from array_strptime if a %z or %Z directive was passed.
@@ -119,6 +119,9 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
True boxes result as an Index-like, False returns an ndarray
tz : object
None or pytz timezone object
+ name : str, default None
+ Name for the resulting DatetimeIndex.
+
Returns
-------
tz_result : ndarray of parsed dates with timezone
@@ -136,7 +139,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
in zip(result, timezones)])
if box:
from pandas import Index
- return Index(tz_results)
+ return Index(tz_results, name=name)
return tz_results
@@ -209,7 +212,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
if box:
if errors == 'ignore':
from pandas import Index
- return Index(result)
+ return Index(result, name=name)
return DatetimeIndex(result, tz=tz, name=name)
return result
@@ -241,7 +244,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
if format == '%Y%m%d':
try:
result = _attempt_YYYYMMDD(arg, errors=errors)
- except:
+ except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
raise ValueError("cannot convert the input to "
"'%Y%m%d' date format")
@@ -252,7 +255,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
arg, format, exact=exact, errors=errors)
if '%Z' in format or '%z' in format:
return _return_parsed_timezone_results(
- result, timezones, box, tz)
+ result, timezones, box, tz, name)
except tslibs.OutOfBoundsDatetime:
if errors == 'raise':
raise
@@ -331,7 +334,7 @@ def _adjust_to_origin(arg, origin, unit):
raise ValueError("unit must be 'D' for origin='julian'")
try:
arg = arg - j0
- except:
+ except TypeError:
raise ValueError("incompatible 'arg' type for given "
"'origin'='julian'")
@@ -728,21 +731,21 @@ def calc_with_mask(carg, mask):
# try intlike / strings that are ints
try:
return calc(arg.astype(np.int64))
- except:
+ except ValueError:
pass
# a float with actual np.nan
try:
carg = arg.astype(np.float64)
return calc_with_mask(carg, notna(carg))
- except:
+ except ValueError:
pass
# string with NaN-like
try:
mask = ~algorithms.isin(arg, list(tslib.nat_strings))
return calc_with_mask(arg, mask)
- except:
+ except ValueError:
pass
return None
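
The ``name=name`` additions above all serve one behavior: the input's name
should survive a round trip through ``to_datetime``, including the
``errors='ignore'`` path. A minimal illustration (values are placeholders):

```python
import pandas as pd

idx = pd.Index(["2018-01-01", "not a date"], name="when")

# errors='ignore' hands back the unparseable input as a plain Index;
# with this patch the original name is preserved on that path too.
result = pd.to_datetime(idx, errors="ignore")
assert result.name == "when"
```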
diff --git a/pandas/core/window.py b/pandas/core/window.py
index eed0e97f30dc9..4281d66a640e3 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -1404,7 +1404,7 @@ def _get_cov(X, Y):
otherwise defaults to `False`.
Not relevant for :class:`~pandas.Series`.
**kwargs
- Under Review.
+ Unused.
Returns
-------
@@ -1430,7 +1430,7 @@ def _get_cov(X, Y):
all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
set to `True`.
- Function will return `NaN`s for correlations of equal valued sequences;
+ Function will return ``NaN`` for correlations of equal valued sequences;
this is the result of a 0/0 division error.
When `pairwise` is set to `False`, only matching columns between `self` and
@@ -1446,7 +1446,7 @@ def _get_cov(X, Y):
Examples
--------
The below example shows a rolling calculation with a window size of
- four matching the equivalent function call using `numpy.corrcoef`.
+ four matching the equivalent function call using :func:`numpy.corrcoef`.
>>> v1 = [3, 3, 3, 5, 8]
>>> v2 = [3, 4, 4, 4, 8]
@@ -2387,11 +2387,13 @@ def dataframe_from_int_dict(data, frame_template):
if not arg2.columns.is_unique:
raise ValueError("'arg2' columns are not unique")
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
X, Y = arg1.align(arg2, join='outer')
X = X + 0 * Y
Y = Y + 0 * X
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
res_columns = arg1.columns.union(arg2.columns)
for col in res_columns:
if col in X and col in Y:
@@ -2502,7 +2504,7 @@ def _offset(window, center):
offset = (window - 1) / 2. if center else 0
try:
return int(offset)
- except:
+ except TypeError:
return offset.astype(int)
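
The docstring's claim that a window-4 rolling correlation matches
``numpy.corrcoef`` on the same slice can be checked directly. A sketch:

```python
import numpy as np
import pandas as pd

v1 = [3, 3, 3, 5, 8]
v2 = [3, 4, 4, 4, 8]

# The final window covers the last four observations, so the rolling
# result should agree with numpy.corrcoef on that slice.
rolling = pd.Series(v1).rolling(4).corr(pd.Series(v2))
assert np.isclose(rolling.iloc[-1], np.corrcoef(v1[-4:], v2[-4:])[0, 1])
```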
diff --git a/pandas/io/common.py b/pandas/io/common.py
index 69cb9ed46419c..405911eda7e9e 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -386,6 +386,8 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
# ZIP Compression
elif compression == 'zip':
zf = BytesZipFile(path_or_buf, mode)
+ # Ensure the container is closed as well.
+ handles.append(zf)
if zf.mode == 'w':
f = zf
elif zf.mode == 'r':
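
The appended handle matters most on platforms that lock open files. A
sketch of the round trip this fix protects (the file name is arbitrary):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# Write into a zip container and read it straight back. Before the fix
# the ZipFile object was not registered for cleanup, so the archive
# could stay locked (notably on Windows) after to_csv returned.
df.to_csv("frame.csv.zip", compression="zip")
round_tripped = pd.read_csv("frame.csv.zip", compression="zip", index_col=0)
print(round_tripped)
```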
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index e2db6643c5ef0..00b4c704c681b 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -824,8 +824,43 @@ class ExcelWriter(object):
Notes
-----
+ None of the methods and properties are considered public.
+
For compatibility with CSV writers, ExcelWriter serializes lists
and dicts to strings before writing.
+
+ Examples
+ --------
+ Default usage:
+
+ >>> with ExcelWriter('path_to_file.xlsx') as writer:
+ ... df.to_excel(writer)
+
+ To write to separate sheets in a single file:
+
+ >>> with ExcelWriter('path_to_file.xlsx') as writer:
+ ... df1.to_excel(writer, sheet_name='Sheet1')
+ ... df2.to_excel(writer, sheet_name='Sheet2')
+
+ You can set the date format or datetime format:
+
+ >>> with ExcelWriter('path_to_file.xlsx',
+ ...               date_format='YYYY-MM-DD',
+ ...               datetime_format='YYYY-MM-DD HH:MM:SS') as writer:
+ ... df.to_excel(writer)
+
+ You can also append to an existing Excel file:
+
+ >>> with ExcelWriter('path_to_file.xlsx', mode='a') as writer:
+ ... df.to_excel(writer, sheet_name='Sheet3')
+
+ Attributes
+ ----------
+ None
+
+ Methods
+ -------
+ None
"""
# Defining an ExcelWriter implementation (see abstract methods for more...)
diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py
index 45d50ea3fa073..b8b28a0b0c98c 100644
--- a/pandas/io/formats/console.py
+++ b/pandas/io/formats/console.py
@@ -21,7 +21,7 @@ def detect_console_encoding():
encoding = None
try:
encoding = sys.stdout.encoding or sys.stdin.encoding
- except AttributeError:
+ except (AttributeError, IOError):
pass
# try again for something better
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
index 0bc268bc18b95..d6fcfb2207cf9 100644
--- a/pandas/io/formats/excel.py
+++ b/pandas/io/formats/excel.py
@@ -34,15 +34,6 @@ def __init__(self, row, col, val, style=None, mergestart=None,
self.mergeend = mergeend
-header_style = {"font": {"bold": True},
- "borders": {"top": "thin",
- "right": "thin",
- "bottom": "thin",
- "left": "thin"},
- "alignment": {"horizontal": "center",
- "vertical": "top"}}
-
-
class CSSToExcelConverter(object):
"""A callable for converting CSS declarations to ExcelWriter styles
@@ -389,6 +380,16 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
self.merge_cells = merge_cells
self.inf_rep = inf_rep
+ @property
+ def header_style(self):
+ return {"font": {"bold": True},
+ "borders": {"top": "thin",
+ "right": "thin",
+ "bottom": "thin",
+ "left": "thin"},
+ "alignment": {"horizontal": "center",
+ "vertical": "top"}}
+
def _format_value(self, val):
if is_scalar(val) and missing.isna(val):
val = self.na_rep
@@ -427,7 +428,7 @@ def _format_header_mi(self):
# Format multi-index as a merged cells.
for lnum in range(len(level_lengths)):
name = columns.names[lnum]
- yield ExcelCell(lnum, coloffset, name, header_style)
+ yield ExcelCell(lnum, coloffset, name, self.header_style)
for lnum, (spans, levels, labels) in enumerate(zip(
level_lengths, columns.levels, columns.labels)):
@@ -435,16 +436,16 @@ def _format_header_mi(self):
for i in spans:
if spans[i] > 1:
yield ExcelCell(lnum, coloffset + i + 1, values[i],
- header_style, lnum,
+ self.header_style, lnum,
coloffset + i + spans[i])
else:
yield ExcelCell(lnum, coloffset + i + 1, values[i],
- header_style)
+ self.header_style)
else:
# Format in legacy format with dots to indicate levels.
for i, values in enumerate(zip(*level_strs)):
v = ".".join(map(pprint_thing, values))
- yield ExcelCell(lnum, coloffset + i + 1, v, header_style)
+ yield ExcelCell(lnum, coloffset + i + 1, v, self.header_style)
self.rowcounter = lnum
@@ -469,7 +470,7 @@ def _format_header_regular(self):
for colindex, colname in enumerate(colnames):
yield ExcelCell(self.rowcounter, colindex + coloffset, colname,
- header_style)
+ self.header_style)
def _format_header(self):
if isinstance(self.columns, ABCMultiIndex):
@@ -482,7 +483,8 @@ def _format_header(self):
row = [x if x is not None else ''
for x in self.df.index.names] + [''] * len(self.columns)
if reduce(lambda x, y: x and y, map(lambda x: x != '', row)):
- gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style)
+ gen2 = (ExcelCell(self.rowcounter, colindex, val,
+ self.header_style)
for colindex, val in enumerate(row))
self.rowcounter += 1
return itertools.chain(gen, gen2)
@@ -518,7 +520,7 @@ def _format_regular_rows(self):
if index_label and self.header is not False:
yield ExcelCell(self.rowcounter - 1, 0, index_label,
- header_style)
+ self.header_style)
# write index_values
index_values = self.df.index
@@ -526,7 +528,8 @@ def _format_regular_rows(self):
index_values = self.df.index.to_timestamp()
for idx, idxval in enumerate(index_values):
- yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)
+ yield ExcelCell(self.rowcounter + idx, 0, idxval,
+ self.header_style)
coloffset = 1
else:
@@ -562,7 +565,7 @@ def _format_hierarchical_rows(self):
for cidx, name in enumerate(index_labels):
yield ExcelCell(self.rowcounter - 1, cidx, name,
- header_style)
+ self.header_style)
if self.merge_cells:
# Format hierarchical rows as merged cells.
@@ -581,12 +584,12 @@ def _format_hierarchical_rows(self):
for i in spans:
if spans[i] > 1:
yield ExcelCell(self.rowcounter + i, gcolidx,
- values[i], header_style,
+ values[i], self.header_style,
self.rowcounter + i + spans[i] - 1,
gcolidx)
else:
yield ExcelCell(self.rowcounter + i, gcolidx,
- values[i], header_style)
+ values[i], self.header_style)
gcolidx += 1
else:
@@ -594,7 +597,7 @@ def _format_hierarchical_rows(self):
for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
yield ExcelCell(self.rowcounter + idx, gcolidx,
- indexcolval, header_style)
+ indexcolval, self.header_style)
gcolidx += 1
for cell in self._generate_body(gcolidx):
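
Turning the module-level ``header_style`` dict into a property means the
header formatting can now be overridden per formatter instance. A
hypothetical subclass, assuming ``ExcelFormatter`` accepts ``None`` as
"no style" (the value is passed through to ``ExcelCell`` unchanged):

```python
from pandas.io.formats.excel import ExcelFormatter


class BareHeaderFormatter(ExcelFormatter):
    """Hypothetical subclass that writes unstyled header cells."""

    @property
    def header_style(self):
        # Writers skip styling when a cell's style is None.
        return None
```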
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 1ff0613876838..db86409adc2b0 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -288,8 +288,7 @@ def to_string(self):
if self.index:
result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
else:
- result = self.adj.adjoin(3, fmt_values).replace('\n ',
- '\n').strip()
+ result = self.adj.adjoin(3, fmt_values)
if self.header and have_header:
result = fmt_index[0] + '\n' + result
@@ -650,8 +649,6 @@ def to_string(self):
self._chk_truncate()
strcols = self._to_str_columns()
text = self.adj.adjoin(1, *strcols)
- if not self.index:
- text = text.replace('\n ', '\n').strip()
self.buf.writelines(text)
if self.should_show_dimensions:
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index b175dd540a518..f4bb53ba4f218 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -1073,6 +1073,7 @@ def bar(self, subset=None, axis=0, color='#d65f5f', width=100,
percent of the cell's width.
align : {'left', 'zero',' mid'}, default 'left'
How to align the bars with the cells.
+
- 'left' : the min value starts at the left of the cell.
- 'zero' : a value of zero is located at the center of the cell.
- 'mid' : the center of the cell is at (max-min)/2, or
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 87a0e4d5d1747..46e1b13631f07 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -1,5 +1,7 @@
""" Google BigQuery support """
+import warnings
+
def _try_import():
# since pandas is a dependency of pandas-gbq
@@ -23,7 +25,7 @@ def _try_import():
def read_gbq(query, project_id=None, index_col=None, col_order=None,
reauth=False, private_key=None, auth_local_webserver=False,
- dialect='legacy', location=None, configuration=None,
+ dialect=None, location=None, configuration=None,
verbose=None):
"""
Load data from Google BigQuery.
@@ -65,6 +67,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
*New in version 0.2.0 of pandas-gbq*.
dialect : str, default 'legacy'
+ Note: The default value is changing to 'standard' in a future version.
+
SQL syntax dialect to use. Value can be one of:
``'legacy'``
@@ -76,6 +80,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
compliant with the SQL 2011 standard. For more information
see `BigQuery Standard SQL Reference
<https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
+
+ .. versionchanged:: 0.24.0
location : str, optional
Location where the query job should run. See the `BigQuery locations
documentation
@@ -108,6 +114,17 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
pandas.DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
"""
pandas_gbq = _try_import()
+
+ if dialect is None:
+ dialect = "legacy"
+ warnings.warn(
+ 'The default value for dialect is changing to "standard" in a '
+ 'future version of pandas-gbq. Pass in dialect="legacy" to '
+ "disable this warning.",
+ FutureWarning,
+ stacklevel=2,
+ )
+
return pandas_gbq.read_gbq(
query, project_id=project_id, index_col=index_col,
col_order=col_order, reauth=reauth, verbose=verbose,
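
Callers can opt out of the new ``FutureWarning`` by choosing a dialect
explicitly. A sketch (the project id and query are placeholders, and
pandas-gbq with valid credentials is assumed):

```python
import pandas as pd

# Passing dialect explicitly keeps the current behavior and silences
# the deprecation warning about the changing default.
df = pd.read_gbq("SELECT 1 AS x",
                 project_id="my-project",
                 dialect="legacy")
```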
diff --git a/pandas/io/html.py b/pandas/io/html.py
index cca27db00f48d..04534ff591a2c 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -6,7 +6,6 @@
import os
import re
import numbers
-import collections
from distutils.version import LooseVersion
@@ -14,6 +13,7 @@
from pandas.errors import EmptyDataError
from pandas.io.common import _is_url, urlopen, _validate_header_arg
from pandas.io.parsers import TextParser
+from pandas import compat
from pandas.compat import (lrange, lmap, u, string_types, iteritems,
raise_with_traceback, binary_type)
from pandas import Series
@@ -859,7 +859,7 @@ def _validate_flavor(flavor):
flavor = 'lxml', 'bs4'
elif isinstance(flavor, string_types):
flavor = flavor,
- elif isinstance(flavor, collections.Iterable):
+ elif isinstance(flavor, compat.Iterable):
if not all(isinstance(flav, string_types) for flav in flavor):
raise TypeError('Object of type {typ!r} is not an iterable of '
'strings'
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index a99014f07a6b3..6ab56c68a510a 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -103,19 +103,27 @@ def __init__(self):
self.api = pyarrow
def write(self, df, path, compression='snappy',
- coerce_timestamps='ms', **kwargs):
+ coerce_timestamps='ms', index=None, **kwargs):
self.validate_dataframe(df)
- if self._pyarrow_lt_070:
+
+ # Only validate the index if we're writing it.
+ if self._pyarrow_lt_070 and index is not False:
self._validate_write_lt_070(df)
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
+ if index is None:
+ from_pandas_kwargs = {}
+ else:
+ from_pandas_kwargs = {'preserve_index': index}
+
if self._pyarrow_lt_060:
- table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
+ table = self.api.Table.from_pandas(df, timestamps_to_ms=True,
+ **from_pandas_kwargs)
self.api.parquet.write_table(
table, path, compression=compression, **kwargs)
else:
- table = self.api.Table.from_pandas(df)
+ table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
self.api.parquet.write_table(
table, path, compression=compression,
coerce_timestamps=coerce_timestamps, **kwargs)
@@ -197,7 +205,7 @@ def __init__(self):
)
self.api = fastparquet
- def write(self, df, path, compression='snappy', **kwargs):
+ def write(self, df, path, compression='snappy', index=None, **kwargs):
self.validate_dataframe(df)
# thriftpy/protocol/compact.py:339:
# DeprecationWarning: tostring() is deprecated.
@@ -214,8 +222,8 @@ def write(self, df, path, compression='snappy', **kwargs):
path, _, _, _ = get_filepath_or_buffer(path)
with catch_warnings(record=True):
- self.api.write(path, df,
- compression=compression, **kwargs)
+ self.api.write(path, df, compression=compression,
+ write_index=index, **kwargs)
def read(self, path, columns=None, **kwargs):
if is_s3_url(path):
@@ -234,7 +242,8 @@ def read(self, path, columns=None, **kwargs):
return parquet_file.to_pandas(columns=columns, **kwargs)
-def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
+def to_parquet(df, path, engine='auto', compression='snappy', index=None,
+ **kwargs):
"""
Write a DataFrame to the parquet format.
@@ -250,11 +259,17 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
'pyarrow' is unavailable.
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
Name of the compression to use. Use ``None`` for no compression.
+ index : bool, default None
+ If ``True``, include the dataframe's index(es) in the file output. If
+ ``False``, they will not be written to the file. If ``None``, the
+ engine's default behavior will be used.
+
+ .. versionadded:: 0.24.0
kwargs
Additional keyword arguments passed to the engine
"""
impl = get_engine(engine)
- return impl.write(df, path, compression=compression, **kwargs)
+ return impl.write(df, path, compression=compression, index=index, **kwargs)
def read_parquet(path, engine='auto', columns=None, **kwargs):
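
Usage of the new ``index`` keyword, assuming either engine is installed:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]}, index=["x", "y"])

# index=False drops the index from the file on either engine;
# index=None (the default) keeps each engine's existing behavior.
df.to_parquet("frame.parquet", index=False)
```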
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 8d37bf4c84d5d..a4f1155117b12 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2727,9 +2727,6 @@ def _next_iter_line(self, row_num):
'cannot be processed in Python\'s '
'native csv library at the moment, '
'so please pass in engine=\'c\' instead')
- elif 'newline inside string' in msg:
- msg = ('EOF inside string starting with '
- 'line ' + str(row_num))
if self.skipfooter > 0:
reason = ('Error could possibly be due to '
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index 6738daec9397c..9c219d7fd6997 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -160,7 +160,8 @@ def try_read(path, encoding=None):
# GH 6899
try:
with warnings.catch_warnings(record=True):
- # We want to silencce any warnings about, e.g. moved modules.
+ # We want to silence any warnings about, e.g. moved modules.
+ warnings.simplefilter("ignore", Warning)
return read_wrapper(lambda f: pkl.load(f))
except Exception:
# reg/patched pickle
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index c57b1c3e211f6..fc9e415ed38f7 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -258,7 +258,7 @@ def _tables():
try:
_table_file_open_policy_is_strict = (
tables.file._FILE_OPEN_POLICY == 'strict')
- except:
+ except AttributeError:
pass
return _table_mod
@@ -395,11 +395,11 @@ def read_hdf(path_or_buf, key=None, mode='r', **kwargs):
'contains multiple datasets.')
key = candidate_only_group._v_pathname
return store.select(key, auto_close=auto_close, **kwargs)
- except:
+ except (ValueError, TypeError):
# if there is an error, close the store
try:
store.close()
- except:
+ except AttributeError:
pass
raise
@@ -517,7 +517,7 @@ def __getattr__(self, name):
""" allow attribute access to get stores """
try:
return self.get(name)
- except:
+ except (KeyError, ClosedFileError):
pass
raise AttributeError("'%s' object has no attribute '%s'" %
(type(self).__name__, name))
@@ -675,7 +675,7 @@ def flush(self, fsync=False):
if fsync:
try:
os.fsync(self._handle.fileno())
- except:
+ except OSError:
pass
def get(self, key):
@@ -1161,7 +1161,7 @@ def get_node(self, key):
if not key.startswith('/'):
key = '/' + key
return self._handle.get_node(self.root, key)
- except:
+ except _table_mod.exceptions.NoSuchNodeError:
return None
def get_storer(self, key):
@@ -1270,7 +1270,7 @@ def _validate_format(self, format, kwargs):
# validate
try:
kwargs['format'] = _FORMAT_MAP[format.lower()]
- except:
+ except KeyError:
raise TypeError("invalid HDFStore format specified [{0}]"
.format(format))
@@ -1307,7 +1307,7 @@ def error(t):
try:
pt = _TYPE_MAP[type(value)]
- except:
+ except KeyError:
error('_TYPE_MAP')
# we are actually a table
@@ -1318,7 +1318,7 @@ def error(t):
if u('table') not in pt:
try:
return globals()[_STORER_MAP[pt]](self, group, **kwargs)
- except:
+ except KeyError:
error('_STORER_MAP')
# existing node (and must be a table)
@@ -1354,12 +1354,12 @@ def error(t):
fields = group.table._v_attrs.fields
if len(fields) == 1 and fields[0] == u('value'):
tt = u('legacy_frame')
- except:
+ except IndexError:
pass
try:
return globals()[_TABLE_MAP[tt]](self, group, **kwargs)
- except:
+ except KeyError:
error('_TABLE_MAP')
def _write_to_group(self, key, value, format, index=True, append=False,
@@ -1624,7 +1624,7 @@ def is_indexed(self):
""" return whether I am an indexed column """
try:
return getattr(self.table.cols, self.cname).is_indexed
- except:
+ except AttributeError:
False
def copy(self):
@@ -1654,9 +1654,10 @@ def convert(self, values, nan_rep, encoding, errors):
kwargs['freq'] = _ensure_decoded(self.freq)
if self.index_name is not None:
kwargs['name'] = _ensure_decoded(self.index_name)
+ # making an Index instance could throw a number of different errors
try:
self.values = Index(values, **kwargs)
- except:
+ except Exception: # noqa: E722
# if the output freq is different that what we recorded,
# it should be None (see also 'doc example part 2')
@@ -1869,7 +1870,7 @@ def create_for_block(
m = re.search(r"values_block_(\d+)", name)
if m:
name = "values_%s" % m.groups()[0]
- except:
+ except IndexError:
pass
return cls(name=name, cname=cname, **kwargs)
@@ -2232,7 +2233,7 @@ def convert(self, values, nan_rep, encoding, errors):
try:
self.data = self.data.astype(dtype, copy=False)
- except:
+ except TypeError:
self.data = self.data.astype('O', copy=False)
# convert nans / decode
@@ -2325,7 +2326,7 @@ def set_version(self):
self.version = tuple(int(x) for x in version.split('.'))
if len(self.version) == 2:
self.version = self.version + (0,)
- except:
+ except AttributeError:
self.version = (0, 0, 0)
@property
@@ -2769,7 +2770,7 @@ def write_array(self, key, value, items=None):
else:
try:
items = list(items)
- except:
+ except TypeError:
pass
ws = performance_doc % (inferred_type, key, items)
warnings.warn(ws, PerformanceWarning, stacklevel=7)
@@ -2843,7 +2844,7 @@ class SeriesFixed(GenericFixed):
def shape(self):
try:
return len(getattr(self.group, 'values')),
- except:
+ except (TypeError, AttributeError):
return None
def read(self, **kwargs):
@@ -2961,7 +2962,7 @@ def shape(self):
shape = shape[::-1]
return shape
- except:
+ except AttributeError:
return None
def read(self, start=None, stop=None, **kwargs):
@@ -3495,7 +3496,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
if axes is None:
try:
axes = _AXES_MAP[type(obj)]
- except:
+ except KeyError:
raise TypeError("cannot properly create the storer for: "
"[group->%s,value->%s]"
% (self.group._v_name, type(obj)))
@@ -3614,7 +3615,7 @@ def get_blk_items(mgr, blocks):
b, b_items = by_items.pop(items)
new_blocks.append(b)
new_blk_items.append(b_items)
- except:
+ except (IndexError, KeyError):
raise ValueError(
"cannot match existing table structure for [%s] on "
"appending data" % ','.join(pprint_thing(item) for
@@ -3642,7 +3643,7 @@ def get_blk_items(mgr, blocks):
if existing_table is not None and validate:
try:
existing_col = existing_table.values_axes[i]
- except:
+ except (IndexError, KeyError):
raise ValueError("Incompatible appended table [%s] with "
"existing table [%s]"
% (blocks, existing_table.values_axes))
@@ -4460,7 +4461,7 @@ def _get_info(info, name):
""" get/create the info for this name """
try:
idx = info[name]
- except:
+ except KeyError:
idx = info[name] = dict()
return idx
@@ -4782,7 +4783,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):
)
self.coordinates = where
- except:
+ except ValueError:
pass
if self.coordinates is None:
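
The narrowed ``except`` clauses throughout this file follow one rule: a
bare ``except:`` also traps ``KeyboardInterrupt`` and ``SystemExit``,
hiding real failures. A generic illustration, not pandas code:

```python
def lookup(table, key):
    try:
        return table[key]
    except KeyError:  # narrow: only the failure we expect
        return None

# A bare "except:" here would also swallow KeyboardInterrupt and
# SystemExit, making the process hard to interrupt and masking bugs;
# "except Exception:" is the widest net that is still safe.
print(lookup({"a": 1}, "b"))  # None
```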
diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index 221c07a0631d2..a5bfd5866a261 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -244,8 +244,8 @@ cdef class Parser(object):
self.parser = parser
self.header_length = self.parser.header_length
self.column_count = parser.column_count
- self.lengths = parser._column_data_lengths
- self.offsets = parser._column_data_offsets
+ self.lengths = parser.column_data_lengths()
+ self.offsets = parser.column_data_offsets()
self.byte_chunk = parser._byte_chunk
self.string_chunk = parser._string_chunk
self.row_length = parser.row_length
@@ -257,7 +257,7 @@ cdef class Parser(object):
# page indicators
self.update_next_page()
- column_types = parser.column_types
+ column_types = parser.column_types()
# map column types
for j in range(self.column_count):
@@ -375,7 +375,7 @@ cdef class Parser(object):
if done:
return True
return False
- elif self.current_page_type == page_data_type:
+ elif self.current_page_type & page_data_type == page_data_type:
self.process_byte_array_with_data(
bit_offset + subheader_pointers_offset +
self.current_row_on_page_index * self.row_length,
@@ -437,7 +437,7 @@ cdef class Parser(object):
elif column_types[j] == column_type_string:
# string
string_chunk[js, current_row] = np.array(source[start:(
- start + lngt)]).tostring().rstrip()
+ start + lngt)]).tostring().rstrip(b"\x00 ")
js += 1
self.current_row_on_page_index += 1
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index efeb306b618d1..3582f538c16bf 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -82,7 +82,6 @@ def __init__(self, path_or_buf, index=None, convert_dates=True,
self.compression = ""
self.column_names_strings = []
self.column_names = []
- self.column_types = []
self.column_formats = []
self.columns = []
@@ -90,6 +89,8 @@ def __init__(self, path_or_buf, index=None, convert_dates=True,
self._cached_page = None
self._column_data_lengths = []
self._column_data_offsets = []
+ self._column_types = []
+
self._current_row_in_file_index = 0
self._current_row_on_page_index = 0
self._current_row_in_file_index = 0
@@ -102,6 +103,19 @@ def __init__(self, path_or_buf, index=None, convert_dates=True,
self._get_properties()
self._parse_metadata()
+ def column_data_lengths(self):
+ """Return a numpy int64 array of the column data lengths"""
+ return np.asarray(self._column_data_lengths, dtype=np.int64)
+
+ def column_data_offsets(self):
+ """Return a numpy int64 array of the column offsets"""
+ return np.asarray(self._column_data_offsets, dtype=np.int64)
+
+ def column_types(self):
+ """Returns a numpy character array of the column types:
+ s (string) or d (double)"""
+ return np.asarray(self._column_types, dtype=np.dtype('S1'))
+
def close(self):
try:
self.handle.close()
@@ -287,8 +301,10 @@ def _process_page_meta(self):
pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
if self._current_page_type in pt:
self._process_page_metadata()
- return ((self._current_page_type in [256] + const.page_mix_types) or
- (self._current_page_data_subheader_pointers is not None))
+ is_data_page = self._current_page_type & const.page_data_type
+ is_mix_page = self._current_page_type in const.page_mix_types
+ return (is_data_page or is_mix_page
+ or self._current_page_data_subheader_pointers != [])
def _read_page_header(self):
bit_offset = self._page_bit_offset
@@ -503,12 +519,6 @@ def _process_columnattributes_subheader(self, offset, length):
int_len = self._int_length
column_attributes_vectors_count = (
length - 2 * int_len - 12) // (int_len + 8)
- self.column_types = np.empty(
- column_attributes_vectors_count, dtype=np.dtype('S1'))
- self._column_data_lengths = np.empty(
- column_attributes_vectors_count, dtype=np.int64)
- self._column_data_offsets = np.empty(
- column_attributes_vectors_count, dtype=np.int64)
for i in range(column_attributes_vectors_count):
col_data_offset = (offset + int_len +
const.column_data_offset_offset +
@@ -520,16 +530,13 @@ def _process_columnattributes_subheader(self, offset, length):
const.column_type_offset + i * (int_len + 8))
x = self._read_int(col_data_offset, int_len)
- self._column_data_offsets[i] = x
+ self._column_data_offsets.append(x)
x = self._read_int(col_data_len, const.column_data_length_length)
- self._column_data_lengths[i] = x
+ self._column_data_lengths.append(x)
x = self._read_int(col_types, const.column_type_length)
- if x == 1:
- self.column_types[i] = b'd'
- else:
- self.column_types[i] = b's'
+ self._column_types.append(b'd' if x == 1 else b's')
def _process_columnlist_subheader(self, offset, length):
# unknown purpose
@@ -586,7 +593,7 @@ def _process_format_subheader(self, offset, length):
col.name = self.column_names[current_column_number]
col.label = column_label
col.format = column_format
- col.ctype = self.column_types[current_column_number]
+ col.ctype = self._column_types[current_column_number]
col.length = self._column_data_lengths[current_column_number]
self.column_formats.append(column_format)
@@ -599,7 +606,7 @@ def read(self, nrows=None):
elif nrows is None:
nrows = self.row_count
- if len(self.column_types) == 0:
+ if len(self._column_types) == 0:
self.close()
raise EmptyDataError("No columns to parse from file")
@@ -610,8 +617,8 @@ def read(self, nrows=None):
if nrows > m:
nrows = m
- nd = (self.column_types == b'd').sum()
- ns = (self.column_types == b's').sum()
+ nd = self._column_types.count(b'd')
+ ns = self._column_types.count(b's')
self._string_chunk = np.empty((ns, nrows), dtype=np.object)
self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)
@@ -639,11 +646,13 @@ def _read_next_page(self):
self._page_length))
self._read_page_header()
- if self._current_page_type == const.page_meta_type:
+ page_type = self._current_page_type
+ if page_type == const.page_meta_type:
self._process_page_metadata()
- pt = [const.page_meta_type, const.page_data_type]
- pt += [const.page_mix_types]
- if self._current_page_type not in pt:
+
+ is_data_page = page_type & const.page_data_type
+ pt = [const.page_meta_type] + const.page_mix_types
+ if not is_data_page and self._current_page_type not in pt:
return self._read_next_page()
return False
@@ -660,7 +669,7 @@ def _chunk_to_dataframe(self):
name = self.column_names[j]
- if self.column_types[j] == b'd':
+ if self._column_types[j] == b'd':
rslt[name] = self._byte_chunk[jb, :].view(
dtype=self.byte_order + 'd')
rslt[name] = np.asarray(rslt[name], dtype=np.float64)
@@ -674,7 +683,7 @@ def _chunk_to_dataframe(self):
rslt[name] = pd.to_datetime(rslt[name], unit=unit,
origin="1960-01-01")
jb += 1
- elif self.column_types[j] == b's':
+ elif self._column_types[j] == b's':
rslt[name] = self._string_chunk[js, :]
if self.convert_text and (self.encoding is not None):
rslt[name] = rslt[name].str.decode(
@@ -686,6 +695,6 @@ def _chunk_to_dataframe(self):
else:
self.close()
raise ValueError("unknown column type %s" %
- self.column_types[j])
+ self._column_types[j])
return rslt
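
The switch from ``==`` to a bitwise test in both SAS readers reflects that
the page type field is a bit field: data pages can carry extra flag bits,
so an equality check misses them. Illustrative constants only (the real
values live in pandas.io.sas.sas_constants):

```python
page_data_type = 256

page_type = 256 | 128   # a data page carrying an extra flag bit

assert page_type != page_data_type                    # '==' misses it
assert page_type & page_data_type == page_data_type   # bit test finds it
```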
diff --git a/pandas/json.py b/pandas/json.py
deleted file mode 100644
index 16d6580c87951..0000000000000
--- a/pandas/json.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-import warnings
-warnings.warn("The pandas.json module is deprecated and will be "
- "removed in a future version. Please import from "
- "pandas.io.json instead", FutureWarning, stacklevel=2)
-from pandas._libs.json import dumps, loads
diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_compat.py
index 46ebd4217862d..5032b259e9831 100644
--- a/pandas/plotting/_compat.py
+++ b/pandas/plotting/_compat.py
@@ -29,3 +29,4 @@ def inner():
_mpl_ge_2_0_1 = _mpl_version('2.0.1', operator.ge)
_mpl_ge_2_1_0 = _mpl_version('2.1.0', operator.ge)
_mpl_ge_2_2_0 = _mpl_version('2.2.0', operator.ge)
+_mpl_ge_3_0_0 = _mpl_version('3.0.0', operator.ge)
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 4fa3b51c60ee4..77c97412bd3d7 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -32,7 +32,8 @@
from pandas.plotting._compat import (_mpl_ge_1_3_1,
_mpl_ge_1_5_0,
- _mpl_ge_2_0_0)
+ _mpl_ge_2_0_0,
+ _mpl_ge_3_0_0)
from pandas.plotting._style import (plot_params,
_get_standard_colors)
from pandas.plotting._tools import (_subplots, _flatten, table,
@@ -843,11 +844,16 @@ def _plot_colorbar(self, ax, **kwds):
# For a more detailed description of the issue
# see the following link:
# https://github.com/ipython/ipython/issues/11215
-
img = ax.collections[0]
cbar = self.fig.colorbar(img, ax=ax, **kwds)
+
+ if _mpl_ge_3_0_0():
+ # The workaround below is no longer necessary.
+ return
+
points = ax.get_position().get_points()
cbar_points = cbar.ax.get_position().get_points()
+
cbar.ax.set_position([cbar_points[0, 0],
points[0, 1],
cbar_points[1, 0] - cbar_points[0, 0],
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index bf9e14b427015..4033d46e161ad 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import sys
-from warnings import catch_warnings
import pytest
import pandas as pd
@@ -35,7 +34,7 @@ class TestPDApi(Base):
'util', 'options', 'io']
# these are already deprecated; awaiting removal
- deprecated_modules = ['parser', 'json', 'lib', 'tslib']
+ deprecated_modules = ['parser', 'lib', 'tslib']
# misc
misc = ['IndexSlice', 'NaT']
@@ -173,32 +172,25 @@ def test_get_store(self):
s.close()
-class TestJson(object):
-
- def test_deprecation_access_func(self):
- with catch_warnings(record=True):
- pd.json.dumps([])
-
-
class TestParser(object):
+ @pytest.mark.filterwarnings("ignore")
def test_deprecation_access_func(self):
- with catch_warnings(record=True):
- pd.parser.na_values
+ pd.parser.na_values
class TestLib(object):
+ @pytest.mark.filterwarnings("ignore")
def test_deprecation_access_func(self):
- with catch_warnings(record=True):
- pd.lib.infer_dtype('foo')
+ pd.lib.infer_dtype('foo')
class TestTSLib(object):
+ @pytest.mark.filterwarnings("ignore")
def test_deprecation_access_func(self):
- with catch_warnings(record=True):
- pd.tslib.Timestamp('20160101')
+ pd.tslib.Timestamp('20160101')
class TestTypes(object):
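
The migration from ``catch_warnings(record=True)`` to
``pytest.mark.filterwarnings`` is mechanical but worth spelling out: the
mark can mute everything (``"ignore"``) or a single category. A sketch of
the pattern:

```python
import warnings

import pytest


@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_legacy_access():
    # Only FutureWarning is muted; any other warning raised here still
    # shows up in pytest's warning summary, unlike the old
    # catch_warnings(record=True) blocks, which silenced everything.
    warnings.warn("deprecated usage", FutureWarning)
```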
diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py
index bd4891326c751..ed80c1414dbaa 100644
--- a/pandas/tests/api/test_types.py
+++ b/pandas/tests/api/test_types.py
@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*-
-
+import sys
import pytest
-from warnings import catch_warnings
-
-import pandas
from pandas.api import types
from pandas.util import testing as tm
@@ -59,7 +56,13 @@ def test_deprecated_from_api_types(self):
def test_moved_infer_dtype():
+ # del from sys.modules to ensure we try to freshly load.
+ # if this was imported from another test previously, we would
+ # not see the warning, since the import is otherwise cached.
+ sys.modules.pop("pandas.lib", None)
+
+ with tm.assert_produces_warning(FutureWarning):
+ import pandas.lib
- with catch_warnings(record=True):
e = pandas.lib.infer_dtype('foo')
assert e is not None
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index b19cc61a2999e..36bb0aca066fb 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -1803,6 +1803,10 @@ def test_dt64_with_DateOffsets(klass, normalize, cls_and_kwargs):
offset_cls = getattr(pd.offsets, cls_name)
with warnings.catch_warnings(record=True):
+ # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being
+ # applied to Series or DatetimeIndex
+ # we aren't testing that here, so ignore.
+ warnings.simplefilter("ignore", PerformanceWarning)
for n in [0, 5]:
if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth',
'FY5253Quarter', 'FY5253'] and n == 0):
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
index fcfc3994a88c8..0449212713048 100644
--- a/pandas/tests/arithmetic/test_numeric.py
+++ b/pandas/tests/arithmetic/test_numeric.py
@@ -4,7 +4,6 @@
# Specifically for numeric dtypes
from decimal import Decimal
import operator
-from collections import Iterable
import pytest
import numpy as np
@@ -12,7 +11,7 @@
import pandas as pd
import pandas.util.testing as tm
-from pandas.compat import PY3
+from pandas.compat import PY3, Iterable
from pandas.core import ops
from pandas import Timedelta, Series, Index, TimedeltaIndex
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
index b5f499ba27323..998c1182c013a 100644
--- a/pandas/tests/arrays/categorical/test_constructors.py
+++ b/pandas/tests/arrays/categorical/test_constructors.py
@@ -42,6 +42,12 @@ def test_constructor_empty(self):
expected = pd.Int64Index([1, 2, 3])
tm.assert_index_equal(c.categories, expected)
+ def test_constructor_empty_boolean(self):
+ # see gh-22702
+ cat = pd.Categorical([], categories=[True, False])
+ categories = sorted(cat.categories.tolist())
+ assert categories == [False, True]
+
def test_constructor_tuples(self):
values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object)
result = Categorical(values)
diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py
index b54ac2835bee3..d23da1565a952 100644
--- a/pandas/tests/arrays/categorical/test_indexing.py
+++ b/pandas/tests/arrays/categorical/test_indexing.py
@@ -5,7 +5,8 @@
import numpy as np
import pandas.util.testing as tm
-from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
+from pandas import Categorical, Index, CategoricalIndex, PeriodIndex, Series
+import pandas.core.common as com
from pandas.tests.arrays.categorical.common import TestCategorical
@@ -121,3 +122,27 @@ def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
tm.assert_numpy_array_equal(expected, result)
tm.assert_numpy_array_equal(exp_miss, res_miss)
+
+
+@pytest.mark.parametrize("index", [True, False])
+def test_mask_with_boolean(index):
+ s = Series(range(3))
+ idx = Categorical([True, False, True])
+ if index:
+ idx = CategoricalIndex(idx)
+
+ assert com.is_bool_indexer(idx)
+ result = s[idx]
+ expected = s[idx.astype('object')]
+ tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("index", [True, False])
+def test_mask_with_boolean_raises(index):
+ s = Series(range(3))
+ idx = Categorical([True, False, None])
+ if index:
+ idx = CategoricalIndex(idx)
+
+ with tm.assert_raises_regex(ValueError, 'NA / NaN'):
+ s[idx]
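
Together these tests pin down the new behavior: an all-boolean
``Categorical`` acts as a boolean mask. A sketch of the happy path:

```python
import pandas as pd
from pandas.api.types import is_bool_dtype

mask = pd.Categorical([True, False, True])
assert is_bool_dtype(mask)

s = pd.Series(range(3))
print(s[mask])   # selects rows 0 and 2
```

If the Categorical contains a missing value, the same indexing raises
``ValueError`` with an 'NA / NaN' message, as the second test asserts.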
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 118b05d16ab09..eef8646e4d6d2 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -1,5 +1,4 @@
import warnings
-from warnings import catch_warnings
import operator
from itertools import product
@@ -924,12 +923,18 @@ def testit(r_idx_type, c_idx_type, index_name):
# only test dt with dt, otherwise weird joins result
args = product(['i', 'u', 's'], ['i', 'u', 's'], ('index', 'columns'))
with warnings.catch_warnings(record=True):
+ # avoid warning about comparing strings and ints
+ warnings.simplefilter("ignore", RuntimeWarning)
+
for r_idx_type, c_idx_type, index_name in args:
testit(r_idx_type, c_idx_type, index_name)
# dt with dt
args = product(['dt'], ['dt'], ('index', 'columns'))
with warnings.catch_warnings(record=True):
+ # avoid warning about comparing strings and ints
+ warnings.simplefilter("ignore", RuntimeWarning)
+
for r_idx_type, c_idx_type, index_name in args:
testit(r_idx_type, c_idx_type, index_name)
@@ -1112,13 +1117,13 @@ def test_bool_ops_with_constants(self):
exp = eval(ex)
assert res == exp
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_panel_fails(self):
- with catch_warnings(record=True):
- x = Panel(randn(3, 4, 5))
- y = Series(randn(10))
- with pytest.raises(NotImplementedError):
- self.eval('x + y',
- local_dict={'x': x, 'y': y})
+ x = Panel(randn(3, 4, 5))
+ y = Series(randn(10))
+ with pytest.raises(NotImplementedError):
+ self.eval('x + y',
+ local_dict={'x': x, 'y': y})
def test_4d_ndarray_fails(self):
x = randn(3, 4, 5, 6)
@@ -1382,6 +1387,7 @@ def test_query_inplace(self):
@pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2],
np.array([]), (1, 3)])
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_cannot_item_assign(self, invalid_target):
msg = "Cannot assign expression output to target"
expression = "a = 1 + 2"
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 55c841ba1fc46..e3d14497a38f9 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -17,7 +17,7 @@
is_dtype_equal, is_datetime64_ns_dtype,
is_datetime64_dtype, is_interval_dtype,
is_datetime64_any_dtype, is_string_dtype,
- _coerce_to_dtype)
+ _coerce_to_dtype, is_bool_dtype)
import pandas.util.testing as tm
@@ -126,6 +126,18 @@ def test_tuple_categories(self):
result = CategoricalDtype(categories)
assert all(result.categories == categories)
+ @pytest.mark.parametrize("categories, expected", [
+ ([True, False], True),
+ ([True, False, None], True),
+ ([True, False, "a", "b'"], False),
+ ([0, 1], False),
+ ])
+ def test_is_boolean(self, categories, expected):
+ cat = Categorical(categories)
+ assert cat.dtype._is_boolean is expected
+ assert is_bool_dtype(cat) is expected
+ assert is_bool_dtype(cat.dtype) is expected
+
class TestDatetimeTZDtype(Base):
diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py
index 53f92b98f022e..38d1143f3838b 100644
--- a/pandas/tests/dtypes/test_generic.py
+++ b/pandas/tests/dtypes/test_generic.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import numpy as np
import pandas as pd
from pandas.core.dtypes import generic as gt
@@ -35,6 +35,7 @@ def test_abc_types(self):
assert isinstance(pd.Series([1, 2, 3]), gt.ABCSeries)
assert isinstance(self.df, gt.ABCDataFrame)
with catch_warnings(record=True):
+ simplefilter('ignore', FutureWarning)
assert isinstance(self.df.to_panel(), gt.ABCPanel)
assert isinstance(self.sparse_series, gt.ABCSparseSeries)
assert isinstance(self.sparse_array, gt.ABCSparseArray)
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index dc330666b4b6c..76cd6aabb93ae 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -5,7 +5,7 @@
related to inference and not otherwise tested in types/test_common.py
"""
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import collections
import re
from datetime import datetime, date, timedelta, time
@@ -20,6 +20,7 @@
DatetimeIndex, TimedeltaIndex, Timestamp,
Panel, Period, Categorical, isna, Interval,
DateOffset)
+from pandas import compat
from pandas.compat import u, PY2, StringIO, lrange
from pandas.core.dtypes import inference
from pandas.core.dtypes.common import (
@@ -226,7 +227,7 @@ class OldStyleClass():
pass
c = OldStyleClass()
- assert not isinstance(c, collections.Hashable)
+ assert not isinstance(c, compat.Hashable)
assert inference.is_hashable(c)
hash(c) # this will not raise
@@ -1158,6 +1159,7 @@ def test_is_scalar_numpy_zerodim_arrays(self):
assert not is_scalar(zerodim)
assert is_scalar(lib.item_from_zerodim(zerodim))
+ @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
def test_is_scalar_numpy_arrays(self):
assert not is_scalar(np.array([]))
assert not is_scalar(np.array([[]]))
@@ -1176,6 +1178,7 @@ def test_is_scalar_pandas_containers(self):
assert not is_scalar(DataFrame())
assert not is_scalar(DataFrame([[1]]))
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
assert not is_scalar(Panel())
assert not is_scalar(Panel([[[1]]]))
assert not is_scalar(Index([]))
@@ -1210,6 +1213,7 @@ def test_nan_to_nat_conversions():
@td.skip_if_no_scipy
+@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
def test_is_scipy_sparse(spmatrix): # noqa: F811
assert is_scipy_sparse(spmatrix([[0, 1]]))
assert not is_scipy_sparse(np.array([1]))
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
index ca9a2dc81fcc6..8f82db69a9213 100644
--- a/pandas/tests/dtypes/test_missing.py
+++ b/pandas/tests/dtypes/test_missing.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import pytest
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import numpy as np
from datetime import datetime
from pandas.util import testing as tm
@@ -94,6 +94,7 @@ def test_isna_isnull(self, isna_f):
# panel
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
for p in [tm.makePanel(), tm.makePeriodPanel(),
tm.add_nans(tm.makePanel())]:
result = isna_f(p)
diff --git a/pandas/tests/extension/arrow/__init__.py b/pandas/tests/extension/arrow/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py
new file mode 100644
index 0000000000000..a9da25cdd2755
--- /dev/null
+++ b/pandas/tests/extension/arrow/bool.py
@@ -0,0 +1,108 @@
+"""Rudimentary Apache Arrow-backed ExtensionArray.
+
+At the moment, just a boolean array / type is implemented.
+Eventually, we'll want to parametrize the type and support
+multiple dtypes. Not all methods are implemented yet, and the
+current implementation is not efficient.
+"""
+import copy
+import itertools
+
+import numpy as np
+import pyarrow as pa
+import pandas as pd
+from pandas.api.extensions import (
+ ExtensionDtype, ExtensionArray, take, register_extension_dtype
+)
+
+
+@register_extension_dtype
+class ArrowBoolDtype(ExtensionDtype):
+
+ type = np.bool_
+ kind = 'b'
+ name = 'arrow_bool'
+ na_value = pa.NULL
+
+ @classmethod
+ def construct_from_string(cls, string):
+ if string == cls.name:
+ return cls()
+ else:
+ raise TypeError("Cannot construct a '{}' from "
+ "'{}'".format(cls, string))
+
+ @classmethod
+ def construct_array_type(cls):
+ return ArrowBoolArray
+
+ def _is_boolean(self):
+ return True
+
+
+class ArrowBoolArray(ExtensionArray):
+ def __init__(self, values):
+ if not isinstance(values, pa.ChunkedArray):
+ raise ValueError
+
+ assert values.type == pa.bool_()
+ self._data = values
+ self._dtype = ArrowBoolDtype()
+
+ def __repr__(self):
+ return "ArrowBoolArray({})".format(repr(self._data))
+
+ @classmethod
+ def from_scalars(cls, values):
+ arr = pa.chunked_array([pa.array(np.asarray(values))])
+ return cls(arr)
+
+ @classmethod
+ def from_array(cls, arr):
+ assert isinstance(arr, pa.Array)
+ return cls(pa.chunked_array([arr]))
+
+ @classmethod
+ def _from_sequence(cls, scalars, dtype=None, copy=False):
+ return cls.from_scalars(scalars)
+
+ def __getitem__(self, item):
+ return self._data.to_pandas()[item]
+
+ def __len__(self):
+ return len(self._data)
+
+ @property
+ def dtype(self):
+ return self._dtype
+
+ @property
+ def nbytes(self):
+ return sum(x.size for chunk in self._data.chunks
+ for x in chunk.buffers()
+ if x is not None)
+
+ def isna(self):
+ return pd.isna(self._data.to_pandas())
+
+ def take(self, indices, allow_fill=False, fill_value=None):
+ data = self._data.to_pandas()
+
+ if allow_fill and fill_value is None:
+ fill_value = self.dtype.na_value
+
+ result = take(data, indices, fill_value=fill_value,
+ allow_fill=allow_fill)
+ return self._from_sequence(result, dtype=self.dtype)
+
+ def copy(self, deep=False):
+ if deep:
+ return copy.deepcopy(self._data)
+ else:
+ return copy.copy(self._data)
+
+ @classmethod
+ def _concat_same_type(cls, to_concat):
+ chunks = list(itertools.chain.from_iterable(x._data.chunks
+ for x in to_concat))
+ arr = pa.chunked_array(chunks)
+ return cls(arr)
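
A quick tour of the new array, assuming ``pyarrow`` >= 0.10.0 is installed
(the module ships inside the test suite, hence the unusual import path):

```python
import numpy as np
import pandas as pd

from pandas.tests.extension.arrow.bool import ArrowBoolArray

arr = ArrowBoolArray.from_scalars(np.array([True, False, True]))
s = pd.Series(range(3))

# The dtype reports itself as boolean, so the array works as a mask.
assert pd.api.types.is_bool_dtype(arr)
print(s[arr])   # rows 0 and 2
```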
diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py
new file mode 100644
index 0000000000000..e1afedcade3ff
--- /dev/null
+++ b/pandas/tests/extension/arrow/test_bool.py
@@ -0,0 +1,48 @@
+import numpy as np
+import pytest
+import pandas as pd
+import pandas.util.testing as tm
+from pandas.tests.extension import base
+
+pytest.importorskip('pyarrow', minversion="0.10.0")
+
+from .bool import ArrowBoolDtype, ArrowBoolArray
+
+
+@pytest.fixture
+def dtype():
+ return ArrowBoolDtype()
+
+
+@pytest.fixture
+def data():
+ return ArrowBoolArray.from_scalars(np.random.randint(0, 2, size=100,
+ dtype=bool))
+
+
+class BaseArrowTests(object):
+ pass
+
+
+class TestDtype(BaseArrowTests, base.BaseDtypeTests):
+ def test_array_type_with_arg(self, data, dtype):
+ pytest.skip("GH-22666")
+
+
+class TestInterface(BaseArrowTests, base.BaseInterfaceTests):
+ def test_repr(self, data):
+ pytest.skip("TODO")
+
+
+class TestConstructors(BaseArrowTests, base.BaseConstructorsTests):
+ def test_from_dtype(self, data):
+ pytest.skip("GH-22666")
+
+
+def test_is_bool_dtype(data):
+ assert pd.api.types.is_bool_dtype(data)
+ assert pd.core.common.is_bool_indexer(data)
+ s = pd.Series(range(len(data)))
+ result = s[data]
+ expected = s[np.asarray(data)]
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py
index 02b7c9527769f..8d1f1cadcc23f 100644
--- a/pandas/tests/extension/base/dtype.py
+++ b/pandas/tests/extension/base/dtype.py
@@ -1,3 +1,5 @@
+import warnings
+
import numpy as np
import pandas as pd
@@ -67,7 +69,12 @@ def test_check_dtype(self, data):
expected = pd.Series([True, True, False, False],
index=list('ABCD'))
- result = df.dtypes == str(dtype)
+ # XXX: This should probably be *fixed* not ignored.
+ # See libops.scalar_compare
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+ result = df.dtypes == str(dtype)
+
self.assert_series_equal(result, expected)
expected = pd.Series([True, True, False, False],
diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py
index 4bbbb7df2f399..8e397d228a5b6 100644
--- a/pandas/tests/extension/conftest.py
+++ b/pandas/tests/extension/conftest.py
@@ -31,12 +31,24 @@ def all_data(request, data, data_missing):
@pytest.fixture
-def data_repeated():
- """Return different versions of data for count times"""
+def data_repeated(data):
+ """
+ Generate many datasets.
+
+ Parameters
+ ----------
+ data : fixture implementing `data`
+
+ Returns
+ -------
+ Callable[[int], Generator]:
+ A callable that takes a `count` argument and
+ returns a generator yielding `count` datasets.
+ """
def gen(count):
for _ in range(count):
- yield NotImplementedError
- yield gen
+ yield data
+ return gen
@pytest.fixture
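
A hypothetical consumer of the reworked fixture, showing the intended call
pattern now that it yields real datasets:

```python
# data_repeated(count) yields the `data` fixture itself `count` times
# instead of yielding NotImplementedError as the old stub did.
def test_two_copies_align(data_repeated):
    first, second = data_repeated(2)
    assert len(first) == len(second)
```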
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 03fdd25826b79..93b8ea786ef5b 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -30,14 +30,6 @@ def data_missing():
return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
-@pytest.fixture
-def data_repeated():
- def gen(count):
- for _ in range(count):
- yield DecimalArray(make_data())
- yield gen
-
-
@pytest.fixture
def data_for_sorting():
return DecimalArray([decimal.Decimal('1'),
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 980c245d55711..6ce0d63eb63ec 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -17,12 +17,13 @@
import numpy as np
+from pandas import compat
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.arrays import ExtensionArray
class JSONDtype(ExtensionDtype):
- type = collections.Mapping
+ type = compat.Mapping
name = 'json'
try:
na_value = collections.UserDict()
@@ -79,7 +80,7 @@ def __getitem__(self, item):
return self.data[item]
elif isinstance(item, np.ndarray) and item.dtype == 'bool':
return self._from_sequence([x for x, m in zip(self, item) if m])
- elif isinstance(item, collections.Iterable):
+ elif isinstance(item, compat.Iterable):
# fancy indexing
return type(self)([self.data[i] for i in item])
else:
@@ -91,7 +92,7 @@ def __setitem__(self, key, value):
self.data[key] = value
else:
if not isinstance(value, (type(self),
- collections.Sequence)):
+ compat.Sequence)):
# broadcast value
value = itertools.cycle([value])
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index 6c6cf80c16da6..ff66f53eab6f6 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -45,15 +45,6 @@ def data_missing():
return Categorical([np.nan, 'A'])
-@pytest.fixture
-def data_repeated():
- """Return different versions of data for count times"""
- def gen(count):
- for _ in range(count):
- yield Categorical(make_data())
- yield gen
-
-
@pytest.fixture
def data_for_sorting():
return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index 57e0922a0b7d9..7aa33006dadda 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -47,14 +47,6 @@ def data_missing(dtype):
return integer_array([np.nan, 1], dtype=dtype)
-@pytest.fixture
-def data_repeated(data):
- def gen(count):
- for _ in range(count):
- yield data
- yield gen
-
-
@pytest.fixture
def data_for_sorting(dtype):
return integer_array([1, 2, 0], dtype=dtype)
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index 34b98f590df0d..7302c5757d144 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -47,15 +47,6 @@ def data_missing():
return IntervalArray.from_tuples([None, (0, 1)])
-@pytest.fixture
-def data_repeated():
- """Return different versions of data for count times"""
- def gen(count):
- for _ in range(count):
- yield IntervalArray(make_data())
- yield gen
-
-
@pytest.fixture
def data_for_sorting():
return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)])
diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
index fdedb93835d75..4a4ce4540b9d5 100644
--- a/pandas/tests/frame/conftest.py
+++ b/pandas/tests/frame/conftest.py
@@ -70,9 +70,10 @@ def mixed_float_frame():
Columns are ['A', 'B', 'C', 'D'].
"""
df = DataFrame(tm.getSeriesData())
- df.A = df.A.astype('float16')
+ df.A = df.A.astype('float32')
df.B = df.B.astype('float32')
- df.C = df.C.astype('float64')
+ df.C = df.C.astype('float16')
+ df.D = df.D.astype('float64')
return df
@@ -84,9 +85,10 @@ def mixed_float_frame2():
Columns are ['A', 'B', 'C', 'D'].
"""
df = DataFrame(tm.getSeriesData())
- df.D = df.D.astype('float16')
+ df.D = df.D.astype('float32')
df.C = df.C.astype('float32')
- df.B = df.B.astype('float64')
+ df.B = df.B.astype('float16')
+ df.D = df.D.astype('float64')
return df
@@ -99,10 +101,10 @@ def mixed_int_frame():
"""
df = DataFrame({k: v.astype(int)
for k, v in compat.iteritems(tm.getSeriesData())})
- df.A = df.A.astype('uint8')
- df.B = df.B.astype('int32')
- df.C = df.C.astype('int64')
- df.D = np.ones(len(df.D), dtype='uint64')
+ df.A = df.A.astype('int32')
+ df.B = np.ones(len(df.B), dtype='uint64')
+ df.C = df.C.astype('uint8')
+ df.D = df.C.astype('int64')
return df
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index f06c8336373ca..baebf414969be 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -116,8 +116,8 @@ def test_corr_int_and_boolean(self):
'a', 'b'], columns=['a', 'b'])
for meth in ['pearson', 'kendall', 'spearman']:
- # RuntimeWarning
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
result = df.corr(meth)
tm.assert_frame_equal(result, expected)
@@ -549,6 +549,8 @@ def test_mean(self):
def test_product(self):
self._check_stat_op('product', np.prod)
+ # TODO: Ensure warning isn't emitted in the first place
+ @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning")
def test_median(self):
def wrapper(x):
if isna(x).any():
@@ -559,6 +561,7 @@ def wrapper(x):
def test_min(self):
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
self._check_stat_op('min', np.min, check_dates=True)
self._check_stat_op('min', np.min, frame=self.intframe)
@@ -610,6 +613,7 @@ def test_cummax(self):
def test_max(self):
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
self._check_stat_op('max', np.max, check_dates=True)
self._check_stat_op('max', np.max, frame=self.intframe)
@@ -1123,6 +1127,8 @@ def test_stats_mixed_type(self):
self.mixed_frame.mean(1)
self.mixed_frame.skew(1)
+ # TODO: Ensure warning isn't emitted in the first place
+ @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning")
def test_median_corner(self):
def wrapper(x):
if isna(x).any():
@@ -2089,6 +2095,24 @@ def test_n_all_dtypes(self, df_main_dtypes):
df.nsmallest(2, list(set(df) - {'category_string', 'string'}))
df.nlargest(2, list(set(df) - {'category_string', 'string'}))
+ @pytest.mark.parametrize('method,expected', [
+ ('nlargest',
+ pd.DataFrame({'a': [2, 2, 2, 1], 'b': [3, 2, 1, 3]},
+ index=[2, 1, 0, 3])),
+ ('nsmallest',
+ pd.DataFrame({'a': [1, 1, 1, 2], 'b': [1, 2, 3, 1]},
+ index=[5, 4, 3, 0]))])
+ def test_duplicates_on_starter_columns(self, method, expected):
+ # regression test for #22752
+
+ df = pd.DataFrame({
+ 'a': [2, 2, 2, 1, 1, 1],
+ 'b': [1, 2, 3, 3, 2, 1]
+ })
+
+ result = getattr(df, method)(4, columns=['a', 'b'])
+ tm.assert_frame_equal(result, expected)
+
def test_n_identical_values(self):
# GH15297
df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]})
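
The new parametrized test pins down the GH 22752 regression: when the first column passed to `nlargest`/`nsmallest` contains duplicates, ties must be broken by the next listed column. Concretely:

```python
import pandas as pd

df = pd.DataFrame({'a': [2, 2, 2, 1, 1, 1],
                   'b': [1, 2, 3, 3, 2, 1]})
print(df.nlargest(4, columns=['a', 'b']))
#    a  b
# 2  2  3
# 1  2  2
# 0  2  1
# 3  1  3
```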
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 78a19029db567..35f2f566ef85e 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -24,8 +24,6 @@
import pandas.util.testing as tm
-from pandas.tests.frame.common import TestData
-
class SharedWithSparse(object):
"""
@@ -43,57 +41,57 @@ def _assert_series_equal(self, left, right):
"""Dispatch to series class dependent assertion"""
raise NotImplementedError
- def test_copy_index_name_checking(self):
+ def test_copy_index_name_checking(self, float_frame):
# don't want to be able to modify the index stored elsewhere after
# making a copy
for attr in ('index', 'columns'):
- ind = getattr(self.frame, attr)
+ ind = getattr(float_frame, attr)
ind.name = None
- cp = self.frame.copy()
+ cp = float_frame.copy()
getattr(cp, attr).name = 'foo'
- assert getattr(self.frame, attr).name is None
+ assert getattr(float_frame, attr).name is None
- def test_getitem_pop_assign_name(self):
- s = self.frame['A']
+ def test_getitem_pop_assign_name(self, float_frame):
+ s = float_frame['A']
assert s.name == 'A'
- s = self.frame.pop('A')
+ s = float_frame.pop('A')
assert s.name == 'A'
- s = self.frame.loc[:, 'B']
+ s = float_frame.loc[:, 'B']
assert s.name == 'B'
s2 = s.loc[:]
assert s2.name == 'B'
- def test_get_value(self):
- for idx in self.frame.index:
- for col in self.frame.columns:
+ def test_get_value(self, float_frame):
+ for idx in float_frame.index:
+ for col in float_frame.columns:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
- result = self.frame.get_value(idx, col)
- expected = self.frame[col][idx]
+ result = float_frame.get_value(idx, col)
+ expected = float_frame[col][idx]
tm.assert_almost_equal(result, expected)
- def test_add_prefix_suffix(self):
- with_prefix = self.frame.add_prefix('foo#')
- expected = pd.Index(['foo#%s' % c for c in self.frame.columns])
+ def test_add_prefix_suffix(self, float_frame):
+ with_prefix = float_frame.add_prefix('foo#')
+ expected = pd.Index(['foo#%s' % c for c in float_frame.columns])
tm.assert_index_equal(with_prefix.columns, expected)
- with_suffix = self.frame.add_suffix('#foo')
- expected = pd.Index(['%s#foo' % c for c in self.frame.columns])
+ with_suffix = float_frame.add_suffix('#foo')
+ expected = pd.Index(['%s#foo' % c for c in float_frame.columns])
tm.assert_index_equal(with_suffix.columns, expected)
- with_pct_prefix = self.frame.add_prefix('%')
- expected = pd.Index(['%{}'.format(c) for c in self.frame.columns])
+ with_pct_prefix = float_frame.add_prefix('%')
+ expected = pd.Index(['%{}'.format(c) for c in float_frame.columns])
tm.assert_index_equal(with_pct_prefix.columns, expected)
- with_pct_suffix = self.frame.add_suffix('%')
- expected = pd.Index(['{}%'.format(c) for c in self.frame.columns])
+ with_pct_suffix = float_frame.add_suffix('%')
+ expected = pd.Index(['{}%'.format(c) for c in float_frame.columns])
tm.assert_index_equal(with_pct_suffix.columns, expected)
- def test_get_axis(self):
- f = self.frame
+ def test_get_axis(self, float_frame):
+ f = float_frame
assert f._get_axis_number(0) == 0
assert f._get_axis_number(1) == 1
assert f._get_axis_number('index') == 0
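
Context for the mechanical `self.frame` → `float_frame` rewrites throughout this file: the tests are moving off the `TestData` mixin onto pytest fixtures. The fixture itself is not part of this diff; a sketch of what `float_frame` presumably looks like in pandas/tests/frame/conftest.py:

```python
import pytest
from pandas import DataFrame
import pandas.util.testing as tm

@pytest.fixture
def float_frame():
    """
    DataFrame of floats with an index of unique strings.

    Columns are ['A', 'B', 'C', 'D'].
    """
    return DataFrame(tm.getSeriesData())
```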
@@ -118,13 +116,13 @@ def test_get_axis(self):
tm.assert_raises_regex(ValueError, 'No axis named',
f._get_axis_number, None)
- def test_keys(self):
- getkeys = self.frame.keys
- assert getkeys() is self.frame.columns
+ def test_keys(self, float_frame):
+ getkeys = float_frame.keys
+ assert getkeys() is float_frame.columns
- def test_column_contains_typeerror(self):
+ def test_column_contains_typeerror(self, float_frame):
try:
- self.frame.columns in self.frame
+ float_frame.columns in float_frame
except TypeError:
pass
@@ -146,10 +144,10 @@ def test_tab_completion(self):
assert key not in dir(df)
assert isinstance(df.__getitem__('A'), pd.DataFrame)
- def test_not_hashable(self):
+ def test_not_hashable(self, empty_frame):
df = self.klass([1])
pytest.raises(TypeError, hash, df)
- pytest.raises(TypeError, hash, self.empty)
+ pytest.raises(TypeError, hash, empty_frame)
def test_new_empty_index(self):
df1 = self.klass(randn(0, 3))
@@ -157,29 +155,29 @@ def test_new_empty_index(self):
df1.index.name = 'foo'
assert df2.index.name is None
- def test_array_interface(self):
+ def test_array_interface(self, float_frame):
with np.errstate(all='ignore'):
- result = np.sqrt(self.frame)
- assert isinstance(result, type(self.frame))
- assert result.index is self.frame.index
- assert result.columns is self.frame.columns
+ result = np.sqrt(float_frame)
+ assert isinstance(result, type(float_frame))
+ assert result.index is float_frame.index
+ assert result.columns is float_frame.columns
- self._assert_frame_equal(result, self.frame.apply(np.sqrt))
+ self._assert_frame_equal(result, float_frame.apply(np.sqrt))
- def test_get_agg_axis(self):
- cols = self.frame._get_agg_axis(0)
- assert cols is self.frame.columns
+ def test_get_agg_axis(self, float_frame):
+ cols = float_frame._get_agg_axis(0)
+ assert cols is float_frame.columns
- idx = self.frame._get_agg_axis(1)
- assert idx is self.frame.index
+ idx = float_frame._get_agg_axis(1)
+ assert idx is float_frame.index
- pytest.raises(ValueError, self.frame._get_agg_axis, 2)
+ pytest.raises(ValueError, float_frame._get_agg_axis, 2)
- def test_nonzero(self):
- assert self.empty.empty
+ def test_nonzero(self, float_frame, float_string_frame, empty_frame):
+ assert empty_frame.empty
- assert not self.frame.empty
- assert not self.mixed_frame.empty
+ assert not float_frame.empty
+ assert not float_string_frame.empty
# corner case
df = DataFrame({'A': [1., 2., 3.],
@@ -202,16 +200,16 @@ def test_items(self):
assert isinstance(v, Series)
assert (df[k] == v).all()
- def test_iter(self):
- assert tm.equalContents(list(self.frame), self.frame.columns)
+ def test_iter(self, float_frame):
+ assert tm.equalContents(list(float_frame), float_frame.columns)
- def test_iterrows(self):
- for k, v in self.frame.iterrows():
- exp = self.frame.loc[k]
+ def test_iterrows(self, float_frame, float_string_frame):
+ for k, v in float_frame.iterrows():
+ exp = float_frame.loc[k]
self._assert_series_equal(v, exp)
- for k, v in self.mixed_frame.iterrows():
- exp = self.mixed_frame.loc[k]
+ for k, v in float_string_frame.iterrows():
+ exp = float_string_frame.loc[k]
self._assert_series_equal(v, exp)
def test_iterrows_iso8601(self):
@@ -226,11 +224,11 @@ def test_iterrows_iso8601(self):
exp = s.loc[k]
self._assert_series_equal(v, exp)
- def test_itertuples(self):
- for i, tup in enumerate(self.frame.itertuples()):
+ def test_itertuples(self, float_frame):
+ for i, tup in enumerate(float_frame.itertuples()):
s = self.klass._constructor_sliced(tup[1:])
s.name = tup[0]
- expected = self.frame.iloc[i, :].reset_index(drop=True)
+ expected = float_frame.iloc[i, :].reset_index(drop=True)
self._assert_series_equal(s, expected)
df = self.klass({'floats': np.random.randn(5),
@@ -289,11 +287,11 @@ def test_sequence_like_with_categorical(self):
for c, col in df.iteritems():
str(s)
- def test_len(self):
- assert len(self.frame) == len(self.frame.index)
+ def test_len(self, float_frame):
+ assert len(float_frame) == len(float_frame.index)
- def test_values(self):
- frame = self.frame
+ def test_values(self, float_frame, float_string_frame):
+ frame = float_frame
arr = frame.values
frame_cols = frame.columns
@@ -306,20 +304,20 @@ def test_values(self):
assert value == frame[col][i]
# mixed type
- arr = self.mixed_frame[['foo', 'A']].values
+ arr = float_string_frame[['foo', 'A']].values
assert arr[0, 0] == 'bar'
- df = self.klass({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]})
+ df = self.klass({'complex': [1j, 2j, 3j], 'real': [1, 2, 3]})
arr = df.values
assert arr[0, 0] == 1j
# single block corner case
- arr = self.frame[['A', 'B']].values
- expected = self.frame.reindex(columns=['A', 'B']).values
+ arr = float_frame[['A', 'B']].values
+ expected = float_frame.reindex(columns=['A', 'B']).values
assert_almost_equal(arr, expected)
- def test_transpose(self):
- frame = self.frame
+ def test_transpose(self, float_frame):
+ frame = float_frame
dft = frame.T
for idx, series in compat.iteritems(dft):
for col, value in compat.iteritems(series):
@@ -343,8 +341,8 @@ def test_swapaxes(self):
self._assert_frame_equal(df, df.swapaxes(0, 0))
pytest.raises(ValueError, df.swapaxes, 2, 5)
- def test_axis_aliases(self):
- f = self.frame
+ def test_axis_aliases(self, float_frame):
+ f = float_frame
# reg name
expected = f.sum(axis=0)
@@ -361,23 +359,23 @@ def test_class_axis(self):
assert pydoc.getdoc(DataFrame.index)
assert pydoc.getdoc(DataFrame.columns)
- def test_more_values(self):
- values = self.mixed_frame.values
- assert values.shape[1] == len(self.mixed_frame.columns)
+ def test_more_values(self, float_string_frame):
+ values = float_string_frame.values
+ assert values.shape[1] == len(float_string_frame.columns)
- def test_repr_with_mi_nat(self):
+ def test_repr_with_mi_nat(self, float_string_frame):
df = self.klass({'X': [1, 2]},
index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']])
res = repr(df)
exp = ' X\nNaT a 1\n2013-01-01 b 2'
assert res == exp
- def test_iteritems_names(self):
- for k, v in compat.iteritems(self.mixed_frame):
+ def test_iteritems_names(self, float_string_frame):
+ for k, v in compat.iteritems(float_string_frame):
assert v.name == k
- def test_series_put_names(self):
- series = self.mixed_frame._series
+ def test_series_put_names(self, float_string_frame):
+ series = float_string_frame._series
for k, v in compat.iteritems(series):
assert v.name == k
@@ -408,36 +406,37 @@ def test_with_datetimelikes(self):
tm.assert_series_equal(result, expected)
-class TestDataFrameMisc(SharedWithSparse, TestData):
+class TestDataFrameMisc(SharedWithSparse):
klass = DataFrame
# SharedWithSparse tests use generic, klass-agnostic assertion
_assert_frame_equal = staticmethod(assert_frame_equal)
_assert_series_equal = staticmethod(assert_series_equal)
- def test_values(self):
- self.frame.values[:, 0] = 5.
- assert (self.frame.values[:, 0] == 5).all()
+ def test_values(self, float_frame):
+ float_frame.values[:, 0] = 5.
+ assert (float_frame.values[:, 0] == 5).all()
- def test_as_matrix_deprecated(self):
+ def test_as_matrix_deprecated(self, float_frame):
# GH18458
with tm.assert_produces_warning(FutureWarning):
- result = self.frame.as_matrix(columns=self.frame.columns.tolist())
- expected = self.frame.values
+ cols = float_frame.columns.tolist()
+ result = float_frame.as_matrix(columns=cols)
+ expected = float_frame.values
tm.assert_numpy_array_equal(result, expected)
- def test_deepcopy(self):
- cp = deepcopy(self.frame)
+ def test_deepcopy(self, float_frame):
+ cp = deepcopy(float_frame)
series = cp['A']
series[:] = 10
for idx, value in compat.iteritems(series):
- assert self.frame['A'][idx] != value
+ assert float_frame['A'][idx] != value
- def test_transpose_get_view(self):
- dft = self.frame.T
+ def test_transpose_get_view(self, float_frame):
+ dft = float_frame.T
dft.values[:, 5:10] = 5
- assert (self.frame.values[5:10] == 5).all()
+ assert (float_frame.values[5:10] == 5).all()
def test_inplace_return_self(self):
# re #1893
diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py
index 8beab3fb816df..e27115cfc255b 100644
--- a/pandas/tests/frame/test_apply.py
+++ b/pandas/tests/frame/test_apply.py
@@ -11,6 +11,8 @@
import warnings
import numpy as np
+from hypothesis import given
+from hypothesis.strategies import composite, dates, integers, sampled_from
from pandas import (notna, DataFrame, Series, MultiIndex, date_range,
Timestamp, compat)
@@ -21,25 +23,36 @@
assert_frame_equal)
import pandas.util.testing as tm
from pandas.conftest import _get_cython_table_params
-from pandas.tests.frame.common import TestData
-class TestDataFrameApply(TestData):
+@pytest.fixture
+def int_frame_const_col():
+ """
+ Fixture for DataFrame of ints which are constant per column
+
+ Columns are ['A', 'B', 'C'], with values (per column): [1, 2, 3]
+ """
+ df = DataFrame(np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
+ columns=['A', 'B', 'C'])
+ return df
- def test_apply(self):
+
+class TestDataFrameApply():
+
+ def test_apply(self, float_frame):
with np.errstate(all='ignore'):
# ufunc
- applied = self.frame.apply(np.sqrt)
- tm.assert_series_equal(np.sqrt(self.frame['A']), applied['A'])
+ applied = float_frame.apply(np.sqrt)
+ tm.assert_series_equal(np.sqrt(float_frame['A']), applied['A'])
# aggregator
- applied = self.frame.apply(np.mean)
- assert applied['A'] == np.mean(self.frame['A'])
+ applied = float_frame.apply(np.mean)
+ assert applied['A'] == np.mean(float_frame['A'])
- d = self.frame.index[0]
- applied = self.frame.apply(np.mean, axis=1)
- assert applied[d] == np.mean(self.frame.xs(d))
- assert applied.index is self.frame.index # want this
+ d = float_frame.index[0]
+ applied = float_frame.apply(np.mean, axis=1)
+ assert applied[d] == np.mean(float_frame.xs(d))
+ assert applied.index is float_frame.index # want this
# invalid axis
df = DataFrame(
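
For reference, the frame built by the new `int_frame_const_col` fixture: `np.tile(np.arange(3), 6)` lays out `0, 1, 2` six times, `reshape(6, -1)` folds that into six rows of `[0, 1, 2]`, and the `+ 1` yields constant columns A=1, B=2, C=3:

```python
import numpy as np
from pandas import DataFrame

df = DataFrame(np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
               columns=['A', 'B', 'C'])
print(df.drop_duplicates())   # every row is identical
#    A  B  C
# 0  1  2  3
```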
@@ -63,22 +76,22 @@ def test_apply_mixed_datetimelike(self):
result = df.apply(lambda x: x, axis=1)
assert_frame_equal(result, df)
- def test_apply_empty(self):
+ def test_apply_empty(self, float_frame, empty_frame):
# empty
- applied = self.empty.apply(np.sqrt)
+ applied = empty_frame.apply(np.sqrt)
assert applied.empty
- applied = self.empty.apply(np.mean)
+ applied = empty_frame.apply(np.mean)
assert applied.empty
- no_rows = self.frame[:0]
+ no_rows = float_frame[:0]
result = no_rows.apply(lambda x: x.mean())
- expected = Series(np.nan, index=self.frame.columns)
+ expected = Series(np.nan, index=float_frame.columns)
assert_series_equal(result, expected)
- no_cols = self.frame.loc[:, []]
+ no_cols = float_frame.loc[:, []]
result = no_cols.apply(lambda x: x.mean(), axis=1)
- expected = Series(np.nan, index=self.frame.index)
+ expected = Series(np.nan, index=float_frame.index)
assert_series_equal(result, expected)
# 2476
@@ -86,12 +99,12 @@ def test_apply_empty(self):
rs = xp.apply(lambda x: x['a'], axis=1)
assert_frame_equal(xp, rs)
- def test_apply_with_reduce_empty(self):
+ def test_apply_with_reduce_empty(self, empty_frame):
# reduce with an empty DataFrame
x = []
- result = self.empty.apply(x.append, axis=1, result_type='expand')
- assert_frame_equal(result, self.empty)
- result = self.empty.apply(x.append, axis=1, result_type='reduce')
+ result = empty_frame.apply(x.append, axis=1, result_type='expand')
+ assert_frame_equal(result, empty_frame)
+ result = empty_frame.apply(x.append, axis=1, result_type='reduce')
assert_series_equal(result, Series(
[], index=pd.Index([], dtype=object)))
@@ -105,10 +118,10 @@ def test_apply_with_reduce_empty(self):
# Ensure that x.append hasn't been called
assert x == []
- def test_apply_deprecate_reduce(self):
- with warnings.catch_warnings(record=True):
- x = []
- self.empty.apply(x.append, axis=1, result_type='reduce')
+ def test_apply_deprecate_reduce(self, empty_frame):
+ x = []
+ with tm.assert_produces_warning(FutureWarning):
+ empty_frame.apply(x.append, axis=1, reduce=True)
def test_apply_standard_nonunique(self):
df = DataFrame(
@@ -128,110 +141,98 @@ def test_apply_standard_nonunique(self):
pytest.param([], {'numeric_only': True}, id='optional_kwds'),
pytest.param([1, None], {'numeric_only': True}, id='args_and_kwds')
])
- def test_apply_with_string_funcs(self, func, args, kwds):
- result = self.frame.apply(func, *args, **kwds)
- expected = getattr(self.frame, func)(*args, **kwds)
+ def test_apply_with_string_funcs(self, float_frame, func, args, kwds):
+ result = float_frame.apply(func, *args, **kwds)
+ expected = getattr(float_frame, func)(*args, **kwds)
tm.assert_series_equal(result, expected)
- def test_apply_broadcast_deprecated(self):
+ def test_apply_broadcast_deprecated(self, float_frame):
with tm.assert_produces_warning(FutureWarning):
- self.frame.apply(np.mean, broadcast=True)
+ float_frame.apply(np.mean, broadcast=True)
- def test_apply_broadcast(self):
+ def test_apply_broadcast(self, float_frame, int_frame_const_col):
# scalars
- result = self.frame.apply(np.mean, result_type='broadcast')
- expected = DataFrame([self.frame.mean()], index=self.frame.index)
+ result = float_frame.apply(np.mean, result_type='broadcast')
+ expected = DataFrame([float_frame.mean()], index=float_frame.index)
tm.assert_frame_equal(result, expected)
- result = self.frame.apply(np.mean, axis=1, result_type='broadcast')
- m = self.frame.mean(axis=1)
- expected = DataFrame({c: m for c in self.frame.columns})
+ result = float_frame.apply(np.mean, axis=1, result_type='broadcast')
+ m = float_frame.mean(axis=1)
+ expected = DataFrame({c: m for c in float_frame.columns})
tm.assert_frame_equal(result, expected)
# lists
- result = self.frame.apply(
- lambda x: list(range(len(self.frame.columns))),
+ result = float_frame.apply(
+ lambda x: list(range(len(float_frame.columns))),
axis=1,
result_type='broadcast')
- m = list(range(len(self.frame.columns)))
- expected = DataFrame([m] * len(self.frame.index),
+ m = list(range(len(float_frame.columns)))
+ expected = DataFrame([m] * len(float_frame.index),
dtype='float64',
- index=self.frame.index,
- columns=self.frame.columns)
+ index=float_frame.index,
+ columns=float_frame.columns)
tm.assert_frame_equal(result, expected)
- result = self.frame.apply(lambda x: list(range(len(self.frame.index))),
- result_type='broadcast')
- m = list(range(len(self.frame.index)))
- expected = DataFrame({c: m for c in self.frame.columns},
+ result = float_frame.apply(lambda x:
+ list(range(len(float_frame.index))),
+ result_type='broadcast')
+ m = list(range(len(float_frame.index)))
+ expected = DataFrame({c: m for c in float_frame.columns},
dtype='float64',
- index=self.frame.index)
+ index=float_frame.index)
tm.assert_frame_equal(result, expected)
# preserve columns
- df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1,
- columns=list('ABC'))
- result = df.apply(lambda x: [1, 2, 3],
- axis=1,
- result_type='broadcast')
+ df = int_frame_const_col
+ result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast')
tm.assert_frame_equal(result, df)
- df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1,
- columns=list('ABC'))
+ df = int_frame_const_col
result = df.apply(lambda x: Series([1, 2, 3], index=list('abc')),
- axis=1,
- result_type='broadcast')
+ axis=1, result_type='broadcast')
expected = df.copy()
tm.assert_frame_equal(result, expected)
- def test_apply_broadcast_error(self):
- df = DataFrame(
- np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
- columns=['A', 'B', 'C'])
+ def test_apply_broadcast_error(self, int_frame_const_col):
+ df = int_frame_const_col
# > 1 ndim
with pytest.raises(ValueError):
df.apply(lambda x: np.array([1, 2]).reshape(-1, 2),
- axis=1,
- result_type='broadcast')
+ axis=1, result_type='broadcast')
# cannot broadcast
with pytest.raises(ValueError):
- df.apply(lambda x: [1, 2],
- axis=1,
- result_type='broadcast')
+ df.apply(lambda x: [1, 2], axis=1, result_type='broadcast')
with pytest.raises(ValueError):
- df.apply(lambda x: Series([1, 2]),
- axis=1,
- result_type='broadcast')
+ df.apply(lambda x: Series([1, 2]), axis=1, result_type='broadcast')
- def test_apply_raw(self):
- result0 = self.frame.apply(np.mean, raw=True)
- result1 = self.frame.apply(np.mean, axis=1, raw=True)
+ def test_apply_raw(self, float_frame):
+ result0 = float_frame.apply(np.mean, raw=True)
+ result1 = float_frame.apply(np.mean, axis=1, raw=True)
- expected0 = self.frame.apply(lambda x: x.values.mean())
- expected1 = self.frame.apply(lambda x: x.values.mean(), axis=1)
+ expected0 = float_frame.apply(lambda x: x.values.mean())
+ expected1 = float_frame.apply(lambda x: x.values.mean(), axis=1)
assert_series_equal(result0, expected0)
assert_series_equal(result1, expected1)
# no reduction
- result = self.frame.apply(lambda x: x * 2, raw=True)
- expected = self.frame * 2
+ result = float_frame.apply(lambda x: x * 2, raw=True)
+ expected = float_frame * 2
assert_frame_equal(result, expected)
- def test_apply_axis1(self):
- d = self.frame.index[0]
- tapplied = self.frame.apply(np.mean, axis=1)
- assert tapplied[d] == np.mean(self.frame.xs(d))
+ def test_apply_axis1(self, float_frame):
+ d = float_frame.index[0]
+ tapplied = float_frame.apply(np.mean, axis=1)
+ assert tapplied[d] == np.mean(float_frame.xs(d))
- def test_apply_ignore_failures(self):
- result = frame_apply(self.mixed_frame,
- np.mean, 0,
+ def test_apply_ignore_failures(self, float_string_frame):
+ result = frame_apply(float_string_frame, np.mean, 0,
ignore_failures=True).apply_standard()
- expected = self.mixed_frame._get_numeric_data().apply(np.mean)
+ expected = float_string_frame._get_numeric_data().apply(np.mean)
assert_series_equal(result, expected)
def test_apply_mixed_dtype_corner(self):
@@ -259,6 +260,7 @@ def test_apply_empty_infer_type(self):
def _check(df, f):
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
test_res = f(np.array([], dtype='f8'))
is_reduction = not isinstance(test_res, np.ndarray)
@@ -285,7 +287,7 @@ def _checkit(axis=0, raw=False):
result = no_cols.apply(lambda x: x.mean(), result_type='broadcast')
assert isinstance(result, DataFrame)
- def test_apply_with_args_kwds(self):
+ def test_apply_with_args_kwds(self, float_frame):
def add_some(x, howmuch=0):
return x + howmuch
@@ -295,26 +297,26 @@ def agg_and_add(x, howmuch=0):
def subtract_and_divide(x, sub, divide=1):
return (x - sub) / divide
- result = self.frame.apply(add_some, howmuch=2)
- exp = self.frame.apply(lambda x: x + 2)
+ result = float_frame.apply(add_some, howmuch=2)
+ exp = float_frame.apply(lambda x: x + 2)
assert_frame_equal(result, exp)
- result = self.frame.apply(agg_and_add, howmuch=2)
- exp = self.frame.apply(lambda x: x.mean() + 2)
+ result = float_frame.apply(agg_and_add, howmuch=2)
+ exp = float_frame.apply(lambda x: x.mean() + 2)
assert_series_equal(result, exp)
- res = self.frame.apply(subtract_and_divide, args=(2,), divide=2)
- exp = self.frame.apply(lambda x: (x - 2.) / 2.)
+ res = float_frame.apply(subtract_and_divide, args=(2,), divide=2)
+ exp = float_frame.apply(lambda x: (x - 2.) / 2.)
assert_frame_equal(res, exp)
- def test_apply_yield_list(self):
- result = self.frame.apply(list)
- assert_frame_equal(result, self.frame)
+ def test_apply_yield_list(self, float_frame):
+ result = float_frame.apply(list)
+ assert_frame_equal(result, float_frame)
- def test_apply_reduce_Series(self):
- self.frame.loc[::2, 'A'] = np.nan
- expected = self.frame.mean(1)
- result = self.frame.apply(np.mean, axis=1)
+ def test_apply_reduce_Series(self, float_frame):
+ float_frame.loc[::2, 'A'] = np.nan
+ expected = float_frame.mean(1)
+ result = float_frame.apply(np.mean, axis=1)
assert_series_equal(result, expected)
def test_apply_differently_indexed(self):
@@ -405,31 +407,31 @@ def test_apply_convert_objects(self):
result = data.apply(lambda x: x, axis=1)
assert_frame_equal(result._convert(datetime=True), data)
- def test_apply_attach_name(self):
- result = self.frame.apply(lambda x: x.name)
- expected = Series(self.frame.columns, index=self.frame.columns)
+ def test_apply_attach_name(self, float_frame):
+ result = float_frame.apply(lambda x: x.name)
+ expected = Series(float_frame.columns, index=float_frame.columns)
assert_series_equal(result, expected)
- result = self.frame.apply(lambda x: x.name, axis=1)
- expected = Series(self.frame.index, index=self.frame.index)
+ result = float_frame.apply(lambda x: x.name, axis=1)
+ expected = Series(float_frame.index, index=float_frame.index)
assert_series_equal(result, expected)
# non-reductions
- result = self.frame.apply(lambda x: np.repeat(x.name, len(x)))
- expected = DataFrame(np.tile(self.frame.columns,
- (len(self.frame.index), 1)),
- index=self.frame.index,
- columns=self.frame.columns)
+ result = float_frame.apply(lambda x: np.repeat(x.name, len(x)))
+ expected = DataFrame(np.tile(float_frame.columns,
+ (len(float_frame.index), 1)),
+ index=float_frame.index,
+ columns=float_frame.columns)
assert_frame_equal(result, expected)
- result = self.frame.apply(lambda x: np.repeat(x.name, len(x)),
- axis=1)
- expected = Series(np.repeat(t[0], len(self.frame.columns))
- for t in self.frame.itertuples())
- expected.index = self.frame.index
+ result = float_frame.apply(lambda x: np.repeat(x.name, len(x)),
+ axis=1)
+ expected = Series(np.repeat(t[0], len(float_frame.columns))
+ for t in float_frame.itertuples())
+ expected.index = float_frame.index
assert_series_equal(result, expected)
- def test_apply_multi_index(self):
+ def test_apply_multi_index(self, float_frame):
index = MultiIndex.from_arrays([['a', 'a', 'b'], ['c', 'd', 'd']])
s = DataFrame([[1, 2], [3, 4], [5, 6]],
index=index,
@@ -460,13 +462,13 @@ def test_apply_dict(self):
assert_frame_equal(reduce_false, df)
assert_series_equal(reduce_none, dicts)
- def test_applymap(self):
- applied = self.frame.applymap(lambda x: x * 2)
- tm.assert_frame_equal(applied, self.frame * 2)
- self.frame.applymap(type)
+ def test_applymap(self, float_frame):
+ applied = float_frame.applymap(lambda x: x * 2)
+ tm.assert_frame_equal(applied, float_frame * 2)
+ float_frame.applymap(type)
# gh-465: function returning tuples
- result = self.frame.applymap(lambda x: (x, x))
+ result = float_frame.applymap(lambda x: (x, x))
assert isinstance(result['A'][0], tuple)
# gh-2909: object conversion to float in constructor?
@@ -718,33 +720,27 @@ def test_consistent_coerce_for_shapes(self):
expected = Series([[1, 2] for t in df.itertuples()])
assert_series_equal(result, expected)
- def test_consistent_names(self):
+ def test_consistent_names(self, int_frame_const_col):
# if a Series is returned, we should use the resulting index names
- df = DataFrame(
- np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
- columns=['A', 'B', 'C'])
+ df = int_frame_const_col
result = df.apply(lambda x: Series([1, 2, 3],
index=['test', 'other', 'cols']),
axis=1)
- expected = DataFrame(
- np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
- columns=['test', 'other', 'cols'])
+ expected = int_frame_const_col.rename(columns={'A': 'test',
+ 'B': 'other',
+ 'C': 'cols'})
assert_frame_equal(result, expected)
- result = df.apply(
- lambda x: pd.Series([1, 2], index=['test', 'other']), axis=1)
- expected = DataFrame(
- np.tile(np.arange(2, dtype='int64'), 6).reshape(6, -1) + 1,
- columns=['test', 'other'])
+ result = df.apply(lambda x: Series([1, 2], index=['test', 'other']),
+ axis=1)
+ expected = expected[['test', 'other']]
assert_frame_equal(result, expected)
- def test_result_type(self):
+ def test_result_type(self, int_frame_const_col):
# result_type should be consistent no matter which
# path we take in the code
- df = DataFrame(
- np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
- columns=['A', 'B', 'C'])
+ df = int_frame_const_col
result = df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand')
expected = df.copy()
@@ -762,11 +758,8 @@ def test_result_type(self):
assert_frame_equal(result, expected)
columns = ['other', 'col', 'names']
- result = df.apply(
- lambda x: pd.Series([1, 2, 3],
- index=columns),
- axis=1,
- result_type='broadcast')
+ result = df.apply(lambda x: Series([1, 2, 3], index=columns),
+ axis=1, result_type='broadcast')
expected = df.copy()
assert_frame_equal(result, expected)
@@ -777,24 +770,18 @@ def test_result_type(self):
# series result with other index
columns = ['other', 'col', 'names']
- result = df.apply(
- lambda x: pd.Series([1, 2, 3], index=columns),
- axis=1)
+ result = df.apply(lambda x: Series([1, 2, 3], index=columns), axis=1)
expected = df.copy()
expected.columns = columns
assert_frame_equal(result, expected)
@pytest.mark.parametrize("result_type", ['foo', 1])
- def test_result_type_error(self, result_type):
+ def test_result_type_error(self, result_type, int_frame_const_col):
# allowed result_type
- df = DataFrame(
- np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
- columns=['A', 'B', 'C'])
+ df = int_frame_const_col
with pytest.raises(ValueError):
- df.apply(lambda x: [1, 2, 3],
- axis=1,
- result_type=result_type)
+ df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)
@pytest.mark.parametrize(
"box",
@@ -802,19 +789,17 @@ def test_result_type_error(self, result_type):
lambda x: tuple(x),
lambda x: np.array(x, dtype='int64')],
ids=['list', 'tuple', 'array'])
- def test_consistency_for_boxed(self, box):
+ def test_consistency_for_boxed(self, box, int_frame_const_col):
# passing an array or list should not affect the output shape
- df = DataFrame(
- np.tile(np.arange(3, dtype='int64'), 6).reshape(6, -1) + 1,
- columns=['A', 'B', 'C'])
+ df = int_frame_const_col
result = df.apply(lambda x: box([1, 2]), axis=1)
expected = Series([box([1, 2]) for t in df.itertuples()])
assert_series_equal(result, expected)
result = df.apply(lambda x: box([1, 2]), axis=1, result_type='expand')
- expected = DataFrame(
- np.tile(np.arange(2, dtype='int64'), 6).reshape(6, -1) + 1)
+ expected = int_frame_const_col[['A', 'B']].rename(columns={'A': 0,
+ 'B': 1})
assert_frame_equal(result, expected)
@@ -837,71 +822,71 @@ def zip_frames(frames, axis=1):
return pd.DataFrame(zipped)
-class TestDataFrameAggregate(TestData):
+class TestDataFrameAggregate():
- def test_agg_transform(self, axis):
+ def test_agg_transform(self, axis, float_frame):
other_axis = 1 if axis in {0, 'index'} else 0
with np.errstate(all='ignore'):
- f_abs = np.abs(self.frame)
- f_sqrt = np.sqrt(self.frame)
+ f_abs = np.abs(float_frame)
+ f_sqrt = np.sqrt(float_frame)
# ufunc
- result = self.frame.transform(np.sqrt, axis=axis)
+ result = float_frame.transform(np.sqrt, axis=axis)
expected = f_sqrt.copy()
assert_frame_equal(result, expected)
- result = self.frame.apply(np.sqrt, axis=axis)
+ result = float_frame.apply(np.sqrt, axis=axis)
assert_frame_equal(result, expected)
- result = self.frame.transform(np.sqrt, axis=axis)
+ result = float_frame.transform(np.sqrt, axis=axis)
assert_frame_equal(result, expected)
# list-like
- result = self.frame.apply([np.sqrt], axis=axis)
+ result = float_frame.apply([np.sqrt], axis=axis)
expected = f_sqrt.copy()
if axis in {0, 'index'}:
expected.columns = pd.MultiIndex.from_product(
- [self.frame.columns, ['sqrt']])
+ [float_frame.columns, ['sqrt']])
else:
expected.index = pd.MultiIndex.from_product(
- [self.frame.index, ['sqrt']])
+ [float_frame.index, ['sqrt']])
assert_frame_equal(result, expected)
- result = self.frame.transform([np.sqrt], axis=axis)
+ result = float_frame.transform([np.sqrt], axis=axis)
assert_frame_equal(result, expected)
# multiple items in list
# these are in the order as if we are applying both
# functions per series and then concatting
- result = self.frame.apply([np.abs, np.sqrt], axis=axis)
+ result = float_frame.apply([np.abs, np.sqrt], axis=axis)
expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
if axis in {0, 'index'}:
expected.columns = pd.MultiIndex.from_product(
- [self.frame.columns, ['absolute', 'sqrt']])
+ [float_frame.columns, ['absolute', 'sqrt']])
else:
expected.index = pd.MultiIndex.from_product(
- [self.frame.index, ['absolute', 'sqrt']])
+ [float_frame.index, ['absolute', 'sqrt']])
assert_frame_equal(result, expected)
- result = self.frame.transform([np.abs, 'sqrt'], axis=axis)
+ result = float_frame.transform([np.abs, 'sqrt'], axis=axis)
assert_frame_equal(result, expected)
- def test_transform_and_agg_err(self, axis):
+ def test_transform_and_agg_err(self, axis, float_frame):
# cannot both transform and agg
def f():
- self.frame.transform(['max', 'min'], axis=axis)
+ float_frame.transform(['max', 'min'], axis=axis)
pytest.raises(ValueError, f)
def f():
with np.errstate(all='ignore'):
- self.frame.agg(['max', 'sqrt'], axis=axis)
+ float_frame.agg(['max', 'sqrt'], axis=axis)
pytest.raises(ValueError, f)
def f():
with np.errstate(all='ignore'):
- self.frame.transform(['max', 'sqrt'], axis=axis)
+ float_frame.transform(['max', 'sqrt'], axis=axis)
pytest.raises(ValueError, f)
df = pd.DataFrame({'A': range(5), 'B': 5})
@@ -971,49 +956,49 @@ def test_agg_dict_nested_renaming_depr(self):
df.agg({'A': {'foo': 'min'},
'B': {'bar': 'max'}})
- def test_agg_reduce(self, axis):
+ def test_agg_reduce(self, axis, float_frame):
other_axis = 1 if axis in {0, 'index'} else 0
- name1, name2 = self.frame.axes[other_axis].unique()[:2].sort_values()
+ name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()
# all reducers
- expected = pd.concat([self.frame.mean(axis=axis),
- self.frame.max(axis=axis),
- self.frame.sum(axis=axis),
+ expected = pd.concat([float_frame.mean(axis=axis),
+ float_frame.max(axis=axis),
+ float_frame.sum(axis=axis),
], axis=1)
expected.columns = ['mean', 'max', 'sum']
expected = expected.T if axis in {0, 'index'} else expected
- result = self.frame.agg(['mean', 'max', 'sum'], axis=axis)
+ result = float_frame.agg(['mean', 'max', 'sum'], axis=axis)
assert_frame_equal(result, expected)
# dict input with scalars
func = OrderedDict([(name1, 'mean'), (name2, 'sum')])
- result = self.frame.agg(func, axis=axis)
- expected = Series([self.frame.loc(other_axis)[name1].mean(),
- self.frame.loc(other_axis)[name2].sum()],
+ result = float_frame.agg(func, axis=axis)
+ expected = Series([float_frame.loc(other_axis)[name1].mean(),
+ float_frame.loc(other_axis)[name2].sum()],
index=[name1, name2])
assert_series_equal(result, expected)
# dict input with lists
func = OrderedDict([(name1, ['mean']), (name2, ['sum'])])
- result = self.frame.agg(func, axis=axis)
+ result = float_frame.agg(func, axis=axis)
expected = DataFrame({
- name1: Series([self.frame.loc(other_axis)[name1].mean()],
+ name1: Series([float_frame.loc(other_axis)[name1].mean()],
index=['mean']),
- name2: Series([self.frame.loc(other_axis)[name2].sum()],
+ name2: Series([float_frame.loc(other_axis)[name2].sum()],
index=['sum'])})
expected = expected.T if axis in {1, 'columns'} else expected
assert_frame_equal(result, expected)
# dict input with lists with multiple
func = OrderedDict([(name1, ['mean', 'sum']), (name2, ['sum', 'max'])])
- result = self.frame.agg(func, axis=axis)
+ result = float_frame.agg(func, axis=axis)
expected = DataFrame(OrderedDict([
- (name1, Series([self.frame.loc(other_axis)[name1].mean(),
- self.frame.loc(other_axis)[name1].sum()],
+ (name1, Series([float_frame.loc(other_axis)[name1].mean(),
+ float_frame.loc(other_axis)[name1].sum()],
index=['mean', 'sum'])),
- (name2, Series([self.frame.loc(other_axis)[name2].sum(),
- self.frame.loc(other_axis)[name2].max()],
+ (name2, Series([float_frame.loc(other_axis)[name2].sum(),
+ float_frame.loc(other_axis)[name2].max()],
index=['sum', 'max'])),
]))
expected = expected.T if axis in {1, 'columns'} else expected
@@ -1155,3 +1140,24 @@ def test_agg_cython_table_raises(self, df, func, expected, axis):
# GH21224
with pytest.raises(expected):
df.agg(func, axis=axis)
+
+ @composite
+ def indices(draw, max_length=5):
+ date = draw(
+ dates(
+ min_value=Timestamp.min.ceil("D").to_pydatetime().date(),
+ max_value=Timestamp.max.floor("D").to_pydatetime().date(),
+ ).map(Timestamp)
+ )
+ periods = draw(integers(0, max_length))
+ freq = draw(sampled_from(list("BDHTS")))
+ dr = date_range(date, periods=periods, freq=freq)
+ return pd.DatetimeIndex(list(dr))
+
+ @given(index=indices(5), num_columns=integers(0, 5))
+ def test_frequency_is_original(self, index, num_columns):
+ # GH22150
+ original = index.copy()
+ df = DataFrame(True, index=index, columns=range(num_columns))
+ df.apply(lambda x: x)
+ assert index.freq == original.freq
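
The new property-based test leans on hypothesis: `@composite` turns the decorated function into a strategy factory (each `draw(...)` pulls from a sub-strategy), and `@given` then runs the test body across many generated index/column-count combinations. The same pattern in miniature (names here are illustrative, not from pandas):

```python
from hypothesis import given
from hypothesis.strategies import composite, integers

@composite
def ordered_pairs(draw, max_value=10):
    low = draw(integers(0, max_value))
    high = draw(integers(low, max_value))  # second draw depends on the first
    return (low, high)

@given(pair=ordered_pairs())
def test_pairs_are_ordered(pair):
    assert pair[0] <= pair[1]
```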
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 9c61f13b944ea..2eb11c3a2e2f7 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -4,8 +4,7 @@
import pytest
import numpy as np
-from pandas.compat import range, PY3
-import pandas.io.formats.printing as printing
+from pandas.compat import range
import pandas as pd
import pandas.util.testing as tm
@@ -127,132 +126,88 @@ def test_df_add_flex_filled_mixed_dtypes(self):
'B': ser * 2})
tm.assert_frame_equal(result, expected)
- def test_arith_flex_frame(self):
- seriesd = tm.getSeriesData()
- frame = pd.DataFrame(seriesd).copy()
-
- mixed_float = pd.DataFrame({'A': frame['A'].copy().astype('float32'),
- 'B': frame['B'].copy().astype('float32'),
- 'C': frame['C'].copy().astype('float16'),
- 'D': frame['D'].copy().astype('float64')})
-
- intframe = pd.DataFrame({k: v.astype(int)
- for k, v in seriesd.items()})
- mixed_int = pd.DataFrame({'A': intframe['A'].copy().astype('int32'),
- 'B': np.ones(len(intframe), dtype='uint64'),
- 'C': intframe['C'].copy().astype('uint8'),
- 'D': intframe['D'].copy().astype('int64')})
-
- # force these all to int64 to avoid platform testing issues
- intframe = pd.DataFrame({c: s for c, s in intframe.items()},
- dtype=np.int64)
-
- ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod']
- if not PY3:
- aliases = {}
- else:
- aliases = {'div': 'truediv'}
-
- for op in ops:
- try:
- alias = aliases.get(op, op)
- f = getattr(operator, alias)
- result = getattr(frame, op)(2 * frame)
- exp = f(frame, 2 * frame)
- tm.assert_frame_equal(result, exp)
-
- # vs mix float
- result = getattr(mixed_float, op)(2 * mixed_float)
- exp = f(mixed_float, 2 * mixed_float)
- tm.assert_frame_equal(result, exp)
- _check_mixed_float(result, dtype=dict(C=None))
-
- # vs mix int
- if op in ['add', 'sub', 'mul']:
- result = getattr(mixed_int, op)(2 + mixed_int)
- exp = f(mixed_int, 2 + mixed_int)
-
- # no overflow in the uint
- dtype = None
- if op in ['sub']:
- dtype = dict(B='uint64', C=None)
- elif op in ['add', 'mul']:
- dtype = dict(C=None)
- tm.assert_frame_equal(result, exp)
- _check_mixed_int(result, dtype=dtype)
-
- # rops
- r_f = lambda x, y: f(y, x)
- result = getattr(frame, 'r' + op)(2 * frame)
- exp = r_f(frame, 2 * frame)
- tm.assert_frame_equal(result, exp)
-
- # vs mix float
- result = getattr(mixed_float, op)(2 * mixed_float)
- exp = f(mixed_float, 2 * mixed_float)
- tm.assert_frame_equal(result, exp)
- _check_mixed_float(result, dtype=dict(C=None))
-
- result = getattr(intframe, op)(2 * intframe)
- exp = f(intframe, 2 * intframe)
- tm.assert_frame_equal(result, exp)
-
- # vs mix int
- if op in ['add', 'sub', 'mul']:
- result = getattr(mixed_int, op)(2 + mixed_int)
- exp = f(mixed_int, 2 + mixed_int)
-
- # no overflow in the uint
- dtype = None
- if op in ['sub']:
- dtype = dict(B='uint64', C=None)
- elif op in ['add', 'mul']:
- dtype = dict(C=None)
- tm.assert_frame_equal(result, exp)
- _check_mixed_int(result, dtype=dtype)
- except:
- printing.pprint_thing("Failing operation %r" % op)
- raise
-
- # ndim >= 3
- ndim_5 = np.ones(frame.shape + (3, 4, 5))
+ def test_arith_flex_frame(self, all_arithmetic_operators, float_frame,
+ mixed_float_frame):
+ # one instance of parametrized fixture
+ op = all_arithmetic_operators
+
+ def f(x, y):
+ # the operator module has no reflected ("r") versions; strip the "r" and swap operands
+ if op.startswith('__r'):
+ return getattr(operator, op.replace('__r', '__'))(y, x)
+ return getattr(operator, op)(x, y)
+
+ result = getattr(float_frame, op)(2 * float_frame)
+ exp = f(float_frame, 2 * float_frame)
+ tm.assert_frame_equal(result, exp)
+
+ # vs mix float
+ result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
+ exp = f(mixed_float_frame, 2 * mixed_float_frame)
+ tm.assert_frame_equal(result, exp)
+ _check_mixed_float(result, dtype=dict(C=None))
+
+ @pytest.mark.parametrize('op', ['__add__', '__sub__', '__mul__'])
+ def test_arith_flex_frame_mixed(self, op, int_frame, mixed_int_frame,
+ mixed_float_frame):
+ f = getattr(operator, op)
+
+ # vs mix int
+ result = getattr(mixed_int_frame, op)(2 + mixed_int_frame)
+ exp = f(mixed_int_frame, 2 + mixed_int_frame)
+
+ # no overflow in the uint
+ dtype = None
+ if op in ['__sub__']:
+ dtype = dict(B='uint64', C=None)
+ elif op in ['__add__', '__mul__']:
+ dtype = dict(C=None)
+ tm.assert_frame_equal(result, exp)
+ _check_mixed_int(result, dtype=dtype)
+
+ # vs mix float
+ result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
+ exp = f(mixed_float_frame, 2 * mixed_float_frame)
+ tm.assert_frame_equal(result, exp)
+ _check_mixed_float(result, dtype=dict(C=None))
+
+ # vs plain int
+ result = getattr(int_frame, op)(2 * int_frame)
+ exp = f(int_frame, 2 * int_frame)
+ tm.assert_frame_equal(result, exp)
+
+ def test_arith_flex_frame_raise(self, all_arithmetic_operators,
+ float_frame):
+ # one instance of parametrized fixture
+ op = all_arithmetic_operators
+
+ # Check that arrays with dim >= 3 raise
+ for dim in range(3, 6):
+ arr = np.ones((1,) * dim)
msg = "Unable to coerce to Series/DataFrame"
with tm.assert_raises_regex(ValueError, msg):
- f(frame, ndim_5)
+ getattr(float_frame, op)(arr)
- with tm.assert_raises_regex(ValueError, msg):
- getattr(frame, op)(ndim_5)
-
- # res_add = frame.add(frame)
- # res_sub = frame.sub(frame)
- # res_mul = frame.mul(frame)
- # res_div = frame.div(2 * frame)
-
- # tm.assert_frame_equal(res_add, frame + frame)
- # tm.assert_frame_equal(res_sub, frame - frame)
- # tm.assert_frame_equal(res_mul, frame * frame)
- # tm.assert_frame_equal(res_div, frame / (2 * frame))
+ def test_arith_flex_frame_corner(self, float_frame):
- const_add = frame.add(1)
- tm.assert_frame_equal(const_add, frame + 1)
+ const_add = float_frame.add(1)
+ tm.assert_frame_equal(const_add, float_frame + 1)
# corner cases
- result = frame.add(frame[:0])
- tm.assert_frame_equal(result, frame * np.nan)
+ result = float_frame.add(float_frame[:0])
+ tm.assert_frame_equal(result, float_frame * np.nan)
+
+ result = float_frame[:0].add(float_frame)
+ tm.assert_frame_equal(result, float_frame * np.nan)
- result = frame[:0].add(frame)
- tm.assert_frame_equal(result, frame * np.nan)
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
- frame.add(frame.iloc[0], fill_value=3)
+ float_frame.add(float_frame.iloc[0], fill_value=3)
+
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
- frame.add(frame.iloc[0], axis='index', fill_value=3)
+ float_frame.add(float_frame.iloc[0], axis='index', fill_value=3)
- def test_arith_flex_series(self):
- arr = np.array([[1., 2., 3.],
- [4., 5., 6.],
- [7., 8., 9.]])
- df = pd.DataFrame(arr, columns=['one', 'two', 'three'],
- index=['a', 'b', 'c'])
+ def test_arith_flex_series(self, simple_frame):
+ df = simple_frame
row = df.xs('a')
col = df['two']
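
A note on the `f` helper in the rewritten `test_arith_flex_frame`: since `operator` exposes only the forward dunders, a reflected op such as `__radd__` is evaluated by stripping the `r` and swapping the operands, because `x.__radd__(y)` computes `y + x`:

```python
import operator

op = '__rsub__'                       # reflected subtraction
x, y = 2.0, 5.0
forward = op.replace('__r', '__')     # -> '__sub__'
assert getattr(operator, forward)(y, x) == y - x == x.__rsub__(y)
```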
@@ -311,3 +266,18 @@ def test_df_bool_mul_int(self):
result = 1 * df
kinds = result.dtypes.apply(lambda x: x.kind)
assert (kinds == 'i').all()
+
+ def test_td64_df_add_int_frame(self):
+ # GH#22696 Check that we don't dispatch to numpy implementation,
+ # which treats int64 as m8[ns]
+ tdi = pd.timedelta_range('1', periods=3)
+ df = tdi.to_frame()
+ other = pd.DataFrame([1, 2, 3], index=tdi) # indexed like `df`
+ with pytest.raises(TypeError):
+ df + other
+ with pytest.raises(TypeError):
+ other + df
+ with pytest.raises(TypeError):
+ df - other
+ with pytest.raises(TypeError):
+ other - df
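
What the new test guards against, runnable standalone: numpy's implementation would interpret the int64 values as nanosecond timedeltas, so pandas must raise before dispatching there:

```python
import pandas as pd

tdi = pd.timedelta_range('1', periods=3)
df = tdi.to_frame()                        # timedelta64[ns] column
other = pd.DataFrame([1, 2, 3], index=tdi)

try:
    df + other
except TypeError as err:
    print("raised as required:", err)
```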
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 6c84beb64e196..2f1c9e05a01b0 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -916,9 +916,8 @@ def test_constructor_list_of_lists(self):
def test_constructor_sequence_like(self):
# GH 3783
# collections.Sequence like
- import collections
- class DummyContainer(collections.Sequence):
+ class DummyContainer(compat.Sequence):
def __init__(self, lst):
self._lst = lst
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index 2472022b862bc..a0e23d256c25b 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -110,9 +110,8 @@ def test_to_records_with_multindex(self):
def test_to_records_with_Mapping_type(self):
import email
from email.parser import Parser
- import collections
- collections.Mapping.register(email.message.Message)
+ compat.Mapping.register(email.message.Message)
headers = Parser().parsestr('From: \n'
'To: \n'
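
Both files above swap direct `collections.Sequence`/`collections.Mapping` access for `pandas.compat` equivalents; on Python 3 these ABCs live in `collections.abc`, and importing them from `collections` is deprecated. The shim presumably amounts to something like this (the actual pandas.compat internals are not shown in this diff):

```python
try:                                       # Python 3.3+
    from collections.abc import Mapping, Sequence
except ImportError:                        # Python 2
    from collections import Mapping, Sequence
```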
diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 3b3ab3d03dce9..c91370dc36770 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -815,6 +815,38 @@ def test_constructor_list_str_na(self, string_dtype):
expected = DataFrame({"A": ['1.0', '2.0', None]}, dtype=object)
assert_frame_equal(result, expected)
+ @pytest.mark.parametrize("data, expected", [
+ # empty
+ (DataFrame(), True),
+ # multi-same
+ (DataFrame({"A": [1, 2], "B": [1, 2]}), True),
+ # multi-object
+ (DataFrame({"A": np.array([1, 2], dtype=object),
+ "B": np.array(["a", "b"], dtype=object)}), True),
+ # multi-extension
+ (DataFrame({"A": pd.Categorical(['a', 'b']),
+ "B": pd.Categorical(['a', 'b'])}), True),
+ # differ types
+ (DataFrame({"A": [1, 2], "B": [1., 2.]}), False),
+ # differ sizes
+ (DataFrame({"A": np.array([1, 2], dtype=np.int32),
+ "B": np.array([1, 2], dtype=np.int64)}), False),
+ # multi-extension differ
+ (DataFrame({"A": pd.Categorical(['a', 'b']),
+ "B": pd.Categorical(['b', 'c'])}), False),
+
+ ])
+ def test_is_homogeneous_type(self, data, expected):
+ assert data._is_homogeneous_type is expected
+
+ def test_asarray_homogenous(self):
+ df = pd.DataFrame({"A": pd.Categorical([1, 2]),
+ "B": pd.Categorical([1, 2])})
+ result = np.asarray(df)
+ # may change from object in the future
+ expected = np.array([[1, 1], [2, 2]], dtype='object')
+ tm.assert_numpy_array_equal(result, expected)
+
class TestDataFrameDatetimeWithTZ(TestData):
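
The cases parametrized above define the contract for the private `_is_homogeneous_type` flag: True exactly when every column shares one dtype, with extension dtypes compared including their categories. For instance:

```python
import numpy as np
import pandas as pd

assert pd.DataFrame()._is_homogeneous_type                            # empty
assert pd.DataFrame({"A": [1, 2], "B": [1, 2]})._is_homogeneous_type
assert not pd.DataFrame({"A": [1, 2], "B": [1., 2.]})._is_homogeneous_type
assert not pd.DataFrame(
    {"A": np.array([1, 2], dtype=np.int32),
     "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type
```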
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index 96b2e98dd7e8d..2b93af357481a 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from datetime import datetime, date, timedelta, time
@@ -364,6 +364,7 @@ def test_getitem_ix_mixed_integer(self):
assert_frame_equal(result, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[[1, 10]]
expected = df.ix[Index([1, 10], dtype=object)]
assert_frame_equal(result, expected)
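
Why this `simplefilter` line is repeated in every block below: `catch_warnings` saves and restores the global filter state on exit, so an ignore installed inside one `with` block does not carry over to the next; each `.ix` usage needs its own. The mechanics:

```python
import warnings

with warnings.catch_warnings(record=True):
    warnings.simplefilter("ignore", DeprecationWarning)
    warnings.warn("df.ix is deprecated", DeprecationWarning)  # silenced

# previous filter state restored; a new block starts clean
with warnings.catch_warnings(record=True):
    warnings.warn("df.ix is deprecated", DeprecationWarning)  # recorded again
```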
@@ -383,37 +384,45 @@ def test_getitem_ix_mixed_integer(self):
def test_getitem_setitem_ix_negative_integers(self):
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = self.frame.ix[:, -1]
assert_series_equal(result, self.frame['D'])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = self.frame.ix[:, [-1]]
assert_frame_equal(result, self.frame[['D']])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = self.frame.ix[:, [-1, -2]]
assert_frame_equal(result, self.frame[['D', 'C']])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
self.frame.ix[:, [-1]] = 0
assert (self.frame['D'] == 0).all()
df = DataFrame(np.random.randn(8, 4))
# ix does label-based indexing when having an integer index
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
with pytest.raises(KeyError):
df.ix[[-1]]
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
with pytest.raises(KeyError):
df.ix[:, [-1]]
# #1942
a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
a.ix[-1] = a.ix[-2]
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_series_equal(a.ix[-1], a.ix[-2], check_names=False)
assert a.ix[-1].name == 'T'
assert a.ix[-2].name == 'S'
@@ -790,16 +799,19 @@ def test_getitem_fancy_2d(self):
f = self.frame
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(f.ix[:, ['B', 'A']],
f.reindex(columns=['B', 'A']))
subidx = self.frame.index[[5, 4, 1]]
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(f.ix[subidx, ['B', 'A']],
f.reindex(index=subidx, columns=['B', 'A']))
# slicing rows, etc.
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(f.ix[5:10], f[5:10])
assert_frame_equal(f.ix[5:10, :], f[5:10])
assert_frame_equal(f.ix[:5, ['A', 'B']],
@@ -808,22 +820,26 @@ def test_getitem_fancy_2d(self):
# slice rows with labels, inclusive!
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
expected = f.ix[5:11]
result = f.ix[f.index[5]:f.index[10]]
assert_frame_equal(expected, result)
# slice columns
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(f.ix[:, :2], f.reindex(columns=['A', 'B']))
# get view
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
exp = f.copy()
f.ix[5:10].values[:] = 5
exp.values[5:10] = 5
assert_frame_equal(f, exp)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
pytest.raises(ValueError, f.ix.__getitem__, f > 0.5)
def test_slice_floats(self):
@@ -879,6 +895,7 @@ def test_setitem_fancy_2d(self):
expected = frame.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame.ix[:, ['B', 'A']] = 1
expected['B'] = 1.
expected['A'] = 1.
@@ -894,6 +911,7 @@ def test_setitem_fancy_2d(self):
values = randn(3, 2)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame.ix[subidx, ['B', 'A']] = values
frame2.ix[[5, 4, 1], ['B', 'A']] = values
@@ -907,12 +925,14 @@ def test_setitem_fancy_2d(self):
frame = self.frame.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
expected1 = self.frame.copy()
frame.ix[5:10] = 1.
expected1.values[5:10] = 1.
assert_frame_equal(frame, expected1)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
expected2 = self.frame.copy()
arr = randn(5, len(frame.columns))
frame.ix[5:10] = arr
@@ -921,6 +941,7 @@ def test_setitem_fancy_2d(self):
# case 4
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame = self.frame.copy()
frame.ix[5:10, :] = 1.
assert_frame_equal(frame, expected1)
@@ -929,6 +950,7 @@ def test_setitem_fancy_2d(self):
# case 5
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame = self.frame.copy()
frame2 = self.frame.copy()
@@ -941,11 +963,13 @@ def test_setitem_fancy_2d(self):
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame2.ix[:5, [0, 1]] = values
assert_frame_equal(frame2, expected)
# case 6: slice rows with labels, inclusive!
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame = self.frame.copy()
expected = self.frame.copy()
@@ -955,6 +979,7 @@ def test_setitem_fancy_2d(self):
# case 7: slice columns
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame = self.frame.copy()
frame2 = self.frame.copy()
expected = self.frame.copy()
@@ -997,6 +1022,7 @@ def test_fancy_setitem_int_labels(self):
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
tmp = df.copy()
exp = df.copy()
tmp.ix[[0, 2, 4]] = 5
@@ -1004,6 +1030,7 @@ def test_fancy_setitem_int_labels(self):
assert_frame_equal(tmp, exp)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
tmp = df.copy()
exp = df.copy()
tmp.ix[6] = 5
@@ -1011,6 +1038,7 @@ def test_fancy_setitem_int_labels(self):
assert_frame_equal(tmp, exp)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
tmp = df.copy()
exp = df.copy()
tmp.ix[:, 2] = 5
@@ -1024,21 +1052,25 @@ def test_fancy_getitem_int_labels(self):
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[[4, 2, 0], [2, 0]]
expected = df.reindex(index=[4, 2, 0], columns=[2, 0])
assert_frame_equal(result, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[[4, 2, 0]]
expected = df.reindex(index=[4, 2, 0])
assert_frame_equal(result, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[4]
expected = df.xs(4)
assert_series_equal(result, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[:, 3]
expected = df[3]
assert_series_equal(result, expected)
@@ -1047,6 +1079,7 @@ def test_fancy_index_int_labels_exceptions(self):
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
# labels that aren't contained
pytest.raises(KeyError, df.ix.__setitem__,
@@ -1065,6 +1098,7 @@ def test_fancy_index_int_labels_exceptions(self):
def test_setitem_fancy_mixed_2d(self):
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5
result = self.mixed_frame.ix[:5, ['C', 'B', 'A']]
assert (result.values == 5).all()
@@ -1078,6 +1112,7 @@ def test_setitem_fancy_mixed_2d(self):
# #1432
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df = DataFrame({1: [1., 2., 3.],
2: [3, 4, 5]})
assert df._is_mixed_type
@@ -1095,27 +1130,32 @@ def test_ix_align(self):
df = df_orig.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df.ix[:, 0] = b
assert_series_equal(df.ix[:, 0].reindex(b.index), b)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
dft = df_orig.T
dft.ix[0, :] = b
assert_series_equal(dft.ix[0, :].reindex(b.index), b)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df = df_orig.copy()
df.ix[:5, 0] = b
s = df.ix[:5, 0]
assert_series_equal(s, b.reindex(s.index))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
dft = df_orig.T
dft.ix[0, :5] = b
s = dft.ix[0, :5]
assert_series_equal(s, b.reindex(s.index))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df = df_orig.copy()
idx = [0, 1, 3, 5]
df.ix[idx, 0] = b
@@ -1123,6 +1163,7 @@ def test_ix_align(self):
assert_series_equal(s, b.reindex(s.index))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
dft = df_orig.T
dft.ix[0, idx] = b
s = dft.ix[0, idx]
@@ -1134,6 +1175,7 @@ def test_ix_frame_align(self):
df = df_orig.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df.ix[:3] = b
out = b.ix[:3]
assert_frame_equal(out, b)
@@ -1141,12 +1183,14 @@ def test_ix_frame_align(self):
b.sort_index(inplace=True)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df = df_orig.copy()
df.ix[[0, 1, 2]] = b
out = df.ix[[0, 1, 2]].reindex(b.index)
assert_frame_equal(out, b)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df = df_orig.copy()
df.ix[:3] = b
out = df.ix[:3]
@@ -1189,6 +1233,7 @@ def test_ix_multi_take_nonint_index(self):
df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
columns=['a', 'b'])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
rs = df.ix[[0], [0]]
xp = df.reindex(['x'], columns=['a'])
assert_frame_equal(rs, xp)
@@ -1197,6 +1242,7 @@ def test_ix_multi_take_multiindex(self):
df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
columns=[['a', 'b'], ['1', '2']])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
rs = df.ix[[0], [0]]
xp = df.reindex(['x'], columns=[('a', '1')])
assert_frame_equal(rs, xp)
@@ -1206,14 +1252,17 @@ def test_ix_dup(self):
df = DataFrame(np.random.randn(len(idx), 3), idx)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
sub = df.ix[:'d']
assert_frame_equal(sub, df)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
sub = df.ix['a':'c']
assert_frame_equal(sub, df.ix[0:4])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
sub = df.ix['b':'d']
assert_frame_equal(sub, df.ix[2:])
@@ -1222,48 +1271,57 @@ def test_getitem_fancy_1d(self):
# return self if no slicing...for now
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert f.ix[:, :] is f
# low dimensional slice
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
xs1 = f.ix[2, ['C', 'B', 'A']]
xs2 = f.xs(f.index[2]).reindex(['C', 'B', 'A'])
tm.assert_series_equal(xs1, xs2)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
ts1 = f.ix[5:10, 2]
ts2 = f[f.columns[2]][5:10]
tm.assert_series_equal(ts1, ts2)
# positional xs
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
xs1 = f.ix[0]
xs2 = f.xs(f.index[0])
tm.assert_series_equal(xs1, xs2)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
xs1 = f.ix[f.index[5]]
xs2 = f.xs(f.index[5])
tm.assert_series_equal(xs1, xs2)
# single column
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_series_equal(f.ix[:, 'A'], f['A'])
# return view
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
exp = f.copy()
exp.values[5] = 4
f.ix[5][:] = 4
tm.assert_frame_equal(exp, f)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
exp.values[:, 1] = 6
f.ix[:, 1][:] = 6
tm.assert_frame_equal(exp, f)
# slice of mixed-frame
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
xs = self.mixed_frame.ix[5]
exp = self.mixed_frame.xs(self.mixed_frame.index[5])
tm.assert_series_equal(xs, exp)
@@ -1275,6 +1333,7 @@ def test_setitem_fancy_1d(self):
expected = self.frame.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame.ix[2, ['C', 'B', 'A']] = [1., 2., 3.]
expected['C'][2] = 1.
expected['B'][2] = 2.
@@ -1282,6 +1341,7 @@ def test_setitem_fancy_1d(self):
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame2 = self.frame.copy()
frame2.ix[2, [3, 2, 1]] = [1., 2., 3.]
assert_frame_equal(frame, expected)
@@ -1291,12 +1351,14 @@ def test_setitem_fancy_1d(self):
expected = self.frame.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
vals = randn(5)
expected.values[5:10, 2] = vals
frame.ix[5:10, 2] = vals
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame2 = self.frame.copy()
frame2.ix[5:10, 'B'] = vals
assert_frame_equal(frame, expected)
@@ -1306,11 +1368,13 @@ def test_setitem_fancy_1d(self):
expected = self.frame.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame.ix[4] = 5.
expected.values[4] = 5.
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame.ix[frame.index[4]] = 6.
expected.values[4] = 6.
assert_frame_equal(frame, expected)
@@ -1320,6 +1384,7 @@ def test_setitem_fancy_1d(self):
expected = self.frame.copy()
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
frame.ix[:, 'A'] = 7.
expected['A'] = 7.
assert_frame_equal(frame, expected)
@@ -1830,6 +1895,7 @@ def test_single_element_ix_dont_upcast(self):
assert issubclass(self.frame['E'].dtype.type, (int, np.integer))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = self.frame.ix[self.frame.index[5], 'E']
assert is_integer(result)
@@ -1841,6 +1907,7 @@ def test_single_element_ix_dont_upcast(self):
df["b"] = 666
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[0, "b"]
assert is_integer(result)
result = df.loc[0, "b"]
@@ -1848,6 +1915,7 @@ def test_single_element_ix_dont_upcast(self):
expected = Series([666], [0], name='b')
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[[0], "b"]
assert_series_equal(result, expected)
result = df.loc[[0], "b"]
@@ -1919,12 +1987,14 @@ def test_iloc_duplicates(self):
result = df.iloc[0]
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result2 = df.ix[0]
assert isinstance(result, Series)
assert_almost_equal(result.values, df.values[0])
assert_series_equal(result, result2)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.T.iloc[:, 0]
result2 = df.T.ix[:, 0]
assert isinstance(result, Series)
@@ -1937,16 +2007,19 @@ def test_iloc_duplicates(self):
index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
rs = df.iloc[0]
xp = df.ix[0]
assert_series_equal(rs, xp)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
rs = df.iloc[:, 0]
xp = df.T.ix[0]
assert_series_equal(rs, xp)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
rs = df.iloc[:, [0]]
xp = df.ix[:, [0]]
assert_frame_equal(rs, xp)
@@ -2168,6 +2241,7 @@ def test_getitem_ix_float_duplicates(self):
expect = df.iloc[1:]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[1:, 0]
@@ -2177,6 +2251,7 @@ def test_getitem_ix_float_duplicates(self):
expect = df.iloc[1:]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[1:, 0]
@@ -2187,6 +2262,7 @@ def test_getitem_ix_float_duplicates(self):
expect = df.iloc[1:-1]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[1:-1, 0]
@@ -2196,6 +2272,7 @@ def test_getitem_ix_float_duplicates(self):
expect = df.iloc[[1, -1]]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[[1, -1], 0]
@@ -2411,6 +2488,7 @@ def test_index_namedtuple(self):
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[IndexType("foo", "bar")]["A"]
assert result == 1
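
A note on the recurring edit above: `catch_warnings(record=True)` alone only records warnings, and once CI escalates warnings to errors, anything not explicitly filtered can still surface. Adding `simplefilter("ignore", DeprecationWarning)` inside each block makes the suppression of the `.ix` DeprecationWarning explicit and scoped to that block. A minimal, self-contained sketch of the pattern (`legacy_lookup` and its message are illustrative stand-ins):

```python
import warnings

def legacy_lookup():
    # stand-in for df.ix[...]; the message text is illustrative
    warnings.warn("\n.ix is deprecated", DeprecationWarning)
    return 42

with warnings.catch_warnings(record=True):
    # explicitly ignore the deprecation inside this block only
    warnings.simplefilter("ignore", DeprecationWarning)
    assert legacy_lookup() == 42
# outside the block, the previous warning filters are restored
```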
diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
index da4424b1ae626..97c94e1134cc8 100644
--- a/pandas/tests/frame/test_operators.py
+++ b/pandas/tests/frame/test_operators.py
@@ -209,6 +209,8 @@ def _check_unary_op(op):
@pytest.mark.parametrize('op,res', [('__eq__', False),
('__ne__', True)])
+ # TODO: unclear whether the elementwise-comparison warning is correct here.
+ @pytest.mark.filterwarnings("ignore:elementwise:FutureWarning")
def test_logical_typeerror_with_non_valid(self, op, res):
# we are comparing floats vs a string
result = getattr(self.frame, op)('foo')
@@ -278,7 +280,9 @@ def test_pos_numeric(self, df):
assert_series_equal(+df['a'], df['a'])
@pytest.mark.parametrize('df', [
- pd.DataFrame({'a': ['a', 'b']}),
+ # NumPy is changing this behavior in a future release
+ pytest.param(pd.DataFrame({'a': ['a', 'b']}),
+ marks=[pytest.mark.filterwarnings("ignore")]),
pd.DataFrame({'a': np.array([-1, 2], dtype=object)}),
pd.DataFrame({'a': [Decimal('-1.0'), Decimal('2.0')]}),
])
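
`pytest.param` lets a single parametrize case carry its own marks, so only the string-frame case above ignores warnings while the other cases stay strict. A minimal sketch with a hypothetical `negate` function:

```python
import pytest

def negate(x):
    return -x

@pytest.mark.parametrize('value', [
    # only this case is allowed to emit warnings
    pytest.param(2, marks=[pytest.mark.filterwarnings("ignore")]),
    3,
])
def test_negate(value):
    assert negate(value) == -value
```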
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
index 3be7ad12db883..3c6f0f0b2ab94 100644
--- a/pandas/tests/frame/test_query_eval.py
+++ b/pandas/tests/frame/test_query_eval.py
@@ -360,6 +360,7 @@ def to_series(mi, level):
else:
raise AssertionError("object must be a Series or Index")
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_raise_on_panel_with_multiindex(self, parser, engine):
p = tm.makePanel(7)
p.items = tm.makeCustomIndex(len(p.items), nlevels=2)
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index 2f90d24f652ca..9f6735c7ba2bf 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -2,7 +2,7 @@
from __future__ import print_function
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from datetime import datetime
import itertools
@@ -56,6 +56,7 @@ def test_pivot(self):
with catch_warnings(record=True):
# pivot multiple columns
+ simplefilter("ignore", FutureWarning)
wp = tm.makePanel()
lp = wp.to_frame()
df = lp.reset_index()
diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py
index caaa311e9ee96..07289d897be62 100644
--- a/pandas/tests/frame/test_subclass.py
+++ b/pandas/tests/frame/test_subclass.py
@@ -2,7 +2,7 @@
from __future__ import print_function
-from warnings import catch_warnings
+import pytest
import numpy as np
from pandas import DataFrame, Series, MultiIndex, Panel, Index
@@ -126,28 +126,28 @@ def test_indexing_sliced(self):
tm.assert_series_equal(res, exp)
assert isinstance(res, tm.SubclassedSeries)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_to_panel_expanddim(self):
# GH 9762
- with catch_warnings(record=True):
- class SubclassedFrame(DataFrame):
-
- @property
- def _constructor_expanddim(self):
- return SubclassedPanel
-
- class SubclassedPanel(Panel):
- pass
-
- index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)])
- df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index)
- result = df.to_panel()
- assert isinstance(result, SubclassedPanel)
- expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]],
- items=['X', 'Y'], major_axis=[0],
- minor_axis=[0, 1, 2],
- dtype='int64')
- tm.assert_panel_equal(result, expected)
+ class SubclassedFrame(DataFrame):
+
+ @property
+ def _constructor_expanddim(self):
+ return SubclassedPanel
+
+ class SubclassedPanel(Panel):
+ pass
+
+ index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)])
+ df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index)
+ result = df.to_panel()
+ assert isinstance(result, SubclassedPanel)
+ expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]],
+ items=['X', 'Y'], major_axis=[0],
+ minor_axis=[0, 1, 2],
+ dtype='int64')
+ tm.assert_panel_equal(result, expected)
def test_subclass_attr_err_propagation(self):
# GH 11808
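
The mark strings used here follow the `warnings` filter syntax `action:message:category`, where the message part is a regular expression matched against the start of the warning text. The Panel deprecation message evidently begins with a newline, hence the `\\nPanel` pattern used throughout this diff. A minimal sketch:

```python
import warnings
import pytest

@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_like():
    # matches: the message starts with a newline followed by "Panel"
    warnings.warn("\nPanel is deprecated", FutureWarning)
```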
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
index 533bff0384ad9..1652835de8228 100644
--- a/pandas/tests/generic/test_generic.py
+++ b/pandas/tests/generic/test_generic.py
@@ -2,7 +2,7 @@
# pylint: disable-msg=E1101,W0612
from copy import copy, deepcopy
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import pytest
import numpy as np
@@ -638,6 +638,7 @@ def test_sample(sel):
s.sample(n=3, weights='weight_column')
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
panel = Panel(items=[0, 1, 2], major_axis=[2, 3, 4],
minor_axis=[3, 4, 5])
with pytest.raises(ValueError):
@@ -705,6 +706,7 @@ def test_sample(sel):
# Test default axes
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
p = Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6],
minor_axis=[1, 3, 5])
assert_panel_equal(
@@ -743,6 +745,7 @@ def test_squeeze(self):
for df in [tm.makeTimeDataFrame()]:
tm.assert_frame_equal(df.squeeze(), df)
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
for p in [tm.makePanel()]:
tm.assert_panel_equal(p.squeeze(), p)
@@ -751,6 +754,7 @@ def test_squeeze(self):
tm.assert_series_equal(df.squeeze(), df['A'])
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
p = tm.makePanel().reindex(items=['ItemA'])
tm.assert_frame_equal(p.squeeze(), p['ItemA'])
@@ -761,6 +765,7 @@ def test_squeeze(self):
empty_series = Series([], name='five')
empty_frame = DataFrame([empty_series])
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
empty_panel = Panel({'six': empty_frame})
[tm.assert_series_equal(empty_series, higher_dim.squeeze())
@@ -798,6 +803,7 @@ def test_transpose(self):
tm.assert_frame_equal(df.transpose().transpose(), df)
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
for p in [tm.makePanel()]:
tm.assert_panel_equal(p.transpose(2, 0, 1)
.transpose(1, 2, 0), p)
@@ -820,6 +826,7 @@ def test_numpy_transpose(self):
np.transpose, df, axes=1)
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
p = tm.makePanel()
tm.assert_panel_equal(np.transpose(
np.transpose(p, axes=(2, 0, 1)),
@@ -842,6 +849,7 @@ def test_take(self):
indices = [-3, 2, 0, 1]
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
for p in [tm.makePanel()]:
out = p.take(indices)
expected = Panel(data=p.values.take(indices, axis=0),
@@ -856,6 +864,7 @@ def test_take_invalid_kwargs(self):
df = tm.makeTimeDataFrame()
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
p = tm.makePanel()
for obj in (s, df, p):
@@ -963,6 +972,7 @@ def test_equals(self):
def test_describe_raises(self):
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
with pytest.raises(NotImplementedError):
tm.makePanel().describe()
@@ -996,6 +1006,7 @@ def test_pipe_tuple_error(self):
def test_pipe_panel(self):
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})})
f = lambda x, y: x + y
result = wp.pipe(f, 2)
diff --git a/pandas/tests/generic/test_panel.py b/pandas/tests/generic/test_panel.py
index 49cb773a1bd10..fe80b2af5ea63 100644
--- a/pandas/tests/generic/test_panel.py
+++ b/pandas/tests/generic/test_panel.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# pylint: disable-msg=E1101,W0612
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from pandas import Panel
from pandas.util.testing import (assert_panel_equal,
@@ -21,6 +21,7 @@ def test_to_xarray(self):
from xarray import DataArray
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
p = tm.makePanel()
result = p.to_xarray()
@@ -51,6 +52,7 @@ def f():
def tester(self):
f = getattr(super(TestPanel, self), t)
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
f()
return tester
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index 48a45e93e1e8e..d8a545b323674 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -25,7 +25,12 @@
'var',
'sem',
'mean',
- 'median',
+ pytest.param('median',
+ # ignore "mean of empty slice" and
+ # all-NaN RuntimeWarnings
+ marks=[pytest.mark.filterwarnings(
+ "ignore::RuntimeWarning"
+ )]),
'prod',
'min',
'max',
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index f8a0f1688c64e..775747ce0c6c1 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1125,3 +1125,12 @@ def h(df, arg3):
expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3]))
tm.assert_series_equal(result, expected)
+
+
+def test_groupby_mean_no_overflow():
+ # Regression test for GH#22487
+ df = pd.DataFrame({
+ "user": ["A", "A", "A", "A", "A"],
+ "connections": [4970, 4749, 4719, 4704, 18446744073699999744]
+ })
+ assert df.groupby('user')['connections'].mean()['A'] == 3689348814740003840
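
For context on the regression test just added: the largest connections value does not fit in int64, so a wrapped int64 accumulator produced a wrong grouped mean before the fix. A quick check with plain Python integers (which never overflow) shows the expected magnitude:

```python
import numpy as np

vals = [4970, 4749, 4719, 4704, 18446744073699999744]
print(sum(vals) / len(vals))              # exact-arithmetic mean, ~3.689e18

# the failure mode the test guards against: the value exceeds int64 range
print(np.iinfo(np.int64).max < vals[-1])  # True
```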
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 9affd0241d028..3cdd0965ccfd0 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -3,7 +3,6 @@
import pytest
-from warnings import catch_warnings
from datetime import datetime
from decimal import Decimal
@@ -508,30 +507,30 @@ def test_frame_multi_key_function_list():
@pytest.mark.parametrize('op', [lambda x: x.sum(), lambda x: x.mean()])
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_groupby_multiple_columns(df, op):
data = df
grouped = data.groupby(['A', 'B'])
- with catch_warnings(record=True):
- result1 = op(grouped)
-
- expected = defaultdict(dict)
- for n1, gp1 in data.groupby('A'):
- for n2, gp2 in gp1.groupby('B'):
- expected[n1][n2] = op(gp2.loc[:, ['C', 'D']])
- expected = {k: DataFrame(v)
- for k, v in compat.iteritems(expected)}
- expected = Panel.fromDict(expected).swapaxes(0, 1)
- expected.major_axis.name, expected.minor_axis.name = 'A', 'B'
-
- # a little bit crude
- for col in ['C', 'D']:
- result_col = op(grouped[col])
- exp = expected[col]
- pivoted = result1[col].unstack()
- pivoted2 = result_col.unstack()
- assert_frame_equal(pivoted.reindex_like(exp), exp)
- assert_frame_equal(pivoted2.reindex_like(exp), exp)
+ result1 = op(grouped)
+
+ expected = defaultdict(dict)
+ for n1, gp1 in data.groupby('A'):
+ for n2, gp2 in gp1.groupby('B'):
+ expected[n1][n2] = op(gp2.loc[:, ['C', 'D']])
+ expected = {k: DataFrame(v)
+ for k, v in compat.iteritems(expected)}
+ expected = Panel.fromDict(expected).swapaxes(0, 1)
+ expected.major_axis.name, expected.minor_axis.name = 'A', 'B'
+
+ # a little bit crude
+ for col in ['C', 'D']:
+ result_col = op(grouped[col])
+ exp = expected[col]
+ pivoted = result1[col].unstack()
+ pivoted2 = result_col.unstack()
+ assert_frame_equal(pivoted.reindex_like(exp), exp)
+ assert_frame_equal(pivoted2.reindex_like(exp), exp)
# test single series works the same
result = data['C'].groupby([data['A'], data['B']]).mean()
@@ -624,8 +623,14 @@ def test_as_index_series_return_frame(df):
assert isinstance(result2, DataFrame)
assert_frame_equal(result2, expected2)
- # corner case
- pytest.raises(Exception, grouped['C'].__getitem__, 'D')
+
+def test_as_index_series_column_slice_raises(df):
+ # GH15072
+ grouped = df.groupby('A', as_index=False)
+ msg = r"Column\(s\) C already selected"
+
+ with tm.assert_raises_regex(IndexError, msg):
+ grouped['C'].__getitem__('D')
def test_groupby_as_index_cython(df):
@@ -1032,6 +1037,8 @@ def test_groupby_mixed_type_columns():
tm.assert_frame_equal(result, expected)
+# TODO: Ensure warning isn't emitted in the first place
+@pytest.mark.filterwarnings("ignore:Mean of:RuntimeWarning")
def test_cython_grouper_series_bug_noncontig():
arr = np.empty((100, 100))
arr.fill(np.nan)
@@ -1181,11 +1188,11 @@ def test_groupby_nat_exclude():
pytest.raises(KeyError, grouped.get_group, pd.NaT)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_sparse_friendly(df):
sdf = df[['C', 'D']].to_sparse()
- with catch_warnings(record=True):
- panel = tm.makePanel()
- tm.add_nans(panel)
+ panel = tm.makePanel()
+ tm.add_nans(panel)
def _check_work(gp):
gp.mean()
@@ -1201,29 +1208,29 @@ def _check_work(gp):
# _check_work(panel.groupby(lambda x: x.month, axis=1))
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_groupby():
- with catch_warnings(record=True):
- panel = tm.makePanel()
- tm.add_nans(panel)
- grouped = panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1},
- axis='items')
- agged = grouped.mean()
- agged2 = grouped.agg(lambda x: x.mean('items'))
+ panel = tm.makePanel()
+ tm.add_nans(panel)
+ grouped = panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1},
+ axis='items')
+ agged = grouped.mean()
+ agged2 = grouped.agg(lambda x: x.mean('items'))
- tm.assert_panel_equal(agged, agged2)
+ tm.assert_panel_equal(agged, agged2)
- tm.assert_index_equal(agged.items, Index([0, 1]))
+ tm.assert_index_equal(agged.items, Index([0, 1]))
- grouped = panel.groupby(lambda x: x.month, axis='major')
- agged = grouped.mean()
+ grouped = panel.groupby(lambda x: x.month, axis='major')
+ agged = grouped.mean()
- exp = Index(sorted(list(set(panel.major_axis.month))))
- tm.assert_index_equal(agged.major_axis, exp)
+ exp = Index(sorted(list(set(panel.major_axis.month))))
+ tm.assert_index_equal(agged.major_axis, exp)
- grouped = panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
- axis='minor')
- agged = grouped.mean()
- tm.assert_index_equal(agged.minor_axis, Index([0, 1]))
+ grouped = panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1},
+ axis='minor')
+ agged = grouped.mean()
+ tm.assert_index_equal(agged.minor_axis, Index([0, 1]))
def test_groupby_2d_malformed():
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 737e8a805f3ce..e7c0881b11871 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -4,7 +4,6 @@
import pytest
-from warnings import catch_warnings
from pandas import (date_range, Timestamp,
Index, MultiIndex, DataFrame, Series, CategoricalIndex)
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
@@ -557,15 +556,15 @@ def test_list_grouper_with_nat(self):
class TestGetGroup():
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_get_group(self):
- with catch_warnings(record=True):
- wp = tm.makePanel()
- grouped = wp.groupby(lambda x: x.month, axis='major')
+ wp = tm.makePanel()
+ grouped = wp.groupby(lambda x: x.month, axis='major')
- gp = grouped.get_group(1)
- expected = wp.reindex(
- major=[x for x in wp.major_axis if x.month == 1])
- assert_panel_equal(gp, expected)
+ gp = grouped.get_group(1)
+ expected = wp.reindex(
+ major=[x for x in wp.major_axis if x.month == 1])
+ assert_panel_equal(gp, expected)
# GH 5267
# be datelike friendly
@@ -743,18 +742,18 @@ def test_multi_iter_frame(self, three_group):
for key, group in grouped:
pass
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_multi_iter_panel(self):
- with catch_warnings(record=True):
- wp = tm.makePanel()
- grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()],
- axis=1)
-
- for (month, wd), group in grouped:
- exp_axis = [x
- for x in wp.major_axis
- if x.month == month and x.weekday() == wd]
- expected = wp.reindex(major=exp_axis)
- assert_panel_equal(group, expected)
+ wp = tm.makePanel()
+ grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()],
+ axis=1)
+
+ for (month, wd), group in grouped:
+ exp_axis = [x
+ for x in wp.major_axis
+ if x.month == month and x.weekday() == wd]
+ expected = wp.reindex(major=exp_axis)
+ assert_panel_equal(group, expected)
def test_dictify(self, df):
dict(iter(df.groupby('A')))
diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py
index 3afc278f9bc93..ae033f7b3f251 100644
--- a/pandas/tests/groupby/test_whitelist.py
+++ b/pandas/tests/groupby/test_whitelist.py
@@ -133,11 +133,15 @@ def df_letters():
return df
-@pytest.mark.parametrize(
- "obj, whitelist", zip((df_letters(), df_letters().floats),
- (df_whitelist, s_whitelist)))
-def test_groupby_whitelist(df_letters, obj, whitelist):
+@pytest.mark.parametrize("whitelist", [df_whitelist, s_whitelist])
+def test_groupby_whitelist(df_letters, whitelist):
df = df_letters
+ if whitelist == df_whitelist:
+ # dataframe
+ obj = df_letters
+ else:
+ obj = df_letters['floats']
+
gb = obj.groupby(df.letters)
assert set(whitelist) == set(gb._apply_whitelist)
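
The parametrize rewrite above avoids calling the `df_letters` fixture function directly at collection time (calling fixtures as plain functions is deprecated in pytest); instead the test parametrizes over plain data and resolves the fixture inside the body. A miniature of the same shape:

```python
import pytest

@pytest.fixture
def letters():
    return {'df': 'frame-like', 'floats': 'series-like'}

@pytest.mark.parametrize('kind', ['df', 'floats'])
def test_whitelist_shape(letters, kind):
    # pick the object inside the test instead of at collection time
    obj = letters['df'] if kind == 'df' else letters['floats']
    assert obj in letters.values()
```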
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index db3de0ceced0c..5ab32ee3863ae 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -1,4 +1,3 @@
-import warnings
import sys
import pytest
@@ -201,7 +200,7 @@ def test_get_duplicates(self):
idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02',
'2000-01-03', '2000-01-03', '2000-01-04'])
- with warnings.catch_warnings(record=True):
+ with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
result = idx.get_duplicates()
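
Swapping `catch_warnings(record=True)` for `tm.assert_produces_warning(FutureWarning)` turns a silent suppression into an assertion: the test now fails if the deprecation warning stops being emitted. The same effect with stock pytest (`get_duplicates_stub` is an illustrative stand-in):

```python
import warnings
import pytest

def get_duplicates_stub():
    # stand-in for the deprecated method; message is illustrative
    warnings.warn("'get_duplicates' is deprecated", FutureWarning)
    return []

with pytest.warns(FutureWarning):
    get_duplicates_stub()
```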
diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py
index 6ccd310f33bbd..b60b222d095b9 100644
--- a/pandas/tests/indexes/datetimes/test_ops.py
+++ b/pandas/tests/indexes/datetimes/test_ops.py
@@ -534,12 +534,22 @@ def test_shift(self):
assert shifted[0] == self.rng[0]
assert shifted.freq == self.rng.freq
- # PerformanceWarning
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", pd.errors.PerformanceWarning)
rng = date_range(START, END, freq=BMonthEnd())
shifted = rng.shift(1, freq=CDay())
assert shifted[0] == rng[0] + CDay()
+ def test_shift_periods(self):
+ # GH #22458 : argument 'n' was deprecated in favor of 'periods'
+ idx = pd.DatetimeIndex(start=START, end=END,
+ periods=3)
+ tm.assert_index_equal(idx.shift(periods=0), idx)
+ tm.assert_index_equal(idx.shift(0), idx)
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=True):
+ tm.assert_index_equal(idx.shift(n=0), idx)
+
def test_pickle_unpickle(self):
unpickled = tm.round_trip_pickle(self.rng)
assert unpickled.freq is not None
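
`test_shift_periods` pins down the GH#22458 deprecation of the `n` argument in favor of `periods`. A minimal sketch, not pandas' actual implementation, of how such a keyword alias is typically deprecated:

```python
import warnings

def shift(periods=0, n=None):
    if n is not None:
        warnings.warn("the 'n' keyword is deprecated, use 'periods'",
                      FutureWarning, stacklevel=2)
        periods = n
    return periods

assert shift(periods=0) == 0        # silent
with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")
    assert shift(n=0) == 0          # emits the FutureWarning
    assert issubclass(w[-1].category, FutureWarning)
```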
diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py
index 6f6f4eb8d24e3..d054121c6dfab 100644
--- a/pandas/tests/indexes/datetimes/test_scalar_compat.py
+++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py
@@ -11,6 +11,7 @@
import pandas as pd
from pandas import date_range, Timestamp, DatetimeIndex
+from pandas.tseries.frequencies import to_offset
class TestDatetimeIndexOps(object):
@@ -124,7 +125,7 @@ def test_round(self, tz_naive_fixture):
expected = DatetimeIndex(['2016-10-17 12:00:00.001501030'])
tm.assert_index_equal(result, expected)
- with tm.assert_produces_warning():
+ with tm.assert_produces_warning(False):
ts = '2016-10-17 12:00:00.001501031'
DatetimeIndex([ts]).round('1010ns')
@@ -169,6 +170,46 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
expected = DatetimeIndex(list(expected))
assert expected.equals(result)
+ @pytest.mark.parametrize('start, index_freq, periods', [
+ ('2018-01-01', '12H', 25),
+ ('2018-01-01 0:0:0.124999', '1ns', 1000),
+ ])
+ @pytest.mark.parametrize('round_freq', [
+ '2ns', '3ns', '4ns', '5ns', '6ns', '7ns',
+ '250ns', '500ns', '750ns',
+ '1us', '19us', '250us', '500us', '750us',
+ '1s', '2s', '3s',
+ '12H', '1D',
+ ])
+ def test_round_int64(self, start, index_freq, periods, round_freq):
+ dt = DatetimeIndex(start=start, freq=index_freq, periods=periods)
+ unit = to_offset(round_freq).nanos
+
+ # test floor
+ result = dt.floor(round_freq)
+ diff = dt.asi8 - result.asi8
+ mod = result.asi8 % unit
+ assert (mod == 0).all(), "floor not a {} multiple".format(round_freq)
+ assert (0 <= diff).all() and (diff < unit).all(), "floor error"
+
+ # test ceil
+ result = dt.ceil(round_freq)
+ diff = result.asi8 - dt.asi8
+ mod = result.asi8 % unit
+ assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq)
+ assert (0 <= diff).all() and (diff < unit).all(), "ceil error"
+
+ # test round
+ result = dt.round(round_freq)
+ diff = abs(result.asi8 - dt.asi8)
+ mod = result.asi8 % unit
+ assert (mod == 0).all(), "round not a {} multiple".format(round_freq)
+ assert (diff <= unit // 2).all(), "round error"
+ if unit % 2 == 0:
+ assert (
+ result.asi8[diff == unit // 2] % 2 == 0
+ ).all(), "round half to even error"
+
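
The assertions in `test_round_int64` reduce to integer arithmetic on the nanosecond values: floor must land on a multiple of the unit at most `unit - 1` below the input, ceil symmetrically above, and exact halves must round to the even multiple. A worked check with plain integers:

```python
t, unit = 10, 4                            # pretend ns timestamp and unit
floor = t - t % unit                       # 8
ceil = floor + (unit if t % unit else 0)   # 12
assert floor % unit == 0 and 0 <= t - floor < unit
assert ceil % unit == 0 and 0 <= ceil - t < unit

# t sits exactly between 8 and 12; round half to even picks 8 (2 * 4)
rounded = floor if (floor // unit) % 2 == 0 else ceil
assert rounded == 8 and (rounded // unit) % 2 == 0
```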
# ----------------------------------------------------------------
# DatetimeIndex.normalize
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index bef9b73773f46..3b7d6a709230b 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -233,6 +233,15 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
with pytest.raises(ValueError):
pd.to_datetime([date], format=fmt)
+ def test_to_datetime_parse_timezone_keeps_name(self):
+ # GH 21697
+ fmt = '%Y-%m-%d %H:%M:%S %z'
+ arg = pd.Index(['2010-01-01 12:00:00 Z'], name='foo')
+ result = pd.to_datetime(arg, format=fmt)
+ expected = pd.DatetimeIndex(['2010-01-01 12:00:00'], tz='UTC',
+ name='foo')
+ tm.assert_index_equal(result, expected)
+
class TestToDatetime(object):
def test_to_datetime_pydatetime(self):
@@ -765,6 +774,14 @@ def test_unit_rounding(self, cache):
expected = pd.Timestamp('2015-06-19 19:55:31.877000093')
assert result == expected
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_unit_ignore_keeps_name(self, cache):
+ # GH 21697
+ expected = pd.Index([15e9] * 2, name='name')
+ result = pd.to_datetime(expected, errors='ignore', box=True, unit='s',
+ cache=cache)
+ tm.assert_index_equal(result, expected)
+
@pytest.mark.parametrize('cache', [True, False])
def test_dataframe(self, cache):
@@ -1175,6 +1192,8 @@ def test_dayfirst(self, cache):
class TestGuessDatetimeFormat(object):
@td.skip_if_not_us_locale
+ @pytest.mark.filterwarnings("ignore:_timelex:DeprecationWarning")
+ # https://github.com/pandas-dev/pandas/issues/21322
def test_guess_datetime_format_for_array(self):
expected_format = '%Y-%m-%d %H:%M:%S.%f'
dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)
@@ -1573,12 +1592,20 @@ def test_parsers_timezone_minute_offsets_roundtrip(self, cache, dt_string,
@pytest.fixture(params=['D', 's', 'ms', 'us', 'ns'])
def units(request):
+ """Day and some time units.
+
+ * D
+ * s
+ * ms
+ * us
+ * ns
+ """
return request.param
@pytest.fixture
def epoch_1960():
- # for origin as 1960-01-01
+ """Timestamp at 1960-01-01."""
return Timestamp('1960-01-01')
@@ -1587,12 +1614,25 @@ def units_from_epochs():
return list(range(5))
-@pytest.fixture(params=[epoch_1960(),
- epoch_1960().to_pydatetime(),
- epoch_1960().to_datetime64(),
- str(epoch_1960())])
-def epochs(request):
- return request.param
+@pytest.fixture(params=['timestamp', 'pydatetime', 'datetime64', 'str_1960'])
+def epochs(epoch_1960, request):
+ """Timestamp at 1960-01-01 in various forms.
+
+ * pd.Timestamp
+ * datetime.datetime
+ * numpy.datetime64
+ * str
+ """
+ assert request.param in {'timestamp', 'pydatetime', 'datetime64',
+ "str_1960"}
+ if request.param == 'timestamp':
+ return epoch_1960
+ elif request.param == 'pydatetime':
+ return epoch_1960.to_pydatetime()
+ elif request.param == "datetime64":
+ return epoch_1960.to_datetime64()
+ else:
+ return str(epoch_1960)
@pytest.fixture
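
The rewritten `epochs` fixture parametrizes over string ids and derives each concrete value from the `epoch_1960` fixture inside the body, again avoiding a fixture call at collection time and giving readable test ids. The same pattern in miniature:

```python
import pytest

@pytest.fixture
def base_value():
    return 1960

@pytest.fixture(params=['int', 'str'])
def variant(base_value, request):
    # resolve the dependent fixture here, not at collection time
    return base_value if request.param == 'int' else str(base_value)

def test_variant(variant):
    assert variant in (1960, '1960')
```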
diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py
index 1cdf0ca6e013e..54a12137c9457 100644
--- a/pandas/tests/indexes/multi/test_duplicates.py
+++ b/pandas/tests/indexes/multi/test_duplicates.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
-import warnings
from itertools import product
import pytest
@@ -241,7 +240,7 @@ def test_get_duplicates():
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
assert not mi.has_duplicates
- with warnings.catch_warnings(record=True):
+ with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []]))
@@ -257,7 +256,7 @@ def test_get_duplicates():
assert len(mi) == (n + 1) * (m + 1)
assert not mi.has_duplicates
- with warnings.catch_warnings(record=True):
+ with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays(
[[], []]))
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 755b3cc7f1dca..99a909849822b 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -560,8 +560,9 @@ def test_insert(self):
tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a'))
def test_insert_missing(self, nulls_fixture):
- # GH 18295 (test missing)
- expected = Index(['a', np.nan, 'b', 'c'])
+ # GH 22295
+ # test there is no mangling of NA values
+ expected = Index(['a', nulls_fixture, 'b', 'c'])
result = Index(list('abc')).insert(1, nulls_fixture)
tm.assert_index_equal(result, expected)
@@ -714,6 +715,8 @@ def test_empty_fancy_raises(self, attr):
pytest.raises(IndexError, index.__getitem__, empty_farr)
@pytest.mark.parametrize("itm", [101, 'no_int'])
+ # FutureWarning from non-tuple sequence of nd indexing
+ @pytest.mark.filterwarnings("ignore::FutureWarning")
def test_getitem_error(self, indices, itm):
with pytest.raises(IndexError):
indices[itm]
@@ -1364,6 +1367,21 @@ def test_get_indexer_numeric_index_boolean_target(self):
expected = np.array([-1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
+ def test_get_indexer_with_NA_values(self, unique_nulls_fixture,
+ unique_nulls_fixture2):
+ # GH 22332
+ # check pairwise that no pair of NA values
+ # is mangled
+ if unique_nulls_fixture is unique_nulls_fixture2:
+ return # skip it, values are not unique
+ arr = np.array([unique_nulls_fixture,
+ unique_nulls_fixture2], dtype=np.object)
+ index = pd.Index(arr, dtype=np.object)
+ result = index.get_indexer([unique_nulls_fixture,
+ unique_nulls_fixture2, 'Unknown'])
+ expected = np.array([0, 1, -1], dtype=np.int64)
+ tm.assert_numpy_array_equal(result, expected)
+
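
The early `return` when the two null fixtures are identical exists because NA sentinels cannot be deduplicated by equality: `NaN != NaN`, and `None`, `NaT`, and `nan` are distinct objects. Identity is the only reliable uniqueness check here:

```python
import numpy as np

assert float('nan') != float('nan')   # equality is useless for NaN
assert np.nan is np.nan               # the same module-level object
assert np.nan is not None             # distinct sentinels stay distinct
```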
@pytest.mark.parametrize("method", [None, 'pad', 'backfill', 'nearest'])
def test_get_loc(self, method):
index = pd.Index([0, 1, 2])
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 2221fd023b561..d49a6a6abc7c9 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -136,6 +136,12 @@ def test_construction_with_dtype(self):
result = CategoricalIndex(idx, categories=idx, ordered=True)
tm.assert_index_equal(result, expected, exact=True)
+ def test_construction_empty_with_bool_categories(self):
+ # see gh-22702
+ cat = pd.CategoricalIndex([], categories=[True, False])
+ categories = sorted(cat.categories.tolist())
+ assert categories == [False, True]
+
def test_construction_with_categorical_dtype(self):
# construction with CategoricalDtype
# GH18109
diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py
index 2e257bb8a500a..d7bdd18f48523 100644
--- a/pandas/tests/indexes/timedeltas/test_ops.py
+++ b/pandas/tests/indexes/timedeltas/test_ops.py
@@ -334,7 +334,7 @@ def test_freq_setter_errors(self):
idx.freq = '5D'
# setting with a non-fixed frequency
- msg = '<2 \* BusinessDays> is a non-fixed frequency'
+ msg = r'<2 \* BusinessDays> is a non-fixed frequency'
with tm.assert_raises_regex(ValueError, msg):
idx.freq = '2B'
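
The `r` prefix added to `msg` above matters once warnings are escalated to errors: `\*` is not a recognized string escape, and Python 3.6+ emits a DeprecationWarning for invalid escapes in non-raw literals. A raw string hands the backslash straight to the regex engine:

```python
import re

pattern = r'<2 \* BusinessDays> is a non-fixed frequency'  # raw: \* reaches re
assert re.search(pattern, '<2 * BusinessDays> is a non-fixed frequency')
```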
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index d7745ffd94cd9..c329d8d15d729 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -1,5 +1,3 @@
-import warnings
-
import pytest
import numpy as np
@@ -147,7 +145,7 @@ def test_get_duplicates(self):
idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day',
'4day'])
- with warnings.catch_warnings(record=True):
+ with tm.assert_produces_warning(FutureWarning):
# Deprecated - see GH20239
result = idx.get_duplicates()
diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py
index cbf1bdbce9574..127548bdaf106 100644
--- a/pandas/tests/indexing/common.py
+++ b/pandas/tests/indexing/common.py
@@ -2,6 +2,7 @@
import itertools
from warnings import catch_warnings, filterwarnings
+import pytest
import numpy as np
from pandas.compat import lrange
@@ -25,6 +26,7 @@ def _axify(obj, key, axis):
return tuple(axes)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class Base(object):
""" indexing comprehensive base class """
@@ -49,22 +51,20 @@ def setup_method(self, method):
self.frame_uints = DataFrame(np.random.randn(4, 4),
index=UInt64Index(lrange(0, 8, 2)),
columns=UInt64Index(lrange(0, 12, 3)))
- with catch_warnings(record=True):
- self.panel_uints = Panel(np.random.rand(4, 4, 4),
- items=UInt64Index(lrange(0, 8, 2)),
- major_axis=UInt64Index(lrange(0, 12, 3)),
- minor_axis=UInt64Index(lrange(0, 16, 4)))
+ self.panel_uints = Panel(np.random.rand(4, 4, 4),
+ items=UInt64Index(lrange(0, 8, 2)),
+ major_axis=UInt64Index(lrange(0, 12, 3)),
+ minor_axis=UInt64Index(lrange(0, 16, 4)))
self.series_floats = Series(np.random.rand(4),
index=Float64Index(range(0, 8, 2)))
self.frame_floats = DataFrame(np.random.randn(4, 4),
index=Float64Index(range(0, 8, 2)),
columns=Float64Index(range(0, 12, 3)))
- with catch_warnings(record=True):
- self.panel_floats = Panel(np.random.rand(4, 4, 4),
- items=Float64Index(range(0, 8, 2)),
- major_axis=Float64Index(range(0, 12, 3)),
- minor_axis=Float64Index(range(0, 16, 4)))
+ self.panel_floats = Panel(np.random.rand(4, 4, 4),
+ items=Float64Index(range(0, 8, 2)),
+ major_axis=Float64Index(range(0, 12, 3)),
+ minor_axis=Float64Index(range(0, 16, 4)))
m_idces = [MultiIndex.from_product([[1, 2], [3, 4]]),
MultiIndex.from_product([[5, 6], [7, 8]]),
@@ -75,35 +75,31 @@ def setup_method(self, method):
self.frame_multi = DataFrame(np.random.randn(4, 4),
index=m_idces[0],
columns=m_idces[1])
- with catch_warnings(record=True):
- self.panel_multi = Panel(np.random.rand(4, 4, 4),
- items=m_idces[0],
- major_axis=m_idces[1],
- minor_axis=m_idces[2])
+ self.panel_multi = Panel(np.random.rand(4, 4, 4),
+ items=m_idces[0],
+ major_axis=m_idces[1],
+ minor_axis=m_idces[2])
self.series_labels = Series(np.random.randn(4), index=list('abcd'))
self.frame_labels = DataFrame(np.random.randn(4, 4),
index=list('abcd'), columns=list('ABCD'))
- with catch_warnings(record=True):
- self.panel_labels = Panel(np.random.randn(4, 4, 4),
- items=list('abcd'),
- major_axis=list('ABCD'),
- minor_axis=list('ZYXW'))
+ self.panel_labels = Panel(np.random.randn(4, 4, 4),
+ items=list('abcd'),
+ major_axis=list('ABCD'),
+ minor_axis=list('ZYXW'))
self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8])
self.frame_mixed = DataFrame(np.random.randn(4, 4),
index=[2, 4, 'null', 8])
- with catch_warnings(record=True):
- self.panel_mixed = Panel(np.random.randn(4, 4, 4),
- items=[2, 4, 'null', 8])
+ self.panel_mixed = Panel(np.random.randn(4, 4, 4),
+ items=[2, 4, 'null', 8])
self.series_ts = Series(np.random.randn(4),
index=date_range('20130101', periods=4))
self.frame_ts = DataFrame(np.random.randn(4, 4),
index=date_range('20130101', periods=4))
- with catch_warnings(record=True):
- self.panel_ts = Panel(np.random.randn(4, 4, 4),
- items=date_range('20130101', periods=4))
+ self.panel_ts = Panel(np.random.randn(4, 4, 4),
+ items=date_range('20130101', periods=4))
dates_rev = (date_range('20130101', periods=4)
.sort_values(ascending=False))
@@ -111,14 +107,12 @@ def setup_method(self, method):
index=dates_rev)
self.frame_ts_rev = DataFrame(np.random.randn(4, 4),
index=dates_rev)
- with catch_warnings(record=True):
- self.panel_ts_rev = Panel(np.random.randn(4, 4, 4),
- items=dates_rev)
+ self.panel_ts_rev = Panel(np.random.randn(4, 4, 4),
+ items=dates_rev)
self.frame_empty = DataFrame({})
self.series_empty = Series({})
- with catch_warnings(record=True):
- self.panel_empty = Panel({})
+ self.panel_empty = Panel({})
# form agglomerates
for o in self._objs:
@@ -175,6 +169,7 @@ def get_value(self, f, i, values=False):
# v = v.__getitem__(a)
# return v
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
return f.ix[i]
def check_values(self, f, func, values=False):
diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
index 0e396a3248e3f..a7e55cdf9936e 100644
--- a/pandas/tests/indexing/test_chaining_and_caching.py
+++ b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -1,5 +1,3 @@
-from warnings import catch_warnings
-
import pytest
import numpy as np
@@ -366,22 +364,22 @@ def check(result, expected):
result4 = df['A'].iloc[2]
check(result4, expected)
+ @pytest.mark.filterwarnings("ignore::DeprecationWarning")
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_cache_updating(self):
# GH 4939, make sure to update the cache on setitem
df = tm.makeDataFrame()
df['A'] # cache series
- with catch_warnings(record=True):
- df.ix["Hello Friend"] = df.ix[0]
+ df.ix["Hello Friend"] = df.ix[0]
assert "Hello Friend" in df['A'].index
assert "Hello Friend" in df['B'].index
- with catch_warnings(record=True):
- panel = tm.makePanel()
- panel.ix[0] # get first item into cache
- panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1
- assert "A+1" in panel.ix[0].columns
- assert "A+1" in panel.ix[1].columns
+ panel = tm.makePanel()
+ panel.ix[0] # get first item into cache
+ panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1
+ assert "A+1" in panel.ix[0].columns
+ assert "A+1" in panel.ix[1].columns
# 5216
# make sure that we don't try to set a dead cache
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index e7daefffe5f6f..2f44cb36eeb11 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -590,11 +590,9 @@ def test_where_series_datetime64(self, fill_val, exp_dtype):
pd.Timestamp('2011-01-03'), values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
- @pytest.mark.parametrize("fill_val,exp_dtype", [
- (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
- (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)],
- ids=['datetime64', 'datetime64tz'])
- def test_where_index_datetime(self, fill_val, exp_dtype):
+ def test_where_index_datetime(self):
+ fill_val = pd.Timestamp('2012-01-01')
+ exp_dtype = 'datetime64[ns]'
obj = pd.Index([pd.Timestamp('2011-01-01'),
pd.Timestamp('2011-01-02'),
pd.Timestamp('2011-01-03'),
@@ -613,13 +611,33 @@ def test_where_index_datetime(self, fill_val, exp_dtype):
pd.Timestamp('2011-01-03'),
pd.Timestamp('2012-01-04')])
- if fill_val.tz:
- self._assert_where_conversion(obj, cond, values, exp,
- 'datetime64[ns]')
- pytest.xfail("ToDo: do not ignore timezone, must be object")
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
- pytest.xfail("datetime64 + datetime64 -> datetime64 must support"
- " scalar")
+
+ @pytest.mark.xfail(
+ reason="GH 22839: do not ignore timezone, must be object")
+ def test_where_index_datetimetz(self):
+ fill_val = pd.Timestamp('2012-01-01', tz='US/Eastern')
+ exp_dtype = np.object
+ obj = pd.Index([pd.Timestamp('2011-01-01'),
+ pd.Timestamp('2011-01-02'),
+ pd.Timestamp('2011-01-03'),
+ pd.Timestamp('2011-01-04')])
+ assert obj.dtype == 'datetime64[ns]'
+ cond = pd.Index([True, False, True, False])
+
+ msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
+ "of some kind")
+ with tm.assert_raises_regex(TypeError, msg):
+ obj.where(cond, fill_val)
+
+ values = pd.Index(pd.date_range(fill_val, periods=4))
+ exp = pd.Index([pd.Timestamp('2011-01-01'),
+ pd.Timestamp('2012-01-02', tz='US/Eastern'),
+ pd.Timestamp('2011-01-03'),
+ pd.Timestamp('2012-01-04', tz='US/Eastern')],
+ dtype=exp_dtype)
+
+ self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
def test_where_index_complex128(self):
pass
diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
index ba1f1de21871f..3773b432135b9 100644
--- a/pandas/tests/indexing/test_floats.py
+++ b/pandas/tests/indexing/test_floats.py
@@ -10,6 +10,9 @@
import pandas.util.testing as tm
+ignore_ix = pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
+
+
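
Binding the mark to a module-level name, as `ignore_ix` does above, keeps the regex-escaped filter string in one place; each test then opts in with a one-line decorator:

```python
import warnings
import pytest

ignore_dep = pytest.mark.filterwarnings("ignore::DeprecationWarning")

@ignore_dep
def test_quiet():
    warnings.warn("legacy path", DeprecationWarning)  # filtered by the mark
```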
class TestFloatIndexers(object):
def check(self, result, original, indexer, getitem):
@@ -57,6 +60,7 @@ def f():
s.iloc[3.0] = 0
pytest.raises(TypeError, f)
+ @ignore_ix
def test_scalar_non_numeric(self):
# GH 4892
@@ -145,6 +149,7 @@ def f():
s[3]
pytest.raises(TypeError, lambda: s[3.0])
+ @ignore_ix
def test_scalar_with_mixed(self):
s2 = Series([1, 2, 3], index=['a', 'b', 'c'])
@@ -202,6 +207,7 @@ def f():
expected = 3
assert result == expected
+ @ignore_ix
def test_scalar_integer(self):
# test how scalar float indexers work on int indexes
@@ -254,6 +260,7 @@ def compare(x, y):
# coerce to equal int
assert 3.0 in s
+ @ignore_ix
def test_scalar_float(self):
# scalar float indexers work on a float index
@@ -269,8 +276,7 @@ def test_scalar_float(self):
(lambda x: x, True)]:
# getting
- with catch_warnings(record=True):
- result = idxr(s)[indexer]
+ result = idxr(s)[indexer]
self.check(result, s, 3, getitem)
# setting
@@ -305,6 +311,7 @@ def g():
s2.iloc[3.0] = 0
pytest.raises(TypeError, g)
+ @ignore_ix
def test_slice_non_numeric(self):
# GH 4892
@@ -356,6 +363,7 @@ def f():
idxr(s)[l] = 0
pytest.raises(TypeError, f)
+ @ignore_ix
def test_slice_integer(self):
# same as above, but for Integer based indexes
@@ -483,6 +491,7 @@ def f():
pytest.raises(TypeError, f)
+ @ignore_ix
def test_slice_integer_frame_getitem(self):
# similar to above, but on the getitem dim (of a DataFrame)
@@ -554,6 +563,7 @@ def f():
with catch_warnings(record=True):
f(lambda x: x.ix)
+ @ignore_ix
def test_slice_float(self):
# same as above, but for floats
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 3dcfe6a68ad9f..538d9706d54d6 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -2,7 +2,7 @@
import pytest
-from warnings import catch_warnings
+from warnings import catch_warnings, filterwarnings, simplefilter
import numpy as np
import pandas as pd
@@ -388,45 +388,53 @@ def test_iloc_getitem_frame(self):
result = df.iloc[2]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
exp = df.ix[4]
tm.assert_series_equal(result, exp)
result = df.iloc[2, 2]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
exp = df.ix[4, 4]
assert result == exp
# slice
result = df.iloc[4:8]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[8:14]
tm.assert_frame_equal(result, expected)
result = df.iloc[:, 2:3]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[:, 4:5]
tm.assert_frame_equal(result, expected)
# list of integers
result = df.iloc[[0, 1, 3]]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[0, 2, 6]]
tm.assert_frame_equal(result, expected)
result = df.iloc[[0, 1, 3], [0, 1]]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[0, 2, 6], [0, 2]]
tm.assert_frame_equal(result, expected)
# neg indices
result = df.iloc[[-1, 1, 3], [-1, 1]]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[18, 2, 6], [6, 2]]
tm.assert_frame_equal(result, expected)
# dups indices
result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[18, 18, 2, 6], [6, 2]]
tm.assert_frame_equal(result, expected)
@@ -434,6 +442,7 @@ def test_iloc_getitem_frame(self):
s = Series(index=lrange(1, 5))
result = df.iloc[s.index]
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
expected = df.ix[[2, 4, 6, 8]]
tm.assert_frame_equal(result, expected)
@@ -609,6 +618,7 @@ def test_iloc_mask(self):
# UserWarnings from reindex of a boolean mask
with catch_warnings(record=True):
+ simplefilter("ignore", UserWarning)
result = dict()
for idx in [None, 'index', 'locs']:
mask = (df.nums > 2).values
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index f64c50699461f..0f524ca0aaac5 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -6,7 +6,7 @@
import pytest
import weakref
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from datetime import datetime
from pandas.core.dtypes.common import (
@@ -419,11 +419,13 @@ def test_setitem_list(self):
# ix with a list
df = DataFrame(index=[0, 1], columns=[0])
with catch_warnings(record=True):
+ simplefilter("ignore")
df.ix[1, 0] = [1, 2, 3]
df.ix[1, 0] = [1, 2]
result = DataFrame(index=[0, 1], columns=[0])
with catch_warnings(record=True):
+ simplefilter("ignore")
result.ix[1, 0] = [1, 2]
tm.assert_frame_equal(result, df)
@@ -447,11 +449,13 @@ def view(self):
df = DataFrame(index=[0, 1], columns=[0])
with catch_warnings(record=True):
+ simplefilter("ignore")
df.ix[1, 0] = TO(1)
df.ix[1, 0] = TO(2)
result = DataFrame(index=[0, 1], columns=[0])
with catch_warnings(record=True):
+ simplefilter("ignore")
result.ix[1, 0] = TO(2)
tm.assert_frame_equal(result, df)
@@ -459,6 +463,7 @@ def view(self):
# remains object dtype even after setting it back
df = DataFrame(index=[0, 1], columns=[0])
with catch_warnings(record=True):
+ simplefilter("ignore")
df.ix[1, 0] = TO(1)
df.ix[1, 0] = np.nan
result = DataFrame(index=[0, 1], columns=[0])
@@ -626,9 +631,25 @@ def test_mixed_index_not_contains(self, index, val):
# GH 19860
assert val not in index
+ def test_contains_with_float_index(self):
+ # GH#22085
+ integer_index = pd.Int64Index([0, 1, 2, 3])
+ uinteger_index = pd.UInt64Index([0, 1, 2, 3])
+ float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3])
+
+ for index in (integer_index, uinteger_index):
+ assert 1.1 not in index
+ assert 1.0 in index
+ assert 1 in index
+
+ assert 1.1 in float_index
+ assert 1.0 not in float_index
+ assert 1 not in float_index
+
def test_index_type_coercion(self):
with catch_warnings(record=True):
+ simplefilter("ignore")
# GH 11836
# if we have an index type and set it with something that looks
@@ -760,16 +781,20 @@ def run_tests(df, rhs, right):
left = df.copy()
with catch_warnings(record=True):
+ # XXX: use a finer-grained filter here.
+ simplefilter("ignore")
left.ix[s, l] = rhs
tm.assert_frame_equal(left, right)
left = df.copy()
with catch_warnings(record=True):
+ simplefilter("ignore")
left.ix[i, j] = rhs
tm.assert_frame_equal(left, right)
left = df.copy()
with catch_warnings(record=True):
+ simplefilter("ignore")
left.ix[r, c] = rhs
tm.assert_frame_equal(left, right)
@@ -821,6 +846,7 @@ def test_slice_with_zero_step_raises(self):
tm.assert_raises_regex(ValueError, 'slice step cannot be zero',
lambda: s.loc[::0])
with catch_warnings(record=True):
+ simplefilter("ignore")
tm.assert_raises_regex(ValueError,
'slice step cannot be zero',
lambda: s.ix[::0])
@@ -839,11 +865,13 @@ def test_indexing_dtypes_on_empty(self):
# Check that .iloc and .ix return correct dtypes GH9983
df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']})
with catch_warnings(record=True):
+ simplefilter("ignore")
df2 = df.ix[[], :]
assert df2.loc[:, 'a'].dtype == np.int64
tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0])
with catch_warnings(record=True):
+ simplefilter("ignore")
tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0])
def test_range_in_series_indexing(self):
@@ -917,6 +945,7 @@ def test_no_reference_cycle(self):
for name in ('loc', 'iloc', 'at', 'iat'):
getattr(df, name)
with catch_warnings(record=True):
+ simplefilter("ignore")
getattr(df, 'ix')
wr = weakref.ref(df)
del df
@@ -1050,3 +1079,31 @@ def test_validate_indices_high():
def test_validate_indices_empty():
with tm.assert_raises_regex(IndexError, "indices are out"):
validate_indices(np.array([0, 1]), 0)
+
+
+def test_extension_array_cross_section():
+ # A cross-section of a homogeneous EA should be an EA
+ df = pd.DataFrame({
+ "A": pd.core.arrays.integer_array([1, 2]),
+ "B": pd.core.arrays.integer_array([3, 4])
+ }, index=['a', 'b'])
+ expected = pd.Series(pd.core.arrays.integer_array([1, 3]),
+ index=['A', 'B'], name='a')
+ result = df.loc['a']
+ tm.assert_series_equal(result, expected)
+
+ result = df.iloc[0]
+ tm.assert_series_equal(result, expected)
+
+
+def test_extension_array_cross_section_converts():
+ df = pd.DataFrame({
+ "A": pd.core.arrays.integer_array([1, 2]),
+ "B": np.array([1, 2]),
+ }, index=['a', 'b'])
+ result = df.loc['a']
+ expected = pd.Series([1, 1], dtype=object, index=['A', 'B'], name='a')
+ tm.assert_series_equal(result, expected)
+
+ result = df.iloc[0]
+ tm.assert_series_equal(result, expected)
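
The pair of tests above fixes the contract for row cross-sections: when every column shares one extension dtype the row keeps it, and any mixing (here an integer-array column next to a plain NumPy column) falls back to object. A sketch against the same constructor the tests use, assuming the API of this pandas version:

```python
import numpy as np
import pandas as pd

homogeneous = pd.DataFrame({
    "A": pd.core.arrays.integer_array([1, 2]),
    "B": pd.core.arrays.integer_array([3, 4]),
}, index=['a', 'b'])
print(homogeneous.loc['a'].dtype)   # Int64: the extension dtype survives

mixed = pd.DataFrame({
    "A": pd.core.arrays.integer_array([1, 2]),
    "B": np.array([1, 2]),
}, index=['a', 'b'])
print(mixed.loc['a'].dtype)         # object: mixed blocks are converted
```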
diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py
index f4d581f450363..61e5fdd7b9562 100644
--- a/pandas/tests/indexing/test_indexing_slow.py
+++ b/pandas/tests/indexing/test_indexing_slow.py
@@ -12,6 +12,7 @@
class TestIndexingSlow(object):
@pytest.mark.slow
+ @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc(self): # GH7724, GH2646
with warnings.catch_warnings(record=True):
diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py
index c84576c984525..04d0e04b5651e 100644
--- a/pandas/tests/indexing/test_ix.py
+++ b/pandas/tests/indexing/test_ix.py
@@ -14,15 +14,17 @@
from pandas.errors import PerformanceWarning
-class TestIX(object):
+def test_ix_deprecation():
+ # GH 15114
+
+ df = DataFrame({'A': [1, 2, 3]})
+ with tm.assert_produces_warning(DeprecationWarning,
+ check_stacklevel=False):
+ df.ix[1, 'A']
- def test_ix_deprecation(self):
- # GH 15114
- df = DataFrame({'A': [1, 2, 3]})
- with tm.assert_produces_warning(DeprecationWarning,
- check_stacklevel=False):
- df.ix[1, 'A']
+@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
+class TestIX(object):
def test_ix_loc_setitem_consistency(self):
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 2e52154d7679b..9fa705f923c88 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -3,7 +3,7 @@
import itertools
import pytest
-from warnings import catch_warnings
+from warnings import catch_warnings, filterwarnings
import numpy as np
import pandas as pd
@@ -699,6 +699,7 @@ def test_loc_name(self):
assert result == 'index_name'
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\n.ix", DeprecationWarning)
result = df.ix[[0, 1]].index.name
assert result == 'index_name'
diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py
index d2c4c8f5e149b..b8f80164e5402 100644
--- a/pandas/tests/indexing/test_multiindex.py
+++ b/pandas/tests/indexing/test_multiindex.py
@@ -9,6 +9,7 @@
from pandas.tests.indexing.common import _mklbl
+@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexBasic(object):
def test_iloc_getitem_multiindex2(self):
@@ -732,6 +733,14 @@ def test_multiindex_contains_dropped(self):
assert 'a' in idx.levels[0]
assert 'a' not in idx
+ @pytest.mark.parametrize("data, expected", [
+ (MultiIndex.from_product([(), ()]), True),
+ (MultiIndex.from_product([(1, 2), (3, 4)]), True),
+ (MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
+ ])
+ def test_multiindex_is_homogeneous_type(self, data, expected):
+ assert data._is_homogeneous_type is expected
+
class TestMultiIndexSlicers(object):
@@ -1232,101 +1241,99 @@ def f():
tm.assert_frame_equal(df, expected)
+@pytest.mark.filterwarnings('ignore:\\nPanel:FutureWarning')
class TestMultiIndexPanel(object):
def test_iloc_getitem_panel_multiindex(self):
- with catch_warnings(record=True):
+ # GH 7199
+ # Panel with multi-index
+ multi_index = MultiIndex.from_tuples([('ONE', 'one'),
+ ('TWO', 'two'),
+ ('THREE', 'three')],
+ names=['UPPER', 'lower'])
+
+ simple_index = [x[0] for x in multi_index]
+ wd1 = Panel(items=['First', 'Second'],
+ major_axis=['a', 'b', 'c', 'd'],
+ minor_axis=multi_index)
+
+ wd2 = Panel(items=['First', 'Second'],
+ major_axis=['a', 'b', 'c', 'd'],
+ minor_axis=simple_index)
+
+ expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]]
+ result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG
+ tm.assert_frame_equal(result1, expected1)
+
+ expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]]
+ result2 = wd2.iloc[0, [True, True, True, False], [0, 2]]
+ tm.assert_frame_equal(result2, expected2)
+
+ expected1 = DataFrame(index=['a'], columns=multi_index,
+ dtype='float64')
+ result1 = wd1.iloc[0, [0], [0, 1, 2]]
+ tm.assert_frame_equal(result1, expected1)
+
+ expected2 = DataFrame(index=['a'], columns=simple_index,
+ dtype='float64')
+ result2 = wd2.iloc[0, [0], [0, 1, 2]]
+ tm.assert_frame_equal(result2, expected2)
+
+ # GH 7516
+ mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')])
+ p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3),
+ items=['a', 'b', 'c'], major_axis=mi,
+ minor_axis=['u', 'v', 'w'])
+ result = p.iloc[:, 1, 0]
+ expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u')
+ tm.assert_series_equal(result, expected)
- # GH 7199
- # Panel with multi-index
- multi_index = MultiIndex.from_tuples([('ONE', 'one'),
- ('TWO', 'two'),
- ('THREE', 'three')],
- names=['UPPER', 'lower'])
-
- simple_index = [x[0] for x in multi_index]
- wd1 = Panel(items=['First', 'Second'],
- major_axis=['a', 'b', 'c', 'd'],
- minor_axis=multi_index)
-
- wd2 = Panel(items=['First', 'Second'],
- major_axis=['a', 'b', 'c', 'd'],
- minor_axis=simple_index)
-
- expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]]
- result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG
- tm.assert_frame_equal(result1, expected1)
-
- expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]]
- result2 = wd2.iloc[0, [True, True, True, False], [0, 2]]
- tm.assert_frame_equal(result2, expected2)
-
- expected1 = DataFrame(index=['a'], columns=multi_index,
- dtype='float64')
- result1 = wd1.iloc[0, [0], [0, 1, 2]]
- tm.assert_frame_equal(result1, expected1)
-
- expected2 = DataFrame(index=['a'], columns=simple_index,
- dtype='float64')
- result2 = wd2.iloc[0, [0], [0, 1, 2]]
- tm.assert_frame_equal(result2, expected2)
-
- # GH 7516
- mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')])
- p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3),
- items=['a', 'b', 'c'], major_axis=mi,
- minor_axis=['u', 'v', 'w'])
- result = p.iloc[:, 1, 0]
- expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u')
- tm.assert_series_equal(result, expected)
-
- result = p.loc[:, (1, 'y'), 'u']
- tm.assert_series_equal(result, expected)
+ result = p.loc[:, (1, 'y'), 'u']
+ tm.assert_series_equal(result, expected)
def test_panel_setitem_with_multiindex(self):
- with catch_warnings(record=True):
- # 10360
- # failing with a multi-index
- arr = np.array([[[1, 2, 3], [0, 0, 0]],
- [[0, 0, 0], [0, 0, 0]]],
- dtype=np.float64)
-
- # reg index
- axes = dict(items=['A', 'B'], major_axis=[0, 1],
- minor_axis=['X', 'Y', 'Z'])
- p1 = Panel(0., **axes)
- p1.iloc[0, 0, :] = [1, 2, 3]
- expected = Panel(arr, **axes)
- tm.assert_panel_equal(p1, expected)
-
- # multi-indexes
- axes['items'] = MultiIndex.from_tuples(
- [('A', 'a'), ('B', 'b')])
- p2 = Panel(0., **axes)
- p2.iloc[0, 0, :] = [1, 2, 3]
- expected = Panel(arr, **axes)
- tm.assert_panel_equal(p2, expected)
-
- axes['major_axis'] = MultiIndex.from_tuples(
- [('A', 1), ('A', 2)])
- p3 = Panel(0., **axes)
- p3.iloc[0, 0, :] = [1, 2, 3]
- expected = Panel(arr, **axes)
- tm.assert_panel_equal(p3, expected)
-
- axes['minor_axis'] = MultiIndex.from_product(
- [['X'], range(3)])
- p4 = Panel(0., **axes)
- p4.iloc[0, 0, :] = [1, 2, 3]
- expected = Panel(arr, **axes)
- tm.assert_panel_equal(p4, expected)
-
- arr = np.array(
- [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]],
- dtype=np.float64)
- p5 = Panel(0., **axes)
- p5.iloc[0, :, 0] = [1, 2]
- expected = Panel(arr, **axes)
- tm.assert_panel_equal(p5, expected)
+ # 10360
+ # failing with a multi-index
+ arr = np.array([[[1, 2, 3], [0, 0, 0]],
+ [[0, 0, 0], [0, 0, 0]]],
+ dtype=np.float64)
+
+ # reg index
+ axes = dict(items=['A', 'B'], major_axis=[0, 1],
+ minor_axis=['X', 'Y', 'Z'])
+ p1 = Panel(0., **axes)
+ p1.iloc[0, 0, :] = [1, 2, 3]
+ expected = Panel(arr, **axes)
+ tm.assert_panel_equal(p1, expected)
+
+ # multi-indexes
+ axes['items'] = MultiIndex.from_tuples(
+ [('A', 'a'), ('B', 'b')])
+ p2 = Panel(0., **axes)
+ p2.iloc[0, 0, :] = [1, 2, 3]
+ expected = Panel(arr, **axes)
+ tm.assert_panel_equal(p2, expected)
+
+ axes['major_axis'] = MultiIndex.from_tuples(
+ [('A', 1), ('A', 2)])
+ p3 = Panel(0., **axes)
+ p3.iloc[0, 0, :] = [1, 2, 3]
+ expected = Panel(arr, **axes)
+ tm.assert_panel_equal(p3, expected)
+
+ axes['minor_axis'] = MultiIndex.from_product(
+ [['X'], range(3)])
+ p4 = Panel(0., **axes)
+ p4.iloc[0, 0, :] = [1, 2, 3]
+ expected = Panel(arr, **axes)
+ tm.assert_panel_equal(p4, expected)
+
+ arr = np.array(
+ [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]],
+ dtype=np.float64)
+ p5 = Panel(0., **axes)
+ p5.iloc[0, :, 0] = [1, 2]
+ expected = Panel(arr, **axes)
+ tm.assert_panel_equal(p5, expected)
diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py
index 1085e2a61be48..2cd05b5779f30 100644
--- a/pandas/tests/indexing/test_panel.py
+++ b/pandas/tests/indexing/test_panel.py
@@ -6,6 +6,7 @@
from pandas import Panel, date_range, DataFrame
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestPanel(object):
def test_iloc_getitem_panel(self):
@@ -110,6 +111,7 @@ def test_iloc_panel_issue(self):
assert p.iloc[1, :3, 1].shape == (3, )
assert p.iloc[:3, 1, 1].shape == (3, )
+ @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
def test_panel_getitem(self):
with catch_warnings(record=True):
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 3c7a7f070805d..5910f462cb3df 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -16,6 +16,8 @@
class TestPartialSetting(object):
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
+ @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
def test_partial_setting(self):
# GH2578, allow ix and friends to partially set
@@ -404,6 +406,7 @@ def test_series_partial_set_with_name(self):
result = ser.iloc[[1, 1, 0, 0]]
tm.assert_series_equal(result, expected, check_index_type=True)
+ @pytest.mark.filterwarnings("ignore:\\n.ix")
def test_partial_set_invalid(self):
# GH 4940
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 34f22513106ba..86251ad7529d5 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -1285,7 +1285,7 @@ def test_deprecated_fastpath():
def test_validate_ndim():
values = np.array([1.0, 2.0])
placement = slice(2)
- msg = "Wrong number of dimensions. values.ndim != ndim \[1 != 2\]"
+ msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]"
with tm.assert_raises_regex(ValueError, msg):
make_block(values, placement, ndim=2)
diff --git a/pandas/tests/io/formats/data/unicode_series.csv b/pandas/tests/io/formats/data/unicode_series.csv
deleted file mode 100644
index 2485e149edb06..0000000000000
--- a/pandas/tests/io/formats/data/unicode_series.csv
+++ /dev/null
@@ -1,18 +0,0 @@
-1617,King of New York (1990)
-1618,All Things Fair (1996)
-1619,"Sixth Man, The (1997)"
-1620,Butterfly Kiss (1995)
-1621,"Paris, France (1993)"
-1622,"Cérémonie, La (1995)"
-1623,Hush (1998)
-1624,Nightwatch (1997)
-1625,Nobody Loves Me (Keiner liebt mich) (1994)
-1626,"Wife, The (1995)"
-1627,Lamerica (1994)
-1628,Nico Icon (1995)
-1629,"Silence of the Palace, The (Saimt el Qusur) (1994)"
-1630,"Slingshot, The (1993)"
-1631,Land and Freedom (Tierra y libertad) (1995)
-1632,Á köldum klaka (Cold Fever) (1994)
-1633,Etz Hadomim Tafus (Under the Domin Tree) (1994)
-1634,Two Friends (1986)
diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py
new file mode 100644
index 0000000000000..055763bf62d6e
--- /dev/null
+++ b/pandas/tests/io/formats/test_console.py
@@ -0,0 +1,74 @@
+import pytest
+
+from pandas.io.formats.console import detect_console_encoding
+
+
+class MockEncoding(object): # TODO(py27): replace with mock
+ """
+    Used to add a side effect when accessing the 'encoding' property. If the
+    side effect is a str, that value is returned. Otherwise, the side effect
+    should be an exception, which is raised on access.
+ """
+ def __init__(self, encoding):
+ super(MockEncoding, self).__init__()
+ self.val = encoding
+
+ @property
+ def encoding(self):
+ return self.raise_or_return(self.val)
+
+ @staticmethod
+ def raise_or_return(val):
+ if isinstance(val, str):
+ return val
+ else:
+ raise val
+
+
+@pytest.mark.parametrize('empty,filled', [
+ ['stdin', 'stdout'],
+ ['stdout', 'stdin']
+])
+def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled):
+    # Ensures that sys.stdout.encoding or sys.stdin.encoding is used when
+    # they have values filled in.
+ # GH 21552
+ with monkeypatch.context() as context:
+ context.setattr('sys.{}'.format(empty), MockEncoding(''))
+ context.setattr('sys.{}'.format(filled), MockEncoding(filled))
+ assert detect_console_encoding() == filled
+
+
+@pytest.mark.parametrize('encoding', [
+ AttributeError,
+ IOError,
+ 'ascii'
+])
+def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding):
+ # GH 21552
+ with monkeypatch.context() as context:
+ context.setattr('locale.getpreferredencoding', lambda: 'foo')
+ context.setattr('sys.stdout', MockEncoding(encoding))
+ assert detect_console_encoding() == 'foo'
+
+
+@pytest.mark.parametrize('std,locale', [
+ ['ascii', 'ascii'],
+ ['ascii', Exception],
+ [AttributeError, 'ascii'],
+ [AttributeError, Exception],
+ [IOError, 'ascii'],
+ [IOError, Exception]
+])
+def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale):
+ # When both the stdout/stdin encoding and locale preferred encoding checks
+    # fail (or return 'ascii'), we should default to the sys default encoding.
+ # GH 21552
+ with monkeypatch.context() as context:
+ context.setattr(
+ 'locale.getpreferredencoding',
+ lambda: MockEncoding.raise_or_return(locale)
+ )
+ context.setattr('sys.stdout', MockEncoding(std))
+ context.setattr('sys.getdefaultencoding', lambda: 'sysDefaultEncoding')
+ assert detect_console_encoding() == 'sysDefaultEncoding'
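Taken together, these three tests pin down a three-step fallback chain. The sketch below only illustrates the behaviour asserted above; it is not the actual body of pandas.io.formats.console.detect_console_encoding:

    import locale
    import sys

    def detect_console_encoding_sketch():
        # 1) prefer an explicit, non-'ascii' encoding on stdout, then stdin
        for stream in (sys.stdout, sys.stdin):
            try:
                enc = stream.encoding
            except (AttributeError, IOError):
                enc = None
            if enc and enc.lower() != 'ascii':
                return enc
        # 2) fall back to the locale's preferred encoding
        try:
            enc = locale.getpreferredencoding()
        except Exception:
            enc = None
        if enc and enc.lower() != 'ascii':
            return enc
        # 3) last resort: the interpreter-wide default
        return sys.getdefaultencoding()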
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index c19f8e57f9ae7..03e830fb09ad6 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -955,7 +955,7 @@ def test_unicode_problem_decoding_as_ascii(self):
compat.text_type(dm.to_string())
def test_string_repr_encoding(self, datapath):
- filepath = datapath('io', 'formats', 'data', 'unicode_series.csv')
+ filepath = datapath('io', 'parser', 'data', 'unicode_series.csv')
df = pd.read_csv(filepath, header=None, encoding='latin1')
repr(df)
repr(df[1])
@@ -1269,18 +1269,42 @@ def test_to_string_specified_header(self):
df.to_string(header=['X'])
def test_to_string_no_index(self):
- df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
+ # GH 16839, GH 13032
+ df = DataFrame({'x': [11, 22], 'y': [33, -44], 'z': ['AAA', ' ']})
df_s = df.to_string(index=False)
- expected = "x y\n1 4\n2 5\n3 6"
+ # Leading space is expected for positive numbers.
+ expected = (" x y z\n"
+ " 11 33 AAA\n"
+ " 22 -44 ")
+ assert df_s == expected
+ df_s = df[['y', 'x', 'z']].to_string(index=False)
+ expected = (" y x z\n"
+ " 33 11 AAA\n"
+ "-44 22 ")
assert df_s == expected
def test_to_string_line_width_no_index(self):
+ # GH 13998, GH 22505
df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
df_s = df.to_string(line_width=1, index=False)
- expected = "x \\\n1 \n2 \n3 \n\ny \n4 \n5 \n6"
+ expected = " x \\\n 1 \n 2 \n 3 \n\n y \n 4 \n 5 \n 6 "
+
+ assert df_s == expected
+
+ df = DataFrame({'x': [11, 22, 33], 'y': [4, 5, 6]})
+
+ df_s = df.to_string(line_width=1, index=False)
+ expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 "
+
+ assert df_s == expected
+
+ df = DataFrame({'x': [11, 22, -33], 'y': [4, 5, -6]})
+
+ df_s = df.to_string(line_width=1, index=False)
+ expected = " x \\\n 11 \n 22 \n-33 \n\n y \n 4 \n 5 \n-6 "
assert df_s == expected
@@ -1793,7 +1817,7 @@ def test_to_string_without_index(self):
# GH 11729 Test index=False option
s = Series([1, 2, 3, 4])
result = s.to_string(index=False)
- expected = (u('1\n') + '2\n' + '3\n' + '4')
+ expected = (u(' 1\n') + ' 2\n' + ' 3\n' + ' 4')
assert result == expected
def test_unicode_name_in_footer(self):
diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py
index 9fc16c43f5c1d..7d54f93c9831e 100644
--- a/pandas/tests/io/formats/test_to_excel.py
+++ b/pandas/tests/io/formats/test_to_excel.py
@@ -6,8 +6,8 @@
import pytest
import pandas.util.testing as tm
-from warnings import catch_warnings
from pandas.io.formats.excel import CSSToExcelConverter
+from pandas.io.formats.css import CSSWarning
@pytest.mark.parametrize('css,expected', [
@@ -272,6 +272,6 @@ def test_css_to_excel_bad_colors(input_color):
"patternType": "solid"
}
- with catch_warnings(record=True):
+ with tm.assert_produces_warning(CSSWarning):
convert = CSSToExcelConverter()
assert expected == convert(css)
diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
index aa020ba4c0623..4ebf435f7d75f 100755
--- a/pandas/tests/io/generate_legacy_storage_files.py
+++ b/pandas/tests/io/generate_legacy_storage_files.py
@@ -35,7 +35,7 @@
"""
from __future__ import print_function
-from warnings import catch_warnings
+from warnings import catch_warnings, filterwarnings
from distutils.version import LooseVersion
from pandas import (Series, DataFrame, Panel,
SparseSeries, SparseDataFrame,
@@ -187,6 +187,7 @@ def create_data():
)
with catch_warnings(record=True):
+ filterwarnings("ignore", "\\nPanel", FutureWarning)
mixed_dup_panel = Panel({u'ItemA': frame[u'float'],
u'ItemB': frame[u'int']})
mixed_dup_panel.items = [u'ItemA', u'ItemA']
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 9e871d27f0ce8..49e42786d6fb8 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -9,6 +9,7 @@
import sys
from datetime import datetime
from collections import OrderedDict
+from io import TextIOWrapper
import pytest
import numpy as np
@@ -197,20 +198,6 @@ def test_malformed(self):
header=1, comment='#',
skipfooter=1)
- def test_quoting(self):
- bad_line_small = """printer\tresult\tvariant_name
-Klosterdruckerei\tKlosterdruckerei (1611-1804)\tMuller, Jacob
-Klosterdruckerei\tKlosterdruckerei (1611-1804)\tMuller, Jakob
-Klosterdruckerei\tKlosterdruckerei (1609-1805)\t"Furststiftische Hofdruckerei, (1609-1805)\tGaller, Alois
-Klosterdruckerei\tKlosterdruckerei (1609-1805)\tHochfurstliche Buchhandlung """ # noqa
- pytest.raises(Exception, self.read_table, StringIO(bad_line_small),
- sep='\t')
-
- good_line_small = bad_line_small + '"'
- df = self.read_table(StringIO(good_line_small), sep='\t')
- assert len(df) == 3
-
def test_unnamed_columns(self):
data = """A,B,C,,
1,2,3,4,5
@@ -1609,3 +1596,11 @@ def test_skip_bad_lines(self):
val = sys.stderr.getvalue()
assert 'Skipping line 3' in val
assert 'Skipping line 5' in val
+
+ def test_buffer_rd_bytes_bad_unicode(self):
+ # Regression test for #22748
+ t = BytesIO(b"\xB0")
+ if PY3:
+ t = TextIOWrapper(t, encoding='ascii', errors='surrogateescape')
+ with pytest.raises(UnicodeError):
+ pd.read_csv(t, encoding='UTF-8')
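Background on why this regression test holds: the surrogateescape error handler smuggles undecodable bytes through as lone surrogate code points, and lone surrogates cannot later be encoded to UTF-8. Illustration:

    import io

    raw = io.BytesIO(b"\xb0")
    txt = io.TextIOWrapper(raw, encoding="ascii", errors="surrogateescape")
    s = txt.read()       # '\udcb0': the stray byte survives as a surrogate
    s.encode("utf-8")    # UnicodeEncodeError: surrogates not allowed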
diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index e4950af19ea95..5a28b6263f20f 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -30,9 +30,8 @@ def test_zip(self):
expected = self.read_csv(self.csv1)
with tm.ensure_clean('test_file.zip') as path:
- tmp = zipfile.ZipFile(path, mode='w')
- tmp.writestr('test_file', data)
- tmp.close()
+ with zipfile.ZipFile(path, mode='w') as tmp:
+ tmp.writestr('test_file', data)
result = self.read_csv(path, compression='zip')
tm.assert_frame_equal(result, expected)
@@ -47,10 +46,9 @@ def test_zip(self):
with tm.ensure_clean('combined_zip.zip') as path:
inner_file_names = ['test_file', 'second_file']
- tmp = zipfile.ZipFile(path, mode='w')
- for file_name in inner_file_names:
- tmp.writestr(file_name, data)
- tmp.close()
+ with zipfile.ZipFile(path, mode='w') as tmp:
+ for file_name in inner_file_names:
+ tmp.writestr(file_name, data)
tm.assert_raises_regex(ValueError, 'Multiple files',
self.read_csv, path, compression='zip')
@@ -60,8 +58,8 @@ def test_zip(self):
compression='infer')
with tm.ensure_clean() as path:
- tmp = zipfile.ZipFile(path, mode='w')
- tmp.close()
+ with zipfile.ZipFile(path, mode='w') as tmp:
+ pass
tm.assert_raises_regex(ValueError, 'Zero files',
self.read_csv, path, compression='zip')
@@ -84,9 +82,8 @@ def test_other_compression(self, compress_type, compress_method, ext):
expected = self.read_csv(self.csv1)
with tm.ensure_clean() as path:
- tmp = compress_method(path, mode='wb')
- tmp.write(data)
- tmp.close()
+ with compress_method(path, mode='wb') as tmp:
+ tmp.write(data)
result = self.read_csv(path, compression=compress_type)
tm.assert_frame_equal(result, expected)
@@ -100,9 +97,8 @@ def test_other_compression(self, compress_type, compress_method, ext):
tm.assert_frame_equal(result, expected)
with tm.ensure_clean('test.{}'.format(ext)) as path:
- tmp = compress_method(path, mode='wb')
- tmp.write(data)
- tmp.close()
+ with compress_method(path, mode='wb') as tmp:
+ tmp.write(data)
result = self.read_csv(path, compression='infer')
tm.assert_frame_equal(result, expected)
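Every hunk in this file applies the same refactor: a manual open/close pair becomes a with statement, so the handle is closed even when the assertion or parser call in between raises. In isolation (generic sketch, hypothetical file name):

    import zipfile

    # the archive is closed -- and its central directory flushed -- even if
    # writestr raises, which the old manual close() did not guarantee
    with zipfile.ZipFile("example.zip", mode="w") as zf:
        zf.writestr("inner.csv", "a,b\n1,2\n")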
diff --git a/pandas/tests/io/parser/quoting.py b/pandas/tests/io/parser/quoting.py
index 15427aaf9825c..013e635f80d21 100644
--- a/pandas/tests/io/parser/quoting.py
+++ b/pandas/tests/io/parser/quoting.py
@@ -9,6 +9,7 @@
import pandas.util.testing as tm
from pandas import DataFrame
+from pandas.errors import ParserError
from pandas.compat import PY3, StringIO, u
@@ -151,3 +152,19 @@ def test_quotechar_unicode(self):
if PY3:
result = self.read_csv(StringIO(data), quotechar=u('\u0001'))
tm.assert_frame_equal(result, expected)
+
+ def test_unbalanced_quoting(self):
+ # see gh-22789.
+ data = "a,b,c\n1,2,\"3"
+
+ if self.engine == "c":
+ regex = "EOF inside string starting at row 1"
+ else:
+ regex = "unexpected end of data"
+
+ with tm.assert_raises_regex(ParserError, regex):
+ self.read_csv(StringIO(data))
+
+ expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
+ data = self.read_csv(StringIO(data + '"'))
+ tm.assert_frame_equal(data, expected)
diff --git a/pandas/tests/io/sas/data/load_log.sas7bdat b/pandas/tests/io/sas/data/load_log.sas7bdat
new file mode 100644
index 0000000000000..dc78925471baf
Binary files /dev/null and b/pandas/tests/io/sas/data/load_log.sas7bdat differ
diff --git a/pandas/tests/io/sas/data/many_columns.csv b/pandas/tests/io/sas/data/many_columns.csv
new file mode 100644
index 0000000000000..307fc30f33b9f
--- /dev/null
+++ b/pandas/tests/io/sas/data/many_columns.csv
@@ -0,0 +1,4 @@
+DATASRC,PDDOCID,age,agegt89,ASSESSA,ASSESS1,ASSESS3,ASSESS4,ASSESS5,ASSESS6,ASSESS7,week,BECK,conf1,conf2,conf3,demo3,demo4,demo5,demo6,demo7,demo11a,demo11b,demo11c,demo11d,derm1b,derm2,derm3,derm4,derm5a,derm5b,derm7,derm7a,derm7b,derm8,derm9,ECG3,ecgrtxt,ecgrhr,ecgrpr,ecgrqrs,ecgrqrsaxis,ecgrqt,ecgrqtc,ecgrrep,ecgrtime,mmse1,mmse2,mmse3,mmse4,mmse5,mmse6,mmse7,mmse8,mmse9,mmse10,mmse11,mmse12,mmse13,mmse14,mmse15,mmse16,mmse17,mmse18,mmse19,mmse20,mmse,mmsescor,mrf1,mrf2,mrf3,mrf4,mrf5,mrf6,mrf7,mrf8,mrf9,mrf10,mrf11,mrf12,mrf13,nvitl1s,nvitl1d,nvitl1r,nvitl2s,nvitl2d,nvitl2r,nvitl3s,nvitl3d,nvitl3r,nvitl4s,nvitl4d,nvitl4r,nvitl5,nvitl1,nvitl2,nvitl3,nvitl4,phys1,phys1a,phys14,phys15a,phys15b,phys15c,phys15d,phys16a,phys16b,phys16c,phys16d,phys17a,phys17b,phys17c,phys17d,phys18a,phys18b,phys18c,phys18d,phys19a,phys19b,phys20,phys22,phys24,phys26,phys28,PREG1,PREG2,updrsa,updrs1,updrs2,updrs3,updrs4,updrs5a,updrs6a,updrs7a,updrs8a,updrs9a,updrs10a,updrs11a,updrs12a,updrs13a,updrs14a,updrs15a,updrs16a,updrs17a,updrs18a,updrs19a,updrs20a1,updrs20b1,updrs20c1,updrs20d1,updrs20e1,updrs21a1,updrs21b1,updrs22a1,updrs22b1,updrs22c1,updrs22d1,updrs22e1,updrs23a1,updrs23b1,updrs24a1,updrs24b1,updrs25a1,updrs25b1,updrs26a1,updrs26b1,updrs26c1,updrs26d1,updrs27a,updrs28a,updrs29a,updrs30a,updrs31a,updrs32a,updrs33a,updrs34a,updrs35,updrs36,updrs37,updrs38,updrs39,updrs5b,updrs6b,updrs7b,updrs8b,updrs9b,updrs10b,updrs11b,updrs12b,updrs13b,updrs14b,updrs15b,updrs16b,updrs17b,updrs18b,updrs19b,updrs20a2,updrs20b2,updrs20c2,updrs20d2,updrs20e2,updrs21a2,updrs21b2,updrs22a2,updrs22b2,updrs22c2,updrs22d2,updrs22e2,updrs23a2,updrs23b2,updrs24a2,updrs24b2,updrs25a2,updrs25b2,updrs26a2,updrs26b2,updrs26c2,updrs26d2,updrs27b,updrs28b,updrs29b,updrs30b,updrs31b,updrs32b,updrs33b,updrs34b,updrs5c,updrs6c,updrs7c,updrs8c,updrs9c,updrs10c,updrs11c,updrs12c,updrs13c,updrs14c,updrs15c,updrs16c,updrs17c,updrs32c,updrs33c,updrs34c,updrsmental,updrsadl,updrsadlon,updrsadloff,updrsadlmin,updrstremor,updrstremortreat,updrstremormin,updrsrigid,updrsrigidtreat,updrsrigidmin,updrsmotor,updrsmotortreat,updrsmotormin,updrs,updrstrt,updrsmin,updrs4a,updrs41,updrs42,updrs43,updrs44,updrs45,updrs46,updrs47,updrs48,updrs49,updrs410,updrs411,vitl1s,vitl1d,vitl2,vitl3s,vitl3d,vitl4,vitl5,vitl6,assess,fbeck,conf,demo1,derm,ecg,ecgr,mrf,nvitl,fphys1,fpreg,fupdrs,fupdrs4,vitl,site,race,rImaged,rPD,rPDlt5,rAgeGt30,rHY,rMed,rMelanoma,rPreclude,rNeed,rEligible,gender,incsae,incsusp,incterm,increlated,inctermat,increason,incafter24,incendp,incres,disp2,disp3,disp4,disp6,inex1,inex2,inex3,inex4,inex5,inex6,inex7,inex8,inex9,inex10,inex11,inex12,inex13,inex14,inex15,inex16,inex17,inex18,inex19,inex20,inex21,inex22,inex23,inex24,inex25,inex26,inex27,inex28,treatment,treat,disp,inex,classify,enrollyr,demoyear,dob_yr,inexdays,demodays,onsetdays,diagdays,medstartdays,physdays,phys21dys,phys23dys,phys25dys,phys27dys,phys29dys,confdays,pregdays,nvitldays,nvitlscandays,vitldays,labdays,ecgdays,ecgtestdays,mrfdays,dermdays,dermexamdays,dermbiopdays,mmsedays,beckdays,updrdays,updr4days,assessdays,daystotherapy,dispdays,endpdys,termdys,SAEdys,resdys,lmeddys,wddays,VISIT_NO
+a030,ab304,43.0,0.0,0.0,0.0,,,,,,-2.0,0.0,1.0,1.0,,2.0,1.0,19.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,,,,,,,0.0,2.0,ABNORMAL,75.0,150.0,100.0,-3.0,410.0,460.0,2.0,1000.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,3.0,5.0,2.0,1.0,1.0,1.0,0.0,3.0,1.0,1.0,1.0,26.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,150.0,94.0,73.0,155.0,96.0,71.0,148.0,91.0,69.0,146.0,67.0,72.0,1.0,42840.0,46080.0,46980.0,30600.0,100.0,175.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0,2.0,1.0,,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.5,0.0,0.0,0.0,1.0,1.0,2.0,2.0,1.0,1.5,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,2.5,95.0,95.0,7.0,,2.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,5.0,,,5.0,1.5,,1.5,7.5,,7.5,20.0,,20.0,25.0,,25.0,,,,,,,,,,,,,138.0,86.0,72.0,130.0,80.0,80.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,abc,1.0,1.0,1.0,0.0,1.0,34.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,,0.0,3.0,0.0,1.0,0.0,4.0,3.0,,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Placebo,1.0,1.0,1.0,1.0,2002.0,2002.0,1914.0,-28.0,-28.0,-404.0,-28.0,0.0,-28.0,,,,,-6.0,-28.0,-13.0,-13.0,-12.0,-28.0,-28.0,-28.0,-28.0,-28.0,-14.0,-14.0,,-28.0,-28.0,-28.0,,-28.0,,659.0,426.0,659.0,,,658.0,100.0,ab
+a030,ab304,43.0,0.0,0.0,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0,1.0,2.0,95.0,95.0,7.0,,2.0,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,3.0,,,3.0,0.0,,0.0,3.0,,3.0,13.0,,13.0,16.0,,16.0,,,,,,,,,,,,,140.0,86.0,76.0,132.0,80.0,84.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,abc,0.0,0.0,1.0,0.0,1.0,34.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,,0.0,3.0,0.0,1.0,0.0,4.0,3.0,,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Placebo,1.0,1.0,1.0,1.0,2002.0,,1914.0,-28.0,,,,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,,0.0,,0.0,,659.0,426.0,659.0,,,658.0,100.0,ab
+a030,ab304,43.0,0.0,0.0,0.0,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000.0,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,2.0,0.0,1.0,1.0,0.5,1.0,2.0,90.0,95.0,7.0,,2.0,2.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,5.0,,,5.0,0.5,,0.5,2.0,,2.0,16.0,,16.0,21.0,,21.0,0.0,,,,,,,,,,,,149.0,88.0,80.0,136.0,90.0,82.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,abc,0.0,0.0,1.0,1.0,1.0,34.0,5.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,,0.0,3.0,0.0,1.0,0.0,4.0,3.0,,1.0,1.0,1.0,1.0,1.0,1.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Placebo,1.0,1.0,1.0,1.0,2002.0,,1914.0,-28.0,,,,0.0,,,,,,,,,,,29.0,29.0,,,,,,,,,29.0,29.0,29.0,,659.0,426.0,659.0,,,658.0,100.0,ab
diff --git a/pandas/tests/io/sas/data/many_columns.sas7bdat b/pandas/tests/io/sas/data/many_columns.sas7bdat
new file mode 100644
index 0000000000000..582316fc59e18
Binary files /dev/null and b/pandas/tests/io/sas/data/many_columns.sas7bdat differ
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index efde152a918bd..705387188438f 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -9,6 +9,8 @@
import pytest
+# https://github.com/cython/cython/issues/1720
+@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestSAS7BDAT(object):
@pytest.fixture(autouse=True)
@@ -199,6 +201,22 @@ def test_compact_numerical_values(datapath):
tm.assert_series_equal(result, expected, check_exact=True)
+def test_many_columns(datapath):
+    # Test that column information is found in more places (PR #22628)
+ fname = datapath("io", "sas", "data", "many_columns.sas7bdat")
+ df = pd.read_sas(fname, encoding='latin-1')
+ fname = datapath("io", "sas", "data", "many_columns.csv")
+ df0 = pd.read_csv(fname, encoding='latin-1')
+ tm.assert_frame_equal(df, df0)
+
+
+def test_inconsistent_number_of_rows(datapath):
+ # Regression test for issue #16615. (PR #22628)
+ fname = datapath("io", "sas", "data", "load_log.sas7bdat")
+ df = pd.read_sas(fname, encoding='latin-1')
+ assert len(df) == 2097
+
+
def test_zero_variables(datapath):
# Check if the SAS file has zero variables (PR #18184)
fname = datapath("io", "sas", "data", "zero_variables.sas7bdat")
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
index a6b331685e72a..bb73c6bc6b38b 100644
--- a/pandas/tests/io/test_clipboard.py
+++ b/pandas/tests/io/test_clipboard.py
@@ -13,7 +13,6 @@
from pandas.util import testing as tm
from pandas.util.testing import makeCustomDataframe as mkdf
from pandas.io.clipboard.exceptions import PyperclipException
-from pandas.io.clipboard import clipboard_set, clipboard_get
try:
@@ -76,10 +75,53 @@ def df(request):
raise ValueError
+@pytest.fixture
+def mock_clipboard(mock, request):
+ """Fixture mocking clipboard IO.
+
+ This mocks pandas.io.clipboard.clipboard_get and
+ pandas.io.clipboard.clipboard_set.
+
+ This uses a local dict for storing data. The dictionary
+ key used is the test ID, available with ``request.node.name``.
+
+ This returns the local dictionary, for direct manipulation by
+ tests.
+ """
+
+ # our local clipboard for tests
+ _mock_data = {}
+
+ def _mock_set(data):
+ _mock_data[request.node.name] = data
+
+ def _mock_get():
+ return _mock_data[request.node.name]
+
+ mock_set = mock.patch("pandas.io.clipboard.clipboard_set",
+ side_effect=_mock_set)
+ mock_get = mock.patch("pandas.io.clipboard.clipboard_get",
+ side_effect=_mock_get)
+ with mock_get, mock_set:
+ yield _mock_data
+
+
+@pytest.mark.clipboard
+def test_mock_clipboard(mock_clipboard):
+ import pandas.io.clipboard
+ pandas.io.clipboard.clipboard_set("abc")
+ assert "abc" in set(mock_clipboard.values())
+ result = pandas.io.clipboard.clipboard_get()
+ assert result == "abc"
+
+
@pytest.mark.single
+@pytest.mark.clipboard
@pytest.mark.skipif(not _DEPS_INSTALLED,
reason="clipboard primitives not installed")
+@pytest.mark.usefixtures("mock_clipboard")
class TestClipboard(object):
+
def check_round_trip_frame(self, data, excel=None, sep=None,
encoding=None):
data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
@@ -118,15 +160,18 @@ def test_copy_delim_warning(self, df):
# delimited and excel="True"
@pytest.mark.parametrize('sep', ['\t', None, 'default'])
@pytest.mark.parametrize('excel', [True, None, 'default'])
- def test_clipboard_copy_tabs_default(self, sep, excel, df):
+ def test_clipboard_copy_tabs_default(self, sep, excel, df, request,
+ mock_clipboard):
kwargs = build_kwargs(sep, excel)
df.to_clipboard(**kwargs)
if PY2:
# to_clipboard copies unicode, to_csv produces bytes. This is
# expected behavior
- assert clipboard_get().encode('utf-8') == df.to_csv(sep='\t')
+ result = mock_clipboard[request.node.name].encode('utf-8')
+ expected = df.to_csv(sep='\t')
+ assert result == expected
else:
- assert clipboard_get() == df.to_csv(sep='\t')
+ assert mock_clipboard[request.node.name] == df.to_csv(sep='\t')
# Tests reading of white space separated tables
@pytest.mark.parametrize('sep', [None, 'default'])
@@ -138,7 +183,8 @@ def test_clipboard_copy_strings(self, sep, excel, df):
assert result.to_string() == df.to_string()
assert df.shape == result.shape
- def test_read_clipboard_infer_excel(self):
+ def test_read_clipboard_infer_excel(self, request,
+ mock_clipboard):
# gh-19010: avoid warnings
clip_kwargs = dict(engine="python")
@@ -147,7 +193,7 @@ def test_read_clipboard_infer_excel(self):
1 2
4 Harry Carney
""".strip())
- clipboard_set(text)
+ mock_clipboard[request.node.name] = text
df = pd.read_clipboard(**clip_kwargs)
# excel data is parsed correctly
@@ -159,7 +205,7 @@ def test_read_clipboard_infer_excel(self):
1 2
3 4
""".strip())
- clipboard_set(text)
+ mock_clipboard[request.node.name] = text
res = pd.read_clipboard(**clip_kwargs)
text = dedent("""
@@ -167,7 +213,7 @@ def test_read_clipboard_infer_excel(self):
1 2
3 4
""".strip())
- clipboard_set(text)
+ mock_clipboard[request.node.name] = text
exp = pd.read_clipboard(**clip_kwargs)
tm.assert_frame_equal(res, exp)
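The mock_clipboard fixture swaps the real OS clipboard for a per-test dictionary keyed by request.node.name, so concurrently running tests cannot clobber each other's data. The same isolation can be sketched with pytest's built-in monkeypatch (hypothetical test, for illustration only):

    import pandas as pd

    def test_clipboard_roundtrip_sketch(monkeypatch):
        store = {}
        monkeypatch.setattr("pandas.io.clipboard.clipboard_set",
                            lambda text: store.update(clip=text))
        monkeypatch.setattr("pandas.io.clipboard.clipboard_get",
                            lambda: store["clip"])
        df = pd.DataFrame({"a": [1, 2]})
        df.to_clipboard()
        assert "a" in store["clip"]   # header reached the fake clipboard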
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 991b8ee508760..73e29e6eb9a6a 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -44,6 +44,8 @@ def __fspath__(self):
HERE = os.path.abspath(os.path.dirname(__file__))
+# https://github.com/cython/cython/issues/1720
+@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestCommonIOCapabilities(object):
data1 = """index,A,B,C,D
foo,2,3,4,5
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index 1806ddd2bbcc6..b62a1e6c4933e 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -1,5 +1,6 @@
import os
import warnings
+import contextlib
import pytest
@@ -8,12 +9,15 @@
import pandas.util.testing as tm
+@contextlib.contextmanager
def catch_to_csv_depr():
# Catching warnings because Series.to_csv has
# been deprecated. Remove this context when
# Series.to_csv has been aligned.
- return warnings.catch_warnings(record=True)
+ with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", FutureWarning)
+ yield
@pytest.mark.parametrize('obj', [
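The catch_to_csv_depr change is subtle: the old version returned a bare catch_warnings context manager from a plain function, leaving nowhere to install a filter after entry; rewriting it as a generator-based context manager lets the helper enter catch_warnings and register the ignore rule before yielding. The reusable pattern:

    import contextlib
    import warnings

    @contextlib.contextmanager
    def ignore_future_warnings():
        # catch_warnings snapshots the global filter state on entry; the
        # ignore rule added here is rolled back when the block exits
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            yield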
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 6741645e466f3..a639556eb07d6 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -611,6 +611,8 @@ def test_read_from_s3_url(self, ext):
tm.assert_frame_equal(url_table, local_table)
@pytest.mark.slow
+ # ignore warning from old xlrd
+ @pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning")
def test_read_from_file_url(self, ext):
# FILE
@@ -2189,6 +2191,7 @@ def test_ExcelWriter_dispatch_raises(self):
with tm.assert_raises_regex(ValueError, 'No engine'):
ExcelWriter('nothing')
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_register_writer(self):
# some awkward mocking to test out dispatch and such actually works
called_save = []
diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py
index dc6c319bb3366..68413d610e615 100644
--- a/pandas/tests/io/test_gbq.py
+++ b/pandas/tests/io/test_gbq.py
@@ -4,11 +4,17 @@
import platform
import os
+try:
+ from unittest import mock
+except ImportError:
+ mock = pytest.importorskip("mock")
+
import numpy as np
import pandas as pd
from pandas import compat, DataFrame
-
from pandas.compat import range
+import pandas.util.testing as tm
+
pandas_gbq = pytest.importorskip('pandas_gbq')
@@ -93,6 +99,16 @@ def make_mixed_dataframe_v2(test_size):
index=range(test_size))
+def test_read_gbq_without_dialect_warns_future_change(monkeypatch):
+ # Default dialect is changing to standard SQL. See:
+ # https://github.com/pydata/pandas-gbq/issues/195
+ mock_read_gbq = mock.Mock()
+ mock_read_gbq.return_value = DataFrame([[1.0]])
+ monkeypatch.setattr(pandas_gbq, 'read_gbq', mock_read_gbq)
+ with tm.assert_produces_warning(FutureWarning):
+ pd.read_gbq("SELECT 1")
+
+
@pytest.mark.single
class TestToGBQIntegrationWithServiceAccountKeyPath(object):
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 412e218f95c6f..ee45f8828d85e 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -91,6 +91,7 @@ def check_arbitrary(a, b):
assert(a == b)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestPackers(object):
def setup_method(self, method):
@@ -105,6 +106,7 @@ def encode_decode(self, x, compress=None, **kwargs):
return read_msgpack(p, **kwargs)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestAPI(TestPackers):
def test_string_io(self):
@@ -464,6 +466,7 @@ def test_basic(self):
assert_categorical_equal(i, i_rec)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestNDFrame(TestPackers):
def setup_method(self, method):
@@ -486,10 +489,9 @@ def setup_method(self, method):
'int': DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)),
'mixed': DataFrame(data)}
- with catch_warnings(record=True):
- self.panel = {
- 'float': Panel(dict(ItemA=self.frame['float'],
- ItemB=self.frame['float'] + 1))}
+ self.panel = {
+ 'float': Panel(dict(ItemA=self.frame['float'],
+ ItemB=self.frame['float'] + 1))}
def test_basic_frame(self):
@@ -846,6 +848,7 @@ def legacy_packer(request, datapath):
return datapath(request.param)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestMsgpack(object):
"""
How to add msgpack tests:
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index fefbe8afb59cb..ab7f04ad86ffc 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -368,6 +368,40 @@ def test_multiindex_with_columns(self, pa_ge_070):
check_round_trip(df, engine, read_kwargs={'columns': ['A', 'B']},
expected=df[['A', 'B']])
+ def test_write_ignoring_index(self, engine):
+ # ENH 20768
+ # Ensure index=False omits the index from the written Parquet file.
+ df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']})
+
+ write_kwargs = {
+ 'compression': None,
+ 'index': False,
+ }
+
+ # Because we're dropping the index, we expect the loaded dataframe to
+ # have the default integer index.
+ expected = df.reset_index(drop=True)
+
+ check_round_trip(df, engine, write_kwargs=write_kwargs,
+ expected=expected)
+
+ # Ignore custom index
+ df = pd.DataFrame({'a': [1, 2, 3], 'b': ['q', 'r', 's']},
+ index=['zyx', 'wvu', 'tsr'])
+
+ check_round_trip(df, engine, write_kwargs=write_kwargs,
+ expected=expected)
+
+ # Ignore multi-indexes as well.
+ arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
+ ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
+ df = pd.DataFrame({'one': [i for i in range(8)],
+ 'two': [-i for i in range(8)]}, index=arrays)
+
+ expected = df.reset_index(drop=True)
+ check_round_trip(df, engine, write_kwargs=write_kwargs,
+ expected=expected)
+
class TestParquetPyArrow(Base):
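In user-facing terms, the new test exercises the index keyword being added to DataFrame.to_parquet; a minimal usage sketch (hypothetical path):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]}, index=["x", "y", "z"])
    df.to_parquet("no_index.parquet", index=False)  # index not written
    pd.read_parquet("no_index.parquet").index       # default RangeIndex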
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 77b4a3c7cac5f..a47c3c01fc80e 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -14,7 +14,7 @@
"""
import glob
import pytest
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import os
from distutils.version import LooseVersion
@@ -202,6 +202,7 @@ def test_pickles(current_pickle_data, legacy_pickle):
version = os.path.basename(os.path.dirname(legacy_pickle))
with catch_warnings(record=True):
+ simplefilter("ignore")
compare(current_pickle_data, legacy_pickle, version)
@@ -332,9 +333,9 @@ def compress_file(self, src_path, dest_path, compression):
f = bz2.BZ2File(dest_path, "w")
elif compression == 'zip':
import zipfile
- f = zipfile.ZipFile(dest_path, "w",
- compression=zipfile.ZIP_DEFLATED)
- f.write(src_path, os.path.basename(src_path))
+ with zipfile.ZipFile(dest_path, "w",
+ compression=zipfile.ZIP_DEFLATED) as f:
+ f.write(src_path, os.path.basename(src_path))
elif compression == 'xz':
lzma = pandas.compat.import_lzma()
f = lzma.LZMAFile(dest_path, "w")
@@ -343,9 +344,8 @@ def compress_file(self, src_path, dest_path, compression):
raise ValueError(msg)
if compression != "zip":
- with open(src_path, "rb") as fh:
+ with open(src_path, "rb") as fh, f:
f.write(fh.read())
- f.close()
def test_write_explicit(self, compression, get_random_path):
base = get_random_path
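The `with open(src_path, "rb") as fh, f:` line relies on the fact that a with statement accepts any already-constructed context manager, so the compressed handle created in the if/elif chain above is closed by the same statement that closes the source file. Generic sketch of the idiom (hypothetical paths):

    # 'f' stands in for the bz2/gzip/lzma handle opened earlier; both files
    # are guaranteed closed on exit, replacing the trailing f.close()
    f = open("dest.bin", "wb")
    with open("src.bin", "rb") as fh, f:
        f.write(fh.read())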
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index ddcfcc0842d1a..ea5f1684c0695 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -2,7 +2,7 @@
import os
import tempfile
from contextlib import contextmanager
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from distutils.version import LooseVersion
import datetime
@@ -40,6 +40,10 @@
LooseVersion('2.2') else 'zlib')
+ignore_natural_naming_warning = pytest.mark.filterwarnings(
+ "ignore:object name:tables.exceptions.NaturalNameWarning"
+)
+
# contextmanager to ensure the file cleanup
@@ -139,12 +143,14 @@ def teardown_method(self, method):
@pytest.mark.single
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestHDFStore(Base):
def test_factory_fun(self):
path = create_tempfile(self.path)
try:
- with catch_warnings(record=True):
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
with get_store(path) as tbl:
raise ValueError('blah')
except ValueError:
@@ -153,11 +159,13 @@ def test_factory_fun(self):
safe_remove(path)
try:
- with catch_warnings(record=True):
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
with get_store(path) as tbl:
tbl['a'] = tm.makeDataFrame()
- with catch_warnings(record=True):
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
with get_store(path) as tbl:
assert len(tbl) == 1
assert type(tbl['a']) == DataFrame
@@ -425,8 +433,8 @@ def test_repr(self):
df.loc[3:6, ['obj1']] = np.nan
df = df._consolidate()._convert(datetime=True)
- # PerformanceWarning
with catch_warnings(record=True):
+ simplefilter("ignore", pd.errors.PerformanceWarning)
store['df'] = df
# make a random group in hdf space
@@ -446,6 +454,7 @@ def test_repr(self):
repr(s)
str(s)
+ @ignore_natural_naming_warning
def test_contains(self):
with ensure_clean_store(self.path) as store:
@@ -912,11 +921,15 @@ def test_put_mixed_type(self):
# PerformanceWarning
with catch_warnings(record=True):
+ simplefilter("ignore", pd.errors.PerformanceWarning)
store.put('df', df)
expected = store.get('df')
tm.assert_frame_equal(expected, df)
+ @pytest.mark.filterwarnings(
+ "ignore:object name:tables.exceptions.NaturalNameWarning"
+ )
def test_append(self):
with ensure_clean_store(self.path) as store:
@@ -1075,6 +1088,7 @@ def check(format, index):
# PerformanceWarning
with catch_warnings(record=True):
+ simplefilter("ignore", pd.errors.PerformanceWarning)
check('fixed', index)
@pytest.mark.skipif(not is_platform_little_endian(),
@@ -1355,6 +1369,7 @@ def test_append_with_strings(self):
with ensure_clean_store(self.path) as store:
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
wp = tm.makePanel()
wp2 = wp.rename_axis(
{x: "%s_extra" % x for x in wp.minor_axis}, axis=2)
@@ -2553,6 +2568,7 @@ def test_terms(self):
with ensure_clean_store(self.path) as store:
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
wp = tm.makePanel()
wpneg = Panel.fromDict({-1: tm.makeDataFrame(),
@@ -2758,8 +2774,10 @@ def test_tuple_index(self):
DF = DataFrame(data, index=idx, columns=col)
with catch_warnings(record=True):
+ simplefilter("ignore", pd.errors.PerformanceWarning)
self._check_roundtrip(DF, tm.assert_frame_equal)
+ @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_index_types(self):
with catch_warnings(record=True):
@@ -2988,6 +3006,9 @@ def test_wide(self):
wp = tm.makePanel()
self._check_roundtrip(wp, assert_panel_equal)
+ @pytest.mark.filterwarnings(
+ "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"
+ )
def test_select_with_dups(self):
# single dtypes
@@ -3047,6 +3068,9 @@ def test_select_with_dups(self):
result = store.select('df', columns=['B', 'A'])
assert_frame_equal(result, expected, by_blocks=True)
+ @pytest.mark.filterwarnings(
+ "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"
+ )
def test_wide_table_dups(self):
with ensure_clean_store(self.path) as store:
with catch_warnings(record=True):
@@ -3589,6 +3613,9 @@ def test_select_iterator_many_empty_frames(self):
# should be []
assert len(results) == 0
+ @pytest.mark.filterwarnings(
+ "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
+ )
def test_retain_index_attributes(self):
# GH 3499, losing frequency info on index recreation
@@ -3631,6 +3658,9 @@ def test_retain_index_attributes(self):
freq='D'))))
store.append('df2', df3)
+ @pytest.mark.filterwarnings(
+ "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"
+ )
def test_retain_index_attributes2(self):
with ensure_clean_path(self.path) as path:
@@ -4533,7 +4563,8 @@ def test_legacy_table_read(self, datapath):
datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'),
mode='r') as store:
- with catch_warnings(record=True):
+ with catch_warnings():
+ simplefilter("ignore", pd.io.pytables.IncompatibilityWarning)
store.select('df1')
store.select('df2')
store.select('wp1')
@@ -4665,6 +4696,7 @@ def test_unicode_index(self):
# PerformanceWarning
with catch_warnings(record=True):
+ simplefilter("ignore", pd.errors.PerformanceWarning)
s = Series(np.random.randn(len(unicode_values)), unicode_values)
self._check_roundtrip(s, tm.assert_series_equal)
@@ -4933,6 +4965,7 @@ def test_columns_multiindex_modified(self):
df_loaded = read_hdf(path, 'df', columns=cols2load) # noqa
assert cols2load_original == cols2load
+ @ignore_natural_naming_warning
def test_to_hdf_with_object_column_names(self):
# GH9057
# Writing HDF5 table format should only work for string-like
@@ -5277,6 +5310,7 @@ def test_complex_mixed_table(self):
reread = read_hdf(path, 'df')
assert_frame_equal(df, reread)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_complex_across_dimensions_fixed(self):
with catch_warnings(record=True):
complex128 = np.array(
@@ -5294,6 +5328,7 @@ def test_complex_across_dimensions_fixed(self):
reread = read_hdf(path, 'obj')
comp(obj, reread)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_complex_across_dimensions(self):
complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
s = Series(complex128, index=list('abcd'))
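ignore_natural_naming_warning, defined near the top of this file, exploits the fact that pytest marks are ordinary objects: a frequently repeated filter can be bound to a name once and applied as a decorator wherever needed. Generic illustration (names hypothetical):

    import pytest

    ignore_perf = pytest.mark.filterwarnings(
        "ignore::pandas.errors.PerformanceWarning")

    @ignore_perf
    def test_roundtrip():
        ...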
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index e4df7043919ae..237cc2936919e 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -18,7 +18,6 @@
"""
from __future__ import print_function
-from warnings import catch_warnings
import pytest
import sqlite3
import csv
@@ -582,11 +581,11 @@ def test_to_sql_series(self):
s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn)
tm.assert_frame_equal(s.to_frame(), s2)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_to_sql_panel(self):
- with catch_warnings(record=True):
- panel = tm.makePanel()
- pytest.raises(NotImplementedError, sql.to_sql, panel,
- 'test_panel', self.conn)
+ panel = tm.makePanel()
+ pytest.raises(NotImplementedError, sql.to_sql, panel,
+ 'test_panel', self.conn)
def test_roundtrip(self):
sql.to_sql(self.test_frame1, 'test_frame_roundtrip',
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index cfe47cae7e5e1..303d3a3d8dbe9 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -120,7 +120,7 @@ def test_read_empty_dta(self, version):
def test_data_method(self):
# Minimal testing of legacy data method
with StataReader(self.dta1_114) as rdr:
- with warnings.catch_warnings(record=True) as w: # noqa
+ with tm.assert_produces_warning(UserWarning):
parsed_114_data = rdr.data()
with StataReader(self.dta1_114) as rdr:
@@ -388,10 +388,8 @@ def test_read_write_dta11(self):
formatted = formatted.astype(np.int32)
with tm.ensure_clean() as path:
- with warnings.catch_warnings(record=True) as w:
+ with tm.assert_produces_warning(pd.io.stata.InvalidColumnName):
original.to_stata(path, None)
- # should get a warning for that format.
- assert len(w) == 1
written_and_read_again = self.read_dta(path)
tm.assert_frame_equal(
@@ -871,6 +869,9 @@ def test_drop_column(self):
read_stata(self.dta15_117, convert_dates=True, columns=columns)
@pytest.mark.parametrize('version', [114, 117])
+ @pytest.mark.filterwarnings(
+ "ignore:\\nStata value:pandas.io.stata.ValueLabelTypeMismatch"
+ )
def test_categorical_writing(self, version):
original = DataFrame.from_records(
[
@@ -901,12 +902,10 @@ def test_categorical_writing(self, version):
expected.index.name = 'index'
with tm.ensure_clean() as path:
- with warnings.catch_warnings(record=True) as w: # noqa
- # Silence warnings
- original.to_stata(path, version=version)
- written_and_read_again = self.read_dta(path)
- res = written_and_read_again.set_index('index')
- tm.assert_frame_equal(res, expected, check_categorical=False)
+ original.to_stata(path, version=version)
+ written_and_read_again = self.read_dta(path)
+ res = written_and_read_again.set_index('index')
+ tm.assert_frame_equal(res, expected, check_categorical=False)
def test_categorical_warnings_and_errors(self):
# Warning for non-string labels
@@ -933,10 +932,9 @@ def test_categorical_warnings_and_errors(self):
original = pd.concat([original[col].astype('category')
for col in original], axis=1)
- with warnings.catch_warnings(record=True) as w:
+ with tm.assert_produces_warning(pd.io.stata.ValueLabelTypeMismatch):
original.to_stata(path)
# should get a warning for mixed content
- assert len(w) == 1
@pytest.mark.parametrize('version', [114, 117])
def test_categorical_with_stata_missing_values(self, version):
@@ -1445,7 +1443,7 @@ def test_convert_strl_name_swap(self):
columns=['long1' * 10, 'long', 1])
original.index.name = 'index'
- with warnings.catch_warnings(record=True) as w: # noqa
+ with tm.assert_produces_warning(pd.io.stata.InvalidColumnName):
with tm.ensure_clean() as path:
original.to_stata(path, convert_strl=['long', 1], version=117)
reread = self.read_dta(path)
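The recurring edit in this file replaces a permissive catch_warnings(record=True) block with tm.assert_produces_warning, which both silences the expected warning and fails the test when it is missing (or when an unexpected one appears). Minimal sketch:

    import warnings
    import pandas.util.testing as tm

    with tm.assert_produces_warning(UserWarning):
        warnings.warn("legacy data() method", UserWarning)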
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index 09687dd97bd43..5c88926828fa6 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -57,6 +57,7 @@ def setup_method(self, method):
self.mpl_ge_2_0_0 = plotting._compat._mpl_ge_2_0_0()
self.mpl_ge_2_0_1 = plotting._compat._mpl_ge_2_0_1()
self.mpl_ge_2_2_0 = plotting._compat._mpl_ge_2_2_0()
+ self.mpl_ge_3_0_0 = plotting._compat._mpl_ge_3_0_0()
if self.mpl_ge_1_4_0:
self.bp_n_objects = 7
diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py
index 0abe82d138e5e..de6f6b931987c 100644
--- a/pandas/tests/plotting/test_datetimelike.py
+++ b/pandas/tests/plotting/test_datetimelike.py
@@ -151,7 +151,7 @@ def test_high_freq(self):
freaks = ['ms', 'us']
for freq in freaks:
_, ax = self.plt.subplots()
- rng = date_range('1/1/2012', periods=100000, freq=freq)
+ rng = date_range('1/1/2012', periods=100, freq=freq)
ser = Series(np.random.randn(len(rng)), rng)
_check_plot_works(ser.plot, ax=ax)
@@ -1492,7 +1492,11 @@ def test_matplotlib_scatter_datetime64(self):
ax.scatter(x="time", y="y", data=df)
fig.canvas.draw()
label = ax.get_xticklabels()[0]
- assert label.get_text() == '2017-12-12'
+ if self.mpl_ge_3_0_0:
+ expected = "2017-12-08"
+ else:
+ expected = "2017-12-12"
+ assert label.get_text() == expected
def _check_plot_works(f, freq=None, series=None, *args, **kwargs):
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 772989231e9a7..cd297c356d60e 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -628,6 +628,7 @@ def test_subplots_multiple_axes(self):
# TestDataFrameGroupByPlots.test_grouped_box_multiple_axes
fig, axes = self.plt.subplots(2, 2)
with warnings.catch_warnings():
+ warnings.simplefilter("ignore", UserWarning)
df = DataFrame(np.random.rand(10, 4),
index=list(string.ascii_letters[:10]))
@@ -1574,7 +1575,11 @@ def test_hist_df(self):
self._check_ticks_props(axes, xrot=40, yrot=0)
tm.close()
- ax = series.plot.hist(normed=True, cumulative=True, bins=4)
+ if plotting._compat._mpl_ge_2_2_0():
+ kwargs = {"density": True}
+ else:
+ kwargs = {"normed": True}
+ ax = series.plot.hist(cumulative=True, bins=4, **kwargs)
# height of last bin (index 5) must be 1.0
rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
tm.assert_almost_equal(rects[-1].get_height(), 1.0)
@@ -1850,7 +1855,7 @@ def test_line_colors(self):
tm.close()
- ax2 = df.plot(colors=custom_colors)
+ ax2 = df.plot(color=custom_colors)
lines2 = ax2.get_lines()
for l1, l2 in zip(ax.get_lines(), lines2):
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
index 864d39eba29c5..2864877550bac 100644
--- a/pandas/tests/plotting/test_hist_method.py
+++ b/pandas/tests/plotting/test_hist_method.py
@@ -12,6 +12,7 @@
from numpy.random import randn
from pandas.plotting._core import grouped_hist
+from pandas.plotting._compat import _mpl_ge_2_2_0
from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works)
@@ -193,7 +194,11 @@ def test_hist_df_legacy(self):
tm.close()
# make sure kwargs to hist are handled
- ax = ser.hist(normed=True, cumulative=True, bins=4)
+ if _mpl_ge_2_2_0():
+ kwargs = {"density": True}
+ else:
+ kwargs = {"normed": True}
+ ax = ser.hist(cumulative=True, bins=4, **kwargs)
# height of last bin (index 5) must be 1.0
rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
tm.assert_almost_equal(rects[-1].get_height(), 1.0)
@@ -279,9 +284,15 @@ def test_grouped_hist_legacy(self):
# make sure kwargs to hist are handled
xf, yf = 20, 18
xrot, yrot = 30, 40
- axes = grouped_hist(df.A, by=df.C, normed=True, cumulative=True,
+
+ if _mpl_ge_2_2_0():
+ kwargs = {"density": True}
+ else:
+ kwargs = {"normed": True}
+
+ axes = grouped_hist(df.A, by=df.C, cumulative=True,
bins=4, xlabelsize=xf, xrot=xrot,
- ylabelsize=yf, yrot=yrot)
+ ylabelsize=yf, yrot=yrot, **kwargs)
# height of last bin (index 5) must be 1.0
for ax in axes.ravel():
rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
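Both histogram tests work around the same matplotlib change: the 'normed' kwarg is deprecated in favour of 'density', with pandas' compat helper drawing the line at matplotlib 2.2. A version-dependent kwargs dict keeps a single call site working on either side; sketched in isolation:

    import numpy as np
    import pandas as pd
    from pandas.plotting._compat import _mpl_ge_2_2_0

    ser = pd.Series(np.random.randn(100))
    # 'density' on matplotlib >= 2.2, 'normed' on older releases
    kwargs = {"density": True} if _mpl_ge_2_2_0() else {"normed": True}
    ax = ser.hist(cumulative=True, bins=4, **kwargs)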
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index e80443954a434..8c84b785c88e4 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -212,6 +212,8 @@ def test_parallel_coordinates(self, iris):
with tm.assert_produces_warning(FutureWarning):
parallel_coordinates(df, 'Name', colors=colors)
+    # unclear whether this warning indicates a real problem
+ @pytest.mark.filterwarnings("ignore:Attempting to set:UserWarning")
def test_parallel_coordinates_with_sorted_labels(self):
""" For #15908 """
from pandas.plotting import parallel_coordinates
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 09f511886583c..e965ff7a78a39 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -19,6 +19,7 @@
a_ = np.array
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestJoin(object):
def setup_method(self, method):
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 42df4511578f1..50ef622a4147f 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -601,6 +601,30 @@ def test_merge_on_datetime64tz(self):
assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]'
assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]'
+ def test_merge_datetime64tz_with_dst_transition(self):
+ # GH 18885
+ df1 = pd.DataFrame(pd.date_range(
+ '2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'),
+ columns=['date'])
+ df1['value'] = 1
+ df2 = pd.DataFrame({
+ 'date': pd.to_datetime([
+ '2017-10-29 03:00:00', '2017-10-29 04:00:00',
+ '2017-10-29 05:00:00'
+ ]),
+ 'value': 2
+ })
+ df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert(
+ 'Europe/Madrid')
+ result = pd.merge(df1, df2, how='outer', on='date')
+ expected = pd.DataFrame({
+ 'date': pd.date_range(
+ '2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'),
+ 'value_x': [1] * 4 + [np.nan] * 3,
+ 'value_y': [np.nan] * 4 + [2] * 3
+ })
+ assert_frame_equal(result, expected)
+
def test_merge_non_unique_period_index(self):
# GH #16871
index = pd.period_range('2016-01-01', periods=16, freq='M')
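The DST test above merges frames whose 'date' columns cross the 2017-10-29 Europe/Madrid fall-back, where the wall clock reads 02:00 twice; the merge is well defined because tz-aware values compare by UTC instant, not wall-clock time. Quick check:

    import pandas as pd

    ts = pd.to_datetime(["2017-10-29 00:00",
                         "2017-10-29 01:00"]).tz_localize("UTC")
    ts.tz_convert("Europe/Madrid")
    # 02:00:00+02:00 (CEST) and 02:00:00+01:00 (CET): one wall-clock time,
    # two distinct instants, so the outer-merge keys never collide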
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 762b04cc3bd4f..2aaa04d571e69 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1,5 +1,6 @@
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from itertools import combinations
+from collections import deque
import datetime as dt
import dateutil
@@ -13,6 +14,7 @@
read_csv, isna, Series, date_range,
Index, Panel, MultiIndex, Timestamp,
DatetimeIndex, Categorical)
+from pandas.compat import Iterable
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.util import testing as tm
from pandas.util.testing import (assert_frame_equal,
@@ -1465,6 +1467,7 @@ def test_concat_mixed_objs(self):
        # invalid concatenation of mixed dims
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
panel = tm.makePanel()
pytest.raises(ValueError, lambda: concat([panel, s1], axis=1))
@@ -1503,59 +1506,61 @@ def test_dtype_coerceion(self):
result = concat([df.iloc[[0]], df.iloc[[1]]])
tm.assert_series_equal(result.dtypes, df.dtypes)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_concat_other_axes(self):
- with catch_warnings(record=True):
- panel = tm.makePanel()
+ panel = tm.makePanel()
- p1 = panel.iloc[:, :5, :]
- p2 = panel.iloc[:, 5:, :]
+ p1 = panel.iloc[:, :5, :]
+ p2 = panel.iloc[:, 5:, :]
- result = concat([p1, p2], axis=1)
- tm.assert_panel_equal(result, panel)
+ result = concat([p1, p2], axis=1)
+ tm.assert_panel_equal(result, panel)
- p1 = panel.iloc[:, :, :2]
- p2 = panel.iloc[:, :, 2:]
+ p1 = panel.iloc[:, :, :2]
+ p2 = panel.iloc[:, :, 2:]
- result = concat([p1, p2], axis=2)
- tm.assert_panel_equal(result, panel)
+ result = concat([p1, p2], axis=2)
+ tm.assert_panel_equal(result, panel)
- # if things are a bit misbehaved
- p1 = panel.iloc[:2, :, :2]
- p2 = panel.iloc[:, :, 2:]
- p1['ItemC'] = 'baz'
+ # if things are a bit misbehaved
+ p1 = panel.iloc[:2, :, :2]
+ p2 = panel.iloc[:, :, 2:]
+ p1['ItemC'] = 'baz'
- result = concat([p1, p2], axis=2)
+ result = concat([p1, p2], axis=2)
- expected = panel.copy()
- expected['ItemC'] = expected['ItemC'].astype('O')
- expected.loc['ItemC', :, :2] = 'baz'
- tm.assert_panel_equal(result, expected)
+ expected = panel.copy()
+ expected['ItemC'] = expected['ItemC'].astype('O')
+ expected.loc['ItemC', :, :2] = 'baz'
+ tm.assert_panel_equal(result, expected)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
+    # Panel.rename emits a FutureWarning we don't care about here
+ @pytest.mark.filterwarnings("ignore:Using:FutureWarning")
def test_panel_concat_buglet(self, sort):
- with catch_warnings(record=True):
- # #2257
- def make_panel():
- index = 5
- cols = 3
+ # #2257
+ def make_panel():
+ index = 5
+ cols = 3
- def df():
- return DataFrame(np.random.randn(index, cols),
- index=["I%s" % i for i in range(index)],
- columns=["C%s" % i for i in range(cols)])
- return Panel({"Item%s" % x: df() for x in ['A', 'B', 'C']})
+ def df():
+ return DataFrame(np.random.randn(index, cols),
+ index=["I%s" % i for i in range(index)],
+ columns=["C%s" % i for i in range(cols)])
+ return Panel({"Item%s" % x: df() for x in ['A', 'B', 'C']})
- panel1 = make_panel()
- panel2 = make_panel()
+ panel1 = make_panel()
+ panel2 = make_panel()
- panel2 = panel2.rename_axis({x: "%s_1" % x
- for x in panel2.major_axis},
- axis=1)
+ panel2 = panel2.rename_axis({x: "%s_1" % x
+ for x in panel2.major_axis},
+ axis=1)
- panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1)
- panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2)
+ panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1)
+ panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2)
- # it works!
- concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort)
+ # it works!
+ concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort)
def test_concat_series(self):
@@ -1722,8 +1727,6 @@ def test_concat_series_axis1_same_names_ignore_index(self):
tm.assert_index_equal(result.columns, expected)
def test_concat_iterables(self):
- from collections import deque, Iterable
-
# GH8645 check concat works with tuples, list, generators, and weird
# stuff like deque and custom iterables
df1 = DataFrame([1, 2, 3])
@@ -2351,30 +2354,30 @@ def test_concat_datetime_timezone(self):
tm.assert_frame_equal(result, expected)
# GH 13783: Concat after resample
- with catch_warnings(record=True):
- result = pd.concat([df1.resample('H').mean(),
- df2.resample('H').mean()])
- expected = pd.DataFrame({'a': [1, 2, 3] + [np.nan] * 3,
- 'b': [np.nan] * 3 + [1, 2, 3]},
- index=idx1.append(idx1))
- tm.assert_frame_equal(result, expected)
+ result = pd.concat([df1.resample('H').mean(),
+ df2.resample('H').mean()], sort=True)
+ expected = pd.DataFrame({'a': [1, 2, 3] + [np.nan] * 3,
+ 'b': [np.nan] * 3 + [1, 2, 3]},
+ index=idx1.append(idx1))
+ tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
@pytest.mark.parametrize('dt', np.sctypes['float'])
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_concat_no_unnecessary_upcast(dt, pdt):
- with catch_warnings(record=True):
- # GH 13247
- dims = pdt().ndim
- dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
- pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
- pdt(np.array([5], dtype=dt, ndmin=dims))]
- x = pd.concat(dfs)
- assert x.values.dtype == dt
+ # GH 13247
+ dims = pdt().ndim
+ dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)),
+ pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
+ pdt(np.array([5], dtype=dt, ndmin=dims))]
+ x = pd.concat(dfs)
+ assert x.values.dtype == dt
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
@pytest.mark.parametrize('dt', np.sctypes['int'])
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_concat_will_upcast(dt, pdt):
with catch_warnings(record=True):
dims = pdt().ndim
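The recurring change in the hunks above swaps blanket `catch_warnings(record=True)` blocks for `@pytest.mark.filterwarnings`. A minimal sketch of the difference between the two idioms (`make_noise` is a made-up helper):

```python
import warnings
import pytest

def make_noise():
    warnings.warn("\nPanel is deprecated", FutureWarning)
    return 42

def test_old_style():
    # old idiom: records, and therefore silences, *every* warning raised
    # anywhere inside the block
    with warnings.catch_warnings(record=True):
        assert make_noise() == 42

@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_new_style():
    # new idiom: only the named warning is ignored, and only for this test;
    # unrelated warnings still surface
    assert make_noise() == 42
```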
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index 81570de7586de..e83a2cb483de7 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -640,3 +640,24 @@ def test_float_suffix(self):
result = wide_to_long(df, ['result', 'treatment'],
i='A', j='colname', suffix='[0-9.]+', sep='_')
tm.assert_frame_equal(result, expected)
+
+ def test_col_substring_of_stubname(self):
+ # GH22468
+ # Don't raise ValueError when a column name is a substring
+ # of a stubname that's been passed as a string
+ wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
+ 'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81},
+ 'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6},
+ 'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67},
+ 'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}
+ }
+ wide_df = pd.DataFrame.from_dict(wide_data)
+ expected = pd.wide_to_long(wide_df,
+ stubnames=['PA'],
+ i=['node_id', 'A'],
+ j='time')
+ result = pd.wide_to_long(wide_df,
+ stubnames='PA',
+ i=['node_id', 'A'],
+ j='time')
+ tm.assert_frame_equal(result, expected)
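For context on GH22468: a bare string stubname was iterated character by character by the membership check, so the real column 'A' collided with the 'A' in 'PA' and raised. A sketch of the guard, with `normalize_stubnames` standing in for the relevant code path inside `wide_to_long`:

```python
def normalize_stubnames(stubnames, columns):
    # a str is itself iterable, so 'PA' used to be checked as ['P', 'A'],
    # and an existing column named 'A' falsely triggered the error below
    if isinstance(stubnames, str):
        stubnames = [stubnames]
    if any(s in columns for s in stubnames):
        raise ValueError("stubname can't be identical to a column name")
    return stubnames

assert normalize_stubnames('PA', ['node_id', 'A', 'PA0', 'PA1']) == ['PA']
```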
diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
index 3f4ccd7693a8f..ed9ad06a9b371 100644
--- a/pandas/tests/reshape/test_reshape.py
+++ b/pandas/tests/reshape/test_reshape.py
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
# pylint: disable-msg=W0612,E1101
-from warnings import catch_warnings
import pytest
from collections import OrderedDict
@@ -501,12 +500,12 @@ def test_get_dummies_duplicate_columns(self, df):
class TestCategoricalReshape(object):
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_reshaping_panel_categorical(self):
- with catch_warnings(record=True):
- p = tm.makePanel()
- p['str'] = 'foo'
- df = p.to_frame()
+ p = tm.makePanel()
+ p['str'] = 'foo'
+ df = p.to_frame()
df['category'] = df['str'].astype('category')
result = df['category'].unstack()
diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py
index 9636c92ec22d5..fce1ef29235cc 100644
--- a/pandas/tests/scalar/timedelta/test_arithmetic.py
+++ b/pandas/tests/scalar/timedelta/test_arithmetic.py
@@ -200,6 +200,57 @@ def test_td_rsub_numeric_raises(self):
with pytest.raises(TypeError):
2.0 - td
+ def test_td_sub_timedeltalike_object_dtype_array(self):
+ # GH 21980
+ arr = np.array([Timestamp('20130101 9:01'),
+ Timestamp('20121230 9:02')])
+ exp = np.array([Timestamp('20121231 9:01'),
+ Timestamp('20121229 9:02')])
+ res = arr - pd.Timedelta('1D')
+ tm.assert_numpy_array_equal(res, exp)
+
+ def test_td_sub_mixed_most_timedeltalike_object_dtype_array(self):
+ # GH 21980
+ now = pd.Timestamp.now()
+ arr = np.array([now,
+ pd.Timedelta('1D'),
+ np.timedelta64(2, 'h')])
+ exp = np.array([now - pd.Timedelta('1D'),
+ pd.Timedelta('0D'),
+ np.timedelta64(2, 'h') - pd.Timedelta('1D')])
+ res = arr - pd.Timedelta('1D')
+ tm.assert_numpy_array_equal(res, exp)
+
+ def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self):
+ # GH 21980
+ now = pd.Timestamp.now()
+ arr = np.array([now,
+ pd.Timedelta('1D'),
+ np.timedelta64(2, 'h')])
+ with pytest.raises(TypeError):
+ pd.Timedelta('1D') - arr
+
+ @pytest.mark.parametrize('op', [operator.add, ops.radd])
+ def test_td_add_timedeltalike_object_dtype_array(self, op):
+ # GH 21980
+ arr = np.array([Timestamp('20130101 9:01'),
+ Timestamp('20121230 9:02')])
+ exp = np.array([Timestamp('20130102 9:01'),
+ Timestamp('20121231 9:02')])
+ res = op(arr, pd.Timedelta('1D'))
+ tm.assert_numpy_array_equal(res, exp)
+
+ @pytest.mark.parametrize('op', [operator.add, ops.radd])
+ def test_td_add_mixed_timedeltalike_object_dtype_array(self, op):
+ # GH 21980
+ now = pd.Timestamp.now()
+ arr = np.array([now,
+ pd.Timedelta('1D')])
+ exp = np.array([now + pd.Timedelta('1D'),
+ pd.Timedelta('2D')])
+ res = op(arr, pd.Timedelta('1D'))
+ tm.assert_numpy_array_equal(res, exp)
+
class TestTimedeltaMultiplicationDivision(object):
"""
@@ -616,3 +667,17 @@ def test_rdivmod_invalid(self):
with pytest.raises(TypeError):
divmod(np.array([22, 24]), td)
+
+ @pytest.mark.parametrize('op', [
+ operator.mul,
+ ops.rmul,
+ operator.truediv,
+ ops.rdiv,
+ ops.rsub])
+ @pytest.mark.parametrize('arr', [
+ np.array([Timestamp('20130101 9:01'), Timestamp('20121230 9:02')]),
+ np.array([pd.Timestamp.now(), pd.Timedelta('1D')])
+ ])
+ def test_td_op_timedelta_timedeltalike_array(self, op, arr):
+ with pytest.raises(TypeError):
+ op(arr, pd.Timedelta('1D'))
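An interactive sketch of the behaviour these tests pin down (GH 21980): `Timedelta` broadcasts element-wise over object-dtype arrays for addition and subtraction, while multiplication, division, and reversed subtraction with such arrays raise `TypeError`:

```python
import numpy as np
import pandas as pd

arr = np.array([pd.Timestamp('2013-01-01 09:01'),
                pd.Timedelta('1D')])          # mixed contents, dtype=object

print(arr + pd.Timedelta('1D'))   # Timestamp shifts a day, Timedelta becomes 2 days
print(arr - pd.Timedelta('1D'))   # likewise, element by element

try:
    arr * pd.Timedelta('1D')      # no element-wise multiply for these types
except TypeError:
    pass
```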
diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py
index bf41840c58ded..b6c783dc07aec 100644
--- a/pandas/tests/scalar/timestamp/test_unary_ops.py
+++ b/pandas/tests/scalar/timestamp/test_unary_ops.py
@@ -13,6 +13,7 @@
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG
from pandas import Timestamp, NaT
+from pandas.tseries.frequencies import to_offset
class TestTimestampUnaryOps(object):
@@ -70,7 +71,7 @@ def test_round_subsecond(self):
assert result == expected
def test_round_nonstandard_freq(self):
- with tm.assert_produces_warning():
+ with tm.assert_produces_warning(False):
Timestamp('2016-10-17 12:00:00.001501031').round('1010ns')
def test_round_invalid_arg(self):
@@ -132,6 +133,68 @@ def test_floor(self):
expected = Timestamp('20130101')
assert result == expected
+ @pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
+ def test_round_dst_border(self, method):
+ # GH 18946 round near DST
+ ts = Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert(
+ 'Europe/Madrid'
+ )
+ # ambiguous=True resolves to the DST (first) occurrence, so ts is unchanged
+ result = getattr(ts, method)('H', ambiguous=True)
+ assert result == ts
+
+ result = getattr(ts, method)('H', ambiguous=False)
+ expected = Timestamp('2017-10-29 01:00:00', tz='UTC').tz_convert(
+ 'Europe/Madrid'
+ )
+ assert result == expected
+
+ result = getattr(ts, method)('H', ambiguous='NaT')
+ assert result is NaT
+
+ with pytest.raises(pytz.AmbiguousTimeError):
+ getattr(ts, method)('H', ambiguous='raise')
+
+ @pytest.mark.parametrize('timestamp', [
+ '2018-01-01 0:0:0.124999360',
+ '2018-01-01 0:0:0.125000367',
+ '2018-01-01 0:0:0.125500',
+ '2018-01-01 0:0:0.126500',
+ '2018-01-01 12:00:00',
+ '2019-01-01 12:00:00',
+ ])
+ @pytest.mark.parametrize('freq', [
+ '2ns', '3ns', '4ns', '5ns', '6ns', '7ns',
+ '250ns', '500ns', '750ns',
+ '1us', '19us', '250us', '500us', '750us',
+ '1s', '2s', '3s',
+ '1D',
+ ])
+ def test_round_int64(self, timestamp, freq):
+ """check that all rounding modes are accurate to int64 precision
+ see GH#22591
+ """
+ dt = Timestamp(timestamp)
+ unit = to_offset(freq).nanos
+
+ # test floor
+ result = dt.floor(freq)
+ assert result.value % unit == 0, "floor not a {} multiple".format(freq)
+ assert 0 <= dt.value - result.value < unit, "floor error"
+
+ # test ceil
+ result = dt.ceil(freq)
+ assert result.value % unit == 0, "ceil not a {} multiple".format(freq)
+ assert 0 <= result.value - dt.value < unit, "ceil error"
+
+ # test round
+ result = dt.round(freq)
+ assert result.value % unit == 0, "round not a {} multiple".format(freq)
+ assert abs(result.value - dt.value) <= unit // 2, "round error"
+ if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2:
+ # round half to even
+ assert result.value // unit % 2 == 0, "round half to even error"
+
# --------------------------------------------------------------
# Timestamp.replace
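The DST tests above exercise the new `ambiguous` keyword on `Timestamp.round`/`floor`/`ceil` (GH 18946), while `test_round_int64` checks the floor/ceil bounds and round-half-to-even tie-breaking against raw nanosecond values. A usage sketch at the Europe/Madrid fall-back, where the wall time 02:00 occurs twice:

```python
import pandas as pd
import pytz

ts = pd.Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert('Europe/Madrid')

ts.round('H', ambiguous=True)    # keep the DST (first) occurrence: ts itself
ts.round('H', ambiguous=False)   # resolve to the standard-time occurrence
ts.round('H', ambiguous='NaT')   # give up and return NaT

try:
    ts.round('H', ambiguous='raise')
except pytz.AmbiguousTimeError:
    pass                         # ambiguous times are rejected explicitly
```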
diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
index bcea47f42056b..d1f022ef982c0 100644
--- a/pandas/tests/series/indexing/test_datetime.py
+++ b/pandas/tests/series/indexing/test_datetime.py
@@ -383,6 +383,8 @@ def test_getitem_setitem_periodindex():
assert_series_equal(result, ts)
+# FutureWarning from NumPy about non-tuple indexing.
+@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_getitem_median_slice_bug():
index = date_range('20090415', '20090519', freq='2B')
s = Series(np.random.randn(13), index=index)
diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
index 25bc394e312a0..aa4f58089a933 100644
--- a/pandas/tests/series/indexing/test_indexing.py
+++ b/pandas/tests/series/indexing/test_indexing.py
@@ -390,6 +390,8 @@ def test_setslice(test_data):
assert sl.index.is_unique
+# FutureWarning from NumPy about non-tuple indexing like [slice(None), 5].
+@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(test_data):
# invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
with tm.assert_raises_regex(ValueError, 'tuple-index'):
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index d5d9e5f4f14de..58a160d17cbe8 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -789,6 +789,38 @@ def test_corr_invalid_method(self):
with tm.assert_raises_regex(ValueError, msg):
s1.corr(s2, method="____")
+ def test_corr_callable_method(self):
+ # simple correlation example
+ # returns 1 if exact equality, 0 otherwise
+ my_corr = lambda a, b: 1. if (a == b).all() else 0.
+
+ # simple example
+ s1 = Series([1, 2, 3, 4, 5])
+ s2 = Series([5, 4, 3, 2, 1])
+ expected = 0
+ tm.assert_almost_equal(
+ s1.corr(s2, method=my_corr),
+ expected)
+
+ # full overlap
+ tm.assert_almost_equal(
+ self.ts.corr(self.ts, method=my_corr), 1.)
+
+ # partial overlap
+ tm.assert_almost_equal(
+ self.ts[:15].corr(self.ts[5:], method=my_corr), 1.)
+
+ # No overlap
+ assert np.isnan(
+ self.ts[::2].corr(self.ts[1::2], method=my_corr))
+
+ # dataframe example
+ df = pd.DataFrame([s1, s2])
+ expected = pd.DataFrame([
+ {0: 1., 1: 0}, {0: 0, 1: 1.}])
+ tm.assert_almost_equal(
+ df.transpose().corr(method=my_corr), expected)
+
def test_cov(self):
# full overlap
tm.assert_almost_equal(self.ts.cov(self.ts), self.ts.std() ** 2)
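`test_corr_callable_method` covers the new callable `method` for `Series.corr`: the callable receives the aligned values of both series and returns a scalar. A toy usage sketch, with `sign_match` as a made-up metric:

```python
import pandas as pd

def sign_match(a, b):
    # fraction of positions where the two inputs agree in sign
    return float(((a > 0) == (b > 0)).mean())

s1 = pd.Series([1.0, -2.0, 3.0])
s2 = pd.Series([2.0, -1.0, -3.0])
print(s1.corr(s2, method=sign_match))   # signs agree at 2 of 3 positions
```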
@@ -1640,8 +1672,35 @@ def test_value_counts_categorical_not_ordered(self):
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
+main_dtypes = [
+ 'datetime',
+ 'datetimetz',
+ 'timedelta',
+ 'int8',
+ 'int16',
+ 'int32',
+ 'int64',
+ 'float32',
+ 'float64',
+ 'uint8',
+ 'uint16',
+ 'uint32',
+ 'uint64'
+]
+
+
@pytest.fixture
def s_main_dtypes():
+ """A DataFrame with many dtypes
+
+ * datetime
+ * datetimetz
+ * timedelta
+ * [u]int{8,16,32,64}
+ * float{32,64}
+
+ The columns are the name of the dtype.
+ """
df = pd.DataFrame(
{'datetime': pd.to_datetime(['2003', '2002',
'2001', '2002',
@@ -1661,6 +1720,12 @@ def s_main_dtypes():
return df
+@pytest.fixture(params=main_dtypes)
+def s_main_dtypes_split(request, s_main_dtypes):
+ """Each series in s_main_dtypes."""
+ return s_main_dtypes[request.param]
+
+
class TestMode(object):
@pytest.mark.parametrize('dropna, expected', [
@@ -1864,12 +1929,10 @@ def test_error(self, r):
with tm.assert_raises_regex(TypeError, msg):
method(arg)
- @pytest.mark.parametrize(
- "s",
- [v for k, v in s_main_dtypes().iteritems()])
- def test_nsmallest_nlargest(self, s):
+ def test_nsmallest_nlargest(self, s_main_dtypes_split):
# float, int, datetime64 (use i8), timedelta64 (same),
# object that are numbers, object that are strings
+ s = s_main_dtypes_split
assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]])
assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]])
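The `s_main_dtypes_split` refactor exists because `@pytest.mark.parametrize` may no longer call a fixture function directly; a params-fixture fans the shared frame out into one test invocation per dtype instead. The same pattern in miniature, with hypothetical names:

```python
import pandas as pd
import pytest

@pytest.fixture
def typed_frame():
    return pd.DataFrame({'int64': [3, 1, 2],
                         'float64': [3.0, 1.0, 2.0]})

@pytest.fixture(params=['int64', 'float64'])
def typed_column(request, typed_frame):
    """Each column of typed_frame; one test invocation per dtype."""
    return typed_frame[request.param]

def test_nsmallest_is_min(typed_column):
    assert typed_column.nsmallest(1).iloc[0] == typed_column.min()
```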
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index da9b03e81994d..3b82242626c20 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -1,6 +1,7 @@
# coding=utf-8
# pylint: disable-msg=E1101,W0612
from collections import OrderedDict
+import warnings
import pydoc
import pytest
@@ -728,8 +729,12 @@ def test_dt_accessor_api_for_categorical(self):
func_defs.append(f_def)
for func, args, kwargs in func_defs:
- res = getattr(c.dt, func)(*args, **kwargs)
- exp = getattr(s.dt, func)(*args, **kwargs)
+ with warnings.catch_warnings():
+ if func == 'to_period':
+ # dropping TZ
+ warnings.simplefilter("ignore", UserWarning)
+ res = getattr(c.dt, func)(*args, **kwargs)
+ exp = getattr(s.dt, func)(*args, **kwargs)
if isinstance(res, DataFrame):
tm.assert_frame_equal(res, exp)
diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
index b717d75d835d0..20215279cf031 100644
--- a/pandas/tests/series/test_apply.py
+++ b/pandas/tests/series/test_apply.py
@@ -17,18 +17,18 @@
import pandas.util.testing as tm
from pandas.conftest import _get_cython_table_params
-from .common import TestData
+class TestSeriesApply():
-class TestSeriesApply(TestData):
-
- def test_apply(self):
+ def test_apply(self, datetime_series):
with np.errstate(all='ignore'):
- tm.assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts))
+ tm.assert_series_equal(datetime_series.apply(np.sqrt),
+ np.sqrt(datetime_series))
# element-wise apply
import math
- tm.assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts))
+ tm.assert_series_equal(datetime_series.apply(math.exp),
+ np.exp(datetime_series))
# empty series
s = Series(dtype=object, name='foo', index=pd.Index([], name='bar'))
@@ -66,11 +66,11 @@ def test_apply_dont_convert_dtype(self):
result = s.apply(f, convert_dtype=False)
assert result.dtype == object
- def test_with_string_args(self):
+ def test_with_string_args(self, datetime_series):
for arg in ['sum', 'mean', 'min', 'max', 'std']:
- result = self.ts.apply(arg)
- expected = getattr(self.ts, arg)()
+ result = datetime_series.apply(arg)
+ expected = getattr(datetime_series, arg)()
assert result == expected
def test_apply_args(self):
@@ -165,34 +165,34 @@ def test_apply_dict_depr(self):
tsdf.A.agg({'foo': ['sum', 'mean']})
-class TestSeriesAggregate(TestData):
+class TestSeriesAggregate():
- def test_transform(self):
+ def test_transform(self, string_series):
# transforming functions
with np.errstate(all='ignore'):
- f_sqrt = np.sqrt(self.series)
- f_abs = np.abs(self.series)
+ f_sqrt = np.sqrt(string_series)
+ f_abs = np.abs(string_series)
# ufunc
- result = self.series.transform(np.sqrt)
+ result = string_series.transform(np.sqrt)
expected = f_sqrt.copy()
assert_series_equal(result, expected)
- result = self.series.apply(np.sqrt)
+ result = string_series.apply(np.sqrt)
assert_series_equal(result, expected)
# list-like
- result = self.series.transform([np.sqrt])
+ result = string_series.transform([np.sqrt])
expected = f_sqrt.to_frame().copy()
expected.columns = ['sqrt']
assert_frame_equal(result, expected)
- result = self.series.transform([np.sqrt])
+ result = string_series.transform([np.sqrt])
assert_frame_equal(result, expected)
- result = self.series.transform(['sqrt'])
+ result = string_series.transform(['sqrt'])
assert_frame_equal(result, expected)
# multiple items in list
@@ -200,10 +200,10 @@ def test_transform(self):
# series and then concatting
expected = pd.concat([f_sqrt, f_abs], axis=1)
expected.columns = ['sqrt', 'absolute']
- result = self.series.apply([np.sqrt, np.abs])
+ result = string_series.apply([np.sqrt, np.abs])
assert_frame_equal(result, expected)
- result = self.series.transform(['sqrt', 'abs'])
+ result = string_series.transform(['sqrt', 'abs'])
expected.columns = ['sqrt', 'abs']
assert_frame_equal(result, expected)
@@ -212,28 +212,28 @@ def test_transform(self):
expected.columns = ['foo', 'bar']
expected = expected.unstack().rename('series')
- result = self.series.apply({'foo': np.sqrt, 'bar': np.abs})
+ result = string_series.apply({'foo': np.sqrt, 'bar': np.abs})
assert_series_equal(result.reindex_like(expected), expected)
- def test_transform_and_agg_error(self):
+ def test_transform_and_agg_error(self, string_series):
# we are trying to transform with an aggregator
def f():
- self.series.transform(['min', 'max'])
+ string_series.transform(['min', 'max'])
pytest.raises(ValueError, f)
def f():
with np.errstate(all='ignore'):
- self.series.agg(['sqrt', 'max'])
+ string_series.agg(['sqrt', 'max'])
pytest.raises(ValueError, f)
def f():
with np.errstate(all='ignore'):
- self.series.transform(['sqrt', 'max'])
+ string_series.transform(['sqrt', 'max'])
pytest.raises(ValueError, f)
def f():
with np.errstate(all='ignore'):
- self.series.agg({'foo': np.sqrt, 'bar': 'sum'})
+ string_series.agg({'foo': np.sqrt, 'bar': 'sum'})
pytest.raises(ValueError, f)
def test_demo(self):
@@ -272,33 +272,34 @@ def test_multiple_aggregators_with_dict_api(self):
'min', 'sum']).unstack().rename('series')
tm.assert_series_equal(result.reindex_like(expected), expected)
- def test_agg_apply_evaluate_lambdas_the_same(self):
+ def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
# test that we are evaluating row-by-row first
# before vectorized evaluation
- result = self.series.apply(lambda x: str(x))
- expected = self.series.agg(lambda x: str(x))
+ result = string_series.apply(lambda x: str(x))
+ expected = string_series.agg(lambda x: str(x))
tm.assert_series_equal(result, expected)
- result = self.series.apply(str)
- expected = self.series.agg(str)
+ result = string_series.apply(str)
+ expected = string_series.agg(str)
tm.assert_series_equal(result, expected)
- def test_with_nested_series(self):
+ def test_with_nested_series(self, datetime_series):
# GH 2316
# .agg with a reducer and a transform, what to do
- result = self.ts.apply(lambda x: Series(
+ result = datetime_series.apply(lambda x: Series(
[x, x ** 2], index=['x', 'x^2']))
- expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2})
+ expected = DataFrame({'x': datetime_series,
+ 'x^2': datetime_series ** 2})
tm.assert_frame_equal(result, expected)
- result = self.ts.agg(lambda x: Series(
+ result = datetime_series.agg(lambda x: Series(
[x, x ** 2], index=['x', 'x^2']))
tm.assert_frame_equal(result, expected)
- def test_replicate_describe(self):
+ def test_replicate_describe(self, string_series):
# this also tests a result set that is all scalars
- expected = self.series.describe()
- result = self.series.apply(OrderedDict(
+ expected = string_series.describe()
+ result = string_series.apply(OrderedDict(
[('count', 'count'),
('mean', 'mean'),
('std', 'std'),
@@ -309,13 +310,13 @@ def test_replicate_describe(self):
('max', 'max')]))
assert_series_equal(result, expected)
- def test_reduce(self):
+ def test_reduce(self, string_series):
# reductions with named functions
- result = self.series.agg(['sum', 'mean'])
- expected = Series([self.series.sum(),
- self.series.mean()],
+ result = string_series.agg(['sum', 'mean'])
+ expected = Series([string_series.sum(),
+ string_series.mean()],
['sum', 'mean'],
- name=self.series.name)
+ name=string_series.name)
assert_series_equal(result, expected)
def test_non_callable_aggregates(self):
@@ -414,9 +415,9 @@ def test_agg_cython_table_raises(self, series, func, expected):
series.agg(func)
-class TestSeriesMap(TestData):
+class TestSeriesMap():
- def test_map(self):
+ def test_map(self, datetime_series):
index, data = tm.getMixedTypeDict()
source = Series(data['B'], index=data['C'])
@@ -434,8 +435,8 @@ def test_map(self):
assert v == source[target[k]]
# function
- result = self.ts.map(lambda x: x * 2)
- tm.assert_series_equal(result, self.ts * 2)
+ result = datetime_series.map(lambda x: x * 2)
+ tm.assert_series_equal(result, datetime_series * 2)
# GH 10324
a = Series([1, 2, 3, 4])
@@ -500,10 +501,10 @@ def test_map_type_inference(self):
s2 = s.map(lambda x: np.where(x == 0, 0, 1))
assert issubclass(s2.dtype.type, np.integer)
- def test_map_decimal(self):
+ def test_map_decimal(self, string_series):
from decimal import Decimal
- result = self.series.map(lambda x: Decimal(str(x)))
+ result = string_series.map(lambda x: Decimal(str(x)))
assert result.dtype == np.object_
assert isinstance(result[0], Decimal)
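Throughout this file the `TestData` mixin gives way to fixture arguments; the `datetime_series` and `string_series` fixtures are assumed to live in a shared `pandas/tests/series/conftest.py` that this excerpt does not show. Roughly, they would look like:

```python
import numpy as np
import pandas as pd
import pytest

@pytest.fixture
def datetime_series():
    """A float Series with a DatetimeIndex, analogous to the old self.ts."""
    return pd.Series(np.random.randn(30),
                     index=pd.date_range('2000-01-01', periods=30),
                     name='ts')

@pytest.fixture
def string_series():
    """A float Series with a string index, analogous to the old self.series."""
    return pd.Series(np.random.randn(30),
                     index=['i%d' % i for i in range(30)],
                     name='series')
```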
diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py
index 3104d85601434..e85a0ac42ae1a 100644
--- a/pandas/tests/series/test_asof.py
+++ b/pandas/tests/series/test_asof.py
@@ -8,10 +8,8 @@
import pandas.util.testing as tm
-from .common import TestData
-
-class TestSeriesAsof(TestData):
+class TestSeriesAsof():
def test_basic(self):
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 9faf47ace242d..4817f5bdccc29 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -957,6 +957,8 @@ def test_constructor_set(self):
values = frozenset(values)
pytest.raises(TypeError, Series, values)
+ # https://github.com/pandas-dev/pandas/issues/22698
+ @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning")
def test_fromDict(self):
data = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index 5b45c6003a005..fee2323310b9c 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -5,6 +5,7 @@
import calendar
import unicodedata
import pytest
+import pytz
from datetime import datetime, time, date
@@ -95,42 +96,6 @@ def compare(s, name):
expected = Series(exp_values, index=s.index, name='xxx')
tm.assert_series_equal(result, expected)
- # round
- s = Series(pd.to_datetime(['2012-01-01 13:00:00',
- '2012-01-01 12:01:00',
- '2012-01-01 08:00:00']), name='xxx')
- result = s.dt.round('D')
- expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
- '2012-01-01']), name='xxx')
- tm.assert_series_equal(result, expected)
-
- # round with tz
- result = (s.dt.tz_localize('UTC')
- .dt.tz_convert('US/Eastern')
- .dt.round('D'))
- exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
- '2012-01-01']).tz_localize('US/Eastern')
- expected = Series(exp_values, name='xxx')
- tm.assert_series_equal(result, expected)
-
- # floor
- s = Series(pd.to_datetime(['2012-01-01 13:00:00',
- '2012-01-01 12:01:00',
- '2012-01-01 08:00:00']), name='xxx')
- result = s.dt.floor('D')
- expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01',
- '2012-01-01']), name='xxx')
- tm.assert_series_equal(result, expected)
-
- # ceil
- s = Series(pd.to_datetime(['2012-01-01 13:00:00',
- '2012-01-01 12:01:00',
- '2012-01-01 08:00:00']), name='xxx')
- result = s.dt.ceil('D')
- expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
- '2012-01-02']), name='xxx')
- tm.assert_series_equal(result, expected)
-
# datetimeindex with tz
s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
name='xxx')
@@ -261,6 +226,64 @@ def get_dir(s):
with pytest.raises(com.SettingWithCopyError):
s.dt.hour[0] = 5
+ @pytest.mark.parametrize('method, dates', [
+ ['round', ['2012-01-02', '2012-01-02', '2012-01-01']],
+ ['floor', ['2012-01-01', '2012-01-01', '2012-01-01']],
+ ['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']]
+ ])
+ def test_dt_round(self, method, dates):
+ # round
+ s = Series(pd.to_datetime(['2012-01-01 13:00:00',
+ '2012-01-01 12:01:00',
+ '2012-01-01 08:00:00']), name='xxx')
+ result = getattr(s.dt, method)('D')
+ expected = Series(pd.to_datetime(dates), name='xxx')
+ tm.assert_series_equal(result, expected)
+
+ def test_dt_round_tz(self):
+ s = Series(pd.to_datetime(['2012-01-01 13:00:00',
+ '2012-01-01 12:01:00',
+ '2012-01-01 08:00:00']), name='xxx')
+ result = (s.dt.tz_localize('UTC')
+ .dt.tz_convert('US/Eastern')
+ .dt.round('D'))
+
+ exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
+ '2012-01-01']).tz_localize('US/Eastern')
+ expected = Series(exp_values, name='xxx')
+ tm.assert_series_equal(result, expected)
+
+ @pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
+ def test_dt_round_tz_ambiguous(self, method):
+ # GH 18946 round near DST
+ df1 = pd.DataFrame([
+ pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True),
+ pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True),
+ pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True)
+ ],
+ columns=['date'])
+ df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid')
+ # infer
+ result = getattr(df1.date.dt, method)('H', ambiguous='infer')
+ expected = df1['date']
+ tm.assert_series_equal(result, expected)
+
+ # bool-array
+ result = getattr(df1.date.dt, method)(
+ 'H', ambiguous=[True, False, False]
+ )
+ tm.assert_series_equal(result, expected)
+
+ # NaT
+ result = getattr(df1.date.dt, method)('H', ambiguous='NaT')
+ expected = df1['date'].copy()
+ expected.iloc[0:2] = pd.NaT
+ tm.assert_series_equal(result, expected)
+
+ # raise
+ with pytest.raises(pytz.AmbiguousTimeError):
+ getattr(df1.date.dt, method)('H', ambiguous='raise')
+
def test_dt_namespace_accessor_categorical(self):
# GH 19468
dti = DatetimeIndex(['20171111', '20181212']).repeat(2)
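`test_dt_round_tz_ambiguous` threads the same `ambiguous` keyword through the `.dt` accessor, including a per-element bool array. A two-row sketch of that mode:

```python
import pandas as pd

dates = pd.Series(pd.to_datetime([
    '2017-10-29 02:00:00+02:00',   # first 02:00 (CEST)
    '2017-10-29 02:00:00+01:00',   # second 02:00 (CET)
], utc=True)).dt.tz_convert('Europe/Madrid')

# one flag per element: True resolves to the DST side, False to standard time
print(dates.dt.round('H', ambiguous=[True, False]))
```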
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index dd1b623f0f7ff..125dff9ecfa7c 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -428,8 +428,10 @@ def test_astype_empty_constructor_equality(self, dtype):
if dtype not in ('S', 'V'): # poor support (if any) currently
with warnings.catch_warnings(record=True):
- # Generic timestamp dtypes ('M' and 'm') are deprecated,
- # but we test that already in series/test_constructors.py
+ if dtype in ('M', 'm'):
+ # Generic timestamp dtypes ('M' and 'm') are deprecated,
+ # but we test that already in series/test_constructors.py
+ warnings.simplefilter("ignore", FutureWarning)
init_empty = Series([], dtype=dtype)
as_type_empty = Series([]).astype(dtype)
@@ -506,3 +508,8 @@ def test_infer_objects_series(self):
assert actual.dtype == 'object'
tm.assert_series_equal(actual, expected)
+
+ def test_is_homogeneous_type(self):
+ assert Series()._is_homogeneous_type
+ assert Series([1, 2])._is_homogeneous_type
+ assert Series(pd.Categorical([1, 2]))._is_homogeneous_type
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 5e5e9c0895ccf..601e251d45b4b 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -14,6 +14,7 @@
NaT, date_range, timedelta_range, Categorical)
from pandas.core.indexes.datetimes import Timestamp
import pandas.core.nanops as nanops
+from pandas.core import ops
from pandas.compat import range
from pandas import compat
@@ -603,6 +604,42 @@ def test_ops_datetimelike_align(self):
result = (dt2.to_frame() - dt.to_frame())[0]
assert_series_equal(result, expected)
+ @pytest.mark.parametrize('op', [
+ operator.and_,
+ operator.or_,
+ operator.xor,
+ pytest.param(ops.rand_,
+ marks=pytest.mark.xfail(reason="GH#22092 Index "
+ "implementation returns "
+ "Index",
+ raises=AssertionError,
+ strict=True)),
+ pytest.param(ops.ror_,
+ marks=pytest.mark.xfail(reason="GH#22092 Index "
+ "implementation raises",
+ raises=ValueError, strict=True)),
+ pytest.param(ops.rxor,
+ marks=pytest.mark.xfail(reason="GH#22092 Index "
+ "implementation raises",
+ raises=TypeError, strict=True))
+ ])
+ def test_bool_ops_with_index(self, op):
+ # GH#22092, GH#19792
+ ser = Series([True, True, False, False])
+ idx1 = Index([True, False, True, False])
+ idx2 = Index([1, 0, 1, 0])
+
+ expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))])
+
+ result = op(ser, idx1)
+ assert_series_equal(result, expected)
+
+ expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))],
+ dtype=bool)
+
+ result = op(ser, idx2)
+ assert_series_equal(result, expected)
+
def test_operators_bitwise(self):
# GH 9016: support bitwise op for integer types
index = list('bca')
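`test_bool_ops_with_index` mixes passing and expected-failing inputs in one list via `pytest.param(..., marks=pytest.mark.xfail(...))`; `strict=True` turns an unexpected pass into a hard failure, so GH#22092 cannot regress silently. The idiom in isolation, with a deliberately failing input:

```python
import pytest

@pytest.mark.parametrize('value', [
    1,
    2,
    pytest.param('oops',
                 marks=pytest.mark.xfail(raises=TypeError, strict=True,
                                         reason="str + int is undefined")),
])
def test_increment(value):
    assert value + 1 > value
```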
diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py
index a0cde5f81d021..8c4b6ee5b1d75 100644
--- a/pandas/tests/series/test_validate.py
+++ b/pandas/tests/series/test_validate.py
@@ -1,14 +1,7 @@
-from pandas.core.series import Series
-
import pytest
import pandas.util.testing as tm
-@pytest.fixture
-def series():
- return Series([1, 2, 3, 4, 5])
-
-
class TestSeriesValidate(object):
"""Tests for error handling related to data types of method arguments."""
@@ -16,7 +9,7 @@ class TestSeriesValidate(object):
"sort_values", "sort_index",
"rename", "dropna"])
@pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0])
- def test_validate_bool_args(self, series, func, inplace):
+ def test_validate_bool_args(self, string_series, func, inplace):
msg = "For argument \"inplace\" expected type bool"
kwargs = dict(inplace=inplace)
@@ -24,4 +17,4 @@ def test_validate_bool_args(self, series, func, inplace):
kwargs["name"] = "hello"
with tm.assert_raises_regex(ValueError, msg):
- getattr(series, func)(**kwargs)
+ getattr(string_series, func)(**kwargs)
diff --git a/pandas/tests/sparse/frame/conftest.py b/pandas/tests/sparse/frame/conftest.py
new file mode 100644
index 0000000000000..f36b4e643d10b
--- /dev/null
+++ b/pandas/tests/sparse/frame/conftest.py
@@ -0,0 +1,116 @@
+import pytest
+
+import numpy as np
+
+from pandas import SparseDataFrame, SparseArray, DataFrame, bdate_range
+
+data = {'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6],
+ 'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6],
+ 'C': np.arange(10, dtype=np.float64),
+ 'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan]}
+dates = bdate_range('1/1/2011', periods=10)
+
+
+# fixture names must be compatible with the tests in
+# tests/frame/test_api.SharedWithSparse
+
+@pytest.fixture
+def float_frame_dense():
+ """
+ Fixture for dense DataFrame of floats with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D']; some entries are missing
+ """
+ return DataFrame(data, index=dates)
+
+
+@pytest.fixture
+def float_frame():
+ """
+ Fixture for sparse DataFrame of floats with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D']; some entries are missing
+ """
+ # default_kind='block' is the default
+ return SparseDataFrame(data, index=dates, default_kind='block')
+
+
+@pytest.fixture
+def float_frame_int_kind():
+ """
+ Fixture for sparse DataFrame of floats with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D'] and default_kind='integer'.
+ Some entries are missing.
+ """
+ return SparseDataFrame(data, index=dates, default_kind='integer')
+
+
+@pytest.fixture
+def float_string_frame():
+ """
+ Fixture for sparse DataFrame of floats and strings with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing
+ """
+ sdf = SparseDataFrame(data, index=dates)
+ sdf['foo'] = SparseArray(['bar'] * len(dates))
+ return sdf
+
+
+@pytest.fixture
+def float_frame_fill0_dense():
+ """
+ Fixture for dense DataFrame of floats with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0
+ """
+ values = SparseDataFrame(data).values
+ values[np.isnan(values)] = 0
+ return DataFrame(values, columns=['A', 'B', 'C', 'D'], index=dates)
+
+
+@pytest.fixture
+def float_frame_fill0():
+ """
+ Fixture for sparse DataFrame of floats with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0
+ """
+ values = SparseDataFrame(data).values
+ values[np.isnan(values)] = 0
+ return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
+ default_fill_value=0, index=dates)
+
+
+@pytest.fixture
+def float_frame_fill2_dense():
+ """
+ Fixture for dense DataFrame of floats with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2
+ """
+ values = SparseDataFrame(data).values
+ values[np.isnan(values)] = 2
+ return DataFrame(values, columns=['A', 'B', 'C', 'D'], index=dates)
+
+
+@pytest.fixture
+def float_frame_fill2():
+ """
+ Fixture for sparse DataFrame of floats with DatetimeIndex
+
+ Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2
+ """
+ values = SparseDataFrame(data).values
+ values[np.isnan(values)] = 2
+ return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
+ default_fill_value=2, index=dates)
+
+
+@pytest.fixture
+def empty_frame():
+ """
+ Fixture for empty SparseDataFrame
+ """
+ return SparseDataFrame()
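These module-level fixtures replace the `setup_method` state deleted from test_frame.py below; a test now simply names the frames it needs. A consumption sketch mirroring assertions that appear later in the diff:

```python
import pandas.util.testing as tm

def test_shape_and_roundtrip(float_frame, float_frame_dense):
    # both fixtures are built from the same module-level `data` dict
    assert float_frame.shape == float_frame_dense.shape == (10, 4)
    tm.assert_frame_equal(float_frame.to_dense(), float_frame_dense,
                          check_dtype=False)
```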
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index be5a1710119ee..5e5a341ca76d6 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -3,7 +3,6 @@
import operator
import pytest
-from warnings import catch_warnings
from numpy import nan
import numpy as np
import pandas as pd
@@ -28,42 +27,6 @@ class TestSparseDataFrame(SharedWithSparse):
_assert_frame_equal = staticmethod(tm.assert_sp_frame_equal)
_assert_series_equal = staticmethod(tm.assert_sp_series_equal)
- def setup_method(self, method):
- self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
- 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
- 'C': np.arange(10, dtype=np.float64),
- 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
-
- self.dates = bdate_range('1/1/2011', periods=10)
-
- self.orig = pd.DataFrame(self.data, index=self.dates)
- self.iorig = pd.DataFrame(self.data, index=self.dates)
-
- self.frame = SparseDataFrame(self.data, index=self.dates)
- self.iframe = SparseDataFrame(self.data, index=self.dates,
- default_kind='integer')
- self.mixed_frame = self.frame.copy(False)
- self.mixed_frame['foo'] = pd.SparseArray(['bar'] * len(self.dates))
-
- values = self.frame.values.copy()
- values[np.isnan(values)] = 0
-
- self.zorig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'],
- index=self.dates)
- self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
- default_fill_value=0, index=self.dates)
-
- values = self.frame.values.copy()
- values[np.isnan(values)] = 2
-
- self.fill_orig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'],
- index=self.dates)
- self.fill_frame = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'],
- default_fill_value=2,
- index=self.dates)
-
- self.empty = SparseDataFrame()
-
def test_fill_value_when_combine_const(self):
# GH12723
dat = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
@@ -73,8 +36,8 @@ def test_fill_value_when_combine_const(self):
res = df.add(2, fill_value=0)
tm.assert_sp_frame_equal(res, exp)
- def test_values(self):
- empty = self.empty.values
+ def test_values(self, empty_frame, float_frame):
+ empty = empty_frame.values
assert empty.shape == (0, 0)
no_cols = SparseDataFrame(index=np.arange(10))
@@ -85,28 +48,29 @@ def test_values(self):
mat = no_index.values
assert mat.shape == (0, 10)
- def test_copy(self):
- cp = self.frame.copy()
+ def test_copy(self, float_frame):
+ cp = float_frame.copy()
assert isinstance(cp, SparseDataFrame)
- tm.assert_sp_frame_equal(cp, self.frame)
+ tm.assert_sp_frame_equal(cp, float_frame)
# as of v0.15.0
# this is now identical (but not the same object)
- assert cp.index.identical(self.frame.index)
+ assert cp.index.identical(float_frame.index)
- def test_constructor(self):
- for col, series in compat.iteritems(self.frame):
+ def test_constructor(self, float_frame, float_frame_int_kind,
+ float_frame_fill0):
+ for col, series in compat.iteritems(float_frame):
assert isinstance(series, SparseSeries)
- assert isinstance(self.iframe['A'].sp_index, IntIndex)
+ assert isinstance(float_frame_int_kind['A'].sp_index, IntIndex)
# constructed zframe from matrix above
- assert self.zframe['A'].fill_value == 0
+ assert float_frame_fill0['A'].fill_value == 0
tm.assert_numpy_array_equal(pd.SparseArray([1., 2., 3., 4., 5., 6.]),
- self.zframe['A'].values)
+ float_frame_fill0['A'].values)
tm.assert_numpy_array_equal(np.array([0., 0., 0., 0., 1., 2.,
3., 4., 5., 6.]),
- self.zframe['A'].to_dense().values)
+ float_frame_fill0['A'].to_dense().values)
# construct no data
sdf = SparseDataFrame(columns=np.arange(10), index=np.arange(10))
@@ -115,29 +79,29 @@ def test_constructor(self):
# construct from nested dict
data = {}
- for c, s in compat.iteritems(self.frame):
+ for c, s in compat.iteritems(float_frame):
data[c] = s.to_dict()
sdf = SparseDataFrame(data)
- tm.assert_sp_frame_equal(sdf, self.frame)
+ tm.assert_sp_frame_equal(sdf, float_frame)
# TODO: test data is copied from inputs
# init dict with different index
- idx = self.frame.index[:5]
+ idx = float_frame.index[:5]
cons = SparseDataFrame(
- self.frame, index=idx, columns=self.frame.columns,
- default_fill_value=self.frame.default_fill_value,
- default_kind=self.frame.default_kind, copy=True)
- reindexed = self.frame.reindex(idx)
+ float_frame, index=idx, columns=float_frame.columns,
+ default_fill_value=float_frame.default_fill_value,
+ default_kind=float_frame.default_kind, copy=True)
+ reindexed = float_frame.reindex(idx)
tm.assert_sp_frame_equal(cons, reindexed, exact_indices=False)
# assert level parameter breaks reindex
with pytest.raises(TypeError):
- self.frame.reindex(idx, level=0)
+ float_frame.reindex(idx, level=0)
- repr(self.frame)
+ repr(float_frame)
def test_constructor_dict_order(self):
# GH19018
@@ -151,24 +115,26 @@ def test_constructor_dict_order(self):
expected = SparseDataFrame(data=d, columns=list('ab'))
tm.assert_sp_frame_equal(frame, expected)
- def test_constructor_ndarray(self):
+ def test_constructor_ndarray(self, float_frame):
# no index or columns
- sp = SparseDataFrame(self.frame.values)
+ sp = SparseDataFrame(float_frame.values)
# 1d
- sp = SparseDataFrame(self.data['A'], index=self.dates, columns=['A'])
- tm.assert_sp_frame_equal(sp, self.frame.reindex(columns=['A']))
+ sp = SparseDataFrame(float_frame['A'].values, index=float_frame.index,
+ columns=['A'])
+ tm.assert_sp_frame_equal(sp, float_frame.reindex(columns=['A']))
# raise on level argument
- pytest.raises(TypeError, self.frame.reindex, columns=['A'],
+ pytest.raises(TypeError, float_frame.reindex, columns=['A'],
level=1)
# wrong length index / columns
with tm.assert_raises_regex(ValueError, "^Index length"):
- SparseDataFrame(self.frame.values, index=self.frame.index[:-1])
+ SparseDataFrame(float_frame.values, index=float_frame.index[:-1])
with tm.assert_raises_regex(ValueError, "^Column length"):
- SparseDataFrame(self.frame.values, columns=self.frame.columns[:-1])
+ SparseDataFrame(float_frame.values,
+ columns=float_frame.columns[:-1])
# GH 9272
def test_constructor_empty(self):
@@ -176,10 +142,10 @@ def test_constructor_empty(self):
assert len(sp.index) == 0
assert len(sp.columns) == 0
- def test_constructor_dataframe(self):
- dense = self.frame.to_dense()
+ def test_constructor_dataframe(self, float_frame):
+ dense = float_frame.to_dense()
sp = SparseDataFrame(dense)
- tm.assert_sp_frame_equal(sp, self.frame)
+ tm.assert_sp_frame_equal(sp, float_frame)
def test_constructor_convert_index_once(self):
arr = np.array([1.5, 2.5, 3.5])
@@ -292,12 +258,13 @@ def test_dtypes(self):
expected = Series({'float64': 4})
tm.assert_series_equal(result, expected)
- def test_shape(self):
+ def test_shape(self, float_frame, float_frame_int_kind,
+ float_frame_fill0, float_frame_fill2):
# see gh-10452
- assert self.frame.shape == (10, 4)
- assert self.iframe.shape == (10, 4)
- assert self.zframe.shape == (10, 4)
- assert self.fill_frame.shape == (10, 4)
+ assert float_frame.shape == (10, 4)
+ assert float_frame_int_kind.shape == (10, 4)
+ assert float_frame_fill0.shape == (10, 4)
+ assert float_frame_fill2.shape == (10, 4)
def test_str(self):
df = DataFrame(np.random.randn(10000, 4))
@@ -306,12 +273,14 @@ def test_str(self):
sdf = df.to_sparse()
str(sdf)
- def test_array_interface(self):
- res = np.sqrt(self.frame)
- dres = np.sqrt(self.frame.to_dense())
+ def test_array_interface(self, float_frame):
+ res = np.sqrt(float_frame)
+ dres = np.sqrt(float_frame.to_dense())
tm.assert_frame_equal(res.to_dense(), dres)
- def test_pickle(self):
+ def test_pickle(self, float_frame, float_frame_int_kind, float_frame_dense,
+ float_frame_fill0, float_frame_fill0_dense,
+ float_frame_fill2, float_frame_fill2_dense):
def _test_roundtrip(frame, orig):
result = tm.round_trip_pickle(frame)
@@ -319,7 +288,10 @@ def _test_roundtrip(frame, orig):
tm.assert_frame_equal(result.to_dense(), orig, check_dtype=False)
_test_roundtrip(SparseDataFrame(), DataFrame())
- self._check_all(_test_roundtrip)
+ _test_roundtrip(float_frame, float_frame_dense)
+ _test_roundtrip(float_frame_int_kind, float_frame_dense)
+ _test_roundtrip(float_frame_fill0, float_frame_fill0_dense)
+ _test_roundtrip(float_frame_fill2, float_frame_fill2_dense)
def test_dense_to_sparse(self):
df = DataFrame({'A': [nan, nan, nan, 1, 2],
@@ -353,17 +325,17 @@ def test_density(self):
def test_sparse_to_dense(self):
pass
- def test_sparse_series_ops(self):
- self._check_frame_ops(self.frame)
+ def test_sparse_series_ops(self, float_frame):
+ self._check_frame_ops(float_frame)
- def test_sparse_series_ops_i(self):
- self._check_frame_ops(self.iframe)
+ def test_sparse_series_ops_i(self, float_frame_int_kind):
+ self._check_frame_ops(float_frame_int_kind)
- def test_sparse_series_ops_z(self):
- self._check_frame_ops(self.zframe)
+ def test_sparse_series_ops_z(self, float_frame_fill0):
+ self._check_frame_ops(float_frame_fill0)
- def test_sparse_series_ops_fill(self):
- self._check_frame_ops(self.fill_frame)
+ def test_sparse_series_ops_fill(self, float_frame_fill2):
+ self._check_frame_ops(float_frame_fill2)
def _check_frame_ops(self, frame):
@@ -417,18 +389,18 @@ def _compare_to_dense(a, b, da, db, op):
_compare_to_dense(s, frame, s, frame.to_dense(), op)
# it works!
- result = self.frame + self.frame.loc[:, ['A', 'B']] # noqa
+ result = frame + frame.loc[:, ['A', 'B']] # noqa
- def test_op_corners(self):
- empty = self.empty + self.empty
+ def test_op_corners(self, float_frame, empty_frame):
+ empty = empty_frame + empty_frame
assert empty.empty
- foo = self.frame + self.empty
+ foo = float_frame + empty_frame
assert isinstance(foo.index, DatetimeIndex)
- tm.assert_frame_equal(foo, self.frame * np.nan)
+ tm.assert_frame_equal(foo, float_frame * np.nan)
- foo = self.empty + self.frame
- tm.assert_frame_equal(foo, self.frame * np.nan)
+ foo = empty_frame + float_frame
+ tm.assert_frame_equal(foo, float_frame * np.nan)
def test_scalar_ops(self):
pass
@@ -443,12 +415,12 @@ def test_getitem(self):
pytest.raises(Exception, sdf.__getitem__, ['a', 'd'])
- def test_iloc(self):
+ def test_iloc(self, float_frame):
- # 2227
- result = self.frame.iloc[:, 0]
+ # GH 2227
+ result = float_frame.iloc[:, 0]
assert isinstance(result, SparseSeries)
- tm.assert_sp_series_equal(result, self.frame['A'])
+ tm.assert_sp_series_equal(result, float_frame['A'])
# preserve sparse index type. #2251
data = {'A': [0, 1]}
@@ -456,22 +428,22 @@ def test_iloc(self):
tm.assert_class_equal(iframe['A'].sp_index,
iframe.iloc[:, 0].sp_index)
- def test_set_value(self):
+ def test_set_value(self, float_frame):
# ok, as the index gets converted to object
- frame = self.frame.copy()
+ frame = float_frame.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res = frame.set_value('foobar', 'B', 1.5)
assert res.index.dtype == 'object'
- res = self.frame
+ res = float_frame
res.index = res.index.astype(object)
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
- res = self.frame.set_value('foobar', 'B', 1.5)
- assert res is not self.frame
+ res = float_frame.set_value('foobar', 'B', 1.5)
+ assert res is not float_frame
assert res.index[-1] == 'foobar'
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
@@ -482,38 +454,42 @@ def test_set_value(self):
res2 = res.set_value('foobar', 'qux', 1.5)
assert res2 is not res
tm.assert_index_equal(res2.columns,
- pd.Index(list(self.frame.columns) + ['qux']))
+ pd.Index(list(float_frame.columns) + ['qux']))
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
assert res2.get_value('foobar', 'qux') == 1.5
- def test_fancy_index_misc(self):
+ def test_fancy_index_misc(self, float_frame):
# axis = 0
- sliced = self.frame.iloc[-2:, :]
- expected = self.frame.reindex(index=self.frame.index[-2:])
+ sliced = float_frame.iloc[-2:, :]
+ expected = float_frame.reindex(index=float_frame.index[-2:])
tm.assert_sp_frame_equal(sliced, expected)
# axis = 1
- sliced = self.frame.iloc[:, -2:]
- expected = self.frame.reindex(columns=self.frame.columns[-2:])
+ sliced = float_frame.iloc[:, -2:]
+ expected = float_frame.reindex(columns=float_frame.columns[-2:])
tm.assert_sp_frame_equal(sliced, expected)
- def test_getitem_overload(self):
+ def test_getitem_overload(self, float_frame):
# slicing
- sl = self.frame[:20]
- tm.assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20]))
+ sl = float_frame[:20]
+ tm.assert_sp_frame_equal(sl,
+ float_frame.reindex(float_frame.index[:20]))
# boolean indexing
- d = self.frame.index[5]
- indexer = self.frame.index > d
+ d = float_frame.index[5]
+ indexer = float_frame.index > d
- subindex = self.frame.index[indexer]
- subframe = self.frame[indexer]
+ subindex = float_frame.index[indexer]
+ subframe = float_frame[indexer]
tm.assert_index_equal(subindex, subframe.index)
- pytest.raises(Exception, self.frame.__getitem__, indexer[:-1])
+ pytest.raises(Exception, float_frame.__getitem__, indexer[:-1])
- def test_setitem(self):
+ def test_setitem(self, float_frame, float_frame_int_kind,
+ float_frame_dense,
+ float_frame_fill0, float_frame_fill0_dense,
+ float_frame_fill2, float_frame_fill2_dense):
def _check_frame(frame, orig):
N = len(frame)
@@ -566,24 +542,27 @@ def _check_frame(frame, orig):
frame['K'] = frame.default_fill_value
assert len(frame['K'].sp_values) == 0
- self._check_all(_check_frame)
+ _check_frame(float_frame, float_frame_dense)
+ _check_frame(float_frame_int_kind, float_frame_dense)
+ _check_frame(float_frame_fill0, float_frame_fill0_dense)
+ _check_frame(float_frame_fill2, float_frame_fill2_dense)
- def test_setitem_corner(self):
- self.frame['a'] = self.frame['B']
- tm.assert_sp_series_equal(self.frame['a'], self.frame['B'],
+ def test_setitem_corner(self, float_frame):
+ float_frame['a'] = float_frame['B']
+ tm.assert_sp_series_equal(float_frame['a'], float_frame['B'],
check_names=False)
- def test_setitem_array(self):
- arr = self.frame['B']
+ def test_setitem_array(self, float_frame):
+ arr = float_frame['B']
- self.frame['E'] = arr
- tm.assert_sp_series_equal(self.frame['E'], self.frame['B'],
+ float_frame['E'] = arr
+ tm.assert_sp_series_equal(float_frame['E'], float_frame['B'],
check_names=False)
- self.frame['F'] = arr[:-1]
- index = self.frame.index[:-1]
- tm.assert_sp_series_equal(self.frame['E'].reindex(index),
- self.frame['F'].reindex(index),
+ float_frame['F'] = arr[:-1]
+ index = float_frame.index[:-1]
+ tm.assert_sp_series_equal(float_frame['E'].reindex(index),
+ float_frame['F'].reindex(index),
check_names=False)
def test_setitem_chained_no_consolidate(self):
@@ -595,44 +574,44 @@ def test_setitem_chained_no_consolidate(self):
sdf[0][1] = 2
assert len(sdf._data.blocks) == 2
- def test_delitem(self):
- A = self.frame['A']
- C = self.frame['C']
+ def test_delitem(self, float_frame):
+ A = float_frame['A']
+ C = float_frame['C']
- del self.frame['B']
- assert 'B' not in self.frame
- tm.assert_sp_series_equal(self.frame['A'], A)
- tm.assert_sp_series_equal(self.frame['C'], C)
+ del float_frame['B']
+ assert 'B' not in float_frame
+ tm.assert_sp_series_equal(float_frame['A'], A)
+ tm.assert_sp_series_equal(float_frame['C'], C)
- del self.frame['D']
- assert 'D' not in self.frame
+ del float_frame['D']
+ assert 'D' not in float_frame
- del self.frame['A']
- assert 'A' not in self.frame
+ del float_frame['A']
+ assert 'A' not in float_frame
- def test_set_columns(self):
- self.frame.columns = self.frame.columns
- pytest.raises(Exception, setattr, self.frame, 'columns',
- self.frame.columns[:-1])
+ def test_set_columns(self, float_frame):
+ float_frame.columns = float_frame.columns
+ pytest.raises(Exception, setattr, float_frame, 'columns',
+ float_frame.columns[:-1])
- def test_set_index(self):
- self.frame.index = self.frame.index
- pytest.raises(Exception, setattr, self.frame, 'index',
- self.frame.index[:-1])
+ def test_set_index(self, float_frame):
+ float_frame.index = float_frame.index
+ pytest.raises(Exception, setattr, float_frame, 'index',
+ float_frame.index[:-1])
- def test_append(self):
- a = self.frame[:5]
- b = self.frame[5:]
+ def test_append(self, float_frame):
+ a = float_frame[:5]
+ b = float_frame[5:]
appended = a.append(b)
- tm.assert_sp_frame_equal(appended, self.frame, exact_indices=False)
+ tm.assert_sp_frame_equal(appended, float_frame, exact_indices=False)
- a = self.frame.iloc[:5, :3]
- b = self.frame.iloc[5:]
+ a = float_frame.iloc[:5, :3]
+ b = float_frame.iloc[5:]
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
# Stacklevel is set for pd.concat, not append
appended = a.append(b)
- tm.assert_sp_frame_equal(appended.iloc[:, :3], self.frame.iloc[:, :3],
+ tm.assert_sp_frame_equal(appended.iloc[:, :3], float_frame.iloc[:, :3],
exact_indices=False)
a = a[['B', 'C', 'A']].head(2)
@@ -713,9 +692,9 @@ def test_astype_bool(self):
assert res['A'].dtype == np.bool
assert res['B'].dtype == np.bool
- def test_fillna(self):
- df = self.zframe.reindex(lrange(5))
- dense = self.zorig.reindex(lrange(5))
+ def test_fillna(self, float_frame_fill0, float_frame_fill0_dense):
+ df = float_frame_fill0.reindex(lrange(5))
+ dense = float_frame_fill0_dense.reindex(lrange(5))
result = df.fillna(0)
expected = dense.fillna(0)
@@ -795,45 +774,48 @@ def test_sparse_frame_fillna_limit(self):
expected = expected.to_sparse()
tm.assert_frame_equal(result, expected)
- def test_rename(self):
- result = self.frame.rename(index=str)
- expected = SparseDataFrame(self.data, index=self.dates.strftime(
- "%Y-%m-%d %H:%M:%S"))
+ def test_rename(self, float_frame):
+ result = float_frame.rename(index=str)
+ expected = SparseDataFrame(float_frame.values,
+ index=float_frame.index.strftime(
+ "%Y-%m-%d %H:%M:%S"),
+ columns=list('ABCD'))
tm.assert_sp_frame_equal(result, expected)
- result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x)))
+ result = float_frame.rename(columns=lambda x: '%s%d' % (x, 1))
data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
'C1': np.arange(10, dtype=np.float64),
'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
- expected = SparseDataFrame(data, index=self.dates)
+ expected = SparseDataFrame(data, index=float_frame.index)
tm.assert_sp_frame_equal(result, expected)
- def test_corr(self):
- res = self.frame.corr()
- tm.assert_frame_equal(res, self.frame.to_dense().corr())
+ def test_corr(self, float_frame):
+ res = float_frame.corr()
+ tm.assert_frame_equal(res, float_frame.to_dense().corr())
- def test_describe(self):
- self.frame['foo'] = np.nan
- self.frame.get_dtype_counts()
- str(self.frame)
- desc = self.frame.describe() # noqa
+ def test_describe(self, float_frame):
+ float_frame['foo'] = np.nan
+ float_frame.get_dtype_counts()
+ str(float_frame)
+ desc = float_frame.describe() # noqa
- def test_join(self):
- left = self.frame.loc[:, ['A', 'B']]
- right = self.frame.loc[:, ['C', 'D']]
+ def test_join(self, float_frame):
+ left = float_frame.loc[:, ['A', 'B']]
+ right = float_frame.loc[:, ['C', 'D']]
joined = left.join(right)
- tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False)
+ tm.assert_sp_frame_equal(joined, float_frame, exact_indices=False)
- right = self.frame.loc[:, ['B', 'D']]
+ right = float_frame.loc[:, ['B', 'D']]
pytest.raises(Exception, left.join, right)
with tm.assert_raises_regex(ValueError,
'Other Series must have a name'):
- self.frame.join(Series(
- np.random.randn(len(self.frame)), index=self.frame.index))
+ float_frame.join(Series(
+ np.random.randn(len(float_frame)), index=float_frame.index))
- def test_reindex(self):
+ def test_reindex(self, float_frame, float_frame_int_kind,
+ float_frame_fill0, float_frame_fill2):
def _check_frame(frame):
index = frame.index
@@ -876,26 +858,27 @@ def _check_frame(frame):
frame.default_fill_value)
assert np.isnan(reindexed['Z'].sp_values).all()
- _check_frame(self.frame)
- _check_frame(self.iframe)
- _check_frame(self.zframe)
- _check_frame(self.fill_frame)
+ _check_frame(float_frame)
+ _check_frame(float_frame_int_kind)
+ _check_frame(float_frame_fill0)
+ _check_frame(float_frame_fill2)
# with copy=False
- reindexed = self.frame.reindex(self.frame.index, copy=False)
+ reindexed = float_frame.reindex(float_frame.index, copy=False)
reindexed['F'] = reindexed['A']
- assert 'F' in self.frame
+ assert 'F' in float_frame
- reindexed = self.frame.reindex(self.frame.index)
+ reindexed = float_frame.reindex(float_frame.index)
reindexed['G'] = reindexed['A']
- assert 'G' not in self.frame
+ assert 'G' not in float_frame
- def test_reindex_fill_value(self):
+ def test_reindex_fill_value(self, float_frame_fill0,
+ float_frame_fill0_dense):
rng = bdate_range('20110110', periods=20)
- result = self.zframe.reindex(rng, fill_value=0)
- exp = self.zorig.reindex(rng, fill_value=0)
- exp = exp.to_sparse(self.zframe.default_fill_value)
+ result = float_frame_fill0.reindex(rng, fill_value=0)
+ exp = float_frame_fill0_dense.reindex(rng, fill_value=0)
+ exp = exp.to_sparse(float_frame_fill0.default_fill_value)
tm.assert_sp_frame_equal(result, exp)
def test_reindex_method(self):
@@ -968,41 +951,50 @@ def test_reindex_method(self):
with pytest.raises(NotImplementedError):
sparse.reindex(columns=range(6), method='ffill')
- def test_take(self):
- result = self.frame.take([1, 0, 2], axis=1)
- expected = self.frame.reindex(columns=['B', 'A', 'C'])
+ def test_take(self, float_frame):
+ result = float_frame.take([1, 0, 2], axis=1)
+ expected = float_frame.reindex(columns=['B', 'A', 'C'])
tm.assert_sp_frame_equal(result, expected)
- def test_to_dense(self):
+ def test_to_dense(self, float_frame, float_frame_int_kind,
+ float_frame_dense,
+ float_frame_fill0, float_frame_fill0_dense,
+ float_frame_fill2, float_frame_fill2_dense):
def _check(frame, orig):
dense_dm = frame.to_dense()
tm.assert_frame_equal(frame, dense_dm)
tm.assert_frame_equal(dense_dm, orig, check_dtype=False)
- self._check_all(_check)
-
- def test_stack_sparse_frame(self):
- with catch_warnings(record=True):
+ _check(float_frame, float_frame_dense)
+ _check(float_frame_int_kind, float_frame_dense)
+ _check(float_frame_fill0, float_frame_fill0_dense)
+ _check(float_frame_fill2, float_frame_fill2_dense)
- def _check(frame):
- dense_frame = frame.to_dense() # noqa
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
+ def test_stack_sparse_frame(self, float_frame, float_frame_int_kind,
+ float_frame_fill0, float_frame_fill2):
+ def _check(frame):
+ dense_frame = frame.to_dense() # noqa
- wp = Panel.from_dict({'foo': frame})
- from_dense_lp = wp.to_frame()
+ wp = Panel.from_dict({'foo': frame})
+ from_dense_lp = wp.to_frame()
- from_sparse_lp = spf.stack_sparse_frame(frame)
+ from_sparse_lp = spf.stack_sparse_frame(frame)
- tm.assert_numpy_array_equal(from_dense_lp.values,
- from_sparse_lp.values)
+ tm.assert_numpy_array_equal(from_dense_lp.values,
+ from_sparse_lp.values)
- _check(self.frame)
- _check(self.iframe)
+ _check(float_frame)
+ _check(float_frame_int_kind)
- # for now
- pytest.raises(Exception, _check, self.zframe)
- pytest.raises(Exception, _check, self.fill_frame)
+ # for now
+ pytest.raises(Exception, _check, float_frame_fill0)
+ pytest.raises(Exception, _check, float_frame_fill2)
- def test_transpose(self):
+ def test_transpose(self, float_frame, float_frame_int_kind,
+ float_frame_dense,
+ float_frame_fill0, float_frame_fill0_dense,
+ float_frame_fill2, float_frame_fill2_dense):
def _check(frame, orig):
transposed = frame.T
@@ -1013,9 +1005,14 @@ def _check(frame, orig):
tm.assert_frame_equal(frame.T.T.to_dense(), orig.T.T)
tm.assert_sp_frame_equal(frame, frame.T.T, exact_indices=False)
- self._check_all(_check)
+ _check(float_frame, float_frame_dense)
+ _check(float_frame_int_kind, float_frame_dense)
+ _check(float_frame_fill0, float_frame_fill0_dense)
+ _check(float_frame_fill2, float_frame_fill2_dense)
- def test_shift(self):
+ def test_shift(self, float_frame, float_frame_int_kind, float_frame_dense,
+ float_frame_fill0, float_frame_fill0_dense,
+ float_frame_fill2, float_frame_fill2_dense):
def _check(frame, orig):
shifted = frame.shift(0)
@@ -1042,32 +1039,29 @@ def _check(frame, orig):
kind=frame.default_kind)
tm.assert_frame_equal(shifted, exp)
- self._check_all(_check)
+ _check(float_frame, float_frame_dense)
+ _check(float_frame_int_kind, float_frame_dense)
+ _check(float_frame_fill0, float_frame_fill0_dense)
+ _check(float_frame_fill2, float_frame_fill2_dense)
- def test_count(self):
- dense_result = self.frame.to_dense().count()
+ def test_count(self, float_frame):
+ dense_result = float_frame.to_dense().count()
- result = self.frame.count()
+ result = float_frame.count()
tm.assert_series_equal(result, dense_result)
- result = self.frame.count(axis=None)
+ result = float_frame.count(axis=None)
tm.assert_series_equal(result, dense_result)
- result = self.frame.count(axis=0)
+ result = float_frame.count(axis=0)
tm.assert_series_equal(result, dense_result)
- result = self.frame.count(axis=1)
- dense_result = self.frame.to_dense().count(axis=1)
+ result = float_frame.count(axis=1)
+ dense_result = float_frame.to_dense().count(axis=1)
# on win32, don't check dtype
tm.assert_series_equal(result, dense_result, check_dtype=False)
- def _check_all(self, check_func):
- check_func(self.frame, self.orig)
- check_func(self.iframe, self.iorig)
- check_func(self.zframe, self.zorig)
- check_func(self.fill_frame, self.fill_orig)
-
def test_numpy_transpose(self):
sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a'])
result = np.transpose(np.transpose(sdf))
@@ -1076,8 +1070,8 @@ def test_numpy_transpose(self):
msg = "the 'axes' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1)
- def test_combine_first(self):
- df = self.frame
+ def test_combine_first(self, float_frame):
+ df = float_frame
result = df[::2].combine_first(df)
result2 = df[::2].combine_first(df.to_dense())
@@ -1088,8 +1082,8 @@ def test_combine_first(self):
tm.assert_sp_frame_equal(result, result2)
tm.assert_sp_frame_equal(result, expected)
- def test_combine_add(self):
- df = self.frame.to_dense()
+ def test_combine_add(self, float_frame):
+ df = float_frame.to_dense()
df2 = df.copy()
df2['C'][:3] = np.nan
df['A'][:3] = 5.7
@@ -1214,51 +1208,42 @@ def test_comparison_op_scalar(self):
class TestSparseDataFrameAnalytics(object):
- def setup_method(self, method):
- self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
- 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
- 'C': np.arange(10, dtype=float),
- 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
-
- self.dates = bdate_range('1/1/2011', periods=10)
-
- self.frame = SparseDataFrame(self.data, index=self.dates)
- def test_cumsum(self):
- expected = SparseDataFrame(self.frame.to_dense().cumsum())
+ def test_cumsum(self, float_frame):
+ expected = SparseDataFrame(float_frame.to_dense().cumsum())
- result = self.frame.cumsum()
+ result = float_frame.cumsum()
tm.assert_sp_frame_equal(result, expected)
- result = self.frame.cumsum(axis=None)
+ result = float_frame.cumsum(axis=None)
tm.assert_sp_frame_equal(result, expected)
- result = self.frame.cumsum(axis=0)
+ result = float_frame.cumsum(axis=0)
tm.assert_sp_frame_equal(result, expected)
- def test_numpy_cumsum(self):
- result = np.cumsum(self.frame)
- expected = SparseDataFrame(self.frame.to_dense().cumsum())
+ def test_numpy_cumsum(self, float_frame):
+ result = np.cumsum(float_frame)
+ expected = SparseDataFrame(float_frame.to_dense().cumsum())
tm.assert_sp_frame_equal(result, expected)
msg = "the 'dtype' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.cumsum,
- self.frame, dtype=np.int64)
+ float_frame, dtype=np.int64)
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.cumsum,
- self.frame, out=result)
+ float_frame, out=result)
- def test_numpy_func_call(self):
+ def test_numpy_func_call(self, float_frame):
# no exception should be raised even though
# numpy passes in 'axis=None' or 'axis=-1'
funcs = ['sum', 'cumsum', 'var',
'mean', 'prod', 'cumprod',
'std', 'min', 'max']
for func in funcs:
- getattr(np, func)(self.frame)
+ getattr(np, func)(float_frame)
- @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)',
+ @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)',
strict=True)
def test_quantile(self):
# GH 17386
@@ -1275,7 +1260,7 @@ def test_quantile(self):
tm.assert_series_equal(result, dense_expected)
tm.assert_sp_series_equal(result, sparse_expected)
- @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)',
+ @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)',
strict=True)
def test_quantile_multi(self):
# GH 17386
diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py
index aef49c84fc2ad..a7f64bbe9a49f 100644
--- a/pandas/tests/sparse/frame/test_to_from_scipy.py
+++ b/pandas/tests/sparse/frame/test_to_from_scipy.py
@@ -1,6 +1,5 @@
import pytest
import numpy as np
-from warnings import catch_warnings
from pandas.util import testing as tm
from pandas import SparseDataFrame, SparseSeries
from distutils.version import LooseVersion
@@ -12,12 +11,16 @@
scipy = pytest.importorskip('scipy')
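+# scipy sparse matrices are np.matrix subclasses, which newer NumPy flags
+# with a PendingDeprecationWarning; define one reusable filter mark for the
+# scipy round-trip tests below.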
+ignore_matrix_warning = pytest.mark.filterwarnings(
+ "ignore:the matrix subclass:PendingDeprecationWarning"
+)
@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
+@ignore_matrix_warning
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
# GH 4343
# Make one ndarray and from it one sparse matrix, both to be used for
@@ -69,6 +72,8 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811
+@ignore_matrix_warning
+@pytest.mark.filterwarnings("ignore:object dtype is not supp:UserWarning")
def test_from_to_scipy_object(spmatrix, fill_value):
# GH 4343
dtype = object
@@ -108,8 +113,7 @@ def test_from_to_scipy_object(spmatrix, fill_value):
tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())
# Assert spmatrices equal
- with catch_warnings(record=True):
- assert dict(sdf.to_coo().todok()) == dict(spm.todok())
+ assert dict(sdf.to_coo().todok()) == dict(spm.todok())
# Ensure dtype is preserved if possible
res_dtype = object
@@ -117,6 +121,7 @@ def test_from_to_scipy_object(spmatrix, fill_value):
assert sdf.to_coo().dtype == res_dtype
+@ignore_matrix_warning
def test_from_scipy_correct_ordering(spmatrix):
# GH 16179
arr = np.arange(1, 5).reshape(2, 2)
@@ -135,6 +140,7 @@ def test_from_scipy_correct_ordering(spmatrix):
tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
+@ignore_matrix_warning
def test_from_scipy_fillna(spmatrix):
# GH 16112
arr = np.eye(3)
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
index 921c30234660f..5b50606bf37bd 100644
--- a/pandas/tests/sparse/series/test_series.py
+++ b/pandas/tests/sparse/series/test_series.py
@@ -1022,6 +1022,9 @@ def test_round_trip_preserve_multiindex_names(self):
@td.skip_if_no_scipy
+@pytest.mark.filterwarnings(
+ "ignore:the matrix subclass:PendingDeprecationWarning"
+)
class TestSparseSeriesScipyInteraction(object):
# Issue 8048: add SparseSeries coo methods
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 64d2e155aa9a9..b2ddbf715b480 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -520,6 +520,36 @@ def test_different_nans(self):
expected = np.array([np.nan])
tm.assert_numpy_array_equal(result, expected)
+ def test_first_nan_kept(self):
+ # GH 22295
+ # create different nans from bit-patterns:
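+        # (both patterns encode quiet NaNs and differ only in the sign bit,
+        # so they are distinct values even though each compares unequal
+        # to itself)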
+ bits_for_nan1 = 0xfff8000000000001
+ bits_for_nan2 = 0x7ff8000000000001
+ NAN1 = struct.unpack("d", struct.pack("=Q", bits_for_nan1))[0]
+ NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0]
+ assert NAN1 != NAN1
+ assert NAN2 != NAN2
+ for el_type in [np.float64, np.object]:
+ a = np.array([NAN1, NAN2], dtype=el_type)
+ result = pd.unique(a)
+ assert result.size == 1
+ # use bit patterns to identify which nan was kept:
+ result_nan_bits = struct.unpack("=Q",
+ struct.pack("d", result[0]))[0]
+ assert result_nan_bits == bits_for_nan1
+
+ def test_do_not_mangle_na_values(self, unique_nulls_fixture,
+ unique_nulls_fixture2):
+ # GH 22295
+ if unique_nulls_fixture is unique_nulls_fixture2:
+ return # skip it, values not unique
+ a = np.array([unique_nulls_fixture,
+ unique_nulls_fixture2], dtype=np.object)
+ result = pd.unique(a)
+ assert result.size == 2
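+        # compare by identity rather than equality: NaN != NaN, and the
+        # point is that unique() must leave the original objects untouched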
+ assert a[0] is unique_nulls_fixture
+ assert a[1] is unique_nulls_fixture2
+
class TestIsin(object):
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 868525e818b62..ae46bee901ff2 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -1,11 +1,13 @@
# -*- coding: utf-8 -*-
import collections
+import string
from functools import partial
import numpy as np
import pytest
+import pandas as pd
from pandas import Series, Timestamp
from pandas.core import (
common as com,
@@ -110,3 +112,10 @@ def test_standardize_mapping():
dd = collections.defaultdict(list)
assert isinstance(com.standardize_mapping(dd), partial)
+
+
+def test_git_version():
+ # GH 21295
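+    # a full git SHA-1 hash is exactly 40 hexadecimal characters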
+ git_version = pd.__git_version__
+ assert len(git_version) == 40
+ assert all(c in string.hexdigits for c in git_version)
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index 70973801d7cda..abcfa4b320b22 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -62,6 +62,8 @@ def test_oo_optimizable():
@tm.network
+# Cython import warning
+@pytest.mark.filterwarnings("ignore:can't:ImportWarning")
def test_statsmodels():
statsmodels = import_module('statsmodels') # noqa
@@ -71,6 +73,8 @@ def test_statsmodels():
smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=df).fit()
+# Cython import warning
+@pytest.mark.filterwarnings("ignore:can't:ImportWarning")
def test_scikit_learn(df):
sklearn = import_module('sklearn') # noqa
@@ -82,7 +86,9 @@ def test_scikit_learn(df):
clf.predict(digits.data[-1:])
+# Cython import warning and traitlets
@tm.network
+@pytest.mark.filterwarnings("ignore")
def test_seaborn():
seaborn = import_module('seaborn')
@@ -104,6 +110,10 @@ def test_pandas_datareader():
'F', 'quandl', '2017-01-01', '2017-02-01')
+# importing from pandas, Cython import warning
+@pytest.mark.filterwarnings("ignore:The 'warn':DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:pandas.util:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
def test_geopandas():
geopandas = import_module('geopandas') # noqa
@@ -111,6 +121,8 @@ def test_geopandas():
assert geopandas.read_file(fp) is not None
+# Cython import warning
+@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
def test_pyarrow(df):
pyarrow = import_module('pyarrow') # noqa
diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py
index 7f9cddf9859a5..76e003c463e7d 100644
--- a/pandas/tests/test_errors.py
+++ b/pandas/tests/test_errors.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import pytest
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import pandas # noqa
import pandas as pd
from pandas.errors import AbstractMethodError
@@ -48,6 +48,7 @@ def test_error_rename():
pass
with catch_warnings(record=True):
+ simplefilter("ignore")
try:
raise ParserError()
except pd.parser.CParserError:
diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
index 468463d3eba5f..c101fd25ce5e5 100644
--- a/pandas/tests/test_expressions.py
+++ b/pandas/tests/test_expressions.py
@@ -2,7 +2,7 @@
from __future__ import print_function
# pylint: disable-msg=W0612,E1101
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import re
import operator
import pytest
@@ -38,6 +38,7 @@
columns=list('ABCD'), dtype='int64')
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
_frame_panel = Panel(dict(ItemA=_frame.copy(),
ItemB=(_frame.copy() + 3),
ItemC=_frame.copy(),
@@ -191,6 +192,7 @@ def test_integer_arithmetic_series(self):
self.run_series(self.integer.iloc[:, 0], self.integer.iloc[:, 0])
@pytest.mark.slow
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_integer_panel(self):
self.run_panel(_integer2_panel, np.random.randint(1, 100))
@@ -201,6 +203,7 @@ def test_float_arithmetic_series(self):
self.run_series(self.frame2.iloc[:, 0], self.frame2.iloc[:, 0])
@pytest.mark.slow
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_float_panel(self):
self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8)
@@ -215,6 +218,7 @@ def test_mixed_arithmetic_series(self):
self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4)
@pytest.mark.slow
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_mixed_panel(self):
self.run_panel(_mixed2_panel, np.random.randint(1, 100),
binary_comp=-2)
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index ecd0af9c13d34..1718c6beaef55 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# pylint: disable-msg=W0612,E1101,W0141
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
import datetime
import itertools
import pytest
@@ -194,6 +194,7 @@ def test_reindex(self):
tm.assert_frame_equal(reindexed, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]]
tm.assert_frame_equal(reindexed, expected)
@@ -206,6 +207,7 @@ def test_reindex_preserve_levels(self):
assert chunk.index is new_index
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
chunk = self.ymd.ix[new_index]
assert chunk.index is new_index
@@ -269,6 +271,7 @@ def test_series_getitem(self):
tm.assert_series_equal(result, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = s.ix[[(2000, 3, 10), (2000, 3, 13)]]
tm.assert_series_equal(result, expected)
@@ -348,6 +351,7 @@ def test_frame_getitem_setitem_multislice(self):
tm.assert_series_equal(df['value'], result)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[:, 'value']
tm.assert_series_equal(df['value'], result)
@@ -423,6 +427,7 @@ def test_getitem_tuple_plus_slice(self):
expected = idf.loc[0, 0]
expected2 = idf.xs((0, 0))
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
expected3 = idf.ix[0, 0]
tm.assert_series_equal(result, expected)
@@ -684,6 +689,7 @@ def test_frame_setitem_ix(self):
assert df.loc[('bar', 'two'), 1] == 7
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
df = self.frame.copy()
df.columns = lrange(3)
df.ix[('bar', 'two'), 1] = 7
@@ -713,6 +719,7 @@ def test_getitem_partial_column_select(self):
tm.assert_frame_equal(result, expected)
with catch_warnings(record=True):
+ simplefilter("ignore", DeprecationWarning)
result = df.ix[('a', 'y'), [1, 0]]
tm.assert_frame_equal(result, expected)
@@ -1294,6 +1301,7 @@ def test_swaplevel(self):
def test_swaplevel_panel(self):
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2})
expected = panel.copy()
expected.major_axis = expected.major_axis.swaplevel(0, 1)
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index a70ee80aee180..b6c2c65fb6dce 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -359,6 +359,7 @@ def test_returned_dtype(self):
def test_nanmedian(self):
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
self.check_funs(nanops.nanmedian, np.median, allow_complex=False,
allow_str=False, allow_date=False,
allow_tdelta=True, allow_obj='convert')
@@ -394,12 +395,14 @@ def _minmax_wrap(self, value, axis=None, func=None):
def test_nanmin(self):
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
func = partial(self._minmax_wrap, func=np.min)
self.check_funs(nanops.nanmin, func,
allow_str=False, allow_obj=False)
def test_nanmax(self):
- with warnings.catch_warnings(record=True):
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", RuntimeWarning)
func = partial(self._minmax_wrap, func=np.max)
self.check_funs(nanops.nanmax, func,
allow_str=False, allow_obj=False)
@@ -417,6 +420,7 @@ def _argminmax_wrap(self, value, axis=None, func=None):
def test_nanargmax(self):
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
func = partial(self._argminmax_wrap, func=np.argmax)
self.check_funs(nanops.nanargmax, func,
allow_str=False, allow_obj=False,
@@ -424,6 +428,7 @@ def test_nanargmax(self):
def test_nanargmin(self):
with warnings.catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
func = partial(self._argminmax_wrap, func=np.argmin)
self.check_funs(nanops.nanargmin, func, allow_str=False,
allow_obj=False)
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index b968c52ce3dfd..51c779c6a97a3 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# pylint: disable=W0612,E1101
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from datetime import datetime
import operator
import pytest
@@ -30,49 +30,47 @@
def make_test_panel():
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
_panel = tm.makePanel()
tm.add_nans(_panel)
_panel = _panel.copy()
return _panel
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class PanelTests(object):
panel = None
def test_pickle(self):
- with catch_warnings(record=True):
- unpickled = tm.round_trip_pickle(self.panel)
- assert_frame_equal(unpickled['ItemA'], self.panel['ItemA'])
+ unpickled = tm.round_trip_pickle(self.panel)
+ assert_frame_equal(unpickled['ItemA'], self.panel['ItemA'])
def test_rank(self):
- with catch_warnings(record=True):
- pytest.raises(NotImplementedError, lambda: self.panel.rank())
+ pytest.raises(NotImplementedError, lambda: self.panel.rank())
def test_cumsum(self):
- with catch_warnings(record=True):
- cumsum = self.panel.cumsum()
- assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum())
+ cumsum = self.panel.cumsum()
+ assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum())
def not_hashable(self):
- with catch_warnings(record=True):
- c_empty = Panel()
- c = Panel(Panel([[[1]]]))
- pytest.raises(TypeError, hash, c_empty)
- pytest.raises(TypeError, hash, c)
+ c_empty = Panel()
+ c = Panel(Panel([[[1]]]))
+ pytest.raises(TypeError, hash, c_empty)
+ pytest.raises(TypeError, hash, c)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class SafeForLongAndSparse(object):
def test_repr(self):
repr(self.panel)
def test_copy_names(self):
- with catch_warnings(record=True):
- for attr in ('major_axis', 'minor_axis'):
- getattr(self.panel, attr).name = None
- cp = self.panel.copy()
- getattr(cp, attr).name = 'foo'
- assert getattr(self.panel, attr).name is None
+ for attr in ('major_axis', 'minor_axis'):
+ getattr(self.panel, attr).name = None
+ cp = self.panel.copy()
+ getattr(cp, attr).name = 'foo'
+ assert getattr(self.panel, attr).name is None
def test_iter(self):
tm.equalContents(list(self.panel), self.panel.items)
@@ -91,6 +89,8 @@ def test_mean(self):
def test_prod(self):
self._check_stat_op('prod', np.prod, skipna_alternative=np.nanprod)
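+    # test_median hits all-NaN slices in the test panel, which make numpy
+    # emit these RuntimeWarnings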
+ @pytest.mark.filterwarnings("ignore:Invalid value:RuntimeWarning")
+ @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning")
def test_median(self):
def wrapper(x):
if isna(x).any():
@@ -99,13 +99,13 @@ def wrapper(x):
self._check_stat_op('median', wrapper)
+ @pytest.mark.filterwarnings("ignore:Invalid value:RuntimeWarning")
def test_min(self):
- with catch_warnings(record=True):
- self._check_stat_op('min', np.min)
+ self._check_stat_op('min', np.min)
+ @pytest.mark.filterwarnings("ignore:Invalid value:RuntimeWarning")
def test_max(self):
- with catch_warnings(record=True):
- self._check_stat_op('max', np.max)
+ self._check_stat_op('max', np.max)
@td.skip_if_no_scipy
def test_skew(self):
@@ -181,6 +181,7 @@ def wrapper(x):
numeric_only=True)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class SafeForSparse(object):
def test_get_axis(self):
@@ -240,48 +241,46 @@ def test_get_plane_axes(self):
index, columns = self.panel._get_plane_axes(0)
def test_truncate(self):
- with catch_warnings(record=True):
- dates = self.panel.major_axis
- start, end = dates[1], dates[5]
+ dates = self.panel.major_axis
+ start, end = dates[1], dates[5]
- trunced = self.panel.truncate(start, end, axis='major')
- expected = self.panel['ItemA'].truncate(start, end)
+ trunced = self.panel.truncate(start, end, axis='major')
+ expected = self.panel['ItemA'].truncate(start, end)
- assert_frame_equal(trunced['ItemA'], expected)
+ assert_frame_equal(trunced['ItemA'], expected)
- trunced = self.panel.truncate(before=start, axis='major')
- expected = self.panel['ItemA'].truncate(before=start)
+ trunced = self.panel.truncate(before=start, axis='major')
+ expected = self.panel['ItemA'].truncate(before=start)
- assert_frame_equal(trunced['ItemA'], expected)
+ assert_frame_equal(trunced['ItemA'], expected)
- trunced = self.panel.truncate(after=end, axis='major')
- expected = self.panel['ItemA'].truncate(after=end)
+ trunced = self.panel.truncate(after=end, axis='major')
+ expected = self.panel['ItemA'].truncate(after=end)
- assert_frame_equal(trunced['ItemA'], expected)
+ assert_frame_equal(trunced['ItemA'], expected)
def test_arith(self):
- with catch_warnings(record=True):
- self._test_op(self.panel, operator.add)
- self._test_op(self.panel, operator.sub)
- self._test_op(self.panel, operator.mul)
- self._test_op(self.panel, operator.truediv)
- self._test_op(self.panel, operator.floordiv)
- self._test_op(self.panel, operator.pow)
-
- self._test_op(self.panel, lambda x, y: y + x)
- self._test_op(self.panel, lambda x, y: y - x)
- self._test_op(self.panel, lambda x, y: y * x)
- self._test_op(self.panel, lambda x, y: y / x)
- self._test_op(self.panel, lambda x, y: y ** x)
-
- self._test_op(self.panel, lambda x, y: x + y) # panel + 1
- self._test_op(self.panel, lambda x, y: x - y) # panel - 1
- self._test_op(self.panel, lambda x, y: x * y) # panel * 1
- self._test_op(self.panel, lambda x, y: x / y) # panel / 1
- self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1
-
- pytest.raises(Exception, self.panel.__add__,
- self.panel['ItemA'])
+ self._test_op(self.panel, operator.add)
+ self._test_op(self.panel, operator.sub)
+ self._test_op(self.panel, operator.mul)
+ self._test_op(self.panel, operator.truediv)
+ self._test_op(self.panel, operator.floordiv)
+ self._test_op(self.panel, operator.pow)
+
+ self._test_op(self.panel, lambda x, y: y + x)
+ self._test_op(self.panel, lambda x, y: y - x)
+ self._test_op(self.panel, lambda x, y: y * x)
+ self._test_op(self.panel, lambda x, y: y / x)
+ self._test_op(self.panel, lambda x, y: y ** x)
+
+ self._test_op(self.panel, lambda x, y: x + y) # panel + 1
+ self._test_op(self.panel, lambda x, y: x - y) # panel - 1
+ self._test_op(self.panel, lambda x, y: x * y) # panel * 1
+ self._test_op(self.panel, lambda x, y: x / y) # panel / 1
+ self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1
+
+ pytest.raises(Exception, self.panel.__add__,
+ self.panel['ItemA'])
@staticmethod
def _test_op(panel, op):
@@ -300,100 +299,99 @@ def test_iteritems(self):
assert len(list(self.panel.iteritems())) == len(self.panel.items)
def test_combineFrame(self):
- with catch_warnings(record=True):
- def check_op(op, name):
- # items
- df = self.panel['ItemA']
+ def check_op(op, name):
+ # items
+ df = self.panel['ItemA']
- func = getattr(self.panel, name)
+ func = getattr(self.panel, name)
- result = func(df, axis='items')
+ result = func(df, axis='items')
- assert_frame_equal(
- result['ItemB'], op(self.panel['ItemB'], df))
+ assert_frame_equal(
+ result['ItemB'], op(self.panel['ItemB'], df))
- # major
- xs = self.panel.major_xs(self.panel.major_axis[0])
- result = func(xs, axis='major')
+ # major
+ xs = self.panel.major_xs(self.panel.major_axis[0])
+ result = func(xs, axis='major')
- idx = self.panel.major_axis[1]
+ idx = self.panel.major_axis[1]
- assert_frame_equal(result.major_xs(idx),
- op(self.panel.major_xs(idx), xs))
+ assert_frame_equal(result.major_xs(idx),
+ op(self.panel.major_xs(idx), xs))
- # minor
- xs = self.panel.minor_xs(self.panel.minor_axis[0])
- result = func(xs, axis='minor')
+ # minor
+ xs = self.panel.minor_xs(self.panel.minor_axis[0])
+ result = func(xs, axis='minor')
- idx = self.panel.minor_axis[1]
+ idx = self.panel.minor_axis[1]
- assert_frame_equal(result.minor_xs(idx),
- op(self.panel.minor_xs(idx), xs))
+ assert_frame_equal(result.minor_xs(idx),
+ op(self.panel.minor_xs(idx), xs))
- ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod']
- if not compat.PY3:
- ops.append('div')
+ ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod']
+ if not compat.PY3:
+ ops.append('div')
- for op in ops:
- try:
- check_op(getattr(operator, op), op)
- except:
- pprint_thing("Failing operation: %r" % op)
- raise
- if compat.PY3:
- try:
- check_op(operator.truediv, 'div')
- except:
- pprint_thing("Failing operation: %r" % 'div')
- raise
+ for op in ops:
+ try:
+ check_op(getattr(operator, op), op)
+ except:
+ pprint_thing("Failing operation: %r" % op)
+ raise
+ if compat.PY3:
+ try:
+ check_op(operator.truediv, 'div')
+ except:
+ pprint_thing("Failing operation: %r" % 'div')
+ raise
def test_combinePanel(self):
- with catch_warnings(record=True):
- result = self.panel.add(self.panel)
- assert_panel_equal(result, self.panel * 2)
+ result = self.panel.add(self.panel)
+ assert_panel_equal(result, self.panel * 2)
def test_neg(self):
- with catch_warnings(record=True):
- assert_panel_equal(-self.panel, self.panel * -1)
+ assert_panel_equal(-self.panel, self.panel * -1)
# issue 7692
def test_raise_when_not_implemented(self):
- with catch_warnings(record=True):
- p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5),
- items=['ItemA', 'ItemB', 'ItemC'],
- major_axis=date_range('20130101', periods=4),
- minor_axis=list('ABCDE'))
- d = p.sum(axis=1).iloc[0]
- ops = ['add', 'sub', 'mul', 'truediv',
- 'floordiv', 'div', 'mod', 'pow']
- for op in ops:
- with pytest.raises(NotImplementedError):
- getattr(p, op)(d, axis=0)
+ p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5),
+ items=['ItemA', 'ItemB', 'ItemC'],
+ major_axis=date_range('20130101', periods=4),
+ minor_axis=list('ABCDE'))
+ d = p.sum(axis=1).iloc[0]
+ ops = ['add', 'sub', 'mul', 'truediv',
+ 'floordiv', 'div', 'mod', 'pow']
+ for op in ops:
+ with pytest.raises(NotImplementedError):
+ getattr(p, op)(d, axis=0)
def test_select(self):
- with catch_warnings(record=True):
- p = self.panel
+ p = self.panel
- # select items
+ # select items
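+        # Panel.select is deprecated, so each call should emit a
+        # FutureWarning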
+ with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items')
- expected = p.reindex(items=['ItemA', 'ItemC'])
- assert_panel_equal(result, expected)
+ expected = p.reindex(items=['ItemA', 'ItemC'])
+ assert_panel_equal(result, expected)
- # select major_axis
+ # select major_axis
+ with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = p.select(lambda x: x >= datetime(
2000, 1, 15), axis='major')
- new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)]
- expected = p.reindex(major=new_major)
- assert_panel_equal(result, expected)
+ new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)]
+ expected = p.reindex(major=new_major)
+ assert_panel_equal(result, expected)
- # select minor_axis
+ # select minor_axis
+ with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = p.select(lambda x: x in ('D', 'A'), axis=2)
- expected = p.reindex(minor=['A', 'D'])
- assert_panel_equal(result, expected)
+ expected = p.reindex(minor=['A', 'D'])
+ assert_panel_equal(result, expected)
- # corner case, empty thing
+ # corner case, empty thing
+ with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = p.select(lambda x: x in ('foo', ), axis='items')
- assert_panel_equal(result, p.reindex(items=[]))
+ assert_panel_equal(result, p.reindex(items=[]))
def test_get_value(self):
for item in self.panel.items:
@@ -407,211 +405,204 @@ def test_get_value(self):
def test_abs(self):
- with catch_warnings(record=True):
- result = self.panel.abs()
- result2 = abs(self.panel)
- expected = np.abs(self.panel)
- assert_panel_equal(result, expected)
- assert_panel_equal(result2, expected)
+ result = self.panel.abs()
+ result2 = abs(self.panel)
+ expected = np.abs(self.panel)
+ assert_panel_equal(result, expected)
+ assert_panel_equal(result2, expected)
- df = self.panel['ItemA']
- result = df.abs()
- result2 = abs(df)
- expected = np.abs(df)
- assert_frame_equal(result, expected)
- assert_frame_equal(result2, expected)
-
- s = df['A']
- result = s.abs()
- result2 = abs(s)
- expected = np.abs(s)
- assert_series_equal(result, expected)
- assert_series_equal(result2, expected)
- assert result.name == 'A'
- assert result2.name == 'A'
+ df = self.panel['ItemA']
+ result = df.abs()
+ result2 = abs(df)
+ expected = np.abs(df)
+ assert_frame_equal(result, expected)
+ assert_frame_equal(result2, expected)
+
+ s = df['A']
+ result = s.abs()
+ result2 = abs(s)
+ expected = np.abs(s)
+ assert_series_equal(result, expected)
+ assert_series_equal(result2, expected)
+ assert result.name == 'A'
+ assert result2.name == 'A'
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class CheckIndexing(object):
def test_getitem(self):
pytest.raises(Exception, self.panel.__getitem__, 'ItemQ')
def test_delitem_and_pop(self):
- with catch_warnings(record=True):
- expected = self.panel['ItemA']
- result = self.panel.pop('ItemA')
- assert_frame_equal(expected, result)
- assert 'ItemA' not in self.panel.items
+ expected = self.panel['ItemA']
+ result = self.panel.pop('ItemA')
+ assert_frame_equal(expected, result)
+ assert 'ItemA' not in self.panel.items
- del self.panel['ItemB']
- assert 'ItemB' not in self.panel.items
- pytest.raises(Exception, self.panel.__delitem__, 'ItemB')
+ del self.panel['ItemB']
+ assert 'ItemB' not in self.panel.items
+ pytest.raises(Exception, self.panel.__delitem__, 'ItemB')
- values = np.empty((3, 3, 3))
- values[0] = 0
- values[1] = 1
- values[2] = 2
+ values = np.empty((3, 3, 3))
+ values[0] = 0
+ values[1] = 1
+ values[2] = 2
- panel = Panel(values, lrange(3), lrange(3), lrange(3))
+ panel = Panel(values, lrange(3), lrange(3), lrange(3))
- # did we delete the right row?
+ # did we delete the right row?
- panelc = panel.copy()
- del panelc[0]
- tm.assert_frame_equal(panelc[1], panel[1])
- tm.assert_frame_equal(panelc[2], panel[2])
+ panelc = panel.copy()
+ del panelc[0]
+ tm.assert_frame_equal(panelc[1], panel[1])
+ tm.assert_frame_equal(panelc[2], panel[2])
- panelc = panel.copy()
- del panelc[1]
- tm.assert_frame_equal(panelc[0], panel[0])
- tm.assert_frame_equal(panelc[2], panel[2])
+ panelc = panel.copy()
+ del panelc[1]
+ tm.assert_frame_equal(panelc[0], panel[0])
+ tm.assert_frame_equal(panelc[2], panel[2])
- panelc = panel.copy()
- del panelc[2]
- tm.assert_frame_equal(panelc[1], panel[1])
- tm.assert_frame_equal(panelc[0], panel[0])
+ panelc = panel.copy()
+ del panelc[2]
+ tm.assert_frame_equal(panelc[1], panel[1])
+ tm.assert_frame_equal(panelc[0], panel[0])
def test_setitem(self):
- with catch_warnings(record=True):
- lp = self.panel.filter(['ItemA', 'ItemB']).to_frame()
- with pytest.raises(ValueError):
- self.panel['ItemE'] = lp
-
- # DataFrame
- df = self.panel['ItemA'][2:].filter(items=['A', 'B'])
- self.panel['ItemF'] = df
- self.panel['ItemE'] = df
-
- df2 = self.panel['ItemF']
-
- assert_frame_equal(df, df2.reindex(
- index=df.index, columns=df.columns))
-
- # scalar
- self.panel['ItemG'] = 1
- self.panel['ItemE'] = True
- assert self.panel['ItemG'].values.dtype == np.int64
- assert self.panel['ItemE'].values.dtype == np.bool_
-
- # object dtype
- self.panel['ItemQ'] = 'foo'
- assert self.panel['ItemQ'].values.dtype == np.object_
-
- # boolean dtype
- self.panel['ItemP'] = self.panel['ItemA'] > 0
- assert self.panel['ItemP'].values.dtype == np.bool_
-
- pytest.raises(TypeError, self.panel.__setitem__, 'foo',
- self.panel.loc[['ItemP']])
-
- # bad shape
- p = Panel(np.random.randn(4, 3, 2))
- with tm.assert_raises_regex(ValueError,
- r"shape of value must be "
- r"\(3, 2\), shape of given "
- r"object was \(4, 2\)"):
- p[0] = np.random.randn(4, 2)
+ lp = self.panel.filter(['ItemA', 'ItemB']).to_frame()
+ with pytest.raises(ValueError):
+ self.panel['ItemE'] = lp
+
+ # DataFrame
+ df = self.panel['ItemA'][2:].filter(items=['A', 'B'])
+ self.panel['ItemF'] = df
+ self.panel['ItemE'] = df
+
+ df2 = self.panel['ItemF']
+
+ assert_frame_equal(df, df2.reindex(
+ index=df.index, columns=df.columns))
+
+ # scalar
+ self.panel['ItemG'] = 1
+ self.panel['ItemE'] = True
+ assert self.panel['ItemG'].values.dtype == np.int64
+ assert self.panel['ItemE'].values.dtype == np.bool_
+
+ # object dtype
+ self.panel['ItemQ'] = 'foo'
+ assert self.panel['ItemQ'].values.dtype == np.object_
+
+ # boolean dtype
+ self.panel['ItemP'] = self.panel['ItemA'] > 0
+ assert self.panel['ItemP'].values.dtype == np.bool_
+
+ pytest.raises(TypeError, self.panel.__setitem__, 'foo',
+ self.panel.loc[['ItemP']])
+
+ # bad shape
+ p = Panel(np.random.randn(4, 3, 2))
+ with tm.assert_raises_regex(ValueError,
+ r"shape of value must be "
+ r"\(3, 2\), shape of given "
+ r"object was \(4, 2\)"):
+ p[0] = np.random.randn(4, 2)
def test_setitem_ndarray(self):
- with catch_warnings(record=True):
- timeidx = date_range(start=datetime(2009, 1, 1),
- end=datetime(2009, 12, 31),
- freq=MonthEnd())
- lons_coarse = np.linspace(-177.5, 177.5, 72)
- lats_coarse = np.linspace(-87.5, 87.5, 36)
- P = Panel(items=timeidx, major_axis=lons_coarse,
- minor_axis=lats_coarse)
- data = np.random.randn(72 * 36).reshape((72, 36))
- key = datetime(2009, 2, 28)
- P[key] = data
-
- assert_almost_equal(P[key].values, data)
+ timeidx = date_range(start=datetime(2009, 1, 1),
+ end=datetime(2009, 12, 31),
+ freq=MonthEnd())
+ lons_coarse = np.linspace(-177.5, 177.5, 72)
+ lats_coarse = np.linspace(-87.5, 87.5, 36)
+ P = Panel(items=timeidx, major_axis=lons_coarse,
+ minor_axis=lats_coarse)
+ data = np.random.randn(72 * 36).reshape((72, 36))
+ key = datetime(2009, 2, 28)
+ P[key] = data
+
+ assert_almost_equal(P[key].values, data)
def test_set_minor_major(self):
- with catch_warnings(record=True):
- # GH 11014
- df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan])
- df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0])
- panel = Panel({'Item1': df1, 'Item2': df2})
-
- newminor = notna(panel.iloc[:, :, 0])
- panel.loc[:, :, 'NewMinor'] = newminor
- assert_frame_equal(panel.loc[:, :, 'NewMinor'],
- newminor.astype(object))
-
- newmajor = notna(panel.iloc[:, 0, :])
- panel.loc[:, 'NewMajor', :] = newmajor
- assert_frame_equal(panel.loc[:, 'NewMajor', :],
- newmajor.astype(object))
+ # GH 11014
+ df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan])
+ df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0])
+ panel = Panel({'Item1': df1, 'Item2': df2})
+
+ newminor = notna(panel.iloc[:, :, 0])
+ panel.loc[:, :, 'NewMinor'] = newminor
+ assert_frame_equal(panel.loc[:, :, 'NewMinor'],
+ newminor.astype(object))
+
+ newmajor = notna(panel.iloc[:, 0, :])
+ panel.loc[:, 'NewMajor', :] = newmajor
+ assert_frame_equal(panel.loc[:, 'NewMajor', :],
+ newmajor.astype(object))
def test_major_xs(self):
- with catch_warnings(record=True):
- ref = self.panel['ItemA']
+ ref = self.panel['ItemA']
- idx = self.panel.major_axis[5]
- xs = self.panel.major_xs(idx)
+ idx = self.panel.major_axis[5]
+ xs = self.panel.major_xs(idx)
- result = xs['ItemA']
- assert_series_equal(result, ref.xs(idx), check_names=False)
- assert result.name == 'ItemA'
+ result = xs['ItemA']
+ assert_series_equal(result, ref.xs(idx), check_names=False)
+ assert result.name == 'ItemA'
- # not contained
- idx = self.panel.major_axis[0] - BDay()
- pytest.raises(Exception, self.panel.major_xs, idx)
+ # not contained
+ idx = self.panel.major_axis[0] - BDay()
+ pytest.raises(Exception, self.panel.major_xs, idx)
def test_major_xs_mixed(self):
- with catch_warnings(record=True):
- self.panel['ItemD'] = 'foo'
- xs = self.panel.major_xs(self.panel.major_axis[0])
- assert xs['ItemA'].dtype == np.float64
- assert xs['ItemD'].dtype == np.object_
+ self.panel['ItemD'] = 'foo'
+ xs = self.panel.major_xs(self.panel.major_axis[0])
+ assert xs['ItemA'].dtype == np.float64
+ assert xs['ItemD'].dtype == np.object_
def test_minor_xs(self):
- with catch_warnings(record=True):
- ref = self.panel['ItemA']
+ ref = self.panel['ItemA']
- idx = self.panel.minor_axis[1]
- xs = self.panel.minor_xs(idx)
+ idx = self.panel.minor_axis[1]
+ xs = self.panel.minor_xs(idx)
- assert_series_equal(xs['ItemA'], ref[idx], check_names=False)
+ assert_series_equal(xs['ItemA'], ref[idx], check_names=False)
- # not contained
- pytest.raises(Exception, self.panel.minor_xs, 'E')
+ # not contained
+ pytest.raises(Exception, self.panel.minor_xs, 'E')
def test_minor_xs_mixed(self):
- with catch_warnings(record=True):
- self.panel['ItemD'] = 'foo'
+ self.panel['ItemD'] = 'foo'
- xs = self.panel.minor_xs('D')
- assert xs['ItemA'].dtype == np.float64
- assert xs['ItemD'].dtype == np.object_
+ xs = self.panel.minor_xs('D')
+ assert xs['ItemA'].dtype == np.float64
+ assert xs['ItemD'].dtype == np.object_
def test_xs(self):
- with catch_warnings(record=True):
- itemA = self.panel.xs('ItemA', axis=0)
- expected = self.panel['ItemA']
- tm.assert_frame_equal(itemA, expected)
+ itemA = self.panel.xs('ItemA', axis=0)
+ expected = self.panel['ItemA']
+ tm.assert_frame_equal(itemA, expected)
- # Get a view by default.
- itemA_view = self.panel.xs('ItemA', axis=0)
- itemA_view.values[:] = np.nan
+ # Get a view by default.
+ itemA_view = self.panel.xs('ItemA', axis=0)
+ itemA_view.values[:] = np.nan
- assert np.isnan(self.panel['ItemA'].values).all()
+ assert np.isnan(self.panel['ItemA'].values).all()
- # Mixed-type yields a copy.
- self.panel['strings'] = 'foo'
- result = self.panel.xs('D', axis=2)
- assert result._is_copy is not None
+ # Mixed-type yields a copy.
+ self.panel['strings'] = 'foo'
+ result = self.panel.xs('D', axis=2)
+ assert result._is_copy is not None
def test_getitem_fancy_labels(self):
- with catch_warnings(record=True):
- p = self.panel
+ p = self.panel
- items = p.items[[1, 0]]
- dates = p.major_axis[::2]
- cols = ['D', 'C', 'F']
+ items = p.items[[1, 0]]
+ dates = p.major_axis[::2]
+ cols = ['D', 'C', 'F']
- # all 3 specified
+ # all 3 specified
+ with catch_warnings():
+ simplefilter("ignore", FutureWarning)
+ # XXX: warning in _validate_read_indexer
assert_panel_equal(p.loc[items, dates, cols],
p.reindex(items=items, major=dates, minor=cols))
@@ -670,132 +661,127 @@ def test_getitem_fancy_xs(self):
assert_series_equal(p.loc[:, date, col], p.major_xs(date).loc[col])
def test_getitem_fancy_xs_check_view(self):
- with catch_warnings(record=True):
- item = 'ItemB'
- date = self.panel.major_axis[5]
-
- # make sure it's always a view
- NS = slice(None, None)
-
- # DataFrames
- comp = assert_frame_equal
- self._check_view(item, comp)
- self._check_view((item, NS), comp)
- self._check_view((item, NS, NS), comp)
- self._check_view((NS, date), comp)
- self._check_view((NS, date, NS), comp)
- self._check_view((NS, NS, 'C'), comp)
-
- # Series
- comp = assert_series_equal
- self._check_view((item, date), comp)
- self._check_view((item, date, NS), comp)
- self._check_view((item, NS, 'C'), comp)
- self._check_view((NS, date, 'C'), comp)
+ item = 'ItemB'
+ date = self.panel.major_axis[5]
+
+ # make sure it's always a view
+ NS = slice(None, None)
+
+ # DataFrames
+ comp = assert_frame_equal
+ self._check_view(item, comp)
+ self._check_view((item, NS), comp)
+ self._check_view((item, NS, NS), comp)
+ self._check_view((NS, date), comp)
+ self._check_view((NS, date, NS), comp)
+ self._check_view((NS, NS, 'C'), comp)
+
+ # Series
+ comp = assert_series_equal
+ self._check_view((item, date), comp)
+ self._check_view((item, date, NS), comp)
+ self._check_view((item, NS, 'C'), comp)
+ self._check_view((NS, date, 'C'), comp)
def test_getitem_callable(self):
- with catch_warnings(record=True):
- p = self.panel
- # GH 12533
+ p = self.panel
+ # GH 12533
- assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB'])
- assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']],
- p.loc[['ItemB', 'ItemC']])
+ assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB'])
+ assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']],
+ p.loc[['ItemB', 'ItemC']])
def test_ix_setitem_slice_dataframe(self):
- with catch_warnings(record=True):
- a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33],
- minor_axis=[111, 222, 333])
- b = DataFrame(np.random.randn(2, 3), index=[111, 333],
- columns=[1, 2, 3])
+ a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33],
+ minor_axis=[111, 222, 333])
+ b = DataFrame(np.random.randn(2, 3), index=[111, 333],
+ columns=[1, 2, 3])
- a.loc[:, 22, [111, 333]] = b
+ a.loc[:, 22, [111, 333]] = b
- assert_frame_equal(a.loc[:, 22, [111, 333]], b)
+ assert_frame_equal(a.loc[:, 22, [111, 333]], b)
def test_ix_align(self):
- with catch_warnings(record=True):
- from pandas import Series
- b = Series(np.random.randn(10), name=0)
- b.sort_values()
- df_orig = Panel(np.random.randn(3, 10, 2))
- df = df_orig.copy()
+ from pandas import Series
+ b = Series(np.random.randn(10), name=0)
+ b.sort_values()
+ df_orig = Panel(np.random.randn(3, 10, 2))
+ df = df_orig.copy()
- df.loc[0, :, 0] = b
- assert_series_equal(df.loc[0, :, 0].reindex(b.index), b)
+ df.loc[0, :, 0] = b
+ assert_series_equal(df.loc[0, :, 0].reindex(b.index), b)
- df = df_orig.swapaxes(0, 1)
- df.loc[:, 0, 0] = b
- assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b)
+ df = df_orig.swapaxes(0, 1)
+ df.loc[:, 0, 0] = b
+ assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b)
- df = df_orig.swapaxes(1, 2)
- df.loc[0, 0, :] = b
- assert_series_equal(df.loc[0, 0, :].reindex(b.index), b)
+ df = df_orig.swapaxes(1, 2)
+ df.loc[0, 0, :] = b
+ assert_series_equal(df.loc[0, 0, :].reindex(b.index), b)
def test_ix_frame_align(self):
- with catch_warnings(record=True):
- p_orig = tm.makePanel()
- df = p_orig.iloc[0].copy()
- assert_frame_equal(p_orig['ItemA'], df)
-
- p = p_orig.copy()
- p.iloc[0, :, :] = df
- assert_panel_equal(p, p_orig)
-
- p = p_orig.copy()
- p.iloc[0] = df
- assert_panel_equal(p, p_orig)
-
- p = p_orig.copy()
- p.iloc[0, :, :] = df
- assert_panel_equal(p, p_orig)
-
- p = p_orig.copy()
- p.iloc[0] = df
- assert_panel_equal(p, p_orig)
-
- p = p_orig.copy()
- p.loc['ItemA'] = df
- assert_panel_equal(p, p_orig)
-
- p = p_orig.copy()
- p.loc['ItemA', :, :] = df
- assert_panel_equal(p, p_orig)
-
- p = p_orig.copy()
- p['ItemA'] = df
- assert_panel_equal(p, p_orig)
-
- p = p_orig.copy()
- p.iloc[0, [0, 1, 3, 5], -2:] = df
- out = p.iloc[0, [0, 1, 3, 5], -2:]
- assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]])
-
-        # GH3830, panel assignment by values/frame
- for dtype in ['float64', 'int64']:
-
- panel = Panel(np.arange(40).reshape((2, 4, 5)),
- items=['a1', 'a2'], dtype=dtype)
- df1 = panel.iloc[0]
- df2 = panel.iloc[1]
-
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df2)
-
- # Assignment by Value Passes for 'a2'
- panel.loc['a2'] = df1.values
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df1)
-
- # Assignment by DataFrame Ok w/o loc 'a2'
- panel['a2'] = df2
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df2)
-
- # Assignment by DataFrame Fails for 'a2'
- panel.loc['a2'] = df2
- tm.assert_frame_equal(panel.loc['a1'], df1)
- tm.assert_frame_equal(panel.loc['a2'], df2)
+ p_orig = tm.makePanel()
+ df = p_orig.iloc[0].copy()
+ assert_frame_equal(p_orig['ItemA'], df)
+
+ p = p_orig.copy()
+ p.iloc[0, :, :] = df
+ assert_panel_equal(p, p_orig)
+
+ p = p_orig.copy()
+ p.iloc[0] = df
+ assert_panel_equal(p, p_orig)
+
+ p = p_orig.copy()
+ p.iloc[0, :, :] = df
+ assert_panel_equal(p, p_orig)
+
+ p = p_orig.copy()
+ p.iloc[0] = df
+ assert_panel_equal(p, p_orig)
+
+ p = p_orig.copy()
+ p.loc['ItemA'] = df
+ assert_panel_equal(p, p_orig)
+
+ p = p_orig.copy()
+ p.loc['ItemA', :, :] = df
+ assert_panel_equal(p, p_orig)
+
+ p = p_orig.copy()
+ p['ItemA'] = df
+ assert_panel_equal(p, p_orig)
+
+ p = p_orig.copy()
+ p.iloc[0, [0, 1, 3, 5], -2:] = df
+ out = p.iloc[0, [0, 1, 3, 5], -2:]
+ assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]])
+
+        # GH3830, panel assignment by values/frame
+ for dtype in ['float64', 'int64']:
+
+ panel = Panel(np.arange(40).reshape((2, 4, 5)),
+ items=['a1', 'a2'], dtype=dtype)
+ df1 = panel.iloc[0]
+ df2 = panel.iloc[1]
+
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df2)
+
+ # Assignment by Value Passes for 'a2'
+ panel.loc['a2'] = df1.values
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df1)
+
+ # Assignment by DataFrame Ok w/o loc 'a2'
+ panel['a2'] = df2
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df2)
+
+ # Assignment by DataFrame Fails for 'a2'
+ panel.loc['a2'] = df2
+ tm.assert_frame_equal(panel.loc['a1'], df1)
+ tm.assert_frame_equal(panel.loc['a2'], df2)
def _check_view(self, indexer, comp):
cp = self.panel.copy()
@@ -805,83 +791,85 @@ def _check_view(self, indexer, comp):
comp(cp.loc[indexer].reindex_like(obj), obj)
def test_logical_with_nas(self):
- with catch_warnings(record=True):
- d = Panel({'ItemA': {'a': [np.nan, False]},
- 'ItemB': {'a': [True, True]}})
+ d = Panel({'ItemA': {'a': [np.nan, False]},
+ 'ItemB': {'a': [True, True]}})
- result = d['ItemA'] | d['ItemB']
- expected = DataFrame({'a': [np.nan, True]})
- assert_frame_equal(result, expected)
+ result = d['ItemA'] | d['ItemB']
+ expected = DataFrame({'a': [np.nan, True]})
+ assert_frame_equal(result, expected)
- # this is autodowncasted here
- result = d['ItemA'].fillna(False) | d['ItemB']
- expected = DataFrame({'a': [True, True]})
- assert_frame_equal(result, expected)
+ # this is autodowncasted here
+ result = d['ItemA'].fillna(False) | d['ItemB']
+ expected = DataFrame({'a': [True, True]})
+ assert_frame_equal(result, expected)
def test_neg(self):
- with catch_warnings(record=True):
- assert_panel_equal(-self.panel, -1 * self.panel)
+ assert_panel_equal(-self.panel, -1 * self.panel)
def test_invert(self):
- with catch_warnings(record=True):
- assert_panel_equal(-(self.panel < 0), ~(self.panel < 0))
+ assert_panel_equal(-(self.panel < 0), ~(self.panel < 0))
def test_comparisons(self):
- with catch_warnings(record=True):
- p1 = tm.makePanel()
- p2 = tm.makePanel()
+ p1 = tm.makePanel()
+ p2 = tm.makePanel()
- tp = p1.reindex(items=p1.items + ['foo'])
- df = p1[p1.items[0]]
+ tp = p1.reindex(items=p1.items + ['foo'])
+ df = p1[p1.items[0]]
- def test_comp(func):
+ def test_comp(func):
- # versus same index
- result = func(p1, p2)
- tm.assert_numpy_array_equal(result.values,
- func(p1.values, p2.values))
+ # versus same index
+ result = func(p1, p2)
+ tm.assert_numpy_array_equal(result.values,
+ func(p1.values, p2.values))
- # versus non-indexed same objs
- pytest.raises(Exception, func, p1, tp)
+ # versus non-indexed same objs
+ pytest.raises(Exception, func, p1, tp)
- # versus different objs
- pytest.raises(Exception, func, p1, df)
+ # versus different objs
+ pytest.raises(Exception, func, p1, df)
- # versus scalar
- result3 = func(self.panel, 0)
- tm.assert_numpy_array_equal(result3.values,
- func(self.panel.values, 0))
+ # versus scalar
+ result3 = func(self.panel, 0)
+ tm.assert_numpy_array_equal(result3.values,
+ func(self.panel.values, 0))
- with np.errstate(invalid='ignore'):
- test_comp(operator.eq)
- test_comp(operator.ne)
- test_comp(operator.lt)
- test_comp(operator.gt)
- test_comp(operator.ge)
- test_comp(operator.le)
+ with np.errstate(invalid='ignore'):
+ test_comp(operator.eq)
+ test_comp(operator.ne)
+ test_comp(operator.lt)
+ test_comp(operator.gt)
+ test_comp(operator.ge)
+ test_comp(operator.le)
def test_get_value(self):
- with catch_warnings(record=True):
- for item in self.panel.items:
- for mjr in self.panel.major_axis[::2]:
- for mnr in self.panel.minor_axis:
+ for item in self.panel.items:
+ for mjr in self.panel.major_axis[::2]:
+ for mnr in self.panel.minor_axis:
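+                    # get_value is deprecated in favor of .at/.iat, so
+                    # each lookup should emit a FutureWarning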
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
result = self.panel.get_value(item, mjr, mnr)
- expected = self.panel[item][mnr][mjr]
- assert_almost_equal(result, expected)
+ expected = self.panel[item][mnr][mjr]
+ assert_almost_equal(result, expected)
+ with catch_warnings():
+ simplefilter("ignore", FutureWarning)
with tm.assert_raises_regex(TypeError,
"There must be an argument "
"for each axis"):
self.panel.get_value('a')
def test_set_value(self):
- with catch_warnings(record=True):
- for item in self.panel.items:
- for mjr in self.panel.major_axis[::2]:
- for mnr in self.panel.minor_axis:
+ for item in self.panel.items:
+ for mjr in self.panel.major_axis[::2]:
+ for mnr in self.panel.minor_axis:
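+                    # set_value is likewise deprecated, hence the
+                    # warning check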
+ with tm.assert_produces_warning(FutureWarning,
+ check_stacklevel=False):
self.panel.set_value(item, mjr, mnr, 1.)
- tm.assert_almost_equal(self.panel[item][mnr][mjr], 1.)
+ tm.assert_almost_equal(self.panel[item][mnr][mjr], 1.)
- # resize
+ # resize
+ with catch_warnings():
+ simplefilter("ignore", FutureWarning)
res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5)
assert isinstance(res, Panel)
assert res is not self.panel
@@ -896,6 +884,7 @@ def test_set_value(self):
self.panel.set_value('a')
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestPanel(PanelTests, CheckIndexing, SafeForLongAndSparse,
SafeForSparse):
@@ -906,314 +895,298 @@ def setup_method(self, method):
self.panel.items.name = None
def test_constructor(self):
- with catch_warnings(record=True):
- # with BlockManager
- wp = Panel(self.panel._data)
- assert wp._data is self.panel._data
-
- wp = Panel(self.panel._data, copy=True)
- assert wp._data is not self.panel._data
- tm.assert_panel_equal(wp, self.panel)
-
-            # strings handled properly
- wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]])
- assert wp.values.dtype == np.object_
-
- vals = self.panel.values
-
- # no copy
- wp = Panel(vals)
- assert wp.values is vals
-
- # copy
- wp = Panel(vals, copy=True)
- assert wp.values is not vals
-
- # GH #8285, test when scalar data is used to construct a Panel
- # if dtype is not passed, it should be inferred
- value_and_dtype = [(1, 'int64'), (3.14, 'float64'),
- ('foo', np.object_)]
- for (val, dtype) in value_and_dtype:
- wp = Panel(val, items=range(2), major_axis=range(3),
- minor_axis=range(4))
- vals = np.empty((2, 3, 4), dtype=dtype)
- vals.fill(val)
-
- tm.assert_panel_equal(wp, Panel(vals, dtype=dtype))
-
- # test the case when dtype is passed
- wp = Panel(1, items=range(2), major_axis=range(3),
- minor_axis=range(4),
- dtype='float32')
- vals = np.empty((2, 3, 4), dtype='float32')
- vals.fill(1)
-
- tm.assert_panel_equal(wp, Panel(vals, dtype='float32'))
+ # with BlockManager
+ wp = Panel(self.panel._data)
+ assert wp._data is self.panel._data
+
+ wp = Panel(self.panel._data, copy=True)
+ assert wp._data is not self.panel._data
+ tm.assert_panel_equal(wp, self.panel)
+
+        # strings handled properly
+ wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]])
+ assert wp.values.dtype == np.object_
+
+ vals = self.panel.values
+
+ # no copy
+ wp = Panel(vals)
+ assert wp.values is vals
+
+ # copy
+ wp = Panel(vals, copy=True)
+ assert wp.values is not vals
+
+ # GH #8285, test when scalar data is used to construct a Panel
+ # if dtype is not passed, it should be inferred
+ value_and_dtype = [(1, 'int64'), (3.14, 'float64'),
+ ('foo', np.object_)]
+ for (val, dtype) in value_and_dtype:
+ wp = Panel(val, items=range(2), major_axis=range(3),
+ minor_axis=range(4))
+ vals = np.empty((2, 3, 4), dtype=dtype)
+ vals.fill(val)
+
+ tm.assert_panel_equal(wp, Panel(vals, dtype=dtype))
+
+ # test the case when dtype is passed
+ wp = Panel(1, items=range(2), major_axis=range(3),
+ minor_axis=range(4),
+ dtype='float32')
+ vals = np.empty((2, 3, 4), dtype='float32')
+ vals.fill(1)
+
+ tm.assert_panel_equal(wp, Panel(vals, dtype='float32'))
def test_constructor_cast(self):
- with catch_warnings(record=True):
- zero_filled = self.panel.fillna(0)
+ zero_filled = self.panel.fillna(0)
- casted = Panel(zero_filled._data, dtype=int)
- casted2 = Panel(zero_filled.values, dtype=int)
+ casted = Panel(zero_filled._data, dtype=int)
+ casted2 = Panel(zero_filled.values, dtype=int)
- exp_values = zero_filled.values.astype(int)
- assert_almost_equal(casted.values, exp_values)
- assert_almost_equal(casted2.values, exp_values)
+ exp_values = zero_filled.values.astype(int)
+ assert_almost_equal(casted.values, exp_values)
+ assert_almost_equal(casted2.values, exp_values)
- casted = Panel(zero_filled._data, dtype=np.int32)
- casted2 = Panel(zero_filled.values, dtype=np.int32)
+ casted = Panel(zero_filled._data, dtype=np.int32)
+ casted2 = Panel(zero_filled.values, dtype=np.int32)
- exp_values = zero_filled.values.astype(np.int32)
- assert_almost_equal(casted.values, exp_values)
- assert_almost_equal(casted2.values, exp_values)
+ exp_values = zero_filled.values.astype(np.int32)
+ assert_almost_equal(casted.values, exp_values)
+ assert_almost_equal(casted2.values, exp_values)
- # can't cast
- data = [[['foo', 'bar', 'baz']]]
- pytest.raises(ValueError, Panel, data, dtype=float)
+ # can't cast
+ data = [[['foo', 'bar', 'baz']]]
+ pytest.raises(ValueError, Panel, data, dtype=float)
def test_constructor_empty_panel(self):
- with catch_warnings(record=True):
- empty = Panel()
- assert len(empty.items) == 0
- assert len(empty.major_axis) == 0
- assert len(empty.minor_axis) == 0
+ empty = Panel()
+ assert len(empty.items) == 0
+ assert len(empty.major_axis) == 0
+ assert len(empty.minor_axis) == 0
def test_constructor_observe_dtype(self):
- with catch_warnings(record=True):
- # GH #411
- panel = Panel(items=lrange(3), major_axis=lrange(3),
- minor_axis=lrange(3), dtype='O')
- assert panel.values.dtype == np.object_
+ # GH #411
+ panel = Panel(items=lrange(3), major_axis=lrange(3),
+ minor_axis=lrange(3), dtype='O')
+ assert panel.values.dtype == np.object_
def test_constructor_dtypes(self):
- with catch_warnings(record=True):
- # GH #797
-
- def _check_dtype(panel, dtype):
- for i in panel.items:
- assert panel[i].values.dtype.name == dtype
-
- # only nan holding types allowed here
- for dtype in ['float64', 'float32', 'object']:
- panel = Panel(items=lrange(2), major_axis=lrange(10),
- minor_axis=lrange(5), dtype=dtype)
- _check_dtype(panel, dtype)
-
- for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
- panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype),
- items=lrange(2),
- major_axis=lrange(10),
- minor_axis=lrange(5), dtype=dtype)
- _check_dtype(panel, dtype)
-
- for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
- panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'),
- items=lrange(2),
- major_axis=lrange(10),
- minor_axis=lrange(5), dtype=dtype)
- _check_dtype(panel, dtype)
-
- for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
- panel = Panel(
- np.random.randn(2, 10, 5),
- items=lrange(2), major_axis=lrange(10),
- minor_axis=lrange(5),
- dtype=dtype)
- _check_dtype(panel, dtype)
-
- for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
- df1 = DataFrame(np.random.randn(2, 5),
- index=lrange(2), columns=lrange(5))
- df2 = DataFrame(np.random.randn(2, 5),
- index=lrange(2), columns=lrange(5))
- panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype)
- _check_dtype(panel, dtype)
+ # GH #797
+
+ def _check_dtype(panel, dtype):
+ for i in panel.items:
+ assert panel[i].values.dtype.name == dtype
+
+ # only nan holding types allowed here
+ for dtype in ['float64', 'float32', 'object']:
+ panel = Panel(items=lrange(2), major_axis=lrange(10),
+ minor_axis=lrange(5), dtype=dtype)
+ _check_dtype(panel, dtype)
+
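+ # ndarray input that already carries the requested dtype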
+ for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
+ panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype),
+ items=lrange(2),
+ major_axis=lrange(10),
+ minor_axis=lrange(5), dtype=dtype)
+ _check_dtype(panel, dtype)
+
+ for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
+ panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'),
+ items=lrange(2),
+ major_axis=lrange(10),
+ minor_axis=lrange(5), dtype=dtype)
+ _check_dtype(panel, dtype)
+
+ for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
+ panel = Panel(
+ np.random.randn(2, 10, 5),
+ items=lrange(2), major_axis=lrange(10),
+ minor_axis=lrange(5),
+ dtype=dtype)
+ _check_dtype(panel, dtype)
+
+ for dtype in ['float64', 'float32', 'int64', 'int32', 'object']:
+ df1 = DataFrame(np.random.randn(2, 5),
+ index=lrange(2), columns=lrange(5))
+ df2 = DataFrame(np.random.randn(2, 5),
+ index=lrange(2), columns=lrange(5))
+ panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype)
+ _check_dtype(panel, dtype)
def test_constructor_fails_with_not_3d_input(self):
- with catch_warnings(record=True):
- with tm.assert_raises_regex(ValueError, "The number of dimensions required is 3"): # noqa
- Panel(np.random.randn(10, 2))
+ with tm.assert_raises_regex(ValueError, "The number of dimensions required is 3"): # noqa
+ Panel(np.random.randn(10, 2))
def test_consolidate(self):
- with catch_warnings(record=True):
- assert self.panel._data.is_consolidated()
+ assert self.panel._data.is_consolidated()
- self.panel['foo'] = 1.
- assert not self.panel._data.is_consolidated()
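+ # adding a new item creates a new block, so the data is no longer consolidated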
+ self.panel['foo'] = 1.
+ assert not self.panel._data.is_consolidated()
- panel = self.panel._consolidate()
- assert panel._data.is_consolidated()
+ panel = self.panel._consolidate()
+ assert panel._data.is_consolidated()
def test_ctor_dict(self):
- with catch_warnings(record=True):
- itema = self.panel['ItemA']
- itemb = self.panel['ItemB']
+ itema = self.panel['ItemA']
+ itemb = self.panel['ItemB']
- d = {'A': itema, 'B': itemb[5:]}
- d2 = {'A': itema._series, 'B': itemb[5:]._series}
- d3 = {'A': None,
- 'B': DataFrame(itemb[5:]._series),
- 'C': DataFrame(itema._series)}
+ d = {'A': itema, 'B': itemb[5:]}
+ d2 = {'A': itema._series, 'B': itemb[5:]._series}
+ d3 = {'A': None,
+ 'B': DataFrame(itemb[5:]._series),
+ 'C': DataFrame(itema._series)}
- wp = Panel.from_dict(d)
- wp2 = Panel.from_dict(d2) # nested Dict
+ wp = Panel.from_dict(d)
+ wp2 = Panel.from_dict(d2) # nested Dict
- # TODO: unused?
- wp3 = Panel.from_dict(d3) # noqa
+ # TODO: unused?
+ wp3 = Panel.from_dict(d3) # noqa
- tm.assert_index_equal(wp.major_axis, self.panel.major_axis)
- assert_panel_equal(wp, wp2)
+ tm.assert_index_equal(wp.major_axis, self.panel.major_axis)
+ assert_panel_equal(wp, wp2)
- # intersect
- wp = Panel.from_dict(d, intersect=True)
- tm.assert_index_equal(wp.major_axis, itemb.index[5:])
+ # intersect
+ wp = Panel.from_dict(d, intersect=True)
+ tm.assert_index_equal(wp.major_axis, itemb.index[5:])
- # use constructor
- assert_panel_equal(Panel(d), Panel.from_dict(d))
- assert_panel_equal(Panel(d2), Panel.from_dict(d2))
- assert_panel_equal(Panel(d3), Panel.from_dict(d3))
+ # use constructor
+ assert_panel_equal(Panel(d), Panel.from_dict(d))
+ assert_panel_equal(Panel(d2), Panel.from_dict(d2))
+ assert_panel_equal(Panel(d3), Panel.from_dict(d3))
- # a pathological case
- d4 = {'A': None, 'B': None}
+ # a pathological case
+ d4 = {'A': None, 'B': None}
- # TODO: unused?
- wp4 = Panel.from_dict(d4) # noqa
+ # TODO: unused?
+ wp4 = Panel.from_dict(d4) # noqa
- assert_panel_equal(Panel(d4), Panel(items=['A', 'B']))
+ assert_panel_equal(Panel(d4), Panel(items=['A', 'B']))
- # cast
- dcasted = {k: v.reindex(wp.major_axis).fillna(0)
- for k, v in compat.iteritems(d)}
- result = Panel(dcasted, dtype=int)
- expected = Panel({k: v.astype(int)
- for k, v in compat.iteritems(dcasted)})
- assert_panel_equal(result, expected)
+ # cast
+ dcasted = {k: v.reindex(wp.major_axis).fillna(0)
+ for k, v in compat.iteritems(d)}
+ result = Panel(dcasted, dtype=int)
+ expected = Panel({k: v.astype(int)
+ for k, v in compat.iteritems(dcasted)})
+ assert_panel_equal(result, expected)
- result = Panel(dcasted, dtype=np.int32)
- expected = Panel({k: v.astype(np.int32)
- for k, v in compat.iteritems(dcasted)})
- assert_panel_equal(result, expected)
+ result = Panel(dcasted, dtype=np.int32)
+ expected = Panel({k: v.astype(np.int32)
+ for k, v in compat.iteritems(dcasted)})
+ assert_panel_equal(result, expected)
def test_constructor_dict_mixed(self):
- with catch_warnings(record=True):
- data = {k: v.values for k, v in self.panel.iteritems()}
- result = Panel(data)
- exp_major = Index(np.arange(len(self.panel.major_axis)))
- tm.assert_index_equal(result.major_axis, exp_major)
+ data = {k: v.values for k, v in self.panel.iteritems()}
+ result = Panel(data)
+ exp_major = Index(np.arange(len(self.panel.major_axis)))
+ tm.assert_index_equal(result.major_axis, exp_major)
- result = Panel(data, items=self.panel.items,
- major_axis=self.panel.major_axis,
- minor_axis=self.panel.minor_axis)
- assert_panel_equal(result, self.panel)
+ result = Panel(data, items=self.panel.items,
+ major_axis=self.panel.major_axis,
+ minor_axis=self.panel.minor_axis)
+ assert_panel_equal(result, self.panel)
- data['ItemC'] = self.panel['ItemC']
- result = Panel(data)
- assert_panel_equal(result, self.panel)
+ data['ItemC'] = self.panel['ItemC']
+ result = Panel(data)
+ assert_panel_equal(result, self.panel)
- # corner, blow up
- data['ItemB'] = data['ItemB'][:-1]
- pytest.raises(Exception, Panel, data)
+ # corner, blow up
+ data['ItemB'] = data['ItemB'][:-1]
+ pytest.raises(Exception, Panel, data)
- data['ItemB'] = self.panel['ItemB'].values[:, :-1]
- pytest.raises(Exception, Panel, data)
+ data['ItemB'] = self.panel['ItemB'].values[:, :-1]
+ pytest.raises(Exception, Panel, data)
def test_ctor_orderedDict(self):
- with catch_warnings(record=True):
- keys = list(set(np.random.randint(0, 5000, 100)))[
- :50] # unique random int keys
- d = OrderedDict([(k, mkdf(10, 5)) for k in keys])
- p = Panel(d)
- assert list(p.items) == keys
+ # unique random int keys
+ keys = list(set(np.random.randint(0, 5000, 100)))[:50]
+ d = OrderedDict([(k, mkdf(10, 5)) for k in keys])
+ p = Panel(d)
+ assert list(p.items) == keys
- p = Panel.from_dict(d)
- assert list(p.items) == keys
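+ # from_dict should preserve the OrderedDict insertion order as well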
+ p = Panel.from_dict(d)
+ assert list(p.items) == keys
def test_constructor_resize(self):
- with catch_warnings(record=True):
- data = self.panel._data
- items = self.panel.items[:-1]
- major = self.panel.major_axis[:-1]
- minor = self.panel.minor_axis[:-1]
-
- result = Panel(data, items=items,
- major_axis=major, minor_axis=minor)
- expected = self.panel.reindex(
- items=items, major=major, minor=minor)
- assert_panel_equal(result, expected)
-
- result = Panel(data, items=items, major_axis=major)
- expected = self.panel.reindex(items=items, major=major)
- assert_panel_equal(result, expected)
-
- result = Panel(data, items=items)
- expected = self.panel.reindex(items=items)
- assert_panel_equal(result, expected)
-
- result = Panel(data, minor_axis=minor)
- expected = self.panel.reindex(minor=minor)
- assert_panel_equal(result, expected)
+ data = self.panel._data
+ items = self.panel.items[:-1]
+ major = self.panel.major_axis[:-1]
+ minor = self.panel.minor_axis[:-1]
+
+ result = Panel(data, items=items,
+ major_axis=major, minor_axis=minor)
+ expected = self.panel.reindex(
+ items=items, major=major, minor=minor)
+ assert_panel_equal(result, expected)
+
+ result = Panel(data, items=items, major_axis=major)
+ expected = self.panel.reindex(items=items, major=major)
+ assert_panel_equal(result, expected)
+
+ result = Panel(data, items=items)
+ expected = self.panel.reindex(items=items)
+ assert_panel_equal(result, expected)
+
+ result = Panel(data, minor_axis=minor)
+ expected = self.panel.reindex(minor=minor)
+ assert_panel_equal(result, expected)
def test_from_dict_mixed_orient(self):
- with catch_warnings(record=True):
- df = tm.makeDataFrame()
- df['foo'] = 'bar'
+ df = tm.makeDataFrame()
+ df['foo'] = 'bar'
- data = {'k1': df, 'k2': df}
+ data = {'k1': df, 'k2': df}
- panel = Panel.from_dict(data, orient='minor')
+ panel = Panel.from_dict(data, orient='minor')
- assert panel['foo'].values.dtype == np.object_
- assert panel['A'].values.dtype == np.float64
+ assert panel['foo'].values.dtype == np.object_
+ assert panel['A'].values.dtype == np.float64
def test_constructor_error_msgs(self):
- with catch_warnings(record=True):
- def testit():
- Panel(np.random.randn(3, 4, 5),
- lrange(4), lrange(5), lrange(5))
-
- tm.assert_raises_regex(ValueError,
- r"Shape of passed values is "
- r"\(3, 4, 5\), indices imply "
- r"\(4, 5, 5\)",
- testit)
-
- def testit():
- Panel(np.random.randn(3, 4, 5),
- lrange(5), lrange(4), lrange(5))
-
- tm.assert_raises_regex(ValueError,
- r"Shape of passed values is "
- r"\(3, 4, 5\), indices imply "
- r"\(5, 4, 5\)",
- testit)
-
- def testit():
- Panel(np.random.randn(3, 4, 5),
- lrange(5), lrange(5), lrange(4))
-
- tm.assert_raises_regex(ValueError,
- r"Shape of passed values is "
- r"\(3, 4, 5\), indices imply "
- r"\(5, 5, 4\)",
- testit)
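+ # each case supplies axis lengths that disagree with the shape of the data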
+ def testit():
+ Panel(np.random.randn(3, 4, 5),
+ lrange(4), lrange(5), lrange(5))
+
+ tm.assert_raises_regex(ValueError,
+ r"Shape of passed values is "
+ r"\(3, 4, 5\), indices imply "
+ r"\(4, 5, 5\)",
+ testit)
+
+ def testit():
+ Panel(np.random.randn(3, 4, 5),
+ lrange(5), lrange(4), lrange(5))
+
+ tm.assert_raises_regex(ValueError,
+ r"Shape of passed values is "
+ r"\(3, 4, 5\), indices imply "
+ r"\(5, 4, 5\)",
+ testit)
+
+ def testit():
+ Panel(np.random.randn(3, 4, 5),
+ lrange(5), lrange(5), lrange(4))
+
+ tm.assert_raises_regex(ValueError,
+ r"Shape of passed values is "
+ r"\(3, 4, 5\), indices imply "
+ r"\(5, 5, 4\)",
+ testit)
def test_conform(self):
- with catch_warnings(record=True):
- df = self.panel['ItemA'][:-5].filter(items=['A', 'B'])
- conformed = self.panel.conform(df)
+ df = self.panel['ItemA'][:-5].filter(items=['A', 'B'])
+ conformed = self.panel.conform(df)
- tm.assert_index_equal(conformed.index, self.panel.major_axis)
- tm.assert_index_equal(conformed.columns, self.panel.minor_axis)
+ tm.assert_index_equal(conformed.index, self.panel.major_axis)
+ tm.assert_index_equal(conformed.columns, self.panel.minor_axis)
def test_convert_objects(self):
- with catch_warnings(record=True):
-
- # GH 4937
- p = Panel(dict(A=dict(a=['1', '1.0'])))
- expected = Panel(dict(A=dict(a=[1, 1.0])))
- result = p._convert(numeric=True, coerce=True)
- assert_panel_equal(result, expected)
+ # GH 4937
+ p = Panel(dict(A=dict(a=['1', '1.0'])))
+ expected = Panel(dict(A=dict(a=[1, 1.0])))
+ result = p._convert(numeric=True, coerce=True)
+ assert_panel_equal(result, expected)
def test_dtypes(self):
@@ -1222,964 +1195,933 @@ def test_dtypes(self):
assert_series_equal(result, expected)
def test_astype(self):
- with catch_warnings(record=True):
- # GH7271
- data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
- panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f'])
+ # GH7271
+ data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
+ panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f'])
- str_data = np.array([[['1', '2'], ['3', '4']],
- [['5', '6'], ['7', '8']]])
- expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f'])
- assert_panel_equal(panel.astype(str), expected)
+ str_data = np.array([[['1', '2'], ['3', '4']],
+ [['5', '6'], ['7', '8']]])
+ expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f'])
+ assert_panel_equal(panel.astype(str), expected)
- pytest.raises(NotImplementedError, panel.astype, {0: str})
+ pytest.raises(NotImplementedError, panel.astype, {0: str})
def test_apply(self):
- with catch_warnings(record=True):
- # GH1148
-
- # ufunc
- applied = self.panel.apply(np.sqrt)
- with np.errstate(invalid='ignore'):
- expected = np.sqrt(self.panel.values)
- assert_almost_equal(applied.values, expected)
-
- # ufunc same shape
- result = self.panel.apply(lambda x: x * 2, axis='items')
- expected = self.panel * 2
- assert_panel_equal(result, expected)
- result = self.panel.apply(lambda x: x * 2, axis='major_axis')
- expected = self.panel * 2
- assert_panel_equal(result, expected)
- result = self.panel.apply(lambda x: x * 2, axis='minor_axis')
- expected = self.panel * 2
- assert_panel_equal(result, expected)
-
- # reduction to DataFrame
- result = self.panel.apply(lambda x: x.dtype, axis='items')
- expected = DataFrame(np.dtype('float64'),
- index=self.panel.major_axis,
- columns=self.panel.minor_axis)
- assert_frame_equal(result, expected)
- result = self.panel.apply(lambda x: x.dtype, axis='major_axis')
- expected = DataFrame(np.dtype('float64'),
- index=self.panel.minor_axis,
- columns=self.panel.items)
- assert_frame_equal(result, expected)
- result = self.panel.apply(lambda x: x.dtype, axis='minor_axis')
- expected = DataFrame(np.dtype('float64'),
- index=self.panel.major_axis,
- columns=self.panel.items)
- assert_frame_equal(result, expected)
-
- # reductions via other dims
- expected = self.panel.sum(0)
- result = self.panel.apply(lambda x: x.sum(), axis='items')
- assert_frame_equal(result, expected)
- expected = self.panel.sum(1)
- result = self.panel.apply(lambda x: x.sum(), axis='major_axis')
- assert_frame_equal(result, expected)
- expected = self.panel.sum(2)
- result = self.panel.apply(lambda x: x.sum(), axis='minor_axis')
- assert_frame_equal(result, expected)
-
- # pass kwargs
- result = self.panel.apply(
- lambda x, y: x.sum() + y, axis='items', y=5)
- expected = self.panel.sum(0) + 5
- assert_frame_equal(result, expected)
+ # GH1148
+
+ # ufunc
+ applied = self.panel.apply(np.sqrt)
+ with np.errstate(invalid='ignore'):
+ expected = np.sqrt(self.panel.values)
+ assert_almost_equal(applied.values, expected)
+
+ # ufunc same shape
+ result = self.panel.apply(lambda x: x * 2, axis='items')
+ expected = self.panel * 2
+ assert_panel_equal(result, expected)
+ result = self.panel.apply(lambda x: x * 2, axis='major_axis')
+ expected = self.panel * 2
+ assert_panel_equal(result, expected)
+ result = self.panel.apply(lambda x: x * 2, axis='minor_axis')
+ expected = self.panel * 2
+ assert_panel_equal(result, expected)
+
+ # reduction to DataFrame
+ result = self.panel.apply(lambda x: x.dtype, axis='items')
+ expected = DataFrame(np.dtype('float64'),
+ index=self.panel.major_axis,
+ columns=self.panel.minor_axis)
+ assert_frame_equal(result, expected)
+ result = self.panel.apply(lambda x: x.dtype, axis='major_axis')
+ expected = DataFrame(np.dtype('float64'),
+ index=self.panel.minor_axis,
+ columns=self.panel.items)
+ assert_frame_equal(result, expected)
+ result = self.panel.apply(lambda x: x.dtype, axis='minor_axis')
+ expected = DataFrame(np.dtype('float64'),
+ index=self.panel.major_axis,
+ columns=self.panel.items)
+ assert_frame_equal(result, expected)
+
+ # reductions via other dims
+ expected = self.panel.sum(0)
+ result = self.panel.apply(lambda x: x.sum(), axis='items')
+ assert_frame_equal(result, expected)
+ expected = self.panel.sum(1)
+ result = self.panel.apply(lambda x: x.sum(), axis='major_axis')
+ assert_frame_equal(result, expected)
+ expected = self.panel.sum(2)
+ result = self.panel.apply(lambda x: x.sum(), axis='minor_axis')
+ assert_frame_equal(result, expected)
+
+ # pass kwargs
+ result = self.panel.apply(
+ lambda x, y: x.sum() + y, axis='items', y=5)
+ expected = self.panel.sum(0) + 5
+ assert_frame_equal(result, expected)
def test_apply_slabs(self):
- with catch_warnings(record=True):
-
- # same shape as original
- result = self.panel.apply(lambda x: x * 2,
- axis=['items', 'major_axis'])
- expected = (self.panel * 2).transpose('minor_axis', 'major_axis',
- 'items')
- assert_panel_equal(result, expected)
- result = self.panel.apply(lambda x: x * 2,
- axis=['major_axis', 'items'])
- assert_panel_equal(result, expected)
-
- result = self.panel.apply(lambda x: x * 2,
- axis=['items', 'minor_axis'])
- expected = (self.panel * 2).transpose('major_axis', 'minor_axis',
- 'items')
- assert_panel_equal(result, expected)
- result = self.panel.apply(lambda x: x * 2,
- axis=['minor_axis', 'items'])
- assert_panel_equal(result, expected)
-
- result = self.panel.apply(lambda x: x * 2,
- axis=['major_axis', 'minor_axis'])
- expected = self.panel * 2
- assert_panel_equal(result, expected)
- result = self.panel.apply(lambda x: x * 2,
- axis=['minor_axis', 'major_axis'])
- assert_panel_equal(result, expected)
-
- # reductions
- result = self.panel.apply(lambda x: x.sum(0), axis=[
- 'items', 'major_axis'
- ])
- expected = self.panel.sum(1).T
- assert_frame_equal(result, expected)
+
+ # same shape as original
+ result = self.panel.apply(lambda x: x * 2,
+ axis=['items', 'major_axis'])
+ expected = (self.panel * 2).transpose('minor_axis', 'major_axis',
+ 'items')
+ assert_panel_equal(result, expected)
+ result = self.panel.apply(lambda x: x * 2,
+ axis=['major_axis', 'items'])
+ assert_panel_equal(result, expected)
+
+ result = self.panel.apply(lambda x: x * 2,
+ axis=['items', 'minor_axis'])
+ expected = (self.panel * 2).transpose('major_axis', 'minor_axis',
+ 'items')
+ assert_panel_equal(result, expected)
+ result = self.panel.apply(lambda x: x * 2,
+ axis=['minor_axis', 'items'])
+ assert_panel_equal(result, expected)
+
+ result = self.panel.apply(lambda x: x * 2,
+ axis=['major_axis', 'minor_axis'])
+ expected = self.panel * 2
+ assert_panel_equal(result, expected)
+ result = self.panel.apply(lambda x: x * 2,
+ axis=['minor_axis', 'major_axis'])
+ assert_panel_equal(result, expected)
+
+ # reductions
+ result = self.panel.apply(lambda x: x.sum(0),
+ axis=['items', 'major_axis'])
+ expected = self.panel.sum(1).T
+ assert_frame_equal(result, expected)
+
+ result = self.panel.apply(lambda x: x.sum(1),
+ axis=['items', 'major_axis'])
+ expected = self.panel.sum(0)
+ assert_frame_equal(result, expected)
+
+ # transforms
+ f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T
# make sure that we don't trigger any warnings
- with catch_warnings(record=True):
- result = self.panel.apply(lambda x: x.sum(1), axis=[
- 'items', 'major_axis'
- ])
- expected = self.panel.sum(0)
- assert_frame_equal(result, expected)
-
- # transforms
- f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T
-
- # make sure that we don't trigger any warnings
- result = self.panel.apply(f, axis=['items', 'major_axis'])
- expected = Panel({ax: f(self.panel.loc[:, :, ax])
- for ax in self.panel.minor_axis})
- assert_panel_equal(result, expected)
-
- result = self.panel.apply(f, axis=['major_axis', 'minor_axis'])
- expected = Panel({ax: f(self.panel.loc[ax])
- for ax in self.panel.items})
- assert_panel_equal(result, expected)
-
- result = self.panel.apply(f, axis=['minor_axis', 'items'])
- expected = Panel({ax: f(self.panel.loc[:, ax])
- for ax in self.panel.major_axis})
- assert_panel_equal(result, expected)
-
- # with multi-indexes
- # GH7469
- index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), (
- 'two', 'a'), ('two', 'b')])
- dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape(
- 4, 3), columns=list("ABC"), index=index)
- dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape(
- 4, 3), columns=list("ABC"), index=index)
- p = Panel({'f': dfa, 'g': dfb})
- result = p.apply(lambda x: x.sum(), axis=0)
-
- # on windows this will be in32
- result = result.astype('int64')
- expected = p.sum(0)
- assert_frame_equal(result, expected)
+ result = self.panel.apply(f, axis=['items', 'major_axis'])
+ expected = Panel({ax: f(self.panel.loc[:, :, ax])
+ for ax in self.panel.minor_axis})
+ assert_panel_equal(result, expected)
+
+ result = self.panel.apply(f, axis=['major_axis', 'minor_axis'])
+ expected = Panel({ax: f(self.panel.loc[ax])
+ for ax in self.panel.items})
+ assert_panel_equal(result, expected)
+
+ result = self.panel.apply(f, axis=['minor_axis', 'items'])
+ expected = Panel({ax: f(self.panel.loc[:, ax])
+ for ax in self.panel.major_axis})
+ assert_panel_equal(result, expected)
+
+ # with multi-indexes
+ # GH7469
+ index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
+ ('two', 'a'), ('two', 'b')])
+ dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape(4, 3),
+ columns=list("ABC"), index=index)
+ dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape(4, 3),
+ columns=list("ABC"), index=index)
+ p = Panel({'f': dfa, 'g': dfb})
+ result = p.apply(lambda x: x.sum(), axis=0)
+
+ # on Windows this will be int32
+ result = result.astype('int64')
+ expected = p.sum(0)
+ assert_frame_equal(result, expected)
def test_apply_no_or_zero_ndim(self):
- with catch_warnings(record=True):
- # GH10332
- self.panel = Panel(np.random.rand(5, 5, 5))
+ # GH10332
+ self.panel = Panel(np.random.rand(5, 5, 5))
- result_int = self.panel.apply(lambda df: 0, axis=[1, 2])
- result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2])
- result_int64 = self.panel.apply(
- lambda df: np.int64(0), axis=[1, 2])
- result_float64 = self.panel.apply(lambda df: np.float64(0.0),
- axis=[1, 2])
+ result_int = self.panel.apply(lambda df: 0, axis=[1, 2])
+ result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2])
+ result_int64 = self.panel.apply(
+ lambda df: np.int64(0), axis=[1, 2])
+ result_float64 = self.panel.apply(lambda df: np.float64(0.0),
+ axis=[1, 2])
- expected_int = expected_int64 = Series([0] * 5)
- expected_float = expected_float64 = Series([0.0] * 5)
+ expected_int = expected_int64 = Series([0] * 5)
+ expected_float = expected_float64 = Series([0.0] * 5)
- assert_series_equal(result_int, expected_int)
- assert_series_equal(result_int64, expected_int64)
- assert_series_equal(result_float, expected_float)
- assert_series_equal(result_float64, expected_float64)
+ assert_series_equal(result_int, expected_int)
+ assert_series_equal(result_int64, expected_int64)
+ assert_series_equal(result_float, expected_float)
+ assert_series_equal(result_float64, expected_float64)
def test_reindex(self):
- with catch_warnings(record=True):
- ref = self.panel['ItemB']
+ ref = self.panel['ItemB']
- # items
- result = self.panel.reindex(items=['ItemA', 'ItemB'])
- assert_frame_equal(result['ItemB'], ref)
+ # items
+ result = self.panel.reindex(items=['ItemA', 'ItemB'])
+ assert_frame_equal(result['ItemB'], ref)
- # major
- new_major = list(self.panel.major_axis[:10])
- result = self.panel.reindex(major=new_major)
- assert_frame_equal(result['ItemB'], ref.reindex(index=new_major))
+ # major
+ new_major = list(self.panel.major_axis[:10])
+ result = self.panel.reindex(major=new_major)
+ assert_frame_equal(result['ItemB'], ref.reindex(index=new_major))
- # raise exception put both major and major_axis
- pytest.raises(Exception, self.panel.reindex,
- major_axis=new_major,
- major=new_major)
+ # raise exception when both major and major_axis are passed
+ pytest.raises(Exception, self.panel.reindex,
+ major_axis=new_major,
+ major=new_major)
- # minor
- new_minor = list(self.panel.minor_axis[:2])
- result = self.panel.reindex(minor=new_minor)
- assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor))
+ # minor
+ new_minor = list(self.panel.minor_axis[:2])
+ result = self.panel.reindex(minor=new_minor)
+ assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor))
- # raise exception put both major and major_axis
- pytest.raises(Exception, self.panel.reindex,
- minor_axis=new_minor,
- minor=new_minor)
+ # raise exception when both minor and minor_axis are passed
+ pytest.raises(Exception, self.panel.reindex,
+ minor_axis=new_minor,
+ minor=new_minor)
- # this ok
- result = self.panel.reindex()
- assert_panel_equal(result, self.panel)
- assert result is not self.panel
+ # reindexing with no arguments is allowed and returns a new object
+ result = self.panel.reindex()
+ assert_panel_equal(result, self.panel)
+ assert result is not self.panel
- # with filling
- smaller_major = self.panel.major_axis[::5]
- smaller = self.panel.reindex(major=smaller_major)
+ # with filling
+ smaller_major = self.panel.major_axis[::5]
+ smaller = self.panel.reindex(major=smaller_major)
- larger = smaller.reindex(major=self.panel.major_axis, method='pad')
+ larger = smaller.reindex(major=self.panel.major_axis, method='pad')
- assert_frame_equal(larger.major_xs(self.panel.major_axis[1]),
- smaller.major_xs(smaller_major[0]))
+ assert_frame_equal(larger.major_xs(self.panel.major_axis[1]),
+ smaller.major_xs(smaller_major[0]))
- # don't necessarily copy
- result = self.panel.reindex(
- major=self.panel.major_axis, copy=False)
- assert_panel_equal(result, self.panel)
- assert result is self.panel
+ # don't necessarily copy
+ result = self.panel.reindex(
+ major=self.panel.major_axis, copy=False)
+ assert_panel_equal(result, self.panel)
+ assert result is self.panel
def test_reindex_axis_style(self):
- with catch_warnings(record=True):
- panel = Panel(np.random.rand(5, 5, 5))
- expected0 = Panel(panel.values).iloc[[0, 1]]
- expected1 = Panel(panel.values).iloc[:, [0, 1]]
- expected2 = Panel(panel.values).iloc[:, :, [0, 1]]
+ panel = Panel(np.random.rand(5, 5, 5))
+ expected0 = Panel(panel.values).iloc[[0, 1]]
+ expected1 = Panel(panel.values).iloc[:, [0, 1]]
+ expected2 = Panel(panel.values).iloc[:, :, [0, 1]]
- result = panel.reindex([0, 1], axis=0)
- assert_panel_equal(result, expected0)
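+ # positional axes 0/1/2 correspond to items/major_axis/minor_axis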
+ result = panel.reindex([0, 1], axis=0)
+ assert_panel_equal(result, expected0)
- result = panel.reindex([0, 1], axis=1)
- assert_panel_equal(result, expected1)
+ result = panel.reindex([0, 1], axis=1)
+ assert_panel_equal(result, expected1)
- result = panel.reindex([0, 1], axis=2)
- assert_panel_equal(result, expected2)
+ result = panel.reindex([0, 1], axis=2)
+ assert_panel_equal(result, expected2)
- result = panel.reindex([0, 1], axis=2)
- assert_panel_equal(result, expected2)
+ result = panel.reindex([0, 1], axis=2)
+ assert_panel_equal(result, expected2)
def test_reindex_multi(self):
- with catch_warnings(record=True):
-
- # with and without copy full reindexing
- result = self.panel.reindex(
- items=self.panel.items,
- major=self.panel.major_axis,
- minor=self.panel.minor_axis, copy=False)
-
- assert result.items is self.panel.items
- assert result.major_axis is self.panel.major_axis
- assert result.minor_axis is self.panel.minor_axis
-
- result = self.panel.reindex(
- items=self.panel.items,
- major=self.panel.major_axis,
- minor=self.panel.minor_axis, copy=False)
- assert_panel_equal(result, self.panel)
-
- # multi-axis indexing consistency
- # GH 5900
- df = DataFrame(np.random.randn(4, 3))
- p = Panel({'Item1': df})
- expected = Panel({'Item1': df})
- expected['Item2'] = np.nan
-
- items = ['Item1', 'Item2']
- major_axis = np.arange(4)
- minor_axis = np.arange(3)
-
- results = []
- results.append(p.reindex(items=items, major_axis=major_axis,
- copy=True))
- results.append(p.reindex(items=items, major_axis=major_axis,
- copy=False))
- results.append(p.reindex(items=items, minor_axis=minor_axis,
- copy=True))
- results.append(p.reindex(items=items, minor_axis=minor_axis,
- copy=False))
- results.append(p.reindex(items=items, major_axis=major_axis,
- minor_axis=minor_axis, copy=True))
- results.append(p.reindex(items=items, major_axis=major_axis,
- minor_axis=minor_axis, copy=False))
-
- for i, r in enumerate(results):
- assert_panel_equal(expected, r)
+
+ # with and without copy full reindexing
+ result = self.panel.reindex(
+ items=self.panel.items,
+ major=self.panel.major_axis,
+ minor=self.panel.minor_axis, copy=False)
+
+ assert result.items is self.panel.items
+ assert result.major_axis is self.panel.major_axis
+ assert result.minor_axis is self.panel.minor_axis
+
+ result = self.panel.reindex(
+ items=self.panel.items,
+ major=self.panel.major_axis,
+ minor=self.panel.minor_axis, copy=False)
+ assert_panel_equal(result, self.panel)
+
+ # multi-axis indexing consistency
+ # GH 5900
+ df = DataFrame(np.random.randn(4, 3))
+ p = Panel({'Item1': df})
+ expected = Panel({'Item1': df})
+ expected['Item2'] = np.nan
+
+ items = ['Item1', 'Item2']
+ major_axis = np.arange(4)
+ minor_axis = np.arange(3)
+
+ results = []
+ results.append(p.reindex(items=items, major_axis=major_axis,
+ copy=True))
+ results.append(p.reindex(items=items, major_axis=major_axis,
+ copy=False))
+ results.append(p.reindex(items=items, minor_axis=minor_axis,
+ copy=True))
+ results.append(p.reindex(items=items, minor_axis=minor_axis,
+ copy=False))
+ results.append(p.reindex(items=items, major_axis=major_axis,
+ minor_axis=minor_axis, copy=True))
+ results.append(p.reindex(items=items, major_axis=major_axis,
+ minor_axis=minor_axis, copy=False))
+
+ for i, r in enumerate(results):
+ assert_panel_equal(expected, r)
def test_reindex_like(self):
- with catch_warnings(record=True):
- # reindex_like
- smaller = self.panel.reindex(items=self.panel.items[:-1],
- major=self.panel.major_axis[:-1],
- minor=self.panel.minor_axis[:-1])
- smaller_like = self.panel.reindex_like(smaller)
- assert_panel_equal(smaller, smaller_like)
+ # reindex_like
+ smaller = self.panel.reindex(items=self.panel.items[:-1],
+ major=self.panel.major_axis[:-1],
+ minor=self.panel.minor_axis[:-1])
+ smaller_like = self.panel.reindex_like(smaller)
+ assert_panel_equal(smaller, smaller_like)
def test_take(self):
- with catch_warnings(record=True):
- # axis == 0
- result = self.panel.take([2, 0, 1], axis=0)
- expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB'])
- assert_panel_equal(result, expected)
+ # axis == 0
+ result = self.panel.take([2, 0, 1], axis=0)
+ expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB'])
+ assert_panel_equal(result, expected)
- # axis >= 1
- result = self.panel.take([3, 0, 1, 2], axis=2)
- expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
- assert_panel_equal(result, expected)
+ # axis >= 1
+ result = self.panel.take([3, 0, 1, 2], axis=2)
+ expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
+ assert_panel_equal(result, expected)
- # neg indices ok
- expected = self.panel.reindex(minor=['D', 'D', 'B', 'C'])
- result = self.panel.take([3, -1, 1, 2], axis=2)
- assert_panel_equal(result, expected)
+ # neg indices ok
+ expected = self.panel.reindex(minor=['D', 'D', 'B', 'C'])
+ result = self.panel.take([3, -1, 1, 2], axis=2)
+ assert_panel_equal(result, expected)
- pytest.raises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)
+ pytest.raises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)
def test_sort_index(self):
- with catch_warnings(record=True):
- import random
-
- ritems = list(self.panel.items)
- rmajor = list(self.panel.major_axis)
- rminor = list(self.panel.minor_axis)
- random.shuffle(ritems)
- random.shuffle(rmajor)
- random.shuffle(rminor)
-
- random_order = self.panel.reindex(items=ritems)
- sorted_panel = random_order.sort_index(axis=0)
- assert_panel_equal(sorted_panel, self.panel)
-
- # descending
- random_order = self.panel.reindex(items=ritems)
- sorted_panel = random_order.sort_index(axis=0, ascending=False)
- assert_panel_equal(
- sorted_panel,
- self.panel.reindex(items=self.panel.items[::-1]))
-
- random_order = self.panel.reindex(major=rmajor)
- sorted_panel = random_order.sort_index(axis=1)
- assert_panel_equal(sorted_panel, self.panel)
-
- random_order = self.panel.reindex(minor=rminor)
- sorted_panel = random_order.sort_index(axis=2)
- assert_panel_equal(sorted_panel, self.panel)
+ import random
+
+ ritems = list(self.panel.items)
+ rmajor = list(self.panel.major_axis)
+ rminor = list(self.panel.minor_axis)
+ random.shuffle(ritems)
+ random.shuffle(rmajor)
+ random.shuffle(rminor)
+
+ random_order = self.panel.reindex(items=ritems)
+ sorted_panel = random_order.sort_index(axis=0)
+ assert_panel_equal(sorted_panel, self.panel)
+
+ # descending
+ random_order = self.panel.reindex(items=ritems)
+ sorted_panel = random_order.sort_index(axis=0, ascending=False)
+ assert_panel_equal(
+ sorted_panel,
+ self.panel.reindex(items=self.panel.items[::-1]))
+
+ random_order = self.panel.reindex(major=rmajor)
+ sorted_panel = random_order.sort_index(axis=1)
+ assert_panel_equal(sorted_panel, self.panel)
+
+ random_order = self.panel.reindex(minor=rminor)
+ sorted_panel = random_order.sort_index(axis=2)
+ assert_panel_equal(sorted_panel, self.panel)
def test_fillna(self):
- with catch_warnings(record=True):
- filled = self.panel.fillna(0)
- assert np.isfinite(filled.values).all()
-
- filled = self.panel.fillna(method='backfill')
- assert_frame_equal(filled['ItemA'],
- self.panel['ItemA'].fillna(method='backfill'))
-
- panel = self.panel.copy()
- panel['str'] = 'foo'
-
- filled = panel.fillna(method='backfill')
- assert_frame_equal(filled['ItemA'],
- panel['ItemA'].fillna(method='backfill'))
-
- empty = self.panel.reindex(items=[])
- filled = empty.fillna(0)
- assert_panel_equal(filled, empty)
-
- pytest.raises(ValueError, self.panel.fillna)
- pytest.raises(ValueError, self.panel.fillna, 5, method='ffill')
-
- pytest.raises(TypeError, self.panel.fillna, [1, 2])
- pytest.raises(TypeError, self.panel.fillna, (1, 2))
-
- # limit not implemented when only value is specified
- p = Panel(np.random.randn(3, 4, 5))
- p.iloc[0:2, 0:2, 0:2] = np.nan
- pytest.raises(NotImplementedError,
- lambda: p.fillna(999, limit=1))
-
- # Test in place fillNA
- # Expected result
- expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]],
- items=['a', 'b'], minor_axis=['x', 'y'],
- dtype=np.float64)
- # method='ffill'
- p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]],
- items=['a', 'b'], minor_axis=['x', 'y'],
- dtype=np.float64)
- p1.fillna(method='ffill', inplace=True)
- assert_panel_equal(p1, expected)
-
- # method='bfill'
- p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]],
- items=['a', 'b'], minor_axis=['x', 'y'],
- dtype=np.float64)
- p2.fillna(method='bfill', inplace=True)
- assert_panel_equal(p2, expected)
+ filled = self.panel.fillna(0)
+ assert np.isfinite(filled.values).all()
+
+ filled = self.panel.fillna(method='backfill')
+ assert_frame_equal(filled['ItemA'],
+ self.panel['ItemA'].fillna(method='backfill'))
+
+ panel = self.panel.copy()
+ panel['str'] = 'foo'
+
+ filled = panel.fillna(method='backfill')
+ assert_frame_equal(filled['ItemA'],
+ panel['ItemA'].fillna(method='backfill'))
+
+ empty = self.panel.reindex(items=[])
+ filled = empty.fillna(0)
+ assert_panel_equal(filled, empty)
+
+ pytest.raises(ValueError, self.panel.fillna)
+ pytest.raises(ValueError, self.panel.fillna, 5, method='ffill')
+
+ pytest.raises(TypeError, self.panel.fillna, [1, 2])
+ pytest.raises(TypeError, self.panel.fillna, (1, 2))
+
+ # limit not implemented when only value is specified
+ p = Panel(np.random.randn(3, 4, 5))
+ p.iloc[0:2, 0:2, 0:2] = np.nan
+ pytest.raises(NotImplementedError,
+ lambda: p.fillna(999, limit=1))
+
+ # Test in-place fillna
+ # Expected result
+ expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]],
+ items=['a', 'b'], minor_axis=['x', 'y'],
+ dtype=np.float64)
+ # method='ffill'
+ p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]],
+ items=['a', 'b'], minor_axis=['x', 'y'],
+ dtype=np.float64)
+ p1.fillna(method='ffill', inplace=True)
+ assert_panel_equal(p1, expected)
+
+ # method='bfill'
+ p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]],
+ items=['a', 'b'], minor_axis=['x', 'y'],
+ dtype=np.float64)
+ p2.fillna(method='bfill', inplace=True)
+ assert_panel_equal(p2, expected)
def test_ffill_bfill(self):
- with catch_warnings(record=True):
- assert_panel_equal(self.panel.ffill(),
- self.panel.fillna(method='ffill'))
- assert_panel_equal(self.panel.bfill(),
- self.panel.fillna(method='bfill'))
+ assert_panel_equal(self.panel.ffill(),
+ self.panel.fillna(method='ffill'))
+ assert_panel_equal(self.panel.bfill(),
+ self.panel.fillna(method='bfill'))
def test_truncate_fillna_bug(self):
- with catch_warnings(record=True):
- # #1823
- result = self.panel.truncate(before=None, after=None, axis='items')
+ # #1823
+ result = self.panel.truncate(before=None, after=None, axis='items')
- # it works!
- result.fillna(value=0.0)
+ # it works!
+ result.fillna(value=0.0)
def test_swapaxes(self):
- with catch_warnings(record=True):
- result = self.panel.swapaxes('items', 'minor')
- assert result.items is self.panel.minor_axis
+ result = self.panel.swapaxes('items', 'minor')
+ assert result.items is self.panel.minor_axis
- result = self.panel.swapaxes('items', 'major')
- assert result.items is self.panel.major_axis
+ result = self.panel.swapaxes('items', 'major')
+ assert result.items is self.panel.major_axis
- result = self.panel.swapaxes('major', 'minor')
- assert result.major_axis is self.panel.minor_axis
+ result = self.panel.swapaxes('major', 'minor')
+ assert result.major_axis is self.panel.minor_axis
- panel = self.panel.copy()
- result = panel.swapaxes('major', 'minor')
- panel.values[0, 0, 1] = np.nan
- expected = panel.swapaxes('major', 'minor')
- assert_panel_equal(result, expected)
+ panel = self.panel.copy()
+ result = panel.swapaxes('major', 'minor')
+ panel.values[0, 0, 1] = np.nan
+ expected = panel.swapaxes('major', 'minor')
+ assert_panel_equal(result, expected)
- # this should also work
- result = self.panel.swapaxes(0, 1)
- assert result.items is self.panel.major_axis
+ # this should also work
+ result = self.panel.swapaxes(0, 1)
+ assert result.items is self.panel.major_axis
- # this works, but return a copy
- result = self.panel.swapaxes('items', 'items')
- assert_panel_equal(self.panel, result)
- assert id(self.panel) != id(result)
+ # this works, but returns a copy
+ result = self.panel.swapaxes('items', 'items')
+ assert_panel_equal(self.panel, result)
+ assert id(self.panel) != id(result)
def test_transpose(self):
- with catch_warnings(record=True):
- result = self.panel.transpose('minor', 'major', 'items')
- expected = self.panel.swapaxes('items', 'minor')
- assert_panel_equal(result, expected)
-
- # test kwargs
- result = self.panel.transpose(items='minor', major='major',
- minor='items')
- expected = self.panel.swapaxes('items', 'minor')
- assert_panel_equal(result, expected)
-
- # text mixture of args
- result = self.panel.transpose(
- 'minor', major='major', minor='items')
- expected = self.panel.swapaxes('items', 'minor')
- assert_panel_equal(result, expected)
-
- result = self.panel.transpose('minor',
- 'major',
- minor='items')
- expected = self.panel.swapaxes('items', 'minor')
- assert_panel_equal(result, expected)
-
- # duplicate axes
- with tm.assert_raises_regex(TypeError,
- 'not enough/duplicate arguments'):
- self.panel.transpose('minor', maj='major', minor='items')
-
- with tm.assert_raises_regex(ValueError,
- 'repeated axis in transpose'):
- self.panel.transpose('minor', 'major', major='minor',
- minor='items')
-
- result = self.panel.transpose(2, 1, 0)
- assert_panel_equal(result, expected)
-
- result = self.panel.transpose('minor', 'items', 'major')
- expected = self.panel.swapaxes('items', 'minor')
- expected = expected.swapaxes('major', 'minor')
- assert_panel_equal(result, expected)
-
- result = self.panel.transpose(2, 0, 1)
- assert_panel_equal(result, expected)
-
- pytest.raises(ValueError, self.panel.transpose, 0, 0, 1)
+ result = self.panel.transpose('minor', 'major', 'items')
+ expected = self.panel.swapaxes('items', 'minor')
+ assert_panel_equal(result, expected)
+
+ # test kwargs
+ result = self.panel.transpose(items='minor', major='major',
+ minor='items')
+ expected = self.panel.swapaxes('items', 'minor')
+ assert_panel_equal(result, expected)
+
+ # test a mixture of positional and keyword args
+ result = self.panel.transpose(
+ 'minor', major='major', minor='items')
+ expected = self.panel.swapaxes('items', 'minor')
+ assert_panel_equal(result, expected)
+
+ result = self.panel.transpose('minor',
+ 'major',
+ minor='items')
+ expected = self.panel.swapaxes('items', 'minor')
+ assert_panel_equal(result, expected)
+
+ # duplicate axes
+ with tm.assert_raises_regex(TypeError,
+ 'not enough/duplicate arguments'):
+ self.panel.transpose('minor', maj='major', minor='items')
+
+ with tm.assert_raises_regex(ValueError,
+ 'repeated axis in transpose'):
+ self.panel.transpose('minor', 'major', major='minor',
+ minor='items')
+
+ result = self.panel.transpose(2, 1, 0)
+ assert_panel_equal(result, expected)
+
+ result = self.panel.transpose('minor', 'items', 'major')
+ expected = self.panel.swapaxes('items', 'minor')
+ expected = expected.swapaxes('major', 'minor')
+ assert_panel_equal(result, expected)
+
+ result = self.panel.transpose(2, 0, 1)
+ assert_panel_equal(result, expected)
+
+ pytest.raises(ValueError, self.panel.transpose, 0, 0, 1)
def test_transpose_copy(self):
- with catch_warnings(record=True):
- panel = self.panel.copy()
- result = panel.transpose(2, 0, 1, copy=True)
- expected = panel.swapaxes('items', 'minor')
- expected = expected.swapaxes('major', 'minor')
- assert_panel_equal(result, expected)
+ panel = self.panel.copy()
+ result = panel.transpose(2, 0, 1, copy=True)
+ expected = panel.swapaxes('items', 'minor')
+ expected = expected.swapaxes('major', 'minor')
+ assert_panel_equal(result, expected)
- panel.values[0, 1, 1] = np.nan
- assert notna(result.values[1, 0, 1])
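+ # with copy=True, mutating the original must not leak into the transposed result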
+ panel.values[0, 1, 1] = np.nan
+ assert notna(result.values[1, 0, 1])
def test_to_frame(self):
- with catch_warnings(record=True):
- # filtered
- filtered = self.panel.to_frame()
- expected = self.panel.to_frame().dropna(how='any')
- assert_frame_equal(filtered, expected)
-
- # unfiltered
- unfiltered = self.panel.to_frame(filter_observations=False)
- assert_panel_equal(unfiltered.to_panel(), self.panel)
-
- # names
- assert unfiltered.index.names == ('major', 'minor')
-
- # unsorted, round trip
- df = self.panel.to_frame(filter_observations=False)
- unsorted = df.take(np.random.permutation(len(df)))
- pan = unsorted.to_panel()
- assert_panel_equal(pan, self.panel)
-
- # preserve original index names
- df = DataFrame(np.random.randn(6, 2),
- index=[['a', 'a', 'b', 'b', 'c', 'c'],
- [0, 1, 0, 1, 0, 1]],
- columns=['one', 'two'])
- df.index.names = ['foo', 'bar']
- df.columns.name = 'baz'
-
- rdf = df.to_panel().to_frame()
- assert rdf.index.names == df.index.names
- assert rdf.columns.names == df.columns.names
+ # filtered
+ filtered = self.panel.to_frame()
+ expected = self.panel.to_frame().dropna(how='any')
+ assert_frame_equal(filtered, expected)
+
+ # unfiltered
+ unfiltered = self.panel.to_frame(filter_observations=False)
+ assert_panel_equal(unfiltered.to_panel(), self.panel)
+
+ # names
+ assert unfiltered.index.names == ('major', 'minor')
+
+ # unsorted, round trip
+ df = self.panel.to_frame(filter_observations=False)
+ unsorted = df.take(np.random.permutation(len(df)))
+ pan = unsorted.to_panel()
+ assert_panel_equal(pan, self.panel)
+
+ # preserve original index names
+ df = DataFrame(np.random.randn(6, 2),
+ index=[['a', 'a', 'b', 'b', 'c', 'c'],
+ [0, 1, 0, 1, 0, 1]],
+ columns=['one', 'two'])
+ df.index.names = ['foo', 'bar']
+ df.columns.name = 'baz'
+
+ rdf = df.to_panel().to_frame()
+ assert rdf.index.names == df.index.names
+ assert rdf.columns.names == df.columns.names
def test_to_frame_mixed(self):
- with catch_warnings(record=True):
- panel = self.panel.fillna(0)
- panel['str'] = 'foo'
- panel['bool'] = panel['ItemA'] > 0
-
- lp = panel.to_frame()
- wp = lp.to_panel()
- assert wp['bool'].values.dtype == np.bool_
- # Previously, this was mutating the underlying
- # index and changing its name
- assert_frame_equal(wp['bool'], panel['bool'], check_names=False)
-
- # GH 8704
- # with categorical
- df = panel.to_frame()
- df['category'] = df['str'].astype('category')
-
- # to_panel
- # TODO: this converts back to object
- p = df.to_panel()
- expected = panel.copy()
- expected['category'] = 'foo'
- assert_panel_equal(p, expected)
+ panel = self.panel.fillna(0)
+ panel['str'] = 'foo'
+ panel['bool'] = panel['ItemA'] > 0
+
+ lp = panel.to_frame()
+ wp = lp.to_panel()
+ assert wp['bool'].values.dtype == np.bool_
+ # Previously, this was mutating the underlying
+ # index and changing its name
+ assert_frame_equal(wp['bool'], panel['bool'], check_names=False)
+
+ # GH 8704
+ # with categorical
+ df = panel.to_frame()
+ df['category'] = df['str'].astype('category')
+
+ # to_panel
+ # TODO: this converts back to object
+ p = df.to_panel()
+ expected = panel.copy()
+ expected['category'] = 'foo'
+ assert_panel_equal(p, expected)
def test_to_frame_multi_major(self):
- with catch_warnings(record=True):
- idx = MultiIndex.from_tuples(
- [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')])
- df = DataFrame([[1, 'a', 1], [2, 'b', 1],
- [3, 'c', 1], [4, 'd', 1]],
- columns=['A', 'B', 'C'], index=idx)
- wp = Panel({'i1': df, 'i2': df})
- expected_idx = MultiIndex.from_tuples(
- [
- (1, 'one', 'A'), (1, 'one', 'B'),
- (1, 'one', 'C'), (1, 'two', 'A'),
- (1, 'two', 'B'), (1, 'two', 'C'),
- (2, 'one', 'A'), (2, 'one', 'B'),
- (2, 'one', 'C'), (2, 'two', 'A'),
- (2, 'two', 'B'), (2, 'two', 'C')
- ],
- names=[None, None, 'minor'])
- expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3,
- 'c', 1, 4, 'd', 1],
- 'i2': [1, 'a', 1, 2, 'b',
- 1, 3, 'c', 1, 4, 'd', 1]},
- index=expected_idx)
- result = wp.to_frame()
- assert_frame_equal(result, expected)
-
- wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. GH #5773
- result = wp.to_frame()
- assert_frame_equal(result, expected[1:])
-
- idx = MultiIndex.from_tuples(
- [(1, 'two'), (1, 'one'), (2, 'one'), (np.nan, 'two')])
- df = DataFrame([[1, 'a', 1], [2, 'b', 1],
- [3, 'c', 1], [4, 'd', 1]],
- columns=['A', 'B', 'C'], index=idx)
- wp = Panel({'i1': df, 'i2': df})
- ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'),
- (1, 'two', 'C'),
- (1, 'one', 'A'),
- (1, 'one', 'B'),
- (1, 'one', 'C'),
- (2, 'one', 'A'),
- (2, 'one', 'B'),
- (2, 'one', 'C'),
- (np.nan, 'two', 'A'),
- (np.nan, 'two', 'B'),
- (np.nan, 'two', 'C')],
- names=[None, None, 'minor'])
- expected.index = ex_idx
- result = wp.to_frame()
- assert_frame_equal(result, expected)
+ idx = MultiIndex.from_tuples(
+ [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')])
+ df = DataFrame([[1, 'a', 1], [2, 'b', 1],
+ [3, 'c', 1], [4, 'd', 1]],
+ columns=['A', 'B', 'C'], index=idx)
+ wp = Panel({'i1': df, 'i2': df})
+ expected_idx = MultiIndex.from_tuples(
+ [
+ (1, 'one', 'A'), (1, 'one', 'B'),
+ (1, 'one', 'C'), (1, 'two', 'A'),
+ (1, 'two', 'B'), (1, 'two', 'C'),
+ (2, 'one', 'A'), (2, 'one', 'B'),
+ (2, 'one', 'C'), (2, 'two', 'A'),
+ (2, 'two', 'B'), (2, 'two', 'C')
+ ],
+ names=[None, None, 'minor'])
+ expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3,
+ 'c', 1, 4, 'd', 1],
+ 'i2': [1, 'a', 1, 2, 'b',
+ 1, 3, 'c', 1, 4, 'd', 1]},
+ index=expected_idx)
+ result = wp.to_frame()
+ assert_frame_equal(result, expected)
+
+ wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. GH #5773
+ result = wp.to_frame()
+ assert_frame_equal(result, expected[1:])
+
+ idx = MultiIndex.from_tuples(
+ [(1, 'two'), (1, 'one'), (2, 'one'), (np.nan, 'two')])
+ df = DataFrame([[1, 'a', 1], [2, 'b', 1],
+ [3, 'c', 1], [4, 'd', 1]],
+ columns=['A', 'B', 'C'], index=idx)
+ wp = Panel({'i1': df, 'i2': df})
+ ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'),
+ (1, 'two', 'C'),
+ (1, 'one', 'A'),
+ (1, 'one', 'B'),
+ (1, 'one', 'C'),
+ (2, 'one', 'A'),
+ (2, 'one', 'B'),
+ (2, 'one', 'C'),
+ (np.nan, 'two', 'A'),
+ (np.nan, 'two', 'B'),
+ (np.nan, 'two', 'C')],
+ names=[None, None, 'minor'])
+ expected.index = ex_idx
+ result = wp.to_frame()
+ assert_frame_equal(result, expected)
def test_to_frame_multi_major_minor(self):
- with catch_warnings(record=True):
- cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']],
- labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
- idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
- 2, 'two'), (3, 'three'), (4, 'four')])
- df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14],
- ['a', 'b', 'w', 'x'],
- ['c', 'd', 'y', 'z'], [-1, -2, -3, -4],
- [-5, -6, -7, -8]], columns=cols, index=idx)
- wp = Panel({'i1': df, 'i2': df})
-
- exp_idx = MultiIndex.from_tuples(
- [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'),
- (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'),
- (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'),
- (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'),
- (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'),
- (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'),
- (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'),
- (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'),
- (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'),
- (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'),
- (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'),
- (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')],
- names=[None, None, None, None])
- exp_val = [[1, 1], [2, 2], [11, 11], [12, 12],
- [3, 3], [4, 4],
- [13, 13], [14, 14], ['a', 'a'],
- ['b', 'b'], ['w', 'w'],
- ['x', 'x'], ['c', 'c'], ['d', 'd'], [
- 'y', 'y'], ['z', 'z'],
- [-1, -1], [-2, -2], [-3, -3], [-4, -4],
- [-5, -5], [-6, -6],
- [-7, -7], [-8, -8]]
- result = wp.to_frame()
- expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx)
- assert_frame_equal(result, expected)
+ cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']],
+ labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
+ idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'),
+ (2, 'two'), (3, 'three'), (4, 'four')])
+ df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14],
+ ['a', 'b', 'w', 'x'],
+ ['c', 'd', 'y', 'z'], [-1, -2, -3, -4],
+ [-5, -6, -7, -8]], columns=cols, index=idx)
+ wp = Panel({'i1': df, 'i2': df})
+
+ exp_idx = MultiIndex.from_tuples(
+ [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'),
+ (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'),
+ (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'),
+ (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'),
+ (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'),
+ (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'),
+ (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'),
+ (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'),
+ (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'),
+ (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'),
+ (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'),
+ (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')],
+ names=[None, None, None, None])
+ exp_val = [[1, 1], [2, 2], [11, 11], [12, 12],
+ [3, 3], [4, 4], [13, 13], [14, 14],
+ ['a', 'a'], ['b', 'b'], ['w', 'w'], ['x', 'x'],
+ ['c', 'c'], ['d', 'd'], ['y', 'y'], ['z', 'z'],
+ [-1, -1], [-2, -2], [-3, -3], [-4, -4],
+ [-5, -5], [-6, -6], [-7, -7], [-8, -8]]
+ result = wp.to_frame()
+ expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx)
+ assert_frame_equal(result, expected)
def test_to_frame_multi_drop_level(self):
- with catch_warnings(record=True):
- idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')])
- df = DataFrame({'A': [np.nan, 1, 2]}, index=idx)
- wp = Panel({'i1': df, 'i2': df})
- result = wp.to_frame()
- exp_idx = MultiIndex.from_tuples(
- [(2, 'one', 'A'), (2, 'two', 'A')],
- names=[None, None, 'minor'])
- expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx)
- assert_frame_equal(result, expected)
+ idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')])
+ df = DataFrame({'A': [np.nan, 1, 2]}, index=idx)
+ wp = Panel({'i1': df, 'i2': df})
+ result = wp.to_frame()
+ exp_idx = MultiIndex.from_tuples(
+ [(2, 'one', 'A'), (2, 'two', 'A')],
+ names=[None, None, 'minor'])
+ expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx)
+ assert_frame_equal(result, expected)
def test_to_panel_na_handling(self):
- with catch_warnings(record=True):
- df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)),
- index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
- [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]])
+ df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)),
+ index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
+ [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]])
- panel = df.to_panel()
- assert isna(panel[0].loc[1, [0, 1]]).all()
+ panel = df.to_panel()
+ assert isna(panel[0].loc[1, [0, 1]]).all()
def test_to_panel_duplicates(self):
# #2441
- with catch_warnings(record=True):
- df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]})
- idf = df.set_index(['a', 'b'])
- tm.assert_raises_regex(
- ValueError, 'non-uniquely indexed', idf.to_panel)
+ df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]})
+ idf = df.set_index(['a', 'b'])
+ tm.assert_raises_regex(
+ ValueError, 'non-uniquely indexed', idf.to_panel)
def test_panel_dups(self):
- with catch_warnings(record=True):
- # GH 4960
- # duplicates in an index
+ # GH 4960
+ # duplicates in an index
- # items
- data = np.random.randn(5, 100, 5)
- no_dup_panel = Panel(data, items=list("ABCDE"))
- panel = Panel(data, items=list("AACDE"))
+ # items
+ data = np.random.randn(5, 100, 5)
+ no_dup_panel = Panel(data, items=list("ABCDE"))
+ panel = Panel(data, items=list("AACDE"))
- expected = no_dup_panel['A']
- result = panel.iloc[0]
- assert_frame_equal(result, expected)
+ expected = no_dup_panel['A']
+ result = panel.iloc[0]
+ assert_frame_equal(result, expected)
- expected = no_dup_panel['E']
- result = panel.loc['E']
- assert_frame_equal(result, expected)
+ expected = no_dup_panel['E']
+ result = panel.loc['E']
+ assert_frame_equal(result, expected)
- expected = no_dup_panel.loc[['A', 'B']]
- expected.items = ['A', 'A']
- result = panel.loc['A']
- assert_panel_equal(result, expected)
+ expected = no_dup_panel.loc[['A', 'B']]
+ expected.items = ['A', 'A']
+ result = panel.loc['A']
+ assert_panel_equal(result, expected)
- # major
- data = np.random.randn(5, 5, 5)
- no_dup_panel = Panel(data, major_axis=list("ABCDE"))
- panel = Panel(data, major_axis=list("AACDE"))
+ # major
+ data = np.random.randn(5, 5, 5)
+ no_dup_panel = Panel(data, major_axis=list("ABCDE"))
+ panel = Panel(data, major_axis=list("AACDE"))
- expected = no_dup_panel.loc[:, 'A']
- result = panel.iloc[:, 0]
- assert_frame_equal(result, expected)
+ expected = no_dup_panel.loc[:, 'A']
+ result = panel.iloc[:, 0]
+ assert_frame_equal(result, expected)
- expected = no_dup_panel.loc[:, 'E']
- result = panel.loc[:, 'E']
- assert_frame_equal(result, expected)
+ expected = no_dup_panel.loc[:, 'E']
+ result = panel.loc[:, 'E']
+ assert_frame_equal(result, expected)
- expected = no_dup_panel.loc[:, ['A', 'B']]
- expected.major_axis = ['A', 'A']
- result = panel.loc[:, 'A']
- assert_panel_equal(result, expected)
+ expected = no_dup_panel.loc[:, ['A', 'B']]
+ expected.major_axis = ['A', 'A']
+ result = panel.loc[:, 'A']
+ assert_panel_equal(result, expected)
- # minor
- data = np.random.randn(5, 100, 5)
- no_dup_panel = Panel(data, minor_axis=list("ABCDE"))
- panel = Panel(data, minor_axis=list("AACDE"))
+ # minor
+ data = np.random.randn(5, 100, 5)
+ no_dup_panel = Panel(data, minor_axis=list("ABCDE"))
+ panel = Panel(data, minor_axis=list("AACDE"))
- expected = no_dup_panel.loc[:, :, 'A']
- result = panel.iloc[:, :, 0]
- assert_frame_equal(result, expected)
+ expected = no_dup_panel.loc[:, :, 'A']
+ result = panel.iloc[:, :, 0]
+ assert_frame_equal(result, expected)
- expected = no_dup_panel.loc[:, :, 'E']
- result = panel.loc[:, :, 'E']
- assert_frame_equal(result, expected)
+ expected = no_dup_panel.loc[:, :, 'E']
+ result = panel.loc[:, :, 'E']
+ assert_frame_equal(result, expected)
- expected = no_dup_panel.loc[:, :, ['A', 'B']]
- expected.minor_axis = ['A', 'A']
- result = panel.loc[:, :, 'A']
- assert_panel_equal(result, expected)
+ expected = no_dup_panel.loc[:, :, ['A', 'B']]
+ expected.minor_axis = ['A', 'A']
+ result = panel.loc[:, :, 'A']
+ assert_panel_equal(result, expected)
def test_filter(self):
pass
def test_compound(self):
- with catch_warnings(record=True):
- compounded = self.panel.compound()
+ compounded = self.panel.compound()
- assert_series_equal(compounded['ItemA'],
- (1 + self.panel['ItemA']).product(0) - 1,
- check_names=False)
+ assert_series_equal(compounded['ItemA'],
+ (1 + self.panel['ItemA']).product(0) - 1,
+ check_names=False)
def test_shift(self):
- with catch_warnings(record=True):
- # major
- idx = self.panel.major_axis[0]
- idx_lag = self.panel.major_axis[1]
- shifted = self.panel.shift(1)
- assert_frame_equal(self.panel.major_xs(idx),
- shifted.major_xs(idx_lag))
-
- # minor
- idx = self.panel.minor_axis[0]
- idx_lag = self.panel.minor_axis[1]
- shifted = self.panel.shift(1, axis='minor')
- assert_frame_equal(self.panel.minor_xs(idx),
- shifted.minor_xs(idx_lag))
-
- # items
- idx = self.panel.items[0]
- idx_lag = self.panel.items[1]
- shifted = self.panel.shift(1, axis='items')
- assert_frame_equal(self.panel[idx], shifted[idx_lag])
-
- # negative numbers, #2164
- result = self.panel.shift(-1)
- expected = Panel({i: f.shift(-1)[:-1]
- for i, f in self.panel.iteritems()})
- assert_panel_equal(result, expected)
-
- # mixed dtypes #6959
- data = [('item ' + ch, makeMixedDataFrame())
- for ch in list('abcde')]
- data = dict(data)
- mixed_panel = Panel.from_dict(data, orient='minor')
- shifted = mixed_panel.shift(1)
- assert_series_equal(mixed_panel.dtypes, shifted.dtypes)
+ # major
+ idx = self.panel.major_axis[0]
+ idx_lag = self.panel.major_axis[1]
+ shifted = self.panel.shift(1)
+ assert_frame_equal(self.panel.major_xs(idx),
+ shifted.major_xs(idx_lag))
+
+ # minor
+ idx = self.panel.minor_axis[0]
+ idx_lag = self.panel.minor_axis[1]
+ shifted = self.panel.shift(1, axis='minor')
+ assert_frame_equal(self.panel.minor_xs(idx),
+ shifted.minor_xs(idx_lag))
+
+ # items
+ idx = self.panel.items[0]
+ idx_lag = self.panel.items[1]
+ shifted = self.panel.shift(1, axis='items')
+ assert_frame_equal(self.panel[idx], shifted[idx_lag])
+
+ # negative numbers, #2164
+ result = self.panel.shift(-1)
+ expected = Panel({i: f.shift(-1)[:-1]
+ for i, f in self.panel.iteritems()})
+ assert_panel_equal(result, expected)
+
+ # mixed dtypes #6959
+ data = [('item ' + ch, makeMixedDataFrame())
+ for ch in list('abcde')]
+ data = dict(data)
+ mixed_panel = Panel.from_dict(data, orient='minor')
+ shifted = mixed_panel.shift(1)
+ assert_series_equal(mixed_panel.dtypes, shifted.dtypes)
def test_tshift(self):
# PeriodIndex
- with catch_warnings(record=True):
- ps = tm.makePeriodPanel()
- shifted = ps.tshift(1)
- unshifted = shifted.tshift(-1)
+ ps = tm.makePeriodPanel()
+ shifted = ps.tshift(1)
+ unshifted = shifted.tshift(-1)
- assert_panel_equal(unshifted, ps)
+ assert_panel_equal(unshifted, ps)
- shifted2 = ps.tshift(freq='B')
- assert_panel_equal(shifted, shifted2)
+ shifted2 = ps.tshift(freq='B')
+ assert_panel_equal(shifted, shifted2)
- shifted3 = ps.tshift(freq=BDay())
- assert_panel_equal(shifted, shifted3)
+ shifted3 = ps.tshift(freq=BDay())
+ assert_panel_equal(shifted, shifted3)
- tm.assert_raises_regex(ValueError, 'does not match',
- ps.tshift, freq='M')
+ tm.assert_raises_regex(ValueError, 'does not match',
+ ps.tshift, freq='M')
- # DatetimeIndex
- panel = make_test_panel()
- shifted = panel.tshift(1)
- unshifted = shifted.tshift(-1)
+ # DatetimeIndex
+ panel = make_test_panel()
+ shifted = panel.tshift(1)
+ unshifted = shifted.tshift(-1)
- assert_panel_equal(panel, unshifted)
+ assert_panel_equal(panel, unshifted)
- shifted2 = panel.tshift(freq=panel.major_axis.freq)
- assert_panel_equal(shifted, shifted2)
+ shifted2 = panel.tshift(freq=panel.major_axis.freq)
+ assert_panel_equal(shifted, shifted2)
- inferred_ts = Panel(panel.values, items=panel.items,
- major_axis=Index(np.asarray(panel.major_axis)),
- minor_axis=panel.minor_axis)
- shifted = inferred_ts.tshift(1)
- unshifted = shifted.tshift(-1)
- assert_panel_equal(shifted, panel.tshift(1))
- assert_panel_equal(unshifted, inferred_ts)
+ inferred_ts = Panel(panel.values, items=panel.items,
+ major_axis=Index(np.asarray(panel.major_axis)),
+ minor_axis=panel.minor_axis)
+ shifted = inferred_ts.tshift(1)
+ unshifted = shifted.tshift(-1)
+ assert_panel_equal(shifted, panel.tshift(1))
+ assert_panel_equal(unshifted, inferred_ts)
- no_freq = panel.iloc[:, [0, 5, 7], :]
- pytest.raises(ValueError, no_freq.tshift)
+ no_freq = panel.iloc[:, [0, 5, 7], :]
+ pytest.raises(ValueError, no_freq.tshift)
def test_pct_change(self):
- with catch_warnings(record=True):
- df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]})
- df2 = df1 + 1
- df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]})
- wp = Panel({'i1': df1, 'i2': df2, 'i3': df3})
- # major, 1
- result = wp.pct_change() # axis='major'
- expected = Panel({'i1': df1.pct_change(),
- 'i2': df2.pct_change(),
- 'i3': df3.pct_change()})
- assert_panel_equal(result, expected)
- result = wp.pct_change(axis=1)
- assert_panel_equal(result, expected)
- # major, 2
- result = wp.pct_change(periods=2)
- expected = Panel({'i1': df1.pct_change(2),
- 'i2': df2.pct_change(2),
- 'i3': df3.pct_change(2)})
- assert_panel_equal(result, expected)
- # minor, 1
- result = wp.pct_change(axis='minor')
- expected = Panel({'i1': df1.pct_change(axis=1),
- 'i2': df2.pct_change(axis=1),
- 'i3': df3.pct_change(axis=1)})
- assert_panel_equal(result, expected)
- result = wp.pct_change(axis=2)
- assert_panel_equal(result, expected)
- # minor, 2
- result = wp.pct_change(periods=2, axis='minor')
- expected = Panel({'i1': df1.pct_change(periods=2, axis=1),
- 'i2': df2.pct_change(periods=2, axis=1),
- 'i3': df3.pct_change(periods=2, axis=1)})
- assert_panel_equal(result, expected)
- # items, 1
- result = wp.pct_change(axis='items')
- expected = Panel(
- {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan],
- 'c2': [np.nan, np.nan, np.nan]}),
- 'i2': DataFrame({'c1': [1, 0.5, .2],
- 'c2': [1. / 3, 0.25, 1. / 6]}),
- 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6],
- 'c2': [.25, .2, 1. / 7]})})
- assert_panel_equal(result, expected)
- result = wp.pct_change(axis=0)
- assert_panel_equal(result, expected)
- # items, 2
- result = wp.pct_change(periods=2, axis='items')
- expected = Panel(
- {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan],
- 'c2': [np.nan, np.nan, np.nan]}),
- 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan],
- 'c2': [np.nan, np.nan, np.nan]}),
- 'i3': DataFrame({'c1': [2, 1, .4],
- 'c2': [2. / 3, .5, 1. / 3]})})
- assert_panel_equal(result, expected)
+ df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]})
+ df2 = df1 + 1
+ df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]})
+ wp = Panel({'i1': df1, 'i2': df2, 'i3': df3})
+ # major, 1
+ result = wp.pct_change() # axis='major'
+ expected = Panel({'i1': df1.pct_change(),
+ 'i2': df2.pct_change(),
+ 'i3': df3.pct_change()})
+ assert_panel_equal(result, expected)
+ result = wp.pct_change(axis=1)
+ assert_panel_equal(result, expected)
+ # major, 2
+ result = wp.pct_change(periods=2)
+ expected = Panel({'i1': df1.pct_change(2),
+ 'i2': df2.pct_change(2),
+ 'i3': df3.pct_change(2)})
+ assert_panel_equal(result, expected)
+ # minor, 1
+ result = wp.pct_change(axis='minor')
+ expected = Panel({'i1': df1.pct_change(axis=1),
+ 'i2': df2.pct_change(axis=1),
+ 'i3': df3.pct_change(axis=1)})
+ assert_panel_equal(result, expected)
+ result = wp.pct_change(axis=2)
+ assert_panel_equal(result, expected)
+ # minor, 2
+ result = wp.pct_change(periods=2, axis='minor')
+ expected = Panel({'i1': df1.pct_change(periods=2, axis=1),
+ 'i2': df2.pct_change(periods=2, axis=1),
+ 'i3': df3.pct_change(periods=2, axis=1)})
+ assert_panel_equal(result, expected)
+ # items, 1
+ result = wp.pct_change(axis='items')
+ expected = Panel(
+ {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan],
+ 'c2': [np.nan, np.nan, np.nan]}),
+ 'i2': DataFrame({'c1': [1, 0.5, .2],
+ 'c2': [1. / 3, 0.25, 1. / 6]}),
+ 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6],
+ 'c2': [.25, .2, 1. / 7]})})
+ assert_panel_equal(result, expected)
+ result = wp.pct_change(axis=0)
+ assert_panel_equal(result, expected)
+ # items, 2
+ result = wp.pct_change(periods=2, axis='items')
+ expected = Panel(
+ {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan],
+ 'c2': [np.nan, np.nan, np.nan]}),
+ 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan],
+ 'c2': [np.nan, np.nan, np.nan]}),
+ 'i3': DataFrame({'c1': [2, 1, .4],
+ 'c2': [2. / 3, .5, 1. / 3]})})
+ assert_panel_equal(result, expected)
def test_round(self):
- with catch_warnings(record=True):
- values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12],
- [-1566.213, 88.88], [-12, 94.5]],
- [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12],
- [272.212, -99.99], [23, -76.5]]]
- evalues = [[[float(np.around(i)) for i in j] for j in k]
- for k in values]
- p = Panel(values, items=['Item1', 'Item2'],
- major_axis=date_range('1/1/2000', periods=5),
- minor_axis=['A', 'B'])
- expected = Panel(evalues, items=['Item1', 'Item2'],
- major_axis=date_range('1/1/2000', periods=5),
- minor_axis=['A', 'B'])
- result = p.round()
- assert_panel_equal(expected, result)
+ values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12],
+ [-1566.213, 88.88], [-12, 94.5]],
+ [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12],
+ [272.212, -99.99], [23, -76.5]]]
+ evalues = [[[float(np.around(i)) for i in j] for j in k]
+ for k in values]
+ p = Panel(values, items=['Item1', 'Item2'],
+ major_axis=date_range('1/1/2000', periods=5),
+ minor_axis=['A', 'B'])
+ expected = Panel(evalues, items=['Item1', 'Item2'],
+ major_axis=date_range('1/1/2000', periods=5),
+ minor_axis=['A', 'B'])
+ result = p.round()
+ assert_panel_equal(expected, result)
def test_numpy_round(self):
- with catch_warnings(record=True):
- values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12],
- [-1566.213, 88.88], [-12, 94.5]],
- [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12],
- [272.212, -99.99], [23, -76.5]]]
- evalues = [[[float(np.around(i)) for i in j] for j in k]
- for k in values]
- p = Panel(values, items=['Item1', 'Item2'],
- major_axis=date_range('1/1/2000', periods=5),
- minor_axis=['A', 'B'])
- expected = Panel(evalues, items=['Item1', 'Item2'],
- major_axis=date_range('1/1/2000', periods=5),
- minor_axis=['A', 'B'])
- result = np.round(p)
- assert_panel_equal(expected, result)
-
- msg = "the 'out' parameter is not supported"
- tm.assert_raises_regex(ValueError, msg, np.round, p, out=p)
-
+ values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12],
+ [-1566.213, 88.88], [-12, 94.5]],
+ [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12],
+ [272.212, -99.99], [23, -76.5]]]
+ evalues = [[[float(np.around(i)) for i in j] for j in k]
+ for k in values]
+ p = Panel(values, items=['Item1', 'Item2'],
+ major_axis=date_range('1/1/2000', periods=5),
+ minor_axis=['A', 'B'])
+ expected = Panel(evalues, items=['Item1', 'Item2'],
+ major_axis=date_range('1/1/2000', periods=5),
+ minor_axis=['A', 'B'])
+ result = np.round(p)
+ assert_panel_equal(expected, result)
+
+ msg = "the 'out' parameter is not supported"
+ tm.assert_raises_regex(ValueError, msg, np.round, p, out=p)
+
+    # Panel is being removed before NumPy enforces this, so just ignore
+ @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_multiindex_get(self):
- with catch_warnings(record=True):
- ind = MultiIndex.from_tuples(
- [('a', 1), ('a', 2), ('b', 1), ('b', 2)],
- names=['first', 'second'])
- wp = Panel(np.random.random((4, 5, 5)),
- items=ind,
- major_axis=np.arange(5),
- minor_axis=np.arange(5))
- f1 = wp['a']
- f2 = wp.loc['a']
- assert_panel_equal(f1, f2)
-
- assert (f1.items == [1, 2]).all()
- assert (f2.items == [1, 2]).all()
-
- MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
- names=['first', 'second'])
-
+ ind = MultiIndex.from_tuples(
+ [('a', 1), ('a', 2), ('b', 1), ('b', 2)],
+ names=['first', 'second'])
+ wp = Panel(np.random.random((4, 5, 5)),
+ items=ind,
+ major_axis=np.arange(5),
+ minor_axis=np.arange(5))
+ f1 = wp['a']
+ f2 = wp.loc['a']
+ assert_panel_equal(f1, f2)
+
+ assert (f1.items == [1, 2]).all()
+ assert (f2.items == [1, 2]).all()
+
+ MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
+ names=['first', 'second'])
+
+ @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_multiindex_blocks(self):
- with catch_warnings(record=True):
- ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
- names=['first', 'second'])
- wp = Panel(self.panel._data)
- wp.items = ind
- f1 = wp['a']
- assert (f1.items == [1, 2]).all()
+ ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)],
+ names=['first', 'second'])
+ wp = Panel(self.panel._data)
+ wp.items = ind
+ f1 = wp['a']
+ assert (f1.items == [1, 2]).all()
- f1 = wp[('b', 1)]
- assert (f1.columns == ['A', 'B', 'C', 'D']).all()
+ f1 = wp[('b', 1)]
+ assert (f1.columns == ['A', 'B', 'C', 'D']).all()
def test_repr_empty(self):
- with catch_warnings(record=True):
- empty = Panel()
- repr(empty)
+ empty = Panel()
+ repr(empty)
+    # ignore our own FutureWarning, since Panel is being removed
+ @pytest.mark.filterwarnings("ignore:Using:FutureWarning")
def test_rename(self):
- with catch_warnings(record=True):
- mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'}
+ mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'}
- renamed = self.panel.rename_axis(mapper, axis=0)
- exp = Index(['foo', 'bar', 'baz'])
- tm.assert_index_equal(renamed.items, exp)
+ renamed = self.panel.rename_axis(mapper, axis=0)
+ exp = Index(['foo', 'bar', 'baz'])
+ tm.assert_index_equal(renamed.items, exp)
- renamed = self.panel.rename_axis(str.lower, axis=2)
- exp = Index(['a', 'b', 'c', 'd'])
- tm.assert_index_equal(renamed.minor_axis, exp)
+ renamed = self.panel.rename_axis(str.lower, axis=2)
+ exp = Index(['a', 'b', 'c', 'd'])
+ tm.assert_index_equal(renamed.minor_axis, exp)
- # don't copy
- renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False)
- renamed_nocopy['foo'] = 3.
- assert (self.panel['ItemA'].values == 3).all()
+ # don't copy
+ renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False)
+ renamed_nocopy['foo'] = 3.
+ assert (self.panel['ItemA'].values == 3).all()
def test_get_attr(self):
assert_frame_equal(self.panel['ItemA'], self.panel.ItemA)
@@ -2191,13 +2133,12 @@ def test_get_attr(self):
assert_frame_equal(self.panel['i'], self.panel.i)
def test_from_frame_level1_unsorted(self):
- with catch_warnings(record=True):
- tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1),
- ('MSFT', 1)]
- midx = MultiIndex.from_tuples(tuples)
- df = DataFrame(np.random.rand(5, 4), index=midx)
- p = df.to_panel()
- assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index())
+ tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1),
+ ('MSFT', 1)]
+ midx = MultiIndex.from_tuples(tuples)
+ df = DataFrame(np.random.rand(5, 4), index=midx)
+ p = df.to_panel()
+ assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index())
def test_to_excel(self):
try:
@@ -2239,194 +2180,188 @@ def test_to_excel_xlsxwriter(self):
recdf = reader.parse(str(item), index_col=0)
assert_frame_equal(df, recdf)
+ @pytest.mark.filterwarnings("ignore:'.reindex:FutureWarning")
def test_dropna(self):
- with catch_warnings(record=True):
- p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde'))
- p.loc[:, ['b', 'd'], 0] = np.nan
+ p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde'))
+ p.loc[:, ['b', 'd'], 0] = np.nan
- result = p.dropna(axis=1)
- exp = p.loc[:, ['a', 'c', 'e'], :]
- assert_panel_equal(result, exp)
- inp = p.copy()
- inp.dropna(axis=1, inplace=True)
- assert_panel_equal(inp, exp)
+ result = p.dropna(axis=1)
+ exp = p.loc[:, ['a', 'c', 'e'], :]
+ assert_panel_equal(result, exp)
+ inp = p.copy()
+ inp.dropna(axis=1, inplace=True)
+ assert_panel_equal(inp, exp)
- result = p.dropna(axis=1, how='all')
- assert_panel_equal(result, p)
+ result = p.dropna(axis=1, how='all')
+ assert_panel_equal(result, p)
- p.loc[:, ['b', 'd'], :] = np.nan
- result = p.dropna(axis=1, how='all')
- exp = p.loc[:, ['a', 'c', 'e'], :]
- assert_panel_equal(result, exp)
+ p.loc[:, ['b', 'd'], :] = np.nan
+ result = p.dropna(axis=1, how='all')
+ exp = p.loc[:, ['a', 'c', 'e'], :]
+ assert_panel_equal(result, exp)
- p = Panel(np.random.randn(4, 5, 6), items=list('abcd'))
- p.loc[['b'], :, 0] = np.nan
+ p = Panel(np.random.randn(4, 5, 6), items=list('abcd'))
+ p.loc[['b'], :, 0] = np.nan
- result = p.dropna()
- exp = p.loc[['a', 'c', 'd']]
- assert_panel_equal(result, exp)
+ result = p.dropna()
+ exp = p.loc[['a', 'c', 'd']]
+ assert_panel_equal(result, exp)
- result = p.dropna(how='all')
- assert_panel_equal(result, p)
+ result = p.dropna(how='all')
+ assert_panel_equal(result, p)
- p.loc['b'] = np.nan
- result = p.dropna(how='all')
- exp = p.loc[['a', 'c', 'd']]
- assert_panel_equal(result, exp)
+ p.loc['b'] = np.nan
+ result = p.dropna(how='all')
+ exp = p.loc[['a', 'c', 'd']]
+ assert_panel_equal(result, exp)
def test_drop(self):
- with catch_warnings(record=True):
- df = DataFrame({"A": [1, 2], "B": [3, 4]})
- panel = Panel({"One": df, "Two": df})
+ df = DataFrame({"A": [1, 2], "B": [3, 4]})
+ panel = Panel({"One": df, "Two": df})
- def check_drop(drop_val, axis_number, aliases, expected):
- try:
- actual = panel.drop(drop_val, axis=axis_number)
+ def check_drop(drop_val, axis_number, aliases, expected):
+ try:
+ actual = panel.drop(drop_val, axis=axis_number)
+ assert_panel_equal(actual, expected)
+ for alias in aliases:
+ actual = panel.drop(drop_val, axis=alias)
assert_panel_equal(actual, expected)
- for alias in aliases:
- actual = panel.drop(drop_val, axis=alias)
- assert_panel_equal(actual, expected)
- except AssertionError:
- pprint_thing("Failed with axis_number %d and aliases: %s" %
- (axis_number, aliases))
- raise
- # Items
- expected = Panel({"One": df})
- check_drop('Two', 0, ['items'], expected)
-
- pytest.raises(KeyError, panel.drop, 'Three')
-
- # errors = 'ignore'
- dropped = panel.drop('Three', errors='ignore')
- assert_panel_equal(dropped, panel)
- dropped = panel.drop(['Two', 'Three'], errors='ignore')
- expected = Panel({"One": df})
- assert_panel_equal(dropped, expected)
-
- # Major
- exp_df = DataFrame({"A": [2], "B": [4]}, index=[1])
- expected = Panel({"One": exp_df, "Two": exp_df})
- check_drop(0, 1, ['major_axis', 'major'], expected)
-
- exp_df = DataFrame({"A": [1], "B": [3]}, index=[0])
- expected = Panel({"One": exp_df, "Two": exp_df})
- check_drop([1], 1, ['major_axis', 'major'], expected)
-
- # Minor
- exp_df = df[['B']]
- expected = Panel({"One": exp_df, "Two": exp_df})
- check_drop(["A"], 2, ['minor_axis', 'minor'], expected)
-
- exp_df = df[['A']]
- expected = Panel({"One": exp_df, "Two": exp_df})
- check_drop("B", 2, ['minor_axis', 'minor'], expected)
+ except AssertionError:
+ pprint_thing("Failed with axis_number %d and aliases: %s" %
+ (axis_number, aliases))
+ raise
+ # Items
+ expected = Panel({"One": df})
+ check_drop('Two', 0, ['items'], expected)
+
+ pytest.raises(KeyError, panel.drop, 'Three')
+
+ # errors = 'ignore'
+ dropped = panel.drop('Three', errors='ignore')
+ assert_panel_equal(dropped, panel)
+ dropped = panel.drop(['Two', 'Three'], errors='ignore')
+ expected = Panel({"One": df})
+ assert_panel_equal(dropped, expected)
+
+ # Major
+ exp_df = DataFrame({"A": [2], "B": [4]}, index=[1])
+ expected = Panel({"One": exp_df, "Two": exp_df})
+ check_drop(0, 1, ['major_axis', 'major'], expected)
+
+ exp_df = DataFrame({"A": [1], "B": [3]}, index=[0])
+ expected = Panel({"One": exp_df, "Two": exp_df})
+ check_drop([1], 1, ['major_axis', 'major'], expected)
+
+ # Minor
+ exp_df = df[['B']]
+ expected = Panel({"One": exp_df, "Two": exp_df})
+ check_drop(["A"], 2, ['minor_axis', 'minor'], expected)
+
+ exp_df = df[['A']]
+ expected = Panel({"One": exp_df, "Two": exp_df})
+ check_drop("B", 2, ['minor_axis', 'minor'], expected)
def test_update(self):
- with catch_warnings(record=True):
- pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]],
- [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]])
-
- other = Panel(
- [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1])
-
- pan.update(other)
-
- expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.], [1.5, np.nan, 3.]],
- [[3.6, 2., 3], [1.5, np.nan, 7],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]])
+ pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]],
+ [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]])
- assert_panel_equal(pan, expected)
+ other = Panel(
+ [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1])
+
+ pan.update(other)
+
+ expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.], [1.5, np.nan, 3.]],
+ [[3.6, 2., 3], [1.5, np.nan, 7],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]])
+
+ assert_panel_equal(pan, expected)
def test_update_from_dict(self):
- with catch_warnings(record=True):
- pan = Panel({'one': DataFrame([[1.5, np.nan, 3],
- [1.5, np.nan, 3],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]),
- 'two': DataFrame([[1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]])})
-
- other = {'two': DataFrame(
- [[3.6, 2., np.nan], [np.nan, np.nan, 7]])}
-
- pan.update(other)
-
- expected = Panel(
- {'one': DataFrame([[1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]),
- 'two': DataFrame([[3.6, 2., 3],
- [1.5, np.nan, 7],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]])
- }
- )
-
- assert_panel_equal(pan, expected)
+ pan = Panel({'one': DataFrame([[1.5, np.nan, 3],
+ [1.5, np.nan, 3],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]),
+ 'two': DataFrame([[1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]])})
+
+ other = {'two': DataFrame(
+ [[3.6, 2., np.nan], [np.nan, np.nan, 7]])}
+
+ pan.update(other)
+
+ expected = Panel(
+ {'one': DataFrame([[1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]),
+ 'two': DataFrame([[3.6, 2., 3],
+ [1.5, np.nan, 7],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]])
+ }
+ )
+
+ assert_panel_equal(pan, expected)
def test_update_nooverwrite(self):
- with catch_warnings(record=True):
- pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]],
- [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]])
-
- other = Panel(
- [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1])
-
- pan.update(other, overwrite=False)
-
- expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3],
- [1.5, np.nan, 3.], [1.5, np.nan, 3.]],
- [[1.5, 2., 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]])
+ pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]],
+ [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]])
+
+ other = Panel(
+ [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1])
+
+ pan.update(other, overwrite=False)
- assert_panel_equal(pan, expected)
+ expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3],
+ [1.5, np.nan, 3.], [1.5, np.nan, 3.]],
+ [[1.5, 2., 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]])
+
+ assert_panel_equal(pan, expected)
def test_update_filtered(self):
- with catch_warnings(record=True):
- pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]],
- [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]])
+ pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]],
+ [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]])
- other = Panel(
- [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1])
+ other = Panel(
+ [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1])
- pan.update(other, filter_func=lambda x: x > 2)
+ pan.update(other, filter_func=lambda x: x > 2)
- expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.], [1.5, np.nan, 3.]],
- [[1.5, np.nan, 3], [1.5, np.nan, 7],
- [1.5, np.nan, 3.], [1.5, np.nan, 3.]]])
+ expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.], [1.5, np.nan, 3.]],
+ [[1.5, np.nan, 3], [1.5, np.nan, 7],
+ [1.5, np.nan, 3.], [1.5, np.nan, 3.]]])
- assert_panel_equal(pan, expected)
+ assert_panel_equal(pan, expected)
def test_update_raise(self):
- with catch_warnings(record=True):
- pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]],
- [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
- [1.5, np.nan, 3.],
- [1.5, np.nan, 3.]]])
+ pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]],
+ [[1.5, np.nan, 3.], [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.],
+ [1.5, np.nan, 3.]]])
- pytest.raises(Exception, pan.update, *(pan, ),
- **{'raise_conflict': True})
+ pytest.raises(Exception, pan.update, *(pan, ),
+ **{'raise_conflict': True})
def test_all_any(self):
assert (self.panel.all(axis=0).values == nanall(
@@ -2452,6 +2387,7 @@ def test_sort_values(self):
pytest.raises(NotImplementedError, self.panel.sort_values, 'ItemA')
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
class TestPanelFrame(object):
"""
Check that conversions to and from Panel to DataFrame work.
@@ -2463,90 +2399,82 @@ def setup_method(self, method):
self.unfiltered_panel = panel.to_frame(filter_observations=False)
def test_ops_differently_indexed(self):
- with catch_warnings(record=True):
- # trying to set non-identically indexed panel
- wp = self.panel.to_panel()
- wp2 = wp.reindex(major=wp.major_axis[:-1])
- lp2 = wp2.to_frame()
+ # trying to set non-identically indexed panel
+ wp = self.panel.to_panel()
+ wp2 = wp.reindex(major=wp.major_axis[:-1])
+ lp2 = wp2.to_frame()
- result = self.panel + lp2
- assert_frame_equal(result.reindex(lp2.index), lp2 * 2)
+ result = self.panel + lp2
+ assert_frame_equal(result.reindex(lp2.index), lp2 * 2)
- # careful, mutation
- self.panel['foo'] = lp2['ItemA']
- assert_series_equal(self.panel['foo'].reindex(lp2.index),
- lp2['ItemA'],
- check_names=False)
+ # careful, mutation
+ self.panel['foo'] = lp2['ItemA']
+ assert_series_equal(self.panel['foo'].reindex(lp2.index),
+ lp2['ItemA'],
+ check_names=False)
def test_ops_scalar(self):
- with catch_warnings(record=True):
- result = self.panel.mul(2)
- expected = DataFrame.__mul__(self.panel, 2)
- assert_frame_equal(result, expected)
+ result = self.panel.mul(2)
+ expected = DataFrame.__mul__(self.panel, 2)
+ assert_frame_equal(result, expected)
def test_combineFrame(self):
- with catch_warnings(record=True):
- wp = self.panel.to_panel()
- result = self.panel.add(wp['ItemA'].stack(), axis=0)
- assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2)
+ wp = self.panel.to_panel()
+ result = self.panel.add(wp['ItemA'].stack(), axis=0)
+ assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2)
def test_combinePanel(self):
- with catch_warnings(record=True):
- wp = self.panel.to_panel()
- result = self.panel.add(self.panel)
- wide_result = result.to_panel()
- assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA'])
+ wp = self.panel.to_panel()
+ result = self.panel.add(self.panel)
+ wide_result = result.to_panel()
+ assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA'])
- # one item
- result = self.panel.add(self.panel.filter(['ItemA']))
+ # one item
+ result = self.panel.add(self.panel.filter(['ItemA']))
def test_combine_scalar(self):
- with catch_warnings(record=True):
- result = self.panel.mul(2)
- expected = DataFrame(self.panel._data) * 2
- assert_frame_equal(result, expected)
+ result = self.panel.mul(2)
+ expected = DataFrame(self.panel._data) * 2
+ assert_frame_equal(result, expected)
def test_combine_series(self):
- with catch_warnings(record=True):
- s = self.panel['ItemA'][:10]
- result = self.panel.add(s, axis=0)
- expected = DataFrame.add(self.panel, s, axis=0)
- assert_frame_equal(result, expected)
+ s = self.panel['ItemA'][:10]
+ result = self.panel.add(s, axis=0)
+ expected = DataFrame.add(self.panel, s, axis=0)
+ assert_frame_equal(result, expected)
- s = self.panel.iloc[5]
- result = self.panel + s
- expected = DataFrame.add(self.panel, s, axis=1)
- assert_frame_equal(result, expected)
+ s = self.panel.iloc[5]
+ result = self.panel + s
+ expected = DataFrame.add(self.panel, s, axis=1)
+ assert_frame_equal(result, expected)
def test_operators(self):
- with catch_warnings(record=True):
- wp = self.panel.to_panel()
- result = (self.panel + 1).to_panel()
- assert_frame_equal(wp['ItemA'] + 1, result['ItemA'])
+ wp = self.panel.to_panel()
+ result = (self.panel + 1).to_panel()
+ assert_frame_equal(wp['ItemA'] + 1, result['ItemA'])
def test_arith_flex_panel(self):
- with catch_warnings(record=True):
- ops = ['add', 'sub', 'mul', 'div',
- 'truediv', 'pow', 'floordiv', 'mod']
- if not compat.PY3:
- aliases = {}
- else:
- aliases = {'div': 'truediv'}
- self.panel = self.panel.to_panel()
-
- for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]:
- for op in ops:
- alias = aliases.get(op, op)
- f = getattr(operator, alias)
- exp = f(self.panel, n)
- result = getattr(self.panel, op)(n)
- assert_panel_equal(result, exp, check_panel_type=True)
-
- # rops
- r_f = lambda x, y: f(y, x)
- exp = r_f(self.panel, n)
- result = getattr(self.panel, 'r' + op)(n)
- assert_panel_equal(result, exp)
+ ops = ['add', 'sub', 'mul', 'div',
+ 'truediv', 'pow', 'floordiv', 'mod']
+ if not compat.PY3:
+ aliases = {}
+ else:
+ aliases = {'div': 'truediv'}
+ self.panel = self.panel.to_panel()
+
+ for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]:
+ for op in ops:
+ alias = aliases.get(op, op)
+ f = getattr(operator, alias)
+ exp = f(self.panel, n)
+ result = getattr(self.panel, op)(n)
+ assert_panel_equal(result, exp, check_panel_type=True)
+
+ # rops
+ r_f = lambda x, y: f(y, x)
+ exp = r_f(self.panel, n)
+ result = getattr(self.panel, 'r' + op)(n)
+ assert_panel_equal(result, exp)
def test_sort(self):
def is_sorted(arr):
@@ -2569,44 +2497,43 @@ def test_to_sparse(self):
self.panel.to_sparse)
def test_truncate(self):
- with catch_warnings(record=True):
- dates = self.panel.index.levels[0]
- start, end = dates[1], dates[5]
+ dates = self.panel.index.levels[0]
+ start, end = dates[1], dates[5]
- trunced = self.panel.truncate(start, end).to_panel()
- expected = self.panel.to_panel()['ItemA'].truncate(start, end)
+ trunced = self.panel.truncate(start, end).to_panel()
+ expected = self.panel.to_panel()['ItemA'].truncate(start, end)
- # TODO truncate drops index.names
- assert_frame_equal(trunced['ItemA'], expected, check_names=False)
+ # TODO truncate drops index.names
+ assert_frame_equal(trunced['ItemA'], expected, check_names=False)
- trunced = self.panel.truncate(before=start).to_panel()
- expected = self.panel.to_panel()['ItemA'].truncate(before=start)
+ trunced = self.panel.truncate(before=start).to_panel()
+ expected = self.panel.to_panel()['ItemA'].truncate(before=start)
- # TODO truncate drops index.names
- assert_frame_equal(trunced['ItemA'], expected, check_names=False)
+ # TODO truncate drops index.names
+ assert_frame_equal(trunced['ItemA'], expected, check_names=False)
- trunced = self.panel.truncate(after=end).to_panel()
- expected = self.panel.to_panel()['ItemA'].truncate(after=end)
+ trunced = self.panel.truncate(after=end).to_panel()
+ expected = self.panel.to_panel()['ItemA'].truncate(after=end)
- # TODO truncate drops index.names
- assert_frame_equal(trunced['ItemA'], expected, check_names=False)
+ # TODO truncate drops index.names
+ assert_frame_equal(trunced['ItemA'], expected, check_names=False)
- # truncate on dates that aren't in there
- wp = self.panel.to_panel()
- new_index = wp.major_axis[::5]
+ # truncate on dates that aren't in there
+ wp = self.panel.to_panel()
+ new_index = wp.major_axis[::5]
- wp2 = wp.reindex(major=new_index)
+ wp2 = wp.reindex(major=new_index)
- lp2 = wp2.to_frame()
- lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2])
+ lp2 = wp2.to_frame()
+ lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2])
- wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2])
+ wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2])
- assert_panel_equal(wp_trunc, lp_trunc.to_panel())
+ assert_panel_equal(wp_trunc, lp_trunc.to_panel())
- # throw proper exception
- pytest.raises(Exception, lp2.truncate, wp.major_axis[-2],
- wp.major_axis[2])
+ # throw proper exception
+ pytest.raises(Exception, lp2.truncate, wp.major_axis[-2],
+ wp.major_axis[2])
def test_axis_dummies(self):
from pandas.core.reshape.reshape import make_axis_dummies
@@ -2635,46 +2562,42 @@ def test_get_dummies(self):
tm.assert_numpy_array_equal(dummies.values, minor_dummies.values)
def test_mean(self):
- with catch_warnings(record=True):
- means = self.panel.mean(level='minor')
+ means = self.panel.mean(level='minor')
- # test versus Panel version
- wide_means = self.panel.to_panel().mean('major')
- assert_frame_equal(means, wide_means)
+ # test versus Panel version
+ wide_means = self.panel.to_panel().mean('major')
+ assert_frame_equal(means, wide_means)
def test_sum(self):
- with catch_warnings(record=True):
- sums = self.panel.sum(level='minor')
+ sums = self.panel.sum(level='minor')
- # test versus Panel version
- wide_sums = self.panel.to_panel().sum('major')
- assert_frame_equal(sums, wide_sums)
+ # test versus Panel version
+ wide_sums = self.panel.to_panel().sum('major')
+ assert_frame_equal(sums, wide_sums)
def test_count(self):
- with catch_warnings(record=True):
- index = self.panel.index
+ index = self.panel.index
- major_count = self.panel.count(level=0)['ItemA']
- labels = index.labels[0]
- for i, idx in enumerate(index.levels[0]):
- assert major_count[i] == (labels == i).sum()
+ major_count = self.panel.count(level=0)['ItemA']
+ labels = index.labels[0]
+ for i, idx in enumerate(index.levels[0]):
+ assert major_count[i] == (labels == i).sum()
- minor_count = self.panel.count(level=1)['ItemA']
- labels = index.labels[1]
- for i, idx in enumerate(index.levels[1]):
- assert minor_count[i] == (labels == i).sum()
+ minor_count = self.panel.count(level=1)['ItemA']
+ labels = index.labels[1]
+ for i, idx in enumerate(index.levels[1]):
+ assert minor_count[i] == (labels == i).sum()
def test_join(self):
- with catch_warnings(record=True):
- lp1 = self.panel.filter(['ItemA', 'ItemB'])
- lp2 = self.panel.filter(['ItemC'])
+ lp1 = self.panel.filter(['ItemA', 'ItemB'])
+ lp2 = self.panel.filter(['ItemC'])
- joined = lp1.join(lp2)
+ joined = lp1.join(lp2)
- assert len(joined.columns) == 3
+ assert len(joined.columns) == 3
- pytest.raises(Exception, lp1.join,
- self.panel.filter(['ItemB', 'ItemC']))
+ pytest.raises(Exception, lp1.join,
+ self.panel.filter(['ItemB', 'ItemC']))
def test_panel_index():
@@ -2685,8 +2608,8 @@ def test_panel_index():
tm.assert_index_equal(index, expected)
+@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_np_all():
- with catch_warnings(record=True):
- wp = Panel({"A": DataFrame({'b': [1, 2]})})
+ wp = Panel({"A": DataFrame({'b': [1, 2]})})
result = np.all(wp)
assert result == np.bool_(True)
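
The edits above replace pandas' old test idiom of wrapping a whole test body in `with catch_warnings(record=True):` (which captures, and therefore silences, every warning raised inside the block) with pytest's `filterwarnings` mark. The mark string follows the `action:message:category:module:lineno` layout of Python's warning-filter spec, with the message part treated as a regex matched against the start of the warning text; the doubled backslash in `"ignore:\\nPanel:FutureWarning"` yields the regex `\n`, matching the leading newline of the Panel deprecation message. A minimal sketch of the two idioms, assuming a pandas version (< 0.25) in which `Panel` still exists and warns on construction:

    import warnings

    import pytest
    from pandas import Panel

    def test_old_style():
        # captures *all* warnings into a list, hiding unrelated problems
        with warnings.catch_warnings(record=True):
            Panel()

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_new_style():
        # only the named FutureWarning is ignored; anything else surfaces
        Panel()

The decorator form also lets the filter sit next to the test signature instead of adding an indentation level to every line of the body.
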
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index 669fa9742a705..ccd2461d1512e 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -1,6 +1,6 @@
# pylint: disable=E1101
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
from datetime import datetime, timedelta
from functools import partial
from textwrap import dedent
@@ -1463,6 +1463,7 @@ def test_resample_panel(self):
n = len(rng)
with catch_warnings(record=True):
+ simplefilter("ignore", FutureWarning)
panel = Panel(np.random.randn(3, n, 5),
items=['one', 'two', 'three'],
major_axis=rng,
@@ -1485,6 +1486,7 @@ def p_apply(panel, f):
lambda x: x.resample('M', axis=1).mean())
tm.assert_panel_equal(result, expected)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_resample_panel_numpy(self):
rng = date_range('1/1/2000', '6/30/2000')
n = len(rng)
@@ -2483,6 +2485,22 @@ def test_with_local_timezone_dateutil(self):
expected = Series(1, index=expected_index)
assert_series_equal(result, expected)
+ def test_resample_nonexistent_time_bin_edge(self):
+ # GH 19375
+ index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T')
+ s = Series(np.zeros(len(index)), index=index)
+ expected = s.tz_localize('US/Pacific')
+ result = expected.resample('900S').mean()
+ tm.assert_series_equal(result, expected)
+
+ def test_resample_ambiguous_time_bin_edge(self):
+ # GH 10117
+ idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00",
+ freq="30T", tz="Europe/London")
+ expected = Series(np.zeros(len(idx)), index=idx)
+ result = expected.resample('30T').mean()
+ tm.assert_series_equal(result, expected)
+
def test_fill_method_and_how_upsample(self):
# GH2073
s = Series(np.arange(9, dtype='int64'),
@@ -3237,25 +3255,25 @@ def test_apply_iteration(self):
result = grouped.apply(f)
tm.assert_index_equal(result.index, df.index)
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_panel_aggregation(self):
ind = pd.date_range('1/1/2000', periods=100)
data = np.random.randn(2, len(ind), 4)
- with catch_warnings(record=True):
- wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind,
- minor_axis=['A', 'B', 'C', 'D'])
+ wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind,
+ minor_axis=['A', 'B', 'C', 'D'])
- tg = TimeGrouper('M', axis=1)
- _, grouper, _ = tg._get_grouper(wp)
- bingrouped = wp.groupby(grouper)
- binagg = bingrouped.mean()
+ tg = TimeGrouper('M', axis=1)
+ _, grouper, _ = tg._get_grouper(wp)
+ bingrouped = wp.groupby(grouper)
+ binagg = bingrouped.mean()
- def f(x):
- assert (isinstance(x, Panel))
- return x.mean(1)
+ def f(x):
+ assert (isinstance(x, Panel))
+ return x.mean(1)
- result = bingrouped.agg(f)
- tm.assert_panel_equal(result, binagg)
+ result = bingrouped.agg(f)
+ tm.assert_panel_equal(result, binagg)
def test_fails_on_no_datetime_index(self):
index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex')
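
The two tests added above pin down daylight-saving edge cases in resampling. On 2017-03-12, US/Pacific clocks jump straight from 01:59 to 03:00, so a bin edge computed at 02:00 local time does not exist (GH 19375); Europe/London replays the 01:00-02:00 hour on 2014-10-26, so those bin edges are ambiguous (GH 10117). A quick sketch of the spring-forward case, assuming a pandas version containing the GH 19375 fix:

    import numpy as np
    import pandas as pd

    # 15-minute stamps ending at 01:45, just before the 02:00 jump
    idx = pd.date_range('2017-03-12', '2017-03-12 01:45:00', freq='15T')
    s = pd.Series(np.zeros(len(idx)), index=idx).tz_localize('US/Pacific')

    # the closing bin edge would fall at the nonexistent 02:00 local time;
    # with the fix this round-trips instead of raising
    print(s.resample('900S').mean().equals(s))  # True
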
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index ec6d83062c8b0..cc663fc59cbf1 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -1,3 +1,4 @@
+from collections import OrderedDict
from itertools import product
import pytest
import warnings
@@ -153,6 +154,8 @@ def test_agg(self):
tm.assert_frame_equal(result, expected)
with catch_warnings(record=True):
+ # using a dict with renaming
+ warnings.simplefilter("ignore", FutureWarning)
result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}})
expected = concat([a_mean, a_sum], axis=1)
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
@@ -160,6 +163,7 @@ def test_agg(self):
tm.assert_frame_equal(result, expected, check_like=True)
with catch_warnings(record=True):
+ warnings.simplefilter("ignore", FutureWarning)
result = r.aggregate({'A': {'mean': 'mean',
'sum': 'sum'},
'B': {'mean2': 'mean',
@@ -223,11 +227,13 @@ def f():
expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), (
'ra', 'std'), ('rb', 'mean'), ('rb', 'std')])
with catch_warnings(record=True):
+ warnings.simplefilter("ignore", FutureWarning)
result = r[['A', 'B']].agg({'A': {'ra': ['mean', 'std']},
'B': {'rb': ['mean', 'std']}})
tm.assert_frame_equal(result, expected, check_like=True)
with catch_warnings(record=True):
+ warnings.simplefilter("ignore", FutureWarning)
result = r.agg({'A': {'ra': ['mean', 'std']},
'B': {'rb': ['mean', 'std']}})
expected.columns = pd.MultiIndex.from_tuples([('A', 'ra', 'mean'), (
@@ -278,6 +284,7 @@ def test_count_nonnumeric_types(self):
tm.assert_frame_equal(result, expected)
@td.skip_if_no_scipy
+ @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
def test_window_with_args(self):
# make sure that we are aggregating window functions correctly with arg
r = Series(np.random.randn(100)).rolling(window=10, min_periods=1,
@@ -308,7 +315,55 @@ def test_preserve_metadata(self):
assert s2.name == 'foo'
assert s3.name == 'foo'
+ @pytest.mark.parametrize("func,window_size,expected_vals", [
+ ('rolling', 2, [[np.nan, np.nan, np.nan, np.nan],
+ [15., 20., 25., 20.],
+ [25., 30., 35., 30.],
+ [np.nan, np.nan, np.nan, np.nan],
+ [20., 30., 35., 30.],
+ [35., 40., 60., 40.],
+ [60., 80., 85., 80]]),
+ ('expanding', None, [[10., 10., 20., 20.],
+ [15., 20., 25., 20.],
+ [20., 30., 30., 20.],
+ [10., 10., 30., 30.],
+ [20., 30., 35., 30.],
+ [26.666667, 40., 50., 30.],
+ [40., 80., 60., 30.]])])
+ def test_multiple_agg_funcs(self, func, window_size, expected_vals):
+ # GH 15072
+ df = pd.DataFrame([
+ ['A', 10, 20],
+ ['A', 20, 30],
+ ['A', 30, 40],
+ ['B', 10, 30],
+ ['B', 30, 40],
+ ['B', 40, 80],
+ ['B', 80, 90]], columns=['stock', 'low', 'high'])
+
+ f = getattr(df.groupby('stock'), func)
+ if window_size:
+ window = f(window_size)
+ else:
+ window = f()
+
+ index = pd.MultiIndex.from_tuples([
+ ('A', 0), ('A', 1), ('A', 2),
+ ('B', 3), ('B', 4), ('B', 5), ('B', 6)], names=['stock', None])
+ columns = pd.MultiIndex.from_tuples([
+ ('low', 'mean'), ('low', 'max'), ('high', 'mean'),
+ ('high', 'min')])
+ expected = pd.DataFrame(expected_vals, index=index, columns=columns)
+
+ result = window.agg(OrderedDict((
+ ('low', ['mean', 'max']),
+ ('high', ['mean', 'min']),
+ )))
+
+ tm.assert_frame_equal(result, expected)
+
+@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestWindow(Base):
def setup_method(self, method):
@@ -940,6 +995,7 @@ def _create_data(self):
"datetime64[ns, UTC] is not supported ATM")
+@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestMoments(Base):
def setup_method(self, method):
@@ -1901,6 +1957,7 @@ def test_no_pairwise_with_other(self, f):
for (df, result) in zip(self.df1s, results):
if result is not None:
with catch_warnings(record=True):
+ warnings.simplefilter("ignore", RuntimeWarning)
# we can have int and str columns
expected_index = df.index.union(self.df2.index)
expected_columns = df.columns.union(self.df2.columns)
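
The new `test_multiple_agg_funcs` above passes an `OrderedDict` to `.agg` so that the resulting MultiIndex columns come out in a deterministic order even on Python versions (before 3.7) where plain `dict` makes no ordering guarantee. A minimal sketch of the pattern on a toy frame (the frame and names are illustrative, not from the test suite):

    from collections import OrderedDict

    import pandas as pd

    df = pd.DataFrame({'stock': list('AAB'),
                       'low': [10, 20, 10],
                       'high': [20, 30, 30]})

    # per-group rolling window of 2, two aggregations per column
    out = df.groupby('stock').rolling(2).agg(OrderedDict([
        ('low', ['mean', 'max']),
        ('high', ['mean', 'min']),
    ]))
    # columns are guaranteed to be ('low','mean'), ('low','max'),
    # ('high','mean'), ('high','min'), in that order
    print(out.columns.tolist())
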
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index f9f5fc2484bda..b8fabbf52159d 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -1825,6 +1825,7 @@ def test_weekmask_and_holidays(self):
xp_egypt = datetime(2013, 5, 5)
assert xp_egypt == dt + 2 * bday_egypt
+ @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning")
def test_calendar(self):
calendar = USFederalHolidayCalendar()
dt = datetime(2014, 1, 17)
@@ -1987,6 +1988,7 @@ def test_holidays(self):
assert dt + bm_offset == datetime(2012, 1, 30)
assert dt + 2 * bm_offset == datetime(2012, 2, 27)
+ @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning")
def test_datetimeindex(self):
from pandas.tseries.holiday import USFederalHolidayCalendar
hcal = USFederalHolidayCalendar()
@@ -2105,6 +2107,7 @@ def test_holidays(self):
assert dt + bm_offset == datetime(2012, 1, 2)
assert dt + 2 * bm_offset == datetime(2012, 2, 3)
+ @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning")
def test_datetimeindex(self):
hcal = USFederalHolidayCalendar()
cbmb = CBMonthBegin(calendar=hcal)
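
The marks added to these holiday-calendar offset tests show two more details of the filter string: the message part is a regex prefix, so `Non` is enough to match the "Non-vectorized DateOffset" performance warning, and the category may be spelled as a fully qualified dotted path rather than a builtin warning name. A small sketch, assuming it runs under pytest:

    import warnings

    import pytest
    from pandas.errors import PerformanceWarning

    @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning")
    def test_quiet():
        # ignored by the mark: a PerformanceWarning whose text starts 'Non'
        warnings.warn("Non-vectorized path taken", PerformanceWarning)
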
diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py
index f19066ba76b20..07a6895d1e231 100644
--- a/pandas/tests/tseries/offsets/test_offsets_properties.py
+++ b/pandas/tests/tseries/offsets/test_offsets_properties.py
@@ -8,6 +8,7 @@
You may wish to consult the previous version for inspiration on further
tests, or when trying to pin down the bugs exposed by the tests below.
"""
+import warnings
import pytest
from hypothesis import given, assume, strategies as st
@@ -25,6 +26,11 @@
# ----------------------------------------------------------------
# Helpers for generating random data
+with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+    min_dt = pd.Timestamp(1900, 1, 1).to_pydatetime()
+    max_dt = pd.Timestamp(2100, 1, 1).to_pydatetime()
+
gen_date_range = st.builds(
pd.date_range,
start=st.datetimes(
@@ -38,8 +44,8 @@
)
gen_random_datetime = st.datetimes(
- min_value=pd.Timestamp.min.to_pydatetime(),
- max_value=pd.Timestamp.max.to_pydatetime(),
+ min_value=min_dt,
+ max_value=max_dt,
timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones())
)
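
`pd.Timestamp.min` and `pd.Timestamp.max` carry nonzero nanoseconds, so calling `.to_pydatetime()` on them warns about the discarded precision at import time; the module now computes fixed datetime bounds under a suppressed-warnings block instead. A minimal sketch (the 1900/2100 bounds are illustrative; anything inside `Timestamp`'s range works):

    import warnings

    import pandas as pd

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        # Timestamp.min.to_pydatetime() would emit
        # 'Discarding nonzero nanoseconds in conversion'
        lo = pd.Timestamp(1900, 1, 1).to_pydatetime()
        hi = pd.Timestamp(2100, 1, 1).to_pydatetime()

    # plain datetimes, safe to hand to hypothesis'
    # st.datetimes(min_value=lo, max_value=hi)
    assert lo < hi
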
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index 14c9ca1f6cc54..466a22e5916e9 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -92,6 +92,7 @@ def test_parsers_monthfreq(self):
assert result1 == expected
+@pytest.mark.filterwarnings("ignore:_timelex:DeprecationWarning")
class TestGuessDatetimeFormat(object):
@td.skip_if_not_us_locale
@@ -160,6 +161,8 @@ def test_guess_datetime_format_invalid_inputs(self):
('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')])
+ # https://github.com/pandas-dev/pandas/issues/21322 for _timelex
+ @pytest.mark.filterwarnings("ignore:_timelex:DeprecationWarning")
def test_guess_datetime_format_nopadding(self, string, format):
# GH 11142
result = parsing._guess_datetime_format(string)
diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py
index 0c14dcb49c56f..b62260071d996 100644
--- a/pandas/tests/util/test_hashing.py
+++ b/pandas/tests/util/test_hashing.py
@@ -1,7 +1,6 @@
import pytest
import datetime
-from warnings import catch_warnings
import numpy as np
import pandas as pd
@@ -216,12 +215,12 @@ def test_categorical_with_nan_consistency(self):
assert result[0] in expected
assert result[1] in expected
+ @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_pandas_errors(self):
with pytest.raises(TypeError):
hash_pandas_object(pd.Timestamp('20130101'))
- with catch_warnings(record=True):
- obj = tm.makePanel()
+ obj = tm.makePanel()
with pytest.raises(TypeError):
hash_pandas_object(obj)
diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py
index 33dcf6d64b302..0497a827e2e1b 100644
--- a/pandas/tseries/holiday.py
+++ b/pandas/tseries/holiday.py
@@ -1,6 +1,7 @@
import warnings
from pandas import DateOffset, DatetimeIndex, Series, Timestamp
+from pandas.errors import PerformanceWarning
from pandas.compat import add_metaclass
from datetime import datetime, timedelta
from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU # noqa
@@ -281,7 +282,8 @@ def _apply_rule(self, dates):
# if we are adding a non-vectorized value
# ignore the PerformanceWarnings:
- with warnings.catch_warnings(record=True):
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", PerformanceWarning)
dates += offset
return dates
@@ -292,7 +294,7 @@ def _apply_rule(self, dates):
def register(cls):
try:
name = cls.name
- except:
+ except AttributeError:
name = cls.__name__
holiday_calendars[name] = cls
@@ -424,7 +426,7 @@ def merge_class(base, other):
"""
try:
other = other.rules
- except:
+ except AttributeError:
pass
if not isinstance(other, list):
@@ -433,7 +435,7 @@ def merge_class(base, other):
try:
base = base.rules
- except:
+ except AttributeError:
pass
if not isinstance(base, list):
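
`holiday.py` also swaps three bare `except:` clauses for `except AttributeError:`. A bare `except` catches everything, including `KeyboardInterrupt` and `SystemExit`, so any typo inside the `try` block is silently rerouted to the fallback path. The narrowed form keeps only the failure the fallback is actually meant to handle:

    def calendar_name(cls):
        # a genuinely missing .name falls back to the class name;
        # any other error (or Ctrl-C) propagates instead of being swallowed
        try:
            return cls.name
        except AttributeError:
            return cls.__name__
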
diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py
index 5600834f3b615..03fc82a3acef5 100644
--- a/pandas/util/_print_versions.py
+++ b/pandas/util/_print_versions.py
@@ -21,7 +21,7 @@ def get_sys_info():
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
so, serr = pipe.communicate()
- except:
+ except (OSError, ValueError):
pass
else:
if pipe.returncode == 0:
@@ -50,7 +50,7 @@ def get_sys_info():
("LANG", "{lang}".format(lang=os.environ.get('LANG', "None"))),
("LOCALE", '.'.join(map(str, locale.getlocale()))),
])
- except:
+ except (KeyError, ValueError):
pass
return blob
@@ -108,7 +108,7 @@ def show_versions(as_json=False):
mod = importlib.import_module(modname)
ver = ver_f(mod)
deps_blob.append((modname, ver))
- except:
+ except ImportError:
deps_blob.append((modname, None))
if (as_json):
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
index a96563051e7de..e51e0c88e5b95 100644
--- a/pandas/util/_validators.py
+++ b/pandas/util/_validators.py
@@ -59,7 +59,7 @@ def _check_for_default_values(fname, arg_val_dict, compat_args):
# could not compare them directly, so try comparison
# using the 'is' operator
- except:
+ except ValueError:
match = (arg_val_dict[key] is compat_args[key])
if not match:
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 1e8c123fa6f13..3db251e89842d 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -205,8 +205,12 @@ def decompress_file(path, compression):
msg = 'Unrecognized compression type: {}'.format(compression)
raise ValueError(msg)
- yield f
- f.close()
+ try:
+ yield f
+ finally:
+ f.close()
+ if compression == "zip":
+ zip_file.close()
def assert_almost_equal(left, right, check_dtype="equiv",
@@ -221,7 +225,7 @@ def assert_almost_equal(left, right, check_dtype="equiv",
----------
left : object
right : object
- check_dtype : bool / string {'equiv'}, default False
+ check_dtype : bool / string {'equiv'}, default 'equiv'
Check dtype if both a and b are the same type. If 'equiv' is passed in,
then `RangeIndex` and `Int64Index` are also considered equivalent
when doing type checking.
@@ -783,7 +787,7 @@ def assert_index_equal(left, right, exact='equiv', check_names=True,
----------
left : Index
right : Index
- exact : bool / string {'equiv'}, default False
+ exact : bool / string {'equiv'}, default 'equiv'
Whether to check the Index class, dtype and inferred_type
are identical. If 'equiv', then RangeIndex can be substituted for
Int64Index as well.
@@ -1030,7 +1034,7 @@ def assert_interval_array_equal(left, right, exact='equiv',
Whether to check the Index class, dtype and inferred_type
are identical. If 'equiv', then RangeIndex can be substituted for
Int64Index as well.
- obj : str, default 'Categorical'
+ obj : str, default 'IntervalArray'
Specify object name being compared, internally used to show appropriate
assertion message
"""
@@ -1322,12 +1326,13 @@ def assert_frame_equal(left, right, check_dtype=True,
Second DataFrame to compare.
check_dtype : bool, default True
Whether to check the DataFrame dtype is identical.
- check_index_type : {'equiv'} or bool, default 'equiv'
+ check_index_type : bool / string {'equiv'}, default 'equiv'
Whether to check the Index class, dtype and inferred_type
are identical.
- check_column_type : {'equiv'} or bool, default 'equiv'
+ check_column_type : bool / string {'equiv'}, default 'equiv'
Whether to check the columns class, dtype and inferred_type
- are identical.
+ are identical. Is passed as the ``exact`` argument of
+ :func:`assert_index_equal`.
check_frame_type : bool, default True
Whether to check the DataFrame class is identical.
check_less_precise : bool or int, default False
@@ -1897,6 +1902,7 @@ def makePeriodFrame(nper=None):
def makePanel(nper=None):
with warnings.catch_warnings(record=True):
+ warnings.filterwarnings("ignore", "\\nPanel", FutureWarning)
cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]]
data = {c: makeTimeDataFrame(nper) for c in cols}
return Panel.fromDict(data)
@@ -1904,6 +1910,7 @@ def makePanel(nper=None):
def makePeriodPanel(nper=None):
with warnings.catch_warnings(record=True):
+ warnings.filterwarnings("ignore", "\\nPanel", FutureWarning)
cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]]
data = {c: makePeriodFrame(nper) for c in cols}
return Panel.fromDict(data)
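
The `decompress_file` change above is the canonical fix for generator-based context managers: without `try`/`finally`, an exception raised in the `with` body propagates out through the `yield`, and the cleanup code after it never runs, leaking the handle. A minimal standalone sketch of the pattern:

    from contextlib import contextmanager

    @contextmanager
    def opened(path):
        f = open(path)
        try:
            yield f        # exceptions from the with-body re-raise here...
        finally:
            f.close()      # ...but the file is closed either way
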
diff --git a/scripts/convert_deps.py b/scripts/convert_deps.py
index aabeb24a0c3c8..3ff157e0a0d7b 100755
--- a/scripts/convert_deps.py
+++ b/scripts/convert_deps.py
@@ -1,6 +1,7 @@
"""
Convert the conda environment.yaml to a pip requirements.txt
"""
+import re
import yaml
exclude = {'python=3'}
@@ -15,6 +16,7 @@
required = dev['dependencies']
required = [rename.get(dep, dep) for dep in required if dep not in exclude]
optional = [rename.get(dep, dep) for dep in optional if dep not in exclude]
+optional = [re.sub("(?<=[^<>])=", '==', dep) for dep in optional]
with open("ci/requirements_dev.txt", 'wt') as f:
diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py
index 0c0757c6963d7..00496f771570b 100644
--- a/scripts/tests/test_validate_docstrings.py
+++ b/scripts/tests/test_validate_docstrings.py
@@ -362,6 +362,15 @@ def multi_line(self):
which is not correct.
"""
+ def two_paragraph_multi_line(self):
+ """
+ Extends beyond one line
+ which is not correct.
+
+ Extends beyond one line, which in itself is correct but the
+ previous short summary should still be an issue.
+ """
+
class BadParameters(object):
"""
@@ -556,7 +565,9 @@ def test_bad_generic_functions(self, func):
('BadSummaries', 'no_capitalization',
('Summary must start with infinitive verb',)),
('BadSummaries', 'multi_line',
- ('a short summary in a single line should be present',)),
+ ('Summary should fit in a single line.',)),
+ ('BadSummaries', 'two_paragraph_multi_line',
+ ('Summary should fit in a single line.',)),
# Parameters tests
('BadParameters', 'missing_params',
('Parameters {**kwargs} not documented',)),
diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
index 83bb382480eaa..790a62b53845b 100755
--- a/scripts/validate_docstrings.py
+++ b/scripts/validate_docstrings.py
@@ -163,10 +163,12 @@ def double_blank_lines(self):
@property
def summary(self):
- if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
- return ''
return ' '.join(self.doc['Summary'])
+ @property
+ def num_summary_lines(self):
+ return len(self.doc['Summary'])
+
@property
def extended_summary(self):
if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
@@ -452,6 +454,8 @@ def validate_one(func_name):
errs.append('Summary must start with infinitive verb, '
'not third person (e.g. use "Generate" instead of '
'"Generates")')
+ if doc.num_summary_lines > 1:
+ errs.append("Summary should fit in a single line.")
if not doc.extended_summary:
wrns.append('No extended summary found')
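
With the `num_summary_lines` property, `validate_docstrings.py` now flags any summary that the numpydoc parser splits across multiple lines, instead of the old behavior of silently returning an empty summary whenever no extended summary followed. Illustratively (hypothetical docstrings, not from pandas):

    def good():
        """Return the answer.

        The extended summary may span as many lines as needed.
        """

    def bad():
        """Return the answer
        across two lines.
        """  # -> "Summary should fit in a single line."
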
diff --git a/setup.cfg b/setup.cfg
index 5fc0236066b93..e4a2357def474 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -15,7 +15,6 @@ parentdir_prefix = pandas-
ignore =
E402, # module level import not at top of file
E731, # do not assign a lambda expression, use a def
- E741, # do not use variables named 'l', 'O', or 'I'
W503, # line break before binary operator
C405, # Unnecessary (list/tuple) literal - rewrite as a set literal.
C406, # Unnecessary (list/tuple) literal - rewrite as a dict literal.
@@ -38,9 +37,9 @@ markers =
slow: mark a test as slow
network: mark a test as network
high_memory: mark a test as a high-memory only
+ clipboard: mark a pd.read_clipboard test
doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL
-addopts = --strict-data-files
-
+addopts = --strict-data-files --durations=10
[coverage:run]
branch = False
diff --git a/setup.py b/setup.py
index 19438d950e8a7..bfd0c50c9e9be 100755
--- a/setup.py
+++ b/setup.py
@@ -77,7 +77,6 @@ def is_platform_windows():
'_libs/algos_rank_helper.pxi.in'],
'groupby': ['_libs/groupby_helper.pxi.in'],
'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'],
- 'reshape': ['_libs/reshape_helper.pxi.in'],
'hashtable': ['_libs/hashtable_class_helper.pxi.in',
'_libs/hashtable_func_helper.pxi.in'],
'index': ['_libs/index_class_helper.pxi.in'],
@@ -545,8 +544,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
'_libs.parsers': {
'pyxfile': '_libs/parsers',
'depends': ['pandas/_libs/src/parser/tokenizer.h',
- 'pandas/_libs/src/parser/io.h',
- 'pandas/_libs/src/numpy_helper.h'],
+ 'pandas/_libs/src/parser/io.h'],
'sources': ['pandas/_libs/src/parser/tokenizer.c',
'pandas/_libs/src/parser/io.c']},
'_libs.reduction': {
@@ -558,7 +556,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
'include': []},
'_libs.reshape': {
'pyxfile': '_libs/reshape',
- 'depends': _pxi_dep['reshape']},
+ 'depends': []},
'_libs.skiplist': {
'pyxfile': '_libs/skiplist',
'depends': ['pandas/_libs/src/skiplist.h']},