Skip to content

Commit

Permalink
Merge branch 'master' into excel_style
Browse files: browse the repository at this point in the history
  • Loading branch information
jnothman committed Apr 19, 2017
2 parents 3071bac + e082eb2 commit 14035c5
Show file tree
Hide file tree
Showing 412 changed files with 17,926 additions and 19,984 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,4 @@ doc/build/html/index.html
# Windows specific leftover:
doc/tmp.sv
doc/source/styled.xlsx
doc/source/templates/
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ global-exclude *.png
# recursive-include LICENSES *
include versioneer.py
include pandas/_version.py
include pandas/io/formats/templates/*.tpl
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
// The Pythons you'd like to test against. If not provided, defaults
// to the current version of Python used to run `asv`.
// "pythons": ["2.7", "3.4"],
"pythons": ["2.7"],
"pythons": ["3.6"],

// The matrix of dependencies to test. Each key is the name of a
// package (in PyPI) and the values are version numbers. An empty
Expand Down
13 changes: 9 additions & 4 deletions asv_bench/benchmarks/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
import pandas as pd
from pandas.util import testing as tm

try:
from pandas.tools.hashing import hash_pandas_object
except ImportError:
pass


class Algorithms(object):
goal_time = 0.2
Expand Down Expand Up @@ -103,13 +108,13 @@ def setup(self):
self.df.iloc[10:20] = np.nan

def time_frame(self):
self.df.hash()
hash_pandas_object(self.df)

def time_series_int(self):
self.df.E.hash()
hash_pandas_object(self.df.E)

def time_series_string(self):
self.df.B.hash()
hash_pandas_object(self.df.B)

def time_series_categorical(self):
self.df.C.hash()
hash_pandas_object(self.df.C)
5 changes: 4 additions & 1 deletion asv_bench/benchmarks/binary_ops.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from .pandas_vb_common import *
import pandas.computation.expressions as expr
try:
import pandas.core.computation.expressions as expr
except ImportError:
import pandas.computation.expressions as expr


class Ops(object):
Expand Down
7 changes: 5 additions & 2 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from .pandas_vb_common import *
try:
from pandas.types.concat import union_categoricals
from pandas.api.types import union_categoricals
except ImportError:
pass
try:
from pandas.types.concat import union_categoricals
except ImportError:
pass


class Categoricals(object):
Expand Down
5 changes: 4 additions & 1 deletion asv_bench/benchmarks/eval.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from .pandas_vb_common import *
import pandas as pd
import pandas.computation.expressions as expr
try:
import pandas.core.computation.expressions as expr
except ImportError:
import pandas.computation.expressions as expr


class Eval(object):
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/frame_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ def setup(self):
self.data = self.frame.to_dict()
except:
self.data = self.frame.toDict()
self.some_dict = self.data.values()[0]
self.some_dict = list(self.data.values())[0]
self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values]

self.data2 = dict(
((i, dict(((j, float(j)) for j in range(100)))) for i in
xrange(2000)))
range(2000)))

def time_frame_ctor_list_of_dict(self):
DataFrame(self.dict_list)
Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def time_reindex_both_axes_ix(self):
self.df.ix[(self.idx, self.idx)]

def time_reindex_upcast(self):
self.df2.reindex(permutation(range(1200)))
self.df2.reindex(np.random.permutation(range(1200)))


#----------------------------------------------------------------------
Expand Down Expand Up @@ -583,7 +583,7 @@ class frame_assign_timeseries_index(object):
goal_time = 0.2

def setup(self):
self.idx = date_range('1/1/2000', periods=100000, freq='D')
self.idx = date_range('1/1/2000', periods=100000, freq='H')
self.df = DataFrame(randn(100000, 1), columns=['A'], index=self.idx)

def time_frame_assign_timeseries_index(self):
Expand Down
18 changes: 12 additions & 6 deletions asv_bench/benchmarks/gil.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
from .pandas_vb_common import *
from pandas.core import common as com

from pandas.core.algorithms import take_1d

try:
from cStringIO import StringIO
except ImportError:
from io import StringIO

try:
from pandas._libs import algos
except ImportError:
from pandas import algos

try:
from pandas.util.testing import test_parallel

Expand Down Expand Up @@ -167,11 +173,11 @@ def time_nogil_take1d_float64(self):

@test_parallel(num_threads=2)
def take_1d_pg2_int64(self):
com.take_1d(self.df.int64.values, self.indexer)
take_1d(self.df.int64.values, self.indexer)

@test_parallel(num_threads=2)
def take_1d_pg2_float64(self):
com.take_1d(self.df.float64.values, self.indexer)
take_1d(self.df.float64.values, self.indexer)


class nogil_take1d_int64(object):
Expand All @@ -193,11 +199,11 @@ def time_nogil_take1d_int64(self):

@test_parallel(num_threads=2)
def take_1d_pg2_int64(self):
com.take_1d(self.df.int64.values, self.indexer)
take_1d(self.df.int64.values, self.indexer)

@test_parallel(num_threads=2)
def take_1d_pg2_float64(self):
com.take_1d(self.df.float64.values, self.indexer)
take_1d(self.df.float64.values, self.indexer)


class nogil_kth_smallest(object):
Expand Down Expand Up @@ -226,7 +232,7 @@ class nogil_datetime_fields(object):

def setup(self):
self.N = 100000000
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D')
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='T')
self.period = self.dti.to_period('D')
if (not have_real_test_parallel):
raise NotImplementedError
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def setup(self):

def get_test_data(self, ngroups=100, n=100000):
self.unique_groups = range(self.ngroups)
self.arr = np.asarray(np.tile(self.unique_groups, (n / self.ngroups)), dtype=object)
self.arr = np.asarray(np.tile(self.unique_groups, int(n / self.ngroups)), dtype=object)
if (len(self.arr) < n):
self.arr = np.asarray((list(self.arr) + self.unique_groups[:(n - len(self.arr))]), dtype=object)
random.shuffle(self.arr)
Expand Down
48 changes: 23 additions & 25 deletions asv_bench/benchmarks/hdfstore_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,12 @@ def setup(self):
self.remove(self.f)

self.store = HDFStore(self.f)
self.store.put('df1', self.df)
self.store.put('df_mixed', self.df_mixed)

self.store.append('df5', self.df_mixed)
self.store.append('df7', self.df)

self.store.append('df9', self.df_wide)

self.store.append('df11', self.df_wide2)
self.store.append('df12', self.df2)
self.store.put('fixed', self.df)
self.store.put('fixed_mixed', self.df_mixed)
self.store.append('table', self.df2)
self.store.append('table_mixed', self.df_mixed)
self.store.append('table_wide', self.df_wide)
self.store.append('table_wide2', self.df_wide2)

def teardown(self):
self.store.close()
Expand All @@ -52,45 +48,47 @@ def remove(self, f):
pass

def time_read_store(self):
self.store.get('df1')
self.store.get('fixed')

def time_read_store_mixed(self):
self.store.get('df_mixed')
self.store.get('fixed_mixed')

def time_write_store(self):
self.store.put('df2', self.df)
self.store.put('fixed_write', self.df)

def time_write_store_mixed(self):
self.store.put('df_mixed2', self.df_mixed)
self.store.put('fixed_mixed_write', self.df_mixed)

def time_read_store_table_mixed(self):
self.store.select('df5')
self.store.select('table_mixed')

def time_write_store_table_mixed(self):
self.store.append('df6', self.df_mixed)
self.store.append('table_mixed_write', self.df_mixed)

def time_read_store_table(self):
self.store.select('df7')
self.store.select('table')

def time_write_store_table(self):
self.store.append('df8', self.df)
self.store.append('table_write', self.df)

def time_read_store_table_wide(self):
self.store.select('df9')
self.store.select('table_wide')

def time_write_store_table_wide(self):
self.store.append('df10', self.df_wide)
self.store.append('table_wide_write', self.df_wide)

def time_write_store_table_dc(self):
self.store.append('df15', self.df, data_columns=True)
self.store.append('table_dc_write', self.df_dc, data_columns=True)

def time_query_store_table_wide(self):
self.store.select('df11', [('index', '>', self.df_wide2.index[10000]),
('index', '<', self.df_wide2.index[15000])])
start = self.df_wide2.index[10000]
stop = self.df_wide2.index[15000]
self.store.select('table_wide', where="index > start and index < stop")

def time_query_store_table(self):
self.store.select('df12', [('index', '>', self.df2.index[10000]),
('index', '<', self.df2.index[15000])])
start = self.df2.index[10000]
stop = self.df2.index[15000]
self.store.select('table', where="index > start and index < stop")


class HDF5Panel(object):
Expand Down
24 changes: 20 additions & 4 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
from .pandas_vb_common import *
try:
import pandas.computation.expressions as expr
except:
expr = None


class Int64Indexing(object):
Expand Down Expand Up @@ -226,6 +222,26 @@ def time_is_monotonic(self):
self.miint.is_monotonic


class IntervalIndexing(object):
goal_time = 0.2

def setup(self):
self.monotonic = Series(np.arange(1000000),
index=IntervalIndex.from_breaks(np.arange(1000001)))

def time_getitem_scalar(self):
self.monotonic[80000]

def time_loc_scalar(self):
self.monotonic.loc[80000]

def time_getitem_list(self):
self.monotonic[80000:]

def time_loc_list(self):
self.monotonic.loc[80000:]


class PanelIndexing(object):
goal_time = 0.2

Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,5 @@ def setup(self):
self.na_values = set()

def time_convert(self):
pd.lib.maybe_convert_numeric(self.data, self.na_values,
coerce_numeric=False)
lib.maybe_convert_numeric(self.data, self.na_values,
coerce_numeric=False)
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,12 @@ def setup(self):

self.df1 = pd.DataFrame(
{'time': np.random.randint(0, one_count / 20, one_count),
'key': np.random.choice(list(string.uppercase), one_count),
'key': np.random.choice(list(string.ascii_uppercase), one_count),
'key2': np.random.randint(0, 25, one_count),
'value1': np.random.randn(one_count)})
self.df2 = pd.DataFrame(
{'time': np.random.randint(0, two_count / 20, two_count),
'key': np.random.choice(list(string.uppercase), two_count),
'key': np.random.choice(list(string.ascii_uppercase), two_count),
'key2': np.random.randint(0, 25, two_count),
'value2': np.random.randn(two_count)})

Expand Down
16 changes: 9 additions & 7 deletions asv_bench/benchmarks/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,18 +153,20 @@ def time_packers_read_stata_with_validation(self):
class packers_read_sas(_Packers):

def setup(self):
self.f = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'io', 'tests', 'sas', 'data',
'test1.sas7bdat')
self.f2 = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'io', 'tests', 'sas', 'data',
'paxraw_d_short.xpt')

testdir = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'tests', 'io', 'sas')
if not os.path.exists(testdir):
testdir = os.path.join(os.path.dirname(__file__), '..', '..',
'pandas', 'io', 'tests', 'sas')
self.f = os.path.join(testdir, 'data', 'test1.sas7bdat')
self.f2 = os.path.join(testdir, 'data', 'paxraw_d_short.xpt')

def time_read_sas7bdat(self):
pd.read_sas(self.f, format='sas7bdat')

def time_read_xport(self):
pd.read_sas(self.f, format='xport')
pd.read_sas(self.f2, format='xport')


class CSV(_Packers):
Expand Down
4 changes: 1 addition & 3 deletions asv_bench/benchmarks/pandas_vb_common.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from pandas import *
import pandas as pd
from datetime import timedelta
from numpy.random import randn
from numpy.random import randint
from numpy.random import permutation
import pandas.util.testing as tm
import random
import numpy as np
Expand All @@ -18,7 +16,7 @@
np.random.seed(1234)

# try em until it works!
for imp in ['pandas_tseries', 'pandas.lib', 'pandas._libs.lib']:
for imp in ['pandas._libs.lib', 'pandas.lib', 'pandas_tseries']:
try:
lib = import_module(imp)
break
Expand Down
5 changes: 3 additions & 2 deletions asv_bench/benchmarks/panel_ctor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .pandas_vb_common import *
from datetime import timedelta


class Constructors1(object):
Expand All @@ -24,7 +25,7 @@ class Constructors2(object):
def setup(self):
self.data_frames = {}
for x in range(100):
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1)))
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D'))
self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr)
self.data_frames[x] = self.df

Expand All @@ -36,7 +37,7 @@ class Constructors3(object):
goal_time = 0.2

def setup(self):
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq=datetools.Day(1)))
self.dr = np.asarray(DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D'))
self.data_frames = {}
for x in range(100):
self.df = DataFrame({'a': ([0] * len(self.dr)), 'b': ([1] * len(self.dr)), 'c': ([2] * len(self.dr)), }, index=self.dr)
Expand Down
Loading

0 comments on commit 14035c5

Please sign in to comment.