Skip to content

Commit

Permalink
CLN: Add teardowns for some benchmarks (#17616) (#18388)
Browse files: browse the repository at this point in the history
Added teardowns for hdfstore, io and packers benchmarks.
  • Loading branch information
dmanikowski-reef authored and jorisvandenbossche committed Nov 23, 2017
1 parent b45325e commit 492040b
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 46 deletions.
6 changes: 4 additions & 2 deletions asv_bench/benchmarks/hdfstore_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ def setup(self):

def teardown(self):
self.store.close()
self.remove(self.f)

def remove(self, f):
try:
os.remove(self.f)
os.remove(f)
except:
pass

Expand Down Expand Up @@ -115,10 +116,11 @@ def setup(self):

def teardown(self):
self.store.close()
self.remove(self.f)

def remove(self, f):
try:
os.remove(self.f)
os.remove(f)
except:
pass

Expand Down
63 changes: 40 additions & 23 deletions asv_bench/benchmarks/io_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,36 @@
import timeit


class frame_to_csv(object):
class _BenchTeardown(object):
    """
    Base class providing a shared ``teardown`` for file-writing benchmarks.

    Subclasses set the class attribute ``fname`` to the path they write
    to; ``teardown`` then deletes that file on a best-effort basis.
    NOTE(review): presumably ``teardown`` is invoked by the benchmark
    runner after each benchmark -- confirm against the runner's docs.
    """
    # Path of the scratch file to delete in ``teardown``. ``None`` means
    # the subclass has no file to clean up.
    fname = None

    def remove(self, f):
        """
        Delete the file at path ``f``, ignoring filesystem errors.

        Parameters
        ----------
        f : str or None
            Path of the file to delete. ``None`` is a no-op.
        """
        if f is None:
            # Nothing was configured, so there is nothing to delete.
            return
        try:
            os.remove(f)
        except OSError:
            # Best-effort cleanup: a missing or undeletable file must not
            # fail the benchmark. Only OSError is swallowed so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass

    def teardown(self):
        """Delete the file named by ``self.fname``, if any."""
        self.remove(self.fname)


class frame_to_csv(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.df = DataFrame(np.random.randn(3000, 30))

def time_frame_to_csv(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)


class frame_to_csv2(object):
class frame_to_csv2(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.df = DataFrame({'A': range(50000), })
Expand All @@ -28,22 +46,24 @@ def setup(self):
self.df['D'] = (self.df.A + 3.0)

def time_frame_to_csv2(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)


class frame_to_csv_date_formatting(object):
class frame_to_csv_date_formatting(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.rng = date_range('1/1/2000', periods=1000)
self.data = DataFrame(self.rng, index=self.rng)

def time_frame_to_csv_date_formatting(self):
self.data.to_csv('__test__.csv', date_format='%Y%m%d')
self.data.to_csv(self.fname, date_format='%Y%m%d')


class frame_to_csv_mixed(object):
class frame_to_csv_mixed(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.df_float = DataFrame(np.random.randn(5000, 5), dtype='float64', columns=self.create_cols('float'))
Expand All @@ -55,7 +75,7 @@ def setup(self):
self.df = concat([self.df_float, self.df_int, self.df_bool, self.df_object, self.df_dt], axis=1)

def time_frame_to_csv_mixed(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)

def create_cols(self, name):
return [('%s%03d' % (name, i)) for i in range(5)]
Expand Down Expand Up @@ -94,28 +114,30 @@ def time_read_csv_infer_datetime_format_ymd(self):
read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True)


class read_csv_skiprows(object):
class read_csv_skiprows(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.index = tm.makeStringIndex(20000)
self.df = DataFrame({'float1': randn(20000), 'float2': randn(20000), 'string1': (['foo'] * 20000), 'bool1': ([True] * 20000), 'int1': np.random.randint(0, 200000, size=20000), }, index=self.index)
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)

def time_read_csv_skiprows(self):
read_csv('__test__.csv', skiprows=10000)
read_csv(self.fname, skiprows=10000)


class read_csv_standard(object):
class read_csv_standard(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.index = tm.makeStringIndex(10000)
self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index)
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)

def time_read_csv_standard(self):
read_csv('__test__.csv')
read_csv(self.fname)


class read_parse_dates_iso8601(object):
Expand Down Expand Up @@ -152,15 +174,16 @@ def time_read_uint64_na_values(self):
read_csv(StringIO(self.data1), header=None, na_values=self.na_values)


class write_csv_standard(object):
class write_csv_standard(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.index = tm.makeStringIndex(10000)
self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index)

def time_write_csv_standard(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)


class read_csv_from_s3(object):
Expand Down Expand Up @@ -195,7 +218,7 @@ def time_read_nrows(self, compression, engine):
compression=compression, engine=engine)


class read_json_lines(object):
class read_json_lines(_BenchTeardown):
goal_time = 0.2
fname = "__test__.json"

Expand All @@ -205,12 +228,6 @@ def setup(self):
self.df = DataFrame({('float{0}'.format(i), randn(self.N)) for i in range(self.C)})
self.df.to_json(self.fname,orient="records",lines=True)

def teardown(self):
try:
os.remove(self.fname)
except:
pass

def time_read_json_lines(self):
pd.read_json(self.fname, lines=True)

Expand Down
29 changes: 8 additions & 21 deletions asv_bench/benchmarks/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np
from random import randrange


class _Packers(object):
goal_time = 0.2

Expand All @@ -24,12 +25,15 @@ def _setup(self):

def remove(self, f):
try:
os.remove(self.f)
os.remove(f)
except:
pass

def teardown(self):
self.remove(self.f)


class Packers(_Packers):
goal_time = 0.2

def setup(self):
self._setup()
Expand All @@ -38,8 +42,8 @@ def setup(self):
def time_packers_read_csv(self):
pd.read_csv(self.f)


class packers_read_excel(_Packers):
goal_time = 0.2

def setup(self):
self._setup()
Expand All @@ -54,7 +58,6 @@ def time_packers_read_excel(self):


class packers_read_hdf_store(_Packers):
goal_time = 0.2

def setup(self):
self._setup()
Expand Down Expand Up @@ -115,6 +118,7 @@ def setup(self):
def time_packers_read_pickle(self):
pd.read_pickle(self.f)


class packers_read_sql(_Packers):

def setup(self):
Expand Down Expand Up @@ -177,9 +181,6 @@ def setup(self):
def time_write_csv(self):
self.df.to_csv(self.f)

def teardown(self):
self.remove(self.f)


class Excel(_Packers):

Expand Down Expand Up @@ -217,8 +218,6 @@ def time_write_hdf_store(self):
def time_write_hdf_table(self):
self.df2.to_hdf(self.f, 'df', table=True)

def teardown(self):
self.remove(self.f)

class JSON(_Packers):

Expand Down Expand Up @@ -259,9 +258,6 @@ def time_write_json_mixed_float_int_str(self):
def time_write_json_lines(self):
self.df.to_json(self.f, orient="records", lines=True)

def teardown(self):
self.remove(self.f)


class MsgPack(_Packers):

Expand All @@ -271,9 +267,6 @@ def setup(self):
def time_write_msgpack(self):
self.df2.to_msgpack(self.f)

def teardown(self):
self.remove(self.f)


class Pickle(_Packers):

Expand All @@ -283,9 +276,6 @@ def setup(self):
def time_write_pickle(self):
self.df2.to_pickle(self.f)

def teardown(self):
self.remove(self.f)


class SQL(_Packers):

Expand Down Expand Up @@ -313,6 +303,3 @@ def time_write_stata(self):

def time_write_stata_with_validation(self):
self.df3.to_stata(self.f, {'index': 'tc', })

def teardown(self):
self.remove(self.f)

0 comments on commit 492040b

Please sign in to comment.