Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Add teardowns for some benchmarks (#17616) #18388

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions asv_bench/benchmarks/hdfstore_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ def setup(self):

def teardown(self):
self.store.close()
self.remove(self.f)

def remove(self, f):
try:
os.remove(self.f)
os.remove(f)
except:
pass

Expand Down Expand Up @@ -115,10 +116,11 @@ def setup(self):

def teardown(self):
self.store.close()
self.remove(self.f)

def remove(self, f):
try:
os.remove(self.f)
os.remove(f)
except:
pass

Expand Down
63 changes: 40 additions & 23 deletions asv_bench/benchmarks/io_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,36 @@
import timeit


class frame_to_csv(object):
class _BenchTeardown(object):
    """
    Base class providing a shared ``teardown`` for benchmarks that use a file.

    Subclasses set the class attribute ``fname`` to the path their benchmark
    reads or writes; ``teardown`` then deletes that file.
    """
    # Path of the file deleted by ``teardown``; ``None`` means nothing to remove.
    fname = None

    def remove(self, f):
        """
        Delete the file at path ``f`` on a best-effort basis.

        Parameters
        ----------
        f : str or None
            Path of the file to delete. ``None`` is accepted and ignored so
            that a subclass which never sets ``fname`` can still be torn down.

        Notes
        -----
        ``OSError`` (for example, the file does not exist) is suppressed.
        Other exceptions, including ``KeyboardInterrupt`` and ``SystemExit``,
        are deliberately allowed to propagate.
        """
        if f is None:
            return
        try:
            os.remove(f)
        except OSError:
            # Cleanup is best-effort: a file that is already gone or cannot
            # be removed must not make the benchmark itself fail.
            pass

    def teardown(self):
        """Remove the file named by ``self.fname``."""
        self.remove(self.fname)


class frame_to_csv(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.df = DataFrame(np.random.randn(3000, 30))

def time_frame_to_csv(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)


class frame_to_csv2(object):
class frame_to_csv2(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.df = DataFrame({'A': range(50000), })
Expand All @@ -28,22 +46,24 @@ def setup(self):
self.df['D'] = (self.df.A + 3.0)

def time_frame_to_csv2(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)


class frame_to_csv_date_formatting(object):
class frame_to_csv_date_formatting(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.rng = date_range('1/1/2000', periods=1000)
self.data = DataFrame(self.rng, index=self.rng)

def time_frame_to_csv_date_formatting(self):
self.data.to_csv('__test__.csv', date_format='%Y%m%d')
self.data.to_csv(self.fname, date_format='%Y%m%d')


class frame_to_csv_mixed(object):
class frame_to_csv_mixed(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.df_float = DataFrame(np.random.randn(5000, 5), dtype='float64', columns=self.create_cols('float'))
Expand All @@ -55,7 +75,7 @@ def setup(self):
self.df = concat([self.df_float, self.df_int, self.df_bool, self.df_object, self.df_dt], axis=1)

def time_frame_to_csv_mixed(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)

def create_cols(self, name):
return [('%s%03d' % (name, i)) for i in range(5)]
Expand Down Expand Up @@ -94,28 +114,30 @@ def time_read_csv_infer_datetime_format_ymd(self):
read_csv(StringIO(self.data), header=None, names=['foo'], parse_dates=['foo'], infer_datetime_format=True)


class read_csv_skiprows(object):
class read_csv_skiprows(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.index = tm.makeStringIndex(20000)
self.df = DataFrame({'float1': randn(20000), 'float2': randn(20000), 'string1': (['foo'] * 20000), 'bool1': ([True] * 20000), 'int1': np.random.randint(0, 200000, size=20000), }, index=self.index)
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)

def time_read_csv_skiprows(self):
read_csv('__test__.csv', skiprows=10000)
read_csv(self.fname, skiprows=10000)


class read_csv_standard(object):
class read_csv_standard(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.index = tm.makeStringIndex(10000)
self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index)
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)

def time_read_csv_standard(self):
read_csv('__test__.csv')
read_csv(self.fname)


class read_parse_dates_iso8601(object):
Expand Down Expand Up @@ -152,15 +174,16 @@ def time_read_uint64_na_values(self):
read_csv(StringIO(self.data1), header=None, na_values=self.na_values)


class write_csv_standard(object):
class write_csv_standard(_BenchTeardown):
goal_time = 0.2
fname = '__test__.csv'

def setup(self):
self.index = tm.makeStringIndex(10000)
self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index)

def time_write_csv_standard(self):
self.df.to_csv('__test__.csv')
self.df.to_csv(self.fname)


class read_csv_from_s3(object):
Expand Down Expand Up @@ -195,7 +218,7 @@ def time_read_nrows(self, compression, engine):
compression=compression, engine=engine)


class read_json_lines(object):
class read_json_lines(_BenchTeardown):
goal_time = 0.2
fname = "__test__.json"

Expand All @@ -205,12 +228,6 @@ def setup(self):
self.df = DataFrame({('float{0}'.format(i), randn(self.N)) for i in range(self.C)})
self.df.to_json(self.fname,orient="records",lines=True)

def teardown(self):
try:
os.remove(self.fname)
except:
pass

def time_read_json_lines(self):
pd.read_json(self.fname, lines=True)

Expand Down
29 changes: 8 additions & 21 deletions asv_bench/benchmarks/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np
from random import randrange


class _Packers(object):
goal_time = 0.2

Expand All @@ -24,12 +25,15 @@ def _setup(self):

def remove(self, f):
try:
os.remove(self.f)
os.remove(f)
except:
pass

def teardown(self):
self.remove(self.f)


class Packers(_Packers):
goal_time = 0.2

def setup(self):
self._setup()
Expand All @@ -38,8 +42,8 @@ def setup(self):
def time_packers_read_csv(self):
pd.read_csv(self.f)


class packers_read_excel(_Packers):
goal_time = 0.2

def setup(self):
self._setup()
Expand All @@ -54,7 +58,6 @@ def time_packers_read_excel(self):


class packers_read_hdf_store(_Packers):
goal_time = 0.2

def setup(self):
self._setup()
Expand Down Expand Up @@ -115,6 +118,7 @@ def setup(self):
def time_packers_read_pickle(self):
pd.read_pickle(self.f)


class packers_read_sql(_Packers):

def setup(self):
Expand Down Expand Up @@ -177,9 +181,6 @@ def setup(self):
def time_write_csv(self):
self.df.to_csv(self.f)

def teardown(self):
self.remove(self.f)


class Excel(_Packers):

Expand Down Expand Up @@ -217,8 +218,6 @@ def time_write_hdf_store(self):
def time_write_hdf_table(self):
self.df2.to_hdf(self.f, 'df', table=True)

def teardown(self):
self.remove(self.f)

class JSON(_Packers):

Expand Down Expand Up @@ -259,9 +258,6 @@ def time_write_json_mixed_float_int_str(self):
def time_write_json_lines(self):
self.df.to_json(self.f, orient="records", lines=True)

def teardown(self):
self.remove(self.f)


class MsgPack(_Packers):

Expand All @@ -271,9 +267,6 @@ def setup(self):
def time_write_msgpack(self):
self.df2.to_msgpack(self.f)

def teardown(self):
self.remove(self.f)


class Pickle(_Packers):

Expand All @@ -283,9 +276,6 @@ def setup(self):
def time_write_pickle(self):
self.df2.to_pickle(self.f)

def teardown(self):
self.remove(self.f)


class SQL(_Packers):

Expand Down Expand Up @@ -313,6 +303,3 @@ def time_write_stata(self):

def time_write_stata_with_validation(self):
self.df3.to_stata(self.f, {'index': 'tc', })

def teardown(self):
self.remove(self.f)