Skip to content

Commit

Permalink
BENCH: asv csv reading benchmarks no longer read StringIO objects off…
Browse files Browse the repository at this point in the history
… the end (#21807)

* Benchmarks for read_csv() now rewind their StringIO objects before
reading them in. Previously, every iteration after the first within an asv
repeat timing run read in no data, because the StringIO object was left
pointing to its end after the first iteration -- setup() only runs between
repeats, not between the iterations within a repeat of timeit.
  • Loading branch information
tylerjereddy authored and jreback committed Jul 28, 2018
1 parent 848b69c commit 0b7a08b
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,14 @@ def time_frame_date_formatting(self):
self.data.to_csv(self.fname, date_format='%Y%m%d')


class ReadCSVDInferDatetimeFormat(object):
class StringIORewind(object):
    """Base class for benchmarks that read one StringIO buffer repeatedly.

    A file-like object that has been read to its end yields no data on the
    next read. Benchmarks that build their buffer once in ``setup`` and read
    it in a timed method should pass the buffer through :meth:`data` so that
    every timed call starts reading from the beginning.
    """

    def data(self, stringio_object):
        """Rewind ``stringio_object`` to its start and return it.

        Parameters
        ----------
        stringio_object : file-like
            Object exposing ``seek``; its position is reset in place.

        Returns
        -------
        The same object that was passed in (not a copy), positioned at 0.
        """
        # Reset the read position so a previous read does not leave the
        # buffer exhausted for the next one.
        stringio_object.seek(0)
        return stringio_object


class ReadCSVDInferDatetimeFormat(StringIORewind):

goal_time = 0.2
params = ([True, False], ['custom', 'iso8601', 'ymd'])
Expand All @@ -66,10 +73,12 @@ def setup(self, infer_datetime_format, format):
'iso8601': '%Y-%m-%d %H:%M:%S',
'ymd': '%Y%m%d'}
dt_format = formats[format]
self.data = StringIO('\n'.join(rng.strftime(dt_format).tolist()))
self.StringIO_input = StringIO('\n'.join(
rng.strftime(dt_format).tolist()))

def time_read_csv(self, infer_datetime_format, format):
read_csv(self.data, header=None, names=['foo'], parse_dates=['foo'],
read_csv(self.data(self.StringIO_input),
header=None, names=['foo'], parse_dates=['foo'],
infer_datetime_format=infer_datetime_format)


Expand All @@ -95,7 +104,7 @@ def time_skipprows(self, skiprows):
read_csv(self.fname, skiprows=skiprows)


class ReadUint64Integers(object):
class ReadUint64Integers(StringIORewind):

goal_time = 0.2

Expand All @@ -108,13 +117,13 @@ def setup(self):
self.data2 = StringIO('\n'.join(arr.astype(str).tolist()))

def time_read_uint64(self):
read_csv(self.data1, header=None, names=['foo'])
read_csv(self.data(self.data1), header=None, names=['foo'])

def time_read_uint64_neg_values(self):
read_csv(self.data2, header=None, names=['foo'])
read_csv(self.data(self.data2), header=None, names=['foo'])

def time_read_uint64_na_values(self):
read_csv(self.data1, header=None, names=['foo'],
read_csv(self.data(self.data1), header=None, names=['foo'],
na_values=self.na_values)


Expand All @@ -140,19 +149,20 @@ def time_thousands(self, sep, thousands):
read_csv(self.fname, sep=sep, thousands=thousands)


class ReadCSVComment(object):
class ReadCSVComment(StringIORewind):

goal_time = 0.2

def setup(self):
data = ['A,B,C'] + (['1,2,3 # comment'] * 100000)
self.s_data = StringIO('\n'.join(data))
self.StringIO_input = StringIO('\n'.join(data))

def time_comment(self):
read_csv(self.s_data, comment='#', header=None, names=list('abc'))
read_csv(self.data(self.StringIO_input), comment='#',
header=None, names=list('abc'))


class ReadCSVFloatPrecision(object):
class ReadCSVFloatPrecision(StringIORewind):

goal_time = 0.2
params = ([',', ';'], ['.', '_'], [None, 'high', 'round_trip'])
Expand All @@ -164,14 +174,14 @@ def setup(self, sep, decimal, float_precision):
rows = sep.join(['0{}'.format(decimal) + '{}'] * 3) + '\n'
data = rows * 5
data = data.format(*floats) * 200 # 1000 x 3 strings csv
self.s_data = StringIO(data)
self.StringIO_input = StringIO(data)

def time_read_csv(self, sep, decimal, float_precision):
read_csv(self.s_data, sep=sep, header=None, names=list('abc'),
float_precision=float_precision)
read_csv(self.data(self.StringIO_input), sep=sep, header=None,
names=list('abc'), float_precision=float_precision)

def time_read_csv_python_engine(self, sep, decimal, float_precision):
read_csv(self.s_data, sep=sep, header=None, engine='python',
read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
float_precision=None, names=list('abc'))


Expand All @@ -193,7 +203,7 @@ def time_convert_direct(self):
read_csv(self.fname, dtype='category')


class ReadCSVParseDates(object):
class ReadCSVParseDates(StringIORewind):

goal_time = 0.2

Expand All @@ -206,12 +216,14 @@ def setup(self):
"""
two_cols = ['KORD,19990127'] * 5
data = data.format(*two_cols)
self.s_data = StringIO(data)
self.StringIO_input = StringIO(data)

def time_multiple_date(self):
read_csv(self.s_data, sep=',', header=None,
names=list(string.digits[:9]), parse_dates=[[1, 2], [1, 3]])
read_csv(self.data(self.StringIO_input), sep=',', header=None,
names=list(string.digits[:9]),
parse_dates=[[1, 2], [1, 3]])

def time_baseline(self):
read_csv(self.s_data, sep=',', header=None, parse_dates=[1],
read_csv(self.data(self.StringIO_input), sep=',', header=None,
parse_dates=[1],
names=list(string.digits[:9]))

0 comments on commit 0b7a08b

Please sign in to comment.