diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index 0f5d07f9fac55..2d4bdc7ae812a 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -54,7 +54,14 @@ def time_frame_date_formatting(self):
         self.data.to_csv(self.fname, date_format='%Y%m%d')
 
 
-class ReadCSVDInferDatetimeFormat(object):
+class StringIORewind(object):
+
+    def data(self, stringio_object):
+        stringio_object.seek(0)
+        return stringio_object
+
+
+class ReadCSVDInferDatetimeFormat(StringIORewind):
 
     goal_time = 0.2
     params = ([True, False], ['custom', 'iso8601', 'ymd'])
@@ -66,10 +73,12 @@ def setup(self, infer_datetime_format, format):
                    'iso8601': '%Y-%m-%d %H:%M:%S',
                    'ymd': '%Y%m%d'}
         dt_format = formats[format]
-        self.data = StringIO('\n'.join(rng.strftime(dt_format).tolist()))
+        self.StringIO_input = StringIO('\n'.join(
+                                       rng.strftime(dt_format).tolist()))
 
     def time_read_csv(self, infer_datetime_format, format):
-        read_csv(self.data, header=None, names=['foo'], parse_dates=['foo'],
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo'], parse_dates=['foo'],
                  infer_datetime_format=infer_datetime_format)
 
 
@@ -95,7 +104,7 @@ def time_skipprows(self, skiprows):
         read_csv(self.fname, skiprows=skiprows)
 
 
-class ReadUint64Integers(object):
+class ReadUint64Integers(StringIORewind):
 
     goal_time = 0.2
 
@@ -108,13 +117,13 @@ def setup(self):
         self.data2 = StringIO('\n'.join(arr.astype(str).tolist()))
 
     def time_read_uint64(self):
-        read_csv(self.data1, header=None, names=['foo'])
+        read_csv(self.data(self.data1), header=None, names=['foo'])
 
     def time_read_uint64_neg_values(self):
-        read_csv(self.data2, header=None, names=['foo'])
+        read_csv(self.data(self.data2), header=None, names=['foo'])
 
     def time_read_uint64_na_values(self):
-        read_csv(self.data1, header=None, names=['foo'],
+        read_csv(self.data(self.data1), header=None, names=['foo'],
                  na_values=self.na_values)
 
 
@@ -140,19 +149,20 @@ def time_thousands(self, sep, thousands):
         read_csv(self.fname, sep=sep, thousands=thousands)
 
 
-class ReadCSVComment(object):
+class ReadCSVComment(StringIORewind):
 
     goal_time = 0.2
 
     def setup(self):
         data = ['A,B,C'] + (['1,2,3 # comment'] * 100000)
-        self.s_data = StringIO('\n'.join(data))
+        self.StringIO_input = StringIO('\n'.join(data))
 
     def time_comment(self):
-        read_csv(self.s_data, comment='#', header=None, names=list('abc'))
+        read_csv(self.data(self.StringIO_input), comment='#',
+                 header=None, names=list('abc'))
 
 
-class ReadCSVFloatPrecision(object):
+class ReadCSVFloatPrecision(StringIORewind):
 
     goal_time = 0.2
     params = ([',', ';'], ['.', '_'], [None, 'high', 'round_trip'])
@@ -164,14 +174,14 @@ def setup(self, sep, decimal, float_precision):
         rows = sep.join(['0{}'.format(decimal) + '{}'] * 3) + '\n'
         data = rows * 5
         data = data.format(*floats) * 200  # 1000 x 3 strings csv
-        self.s_data = StringIO(data)
+        self.StringIO_input = StringIO(data)
 
     def time_read_csv(self, sep, decimal, float_precision):
-        read_csv(self.s_data, sep=sep, header=None, names=list('abc'),
-                 float_precision=float_precision)
+        read_csv(self.data(self.StringIO_input), sep=sep, header=None,
+                 names=list('abc'), float_precision=float_precision)
 
     def time_read_csv_python_engine(self, sep, decimal, float_precision):
-        read_csv(self.s_data, sep=sep, header=None, engine='python',
+        read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
                  float_precision=None, names=list('abc'))
 
 
@@ -193,7 +203,7 @@ def time_convert_direct(self):
         read_csv(self.fname, dtype='category')
 
 
-class ReadCSVParseDates(object):
+class ReadCSVParseDates(StringIORewind):
 
     goal_time = 0.2
 
@@ -206,12 +216,14 @@ def setup(self):
                """
         two_cols = ['KORD,19990127'] * 5
         data = data.format(*two_cols)
-        self.s_data = StringIO(data)
+        self.StringIO_input = StringIO(data)
 
     def time_multiple_date(self):
-        read_csv(self.s_data, sep=',', header=None,
-                 names=list(string.digits[:9]), parse_dates=[[1, 2], [1, 3]])
+        read_csv(self.data(self.StringIO_input), sep=',', header=None,
+                 names=list(string.digits[:9]),
+                 parse_dates=[[1, 2], [1, 3]])
 
     def time_baseline(self):
-        read_csv(self.s_data, sep=',', header=None, parse_dates=[1],
+        read_csv(self.data(self.StringIO_input), sep=',', header=None,
+                 parse_dates=[1],
                  names=list(string.digits[:9]))