Skip to content

Commit

Permalink
CLN: ASV eval benchmark (#18500)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mroeschke authored and jreback committed Nov 26, 2017
1 parent 38f41e6 commit c44a063
Showing 1 changed file with 27 additions and 30 deletions.
57 changes: 27 additions & 30 deletions asv_bench/benchmarks/eval.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .pandas_vb_common import *
import numpy as np
import pandas as pd
try:
import pandas.core.computation.expressions as expr
Expand All @@ -7,64 +7,61 @@


class Eval(object):
# asv benchmark class timing pd.eval on arithmetic, boolean and comparison
# expressions over four DataFrames, parameterized by engine and threads.
#
# NOTE(review): this text is a rendered commit diff, not runnable code.
# Removed and added lines are interleaved without +/- markers and the
# indentation has been stripped. The "(removed)" / "(added)" labels below
# are inferred from the paired statements and from the page's
# "27 additions and 30 deletions" summary -- confirm against the real diff.

# asv setting; presumably the target duration (seconds) of a timing run --
# confirm against the asv documentation.
goal_time = 0.2

# Two parameter axes; every method receives (engine, threads).
params = [['numexpr', 'python'], [1, 'all']]
param_names = ['engine', 'threads']

# Builds four 20000 x 100 frames of standard-normal values and, when
# threads == 1, restricts numexpr to a single thread.
def setup(self, engine, threads):
# (removed) unqualified DataFrame; presumably supplied by the star import
# from .pandas_vb_common that this commit drops.
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))
self.df3 = DataFrame(np.random.randn(20000, 100))
self.df4 = DataFrame(np.random.randn(20000, 100))
# (added) fixed seed before generating the data, then the same four frames
# built through the explicit pd namespace.
np.random.seed(1234)
self.df = pd.DataFrame(np.random.randn(20000, 100))
self.df2 = pd.DataFrame(np.random.randn(20000, 100))
self.df3 = pd.DataFrame(np.random.randn(20000, 100))
self.df4 = pd.DataFrame(np.random.randn(20000, 100))

# Only the threads == 1 case changes the numexpr setting; 'all' leaves it
# untouched.
if threads == 1:
expr.set_numexpr_threads(1)

# Times element-wise addition of the four frames through pd.eval.
def time_add(self, engine, threads):
# (removed) bound the frames to local names used by the expression string.
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4
pd.eval('df + df2 + df3 + df4', engine=engine)
# (added) the expression string refers to the attributes on self directly.
pd.eval('self.df + self.df2 + self.df3 + self.df4', engine=engine)

# Times the element-wise AND of four "> 0" masks through pd.eval.
def time_and(self, engine, threads):
# (removed) local-name form of the expression.
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4
pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine=engine)
# (added) self-attribute form, split across two adjacent string literals.
pd.eval('(self.df > 0) & (self.df2 > 0) & '
'(self.df3 > 0) & (self.df4 > 0)', engine=engine)

# Times a chained "<" comparison across the four frames through pd.eval.
def time_chained_cmp(self, engine, threads):
# (removed) local-name form of the expression.
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4
pd.eval('df < df2 < df3 < df4', engine=engine)
# (added) self-attribute form of the expression.
pd.eval('self.df < self.df2 < self.df3 < self.df4', engine=engine)

# Times element-wise multiplication of the four frames through pd.eval.
def time_mult(self, engine, threads):
# (removed) local-name form of the expression.
df, df2, df3, df4 = self.df, self.df2, self.df3, self.df4
pd.eval('df * df2 * df3 * df4', engine=engine)
# (added) self-attribute form of the expression.
pd.eval('self.df * self.df2 * self.df3 * self.df4', engine=engine)

# Calls set_numexpr_threads() with no argument; presumably this restores the
# default thread count changed in setup -- confirm against
# pandas.core.computation.expressions.
def teardown(self, engine, threads):
expr.set_numexpr_threads()


class Query(object):
# asv benchmark class timing DataFrame.query against a datetime index, a
# datetime column and a numeric range filter on a one-million-row frame.
#
# NOTE(review): this text is a rendered commit diff, not runnable code.
# Removed and added lines are interleaved without +/- markers and the
# indentation has been stripped. The "(removed)" / "(added)" labels below
# are inferred from the paired statements and from the page's
# "27 additions and 30 deletions" summary -- confirm against the real diff.

# asv setting; presumably the target duration (seconds) of a timing run --
# confirm against the asv documentation.
goal_time = 0.2

# Prepares the index, the comparison timestamp, the frame(s) to query and the
# numeric bounds used by the timing methods.
def setup(self):
# (removed) unqualified date_range / Series; presumably supplied by the star
# import from .pandas_vb_common that this commit drops.
self.N = 1000000
self.halfway = ((self.N // 2) - 1)
self.index = date_range('20010101', periods=self.N, freq='T')
self.s = Series(self.index)
# (added) fixed seed, then the same values through the explicit pd namespace.
# freq='T' is presumably the pandas minute-frequency alias -- confirm.
np.random.seed(1234)
self.N = 10**6
self.halfway = (self.N // 2) - 1
self.index = pd.date_range('20010101', periods=self.N, freq='T')
self.s = pd.Series(self.index)
# Timestamp at position N // 2 - 1 of the series; used as the cut-off in the
# datetime queries.
self.ts = self.s.iloc[self.halfway]
# (removed) three separate frames: 'a' on the datetime index, 'dates' built
# from the raw values of self.s, and a second 'a' frame that only supplied
# the min/max bounds.
self.df = DataFrame({'a': np.random.randn(self.N), }, index=self.index)
self.df2 = DataFrame({'dates': self.s.values,})

self.df3 = DataFrame({'a': np.random.randn(self.N),})
self.min_val = self.df3['a'].min()
self.max_val = self.df3['a'].max()
# (added) one frame holding both 'a' and 'dates' on the datetime index.
# NOTE(review): 'dates' is given as the Series self.s, which was built without
# an explicit index, while index=self.index is a datetime index. pandas
# presumably aligns Series values in a dict to the passed index, which would
# leave 'dates' all-NaT. The removed code passed self.s.values instead.
# Confirm, and consider passing self.index or self.s.values.
self.df = pd.DataFrame({'a': np.random.randn(self.N), 'dates': self.s},
index=self.index)
# (added) the bounds come from a fresh random array, not from self.df['a'];
# the removed code likewise took them from df3 rather than the queried frame.
self.data = np.random.randn(self.N)
self.min_val = self.data.min()
self.max_val = self.data.max()

# Times filtering rows whose index is earlier than self.ts.
def time_query_datetime_index(self):
# (removed) copied the timestamp to a local referenced as @ts.
ts = self.ts
self.df.query('index < @ts')
# (added) references the attribute directly as @self.ts.
self.df.query('index < @self.ts')

# (removed) method name and body: queried the separate df2 frame.
def time_query_datetime_series(self):
ts = self.ts
self.df2.query('dates < @ts')
# (added) renamed method: times filtering on the 'dates' column of self.df.
def time_query_datetime_column(self):
self.df.query('dates < @self.ts')

# Times a two-sided range filter on column 'a' using the bounds from setup.
def time_query_with_boolean_selection(self):
# (removed) copied the bounds to locals referenced as @min_val / @max_val.
min_val, max_val = self.min_val, self.max_val
self.df.query('(a >= @min_val) & (a <= @max_val)')
# (added) references the attributes directly.
self.df.query('(a >= @self.min_val) & (a <= @self.max_val)')

0 comments on commit c44a063

Please sign in to comment.